In [None]:
import pandas as pd
import numpy as np
import json
from sklearn.preprocessing import MinMaxScaler

# Read the raw daily price payload from disk.
with open('alphaVanData_AMC_full_daily.json', 'r') as fh:
    stock_data = json.load(fh)

# Build one row per key of the "Time Series (Daily)" mapping, order the rows by
# index label, cast every column to float, then keep only close and volume.
daily_series = stock_data["Time Series (Daily)"]
stock_df = (
    pd.DataFrame.from_dict(daily_series, orient="index")
    .sort_index()
    .astype(float)
    .loc[:, ["4. close", "5. volume"]]
)

# Fit a min-max scaler on both columns and keep the scaled array.
# NOTE(review): in the visible cells the later merge uses the unscaled
# `stock_df` values, not `stock_df_scaled` -- confirm whether the scaled values
# were meant to feed the model. The scaler is also fit on every row, including
# rows that later land in the test split.
scaler = MinMaxScaler()
scaler.fit(stock_df)
stock_df_scaled = scaler.transform(stock_df)

# Load Social Media Data
with open('redditData.json', 'r') as reddit_file:
    reddit_data = json.load(reddit_file)

# Convert Social Media JSON to DataFrame.
# TODO: adapt this constructor if redditData.json is not already a list of flat
# records; the code below reads the "date", "upvotes", "sentiment" and
# "mentions" fields.
reddit_df = pd.DataFrame(reddit_data)

# Parse the timestamps and truncate them to midnight so that posts made at
# different times on the same day fall into the same daily bucket.
reddit_df["date"] = pd.to_datetime(reddit_df["date"]).dt.normalize()
reddit_df = reddit_df.sort_values("date")

# Aggregate Sentiment Data by Date
reddit_df["sentiment_score"] = reddit_df["upvotes"] * reddit_df["sentiment"]  # upvote-weighted sentiment
social_features = reddit_df.groupby("date")[["mentions", "sentiment_score"]].sum()

# Combine Stock and Social Media Data.
# stock_df is indexed by the date *strings* taken from the JSON keys, whereas
# social_features is indexed by datetimes; string labels never compare equal to
# timestamps, so the join needs both sides on the same datetime index.
# A copy is converted so that stock_df itself is left untouched.
stock_daily = stock_df.copy()
stock_daily.index = pd.to_datetime(stock_daily.index)
combined_df = pd.merge(stock_daily, social_features, left_index=True, right_index=True, how="inner")

# Fail here, with a clear message, rather than later with an obscure shape error.
if combined_df.empty:
    raise ValueError(
        "No overlapping dates between the stock data and the social media data; "
        "check the date formats and ranges of both input files."
    )

########
# Number of consecutive rows of combined_df that form one input sequence.
# The notebook used this name without ever assigning it, so a fresh kernel
# raised NameError here. 10 is a placeholder -- TODO: tune for the dataset.
sequence_length = 10

# Define short squeeze criteria
price_increase_threshold = 0.2  # next close must be more than 20% above the current close
volume_increase_threshold = 2  # next volume must be more than 2x the current volume

# Every window needs `sequence_length` rows plus one following row for the label.
n_windows = len(combined_df) - sequence_length
if n_windows <= 0:
    raise ValueError(
        f"Need more than {sequence_length} rows to build sequences, "
        f"but combined_df has {len(combined_df)}."
    )

features = combined_df.values
close = combined_df.iloc[:, 0].to_numpy()   # first column: close price
volume = combined_df.iloc[:, 1].to_numpy()  # second column: volume

# Create sequences and label targets based on short squeeze criteria.
# range(n_windows) includes the final window whose target is the last row;
# the previous bound (len - sequence_length - 1) silently dropped it.
X, y = [], []
for start in range(n_windows):
    last_day = start + sequence_length - 1  # last row inside the window
    next_day = last_day + 1                 # row the label is computed for

    X.append(features[start:next_day])

    price_increase = (close[next_day] - close[last_day]) / close[last_day]
    volume_increase = volume[next_day] / volume[last_day]

    # Label as 1 only if both criteria are met, otherwise 0
    is_squeeze = (
        price_increase > price_increase_threshold
        and volume_increase > volume_increase_threshold
    )
    y.append(int(is_squeeze))

X, y = np.array(X), np.array(y)


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the sequences for testing; shuffling is disabled so the
# samples keep their original order.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    shuffle=False,
    test_size=0.2,
)

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow_addons.layers import TCN

# Assemble the network layer by layer: a TCN front end that emits one vector per
# sequence (return_sequences=False), a 64-unit ReLU layer, and a single sigmoid
# unit whose output is read as the probability of a short squeeze.
# NOTE(review): TCN is imported from tensorflow_addons.layers in this cell --
# confirm that package really ships a TCN layer (the standalone keras-tcn
# package exposes one as tcn.TCN).
n_features = X.shape[2]
TCNmodel = Sequential()
TCNmodel.add(TCN(input_shape=(sequence_length, n_features), return_sequences=False))
TCNmodel.add(Dense(64, activation='relu'))
TCNmodel.add(Dense(1, activation='sigmoid'))

# Binary classification set-up: cross-entropy loss, Adam optimizer, accuracy metric.
TCNmodel.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
TCNmodel.summary()


In [None]:
# Train the model and keep the returned History object for plotting.
# The epoch count and batch size are fixed values; no tuning is shown here.
# The held-out test arrays double as the validation data.
fit_options = dict(
    validation_data=(X_test, y_test),
    epochs=20,
    batch_size=32,
    verbose=1,
)
history = TCNmodel.fit(X_train, y_train, **fit_options)


In [None]:
# Score the trained model on the held-out split and unpack the loss and the
# accuracy metric it was compiled with.
evaluation = TCNmodel.evaluate(X_test, y_test)
test_loss, test_accuracy = evaluation
print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")

# Model outputs for the same held-out sequences (not new, unseen data).
predictions = TCNmodel.predict(X_test)


In [None]:
import joblib

# Persist the trained network to an .h5 file.
model_path = 'short_squeeze_tcn_model.h5'
TCNmodel.save(model_path)

# Persist the fitted stock-data scaler next to it.
scaler_path = 'stock_data_scaler.pkl'
joblib.dump(scaler, scaler_path)
# TODO: also persist a social-media scaler if one is introduced


In [None]:
import matplotlib.pyplot as plt

# Training vs. validation accuracy, one point per epoch.
# Explicit figure/axes objects are used so each plot carries its own title and
# axis labels and can be understood when the notebook is skimmed.
fig_acc, ax_acc = plt.subplots()
ax_acc.plot(history.history['accuracy'], label='Accuracy')
ax_acc.plot(history.history['val_accuracy'], label='Validation Accuracy')
ax_acc.set(title='Accuracy per epoch', xlabel='Epoch', ylabel='Accuracy')
ax_acc.legend()
plt.show()

# True 0/1 labels against the model's sigmoid outputs for the test sequences.
fig_pred, ax_pred = plt.subplots()
ax_pred.plot(y_test, label='True Values')
ax_pred.plot(predictions, label='Predictions')
ax_pred.set(
    title='Test set: true labels vs. model output',
    xlabel='Test sample index',
    ylabel='Label / model output',
)
ax_pred.legend()
plt.show()