In [1]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Input, LSTM, Dense
from tensorflow.keras.models import Sequential

In [8]:
# Location of the aligned dataset CSV. The default is the original author's
# local path; set the ALIGNED_DATASET_CSV environment variable to point the
# notebook at the file on another machine without editing this cell.
DATA_PATH = Path(os.environ.get(
    'ALIGNED_DATASET_CSV',
    '/Users/keremsmacbook/Desktop/42/gitHub/Presidential_Tweets_Impacts_SP500/PresidentialTweetsImpectOnS&P500/Data/Processed/aligned_dataset.csv',
))

data = pd.read_csv(DATA_PATH)
# Parse the 'Date' column into datetime64 values (raises on unparseable entries).
data['Date'] = pd.to_datetime(data['Date'])

In [9]:
# Maps column name -> the MinMaxScaler fitted on that column, so predictions
# can later be mapped back to the original scale with inverse_transform.
scalers = {}
def normalize_features(df, features):
    """Min-max scale the given columns and remember each fitted scaler.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing every column named in ``features``.
    features : iterable of str
        Names of the columns to rescale. Each column gets its own scaler.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` in which each listed column has been replaced by its
        scaled values. The frame passed in is left untouched, so the raw
        values stay available to the caller.

    Side effects
    ------------
    Stores the fitted scaler for every column in the module-level ``scalers``
    dict, overwriting any previous entry for the same column name.

    NOTE(review): each scaler is fitted on every row of ``df``. If ``df``
    still contains the rows that are later held out for testing, their
    min/max influence the scaling of the training rows.
    """
    # Work on a copy so the caller's DataFrame is not mutated in place.
    normalized = df.copy()
    for feature in features:
        scaler = MinMaxScaler()
        # Double brackets keep the column 2-D, as the scaler expects.
        normalized[feature] = scaler.fit_transform(normalized[[feature]])
        scalers[feature] = scaler
    return normalized

# Columns to rescale before windowing.
# NOTE: `data` is rebound to the normalized frame, so re-running this cell
# without reloading the CSV refits the scalers on already-scaled values.
features = [
    'Close',
    'avg_vader_sentiment',
    'avg_finbert_sentiment',
]
data = normalize_features(df=data, features=features)

def create_sequences(df, feature_columns, target_column, seq_length):
    """Build sliding-window samples for next-step prediction.

    For every start row ``i`` with ``i + seq_length < len(df)``, one sample is
    produced: the input is rows ``i .. i + seq_length - 1`` of
    ``feature_columns`` and the label is ``target_column`` at row
    ``i + seq_length``.

    Parameters
    ----------
    df : pandas.DataFrame
        Source rows, used in their existing order.
    feature_columns : list of str
        Columns that make up each input window.
    target_column : str
        Column from which the label is read.
    seq_length : int
        Number of consecutive rows per input window.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``X`` with shape ``(n_samples, seq_length, len(feature_columns))`` and
        ``y`` with shape ``(n_samples,)``, where
        ``n_samples = len(df) - seq_length``. When that count is not positive,
        both arrays are empty.
    """
    # Pull the columns out once; selecting them from the DataFrame inside the
    # loop would rebuild the whole sub-frame on every iteration.
    feature_values = df[feature_columns].to_numpy()
    target_values = df[target_column].to_numpy()

    n_samples = len(df) - seq_length
    X = [feature_values[start:start + seq_length] for start in range(n_samples)]
    y = [target_values[start + seq_length] for start in range(n_samples)]
    return np.array(X), np.array(y)

# Number of consecutive rows fed to the model for each prediction.
SEQ_LENGTH = 30

# Scenario 1: windows built from 'Close' alone.
X_close, y_close = create_sequences(data, ['Close'], 'Close', SEQ_LENGTH)

# Scenario 2: 'Close' together with the VADER sentiment column.
X_close_vader, y_close_vader = create_sequences(
    data, ['Close', 'avg_vader_sentiment'], 'Close', SEQ_LENGTH
)

# Scenario 3: 'Close' together with the FinBERT sentiment column.
X_close_finbert, y_close_finbert = create_sequences(
    data, ['Close', 'avg_finbert_sentiment'], 'Close', SEQ_LENGTH
)

In [28]:
from keras.src.layers import Dropout


def build_lstm(input_shape):
    """Create and compile the single-layer LSTM regressor.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, passed to the model's ``Input`` layer
        (the callers in this notebook pass ``(SEQ_LENGTH, n_features)``).

    Returns
    -------
    Sequential
        A compiled model: one 50-unit LSTM with ReLU activation that returns
        only its final output, followed by a single-unit Dense layer. It is
        compiled with the Adam optimizer and mean-squared-error loss.
    """
    # Declare the input with an explicit Input layer rather than passing
    # `input_shape=` to the LSTM, which current Keras flags as deprecated.
    model = Sequential([
        Input(shape=input_shape),
        LSTM(50, activation='relu', return_sequences=False),
        Dense(1),
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

from tensorflow.keras.callbacks import EarlyStopping

def train_lstm(X_train, y_train, X_test, y_test, input_shape, epochs=300, batch_size=32,
               validation_data=None, patience=10):
    """Build an LSTM via ``build_lstm`` and fit it with early stopping.

    Parameters
    ----------
    X_train, y_train
        Training inputs and labels passed to ``model.fit``.
    X_test, y_test
        Held-out inputs and labels. Used as the validation set when
        ``validation_data`` is not given.
    input_shape : tuple
        Forwarded to ``build_lstm``.
    epochs : int, default 300
        Upper bound on training epochs; early stopping may end training sooner.
    batch_size : int, default 32
        Mini-batch size passed to ``model.fit``.
    validation_data : tuple or None, default None
        Optional ``(X_val, y_val)`` pair to monitor instead of the test set.
    patience : int, default 10
        Epochs without ``val_loss`` improvement before training stops.

    Returns
    -------
    (model, history)
        The fitted model and the object returned by ``model.fit``.

    Notes
    -----
    With the default ``validation_data=None`` the test set drives early
    stopping and, through ``restore_best_weights=True``, decides which weights
    are kept. Metrics later computed on that same test set are therefore
    optimistically biased; pass a separate validation split to avoid this.
    """
    if validation_data is None:
        # Backward-compatible default: monitor the test set, as before.
        validation_data = (X_test, y_test)

    model = build_lstm(input_shape)
    early_stopping = EarlyStopping(monitor='val_loss', patience=patience, restore_best_weights=True)
    history = model.fit(
        X_train, y_train, validation_data=validation_data,
        epochs=epochs, batch_size=batch_size, verbose=1, callbacks=[early_stopping]
    )
    return model, history

def train_test_split(X, y, split_ratio=0.8):
    """Split X and y in order, without shuffling, at a fractional position.

    The cut index is ``int(len(X) * split_ratio)``. Everything before it is
    the training part and everything from it onward is the test part; the
    same index is applied to ``y``.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``.
    """
    cut = int(len(X) * split_ratio)
    head = slice(None, cut)
    tail = slice(cut, None)
    return X[head], X[tail], y[head], y[tail]

In [29]:
# Each scenario is split in row order and then trained from scratch.
# NOTE(review): no random seed is set in the cells shown here, so repeated
# runs may give different weights and metrics.

# Scenario 1: 'Close' only (one feature per timestep).
(X_train_close, X_test_close,
 y_train_close, y_test_close) = train_test_split(X_close, y_close)
model_close, history_close = train_lstm(
    X_train_close, y_train_close,
    X_test_close, y_test_close,
    input_shape=(SEQ_LENGTH, 1),
)

# Scenario 2: 'Close' + VADER sentiment (two features per timestep).
(X_train_close_vader, X_test_close_vader,
 y_train_close_vader, y_test_close_vader) = train_test_split(X_close_vader, y_close_vader)
model_close_vader, history_close_vader = train_lstm(
    X_train_close_vader, y_train_close_vader,
    X_test_close_vader, y_test_close_vader,
    input_shape=(SEQ_LENGTH, 2),
)

# Scenario 3: 'Close' + FinBERT sentiment (two features per timestep).
(X_train_close_finbert, X_test_close_finbert,
 y_train_close_finbert, y_test_close_finbert) = train_test_split(X_close_finbert, y_close_finbert)
model_close_finbert, history_close_finbert = train_lstm(
    X_train_close_finbert, y_train_close_finbert,
    X_test_close_finbert, y_test_close_finbert,
    input_shape=(SEQ_LENGTH, 2),
)


Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300


In [30]:
from sklearn.metrics import mean_squared_error, r2_score

def evaluate_model(model, X_test, y_test, scaler):
    """Score a model on held-out data after undoing the target scaling.

    Parameters
    ----------
    model
        Object whose ``predict(X_test)`` output is accepted by
        ``scaler.inverse_transform``.
    X_test
        Inputs passed to ``model.predict``.
    y_test : numpy.ndarray
        Scaled true targets; reshaped to a single column before inversion.
    scaler
        Fitted scaler used to map both predictions and targets back to the
        original scale.

    Returns
    -------
    tuple
        ``(mse, rmse, r2, mape)`` computed on the inverse-transformed values;
        ``mape`` is expressed in percent.
    """
    predicted = scaler.inverse_transform(model.predict(X_test))
    actual = scaler.inverse_transform(y_test.reshape(-1, 1))

    mse = mean_squared_error(actual, predicted)
    r2 = r2_score(actual, predicted)
    rmse = np.sqrt(mse)

    # A tiny offset on the denominator guards against division by exactly zero.
    epsilon = 1e-10
    relative_error = np.abs((actual - predicted) / (actual + epsilon))
    mape = np.mean(relative_error) * 100

    return mse, rmse, r2, mape

# Every scenario predicts 'Close', so the 'Close' scaler is used each time to
# report the metrics on the original scale.
close_scaler = scalers['Close']

print("Scenario 1: Close only")
mse_close, rmse_close, r2_close, mape_close = evaluate_model(
    model=model_close, X_test=X_test_close, y_test=y_test_close, scaler=close_scaler
)
print(f"MSE: {mse_close}, RMSE: {rmse_close}, R²: {r2_close}, MAPE: {mape_close}%")

print("\nScenario 2: Close + Vader Sentiment")
mse_close_vader, rmse_close_vader, r2_close_vader, mape_close_vader = evaluate_model(
    model=model_close_vader, X_test=X_test_close_vader, y_test=y_test_close_vader, scaler=close_scaler
)
print(f"MSE: {mse_close_vader}, RMSE: {rmse_close_vader}, R²: {r2_close_vader}, MAPE: {mape_close_vader}%")

print("\nScenario 3: Close + FinBERT Sentiment")
mse_close_finbert, rmse_close_finbert, r2_close_finbert, mape_close_finbert = evaluate_model(
    model=model_close_finbert, X_test=X_test_close_finbert, y_test=y_test_close_finbert, scaler=close_scaler
)
print(f"MSE: {mse_close_finbert}, RMSE: {rmse_close_finbert}, R²: {r2_close_finbert}, MAPE: {mape_close_finbert}%")


Scenario 1: Close only
MSE: 5848.796328484404, RMSE: 76.47742365224134, R²: 0.9381592673464991, MAPE: 1.9269371275303686%

Scenario 2: Close + Vader Sentiment
MSE: 6432.939665909325, RMSE: 80.20560869359028, R²: 0.9319829791100477, MAPE: 2.0770193573453573%

Scenario 3: Close + FinBERT Sentiment
MSE: 4785.912170385523, RMSE: 69.18028744075528, R²: 0.9493973976165724, MAPE: 1.7274171371515195%
