In [24]:
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import warnings
warnings.filterwarnings("ignore")
import warnings

file_path = '/Users/keremsmacbook/Desktop/42/gitHub/Presidential_Tweets_Impacts_SP500/PresidentialTweetsImpectOnS&P500/Data/Processed/aligned_dataset.csv'
aligned_dataset = pd.read_csv(file_path)

aligned_dataset['Date'] = pd.to_datetime(aligned_dataset['Date'])
aligned_dataset.sort_values('Date', inplace=True)
aligned_dataset.set_index('Date', inplace=True)

train_size = int(len(aligned_dataset) * 0.8)
train_data = aligned_dataset.iloc[:train_size]
test_data = aligned_dataset.iloc[train_size:]


In [26]:
print(train_data[['Close', 'avg_vader_sentiment']].corr())

                        Close  avg_vader_sentiment
Close                1.000000            -0.090787
avg_vader_sentiment -0.090787             1.000000


In [27]:
print(train_data[['Close', 'avg_finbert_sentiment']].corr())

                         Close  avg_finbert_sentiment
Close                  1.00000               -0.07586
avg_finbert_sentiment -0.07586                1.00000


In [30]:
def calculate_metrics(actual, predicted):
    mse = mean_squared_error(actual, predicted)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, predicted)
    mape = np.mean(np.abs((actual - predicted) / actual)) * 100
    r2 = r2_score(actual, predicted)
    return mse, rmse, mae, mape, r2

def run_auto_arima(train, test, target_column, exog_columns=None):
    if exog_columns:
        model = auto_arima(
            train[target_column],
            exogenous=train[exog_columns],
            seasonal=False,
            stepwise=True,
            suppress_warnings=True
        )
    else:
        model = auto_arima(
            train[target_column],
            seasonal=False,
            stepwise=True,
            suppress_warnings=True
        )

    if exog_columns:
        forecast = model.predict(n_periods=len(test), exogenous=test[exog_columns])
    else:
        forecast = model.predict(n_periods=len(test))

    mse, rmse, mae, mape, r2 = calculate_metrics(test[target_column], forecast)
    print(f"\nMetrics for {target_column} with exog={exog_columns or 'None'}:")
    print(f"MSE: {mse:.2f}, RMSE: {rmse:.2f}, MAE: {mae:.2f}, MAPE: {mape:.2f}%, R²: {r2:.2f}")

    return model, forecast




print("Scenario 1: Only Close")
arima_close, forecast_close = run_auto_arima(train_data, test_data, target_column='Close')

print("\nScenario 2: Close + avg_vader_sentiment")
arima_close_vader, forecast_close_vader = run_auto_arima(
    train_data, test_data, target_column='Close', exog_columns=['avg_vader_sentiment']
)

print("\nScenario 3: Close + avg_finbert_sentiment")
arima_close_finbert, forecast_close_finbert = run_auto_arima(
    train_data, test_data, target_column='Close', exog_columns=['avg_finbert_sentiment']
)

Scenario 1: Only Close

Metrics for Close with exog=None:
MSE: 773893.15, RMSE: 879.71, MAE: 815.64, MAPE: nan%, R²: -5.81

Scenario 2: Close + avg_vader_sentiment

Metrics for Close with exog=['avg_vader_sentiment']:
MSE: 773893.15, RMSE: 879.71, MAE: 815.64, MAPE: nan%, R²: -5.81

Scenario 3: Close + avg_finbert_sentiment

Metrics for Close with exog=['avg_finbert_sentiment']:
MSE: 773893.15, RMSE: 879.71, MAE: 815.64, MAPE: nan%, R²: -5.81
