In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima

In [None]:
# Single place for every tunable; later cells only read from CONFIG.
CONFIG = dict(
    # Tickers requested from the downloader (target plus regressors).
    symbols=['AMZN', '^GSPC', '^VIX', 'AAPL', '^TNX'],
    # Columns (ticker names with '^' stripped) used as exogenous regressors.
    feature_cols=['GSPC', 'VIX', 'AAPL', 'TNX'],
    # Column whose price is being forecast.
    target_col='AMZN',
    # Date window passed to the downloader.
    start_date='2018-01-01',
    end_date='2024-12-31',
    # Share of rows, taken from the start of the sample, used for training.
    train_split_ratio=0.8,
    # Number of rolling-forecast steps between full model refits.
    refit_interval=20,
)

In [None]:
print("Fetching data...")
close_prices = yf.download(
    CONFIG['symbols'],
    start=CONFIG['start_date'],
    end=CONFIG['end_date'],
)['Close']

# yf.download does not promise to return the ticker columns in request order
# (it typically sorts them alphabetically), so labels must never be assigned
# positionally. Select the columns by ticker name first, then strip the '^'
# prefix from the labels that are actually present.
data = close_prices[CONFIG['symbols']].ffill().dropna()
data.columns = [col.replace('^', '') for col in data.columns]

# An empty frame here means a ticker returned no usable prices; stop now rather
# than let the later cells fail with a less obvious error.
if data.empty:
    raise ValueError(
        "No rows left after cleaning the downloaded prices; "
        "check the tickers and the date range in CONFIG."
    )

# Immediately inspect the data
data.head()

In [None]:
print("Preparing data...")

target_col = CONFIG['target_col']
feature_cols = CONFIG['feature_cols']

# Chronological split: the leading share of rows trains, the remainder tests.
train_size = int(len(data) * CONFIG['train_split_ratio'])
train_df = data.iloc[:train_size]
test_df = data.iloc[train_size:]

# The model is trained on log prices; the test target stays in price units
# because forecasts are exponentiated back before being compared with it.
y_train_log = np.log(train_df[target_col])
y_test = test_df[target_col]

# Scaling statistics come from the training rows only and are then applied
# unchanged to both partitions.
scaler = StandardScaler()
scaler.fit(train_df[feature_cols])
X_train, X_test = (
    pd.DataFrame(
        scaler.transform(part[feature_cols]),
        index=part.index,
        columns=feature_cols,
    )
    for part in (train_df, test_df)
)

In [None]:
# This cell can be re-run independently if you want to experiment with model parameters
print("Finding best ARIMA order...")

# The regressors are passed as `X`: pmdarima renamed the old `exogenous`
# keyword, and on current releases the old name is no longer routed to the
# model as exogenous data.
# NOTE(review): stationary=True asks auto_arima to keep d=0 (no differencing).
# Log prices usually trend, so confirm this is the intended specification.
model_auto = auto_arima(y_train_log,
                        X=X_train,
                        seasonal=False,
                        stationary=True,
                        trace=True,
                        stepwise=True,
                        suppress_warnings=True,
                        error_action='ignore')

# The summary is printed directly in the notebook output
best_order = model_auto.order
print(f"\nBest Order: {best_order}")
model_auto.summary()

In [None]:
print(f"Running rolling forecast (refitting every {CONFIG['refit_interval']} days)...")

# Work on plain NumPy values inside the loop. The price index has no fixed
# frequency, and array inputs let statsmodels append new observations without
# index-compatibility checks.
history_log = list(y_train_log.to_numpy(dtype=float))
exog_history = list(X_train.to_numpy(dtype=float))  # one 1-D feature row per observation
test_log_values = np.log(y_test.to_numpy(dtype=float))
test_exog_values = X_test.to_numpy(dtype=float)

predictions = []
model_fit = None

for i in range(len(y_test)):
    next_exog = test_exog_values[i:i + 1]  # kept 2-D: shape (1, n_features)
    next_log_price = test_log_values[i]
    prediction = np.nan

    try:
        # Re-estimate the parameters periodically, or right away if the
        # previous step left no usable fitted model.
        if i % CONFIG['refit_interval'] == 0 or model_fit is None:
            print(f"Refitting model at step {i}...")
            model = ARIMA(np.asarray(history_log),
                          exog=np.asarray(exog_history),
                          order=best_order)
            model_fit = model.fit()

        # One-step-ahead forecast, converted from log space back to a price.
        forecast_log = model_fit.forecast(steps=1, exog=next_exog)
        prediction = float(np.exp(np.asarray(forecast_log)[0]))

        # Roll the fitted model forward with the value just observed, keeping
        # the current parameters. Without this, every forecast between refits
        # would start from the last refit date and ignore newer prices.
        model_fit = model_fit.append([next_log_price], exog=next_exog, refit=False)

    except Exception as e:
        print(f"Forecast failed at step {i}: {e}")
        # Drop the possibly inconsistent model so the next step refits.
        model_fit = None

    predictions.append(prediction)

    # Record the observation even when the step failed, so later refits still
    # see a complete, aligned history.
    history_log.append(next_log_price)
    exog_history.append(next_exog[0])

final_predictions = pd.Series(predictions, index=y_test.index).dropna()

In [None]:
# Failed steps were dropped from final_predictions, so nothing is left to
# score if every step failed; say so explicitly.
if final_predictions.empty:
    raise ValueError("No successful forecasts to evaluate; every rolling step failed.")

# Compare only the dates that actually received a forecast.
aligned_actuals = y_test.loc[final_predictions.index]
rmse = np.sqrt(mean_squared_error(aligned_actuals, final_predictions))
mae = mean_absolute_error(aligned_actuals, final_predictions)
mape = mean_absolute_percentage_error(aligned_actuals, final_predictions)

print("\n--- Rolling Forecast Performance ---")
print(f"RMSE:  {rmse:.2f}") # Root Mean Squared Error
print(f"MAE:   {mae:.2f}") # Mean Absolute Error
print(f"MAPE:  {mape:.2%}") # Mean Absolute Percentage Error


In [None]:
# Overlay the rolling forecasts on the realised prices for the scored dates.
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(aligned_actuals, label='Actual Prices', color='green')
ax.plot(final_predictions, label='Rolling Forecast', linestyle='--', color='red')

chart_title = f'{CONFIG["target_col"]} Rolling Forecast vs Actual Price'
ax.set_title(chart_title)
ax.set_xlabel('Date')
ax.set_ylabel('Price (USD)')
ax.legend()
ax.grid(True)

plt.show()