In [None]:
%load_ext autoreload
%autoreload 2

from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import mean_absolute_error, mean_squared_error

from src.data_ingestion import fetch_and_save_data
from src.preprocessing import preprocess_data
from src.modeling import train_auto_arima

# Global plot appearance for every figure in this notebook.
# NOTE(review): keep the palette call after the style call -- applying a
# matplotlib style sheet presumably resets the colour cycle that
# sns.set_palette() configures, so swapping the lines would likely drop the
# colour-blind-friendly palette. Confirm before reordering.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette('colorblind')

In [None]:
# Configuration for this notebook.
# NOTE(review): TICKERS is not referenced in the cells shown here; it is kept
# because other cells or notebooks may rely on it.
TICKERS = ['TSLA', 'BND', 'SPY']
RAW_DATA_PATH = 'data/raw/financial_data.csv'

# The raw CSV is read with a two-row column header (giving MultiIndex columns)
# and its first column as a date-parsed index.
raw_data = pd.read_csv(
    RAW_DATA_PATH,
    header=[0, 1],
    index_col=0,
    parse_dates=True,
)

# preprocess_data is a project helper (src.preprocessing); the result is
# indexed by ticker below, so it presumably holds one price column per ticker.
adj_close_prices = raw_data.pipe(preprocess_data)
tsla_prices = adj_close_prices['TSLA']

In [None]:
# Chronological hold-out split: everything up to and including `train_end`
# is used for fitting, everything after it for evaluation.
train_end = '2023-12-31'
train_data = tsla_prices[:train_end]

# Label-based slices on a DatetimeIndex include BOTH endpoints, so slicing the
# test set with `tsla_prices[train_end:]` would put any observation dated
# `train_end` into both sets (data leakage as soon as the cutoff is a trading
# day). Taking the rows positioned after the training block keeps the two sets
# disjoint and exhaustive. Like the label slice above, this relies on the
# index being sorted in ascending date order.
test_data = tsla_prices.iloc[len(train_data):]

print(f"Training data size: {len(train_data)}")
print(f"Test data size: {len(test_data)}")

# Visual sanity check of where the split falls on the price history.
plt.figure(figsize=(12, 6))
train_data.plot(label='Training Data')
test_data.plot(label='Test Data')
plt.title('TSLA Price - Train/Test Split')
plt.legend()
plt.show()

In [None]:
# Fit the ARIMA model on the training window only, so the hold-out period
# stays unseen until evaluation.
# NOTE(review): train_auto_arima is a project helper (src.modeling); judging by
# its name it runs an automatic order search, which may make this cell slow --
# confirm in the helper.
arima_model = train_auto_arima(train_data)

In [None]:
# Persist the fitted model so later notebooks can reuse it without refitting.
MODEL_PATH_ARIMA = 'models/arima_model.pkl'

# joblib.dump does not create missing directories; make sure the target folder
# exists so a fresh checkout does not fail here after the costly fit.
Path(MODEL_PATH_ARIMA).parent.mkdir(parents=True, exist_ok=True)

joblib.dump(arima_model, MODEL_PATH_ARIMA)
print(f"ARIMA model saved to {MODEL_PATH_ARIMA}")

In [None]:
# Forecast one step per observation in the hold-out period, together with the
# interval bounds used for the shaded band in the forecast plot.
n_periods = len(test_data)
arima_predictions, conf_int = arima_model.predict(n_periods=n_periods, return_conf_int=True)

# Relabel the forecast with the test dates. The values are converted to a plain
# array first: if `predict` hands back a pandas Series (recent pmdarima versions
# appear to do so when the model was fitted on a Series -- confirm for the
# installed version), `pd.Series(series, index=...)` would REINDEX by label
# instead of relabelling, and the non-matching labels would turn every
# prediction into NaN.
arima_predictions = pd.Series(np.asarray(arima_predictions), index=test_data.index)

# Normalise the bounds to a 2-D array so positional column access
# (`conf_int[:, 0]`, `conf_int[:, 1]`) works regardless of the returned type.
conf_int = np.asarray(conf_int)

In [None]:
mae_arima = mean_absolute_error(test_data, arima_predictions)
rmse_arima = np.sqrt(mean_squared_error(test_data, arima_predictions))
mape_arima = np.mean(np.abs((test_data - arima_predictions) / test_data)) * 100

print(f"ARIMA Model Performance on Test Set:")
print(f"  Mean Absolute Error (MAE): ${mae_arima:.2f}")
print(f"  Root Mean Squared Error (RMSE): ${rmse_arima:.2f}")
print(f"  Mean Absolute Percentage Error (MAPE): {mape_arima:.2f}%")

# Save metrics for later comparison
%store mae_arima
%store rmse_arima
%store mape_arima

In [None]:
# Overlay the forecast on the full price history to compare it with what
# actually happened in the hold-out period.
fig, ax = plt.subplots(figsize=(14, 7))

ax.plot(train_data, label='Training Data')
ax.plot(test_data, label='Actual Test Data', color='orange')
ax.plot(arima_predictions, label='ARIMA Forecast', color='green', linestyle='--')

# Shade the region between the two columns of conf_int (presumably the lower
# and upper interval bounds, in that order -- confirm against the model API).
band_first, band_second = conf_int[:, 0], conf_int[:, 1]
ax.fill_between(
    test_data.index,
    band_first,
    band_second,
    color='k',
    alpha=.15,
    label='Confidence Interval',
)

ax.set_title('ARIMA Forecast vs. Actual TSLA Prices')
ax.set_xlabel('Date')
ax.set_ylabel('Price (USD)')
ax.legend()
plt.show()