In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (e.g. the project's src.* helpers) are picked up without restarting
# the kernel.
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from tensorflow.keras.models import load_model

# Import custom functions
from src.preprocessing import preprocess_data
from src.modeling import create_lstm_dataset, build_and_train_lstm

# Style setup for plots: apply one matplotlib style sheet and one seaborn
# colour palette globally, so every figure below shares the same look.
# NOTE(review): the 'seaborn-v0_8-*' style names appear to require
# matplotlib >= 3.6 — confirm against the pinned environment.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette('colorblind')

In [None]:
# Location of the raw CSV; it is read with a two-row column header and its
# first column parsed as a date index.
RAW_DATA_PATH = 'data/raw/financial_data.csv'
raw_data = pd.read_csv(
    RAW_DATA_PATH,
    header=[0, 1],
    index_col=0,
    parse_dates=True,
)

# preprocess_data comes from src.preprocessing (not shown here); the result is
# indexed by ticker below, so it presumably holds one price column per ticker.
adj_close_prices = preprocess_data(raw_data)

# Keep TSLA as a single-column 2-D array, the layout the scaler is fed later.
tsla_series = adj_close_prices['TSLA']
tsla_prices = tsla_series.to_numpy().reshape(-1, 1)

In [None]:
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(tsla_prices)

training_size = int(len(scaled_data) * 0.85) # Use a slightly different split for demonstration
train_data_scaled = scaled_data[:training_size]
test_data_scaled = scaled_data[training_size - 60:] # Include overlap for first prediction

In [None]:
TIME_STEP = 60
X_train, y_train = create_lstm_dataset(train_data_scaled, TIME_STEP)
X_test, y_test = create_lstm_dataset(test_data_scaled, TIME_STEP)

In [None]:
# Train the LSTM on the training windows. build_and_train_lstm is defined in
# src.modeling (not shown here), so the architecture and training settings
# live there.
# NOTE(review): no random seed is set in the visible cells, so results
# presumably vary between runs — consider seeding before this call.
lstm_model = build_and_train_lstm(X_train, y_train, TIME_STEP)

In [None]:
# Persist the trained model to disk and report where it was written.
# NOTE(review): the '.h5' suffix presumably selects Keras' HDF5 format —
# confirm this is the intended format for the installed Keras version.
MODEL_PATH_LSTM = 'models/lstm_model.h5'
lstm_model.save(MODEL_PATH_LSTM)
print(f"LSTM model saved to {MODEL_PATH_LSTM}")

In [None]:
# Add a trailing feature axis of size 1 to the test windows before predicting.
n_windows, window_len = X_test.shape[0], X_test.shape[1]
X_test = X_test.reshape(n_windows, window_len, 1)
test_predict = lstm_model.predict(X_test)

# Map both the predictions and the targets back through the fitted scaler so
# they are expressed in the original price scale.
y_test_column = y_test.reshape(-1, 1)
test_predict_unscaled = scaler.inverse_transform(test_predict)
y_test_unscaled = scaler.inverse_transform(y_test_column)

In [None]:
# Error metrics on the unscaled test targets vs. the unscaled predictions.
mae_lstm = mean_absolute_error(y_test_unscaled, test_predict_unscaled)

mse_lstm = mean_squared_error(y_test_unscaled, test_predict_unscaled)
rmse_lstm = np.sqrt(mse_lstm)

# MAPE: mean of |error / actual|, expressed as a percentage.
relative_errors = np.abs((y_test_unscaled - test_predict_unscaled) / y_test_unscaled)
mape_lstm = np.mean(relative_errors) * 100

print("LSTM Model Performance on Test Set:")
print(f"  Mean Absolute Error (MAE): ${mae_lstm:.2f}")
print(f"  Root Mean Squared Error (RMSE): ${rmse_lstm:.2f}")
print(f"  Mean Absolute Percentage Error (MAPE): {mape_lstm:.2f}%")

In [None]:
# Create a DataFrame for plotting, sharing the index of the price table so
# both series line up on the same x-axis.
plot_df = pd.DataFrame(index=adj_close_prices.index)
plot_df['Actual Price'] = adj_close_prices['TSLA'].values

# Place the test predictions on the full-length axis; every other row stays NaN
# so matplotlib draws nothing there.
test_predict_plot = np.full_like(scaled_data, np.nan)

# The test slice starts one full window before the split, so the first test
# window is expected to target row `training_size` itself, not the row after it.
# (Assumes create_lstm_dataset pairs each window with the value right after
# it — TODO confirm against src.modeling.)
# The end is derived from the number of predictions rather than forced to
# len(scaled_data), so this works whether or not the helper drops a trailing sample.
pred_start = training_size
pred_end = pred_start + len(test_predict_unscaled)
assert pred_end <= len(scaled_data), (
    f"{len(test_predict_unscaled)} predictions do not fit after row {pred_start}"
)
test_predict_plot[pred_start:pred_end, :] = test_predict_unscaled

# Flatten to 1-D: a DataFrame column expects one value per row.
plot_df['LSTM Predictions'] = test_predict_plot.ravel()

fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(plot_df['Actual Price'], label='Actual Price')
ax.plot(plot_df['LSTM Predictions'], label='LSTM Forecast', linestyle='--')
ax.axvline(x=adj_close_prices.index[training_size], color='r', linestyle='--', label='Train-Test Split')
ax.set_title('LSTM Forecast vs. Actual TSLA Prices')
ax.set_xlabel('Date')
ax.set_ylabel('Price ($)')
ax.legend()
plt.show()

In [None]:
# Retrieve saved ARIMA metrics
%store -r mae_arima
%store -r rmse_arima
%store -r mape_arima

comparison_data = {
    'Metric': ['MAE ($)', 'RMSE ($)', 'MAPE (%)'],
    'ARIMA': [f'{mae_arima:.2f}', f'{rmse_arima:.2f}', f'{mape_arima:.2f}'],
    'LSTM': [f'{mae_lstm:.2f}', f'{rmse_lstm:.2f}', f'{mape_lstm:.2f}']
}
comparison_df = pd.DataFrame(comparison_data)
print("\n--- Model Performance Comparison ---")
print(comparison_df)