In [None]:
# --- Imports
import os, pickle, warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Silence every warning category for the rest of the session.
warnings.filterwarnings("ignore")

# Matplotlib defaults applied to all figures created after this cell
plt.rcParams.update({
    'figure.figsize': (12,5),
    'axes.grid': True,
})

# Input / output locations (relative paths, resolved against the working directory)
DATA_DIR = "../data"
MODELS_DIR = "../models"
CLOSE_FILE = f"{DATA_DIR}/close_prices_clean.csv"   # from Task 1

# Create the models folder up front so later save calls cannot fail on a missing directory
os.makedirs(MODELS_DIR, exist_ok=True)


In [None]:
# Load the cleaned close prices from Task 1 (first CSV column becomes a parsed date index)
close_prices_clean = pd.read_csv(CLOSE_FILE, index_col=0, parse_dates=True)

# Prefer an exact 'TSLA' column; otherwise fall back to the first column whose
# name merely contains 'TSLA'. Fail loudly if neither exists instead of raising
# an opaque IndexError from an empty list.
if 'TSLA' in close_prices_clean.columns:
    ts_col = 'TSLA'
else:
    tsla_like = [c for c in close_prices_clean.columns if 'TSLA' in str(c)]
    if not tsla_like:
        raise KeyError(
            f"No column containing 'TSLA' found in {CLOSE_FILE}; "
            f"available columns: {list(close_prices_clean.columns)}"
        )
    ts_col = tsla_like[0]

# Drop missing prices and make sure the series is in chronological order,
# because everything below relies on ts.index[-1] being the latest date.
ts = close_prices_clean[ts_col].dropna().sort_index()
if ts.empty:
    raise ValueError(f"Column {ts_col!r} in {CLOSE_FILE} contains no non-missing prices.")

print("Last available date:", ts.index.max().date(), "Last price:", round(ts.iloc[-1],2))

# Forecast horizon (trading days): ~126 for 6 months, ~252 for 12 months
H_6M  = 126
H_12M = 252
H     = H_12M

# Create future business-day index starting next business day.
# NOTE: bdate_range only skips weekends; exchange holidays are not excluded.
last_day = ts.index[-1]
future_idx = pd.bdate_range(start=last_day + pd.tseries.offsets.BDay(), periods=H)


ARIMA Forecast (with confidence intervals)

In [None]:
# Prefer pmdarima's automatic order selection; fall back to a fixed-order
# statsmodels ARIMA when pmdarima cannot be imported.
use_pmdarima = True
try:
    import pmdarima as pm
except Exception as exc:
    # Kept broad on purpose (best-effort fallback), but no longer silent:
    # report why the fallback path is being used.
    print(f"pmdarima unavailable ({type(exc).__name__}: {exc}); falling back to statsmodels ARIMA(1,1,1).")
    use_pmdarima = False
    from statsmodels.tsa.arima.model import ARIMA

if use_pmdarima:
    # Auto-select (p,d,q) on *full* history, then forecast into the future
    arima_model = pm.auto_arima(
        ts,
        start_p=0, start_q=0, max_p=5, max_q=5,
        d=None, seasonal=False, stepwise=True,
        suppress_warnings=True, error_action='ignore'
    )
    print(arima_model.summary())
    arima_fc, arima_ci = arima_model.predict(n_periods=H, return_conf_int=True, alpha=0.05)
else:
    # Simple ARIMA fallback (1,1,1) –  grid search if you like
    model = ARIMA(ts, order=(1,1,1))
    fit = model.fit()
    print(fit.summary())
    fc_res = fit.get_forecast(steps=H)
    arima_fc = fc_res.predicted_mean
    arima_ci_df = fc_res.conf_int(alpha=0.05)
    arima_ci = arima_ci_df

# Strip any index the libraries attached to their outputs before relabelling.
# pd.Series(<Series>, index=future_idx) would *re-index by label* and yield NaN
# wherever the forecast's own labels differ from future_idx, so convert to
# plain arrays first and then attach the future business-day index.
arima_fc = np.asarray(arima_fc, dtype=float)
arima_ci = np.asarray(arima_ci, dtype=float)

arima_pred  = pd.Series(arima_fc,       index=future_idx, name="ARIMA_Forecast")
arima_lower = pd.Series(arima_ci[:, 0], index=future_idx, name="ARIMA_Lower")
arima_upper = pd.Series(arima_ci[:, 1], index=future_idx, name="ARIMA_Upper")

# Plot last year of history + forecast.
# Boolean mask replaces the deprecated Series.last('365D'); it keeps rows
# strictly later than (latest date - 365 days).
hist_tail = ts.loc[ts.index > ts.index[-1] - pd.Timedelta(days=365)]
plt.figure(figsize=(12,5))
plt.plot(hist_tail, label="History (last 12m)")
plt.plot(arima_pred, label="ARIMA forecast")
plt.fill_between(arima_pred.index, arima_lower, arima_upper, alpha=0.15, label="95% CI")
plt.title("TSLA – ARIMA Future Forecast")
plt.legend(); plt.show()

# Save CSV (forecast plus lower/upper bounds, indexed by future date)
arima_out = pd.concat([arima_pred, arima_lower, arima_upper], axis=1)
arima_out.to_csv(f"{DATA_DIR}/tsla_forecast_arima.csv")
print("Saved:", f"{DATA_DIR}/tsla_forecast_arima.csv")


LSTM Forecast (recursive, with a simple uncertainty band)

In [None]:
# ---- LSTM imports
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler

# Window length (same as Task 2)
WIN = 60
# Seed applied only when the LSTM has to be trained in this cell
LSTM_SEED = 42

# Locations of the persisted scaler and model
scaler_path = f"{MODELS_DIR}/tsla_scaler.pkl"
model_path  = f"{MODELS_DIR}/tsla_lstm.h5"

if len(ts) <= WIN:
    raise ValueError(
        f"Need more than {WIN} observations to build LSTM windows, got {len(ts)}."
    )

prices = ts.values.reshape(-1, 1)
use_saved = os.path.exists(model_path) and os.path.exists(scaler_path)

# Decide on ONE scaler and use it for every transform / inverse transform below.
# Previously the data was scaled with a freshly fitted scaler and the variable
# was then overwritten by the pickled one, so the loaded model received inputs
# on a different scale from the one used to invert its outputs.
if use_saved:
    # NOTE(security): pickle.load can execute arbitrary code; only load
    # scaler files that this project produced itself.
    with open(scaler_path, "rb") as f:
        scaler = pickle.load(f)
    # Reuse the saved scaling; values may fall outside [0, 1] if the price
    # range has moved beyond what the saved scaler was fitted on.
    scaled = scaler.transform(prices)
else:
    scaler = MinMaxScaler(feature_range=(0,1))
    # Fit scaler on full series
    scaled = scaler.fit_transform(prices)

# Build supervised sequences on full data: each sample is WIN consecutive
# scaled prices, the target is the price that immediately follows.
X = np.array([scaled[i-WIN:i, 0] for i in range(WIN, len(scaled))]).reshape(-1, WIN, 1)
y = scaled[WIN:, 0].reshape(-1, 1)

# Try load; else train a small model
if use_saved:
    lstm_model = load_model(model_path)
    print("Loaded saved LSTM and scaler.")
else:
    print("No saved model found — training a compact LSTM on full history...")
    # Seed NumPy and TensorFlow so a re-run trains a comparable model
    np.random.seed(LSTM_SEED)
    tf.random.set_seed(LSTM_SEED)
    lstm_model = Sequential([
        LSTM(64, return_sequences=True, input_shape=(WIN,1)),
        Dropout(0.2),
        LSTM(64),
        Dense(1)
    ])
    lstm_model.compile(optimizer="adam", loss="mse")
    lstm_model.fit(X, y, epochs=30, batch_size=32, verbose=1, validation_split=0.1)
    # Save for reuse
    lstm_model.save(model_path)
    with open(scaler_path, "wb") as f:
        pickle.dump(scaler, f)
    print("Saved model & scaler to", MODELS_DIR)

# ===== Recursive multi-step forecast =====
# Start from the last WIN scaled points; each prediction is appended to the
# window and fed back in as input for the next step.
last_window = scaled[-WIN:].flatten().tolist()
pred_scaled = []

for _ in range(H):
    x_in = np.array(last_window[-WIN:]).reshape(1, WIN, 1)
    next_scaled = float(lstm_model.predict(x_in, verbose=0)[0,0])
    pred_scaled.append(next_scaled)
    last_window.append(next_scaled)

pred_scaled = np.array(pred_scaled)
lstm_pred = scaler.inverse_transform(pred_scaled.reshape(-1,1)).flatten()
lstm_pred = pd.Series(lstm_pred, index=future_idx, name="LSTM_Forecast")

# ===== Simple uncertainty band from historical one-step errors (Task 2 style) =====
# Residual std of one-step-ahead predictions over the windows built above,
# measured in scaled space (rough proxy).
y_pred_in = lstm_model.predict(X, verbose=0).reshape(-1)
resid_std = np.std((y.reshape(-1) - y_pred_in))  # in scaled space

# Build the band by shifting the scaled predictions by +/- z*std and inverting.
# The same offset is applied at every step, so the band has constant width and
# does not widen with the forecast horizon.
z = 1.96
upper_scaled = pred_scaled + z*resid_std
lower_scaled = pred_scaled - z*resid_std
lstm_upper = scaler.inverse_transform(upper_scaled.reshape(-1,1)).flatten()
lstm_lower = scaler.inverse_transform(lower_scaled.reshape(-1,1)).flatten()

lstm_upper = pd.Series(lstm_upper, index=future_idx, name="LSTM_Upper")
lstm_lower = pd.Series(lstm_lower, index=future_idx, name="LSTM_Lower")

# Plot last 12m history + forecast.
# Boolean mask replaces the deprecated Series.last('365D'); it keeps rows
# strictly later than (latest date - 365 days).
hist_tail = ts.loc[ts.index > ts.index[-1] - pd.Timedelta(days=365)]
plt.figure(figsize=(12,5))
plt.plot(hist_tail, label="History (last 12m)")
plt.plot(lstm_pred, label="LSTM forecast")
plt.fill_between(lstm_pred.index, lstm_lower, lstm_upper, alpha=0.15, label="Uncertainty band (approx.)")
plt.title("TSLA – LSTM Future Forecast (recursive)")
plt.legend(); plt.show()

# Save CSV (forecast plus lower/upper band, indexed by future date)
lstm_out = pd.concat([lstm_pred, lstm_lower, lstm_upper], axis=1)
lstm_out.to_csv(f"{DATA_DIR}/tsla_forecast_lstm.csv")
print("Saved:", f"{DATA_DIR}/tsla_forecast_lstm.csv")


Compare the two future forecasts

In [None]:
# Side-by-side table of the two point forecasts (both share the future date index)
both = pd.DataFrame({
    "ARIMA": arima_pred,
    "LSTM":  lstm_pred
})
display(both.head())

# Last ~12 months of history. The boolean mask replaces the deprecated
# Series.last('365D'); it keeps rows strictly later than (latest date - 365 days).
hist_12m = ts.loc[ts.index > ts.index[-1] - pd.Timedelta(days=365)]

plt.figure(figsize=(12,5))
plt.plot(hist_12m, label="History (last 12m)")
plt.plot(arima_pred, label="ARIMA forecast")
plt.plot(lstm_pred, label="LSTM forecast")
plt.title("TSLA – ARIMA vs LSTM (future 12m)")
plt.legend(); plt.show()
