# Task 2: TSLA Forecasting — ARIMA vs LSTM
Clean, modular notebook using utilities in `src/`.

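Steps (an overview; the numbered section headings below begin at data fetching and do not map one-to-one onto this list):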
1. Load configuration and utilities
2. Fetch and prepare prices (with fallback)
3. Extract TSLA and split chronologically at `SPLIT_DATE`
4. Train Auto-ARIMA and LSTM
5. Forecast test period and evaluate (MAE, RMSE, MAPE)
6. Plot and compare


In [1]:
# Setup: make the project root importable so the `src` package used below resolves.
import os, sys
from pathlib import Path

# If the kernel was started inside a directory named `notebooks`, the project
# root is taken to be its parent; otherwise the current working directory is used.
ROOT = Path.cwd().resolve().parents[0] if (Path.cwd().name == 'notebooks') else Path.cwd().resolve()

# Append only when missing, so re-running this cell does not pile up duplicate
# sys.path entries.
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))
ROOT


WindowsPath('C:/Users/senta/OneDrive/Documents/Proj/10 Ac/Time-Series-Forecasting-for-Portfolio-Management-Optimization')

In [2]:
# Imports
import pandas as pd
import matplotlib.pyplot as plt

from src.constants.config import (
    TICKERS, START_DATE, END_DATE, INTERVAL, AUTO_ADJUST, TARGET_TICKER, SPLIT_DATE
)
from src.utils.data_loader import fetch_yfinance_data, merge_adjusted_close
from src.utils.preprocessing import fill_missing_dates, handle_missing, extract_series, time_split_by_date
from src.models.arima import fit_auto_arima, forecast_arima
from src.models.lstm import train_lstm, forecast_lstm
from src.utils.evaluation import evaluate_all


ModuleNotFoundError: No module named 'tensorflow'

## 1) Fetch and build prices (with fallback)

In [None]:
def build_prices():
    """Fetch, merge and clean the 'Adj Close' price table for TICKERS.

    The data is first requested with the configured AUTO_ADJUST setting. If the
    merged result is None or empty, the download is repeated once with
    auto_adjust=False (intended to make sure an 'Adj Close' column is present).

    Returns:
        The merged prices after `fill_missing_dates`, interpolation through
        `handle_missing`, and removal of columns that are entirely NaN.

    Raises:
        RuntimeError: if nothing could be merged after both attempts.
    """
    def _fetch_merged(auto_adjust):
        # One download + merge pass for a given auto_adjust setting.
        downloaded = fetch_yfinance_data(
            TICKERS, START_DATE, END_DATE, interval=INTERVAL, auto_adjust=auto_adjust
        )
        return merge_adjusted_close(downloaded, column='Adj Close')

    def _is_missing(frame):
        # True when the merge produced nothing usable.
        return frame is None or frame.empty

    merged = _fetch_merged(AUTO_ADJUST)
    if _is_missing(merged):
        # Fallback attempt with unadjusted data.
        merged = _fetch_merged(False)
    if _is_missing(merged):
        raise RuntimeError('No price data merged. Check network or date range.')

    filled = fill_missing_dates(merged)
    cleaned = handle_missing(filled, method='interpolate')
    return cleaned.dropna(axis=1, how='all')

# Build the cleaned price table once; the cells below read it as `prices`.
prices = build_prices()
# Summary of the table, then its last rows (the final expression is what the cell displays).
prices.info()
prices.tail()


## 2) Extract TSLA and chronological split

In [None]:
# Take the TARGET_TICKER series from the price table and split it at SPLIT_DATE.
tsla = extract_series(prices, TARGET_TICKER)
train, test = time_split_by_date(tsla, SPLIT_DATE)

# Fail fast on an empty side: the model cells use `train` for fitting and
# `len(test)` as the forecast horizon, so an empty split would only surface
# later as a much less obvious error.
if len(train) == 0 or len(test) == 0:
    raise ValueError(
        f'Split at {SPLIT_DATE} produced an empty set '
        f'(train={len(train)}, test={len(test)}). Check SPLIT_DATE against the data range.'
    )

# Sizes and date bounds of both splits, shown as the cell output.
len(train), len(test), train.index.min(), train.index.max(), test.index.min(), test.index.max()


## 3) Auto-ARIMA fit and forecast

In [None]:
# Fit on the training series only (non-seasonal, stepwise search, no trace output).
arima_model = fit_auto_arima(
    train,
    seasonal=False,
    m=1,
    stepwise=True,
    trace=False,
)

# Forecast one value per test observation, labelled with the test index.
pred_arima = forecast_arima(
    arima_model,
    steps=len(test),
    index=test.index,
)

# Score the forecast against the held-out values and show the result.
metrics_arima = evaluate_all(test, pred_arima)
metrics_arima


## 4) LSTM fit and forecast

In [None]:
# LSTM hyperparameters, one per line so each is easy to tune.
LOOKBACK = 60
EPOCHS = 20
BATCH = 32
UNITS = 64
DROPOUT = 0.0

# Train on the training series; the call returns the model together with the
# scaler and last window that the forecasting call below needs.
lstm_model, scaler, last_window = train_lstm(
    train,
    lookback=LOOKBACK,
    epochs=EPOCHS,
    batch_size=BATCH,
    units=UNITS,
    dropout=DROPOUT,
)

# Forecast one value per test observation, labelled with the test index.
pred_lstm = forecast_lstm(
    lstm_model,
    scaler,
    last_window,
    steps=len(test),
    index=test.index,
)

# Score the forecast against the held-out values and show the result.
metrics_lstm = evaluate_all(test, pred_lstm)
metrics_lstm


## 5) Compare and plot

In [None]:
# Metrics of both models, one line each.
print('ARIMA:', metrics_arima)
print('LSTM :', metrics_lstm)

# Put the actual values and both forecasts side by side in one table.
compare = pd.DataFrame({
    'Actual': test,
    'ARIMA': pred_arima,
    'LSTM': pred_lstm,
})

# One line per column on a single set of axes.
ax = compare.plot(figsize=(12, 6))
ax.set(
    title='TSLA — Actual vs Forecasts (Test)',
    xlabel='Date',
    ylabel='Price',
)
plt.tight_layout()
plt.show()

# Last rows of the comparison table, shown as the cell output.
compare.tail()
