# Task 3: Forecast Future Market Trends — TSLA
Use the models from Task 2 (ARIMA and LSTM) to forecast TSLA prices 6–12 months ahead, visualize the forecasts with confidence intervals, and analyze the resulting trends and risks.

In [None]:
# Setup: ensure project root is on sys.path
import os, sys
from pathlib import Path
# Resolve the project root: when the working directory is named `notebooks`,
# the root is its parent; otherwise the working directory itself is the root.
_cwd_resolved = Path.cwd().resolve()
ROOT = _cwd_resolved.parent if Path.cwd().name == 'notebooks' else _cwd_resolved
# Only register the root once, so re-running this cell does not keep
# appending duplicate entries to sys.path.
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))
ROOT


In [None]:
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import timedelta

from src.constants.config import (
    TICKERS, START_DATE, END_DATE, INTERVAL, AUTO_ADJUST, TARGET_TICKER
)
from src.utils.data_loader import fetch_yfinance_data, merge_adjusted_close
from src.utils.preprocessing import fill_missing_dates, handle_missing, extract_series
from src.models.arima import fit_auto_arima, forecast_arima_with_ci
from src.models.lstm import train_lstm, forecast_lstm, make_sequences
from src.utils.evaluation import normal_prediction_interval


## 1) Build prices (reusing Task 2 logic with fallback)

In [None]:
def build_prices():
    """Fetch, merge and clean the 'Adj Close' price table for ``TICKERS``.

    The download is first attempted with the configured ``AUTO_ADJUST``
    setting. If the merged table comes back as ``None`` or empty, it is
    retried once with ``auto_adjust=False``. The merged table is then passed
    through ``fill_missing_dates`` and ``handle_missing(method='interpolate')``,
    and columns that are entirely NaN are dropped.

    Raises:
        RuntimeError: if neither attempt yields any merged price data.
    """
    for adjust_flag in (AUTO_ADJUST, False):
        downloaded = fetch_yfinance_data(
            TICKERS, START_DATE, END_DATE, interval=INTERVAL, auto_adjust=adjust_flag
        )
        merged = merge_adjusted_close(downloaded, column='Adj Close')
        if merged is not None and not merged.empty:
            break
    else:
        # Both attempts produced nothing usable.
        raise RuntimeError('No price data merged. Check network or date range.')

    cleaned = handle_missing(fill_missing_dates(merged), method='interpolate')
    return cleaned.dropna(axis=1, how='all')


# Build the full price table, then pull out the target ticker's series;
# the last expression previews its most recent rows.
prices = build_prices()
tsla = extract_series(prices, TARGET_TICKER)
tsla.tail()


## 2) Define forecast horizon and future index

In [None]:
# Forecast horizon in months (6 or 12).
HORIZON_MONTHS = 6
# Convert months to forecast steps, using ~21 business days per month.
STEPS = int(HORIZON_MONTHS * 21)
# Future index: STEPS business days, starting one business day after the
# last observed date in the TSLA series.
last_date = tsla.index.max()
future_index = pd.date_range(
    start=last_date + pd.offsets.BDay(1),
    periods=STEPS,
    freq='B',
)
# Preview: last observed date, step count, first and last forecast dates.
(last_date, STEPS, future_index[0], future_index[-1])


## 3) ARIMA: fit on full series and forecast with confidence intervals

In [None]:
# Fit auto-ARIMA on the entire TSLA series (non-seasonal, stepwise search,
# trace output disabled).
arima_model = fit_auto_arima(
    tsla,
    seasonal=False,
    m=1,
    stepwise=True,
    trace=False,
)
# Forecast STEPS periods onto the future index; the result is unpacked as
# (mean, lower bound, upper bound).
arima_forecast = forecast_arima_with_ci(arima_model, steps=STEPS, index=future_index)
arima_mean, arima_lower, arima_upper = arima_forecast
arima_mean.head()


## 4) LSTM: train on full series and forecast; build approximate CIs

In [None]:
# LSTM settings: lookback window, training epochs, batch size, units, dropout.
LOOKBACK, EPOCHS, BATCH, UNITS, DROPOUT = 60, 20, 32, 64, 0.0
try:
    lstm_model, scaler, last_window = train_lstm(tsla, lookback=LOOKBACK, epochs=EPOCHS, batch_size=BATCH, units=UNITS, dropout=DROPOUT)
    lstm_forecast = forecast_lstm(lstm_model, scaler, last_window, steps=STEPS, index=future_index)

    # Build an approximate CI from the in-sample residuals.
    # Keep the cleaned Series (not only its values) so that the residual index
    # below comes from exactly the rows that survived coercion/dropna. Taking
    # the index from the raw `tsla` would produce a length mismatch whenever
    # any row was dropped, and the resulting error would discard the forecast.
    clean = pd.to_numeric(tsla, errors='coerce').dropna().astype(float)
    values = clean.values.reshape(-1, 1)
    scaled = scaler.transform(values)
    # ravel() (rather than squeeze()) keeps the arrays 1-D even for one row.
    X_all, y_all = make_sequences(scaled.ravel(), LOOKBACK)
    yhat_scaled = lstm_model.predict(X_all, verbose=0).reshape(-1, 1)
    # Inverse-transform the predictions back to price units.
    yhat = scaler.inverse_transform(yhat_scaled).ravel()
    # Assumes make_sequences yields one sample per row after the first
    # LOOKBACK rows, so predictions line up with values[LOOKBACK:] (verify
    # against src.models.lstm).
    y_true = values[LOOKBACK:].ravel()
    # Aligned Series of actuals and predictions for the residual std.
    idx_align = clean.index[LOOKBACK:]
    y_true_s = pd.Series(y_true, index=idx_align)
    y_pred_s = pd.Series(yhat, index=idx_align)
    lstm_lower_off, lstm_upper_off = normal_prediction_interval(y_true_s, y_pred_s, steps=STEPS, z=1.96, index=future_index)
    # The interval helper returns offsets, which are added to the forecast.
    lstm_lower = lstm_forecast + lstm_lower_off
    lstm_upper = lstm_forecast + lstm_upper_off
except Exception as e:
    # Deliberate best-effort: if the LSTM path fails for any reason, report it
    # and fall back to empty Series so the later cells skip the LSTM outputs.
    print('LSTM unavailable or failed:', e)
    lstm_forecast = pd.Series(dtype=float)
    lstm_lower = pd.Series(dtype=float)
    lstm_upper = pd.Series(dtype=float)

lstm_forecast.head() if not lstm_forecast.empty else 'LSTM skipped'


## 5) Visualization: historical + forecasts with confidence intervals

In [None]:
# Show roughly the last two years of history (2 x 252 trading days).
hist_window = 2 * 252
hist = tsla.tail(hist_window)

fig, ax = plt.subplots(figsize=(12, 6))
hist.plot(ax=ax, label='Historical')

# One entry per model: (line label, band label, colour, mean, lower, upper).
# The LSTM layer is included only when an LSTM forecast is available.
forecast_layers = [
    ('ARIMA forecast', 'ARIMA CI', 'C1', arima_mean, arima_lower, arima_upper),
]
if not lstm_forecast.empty:
    forecast_layers.append(
        ('LSTM forecast', 'LSTM approx CI', 'C2', lstm_forecast, lstm_lower, lstm_upper)
    )
for line_label, band_label, colour, fc_mean, fc_lower, fc_upper in forecast_layers:
    fc_mean.plot(ax=ax, label=line_label, color=colour)
    ax.fill_between(
        fc_mean.index, fc_lower.values, fc_upper.values,
        color=colour, alpha=0.2, label=band_label,
    )

ax.set_title(f'TSLA Forecast — {int(HORIZON_MONTHS)} months ahead')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.legend()
plt.tight_layout()

# Make sure the output directory exists, then save and display the figure.
fig_dir = ROOT / 'reports' / 'figures'
fig_dir.mkdir(parents=True, exist_ok=True)
fig_path = fig_dir / f'tsla_task3_forecast_{HORIZON_MONTHS}m.png'
fig.savefig(fig_path, dpi=150)
plt.show()
fig_path


## 6) Save outputs

In [None]:
# Collect the ARIMA forecast and its interval bounds into one table.
out = pd.DataFrame(
    {
        'ARIMA_mean': arima_mean,
        'ARIMA_lower': arima_lower,
        'ARIMA_upper': arima_upper,
    }
)
# Add the LSTM columns only when an LSTM forecast is available.
if not lstm_forecast.empty:
    lstm_columns = (
        ('LSTM_mean', lstm_forecast),
        ('LSTM_lower', lstm_lower),
        ('LSTM_upper', lstm_upper),
    )
    for col_name, col_values in lstm_columns:
        out[col_name] = col_values

# Make sure the output directory exists, then write the CSV.
processed_dir = ROOT / 'data' / 'processed'
processed_dir.mkdir(parents=True, exist_ok=True)
csv_path = processed_dir / f'tsla_task3_forecasts_{HORIZON_MONTHS}m.csv'
out.to_csv(csv_path)
csv_path


## 7) Interpretation Guide
- __Trend__: Is the forecast line generally rising, falling, or flat over the horizon?
- __Confidence intervals__: Do bands widen over time? Wider bands imply greater uncertainty in long-term forecasts.
- __Volatility and risk__: Large or rapidly widening bands indicate higher volatility and lower certainty; plan risk management accordingly.
- __Opportunities__: If trend is upward and CIs aren't excessively wide, consider bullish strategies.
- __Risks__: Downward trend or very wide CIs point to caution; stress-test scenarios.
