# Time Series Modeling and Forecasting

This notebook trains and compares several forecasting models (Random Forest, Linear Regression, ARIMA, and Prophet) on a financial time series for a single stock index.

In [None]:
import pandas as pd
import numpy as np
import sys
import os

# Add src directory to path to import custom modules
sys.path.append(os.path.join('..', 'src'))

from data_loading import load_financial_dataset
from data_cleaning import clean_financial_data
from feature_engineering import engineer_features
from modeling import (
    random_forest_forecast, linear_regression_forecast, arima_forecast, 
    prophet_forecast, lstm_forecast, evaluate_model_performance
)
from visualization import plot_forecast_results

# Load the raw dataset, then run it through the project's cleaning and
# feature-engineering steps (all three helpers come from ../src).
df = load_financial_dataset()
df_clean = clean_financial_data(df)
df_featured = engineer_features(df_clean)

# Focus on a specific stock for modeling (e.g., S&P 500)
stock_to_model = 'S&P 500'  # You can change this to any stock in your dataset
# .copy() so later cells work on an independent frame rather than a slice of df_featured.
df_stock = df_featured[df_featured['Stock Index'] == stock_to_model].copy()

# Fail fast on a misspelled or missing index name: an empty frame would
# otherwise only surface later as an unrelated error inside a model helper.
if df_stock.empty:
    available_indices = list(df_featured['Stock Index'].dropna().unique())
    raise ValueError(
        f'No rows found for stock index {stock_to_model!r}. '
        f'Available values: {available_indices}'
    )

print(f'Modeling {stock_to_model} data with shape: {df_stock.shape}')
print(f'Date range: {df_stock["Date"].min()} to {df_stock["Date"].max()}')

# Define target variable (the column every model below is asked to forecast)
target_col = 'Close Price'
print(f'\nTarget variable: {target_col}')

In [None]:
# Fit the Random Forest forecaster on the selected stock and report its scores.
print('\nTraining Random Forest model...')
rf_results = random_forest_forecast(df_stock, target_col=target_col)

# The result is indexed by 'mse', 'mae' and 'r2'; print each to four decimals.
print('Random Forest Results:')
for metric_label, metric_key in (('MSE', 'mse'), ('MAE', 'mae'), ('R2', 'r2')):
    print(f'  - {metric_label}: {rf_results[metric_key]:.4f}')

# Pass the actual/predicted pairs to the shared evaluation helper and show its output.
rf_performance = evaluate_model_performance(
    rf_results['actual'],
    rf_results['predictions'],
)
print(f'  - Detailed Performance: {rf_performance}')

In [None]:
# Fit the Linear Regression forecaster on the selected stock and report its scores.
print('\nTraining Linear Regression model...')
lr_results = linear_regression_forecast(df_stock, target_col=target_col)

# The result is indexed by 'mse', 'mae' and 'r2'; print each to four decimals.
print('Linear Regression Results:')
for metric_label, metric_key in (('MSE', 'mse'), ('MAE', 'mae'), ('R2', 'r2')):
    print(f'  - {metric_label}: {lr_results[metric_key]:.4f}')

# Pass the actual/predicted pairs to the shared evaluation helper and show its output.
lr_performance = evaluate_model_performance(
    lr_results['actual'],
    lr_results['predictions'],
)
print(f'  - Detailed Performance: {lr_performance}')

In [None]:
# Time series models: ARIMA and Prophet (lstm_forecast is imported but not run in this cell)
target_series = df_stock[target_col].dropna()

# One shared horizon, so the Prophet slice below stays in step with the
# number of periods requested from both helpers.
forecast_horizon = 30

# Apply ARIMA model
print('\nTraining ARIMA model...')
try:
    arima_forecast_vals, arima_conf_int, arima_model = arima_forecast(
        target_series, order=(1, 1, 1), forecast_steps=forecast_horizon
    )
    print('ARIMA model trained successfully')
    print(f'First 5 forecasted values: {arima_forecast_vals[:5]}')
except Exception as e:
    # Best-effort: report the failure and leave a None sentinel so the
    # visualization cell can skip the ARIMA plot instead of crashing.
    print(f'ARIMA model failed: {e}')
    arima_forecast_vals = None

# Apply Prophet model
print('\nTraining Prophet model...')
try:
    prophet_forecast_df, prophet_model = prophet_forecast(
        df_stock[['Date', target_col]],
        target_col=target_col,
        forecast_periods=forecast_horizon,
    )
    print('Prophet model trained successfully')
    print(f'Last 5 actual values: {target_series.tail(5).values}')
    # The original printed yhat.tail(5) under a "First 5" label, i.e. the
    # LAST five rows. Take the trailing horizon first, then its head.
    # NOTE(review): assumes the forecast periods are the trailing rows of the
    # returned frame (history + future, or future only) - confirm against
    # modeling.prophet_forecast.
    prophet_future_yhat = prophet_forecast_df["yhat"].tail(forecast_horizon)
    print(f'First 5 forecasted values: {prophet_future_yhat.head(5).values}')
except Exception as e:
    # Best-effort, same as ARIMA: report and leave a None sentinel.
    print(f'Prophet model failed: {e}')
    prophet_forecast_df = None

In [None]:
# Plot each fitted model's output with the shared plotting helper.
print('\nVisualizing forecasting results...')


def _plot_actual_vs_predicted(model_results, model_label):
    """Plot one model's 'actual' and 'predictions' entries under a titled chart."""
    plot_forecast_results(
        actual=model_results['actual'],
        predicted=model_results['predictions'],
        title=f'{stock_to_model} - {model_label}: Actual vs Predicted'
    )


# Random Forest first, then Linear Regression
_plot_actual_vs_predicted(rf_results, 'Random Forest')
_plot_actual_vs_predicted(lr_results, 'Linear Regression')

# ARIMA is only plotted when the training cell produced a forecast
# (it stores None on failure).
if arima_forecast_vals is not None:
    # Show the last 50 observed values alongside the forecast.
    actual_for_arima = target_series.tail(50)
    arima_plot_kwargs = {
        'actual': actual_for_arima,
        # No in-sample predictions are available here, so the actual series
        # is passed again as a placeholder for `predicted`.
        'predicted': actual_for_arima,
        # Only the first 10 forecasted values are drawn.
        'forecast': arima_forecast_vals[:10],
        'title': f'{stock_to_model} - ARIMA Forecast',
    }
    plot_forecast_results(**arima_plot_kwargs)

In [None]:
# Side-by-side summary of the two regression-style models.
summary_rule = '=' * 60
print('\nModel Comparison Summary:')
print(summary_rule)
print(f'Random Forest    - MSE: {rf_results["mse"]:.4f}, MAE: {rf_results["mae"]:.4f}, R2: {rf_results["r2"]:.4f}')
print(f'Linear Regression - MSE: {lr_results["mse"]:.4f}, MAE: {lr_results["mae"]:.4f}, R2: {lr_results["r2"]:.4f}')
print(summary_rule)

# Pick the winner by R2. Random Forest wins only when its score is strictly
# higher; a tie (or a non-comparable score) falls through to Linear Regression.
rf_is_better = rf_results['r2'] > lr_results['r2']
best_model = 'Random Forest' if rf_is_better else 'Linear Regression'
best_r2 = rf_results['r2'] if rf_is_better else lr_results['r2']

print(f'\nBest performing model: {best_model} (R2: {best_r2:.4f})')

print('\nModeling completed!')