In [31]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import pickle
import time
import matplotlib.pyplot as plt
import seaborn as sns

sys.path.insert(0, str(Path.cwd().parent))
from src.ar_models import (
    fit_ar_frequentist, fit_ar_bayesian, fit_ar_hierarchical,
    forecast_ar_multihorizon, get_ar_prior
)
from src.results_manager import ResultsManager

In [33]:
# --- Paths -----------------------------------------------------------------
DATA_DIR = Path('../data/processed')
FIG_DIR = Path('../figures')
FIG_DIR.mkdir(exist_ok=True)

# --- Experiment configuration ------------------------------------------------
TICKERS = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA', 'META', 'NVDA']  # 7 stocks
PRIOR_SETS = ['weak', 'medium', 'informative']
HORIZONS = [1, 5, 22]
# Passed as n_samples / n_burnin to the Bayesian and hierarchical fits below.
N_SAMPLES = 5000
N_BURNIN = 1000

# Seed the legacy global NumPy RNG so the sampling-based fits can be reproduced
# on Restart & Run All.
# NOTE(review): this only has an effect if src.ar_models draws from NumPy's
# global RNG -- confirm; if the samplers create their own Generator, a seed
# has to be threaded through their API instead.
SEED = 42
np.random.seed(SEED)

# Plotting style
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (14, 8)

# Load data
# SECURITY: pickle.load can execute arbitrary code embedded in the file. Only
# load a train_test_split.pkl produced by this project's own pipeline.
with open(DATA_DIR / 'train_test_split.pkl', 'rb') as f:
    data = pickle.load(f)

results_mgr = ResultsManager()

print(f"Stocks: {TICKERS}")
print(f"Prior sets: {PRIOR_SETS}")
print(f"Horizons: {HORIZONS}")

Stocks: ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA', 'META', 'NVDA']
Prior sets: ['weak', 'medium', 'informative']
Horizons: [1, 5, 22]


In [35]:
print("PART 1: AR(1) FREQUENTIST (OLS)")

# One frequentist fit per ticker. Each stored result dict is extended with a
# 'forecasts' entry before being persisted, so the saved object contains both.
freq_results = {}
for symbol in TICKERS:
    train_series = data['train'][symbol].values
    test_series = data['test'][symbol].values

    print(f"\n{symbol}...", end=" ")

    # Fit, and register the result under the ticker right away.
    freq_results[symbol] = fit = fit_ar_frequentist(train_series)

    # Multi-horizon forecasts start from the last training observation and use
    # the fitted coefficient; they are attached to the same result dict.
    fit['forecasts'] = forecast_ar_multihorizon(
        train_series[-1], test_series, fit['phi'], HORIZONS
    )

    # Persist the fit together with its forecasts.
    results_mgr.save_ar(symbol, 'freq', fit)

PART 1: AR(1) FREQUENTIST (OLS)

AAPL... Saved AR: AAPL_ar_freq.pkl

MSFT... Saved AR: MSFT_ar_freq.pkl

GOOGL... Saved AR: GOOGL_ar_freq.pkl

AMZN... Saved AR: AMZN_ar_freq.pkl

TSLA... Saved AR: TSLA_ar_freq.pkl

META... Saved AR: META_ar_freq.pkl

NVDA... Saved AR: NVDA_ar_freq.pkl


In [37]:
print("PART 2: AR(1) BAYESIAN (Gibbs)")

# bayes_results[ticker][prior_set] -> fit result dict, extended with 'forecasts'.
bayes_results = {}
for symbol in TICKERS:
    per_prior = bayes_results[symbol] = {}
    train_series = data['train'][symbol].values
    test_series = data['test'][symbol].values

    for prior_name in PRIOR_SETS:
        print(f"\n{symbol} + {prior_name}...", end=" ")

        # Fit under this prior configuration and register the result.
        posterior = per_prior[prior_name] = fit_ar_bayesian(
            train_series,
            prior_set=prior_name,
            n_samples=N_SAMPLES,
            n_burnin=N_BURNIN,
        )

        # Forecast from the last training observation using the 'phi_mean'
        # entry of the fit as a point estimate of the coefficient.
        posterior['forecasts'] = forecast_ar_multihorizon(
            train_series[-1], test_series, posterior['phi_mean'], HORIZONS
        )

        # Persist the fit (including forecasts), tagged with the prior name.
        results_mgr.save_ar(symbol, 'bayes', posterior, prior_set=prior_name)

PART 2: AR(1) BAYESIAN (Gibbs)

AAPL + weak... Saved AR: AAPL_ar_bayes_weak.pkl

AAPL + medium... Saved AR: AAPL_ar_bayes_medium.pkl

AAPL + informative... Saved AR: AAPL_ar_bayes_informative.pkl

MSFT + weak... Saved AR: MSFT_ar_bayes_weak.pkl

MSFT + medium... Saved AR: MSFT_ar_bayes_medium.pkl

MSFT + informative... Saved AR: MSFT_ar_bayes_informative.pkl

GOOGL + weak... Saved AR: GOOGL_ar_bayes_weak.pkl

GOOGL + medium... Saved AR: GOOGL_ar_bayes_medium.pkl

GOOGL + informative... Saved AR: GOOGL_ar_bayes_informative.pkl

AMZN + weak... Saved AR: AMZN_ar_bayes_weak.pkl

AMZN + medium... Saved AR: AMZN_ar_bayes_medium.pkl

AMZN + informative... Saved AR: AMZN_ar_bayes_informative.pkl

TSLA + weak... Saved AR: TSLA_ar_bayes_weak.pkl

TSLA + medium... Saved AR: TSLA_ar_bayes_medium.pkl

TSLA + informative... Saved AR: TSLA_ar_bayes_informative.pkl

META + weak... Saved AR: META_ar_bayes_weak.pkl

META + medium... Saved AR: META_ar_bayes_medium.pkl

META + informative... Saved AR: MET

In [38]:
print("PART 3: AR(1) HIERARCHICAL (All 7 stocks together)")

# Training series for every ticker, keyed by symbol, for the joint fit.
data_dict = {}
for symbol in TICKERS:
    data_dict[symbol] = data['train'][symbol].values

print("Fitting hierarchical model...", end=" ")
hier_result = fit_ar_hierarchical(data_dict, n_samples=N_SAMPLES, n_burnin=N_BURNIN)
runtime_s = hier_result['meta']['runtime']
print(f"Done! Time={runtime_s:.1f}s")

# Per-ticker forecasts driven by each ticker's 'phi_mean' from the joint fit.
# NOTE(review): these forecasts live only in hier_forecasts; they are not
# attached to hier_result, so the object saved below does not include them.
hier_forecasts = {
    symbol: forecast_ar_multihorizon(
        data['train'][symbol].values[-1],
        data['test'][symbol].values,
        hier_result[symbol]['phi_mean'],
        HORIZONS,
    )
    for symbol in TICKERS
}

# Kept as the cell's final bare expression so its return value is displayed.
results_mgr.save_hierarchical(model_type='ar', results=hier_result)

PART 3: AR(1) HIERARCHICAL (All 7 stocks together)
Fitting hierarchical model... Done! Time=3.7s
Saved Hierarchical AR: hierarchical_ar.pkl


WindowsPath('../results/hierarchical/hierarchical_ar.pkl')

In [39]:
# Create comparison table: one row per (stock, model, prior) holding the
# coefficient estimate and the forecast MSE at every horizon in HORIZONS.


def _mse_columns(forecasts):
    """Map a forecasts dict (keys 'h_<h>', each with an 'mse' entry) to table columns.

    Columns are derived from HORIZONS so the table stays in sync with the
    'h=<h>_MSE' names the plotting code builds from the same constant.
    """
    return {f'h={h}_MSE': forecasts[f'h_{h}']['mse'] for h in HORIZONS}


comparison_data = []

for ticker in TICKERS:
    # Frequentist
    comparison_data.append({
        'Stock': ticker,
        'Model': 'AR-Freq',
        'Prior': 'N/A',
        'Phi': freq_results[ticker]['phi'],
        **_mse_columns(freq_results[ticker]['forecasts']),
    })

    # Bayesian (all priors)
    for prior_set in PRIOR_SETS:
        bayes_fit = bayes_results[ticker][prior_set]
        comparison_data.append({
            'Stock': ticker,
            'Model': 'AR-Bayes',
            'Prior': prior_set,
            'Phi': bayes_fit['phi_mean'],
            **_mse_columns(bayes_fit['forecasts']),
        })

    # Hierarchical (forecasts are kept in hier_forecasts, not in hier_result)
    comparison_data.append({
        'Stock': ticker,
        'Model': 'AR-Hier',
        'Prior': 'Hier',
        'Phi': hier_result[ticker]['phi_mean'],
        **_mse_columns(hier_forecasts[ticker]),
    })

comp_df = pd.DataFrame(comparison_data)

print("\n" + comp_df.to_string(index=False))

# Save
comp_df.to_csv(FIG_DIR / 'ar_comparison.csv', index=False)


Stock    Model       Prior       Phi  h=1_MSE  h=5_MSE  h=22_MSE
 AAPL  AR-Freq         N/A  0.001724 0.000057 0.000231  0.000323
 AAPL AR-Bayes        weak  0.017681 0.000068 0.000233  0.000323
 AAPL AR-Bayes      medium  0.302663 0.000400 0.000256  0.000328
 AAPL AR-Bayes informative  0.347634 0.000477 0.000259  0.000329
 AAPL  AR-Hier        Hier  0.039128 0.000083 0.000235  0.000324
 MSFT  AR-Freq         N/A -0.017063 0.000107 0.000357  0.000360
 MSFT AR-Bayes        weak -0.001886 0.000109 0.000357  0.000360
 MSFT AR-Bayes      medium  0.297835 0.000135 0.000364  0.000361
 MSFT AR-Bayes informative  0.351830 0.000141 0.000366  0.000362
 MSFT  AR-Hier        Hier  0.031353 0.000111 0.000358  0.000360
GOOGL  AR-Freq         N/A  0.002749 0.000254 0.000099  0.000538
GOOGL AR-Bayes        weak -0.013904 0.000247 0.000097  0.000537
GOOGL AR-Bayes      medium  0.300146 0.000399 0.000134  0.000545
GOOGL AR-Bayes informative  0.347803 0.000425 0.000141  0.000547
GOOGL  AR-Hier        Hi

In [40]:
print("SHRINKAGE ANALYSIS: Individual vs Hierarchical")

# One row per ticker: the individually fitted coefficient (Bayesian fit under
# the 'medium' prior), the hierarchical coefficient, the group-level mean, and
# the absolute gap between the two per-ticker estimates.
# NOTE(review): the "individual" baseline is the 'medium'-prior fit rather than
# the frequentist or 'weak'-prior one -- confirm this is the intended reference.
shrinkage_data = []
for symbol in TICKERS:
    phi_individual = bayes_results[symbol]['medium']['phi_mean']
    phi_pooled = hier_result[symbol]['phi_mean']
    row = {
        'Stock': symbol,
        'Individual φ': phi_individual,
        'Hierarchical φ': phi_pooled,
        'Group Mean': hier_result['group']['mu_phi_mean'],
        'Shrinkage Amt': abs(phi_pooled - phi_individual),
    }
    shrinkage_data.append(row)

shrink_df = pd.DataFrame(shrinkage_data)
print("\n" + shrink_df.to_string(index=False))

# Save
shrink_df.to_csv(FIG_DIR / 'ar_shrinkage.csv', index=False)

SHRINKAGE ANALYSIS: Individual vs Hierarchical

Stock  Individual φ  Hierarchical φ  Group Mean  Shrinkage Amt
 AAPL      0.302663        0.039128    0.050011       0.263535
 MSFT      0.297835        0.031353    0.050011       0.266482
GOOGL      0.300146        0.049308    0.050011       0.250838
 AMZN      0.299381        0.053435    0.050011       0.245947
 TSLA      0.303172        0.052350    0.050011       0.250822
 META      0.300554        0.062444    0.050011       0.238109
 NVDA      0.298662        0.060807    0.050011       0.237855


In [41]:
print("PLOTTING")

# PLOT 1: MSE by Horizon (across all models)
# One panel per entry in HORIZONS. squeeze=False keeps `axes` two-dimensional
# even when there is a single horizon, so the loop below works for any length.
fig, axes = plt.subplots(1, len(HORIZONS), figsize=(16, 5), squeeze=False)

for ax, h in zip(axes[0], HORIZONS):
    col = f'h={h}_MSE'

    # Group by Stock and Model. AR-Bayes has one row per prior set in comp_df,
    # so aggfunc='min' plots the lowest-MSE prior for each stock.
    pivot_data = comp_df.pivot_table(values=col, index='Stock',
                                     columns='Model', aggfunc='min')

    pivot_data.plot(kind='bar', ax=ax, width=0.8)
    ax.set_title(f'MSE by Model (h={h} steps)', fontsize=12, fontweight='bold')
    ax.set_xlabel('Stock')
    ax.set_ylabel('MSE')
    ax.legend(title='Model', fontsize=9)
    ax.grid(axis='y', alpha=0.3)
    ax.tick_params(axis='x', rotation=45)

fig.tight_layout()
fig.savefig(FIG_DIR / 'ar_mse_by_horizon.png', dpi=300, bbox_inches='tight')
print("Saved: ar_mse_by_horizon.png")
# Close this specific figure; it is written to disk rather than shown inline.
plt.close(fig)

# Phi Estimates (Shrinkage)
# Paired bars per ticker: individually fitted coefficient ('medium' prior) next
# to the hierarchical one, with the group-level mean as a horizontal reference.
fig, ax = plt.subplots(figsize=(12, 6))

x = np.arange(len(TICKERS))
width = 0.25

ind_phis = [bayes_results[t]['medium']['phi_mean'] for t in TICKERS]
hier_phis = [hier_result[t]['phi_mean'] for t in TICKERS]
group_mean = hier_result['group']['mu_phi_mean']

# Offset the two bars by +/- width/2 so each pair is centred on its tick;
# placing them at (x - width, x) shifts every pair left of its label.
ax.bar(x - width / 2, ind_phis, width, label='Individual (Bayes-Medium)', alpha=0.8)
ax.bar(x + width / 2, hier_phis, width, label='Hierarchical', alpha=0.8)
ax.axhline(y=group_mean, color='red', linestyle='--', linewidth=2, label='Group Mean')

ax.set_xlabel('Stock')
ax.set_ylabel('φ estimate')
ax.set_title('AR(1) Coefficient: Individual vs Hierarchical Shrinkage',
             fontsize=14, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(TICKERS, rotation=45)
ax.legend(fontsize=10)
ax.grid(axis='y', alpha=0.3)

fig.tight_layout()
fig.savefig(FIG_DIR / 'ar_phi_shrinkage.png', dpi=300, bbox_inches='tight')
print("Saved: ar_phi_shrinkage.png")
# Close this specific figure; it is written to disk rather than shown inline.
plt.close(fig)

# Winner Count
# For every (stock, horizon) pair, the model family of the comp_df row with the
# lowest MSE scores one win; the totals are drawn as a bar chart.
fig, ax = plt.subplots(figsize=(10, 6))

# Count wins for each model across all stocks & horizons
winner_counts = {'AR-Freq': 0, 'AR-Bayes': 0, 'AR-Hier': 0}

for stock in TICKERS:
    stock_df = comp_df[comp_df['Stock'] == stock]

    # For each horizon, find best model (idxmin returns the first row on ties)
    for h in HORIZONS:
        col = f'h={h}_MSE'
        best_model = stock_df.loc[stock_df[col].idxmin(), 'Model']
        winner_counts[best_model] += 1

# Materialise the dict views as lists so the bar positions, colours and value
# labels below are all indexed consistently.
models = list(winner_counts)
counts = [winner_counts[m] for m in models]

colors = ['#FF6B6B', '#4ECDC4', '#45B7D1']
ax.bar(models, counts, color=colors, alpha=0.8, edgecolor='black', linewidth=2)
ax.set_ylabel('Number of Wins')
# Build the horizon list in the title from HORIZONS so it cannot drift from the
# horizons that were actually evaluated.
horizon_label = ','.join(str(h) for h in HORIZONS)
ax.set_title(f'Model Wins Across All Stocks & Horizons (h={horizon_label})',
             fontsize=14, fontweight='bold')
ax.grid(axis='y', alpha=0.3)

# Add value labels on bars
for i, count in enumerate(counts):
    ax.text(i, count + 0.1, str(count), ha='center', fontweight='bold', fontsize=12)

fig.tight_layout()
fig.savefig(FIG_DIR / 'ar_winner_count.png', dpi=300, bbox_inches='tight')
print("Saved: ar_winner_count.png")
# Close this specific figure; it is written to disk rather than shown inline.
plt.close(fig)

PLOTTING
Saved: ar_mse_by_horizon.png
Saved: ar_phi_shrinkage.png
Saved: ar_winner_count.png
