In [1]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import pickle
import time
import matplotlib.pyplot as plt
import seaborn as sns

sys.path.insert(0, str(Path.cwd().parent))
from src.ar_models import (
    fit_ar_frequentist, fit_ar_bayesian, fit_ar_hierarchical,
    forecast_ar_posterior_predictive, get_ar_prior
)
from src.results_manager import ResultsManager

In [2]:
DATA_DIR = Path('../data/processed')
FIG_DIR = Path('../figures')
FIG_DIR.mkdir(exist_ok=True)

TICKERS = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA', 'META', 'NVDA']  # 7 stocks
PRIOR_SETS = ['weak', 'medium', 'informative']
HORIZONS = [1, 5, 22]
N_SAMPLES = 5000
N_BURNIN = 1000

# Plotting style
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (14, 8)

# Load data
with open(DATA_DIR / 'train_test_split.pkl', 'rb') as f:
    data = pickle.load(f)

results_mgr = ResultsManager()

print(f"Stocks: {TICKERS}")
print(f"Prior sets: {PRIOR_SETS}")
print(f"Horizons: {HORIZONS}")
print(get_ar_prior('weak'))
print(get_ar_prior('medium'))
print(get_ar_prior('informative'))

Stocks: ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA', 'META', 'NVDA']
Prior sets: ['weak', 'medium', 'informative']
Horizons: [1, 5, 22]
{'phi_mean': 0.0, 'phi_std': 1.0, 'sigma2_alpha': 1.0, 'sigma2_beta': 1.0, 'description': 'Weak (diffuse)'}
{'phi_mean': 0.0, 'phi_std': 0.2, 'sigma2_alpha': 10.0, 'sigma2_beta': 10.0, 'description': 'Medium'}
{'phi_mean': 0.0, 'phi_std': 0.1, 'sigma2_alpha': 100.0, 'sigma2_beta': 100.0, 'description': 'Informative '}


In [3]:
from scipy import stats

def forecast_ar_frequentist_prediction_intervals(last_train, test_data, phi, sigma2, 
                                                  n_obs, horizons=[1, 5, 22]):
    """
    PROPER Frequentist Prediction Interval using Student-t distribution
    
    For AR(1): y_{t+h} = œÜ * y_{t+h-1} + Œµ_{t+h}
    
    Prediction interval accounts for:
    1. Uncertainty in œÜ and œÉ¬≤ estimates
    2. Future shocks
    3. Degrees of freedom (t-distribution, not z)
    
    Formula for h-step ahead:
    PI = y_hat ¬± t_{Œ±/2, n-2} * œÉ_h-step
    
    where œÉ_h-step = œÉ * sqrt(1 + œÜ¬≤ + œÜ‚Å¥ + ... + œÜ^(2(h-1)))
    """
    forecasts = {}
    
    # Degrees of freedom = n - 2 (for AR(1) with intercept)
    df = max(n_obs - 2, 1)
    t_critical = stats.t.ppf(0.975, df)  # 95% two-tailed
    
    for h in horizons:
        # Point forecast (deterministic)
        fc_point = np.zeros(h)
        phi_clip = np.clip(phi, -0.999, 0.999)
        
        if h == 1:
            fc_point[0] = phi_clip * last_train
        else:
            fc_point[0] = phi_clip * last_train
            for t in range(1, h):
                fc_point[t] = phi_clip * fc_point[t-1]
        
        # Variance grows with horizon
        sigma_h_step = np.zeros(h)
        sigma_h_step[0] = np.sqrt(sigma2)  # 1-step: just shock variance
        
        for t in range(1, h):
            # Sum of powers of œÜ¬≤
            phi_sq = phi_clip ** 2
            power_sum = sum([phi_sq ** k for k in range(t)])
            sigma_h_step[t] = np.sqrt(sigma2 * (1 + power_sum))
            
            # Cap at reasonable value (prevent blowup)
            sigma_h_step[t] = np.clip(sigma_h_step[t], 0, 100)
        
        # Prediction interval (t-critical, not z-critical!)
        max_test_len = min(h, len(test_data))
        actual = test_data[:max_test_len]
        
        fc_trunc = fc_point[:max_test_len]
        sigma_trunc = sigma_h_step[:max_test_len]
        
        ci_lower = fc_trunc - t_critical * sigma_trunc
        ci_upper = fc_trunc + t_critical * sigma_trunc
        
        # PIS
        pis_scores = []
        for t in range(len(actual)):
            lower, upper, y = ci_lower[t], ci_upper[t], actual[t]
            if y < lower:
                pis = (upper - lower) + (2/0.95) * (lower - y)
            elif y > upper:
                pis = (upper - lower) + (2/0.95) * (y - upper)
            else:
                pis = (upper - lower)
            pis_scores.append(pis)
        
        coverage = float(np.mean((actual >= ci_lower) & (actual <= ci_upper)))
        mse = float(np.mean((actual - fc_trunc) ** 2))
        
        forecasts[f'h_{h}'] = {
            'forecast_mean': fc_trunc.tolist(),
            'ci_lower': ci_lower.tolist(),
            'ci_upper': ci_upper.tolist(),
            'actual': actual.tolist(),
            'coverage': coverage,
            'mse': mse,
            'rmse': float(np.sqrt(mse)),
            'interval_width': float(np.mean(ci_upper - ci_lower)),
            'pis': float(np.mean(pis_scores)),
            'df': int(df),
            't_critical': float(t_critical)
        }
    
    return forecasts

print("PART 1: AR(1) FREQUENTIST - WITH PROPER PREDICTION INTERVALS (t-distribution)")

freq_results = {}
for ticker in TICKERS:
    train = data['train'][ticker].values
    test = data['test'][ticker].values
    
    print(f"{ticker}...", end=" ")
    
    # Fit
    result = fit_ar_frequentist(train)
    freq_results[ticker] = result
    
    # Use PROPER prediction intervals (not asymptotic)
    forecasts = forecast_ar_frequentist_prediction_intervals(
        last_train=train[-1],
        test_data=test,
        phi=result['phi'],
        sigma2=result['sigma2'],
        n_obs=len(train),
        horizons=HORIZONS
    )
    
    freq_results[ticker]['forecasts'] = forecasts
    
    # Save
    results_mgr.save_ar(ticker, 'freq', result)
    print(f"‚úì")


PART 1: AR(1) FREQUENTIST - WITH PROPER PREDICTION INTERVALS (t-distribution)
AAPL... Saved AR: AAPL_ar_freq.pkl
‚úì
MSFT... Saved AR: MSFT_ar_freq.pkl
‚úì
GOOGL... Saved AR: GOOGL_ar_freq.pkl
‚úì
AMZN... Saved AR: AMZN_ar_freq.pkl
‚úì
TSLA... Saved AR: TSLA_ar_freq.pkl
‚úì
META... Saved AR: META_ar_freq.pkl
‚úì
NVDA... Saved AR: NVDA_ar_freq.pkl
‚úì


In [4]:
print("PART 2: AR(1) BAYESIAN (Gibbs) - WITH PREDICTION INTERVALS")

bayes_results = {}
for ticker in TICKERS:
    bayes_results[ticker] = {}
    train = data['train'][ticker].values
    test = data['test'][ticker].values
    
    for prior_set in PRIOR_SETS:
        print(f"{ticker} + {prior_set}...", end=" ")
        
        # Fit
        result = fit_ar_bayesian(train, prior_set=prior_set, 
                                n_samples=N_SAMPLES, n_burnin=N_BURNIN)
        bayes_results[ticker][prior_set] = result
        
        # Forecast using POSTERIOR SAMPLES (generates CI)
        forecasts = forecast_ar_posterior_predictive(
            last_train=train[-1],
            test_data=test,
            phi_samples=result['phi_samples'],
            sigma2_samples=result['sigma2_samples'],
            horizons=HORIZONS
        )
        bayes_results[ticker][prior_set]['forecasts'] = forecasts
        
        # Save
        results_mgr.save_ar(ticker, 'bayes', result, prior_set=prior_set)


PART 2: AR(1) BAYESIAN (Gibbs) - WITH PREDICTION INTERVALS
AAPL + weak... Saved AR: AAPL_ar_bayes_weak.pkl
AAPL + medium... Saved AR: AAPL_ar_bayes_medium.pkl
AAPL + informative... Saved AR: AAPL_ar_bayes_informative.pkl
MSFT + weak... Saved AR: MSFT_ar_bayes_weak.pkl
MSFT + medium... Saved AR: MSFT_ar_bayes_medium.pkl
MSFT + informative... Saved AR: MSFT_ar_bayes_informative.pkl
GOOGL + weak... Saved AR: GOOGL_ar_bayes_weak.pkl
GOOGL + medium... Saved AR: GOOGL_ar_bayes_medium.pkl
GOOGL + informative... Saved AR: GOOGL_ar_bayes_informative.pkl
AMZN + weak... Saved AR: AMZN_ar_bayes_weak.pkl
AMZN + medium... Saved AR: AMZN_ar_bayes_medium.pkl
AMZN + informative... Saved AR: AMZN_ar_bayes_informative.pkl
TSLA + weak... Saved AR: TSLA_ar_bayes_weak.pkl
TSLA + medium... Saved AR: TSLA_ar_bayes_medium.pkl
TSLA + informative... Saved AR: TSLA_ar_bayes_informative.pkl
META + weak... Saved AR: META_ar_bayes_weak.pkl
META + medium... Saved AR: META_ar_bayes_medium.pkl
META + informative... Sav

In [5]:
print("PART 3: AR(1) HIERARCHICAL (All 7 stocks together)")

# Prepare data dict
data_dict = {ticker: data['train'][ticker].values for ticker in TICKERS}

print("Fitting hierarchical model...", end=" ")
hier_result = fit_ar_hierarchical(data_dict, n_samples=N_SAMPLES, n_burnin=N_BURNIN)
print(f"Done! Time={hier_result['meta']['runtime']:.1f}s")

# Forecast each ticker using POSTERIOR SAMPLES
hier_forecasts = {}
for ticker in TICKERS:
    test = data['test'][ticker].values
    train = data['train'][ticker].values
    
    # Use hierarchical posterior samples
    phi_samples = hier_result[ticker]['phi_samples']
    sigma2_samples = hier_result[ticker]['sigma2_samples']
    
    forecasts = forecast_ar_posterior_predictive(
        last_train=train[-1],
        test_data=test,
        phi_samples=phi_samples,
        sigma2_samples=sigma2_samples,
        horizons=HORIZONS
    )
    hier_forecasts[ticker] = forecasts
    print(f"{ticker}...", end=" ")


results_mgr.save_hierarchical(model_type='ar', results=hier_result)


PART 3: AR(1) HIERARCHICAL (All 7 stocks together)
Fitting hierarchical model... Informative Hierarchical AR(1) Priors:
  œÜ group mean Œº_œÜ: N(0, 0.01)
  œÜ group SD œÑ_œÜ¬≤: Gamma(100.0, 100.0)
  œÉ¬≤_k: Gamma(100.0, 100.0)
  Initial œÑ_œÜ: 0.05

Done! Time=3.9s
AAPL... MSFT... GOOGL... AMZN... TSLA... META... NVDA... Saved Hierarchical AR: hierarchical_ar.pkl


WindowsPath('../results/hierarchical/hierarchical_ar.pkl')

In [6]:
print("\n" + "="*80)
print("PART 4: EVALUATION TABLE - PIS + COVERAGE")
print("="*80)

evaluation_data = []

for ticker in TICKERS:
    # Frequentist
    for h in HORIZONS:
        fc = freq_results[ticker]['forecasts'][f'h_{h}']
        evaluation_data.append({
            'Stock': ticker,
            'Model': 'AR-Freq',
            'Prior': 'N/A',
            'Horizon': h,
            'Coverage': fc['coverage'],
            'MSE': fc['mse'],
            'RMSE': fc['rmse'],
            'Interval_Width': fc['interval_width'],
            'PIS': fc['pis'],
            'df': fc.get('df', np.nan),
            't_critical': fc.get('t_critical', np.nan)
        })
    
    # Bayesian
    for prior_set in PRIOR_SETS:
        for h in HORIZONS:
            fc = bayes_results[ticker][prior_set]['forecasts'][f'h_{h}']
            evaluation_data.append({
                'Stock': ticker,
                'Model': 'AR-Bayes',
                'Prior': prior_set,
                'Horizon': h,
                'Coverage': fc['coverage'],
                'MSE': fc['mse'],
                'RMSE': fc['rmse'],
                'Interval_Width': fc['interval_width'],
                'PIS': fc['pis'],
                'df': np.nan,
                't_critical': np.nan
            })
    
    # Hierarchical
    for h in HORIZONS:
        fc = hier_forecasts[ticker][f'h_{h}']
        evaluation_data.append({
            'Stock': ticker,
            'Model': 'AR-Hier',
            'Prior': 'Hierarchical',
            'Horizon': h,
            'Coverage': fc['coverage'],
            'MSE': fc['mse'],
            'RMSE': fc['rmse'],
            'Interval_Width': fc['interval_width'],
            'PIS': fc['pis'],
            'df': np.nan,
            't_critical': np.nan
        })

# ‚úÖ CONVERT TO DATAFRAME FIRST (was missing!)
eval_df = pd.DataFrame(evaluation_data)

# Display
display_cols = ['Model', 'Prior', 'Horizon', 'Coverage', 'PIS', 'Interval_Width', 'MSE']
print("\nFULL EVALUATION (PIS = Primary Metric):\n")
print(eval_df[display_cols].round(4).to_string(index=False))

# Save
eval_df.to_csv(FIG_DIR / 'ar_evaluation_pis.csv', index=False)
print(f"\n‚úì Saved: ar_evaluation_pis.csv")

# ============================================================================
print("\n" + "="*80)
print("INTERVAL WIDTH COMPARISON (Frequentist vs Bayesian)")
print("="*80 + "\n")

comparison = []
for h in HORIZONS:
    # ‚úÖ NOW use eval_df (which is a DataFrame)
    freq_width = eval_df[(eval_df['Model'] == 'AR-Freq') & (eval_df['Horizon'] == h)]['Interval_Width'].mean()
    bayes_weak_width = eval_df[(eval_df['Model'] == 'AR-Bayes') & (eval_df['Prior'] == 'weak') & (eval_df['Horizon'] == h)]['Interval_Width'].mean()
    bayes_med_width = eval_df[(eval_df['Model'] == 'AR-Bayes') & (eval_df['Prior'] == 'medium') & (eval_df['Horizon'] == h)]['Interval_Width'].mean()
    
    comparison.append({
        'Horizon': h,
        'Freq_Width': float(freq_width),
        'Bayes_Weak_Width': float(bayes_weak_width),
        'Bayes_Medium_Width': float(bayes_med_width),
        'Freq_vs_Weak_Ratio': float(freq_width / bayes_weak_width) if bayes_weak_width > 0 else np.nan,
        'Freq_vs_Medium_Ratio': float(freq_width / bayes_med_width) if bayes_med_width > 0 else np.nan
    })

comp_df = pd.DataFrame(comparison)
print(comp_df.round(4).to_string(index=False))
print("\nExpected: Frequentist should be NARROWER than Bayesian (less info)")
print("          Ratio < 1.0 indicates Freq narrower (better)")

comp_df.to_csv(FIG_DIR / 'ar_interval_width_comparison.csv', index=False)

# ============================================================================
# SUMMARY: PIS by Model & Horizon (LOWER IS BETTER)
print("\n" + "="*80)
print("PIS SUMMARY (LOWER = BETTER)")
print("="*80 + "\n")

pis_summary = eval_df.groupby(['Model', 'Prior', 'Horizon']).agg({
    'PIS': 'mean',
    'Coverage': 'mean',
    'MSE': 'mean',
    'Interval_Width': 'mean'
}).round(4).reset_index()

print(pis_summary.to_string(index=False))
pis_summary.to_csv(FIG_DIR / 'ar_pis_summary.csv', index=False)

# ============================================================================
# BEST BY PIS (not coverage)
print("\n" + "="*80)
print("üèÜ BEST MODEL BY PIS (LOWER BETTER)")
print("="*80 + "\n")

best_by_pis = []
for h in HORIZONS:
    h_data = eval_df[eval_df['Horizon'] == h].copy()
    best = h_data.loc[h_data['PIS'].idxmin()]
    best_by_pis.append({
        'Horizon': h,
        'Best_Model': best['Model'],
        'Best_Prior': best['Prior'],
        'PIS': best['PIS'],
        'Coverage': best['Coverage']
    })
    
    print(f"h={h:2d}: {best['Model']:10s} + {best['Prior']:12s} | PIS={best['PIS']:.4f} | Cov={best['Coverage']:.4f}")

best_pis_df = pd.DataFrame(best_by_pis)
best_pis_df.to_csv(FIG_DIR / 'ar_best_by_pis.csv', index=False)

print("\n‚úì All saved!")



PART 4: EVALUATION TABLE - PIS + COVERAGE

FULL EVALUATION (PIS = Primary Metric):

   Model        Prior  Horizon  Coverage     PIS  Interval_Width    MSE
 AR-Freq          N/A        1    1.0000  0.0659          0.0659 0.0001
 AR-Freq          N/A        5    1.0000  0.0878          0.0878 0.0002
 AR-Freq          N/A       22    1.0000  0.0920          0.0920 0.0003
AR-Bayes         weak        1    1.0000 69.6652         69.6652 0.0170
AR-Bayes         weak        5    1.0000 92.8422         92.8422 0.3672
AR-Bayes         weak       22    1.0000 97.1994         97.1994 0.5607
AR-Bayes       medium        1    1.0000 24.1539         24.1539 0.0154
AR-Bayes       medium        5    1.0000 25.9346         25.9346 0.0099
AR-Bayes       medium       22    1.0000 25.8290         25.8290 0.0498
AR-Bayes  informative        1    1.0000  9.6348          9.6348 0.0028
AR-Bayes  informative        5    1.0000  9.7328          9.7328 0.0041
AR-Bayes  informative       22    1.0000  9.7897   

In [7]:
# Parameter estimates comparison (not forecasts)
print("\n" + "="*80)
print("PART 5: PARAMETER ESTIMATES (œÜ) COMPARISON")
print("="*80 + "\n")

phi_data = []

for ticker in TICKERS:
    # Frequentist
    phi_data.append({
        'Stock': ticker,
        'Model': 'AR-Freq',
        'Prior': 'N/A',
        'Phi_Mean': freq_results[ticker]['phi'],
        'Phi_StdErr': freq_results[ticker]['se_phi']
    })
    
    # Bayesian (all priors)
    for prior_set in PRIOR_SETS:
        phi_data.append({
            'Stock': ticker,
            'Model': 'AR-Bayes',
            'Prior': prior_set,
            'Phi_Mean': bayes_results[ticker][prior_set]['phi_mean'],
            'Phi_StdErr': bayes_results[ticker][prior_set]['phi_std']
        })
    
    # Hierarchical
    phi_data.append({
        'Stock': ticker,
        'Model': 'AR-Hier',
        'Prior': 'Hierarchical',
        'Phi_Mean': hier_result[ticker]['phi_mean'],
        'Phi_StdErr': hier_result[ticker]['phi_std']
    })

phi_df = pd.DataFrame(phi_data)
print(phi_df.to_string(index=False))
phi_df.to_csv(FIG_DIR / 'ar_phi_estimates.csv', index=False)
print(f"\n‚úì Saved: ar_phi_estimates.csv")



PART 5: PARAMETER ESTIMATES (œÜ) COMPARISON

Stock    Model        Prior  Phi_Mean  Phi_StdErr
 AAPL  AR-Freq          N/A  0.001724    0.031145
 AAPL AR-Bayes         weak -0.007311    0.997131
 AAPL AR-Bayes       medium -0.001557    0.200460
 AAPL AR-Bayes  informative  0.000663    0.099597
 AAPL  AR-Hier Hierarchical  0.012470    0.970932
 MSFT  AR-Freq          N/A -0.017063    0.031050
 MSFT AR-Bayes         weak  0.001021    0.991460
 MSFT AR-Bayes       medium -0.004803    0.198568
 MSFT AR-Bayes  informative -0.002132    0.100684
 MSFT  AR-Hier Hierarchical  0.018440    0.982728
GOOGL  AR-Freq          N/A  0.002749    0.031062
GOOGL AR-Bayes         weak  0.017637    1.000179
GOOGL AR-Bayes       medium  0.002749    0.199056
GOOGL AR-Bayes  informative  0.002761    0.100666
GOOGL  AR-Hier Hierarchical  0.025263    0.980573
 AMZN  AR-Freq          N/A  0.001654    0.031045
 AMZN AR-Bayes         weak -0.009839    0.995485
 AMZN AR-Bayes       medium  0.004150    0.202644
 AMZ

In [8]:
print("PLOTTING")

# PLOT 1: MSE by Horizon (across all models)
print("\n" + "="*80)
print("PLOTTING EVALUATION METRICS")
print("="*80)

# PLOT 1: COVERAGE BY MODEL & HORIZON
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

for idx, h in enumerate(HORIZONS):
    ax = axes[idx]
    
    # Get coverage for this horizon
    h_data = eval_df[eval_df['Horizon'] == h]
    
    # Pivot: rows=Stock, cols=Model
    pivot_cov = h_data.pivot_table(
        values='Coverage', 
        index='Stock', 
        columns='Model',
        aggfunc='mean'
    )
    
    pivot_cov.plot(kind='bar', ax=ax, width=0.8, alpha=0.8)
    ax.axhline(y=0.90, color='red', linestyle='--', linewidth=2, label='Target (90%)')
    ax.set_title(f'Coverage by Model (h={h} days)', fontsize=12, fontweight='bold')
    ax.set_xlabel('Stock')
    ax.set_ylabel('Coverage Probability')
    ax.set_ylim([0.70, 1.0])
    ax.legend(title='Model', fontsize=8, loc='lower left')
    ax.grid(axis='y', alpha=0.3)
    ax.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.savefig(FIG_DIR / 'ar_coverage_by_horizon.png', dpi=300, bbox_inches='tight')
print("‚úì Saved: ar_coverage_by_horizon.png")
plt.close()

# PLOT 2: MSE BY MODEL & HORIZON
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

for idx, h in enumerate(HORIZONS):
    ax = axes[idx]
    
    h_data = eval_df[eval_df['Horizon'] == h]
    pivot_mse = h_data.pivot_table(
        values='MSE', 
        index='Stock', 
        columns='Model',
        aggfunc='mean'
    )
    
    pivot_mse.plot(kind='bar', ax=ax, width=0.8, alpha=0.8)
    ax.set_title(f'MSE by Model (h={h} days)', fontsize=12, fontweight='bold')
    ax.set_xlabel('Stock')
    ax.set_ylabel('MSE')
    ax.legend(title='Model', fontsize=8)
    ax.grid(axis='y', alpha=0.3)
    ax.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.savefig(FIG_DIR / 'ar_mse_by_horizon.png', dpi=300, bbox_inches='tight')
print("‚úì Saved: ar_mse_by_horizon.png")
plt.close()

# PLOT 3: PIS BY MODEL & HORIZON (LOWER IS BETTER)
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

for idx, h in enumerate(HORIZONS):
    ax = axes[idx]
    
    h_data = eval_df[eval_df['Horizon'] == h]
    pivot_pis = h_data.pivot_table(
        values='PIS', 
        index='Stock', 
        columns='Model',
        aggfunc='mean'
    )
    
    # Invert colors: lower PIS = better (darker green)
    colors = plt.cm.RdYlGn_r(np.linspace(0.3, 1, len(pivot_pis.columns)))
    pivot_pis.plot(kind='bar', ax=ax, width=0.8, alpha=0.8, color=colors)
    
    ax.set_title(f'PIS by Model (h={h} days)\nLower=BETTER', fontsize=12, fontweight='bold')
    ax.set_xlabel('Stock')
    ax.set_ylabel('Prediction Interval Score')
    ax.legend(title='Model', fontsize=8)
    ax.grid(axis='y', alpha=0.3)
    ax.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.savefig(FIG_DIR / 'ar_pis_by_horizon.png', dpi=300, bbox_inches='tight')
print("‚úì Saved: ar_pis_by_horizon.png")
plt.close()

PLOTTING

PLOTTING EVALUATION METRICS
‚úì Saved: ar_coverage_by_horizon.png
‚úì Saved: ar_mse_by_horizon.png
‚úì Saved: ar_pis_by_horizon.png


In [10]:
print("\n" + "="*80)
print("DIAGNOSTIC: BAYESIAN POSTERIOR SAMPLES")
print("="*80)

ticker = 'AAPL'
train = data['train'][ticker].values
test = data['test'][ticker].values

# Check what posterior samples look like
for prior_set in ['weak', 'medium', 'informative']:
    phi_samples = bayes_results[ticker][prior_set]['phi_samples']
    sigma2_samples = bayes_results[ticker][prior_set]['sigma2_samples']
    
    print(f"\n{prior_set} prior:")
    print(f"  œÜ posterior: mean={np.mean(phi_samples):.4f}, std={np.std(phi_samples):.4f}")
    print(f"             min={np.min(phi_samples):.4f}, max={np.max(phi_samples):.4f}")
    print(f"             median={np.median(phi_samples):.4f}")
    print(f"             pct_negative = {np.mean(phi_samples < 0):.2%}")
    
    print(f"  œÉ¬≤ posterior: mean={np.mean(sigma2_samples):.6f}, std={np.std(sigma2_samples):.6f}")
    print(f"              min={np.min(sigma2_samples):.6f}, max={np.max(sigma2_samples):.6f}")
    
    # What does posterior predict for first step?
    fc_first = phi_samples[:100] * train[-1]  # First 100 samples
    print(f"  œÜ * last_train: mean={np.mean(fc_first):.4f}, std={np.std(fc_first):.4f}")
    print(f"                 min={np.min(fc_first):.4f}, max={np.max(fc_first):.4f}")

print(f"\nFrequentist œÜ = {freq_results[ticker]['phi']:.4f}")
print(f"Frequentist œÉ¬≤ = {freq_results[ticker]['sigma2']:.6f}")



DIAGNOSTIC: BAYESIAN POSTERIOR SAMPLES

weak prior:
  œÜ posterior: mean=-0.0073, std=0.9971
             min=-3.7973, max=3.8783
             median=-0.0166
             pct_negative = 50.62%
  œÉ¬≤ posterior: mean=411.594241, std=52.188044
              min=155.552636, max=521.479795
  œÜ * last_train: mean=0.0011, std=0.0365
                 min=-0.0728, max=0.1094

medium prior:
  œÜ posterior: mean=-0.0016, std=0.2005
             min=-0.8222, max=0.7729
             median=0.0009
             pct_negative = 49.88%
  œÉ¬≤ posterior: mean=52.171012, std=2.325450
              min=42.800923, max=60.286188
  œÜ * last_train: mean=0.0003, std=0.0084
                 min=-0.0195, max=0.0171

informative prior:
  œÜ posterior: mean=0.0007, std=0.0996
             min=-0.3612, max=0.3924
             median=0.0013
             pct_negative = 49.60%
  œÉ¬≤ posterior: mean=6.190925, std=0.249069
              min=5.398698, max=7.192868
  œÜ * last_train: mean=-0.0003, std=0.0040
         