In [1]:

import numpy as np
import pandas as pd
from pathlib import Path
from zoneinfo import ZoneInfo  # Built-in from Python 3.9+

import sys
sys.path.append('../src')  # only needed if you're not packaging yet

from vol_utils import compute_log_returns, annualized_volatility_mean_square

import pandas as pd

# Load the volatility CSV and convert its 'date' column to a proper datetime
# dtype so that later sorting and grouping by date behave chronologically.
df = pd.read_csv("../output/all_vols.csv").assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

def add_har_features(df, vol_col='annualized_vol_30min'):
    """Add lagged HAR regressors, computed separately for each ticker.

    The frame is ordered by ``ticker`` then ``date`` and three columns are
    added, all based on values strictly before the current row so the
    current observation never leaks into its own regressors:

    * ``har_1d`` -- the previous row's ``vol_col`` within the same ticker.
    * ``har_1w`` -- mean of the previous 5 rows' ``vol_col`` within the ticker.
    * ``har_1m`` -- mean of the previous 21 rows' ``vol_col`` within the ticker.

    Windows are counted in rows (observations), not calendar days. The
    weekly and monthly columns are NaN until a full window of non-NaN lagged
    values is available for that ticker.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``ticker``, ``date`` and ``vol_col`` columns.
    vol_col : str
        Name of the column holding the volatility measure to lag.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` sorted by ``['ticker', 'date']`` with the three HAR
        columns added. The input frame is not modified.
    """
    # sort_values returns a new frame, so the caller's frame is left untouched.
    df = df.sort_values(['ticker', 'date'])
    by_ticker = df.groupby('ticker')[vol_col]

    def _lagged_mean(window):
        # transform keeps the original row index, so the result aligns with
        # ``df`` on assignment, and the shift/rolling runs inside each ticker
        # so windows never span two tickers.
        return by_ticker.transform(
            lambda s: s.shift(1).rolling(window=window, min_periods=window).mean()
        )

    har_1d = by_ticker.shift(1)
    har_1w = _lagged_mean(5)
    har_1m = _lagged_mean(21)

    df['har_1d'] = har_1d
    df['har_1w'] = har_1w
    df['har_1m'] = har_1m
    return df

from statsmodels.api import OLS, add_constant

def run_cross_sectional_har(df, target_col='annualized_vol_30min', min_obs=10):
    """Fit one cross-sectional HAR regression per date.

    For every distinct ``date`` the rows with complete data are regressed
    (statsmodels OLS with an intercept) as
    ``target_col ~ const + har_1d + har_1w + har_1m``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``date``, ``har_1d``, ``har_1w``, ``har_1m`` and
        ``target_col`` columns.
    target_col : str
        Name of the dependent-variable column.
    min_obs : int
        Minimum number of complete rows a date needs in order to be fitted;
        dates with fewer rows are skipped.

    Returns
    -------
    pandas.DataFrame
        One row per fitted date with columns ``date``, ``beta_0``,
        ``beta_1d``, ``beta_1w``, ``beta_1m``, ``r2``, ``rmse`` and
        ``n_obs``. If no date qualifies, an empty frame with these same
        columns is returned.
    """
    regressors = ['har_1d', 'har_1w', 'har_1m']
    columns = ['date', 'beta_0', 'beta_1d', 'beta_1w', 'beta_1m',
               'r2', 'rmse', 'n_obs']
    results = []

    for date, group in df.groupby('date'):
        # Keep only rows where every regressor and the target are present.
        sub = group.dropna(subset=regressors + [target_col])
        if len(sub) < min_obs:  # not enough complete rows that day
            continue

        # has_constant='add' forces the intercept column to exist even when a
        # regressor happens to be constant across this cross-section; the
        # default ('skip') would omit it and make the 'const' lookup below
        # raise KeyError.
        X = add_constant(sub[regressors], has_constant='add')
        model = OLS(sub[target_col], X).fit()

        results.append({
            'date': date,
            'beta_0': model.params['const'],
            'beta_1d': model.params['har_1d'],
            'beta_1w': model.params['har_1w'],
            'beta_1m': model.params['har_1m'],
            'r2': model.rsquared,
            'rmse': np.sqrt(np.mean(model.resid ** 2)),
            'n_obs': len(sub),
        })

    # Passing the column list keeps the schema stable when nothing was fitted,
    # so downstream code that selects 'date' or the betas does not fail.
    return pd.DataFrame(results, columns=columns)

# Build the lagged HAR regressors, then fit one cross-sectional regression
# per date on the resulting frame.
vol_df = add_har_features(df, vol_col='annualized_vol_30min')
cs_har_results = run_cross_sectional_har(vol_df)

# Plot every fitted coefficient (three HAR slopes and the intercept) by date.
cs_har_results.plot(
    x='date',
    y=['beta_1d', 'beta_1w', 'beta_1m','beta_0'],
    title="HAR Cross-Sectional Coefficients",
)

ModuleNotFoundError: No module named 'vol_utils'