In [19]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from arch import arch_model

In [44]:
# Input CSV files, one per ticker; the part of each file name before the
# extension is used as the column name in the result tables built below.
data_paths = [
    'GLDM.csv',
    'HYG.csv',
    'IEMG.csv',
    'VOO.csv',
    'TLT.csv',
]

In [62]:
def generate_volatility_forecast(file_path, limit=None, min_train=100, warmup_window=20):
    """Build a walk-forward GARCH(1,1) volatility forecast for one price CSV.

    The CSV must contain a ``DateTime`` column and a ``Close`` column. Simple
    returns are derived from ``Close`` and every row containing a NaN (which
    always includes the first row, because it has no return) is dropped.

    For each row at position ``i >= min_train`` a GARCH(1,1) model is refit on
    the returns strictly before ``i`` and its one-step-ahead conditional
    standard deviation is stored on row ``i``. The first ``min_train`` rows
    cannot be forecast that way and receive a rolling standard deviation of
    returns instead.

    Note: the warm-up rolling window ends at the row itself, and leading NaNs
    are back-filled from later rows, so the warm-up values are a proxy that
    uses information not available at that bar.

    Parameters
    ----------
    file_path : str or path-like
        CSV file to read.
    limit : int, optional
        Keep only the first ``limit`` cleaned rows (``iloc[:limit]``
        semantics). ``None`` (default) keeps every row; ``0`` yields an
        empty result.
    min_train : int, default 100
        Number of leading rows used as the initial training sample.
    warmup_window : int, default 20
        Window length (in rows) of the rolling standard deviation used for
        the first ``min_train`` rows.

    Returns
    -------
    pandas.DataFrame
        The cleaned input with ``DateTime`` converted to US/Eastern plus the
        ``Returns`` and ``Volatility_Forecast`` columns.

    Raises
    ------
    ValueError
        If ``min_train`` or ``warmup_window`` is smaller than 1.
    """
    if min_train < 1 or warmup_window < 1:
        raise ValueError("min_train and warmup_window must be positive integers")

    print(f"Processing {file_path}...")
    stock = pd.read_csv(file_path)
    # Parse as UTC first to handle mixed offsets, then convert to Eastern Time
    stock['DateTime'] = pd.to_datetime(stock['DateTime'], utc=True).dt.tz_convert('US/Eastern')
    stock['Returns'] = stock['Close'].pct_change()
    stock.dropna(inplace=True)

    # Compare against None so that limit=0 is honoured instead of silently
    # falling through to the (expensive) full run.
    if limit is not None:
        stock = stock.iloc[:limit].copy()

    returns = stock['Returns']
    forecasts = np.full(len(stock), np.nan)

    # Walk forward one row at a time: refit on everything before the row and
    # forecast the row itself, so no forecast sees its own return.
    for position in range(min_train, len(stock)):
        train_data = returns.iloc[:position]

        # rescale=True lets arch rescale the data for a better-conditioned optimization
        model = arch_model(train_data, vol='Garch', p=1, q=1, rescale=True)
        model_fit = model.fit(disp='off', show_warning=False)

        forecast = model_fit.forecast(horizon=1)

        # The forecast variance is in rescaled units; divide the standard
        # deviation by the scale factor to get back to return units.
        forecasts[position] = np.sqrt(forecast.variance.values[-1, 0]) / model_fit.scale

    # Warm-up rows: rolling standard deviation of returns as a volatility proxy.
    warmup = returns.iloc[:min_train].rolling(window=warmup_window, min_periods=1).std()
    forecasts[:len(warmup)] = warmup.to_numpy()

    # The sample std of a single observation is NaN, so the very first value
    # (at least) is back-filled from the next available one.
    stock['Volatility_Forecast'] = pd.Series(forecasts, index=stock.index).bfill()

    return stock

In [65]:
import os

# Initialize the DataFrame with the index from GLDM (assuming it's the master timeline).
# The first bar is skipped because generate_volatility_forecast drops it (it has no return).
GLDM = pd.read_csv('GLDM.csv')
dates = pd.to_datetime(GLDM['DateTime'], utc=True).dt.tz_convert('US/Eastern').iloc[1:]
volatility_forecasts = pd.DataFrame(index=dates)

output_file = 'volatility_forecasts.csv'

# Load existing results if available
if os.path.exists(output_file):
    print(f"Loading existing results from {output_file}...")
    existing_df = pd.read_csv(output_file, index_col=0)
    # The saved timestamps can carry mixed UTC offsets (the timeline crosses a
    # DST change), which read_csv's parse_dates cannot turn into a tz-aware
    # DatetimeIndex. Parse as UTC first, then convert to Eastern Time; naive
    # timestamps are interpreted as UTC.
    existing_df.index = pd.to_datetime(existing_df.index, utc=True).tz_convert('US/Eastern')

    volatility_forecasts = existing_df.combine_first(volatility_forecasts)

for stock_file in data_paths:
    # Ticker = file name without directory or extension (robust to './data/X.csv' and 'BRK.B.csv')
    stock_name = os.path.splitext(os.path.basename(stock_file))[0]

    # Resume support: a ticker with at least one stored value is considered done
    if stock_name in volatility_forecasts.columns and not volatility_forecasts[stock_name].isna().all():
        print(f"Skipping {stock_name}, already calculated.")
        continue

    # Deliberately best-effort: a failure for one ticker is reported and the
    # remaining tickers are still processed.
    try:
        result = generate_volatility_forecast(stock_file)

        # Align the result to the master index (dates)
        # This handles length mismatches by reindexing to the master timeline
        aligned_series = result.set_index('DateTime')['Volatility_Forecast'].reindex(dates)

        volatility_forecasts[stock_name] = aligned_series

        # Save progress after each stock
        volatility_forecasts.to_csv(output_file)
        print(f"Saved {stock_name} to {output_file}")

    except Exception as e:
        print(f"Error processing {stock_name}: {e}")

volatility_forecasts.head()

Processing GLDM.csv...
Saved GLDM to volatility_forecasts.csv
Processing HYG.csv...
Saved GLDM to volatility_forecasts.csv
Processing HYG.csv...
Saved HYG to volatility_forecasts.csv
Processing IEMG.csv...
Saved HYG to volatility_forecasts.csv
Processing IEMG.csv...
Saved IEMG to volatility_forecasts.csv
Processing VOO.csv...
Saved IEMG to volatility_forecasts.csv
Processing VOO.csv...
Saved VOO to volatility_forecasts.csv
Processing TLT.csv...
Saved VOO to volatility_forecasts.csv
Processing TLT.csv...
Saved TLT to volatility_forecasts.csv
Saved TLT to volatility_forecasts.csv


Unnamed: 0_level_0,GLDM,HYG,IEMG,VOO,TLT
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-10-06 09:31:00-04:00,0.000227,0.003485,0.001051,0.00065,0.036395
2025-10-06 09:32:00-04:00,0.000227,0.000143,0.001025,0.00065,0.00033
2025-10-06 09:33:00-04:00,0.000354,0.000203,0.00102,0.00065,0.000347
2025-10-06 09:34:00-04:00,0.000321,0.000154,0.001019,0.00065,0.00033
2025-10-06 09:35:00-04:00,0.000375,0.000116,0.00102,0.00065,0.00033


In [66]:
# Sanity check of the forecast table: index span, per-ticker non-null counts and dtypes.
volatility_forecasts.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 33153 entries, 2025-10-06 09:31:00-04:00 to 2025-12-03 15:59:00-05:00
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   GLDM    33153 non-null  float64
 1   HYG     33115 non-null  float64
 2   IEMG    33138 non-null  float64
 3   VOO     33150 non-null  float64
 4   TLT     33148 non-null  float64
dtypes: float64(5)
memory usage: 1.5 MB


In [67]:
def calculate_alpha_101(file_path):
    """Compute the Alpha #101 signal for every bar of one OHLC CSV.

    Alpha #101 is ``(Close - Open) / ((High - Low) + 0.001)``: the signed
    open-to-close move of a bar relative to that bar's high-low range, with a
    small constant in the denominator so a zero-range bar does not divide by
    zero.

    Parameters
    ----------
    file_path : str or path-like
        CSV with ``DateTime``, ``Open``, ``High``, ``Low`` and ``Close`` columns.

    Returns
    -------
    pandas.Series
        The signal, indexed by ``DateTime`` converted to US/Eastern.
    """
    bars = pd.read_csv(file_path)

    # Parse as UTC, then convert to Eastern Time, so the index can be aligned across tickers
    eastern_stamps = pd.to_datetime(bars['DateTime'], utc=True).dt.tz_convert('US/Eastern')
    bars = bars.assign(DateTime=eastern_stamps).set_index('DateTime')

    candle_body = bars['Close'] - bars['Open']
    candle_range = bars['High'] - bars['Low']
    return candle_body / (candle_range + 0.001)

In [68]:
# Initialize DataFrame with the master index (every GLDM bar, including the first one)
GLDM = pd.read_csv('GLDM.csv')
dates = pd.to_datetime(GLDM['DateTime'], utc=True).dt.tz_convert('US/Eastern')
alpha_101_df = pd.DataFrame(index=dates)

for stock_file in data_paths:
    # Ticker = file name without directory or extension (robust to './data/X.csv' and 'BRK.B.csv')
    stock_name = os.path.splitext(os.path.basename(stock_file))[0]
    print(f"Calculating Alpha 101 for {stock_name}...")

    # Deliberately best-effort: a ticker that fails is reported and simply
    # has no column in the saved table.
    try:
        signal = calculate_alpha_101(stock_file)
        # Align to master index; bars missing for this ticker become NaN
        alpha_101_df[stock_name] = signal.reindex(dates)
    except Exception as e:
        print(f"Error processing {stock_name}: {e}")

output_file_a101 = 'A101.csv'
alpha_101_df.to_csv(output_file_a101)
print(f"Saved Alpha 101 signals to {output_file_a101}")
alpha_101_df.head()

Calculating Alpha 101 for GLDM...
Calculating Alpha 101 for HYG...
Calculating Alpha 101 for IEMG...
Calculating Alpha 101 for VOO...
Calculating Alpha 101 for TLT...
Calculating Alpha 101 for VOO...
Calculating Alpha 101 for TLT...
Saved Alpha 101 signals to A101.csv
Saved Alpha 101 signals to A101.csv


Unnamed: 0_level_0,GLDM,HYG,IEMG,VOO,TLT
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-10-06 09:30:00-04:00,-0.366876,0.0,-0.28169,-0.494297,0.731707
2025-10-06 09:31:00-04:00,-0.869565,-0.418182,0.952381,0.024876,0.0
2025-10-06 09:32:00-04:00,-0.3107,-0.714286,0.833333,-0.456432,0.0
2025-10-06 09:33:00-04:00,0.427461,-0.47619,-0.277778,-0.432409,0.3
2025-10-06 09:34:00-04:00,-0.652174,-0.625,0.009091,-0.557769,0.0
