# Implied Volatility (IV) Factor Analysis using Alphalens

This notebook analyzes the predictive power of Implied Volatility (IV) on future stock returns.
We use **Alphalens** to generate a full tear sheet of performance metrics.

## Setup
Ensure you have `alphalens-reloaded` installed:
```bash
pip install alphalens-reloaded
```


In [1]:

import sys
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import alphalens

# Add project root to path to allow importing local modules if needed
sys.path.append(os.path.abspath('../../'))


In [2]:
# 1. Load Cached IV Data
# We assume the cache file 'volatility_research/iv_cache.csv' exists from previous runs.
# If not, you may need to run the data fetching scripts.

iv_cache_path = '../iv_cache.csv'
if os.path.exists(iv_cache_path):
    print(f"Loading IV data from {iv_cache_path}")
    iv_df = pd.read_csv(iv_cache_path, parse_dates=['date'])
else:
    print("IV Cache not found! Please ensure data is fetched.")
    iv_df = pd.DataFrame()

# Clean IV Data
iv_df['date'] = pd.to_datetime(iv_df['date']).dt.tz_localize(None)
iv_df['iv_current'] = pd.to_numeric(iv_df['iv_current'], errors='coerce')
iv_df = iv_df.dropna(subset=['iv_current'])
iv_df = iv_df.rename(columns={'act_symbol': 'asset', 'iv_current': 'factor'})

# Set Index for Factor
# Alphalens expects a MultiIndex (date, asset)
factor_df = iv_df.set_index(['date', 'asset']).sort_index()
factor = factor_df['factor']

print(f"Loaded {len(factor)} factor records.")
factor.head()


Loading IV data from ../iv_cache.csv
Loaded 64156 factor records.


date        asset
2019-02-09  A        0.2673
            AAL      0.3453
            AAP      0.4431
            AAPL     0.2280
            ABBV     0.2393
Name: factor, dtype: float64

In [6]:
factor.index.get_level_values('date').unique()

DatetimeIndex(['2019-02-09', '2019-03-02', '2019-04-06', '2019-05-04',
               '2019-06-01', '2019-07-06', '2019-08-03', '2019-09-07',
               '2019-10-05', '2019-11-02', '2019-12-07', '2020-01-04',
               '2020-02-03', '2020-03-02', '2020-04-01', '2020-05-01',
               '2020-06-01', '2020-07-01', '2020-08-03', '2020-09-02',
               '2020-10-02', '2020-11-02', '2020-12-02', '2021-01-01',
               '2021-02-01', '2021-03-01', '2021-04-02', '2021-05-03',
               '2021-06-02', '2021-07-02', '2021-08-02', '2021-09-01',
               '2021-10-01', '2021-11-01', '2021-12-01', '2022-01-03',
               '2022-02-02', '2022-03-02', '2022-04-01', '2022-05-02',
               '2022-06-01', '2022-07-01', '2022-08-01', '2022-09-02',
               '2022-10-03', '2022-11-02', '2022-12-02', '2023-01-02',
               '2023-02-01', '2023-03-01', '2023-04-03', '2023-05-01',
               '2023-06-02', '2023-07-03', '2023-08-02', '2023-09-01',
      

In [8]:
# 2. Load Price Data
# We use the provided US Stock History CSV
price_path = '../../mean_reversion/us_stock_history_10y.csv'

print(f"Loading prices from {price_path}...")
price_df_raw = pd.read_csv(price_path, index_col=[0, 1], parse_dates=[0])

# Reshape to (Date, Asset) with Close prices
prices = price_df_raw['Close'].unstack()
#prices.index = pd.to_datetime(prices.index).dt.tz_localize(None)

# Ensure Business Day Frequency
# Alphalens requires a recognized frequency to compute forward returns accurately.
prices = prices.asfreq('B', method='ffill')

print(f"Prices Shape: {prices.shape}")
print(f"Price Frequency: {prices.index.freq}")
prices.head()


Loading prices from ../../mean_reversion/us_stock_history_10y.csv...
Prices Shape: (2610, 4654)
Price Frequency: <BusinessDay>


Symbol,NaN,A,AA,AACB,AAL,AAM,AAME,AAMI,AAOI,AAON,...,ZSPC,ZTS,ZUMZ,ZURA,ZVIA,ZVRA,ZWS,ZYBT,ZYME,ZYXI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-11-23,,36.924927,20.527882,,40.423801,,4.291939,14.748291,18.559999,15.269637,...,,43.415047,15.51,,,248.639999,9.123002,,,0.192582
2015-11-24,,37.515285,20.573145,,39.401264,,4.291939,14.821808,18.52,15.463162,...,,43.387306,16.040001,,,262.559998,9.278248,,,0.175074
2015-11-25,,38.133316,21.297396,,39.477722,,4.478545,15.005585,18.379999,15.594256,...,,43.42429,17.09,,,268.799988,9.328476,,,0.175074
2015-11-26,,38.133316,21.297396,,39.477722,,4.478545,15.005585,18.379999,15.594256,...,,43.42429,17.09,,,268.799988,9.328476,,,0.175074
2015-11-27,,38.714432,21.093697,,39.840858,,4.459884,15.042343,18.559999,15.700381,...,,43.525993,16.6,,,266.880005,9.296514,,,0.175074


In [9]:
# 3. Align Factor Dates
# Since our IV data might be sparse or on non-business days, we map them to the nearest valid trading day in our price data.

valid_dates = prices.index
unique_factor_dates = factor.index.get_level_values('date').unique()

def get_nearest_date(d):
    if d in valid_dates: return d
    loc = valid_dates.searchsorted(d)
    if loc < len(valid_dates):
        return valid_dates[loc]
    return valid_dates[-1]

date_map = {d: get_nearest_date(d) for d in unique_factor_dates}

# Apply mapping
# We reset index, map, and set index back
factor_reset = factor.reset_index()
factor_reset['date'] = factor_reset['date'].map(date_map)

# Handle collisions (if multiple factor dates map to same trading day) by taking the mean
factor_aligned = factor_reset.groupby(['date', 'asset'])['factor'].mean()

print(f"Aligned Factor Records: {len(factor_aligned)}")


Aligned Factor Records: 64156


In [10]:
# 4. Run Alphalens
# We generate the clean factor data and forward returns.

from alphalens.utils import get_clean_factor_and_forward_returns
from alphalens.tears import create_full_tear_sheet

# Define Quantiles and Periods (1D, 5D, 20D)
quantiles = 5
periods = (1, 5, 20)

try:
    factor_data = get_clean_factor_and_forward_returns(
        factor=factor_aligned,
        prices=prices,
        quantiles=quantiles,
        periods=periods,
        filter_zscore=None 
    )
    print("Factor data created successfully!")
except Exception as e:
    print(f"Error creating factor data: {e}")


Error creating factor data: Inferred frequency None from passed values does not conform to passed frequency C


In [11]:
# 5. Create Full Tear Sheet
if 'factor_data' in locals():
    create_full_tear_sheet(factor_data)
