# Enhanced Earnings IV Analysis - Meeting Requirements

This notebook demonstrates the enhanced analysis functions based on the meeting discussion:

## Key Features:
1. **Realized Volatility Estimators** - Multiple rolling/exponentially weighted estimators
2. **Volume Analysis** - Option volume vs stock ADV analysis
3. **Kernel Regression** - Enhanced regression following the approach in the Wolfe paper
4. **Single-Name Case Studies** - Comprehensive analysis for individual stocks
5. **Large Cap Universe** - Building filtered universe of liquid stocks

## Meeting Requirements Addressed:
- ✅ Kernel regression interface
- ✅ Realized volatility computation with multiple estimators
- ✅ Volume analysis (option notional vs stock ADV)
- ✅ Single-name vs cross-sectional approach
- ✅ Robust regression model with proper X and y variables
- ✅ Large cap universe with option volume filters

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import wrds
import warnings
warnings.filterwarnings('ignore')

# Import the enhanced analysis functions
from enhanced_analysis import (
    calculate_realized_volatility_estimators,
    analyze_option_volume_vs_stock_adv,
    enhanced_kernel_regression_analysis,
    build_regression_dataset,
    run_single_name_case_study,
    get_large_cap_universe
)

In [None]:
# Connect to WRDS. The username is read from the WRDS_USERNAME environment
# variable when it is set, so it does not have to be edited into the notebook;
# otherwise the original placeholder is used.
import os

db = wrds.Connection(
    wrds_username=os.environ.get('WRDS_USERNAME', 'your_wrds_username')
)

# Import your existing pipeline class
# (You'll need to run the cell with your EarningsIVDataPipeline class first)
# from Week3_JoyceXu_Completed import EarningsIVDataPipeline

# Initialize pipeline. Every later cell reads `pipeline`, so create it here as
# soon as the class is available in the notebook namespace instead of leaving
# the assignment commented out. The lookup goes through globals() so this cell
# still runs (and explains what is missing) when the class cell was skipped.
_pipeline_cls = globals().get('EarningsIVDataPipeline')
if _pipeline_cls is not None:
    pipeline = _pipeline_cls(db)
else:
    print(
        "⚠️ EarningsIVDataPipeline is not defined. Run the cell that defines it, "
        "then run: pipeline = EarningsIVDataPipeline(db)"
    )

## 1. Single-Name Case Study (Meeting Requirement)

Run a comprehensive analysis on one stock to understand the data and construct realized volatility estimators.

In [None]:
# Single-name case study: AAPL over the 2023-01-01 .. 2024-12-31 window
case_study = run_single_name_case_study(
    pipeline=pipeline,
    ticker='AAPL',
    start_date='2023-01-01',
    end_date='2024-12-31',
)

# A falsy result means there is nothing to summarise, so the report is skipped
if case_study:
    print(f"\n📊 Case Study Results for {case_study['ticker']}:")

    n_regression_rows = len(case_study['regression_data'])
    print(f"Regression data points: {n_regression_rows}")

    # Held-out fit metrics reported by the case study
    regression_metrics = case_study['regression_results']
    print(f"Test R²: {regression_metrics['test_r2']:.3f}")
    print(f"Test RMSE: {regression_metrics['test_rmse']:.4f}")

## 2. Realized Volatility Analysis

Compare different realized volatility estimators and their properties.

In [None]:
# Realized-volatility comparison; only runs once stock prices are loaded
if 'stock_prices' in pipeline.data:
    stock_prices = pipeline.data['stock_prices']

    # Request the estimators for the 5/10/21/30-day windows
    realized_vol = calculate_realized_volatility_estimators(
        stock_prices,
        windows=[5, 10, 21, 30],
    )

    # None from the estimator helper means there is nothing to plot
    if realized_vol is not None:
        fig, axes = plt.subplots(2, 2, figsize=(15, 10))
        dates = realized_vol['date']

        # Top row: one 21-day series per panel (standard rolling, then EWM)
        single_series_panels = (
            (axes[0, 0], 'realized_vol_21d', 'b-', 'Standard Rolling Volatility (21d)'),
            (axes[0, 1], 'ewm_vol_21d', 'g-', 'Exponentially Weighted Volatility (21d)'),
        )
        for panel, column, fmt, title in single_series_panels:
            panel.plot(dates, realized_vol[column], fmt, alpha=0.7)
            panel.set_title(title)
            panel.set_ylabel('Annualized Volatility')

        # Bottom-left: overlay the rolling estimator at three window lengths
        comparison_ax = axes[1, 0]
        window_series = (
            ('realized_vol_5d', 'r-', '5d'),
            ('realized_vol_21d', 'b-', '21d'),
            ('realized_vol_30d', 'g-', '30d'),
        )
        for column, fmt, label in window_series:
            comparison_ax.plot(dates, realized_vol[column], fmt, alpha=0.7, label=label)
        comparison_ax.set_title('Rolling Volatility Comparison')
        comparison_ax.set_ylabel('Annualized Volatility')
        comparison_ax.legend()

        # Bottom-right: 21-row rolling std of the 21-day estimator
        vol_of_vol = realized_vol['realized_vol_21d'].rolling(21).std()
        vol_of_vol_ax = axes[1, 1]
        vol_of_vol_ax.plot(dates, vol_of_vol, 'purple', alpha=0.7)
        vol_of_vol_ax.set_title('Volatility of Volatility (21d)')
        vol_of_vol_ax.set_ylabel('Volatility')

        plt.tight_layout()
        plt.show()

        # Sample means of the rolling estimator at each plotted window
        print("\n📈 Realized Volatility Summary:")
        mean_rv_5d = realized_vol['realized_vol_5d'].mean()
        print(f"5-day RV mean: {mean_rv_5d:.3f}")
        mean_rv_21d = realized_vol['realized_vol_21d'].mean()
        print(f"21-day RV mean: {mean_rv_21d:.3f}")
        mean_rv_30d = realized_vol['realized_vol_30d'].mean()
        print(f"30-day RV mean: {mean_rv_30d:.3f}")

## 3. Volume Analysis (Meeting Requirement)

Analyze option volume relative to the stock's average daily volume (ADV), and option notional relative to stock notional.

In [None]:
# Option volume vs stock ADV; both input tables must already be in the pipeline
if all(table in pipeline.data for table in ('options_filtered', 'stock_prices')):
    volume_inputs = {
        'options_df': pipeline.data['options_filtered'],
        'stock_df': pipeline.data['stock_prices'],
        'ticker': 'AAPL',
    }
    volume_analysis = analyze_option_volume_vs_stock_adv(**volume_inputs)

    # Only announce completion when the helper actually returned a result
    if volume_analysis is not None:
        print("\n📊 Volume Analysis Complete!")

## 4. Regression Analysis (Meeting Requirement)

Build the regression dataset and run kernel regression to predict post-earnings realized volatility.

In [None]:
# Regression step: needs both the earnings-option and realized-vol tables
pipeline_tables = pipeline.data
if 'earnings_options' in pipeline_tables and 'realized_volatility' in pipeline_tables:
    regression_data = build_regression_dataset(
        earnings_options_df=pipeline_tables['earnings_options'],
        realized_vol_df=pipeline_tables['realized_volatility'],
        target_window=21,   # 21-day realized volatility target
        feature_window=10,  # 10-day feature window
    )

    # Require a dataset with more than 10 rows before fitting anything
    has_enough_rows = regression_data is not None and len(regression_data) > 10
    if has_enough_rows:
        # Design matrix columns and the response column
        feature_cols = [
            'avg_iv',
            'iv_std',
            'avg_volume',
            'avg_spread',
            'avg_tte',
            'avg_moneyness',
        ]
        X = regression_data[feature_cols]
        y = regression_data['target_rv']

        print(f"\n🔬 Regression Dataset Summary:")
        print(f"Features: {feature_cols}")
        print(f"Target: 21-day realized volatility")
        print(f"Data points: {len(regression_data)}")

        # Fit the kernel regression and report the held-out R²
        regression_results = enhanced_kernel_regression_analysis(X, y)
        test_r2 = regression_results['test_r2']

        print(f"\n✅ Regression Analysis Complete!")
        print(f"Model performance: R² = {test_r2:.3f}")

## 5. Large Cap Universe (Meeting Requirement)

Build a universe of large-cap stocks with sufficient option volume for cross-sectional analysis.

In [None]:
# Build the liquid large-cap universe used by the cross-sectional cell
universe = get_large_cap_universe(
    pipeline=pipeline,
    min_market_cap=1e9,      # $1B+ market cap
    min_option_volume=1000,  # 1000+ average daily option volume
)

# None from the universe helper means there is nothing to summarise
if universe is not None:
    print(f"\n🏢 Large Cap Universe Summary:")
    print(f"Total stocks: {len(universe)}")

    # Market cap is reported in billions of dollars
    mean_cap_billions = universe['avg_market_cap'].mean() / 1e9
    print(f"Average market cap: ${mean_cap_billions:.1f}B")

    mean_option_volume = universe['avg_option_volume'].mean()
    print(f"Average option volume: {mean_option_volume:.0f} contracts/day")

    # List the ten largest names, one line each
    print(f"\n📈 Top 10 Stocks by Market Cap:")
    top_stocks = universe.nlargest(n=10, columns='avg_market_cap')
    for _, stock in top_stocks.iterrows():
        symbol = stock['ticker']
        cap_billions = stock['avg_market_cap'] / 1e9
        daily_contracts = stock['avg_option_volume']
        print(f"{symbol}: ${cap_billions:.1f}B, {daily_contracts:.0f} opt/day")

## 6. Cross-Sectional Analysis (Meeting Requirement)

Run analysis across multiple stocks to get "things right on average, over many samples."

In [None]:
# Run the single-name case study on a small subset of the universe and
# compare the held-out regression metrics across tickers.
if universe is not None:
    # Take the 5 largest stocks by average market cap for demonstration.
    # nlargest ranks explicitly instead of relying on the row order of
    # `universe`, consistent with how the "Top 10 Stocks by Market Cap"
    # listing selects its rows.
    demo_tickers = universe.nlargest(5, 'avg_market_cap')['ticker'].tolist()

    print(f"\n🔬 Running Cross-Sectional Analysis on: {demo_tickers}")

    # One summary record per ticker whose case study produced a result
    cross_sectional_results = []

    for ticker in demo_tickers:
        print(f"\n--- Analyzing {ticker} ---")
        try:
            case_study = run_single_name_case_study(
                pipeline=pipeline,
                ticker=ticker,
                start_date='2023-01-01',
                end_date='2024-12-31'
            )

            # A falsy case study is skipped rather than recorded
            if case_study:
                cross_sectional_results.append({
                    'ticker': ticker,
                    'test_r2': case_study['regression_results']['test_r2'],
                    'test_rmse': case_study['regression_results']['test_rmse'],
                    'data_points': len(case_study['regression_data'])
                })
        except Exception as e:
            # Best-effort sweep: report the failure and move on to the next
            # ticker so one bad name does not abort the whole comparison.
            print(f"Error analyzing {ticker}: {e}")

    # Summary of cross-sectional results (skipped when every ticker failed)
    if cross_sectional_results:
        results_df = pd.DataFrame(cross_sectional_results)

        print(f"\n📊 Cross-Sectional Analysis Summary:")
        print(f"Average Test R²: {results_df['test_r2'].mean():.3f}")
        print(f"Average Test RMSE: {results_df['test_rmse'].mean():.4f}")
        print(f"Total data points: {results_df['data_points'].sum():,}")

        # Side-by-side bar charts: R² on the left, RMSE on the right
        fig, axes = plt.subplots(1, 2, figsize=(12, 5))

        axes[0].bar(results_df['ticker'], results_df['test_r2'])
        axes[0].set_title('Test R² by Stock')
        axes[0].set_ylabel('R²')

        axes[1].bar(results_df['ticker'], results_df['test_rmse'])
        axes[1].set_title('Test RMSE by Stock')
        axes[1].set_ylabel('RMSE')

        plt.tight_layout()
        plt.show()

In [None]:
# Release the WRDS connection before printing the wrap-up
db.close()

# Closing banner followed by the follow-up items from the meeting
closing_messages = (
    "\n🎉 Enhanced Analysis Complete!",
    "\nNext Steps (from meeting):",
    "1. Focus on specific maturity points and ATM moneyness",
    "2. Adjust realized volatility parameters based on 'eye-balling' the graphs",
    "3. Build robust but parsimonious initial regression model",
    "4. Decide between single-name vs cross-sectional approach",
    "5. Settle on universe of large cap stocks with option volume filters",
)
for message in closing_messages:
    print(message)