# Option Volatility Strategy - Exploration Notebook

This notebook provides an interactive environment for exploring the volatility prediction models and strategy.

In [None]:
# Standard imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

# Add src to path
import sys
sys.path.insert(0, '..')

# Project imports
from src.features.volatility import VolatilityCalculator
from src.features.technical import TechnicalIndicators
from src.features.pipeline import FeaturePipeline

# Display and plotting defaults for the rest of the notebook
pd.options.display.max_columns = 50  # render up to 50 columns of wide frames
plt.style.use('seaborn-v0_8-whitegrid')

## 1. Generate Sample Data

For demonstration, we'll generate synthetic price data with realistic properties.

In [None]:
# Seed NumPy's global RNG so every run of this cell draws the same series.
np.random.seed(42)

# Generate 2 years of daily data (504 business days)
n_days = 504
# normalize() strips the time-of-day from "today"; without it every index entry
# would carry the wall-clock time at which the cell happened to be executed.
dates = pd.date_range(end=pd.Timestamp.today().normalize(), periods=n_days, freq='B')

# Generate returns with some volatility clustering
returns = np.zeros(n_days)
volatility = np.zeros(n_days)
volatility[0] = 0.015

for i in range(1, n_days):
    # GARCH-like recursion: today's volatility depends on yesterday's
    # volatility and on the size of yesterday's return.
    volatility[i] = 0.001 + 0.85 * volatility[i-1] + 0.1 * abs(returns[i-1])
    returns[i] = np.random.normal(0.0003, volatility[i])

prices = 100 * np.cumprod(1 + returns)

# Draw the bar components in the same order as before (open, high, low, volume)
# so the seeded random stream is consumed identically.
open_prices = prices * (1 + np.random.uniform(-0.005, 0.005, n_days))
raw_high = prices * (1 + np.abs(np.random.normal(0, 0.01, n_days)))
raw_low = prices * (1 - np.abs(np.random.normal(0, 0.01, n_days)))
volume = np.random.randint(1_000_000, 10_000_000, n_days)

# The open is drawn independently of the high/low offsets, so it can land
# outside the [low, high] range. Clamp the extremes so every bar satisfies
# low <= min(open, close) and high >= max(open, close); range-based volatility
# estimators rely on that ordering.
high_prices = np.maximum(raw_high, np.maximum(open_prices, prices))
low_prices = np.minimum(raw_low, np.minimum(open_prices, prices))

# Create OHLCV dataframe
df = pd.DataFrame({
    'open': open_prices,
    'high': high_prices,
    'low': low_prices,
    'close': prices,
    'volume': volume
}, index=dates)

print(f"Data shape: {df.shape}")
print(f"Date range: {df.index.min()} to {df.index.max()}")
df.tail()

## 2. Price Visualization

In [None]:
# Build the candlestick trace from the OHLC columns of the price frame.
candles = go.Candlestick(
    x=df.index,
    open=df['open'],
    high=df['high'],
    low=df['low'],
    close=df['close'],
)

# Wrap the trace in a figure, label it, and render.
fig = go.Figure(data=[candles])
fig.update_layout(title='Price Chart', xaxis_title='Date', yaxis_title='Price', height=500)
fig.show()

## 3. Volatility Calculations

In [None]:
vol_calc = VolatilityCalculator()

# Four 21-day realized-volatility estimates, stored as new columns on df.
# The two OHLC-based estimators take the same four price series, so bundle them once.
ohlc = (df['open'], df['high'], df['low'], df['close'])
df['rv_cc_21d'] = vol_calc.realized_volatility(df['close'], window=21)
df['rv_parkinson_21d'] = vol_calc.realized_volatility_parkinson(df['high'], df['low'], window=21)
df['rv_gk_21d'] = vol_calc.realized_volatility_garman_klass(*ohlc, window=21)
df['rv_yz_21d'] = vol_calc.realized_volatility_yang_zhang(*ohlc, window=21)

# One line per estimator on a shared time axis
vol_cols = ['rv_cc_21d', 'rv_parkinson_21d', 'rv_gk_21d', 'rv_yz_21d']

fig = go.Figure(data=[
    go.Scatter(x=df.index, y=df[name], name=name, mode='lines')
    for name in vol_cols
])
fig.update_layout(
    title='Realized Volatility Measures (21-day)',
    xaxis_title='Date',
    yaxis_title='Annualized Volatility',
    height=400,
)
fig.show()

## 4. Technical Indicators

In [None]:
tech = TechnicalIndicators()

# Indicator columns written onto the working frame
close_series = df['close']
df['rsi_14'] = tech.rsi(close_series, window=14)
df['atr_14'] = tech.atr(df['high'], df['low'], close_series, window=14)
df['bb_width_20'] = tech.bollinger_width(close_series, window=20)

# Two stacked panels sharing the date axis: price on top, RSI underneath
fig, axes = plt.subplots(2, 1, figsize=(12, 8), sharex=True)
price_ax, rsi_ax = axes

price_ax.plot(df.index, df['close'])
price_ax.set_ylabel('Price')
price_ax.set_title('Price')

rsi_ax.plot(df.index, df['rsi_14'])
# Dashed reference lines at RSI 70 (red) and RSI 30 (green)
for level, line_color in ((70, 'r'), (30, 'g')):
    rsi_ax.axhline(y=level, color=line_color, linestyle='--', alpha=0.5)
rsi_ax.set_ylabel('RSI')
rsi_ax.set_title('RSI (14)')

plt.tight_layout()
plt.show()

## 5. Feature Pipeline

In [None]:
# Pipeline configuration, kept in one place so it is easy to tweak
pipeline_options = dict(
    rv_windows=[5, 10, 21, 63],
    include_technical=True,
    include_macro=True,
    include_lags=True,
)
pipeline = FeaturePipeline(**pipeline_options)

# Run the pipeline on the working price frame
features = pipeline.transform(df)

# Report how many columns came back and preview the first 20 names
feature_names = features.columns.tolist()
print(f"Generated {len(feature_names)} features")
print("\nFeature columns:")
print(feature_names[:20])

## 6. Feature Correlation

In [None]:
# Features of interest for the correlation heatmap; any name the pipeline
# did not produce is dropped so the cell still runs.
candidate_features = (
    'rv_cc_5d', 'rv_cc_21d', 'rv_cc_63d',
    'rsi_14', 'atr_pct_14', 'bb_width_20',
)
key_features = [name for name in candidate_features if name in features.columns]

# Only draw the heatmap when at least one requested feature is available
if len(key_features) > 0:
    corr = features[key_features].corr()

    fig = px.imshow(
        corr,
        labels={'color': 'Correlation'},
        color_continuous_scale='RdBu_r',
        zmin=-1,
        zmax=1,
    )
    fig.update_layout(title='Feature Correlation Matrix', height=500)
    fig.show()

## 7. Model Training Example

In [None]:
from src.models.baseline import HistoricalMeanModel
from src.models.linear import RidgeVolModel

# Prepare data
# The horizon is named once because it drives both the target construction
# and the size of the embargo gap applied to the split below.
forecast_horizon = 21

# Add target column if not present
if 'rv_cc_21d' not in features.columns:
    features['rv_cc_21d'] = vol_calc.realized_volatility(df['close'], window=21)

# Prepare training data
X, y = pipeline.prepare_training_data(
    features, target_col='rv_cc_21d', forecast_horizon=forecast_horizon
)

print(f"Training data shape: X={X.shape}, y={y.shape}")

# Chronological train/test split (80/20) with an embargo gap.
# NOTE(review): assumes prepare_training_data pairs each row with a target
# observed `forecast_horizon` rows later — confirm against its implementation.
# Under that assumption, the last `forecast_horizon` training rows have labels
# drawn from the test period, so they are dropped to keep the evaluation
# out-of-sample.
split_idx = int(len(X) * 0.8)
train_end = max(split_idx - forecast_horizon, 0)  # never slice with a negative bound
X_train, X_test = X.iloc[:train_end], X.iloc[split_idx:]
y_train, y_test = y.iloc[:train_end], y.iloc[split_idx:]

print(f"Train: {len(X_train)}, Test: {len(X_test)}")

In [None]:
# Fit the historical-mean baseline on the training rows, then score it on the test rows
baseline = HistoricalMeanModel(window=21)
baseline.fit(X_train, y_train)
baseline_metrics = baseline.evaluate(X_test, y_test)

print("Baseline Model (Historical Mean):")
for label, metric_key in (("RMSE", "rmse"), ("R2", "r2")):
    print(f"  {label}: {baseline_metrics[metric_key]:.4f}")

In [None]:
# Fit the ridge model on the same split and score it on the test rows
ridge = RidgeVolModel(alpha=1.0)
ridge.fit(X_train, y_train)
ridge_metrics = ridge.evaluate(X_test, y_test)

print("\nRidge Model:")
for label, metric_key in (("RMSE", "rmse"), ("R2", "r2")):
    print(f"  {label}: {ridge_metrics[metric_key]:.4f}")

# Show the first ten rows of the model's feature-importance table
importance = ridge.get_feature_importance()
print("\nTop 10 Features:")
print(importance.head(10))

## 8. Prediction Visualization

In [None]:
# Test-set predictions from both fitted models
y_pred_baseline = baseline.predict(X_test)
y_pred_ridge = ridge.predict(X_test)

# Legend label -> series; insertion order fixes the trace order in the figure
series_by_label = {
    'Actual': y_test.values,
    'Baseline': y_pred_baseline,
    'Ridge': y_pred_ridge,
}

# Overlay every series on the test-period index
fig = go.Figure()
for label, values in series_by_label.items():
    fig.add_trace(go.Scatter(x=y_test.index, y=values, name=label, mode='lines'))

fig.update_layout(
    title='Volatility Prediction Comparison',
    xaxis_title='Date',
    yaxis_title='Volatility',
    height=500,
)
fig.show()

## 9. Summary

This notebook demonstrated:
1. Generating synthetic price data with volatility clustering
2. Calculating multiple volatility measures
3. Computing technical indicators
4. Building a feature pipeline
5. Training and comparing models

For production use, you would:
- Use real market data from Polygon.io
- Include implied volatility from options
- Use walk-forward cross-validation
- Train more sophisticated models (XGBoost, LSTM, TFT)