# Options Arbitrage Analysis Using Heston Model

This notebook demonstrates how to calibrate the Heston model to option prices and how to detect arbitrage opportunities in options markets, using data from WRDS or generated sample data.

## Setup and Imports

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime

from heston_model import HestonModel
from wrds_connector import WRDSConnector, generate_sample_data
from arbitrage_detector import ArbitrageDetector

%matplotlib inline
# Lift pandas' column-count and line-width display limits so wide option
# tables are shown in full instead of being truncated.
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

## 1. Load Options Data

We'll start with sample data for demonstration. To use real WRDS data, set `use_wrds=True` and ensure you have WRDS credentials configured.

In [None]:
# Configuration
ticker = 'AAPL'
use_wrds = False  # Set to True to use WRDS data

if use_wrds:
    # Connect to WRDS (requires WRDS credentials to be configured)
    connector = WRDSConnector()

    # Define date range
    start_date = '2023-01-01'
    end_date = '2023-12-31'

    # Fetch data. The try/finally guarantees the connection is closed even
    # when one of the queries raises, so a failed fetch does not leak it.
    try:
        options_df = connector.fetch_option_data(ticker, start_date, end_date)
        stock_df = connector.fetch_underlying_price(ticker, start_date, end_date)
    finally:
        connector.close()
else:
    # Use sample data
    print("Using sample data for demonstration...")
    options_df, stock_df = generate_sample_data(ticker=ticker)

# Fail with a clear message instead of an opaque IndexError from .iloc[-1]
# when no underlying prices were returned.
if len(stock_df) == 0:
    raise ValueError(f"No underlying price data returned for {ticker}")

# Get current stock price (last row of the underlying price table;
# presumably the most recent date -- verify the table is date-sorted)
stock_price = stock_df['stock_price'].iloc[-1]

print(f"Stock: {ticker}")
print(f"Current Price: ${stock_price:.2f}")
print(f"Total Options: {len(options_df)}")
print(f"Date Range: {options_df['date'].min()} to {options_df['date'].max()}")

## 2. Explore the Data

In [None]:
# Preview the first ten option records; leaving the frame as the cell's
# final expression lets the notebook render it as a rich table.
print("Sample Options Data:")
options_df.head(n=10)

In [None]:
# Summary statistics: contract counts per option type, the spread of
# times to maturity, and the lowest/highest strike in the data set.
print("\nOptions by Type:")
type_counts = options_df['cp_flag'].value_counts()
print(type_counts)

print("\nTime to Maturity Distribution:")
maturity_summary = options_df['T'].describe()
print(maturity_summary)

print("\nStrike Price Range:")
strikes = options_df['strike_price']
print(f"Min: ${strikes.min():.2f}")
print(f"Max: ${strikes.max():.2f}")

In [None]:
# Visualize implied volatility smile
latest_date = options_df['date'].max()
latest_options = options_df[options_df['date'] == latest_date]

# Select one maturity (the shortest available). NaN maturities are dropped
# first: they cannot be ordered reliably and would match no rows below.
maturities = sorted(latest_options['T'].dropna().unique())
if len(maturities) > 0:
    selected_T = maturities[0]

    # Sort by strike so the connected markers trace each curve from left to
    # right; in arbitrary row order the line segments would zig-zag.
    subset = (
        latest_options[latest_options['T'] == selected_T]
        .sort_values('strike_price')
    )

    calls = subset[subset['cp_flag'] == 'C']
    puts = subset[subset['cp_flag'] == 'P']

    fig, axes = plt.subplots(1, 2, figsize=(12, 5))

    # (column to plot, y-axis label, panel title) for the two side-by-side panels
    panels = [
        ('impl_volatility', 'Implied Volatility',
         f'Volatility Smile (T={selected_T:.2f} years)'),
        ('mid_price', 'Option Price',
         f'Option Prices (T={selected_T:.2f} years)'),
    ]

    for ax, (column, y_label, panel_title) in zip(axes, panels):
        ax.plot(calls['strike_price'], calls[column], 'bo-', label='Calls')
        ax.plot(puts['strike_price'], puts[column], 'ro-', label='Puts')
        # Mark the current underlying price for reference
        ax.axvline(stock_price, color='g', linestyle='--', label='Stock Price')
        ax.set_xlabel('Strike Price')
        ax.set_ylabel(y_label)
        ax.set_title(panel_title)
        ax.legend()
        ax.grid(True)

    fig.tight_layout()
    plt.show()

## 3. Calibrate Heston Model

In [None]:
# Risk-free rate used for calibration and by the comparison cell that follows.
# Defined before the branch so it exists whichever path is taken.
r = 0.05  # 5% risk-free rate

# Calibration prices every option against the single `stock_price`, so only
# quotes from the most recent date are used; quotes from other dates would
# have been observed at a different underlying price.
calibration_date = options_df['date'].max()

# Select liquid options for calibration
calibration_data = options_df[
    (options_df['date'] == calibration_date) &
    (options_df['T'] > 0.08) &  # At least 1 month
    (options_df['T'] < 0.5) &   # Less than 6 months
    (options_df['volume'] > 10) &  # Reasonable volume
    (options_df['mid_price'] > 0)  # Drops missing (NaN) and non-positive quotes
].copy()

# Use calls for calibration (at most the first 20 rows)
calls_for_calibration = calibration_data[calibration_data['cp_flag'] == 'C'].head(20)

print(f"Calibrating with {len(calls_for_calibration)} call options...")

if len(calls_for_calibration) >= 5:
    market_prices = calls_for_calibration['mid_price'].values
    K_list = calls_for_calibration['strike_price'].values
    T_list = calls_for_calibration['T'].values

    # Calibrate
    # NOTE(review): K_list is passed twice (3rd and 6th positional argument);
    # confirm this matches the HestonModel.calibrate signature.
    heston_params = HestonModel.calibrate(
        market_prices, stock_price, K_list, r, T_list, K_list, option_type='call'
    )

    print("\nCalibrated Heston Parameters:")
    for param, value in heston_params.items():
        print(f"  {param}: {value:.6f}")
else:
    print("Insufficient data for calibration")
    heston_params = None

In [None]:
# Compare Heston model prices with market prices
if heston_params is not None:

    def _price_gap(option_row):
        """Build the market-vs-model record for one calibration option.

        Prices the option with a HestonModel built from the calibrated
        parameters and returns a dict holding the strike, maturity, both
        prices, and the absolute and percentage pricing error.
        """
        pricer = HestonModel(
            stock_price, option_row['strike_price'], r, option_row['T'],
            heston_params['v0'], heston_params['kappa'],
            heston_params['theta'], heston_params['sigma'], heston_params['rho']
        )
        model_quote = pricer.call_price()
        market_quote = option_row['mid_price']
        abs_gap = abs(model_quote - market_quote)

        return {
            'strike': option_row['strike_price'],
            'T': option_row['T'],
            'market_price': market_quote,
            'heston_price': model_quote,
            'error': abs_gap,
            'error_pct': 100 * abs_gap / market_quote
        }

    # One record per option used in calibration
    comparison_data = [
        _price_gap(option_row)
        for _, option_row in calls_for_calibration.iterrows()
    ]
    comparison_df = pd.DataFrame(comparison_data)

    print("\nModel vs Market Comparison:")
    print(comparison_df)

    mean_abs_error = comparison_df['error'].mean()
    mean_pct_error = comparison_df['error_pct'].mean()
    print(f"\nMean Absolute Error: ${mean_abs_error:.4f}")
    print(f"Mean Percentage Error: {mean_pct_error:.2f}%")

## 4. Detect Arbitrage Opportunities

In [None]:
# Use most recent date for arbitrage detection
latest_date = options_df['date'].max()
is_latest = options_df['date'] == latest_date
latest_options = options_df.loc[is_latest].copy()

print(f"Analyzing options as of {latest_date}")
print(f"Options analyzed: {len(latest_options)}")

# Detector settings: 5% risk-free rate and a $0.01 minimum profit
detector_settings = {
    'risk_free_rate': 0.05,
    'tolerance': 0.01,
}

# Initialize detector on the latest snapshot and the current stock price
detector = ArbitrageDetector(latest_options, stock_price, **detector_settings)

# Detect all arbitrage opportunities; the result is indexed by arbitrage
# type in the cells that follow
arbitrage_results = detector.detect_all_arbitrage()

## 5. Analyze Results

In [None]:
# Put-Call Parity Violations
pcp_violations = arbitrage_results['put_call_parity']
n_pcp = len(pcp_violations)

if n_pcp == 0:
    print("No put-call parity violations detected.")
else:
    # Banner, rule line, then the full table of violations
    print("PUT-CALL PARITY VIOLATIONS", "=" * 70, sep="\n")
    print(pcp_violations)

    # Visualize: one bar per detected opportunity, height = expected profit
    plt.figure(figsize=(10, 6))
    plt.bar(range(n_pcp), pcp_violations['expected_profit'])
    plt.title('Put-Call Parity Arbitrage Opportunities')
    plt.xlabel('Opportunity Index')
    plt.ylabel('Expected Profit ($)')
    plt.grid(True, alpha=0.3)
    plt.show()

In [None]:
# Butterfly Spread Opportunities
butterfly_arb = arbitrage_results['butterfly']

if len(butterfly_arb) == 0:
    print("\nNo butterfly spread arbitrage detected.")
else:
    # Banner, rule line, then the full table of opportunities
    print("\nBUTTERFLY SPREAD ARBITRAGE", "=" * 70, sep="\n")
    print(butterfly_arb)

In [None]:
# Calendar Spread Opportunities
calendar_arb = arbitrage_results['calendar']

if len(calendar_arb) == 0:
    print("\nNo calendar spread arbitrage detected.")
else:
    # Banner, rule line, then the full table of opportunities
    print("\nCALENDAR SPREAD ARBITRAGE", "=" * 70, sep="\n")
    print(calendar_arb)

In [None]:
# Box Spread Opportunities
box_arb = arbitrage_results['box_spread']

if len(box_arb) == 0:
    print("\nNo box spread arbitrage detected.")
else:
    # Banner, rule line, then the full table of opportunities
    print("\nBOX SPREAD ARBITRAGE", "=" * 70, sep="\n")
    print(box_arb)

In [None]:
# Summary visualization
# Per arbitrage type: number of opportunities and summed expected profit
# (0 when the result is empty or has no 'expected_profit' column).
arb_counts = {}
total_profits = {}
for arb_name, arb_frame in arbitrage_results.items():
    arb_counts[arb_name] = len(arb_frame)
    has_profit = len(arb_frame) > 0 and 'expected_profit' in arb_frame.columns
    total_profits[arb_name] = arb_frame['expected_profit'].sum() if has_profit else 0

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Left panel: count of opportunities; right panel: total profits
panel_specs = [
    (arb_counts, 'Number of Opportunities', 'Arbitrage Opportunities by Type'),
    (total_profits, 'Total Expected Profit ($)', 'Total Expected Profit by Type'),
]

for ax, (values_by_type, y_label, panel_title) in zip(axes, panel_specs):
    positions = range(len(values_by_type))
    # Turn keys such as 'box_spread' into tick labels such as 'Box Spread'
    tick_labels = [name.replace('_', ' ').title() for name in values_by_type.keys()]

    ax.bar(positions, list(values_by_type.values()))
    ax.set_xticks(positions)
    ax.set_xticklabels(tick_labels, rotation=45, ha='right')
    ax.set_ylabel(y_label)
    ax.set_title(panel_title)
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

opportunity_total = sum(arb_counts.values())
profit_total = sum(total_profits.values())
print(f"\nTotal Arbitrage Opportunities: {opportunity_total}")
print(f"Total Expected Profit: ${profit_total:.2f}")

## 6. Export Results

In [None]:
# Save results to CSV files
# A single timestamp is shared by every file written in this run.
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

for arb_type, result_frame in arbitrage_results.items():
    # Nothing to export for arbitrage types with no detected opportunities
    if len(result_frame) == 0:
        continue

    filename = f"arbitrage_{arb_type}_{ticker}_{timestamp}.csv"
    result_frame.to_csv(filename, index=False)
    print(f"Saved {arb_type} results to {filename}")

## Conclusion

This notebook demonstrated:
1. Loading options data from WRDS (or sample data)
2. Calibrating the Heston model to market prices
3. Detecting various types of arbitrage opportunities
4. Analyzing and visualizing the results

### Next Steps:
- Connect to real WRDS data with your credentials
- Adjust the tolerance parameter based on your transaction costs
- Implement backtesting to validate strategies
- Add more sophisticated arbitrage detection methods
- Consider market microstructure effects