# Data Exploration

Explore the options, stock, and index data in S3.

**Data Available:**
- Options: 35.4M rows (2002, 2004, 2005, 2011, 2012, 2013)
- Stocks: 404K rows
- SPX Index: 3,772 rows (2000-2014)

In [None]:
# Setup - run this first
import sys
from pathlib import Path

# Make the parent of the working directory importable so the `src` package
# used below resolves. The path is resolved to an absolute one so it stays
# valid if the working directory changes later, and the membership check
# keeps re-runs of this cell from stacking duplicate entries on sys.path.
_parent_dir = str(Path('..').resolve())
if _parent_dir not in sys.path:
    sys.path.insert(0, _parent_dir)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from src.options_db import OptionsDB
from src.market_data import MarketData

# Set plot style
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*'.
# Fall back to the pre-3.6 name so this cell also runs on older installs
# instead of failing on an unknown style name.
_grid_style = 'seaborn-v0_8-whitegrid'
if _grid_style not in plt.style.available:
    _grid_style = 'seaborn-whitegrid'
plt.style.use(_grid_style)

# Let pandas show up to 50 columns when rendering a DataFrame.
pd.set_option('display.max_columns', 50)

print('Setup complete')

In [None]:
# Connect to databases
# Create the two data-access handles that every later cell relies on:
#   db -> OptionsDB, used for the options queries (available_data, query_month, daily_stats)
#   md -> MarketData, used for the stock and SPX lookups (get_stock, get_spx)
# NOTE(review): both are constructed with defaults; presumably they pick up
# their S3 location/credentials from the environment - confirm in src/.
db = OptionsDB()
md = MarketData()

## Available Data Periods

In [None]:
# What data do we have?
# Ask the options handle which data it can serve. As the last expression in
# the cell, the returned value is rendered by the notebook.
db.available_data()

## Options Data Sample

In [None]:
# Pull a sample of options rows for April 2002, capped via limit=1000
options = db.query_month(2002, 4, limit=1000)

# Report the sample's dimensions, then preview its first ten rows
sample_shape = options.shape
print(f'Shape: {sample_shape}')
options.head(10)

In [None]:
# Options data statistics
# Summary statistics for the `options` sample loaded in the previous cell
# (that cell must have been run first). Rendered as the cell's output.
options.describe()

## Stock Data Sample

In [None]:
# Load the AAPL stock rows for April 2002
aapl = md.get_stock('AAPL', year=2002, month=4)

# One row is reported as one trading day; then show the full frame
n_days = len(aapl)
print(f'AAPL April 2002: {n_days} trading days')
aapl

In [None]:
# Line chart of the AAPL 'close' column against 'quotedate' for April 2002
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(aapl['quotedate'], aapl['close'])

# Title and axis labels
ax.set_title('AAPL Stock Price - April 2002')
ax.set_xlabel('Date')
ax.set_ylabel('Price ($)')

# Tilt the x tick labels, tighten the layout, and render
plt.xticks(rotation=45)
fig.tight_layout()
plt.show()

## SPX Index Data

In [None]:
# Load the SPX index rows for 2002
spx = md.get_spx(year=2002)

# Row count first (one row is reported as one trading day) ...
n_days = len(spx)
print(f'SPX 2002: {n_days} trading days')

# ... then the low/high of the 'close' column, rounded to whole points
spx_close = spx["close"]
print(f'SPX range: {spx_close.min():.0f} - {spx_close.max():.0f}')

# Preview the first rows
spx.head()

In [None]:
# Line chart of the SPX 'close' column against 'date' for 2002
fig, ax = plt.subplots(figsize=(12, 4))
ax.plot(spx['date'], spx['close'])

# Title and axis labels
ax.set_title('S&P 500 Index - 2002')
ax.set_xlabel('Date')
ax.set_ylabel('SPX Level')

# Tilt the x tick labels, tighten the layout, and render
plt.xticks(rotation=45)
fig.tight_layout()
plt.show()

## IV Distribution

In [None]:
# Get daily IV stats
# Fetch per-day statistics for April 2002 from the options handle and show
# them. The plotting cell below reads the 'DataDate' and 'avg_iv' columns
# from this result.
# NOTE(review): positional args are presumably (year, month), matching
# query_month above - confirm against OptionsDB.daily_stats.
daily = db.daily_stats(2002, 4)
daily

In [None]:
# Line chart of the daily 'avg_iv' column against 'DataDate' for April 2002
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(daily['DataDate'], daily['avg_iv'])

# Title and axis labels
ax.set_title('Average Implied Volatility - April 2002')
ax.set_xlabel('Date')
ax.set_ylabel('Average IV')

# Tilt the x tick labels, tighten the layout, and render
plt.xticks(rotation=45)
fig.tight_layout()
plt.show()