# Wheel Strategy Candidates - Optimized Multi-Ticker Scanner

**Purpose**: Collect wheel strategy option candidates across multiple tickers and days

**Features**:
- Multi-ticker support (AAPL, TSLA, expandable to 600+)
- Smart pre-filtering: Only fetch options for days with BB/SMA entry signals
- Batch API calls: Minimize Databento requests
- Trading-day DTE calculation (not calendar days)
- Actual IV calculation from market prices (not hardcoded)
- Delta calculated using actual IV

**Workflow**:
1. Configuration
2. Fetch equity data → Calculate BB & SMA → Identify entry signals
3. Batch fetch options only for qualifying days
4. Parse symbols, calculate trading-day DTE
5. Merge underlying prices
6. Calculate IV and delta
7. Output results

## Stage 1: Imports & Environment Setup

In [None]:
# Core imports
import os
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from pathlib import Path

# Databento
import databento as db

# Options pricing
from py_vollib.black_scholes import implied_volatility
from py_vollib.black_scholes.greeks.analytical import delta

# Market calendar
import pandas_market_calendars as mcal

# Environment
from dotenv import load_dotenv

print("✓ All imports successful")

In [None]:
# Load environment variables from the project's .env file.
# override=True lets values in the file replace variables already set in the process.
env_path = Path("/Users/samuelminer/Projects/nissan_options/wheel_strategy/.env")
load_dotenv(env_path, override=True)

# Verify API key. An explicit raise is used rather than `assert` because
# assertions are removed when Python runs with -O, which would let a missing
# key slip through to the first API call.
if not os.getenv("DATABENTO_API_KEY"):
    raise RuntimeError("DATABENTO_API_KEY not found in .env")

# Initialize Databento client (no key is passed explicitly here)
client = db.Historical()

# Initialize NYSE calendar for trading day calculations
nyse = mcal.get_calendar('NYSE')

print("✓ Environment loaded")
print("✓ Databento client initialized")
print("✓ NYSE calendar initialized")

In [None]:
# Configuration
# Single dictionary of run parameters read by the later stages of this notebook.
CONFIG = {
    # Tickers to scan (iterated by the equity fetch loop)
    'tickers': ['AAPL', 'TSLA'],
    
    # Number of entry signal days to collect per ticker
    'num_days': 10,  # Last 10 trading days with BB/SMA entry signals
    
    # Historical data for Bollinger Bands calculation
    'lookback_days': 504,  # 2 years of trading days
    
    # Bollinger Bands parameters
    'bb_window': 20,  # 20-day SMA (rolling window length and min_periods)
    'bb_std': 2.0,    # 2 standard deviations (multiplier applied to the rolling std)
    
    # Options collection parameters
    'collection_time': '15:45',  # Eastern Time
    # DTE bounds are inclusive and are compared against the trading-day DTE
    # computed in Stage 5 (not calendar days).
    'min_dte': 30,
    'max_dte': 45,
    'option_type': 'P',  # Puts only for wheel strategy (matched against the parsed call_put)
    
    # Greeks calculation
    'risk_free_rate': 0.05,  # 5%, passed as `r` to compute_iv() and calculate_delta()
    
    # Data settings
    'timezone': 'America/New_York',  # tz used when converting fetched data to DataFrames
    'equity_dataset': 'EQUS.MINI',  # Consolidated US equities
    'options_dataset': 'OPRA.PILLAR',  # Options data
}

# Echo the active settings so the run is self-describing
print("Configuration:")
for key, value in CONFIG.items():
    print(f"  {key}: {value}")

## Stage 2: Helper Functions

In [None]:
def parse_opra_symbol(symbol: str):
    """
    Parse OSI option symbol format: "AAPL 230721P00180000"

    The symbol is split on whitespace into a root and a 15-character contract
    code laid out as YYMMDD (expiration) + C/P + 8-digit strike, where the
    strike digits are the strike price multiplied by 1000.

    Args:
        symbol: OSI-formatted option symbol. Non-string input (e.g. None/NaN)
            is treated as unparseable.

    Returns:
        tuple: (root, expiration_date, call_put, strike), or
        (None, None, None, None) when the symbol cannot be parsed.
    """
    failed = (None, None, None, None)

    try:
        parts = symbol.split()
    except AttributeError:
        # Not a string, so there is nothing to parse
        return failed

    if len(parts) != 2:
        return failed

    root, rest = parts

    # 6 date chars + 1 type char + 8 strike chars. A shorter or longer code
    # would otherwise be sliced into a plausible-looking but wrong strike.
    if len(rest) != 15:
        return failed

    # Parse call/put (7th char)
    call_put = rest[6]
    if call_put not in ("C", "P"):
        return failed

    # Reject signs and other non-digit characters that int() would tolerate
    date_str = rest[:6]
    strike_str = rest[7:]
    if not (date_str.isdigit() and strike_str.isdigit()):
        return failed

    try:
        # Parse date: YYMMDD (two-digit year is taken as 20YY)
        expiration = datetime(
            2000 + int(date_str[:2]),
            int(date_str[2:4]),
            int(date_str[4:6]),
        )
        # Parse strike (8 chars, divide by 1000)
        strike = int(strike_str) / 1000.0
    except ValueError:
        # Impossible calendar date or unconvertible digits
        return failed

    return root, expiration, call_put, strike

# Test
root, exp, cp, strike = parse_opra_symbol("AAPL 230721P00180000")
print(f"Test: AAPL 230721P00180000 → {root}, {exp}, {cp}, ${strike}")

In [None]:
def _to_naive_day(value):
    """Return `value` as a tz-naive pandas Timestamp at midnight of its own local date."""
    ts = pd.Timestamp(value)
    if ts.tzinfo is not None:
        # Drop the zone but keep the wall-clock date, so a quote stamped
        # 15:45 in its own zone still maps to that same calendar day.
        ts = ts.tz_localize(None)
    return ts.normalize()


def calculate_dte_trading_days(event_date, exp_date, nyse_calendar):
    """
    Calculate trading days to expiration (excludes weekends and holidays)

    Both dates are reduced to tz-naive calendar days before comparison, so
    tz-aware quote timestamps and naive expiration dates can be mixed without
    raising a tz-naive/tz-aware comparison error.

    Args:
        event_date: Date of quote (naive or tz-aware; anything pd.Timestamp accepts)
        exp_date: Expiration date (naive or tz-aware)
        nyse_calendar: Calendar object exposing schedule(start_date=, end_date=)
            that returns a frame indexed by session date
            (e.g. the pandas_market_calendars NYSE calendar)

    Returns:
        int: Number of sessions after event_date up to and including exp_date;
        0 when exp_date is on or before event_date.
    """
    # Normalize to dates (remove time component and timezone)
    event_dt = _to_naive_day(event_date)
    exp_dt = _to_naive_day(exp_date)

    # Nothing left to count; also avoids asking the calendar for an inverted range
    if exp_dt <= event_dt:
        return 0

    # Get schedule for the date range
    schedule = nyse_calendar.schedule(start_date=event_dt, end_date=exp_dt)

    sessions = schedule.index
    if getattr(sessions, "tz", None) is not None:
        # Keep the comparison naive-vs-naive if the calendar returns aware dates
        sessions = sessions.tz_localize(None)

    # Count sessions between (exclusive of event_date, inclusive of exp_date)
    trading_days = len(sessions[(sessions > event_dt) & (sessions <= exp_dt)])

    return trading_days

# Smoke test: count the sessions between a sample quote date and its expiry
test_dte = calculate_dte_trading_days(
    event_date=datetime(2023, 6, 6),
    exp_date=datetime(2023, 7, 21),
    nyse_calendar=nyse,
)
print(f"Test: Trading days from 2023-06-06 to 2023-07-21 = {test_dte}")

In [None]:
def compute_iv(row, r=0.05, days_per_year=365.0):
    """
    Calculate implied volatility from option market price

    Args:
        row: DataFrame row (or mapping) with mid, underlying_last, strike,
            dte, call_put
        r: Risk-free rate (default 5%)
        days_per_year: Divisor that converts `dte` into a year fraction.
            Defaults to 365 to preserve existing behaviour.
            NOTE(review): this notebook stores dte in trading days, for which
            252 would be the matching basis - confirm, and use the same value
            for the delta calculation.

    Returns:
        float: Implied volatility or np.nan if calculation fails
    """
    price = row["mid"]  # (bid + ask) / 2
    S = row["underlying_last"]
    K = row["strike"]
    t = row["dte"] / days_per_year
    flag = "p" if row["call_put"] == "P" else "c"

    # Validation: every input must be finite and strictly positive
    if not (np.isfinite(price) and np.isfinite(S) and np.isfinite(K) and t > 0):
        return np.nan
    if price <= 0 or S <= 0 or K <= 0:
        return np.nan

    # Import the solver function explicitly. The notebook-level
    # `from py_vollib.black_scholes import implied_volatility` binds the
    # `implied_volatility` submodule rather than the function inside it;
    # calling a module raises TypeError, which the best-effort except below
    # turned into NaN for every row.
    from py_vollib.black_scholes.implied_volatility import (
        implied_volatility as bs_implied_volatility,
    )

    try:
        return bs_implied_volatility(price, S, K, t, r, flag)
    except Exception:
        # Best effort: the solver rejects prices it cannot invert; such rows
        # are reported as NaN and filtered out by the caller.
        return np.nan

print("✓ compute_iv() defined")

In [None]:
def calculate_delta(row, r=0.05, days_per_year=365.0):
    """
    Calculate delta using Black-Scholes with actual IV

    Args:
        row: DataFrame row (or mapping) with underlying_last, strike, dte,
            iv, call_put
        r: Risk-free rate (default 5%)
        days_per_year: Divisor that converts `dte` into a year fraction.
            Defaults to 365 to preserve existing behaviour; it should match
            the basis that was used to compute `iv`.
            NOTE(review): this notebook stores dte in trading days, for which
            252 would be the matching basis - confirm.

    Returns:
        float: Delta or np.nan if calculation fails
    """
    S = row['underlying_last']
    K = row['strike']
    t = row['dte'] / days_per_year
    sigma = row['iv']  # Use calculated IV!
    flag = 'p' if row['call_put'] == 'P' else 'c'

    # Validation
    if not np.isfinite(sigma) or sigma <= 0:
        return np.nan
    if not (np.isfinite(S) and np.isfinite(K) and t > 0):
        return np.nan
    # Same positivity guard as compute_iv(): reject non-positive prices up
    # front instead of relying on the pricing call to raise.
    if S <= 0 or K <= 0:
        return np.nan

    try:
        return delta(flag, S, K, t, r, sigma)
    except Exception:
        # Best effort: a failed greek is reported as NaN
        return np.nan

print("✓ calculate_delta() defined")

## Stage 3: Fetch Equity Data & Identify Entry Signals

**Goal**: Find the last 10 trading days per ticker on which the closing price triggers a BB or SMA entry signal

In [None]:
# Fetch daily OHLCV bars for every configured ticker
print("Fetching equity data...")

equity_data = {}  # ticker -> DataFrame

# Calculate date range: the window ends at the start of the previous UTC day.
# pd.Timestamp.now(tz='UTC') is used because pd.Timestamp.utcnow() is
# deprecated in recent pandas releases; both give a UTC-aware timestamp.
end_date = pd.Timestamp.now(tz='UTC').normalize() - pd.Timedelta(days=1)

# CONFIG['lookback_days'] is documented as trading days, while Timedelta
# counts calendar days. Scale by 365/252 so the window covers approximately
# that many sessions instead of roughly two thirds of them.
lookback_calendar_days = int(np.ceil(CONFIG['lookback_days'] * 365 / 252))
start_date = end_date - pd.Timedelta(days=lookback_calendar_days)

for ticker in CONFIG['tickers']:
    print(f"\n  Fetching {ticker}...")

    try:
        data = client.timeseries.get_range(
            dataset=CONFIG['equity_dataset'],
            symbols=ticker,
            schema='ohlcv-1d',
            stype_in='raw_symbol',
            start=start_date,
            end=end_date,
        )

        # NOTE(review): if daily bars are stamped at 00:00 UTC, converting the
        # index to CONFIG['timezone'] moves each bar onto the previous
        # calendar date - confirm before relying on `.date()` downstream.
        df = data.to_df(tz=CONFIG['timezone'])
        df.index.name = 'date'

        equity_data[ticker] = df
        print(f"    ✓ {len(df)} days fetched")

    except Exception as e:
        # Best effort: report the failure and carry on with the next ticker
        print(f"    ✗ Error: {e}")

print(f"\n✓ Equity data fetched for {len(equity_data)} tickers")

In [None]:
# Derive Bollinger Bands per ticker and flag the days that qualify as entries
print("Calculating Bollinger Bands and entry signals...")

entry_signals = {}  # ticker -> DataFrame with signals

for ticker, df in equity_data.items():
    print(f"\n  Processing {ticker}...")

    # Work on a chronologically ordered copy so the fetched frame is untouched
    bars = df.copy().sort_values('date')

    # Rolling mean / population std of the close over the configured window;
    # rows before a full window stay NaN because min_periods equals the window
    period = CONFIG['bb_window']
    width = CONFIG['bb_std']
    rolling_close = bars['close'].rolling(window=period, min_periods=period)
    bars['sma20'] = rolling_close.mean()
    bars['std20'] = rolling_close.std(ddof=0)

    # Upper and lower bands sit `width` standard deviations around the SMA
    band_offset = width * bars['std20']
    bars['bb_upper'] = bars['sma20'] + band_offset
    bars['bb_lower'] = bars['sma20'] - band_offset

    # Entry rules: close at/below the SMA, or close at/below the lower band
    signals = bars[['close', 'sma20', 'bb_lower']].dropna()
    signals['sma_entry'] = signals['close'].le(signals['sma20'])
    signals['bb_entry'] = signals['close'].le(signals['bb_lower'])
    signals['entry_signal'] = signals['sma_entry'] | signals['bb_entry']

    entry_signals[ticker] = signals

    # Tally the signals for the progress report
    bb_signals = signals['bb_entry'].sum()
    sma_only = (signals['sma_entry'] & ~signals['bb_entry']).sum()
    total_signals = signals['entry_signal'].sum()

    print(f"    Total entry signals: {total_signals}")
    print(f"      - SMA only: {sma_only}")
    print(f"      - BB lower: {bb_signals}")

print("\n✓ Bollinger Bands calculated")

In [None]:
# Get last N days with entry signals per ticker
print(f"Selecting last {CONFIG['num_days']} entry signal days per ticker...")

collection_schedule = []  # List of (ticker, date) tuples

for ticker, df_signals in entry_signals.items():
    # Filter to days with entry signals
    signal_days = df_signals[df_signals['entry_signal']]

    # Take last N days
    last_n_days = signal_days.index[-CONFIG['num_days']:].tolist()

    # A ticker with no signals has nothing to schedule; skip it instead of
    # failing on last_n_days[0] below.
    if not last_n_days:
        print(f"  {ticker}: 0 days (no entry signals found)")
        continue

    # Add to collection schedule
    collection_schedule.extend((ticker, date) for date in last_n_days)

    print(f"  {ticker}: {len(last_n_days)} days")
    print(f"    Date range: {last_n_days[0].date()} to {last_n_days[-1].date()}")

print(f"\n✓ Total collection points: {len(collection_schedule)}")
print(f"  (Will fetch options for {len(collection_schedule)} ticker-date combinations)")

## Stage 4: Batch Fetch Options Data

**Strategy**: One API call per ticker, fetch all dates in range, filter to 15:45 ET post-fetch

In [None]:
# Bucket the scheduled dates under their ticker so each ticker can be
# fetched in a single request
from collections import defaultdict

ticker_dates = defaultdict(list)
for entry in collection_schedule:
    # entry is a (ticker, date) pair
    ticker_dates[entry[0]].append(entry[1])

# Show what will be requested
print("Options fetch plan (batched by ticker):")
for ticker, dates in ticker_dates.items():
    print(f"  {ticker}: {len(dates)} days ({dates[0].date()} to {dates[-1].date()})")

In [None]:
# Batch fetch options for all tickers: one request per ticker spanning its
# first to last scheduled day, then trimmed to the snapshot minute locally.
print("\nFetching options data (batched)...")

options_raw = {}  # ticker -> DataFrame

# Snapshot minute comes from CONFIG['collection_time'] ('HH:MM') so the
# configured value and the fetch cannot drift apart.
snap_hour, snap_minute = (int(part) for part in CONFIG['collection_time'].split(':'))

for ticker, dates in ticker_dates.items():
    print(f"\n  Fetching {ticker} options...")

    # Calculate time range for batch fetch
    min_date = min(dates)
    max_date = max(dates)

    # Window runs from the snapshot minute on the first day to one minute
    # past the snapshot minute on the last day
    start_time = pd.Timestamp(min_date.date()).tz_localize(CONFIG['timezone']).replace(
        hour=snap_hour, minute=snap_minute
    )
    end_time = pd.Timestamp(max_date.date()).tz_localize(CONFIG['timezone']).replace(
        hour=snap_hour, minute=snap_minute
    ) + pd.Timedelta(minutes=1)

    try:
        data = client.timeseries.get_range(
            dataset=CONFIG['options_dataset'],
            schema='ohlcv-1m',
            symbols=f'{ticker}.OPT',
            stype_in='parent',
            start=start_time,
            end=end_time,
        )

        df_opts = data.to_df(tz=CONFIG['timezone'])

        # Keep only rows on a scheduled day ...
        df_opts['quote_date'] = df_opts.index.normalize()
        target_dates = [pd.Timestamp(d).normalize() for d in dates]
        on_target_day = df_opts['quote_date'].isin(target_dates).to_numpy()

        # ... and only the bar stamped with the snapshot minute. Without this
        # every minute bar of each scheduled day would flow into the results.
        # NOTE(review): assumes the bar timestamp marks the start of its minute - confirm.
        at_snapshot = (df_opts.index.hour == snap_hour) & (df_opts.index.minute == snap_minute)

        df_opts = df_opts[on_target_day & at_snapshot]

        options_raw[ticker] = df_opts

        print(f"    ✓ {len(df_opts)} option contracts fetched")
        print(f"      Unique symbols: {df_opts['symbol'].nunique()}")

    except Exception as e:
        # Best effort: report the failure and carry on with the next ticker
        print(f"    ✗ Error: {e}")

print(f"\n✓ Options data fetched for {len(options_raw)} tickers")

## Stage 5: Parse Symbols, Calculate DTE, Filter

**Processing**:
1. Parse OSI symbols
2. Calculate trading-day DTE
3. Filter: puts only, with 30-45 trading days to expiration (DTE)
4. Add ticker column

In [None]:
# Combine all options data and parse symbols
print("Parsing option symbols...")

all_options = []
parsed_columns = ['root', 'expiration', 'call_put', 'strike']

for ticker, df_opts in options_raw.items():
    print(f"\n  Processing {ticker}...")

    # Add ticker column
    df_opts = df_opts.copy()
    df_opts['ticker'] = ticker

    # Parse each distinct symbol once (the same contract repeats across
    # quote timestamps), then attach the parsed fields by symbol. Naming the
    # columns explicitly keeps this working when a ticker has no rows.
    unique_symbols = df_opts['symbol'].dropna().unique()
    parsed_lookup = pd.DataFrame(
        [parse_opra_symbol(sym) for sym in unique_symbols],
        index=unique_symbols,
        columns=parsed_columns,
    )
    df_opts = df_opts.join(parsed_lookup, on='symbol')

    # Drop failed parses (rows whose symbol was missing or unparseable)
    before = len(df_opts)
    df_opts = df_opts.dropna(subset=parsed_columns)
    after = len(df_opts)

    print(f"    Parsed {after}/{before} symbols successfully")

    all_options.append(df_opts)

# Combine all tickers. pd.concat raises an opaque ValueError on an empty
# list, so fail with a message that points at the actual cause.
if not all_options:
    raise ValueError("No options data to combine - the options fetch returned nothing")

options_combined = pd.concat(all_options, ignore_index=False)

print(f"\n✓ Combined: {len(options_combined)} option contracts across {len(options_raw)} tickers")

In [None]:
# Calculate trading-day DTE
print("Calculating trading-day DTE...")

# Quote day per row as a tz-naive calendar date: the index is tz-aware while
# the parsed expirations are naive, and the two cannot be compared directly.
quote_days = options_combined.index.tz_localize(None).normalize()

# Many rows share the same (quote day, expiration) pair, so compute the DTE
# once per distinct pair instead of building a calendar schedule per row.
pair_keys = list(zip(quote_days, options_combined['expiration']))
dte_by_pair = {
    (quote_day, expiry): calculate_dte_trading_days(quote_day, expiry, nyse)
    for quote_day, expiry in set(pair_keys)
}

# Positional assignment: pair_keys is in row order
options_combined['dte'] = [dte_by_pair[key] for key in pair_keys]

print(f"  DTE range: {options_combined['dte'].min()} to {options_combined['dte'].max()} trading days")
print(f"\n✓ DTE calculated for {len(options_combined)} contracts")

In [None]:
# Keep only contracts inside the configured DTE window and of the configured type
print(f"\nFiltering: {CONFIG['min_dte']}-{CONFIG['max_dte']} DTE, type={CONFIG['option_type']}...")

before = len(options_combined)

# Both DTE bounds are inclusive
dte_in_window = options_combined['dte'].between(CONFIG['min_dte'], CONFIG['max_dte'])
is_wanted_type = options_combined['call_put'].eq(CONFIG['option_type'])

# Copy so later column assignments do not write into options_combined
candidates = options_combined.loc[dte_in_window & is_wanted_type].copy()

after = len(candidates)

print(f"  Filtered: {after} / {before} contracts ({after/before*100:.1f}%)")
print(f"\nBy ticker:")
print(candidates['ticker'].value_counts())

print(f"\n✓ Filtered candidates ready")

## Stage 6: Fetch Underlying Prices & Merge

In [None]:
# Fetch underlying prices at same timestamps
print("Fetching underlying equity prices at 15:45 ET...")

underlying_prices = {}  # ticker -> DataFrame

# Snapshot minute comes from CONFIG['collection_time'] ('HH:MM')
snap_hour, snap_minute = (int(part) for part in CONFIG['collection_time'].split(':'))

for ticker, dates in ticker_dates.items():
    print(f"\n  Fetching {ticker} spot prices...")

    # Calculate time range
    min_date = min(dates)
    max_date = max(dates)

    # Window runs from the snapshot minute on the first day to one minute
    # past the snapshot minute on the last day
    start_time = pd.Timestamp(min_date.date()).tz_localize(CONFIG['timezone']).replace(
        hour=snap_hour, minute=snap_minute
    )
    end_time = pd.Timestamp(max_date.date()).tz_localize(CONFIG['timezone']).replace(
        hour=snap_hour, minute=snap_minute
    ) + pd.Timedelta(minutes=1)

    try:
        data = client.timeseries.get_range(
            dataset=CONFIG['equity_dataset'],
            symbols=ticker,
            schema='ohlcv-1m',
            stype_in='raw_symbol',
            start=start_time,
            end=end_time,
        )

        df_equity = data.to_df(tz=CONFIG['timezone'])

        # The request spans whole days between the first and last date, so
        # keep only the bar stamped with the snapshot minute of each day.
        at_snapshot = (df_equity.index.hour == snap_hour) & (df_equity.index.minute == snap_minute)
        df_equity = df_equity[at_snapshot].copy()
        df_equity['ticker'] = ticker

        underlying_prices[ticker] = df_equity

        print(f"    ✓ {len(df_equity)} price points fetched")

    except Exception as e:
        # Best effort: report the failure and carry on with the next ticker
        print(f"    ✗ Error: {e}")

# Combine all underlying prices. pd.concat raises an opaque ValueError on an
# empty input, so fail with a message that points at the actual cause.
if not underlying_prices:
    raise ValueError("No underlying prices fetched - cannot price any option")

underlying_combined = pd.concat(underlying_prices.values())

print(f"\n✓ Underlying prices fetched for {len(underlying_prices)} tickers")

In [None]:
# Merge underlying prices with candidates
print("Merging underlying prices with options...")

# Extract close price and rename
underlying_combined['underlying_last'] = underlying_combined['close']

# The join key is (quote timestamp, ticker). pandas does not allow combining
# left_index/right_index with left_on/right_on, so the timestamp index is
# moved into a temporary column on both sides and restored afterwards.
merge_key = '_quote_ts'

spot = (
    underlying_combined[['ticker', 'underlying_last']]
    .rename_axis(merge_key)
    .reset_index()
    # One price per (timestamp, ticker) so the left join cannot duplicate option rows
    .drop_duplicates(subset=[merge_key, 'ticker'], keep='last')
)

original_index_name = candidates.index.name

candidates = (
    candidates
    # Makes the cell safe to re-run: avoids _x/_y suffixed duplicate columns
    .drop(columns=['underlying_last'], errors='ignore')
    .rename_axis(merge_key)
    .reset_index()
    .merge(spot, on=[merge_key, 'ticker'], how='left')
    .set_index(merge_key)
    .rename_axis(original_index_name)
)

# Check for missing underlying prices
missing = candidates['underlying_last'].isna().sum()
if missing > 0:
    print(f"  ⚠ Warning: {missing} options missing underlying price")
    candidates = candidates.dropna(subset=['underlying_last'])
    print(f"    Dropped, {len(candidates)} remaining")

print(f"\n✓ Underlying prices merged")

## Stage 7: Calculate IV and Delta

**Order matters**: IV first, then delta using actual IV

In [None]:
# Derive the price inputs used by the IV calculation
print("Calculating mid prices...")

# Only OHLCV bars are available here, so there is no real quote:
#   - mid is taken to be the bar close
#   - bid/ask are synthetic, placed 2% below/above the close
# Note: For production, should fetch actual bid/ask from mbp/trades schema
candidates = candidates.assign(
    bid=candidates['close'] * 0.98,  # Approximate bid
    ask=candidates['close'] * 1.02,  # Approximate ask
    mid=candidates['close'],         # Use close as mid
)

print(f"  ✓ Mid prices calculated (using close prices from OHLCV)")
print(f"    Note: For production, fetch actual bid/ask from mbp schema")

In [None]:
# Solve for implied volatility row by row
print("\nCalculating implied volatility...")

# compute_iv receives each row plus the configured risk-free rate
candidates['iv'] = candidates.apply(compute_iv, axis=1, r=CONFIG['risk_free_rate'])

# Report how many rows produced a usable IV
total = len(candidates)
valid_iv = candidates['iv'].count()

print(f"  IV calculated: {valid_iv} / {total} ({valid_iv/total*100:.1f}%)")
print(f"  IV range: {candidates['iv'].min():.2f} to {candidates['iv'].max():.2f}")
print(f"  IV mean: {candidates['iv'].mean():.2f}")

# Rows without an IV cannot be given a delta, so discard them
candidates = candidates[candidates['iv'].notna()]

print(f"\n✓ {len(candidates)} candidates with valid IV")

In [None]:
# Compute delta row by row from the IV solved in the previous step
print("Calculating delta...")

# calculate_delta receives each row plus the configured risk-free rate
candidates['delta'] = candidates.apply(calculate_delta, axis=1, r=CONFIG['risk_free_rate'])

# Report how many rows produced a usable delta
total = len(candidates)
valid_delta = candidates['delta'].count()

print(f"  Delta calculated: {valid_delta} / {total} ({valid_delta/total*100:.1f}%)")
print(f"  Delta range: {candidates['delta'].min():.3f} to {candidates['delta'].max():.3f}")
print(f"  Delta mean: {candidates['delta'].mean():.3f}")

# Magnitude of delta, kept alongside the signed value for reference
candidates['abs_delta'] = np.abs(candidates['delta'])

print(f"\n✓ Delta calculated using actual IV")

## Stage 8: Output & Analysis

In [None]:
# Record the quote day (index timestamp truncated to midnight) for tracking
candidates['collection_date'] = candidates.index.normalize()

# Columns carried into the final table, in output order
output_columns = [
    'ticker', 'symbol', 'expiration', 'call_put', 'strike', 'dte',
    'underlying_last', 'bid', 'ask', 'mid', 'volume',
    'iv', 'delta', 'abs_delta', 'collection_date'
]

# Project onto the output columns and order by ticker, then DTE, then strike
results = candidates.loc[:, output_columns].sort_values(['ticker', 'dte', 'strike'])

print(f"\n✓ Final results: {len(results)} candidates")
print(f"\nColumns: {list(results.columns)}")

In [None]:
# Build backtest-ready trade log that records mid premium as the cost basis for each fill.
CONTRACT_SIZE = 100

# Columns taken from the results table
backtest_columns = [
    'ticker', 'symbol', 'expiration', 'strike', 'dte', 'iv', 'delta',
    'mid', 'collection_date'
]

# Final column order written to disk
trade_log_columns = [
    'ticker', 'symbol', 'expiration', 'strike', 'dte', 'iv', 'delta',
    'collection_date', 'trade_date', 'mid', 'contract_size',
    'premium_per_contract', 'premium_total',
    'cost_basis_per_contract', 'cost_basis_total'
]

# Order by DTE then strike, renumber the rows, then derive the premium and
# cost-basis columns from mid (the *_total columns are mid * CONTRACT_SIZE).
trade_log = (
    results[backtest_columns]
    .sort_values(['dte', 'strike'])
    .reset_index(drop=True)
    .assign(
        contract_size=CONTRACT_SIZE,
        premium_per_contract=lambda frame: frame['mid'],
        premium_total=lambda frame: frame['mid'] * CONTRACT_SIZE,
        cost_basis_per_contract=lambda frame: frame['mid'],
        cost_basis_total=lambda frame: frame['premium_total'],
        trade_date=lambda frame: frame['collection_date'],
    )
)
trade_log = trade_log[trade_log_columns]

# Persist without the row index
backtest_log_path = Path("/Users/samuelminer/Projects/nissan_options/wheel_strategy/wheel_trade_log.csv")
trade_log.to_csv(backtest_log_path, index=False)

print("\nBacktest trade log ready for downstream analysis")
print(f"  Rows: {len(trade_log)} entries | contract size: {CONTRACT_SIZE}")
print(trade_log[['symbol', 'mid', 'premium_total']].head(3).to_string(index=False))
print(f"  Saved to: {backtest_log_path}")


In [None]:
# Per-ticker overview of the final candidate set
banner = "=" * 60
print("\n" + banner)
print("SUMMARY BY TICKER")
print(banner)

# sort=False keeps tickers in the order they first appear in results
for ticker, ticker_data in results.groupby('ticker', sort=False):
    print(f"\n{ticker}:")
    print(f"  Total candidates: {len(ticker_data)}")
    print(f"  DTE range: {ticker_data['dte'].min()}-{ticker_data['dte'].max()}")
    print(f"  Strike range: ${ticker_data['strike'].min():.2f} - ${ticker_data['strike'].max():.2f}")
    print(f"  IV range: {ticker_data['iv'].min():.2f} - {ticker_data['iv'].max():.2f}")
    print(f"  Delta range: {ticker_data['delta'].min():.3f} - {ticker_data['delta'].max():.3f}")
    print(f"  Abs Delta range: {ticker_data['abs_delta'].min():.3f} - {ticker_data['abs_delta'].max():.3f}")
    print(f"  Collection dates: {ticker_data['collection_date'].nunique()} unique days")

In [None]:
# Print the first ten rows of each ticker as a quick sanity check
banner = "=" * 60
print("\n" + banner)
print("SAMPLE RESULTS (First 10 per ticker)")
print(banner + "\n")

display_cols = ['ticker', 'symbol', 'expiration', 'strike', 'dte', 'iv', 'delta']

for ticker in results['ticker'].unique():
    print(f"\n{ticker}:")
    # Rows for this ticker, restricted to the display columns
    print(results.loc[results['ticker'].eq(ticker), display_cols].head(10))

In [None]:
# Write the final candidate table to disk (row index omitted)
output_path = Path("/Users/samuelminer/Projects/nissan_options/wheel_strategy/wheel_candidates.csv")
results.to_csv(path_or_buf=output_path, index=False)

# Confirm what was written
print(f"\n✓ Results saved to: {output_path}")
print(f"  Total candidates: {len(results)}")
print(f"  Tickers: {', '.join(results['ticker'].unique())}")

In [None]:
# Full results preview: a bare expression as the last line of a notebook cell
# is rendered as the cell's output (here, the final candidates table)
results