## Imports

In [1]:
from pathlib import Path
from dotenv import dotenv_values, load_dotenv
import sys
import os
import pandas as pd
import databento as db
import pandas_market_calendars as mcal

sys.executable

env_path = Path("/Users/samuelminer/Projects/nissan_options/wheel_strategy/.env")

print("Parsed keys:", dotenv_values(env_path).keys())

load_dotenv()  # loads .env from current working directory

assert os.getenv("DATABENTO_API_KEY"), "DATABENTO_API_KEY still not found"
print("os.getenv:", bool(os.getenv("DATABENTO_API_KEY")))
client = db.Historical()


Parsed keys: odict_keys(['DATABENTO_API_KEY', 'ANTHROPIC_API_KEY'])
os.getenv: True


### Import Daily Equity Data For a Single Symbol

In [2]:
# Cache setup
CACHE_DIR = '../cache/'
os.makedirs(CACHE_DIR, exist_ok=True)

symbol = "SPY"
dataset = "EQUS.MINI"     # consolidated US equities (best choice)
schema = "ohlcv-1d"       # DAILY bars
tz = "America/New_York"
days = 252*2

end = pd.Timestamp.utcnow().normalize() - pd.Timedelta(days=1)
start = end - pd.Timedelta(days=252*2)

# Generate cache filename
start_str = start.strftime('%Y%m%d')
end_str = end.strftime('%Y%m%d')
cache_file = os.path.join(CACHE_DIR, f"equity_daily_{symbol}_{start_str}_{end_str}.parquet")

# Check cache first
if os.path.exists(cache_file):
    print(f"[CACHE HIT] Loading daily equity data for {symbol} from cache")
    data = pd.read_parquet(cache_file)
    print(f"  Loaded {len(data)} days of data")
else:
    print(f"[API] Fetching daily equity data for {symbol} from {start.date()} to {end.date()}...")
    data = client.timeseries.get_range(
        dataset=dataset,
        symbols=symbol,
        schema=schema,
        stype_in="raw_symbol",
        start=start,
        end=end,
    )
    # Convert to DataFrame and save to cache
    data = data.to_df(tz=tz)
    data.to_parquet(cache_file)
    print(f"[CACHE SAVE] Saved {len(data)} days to cache")




[API] Fetching daily equity data for SPY from 2024-08-10 to 2025-12-27...


  data = client.timeseries.get_range(


[CACHE SAVE] Saved 346 days to cache


In [3]:
# data is already a DataFrame from cache or API fetch
equity_data = data
equity_data.head()

Unnamed: 0_level_0,rtype,publisher_id,instrument_id,open,high,low,close,volume,symbol
ts_event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2024-08-11 20:00:00-04:00,35,95,15144,534.08,535.74,530.99,533.34,1789266,SPY
2024-08-12 20:00:00-04:00,35,95,15144,533.79,542.28,533.66,542.0,2317529,SPY
2024-08-13 20:00:00-04:00,35,95,15144,542.23,544.95,540.13,543.75,1925298,SPY
2024-08-14 20:00:00-04:00,35,95,15144,544.03,553.99,543.56,553.57,2931628,SPY
2024-08-15 20:00:00-04:00,35,95,15144,552.94,555.02,550.14,553.99,1540725,SPY


### Equity Technical Filter

In [4]:

import pandas as pd

entry_technical_filter = equity_data.copy().sort_index()

# Bollinger Bands parameters
window = 20
k = 2.0  # 2-sigma Bollinger Bands

# Calculate rolling statistics on close price
roll = entry_technical_filter["close"].rolling(window=window, min_periods=window)
entry_technical_filter["sma20"] = roll.mean()
entry_technical_filter["std20"] = roll.std(ddof=0)

# Calculate Bollinger Bands
entry_technical_filter["bb_upper"] = entry_technical_filter["sma20"] + k * entry_technical_filter["std20"]
entry_technical_filter["bb_lower"] = entry_technical_filter["sma20"] - k * entry_technical_filter["std20"]

# Optional: Bollinger %B (position within bands)
entry_technical_filter["bb_pctb"] = (
    (entry_technical_filter["close"] - entry_technical_filter["bb_lower"]) / 
    (entry_technical_filter["bb_upper"] - entry_technical_filter["bb_lower"])
)

# Optional: Bollinger Bandwidth (width of bands relative to SMA)
entry_technical_filter["bb_bandwidth"] = (
    (entry_technical_filter["bb_upper"] - entry_technical_filter["bb_lower"]) / 
    entry_technical_filter["sma20"]
)

entry_technical_filter.dropna().head()

Unnamed: 0_level_0,rtype,publisher_id,instrument_id,open,high,low,close,volume,symbol,sma20,std20,bb_upper,bb_lower,bb_pctb,bb_bandwidth
ts_event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2024-09-08 20:00:00-04:00,35,95,15144,544.17,547.71,542.69,547.1,1806447,SPY,552.9465,8.227449,569.401397,536.491603,0.322348,0.059517
2024-09-09 20:00:00-04:00,35,95,15144,546.14,549.15,543.41,548.3,2006267,SPY,553.6945,6.999297,567.693095,539.695905,0.30732,0.050564
2024-09-10 20:00:00-04:00,35,95,15144,547.52,555.36,539.95,554.32,3341284,SPY,554.3105,6.464689,567.239878,541.381122,0.500367,0.04665
2024-09-11 20:00:00-04:00,35,95,15144,555.61,559.53,552.76,559.26,2488752,SPY,555.086,6.069553,567.225106,542.946894,0.671924,0.043738
2024-09-12 20:00:00-04:00,35,95,15144,560.45,563.02,559.45,561.55,2079323,SPY,555.485,6.217276,567.919553,543.050447,0.743877,0.04477


### Equity Technical Filter

In [5]:
# With BB Filter
df_equity_entry = entry_technical_filter.copy()[['close','sma20','bb_upper']].dropna()
df_equity_entry['bb_entry'] = df_equity_entry['close'] <= df_equity_entry['bb_upper']
df_equity_entry[['bb_entry']].value_counts()
df_equity_entry.head()


Unnamed: 0_level_0,close,sma20,bb_upper,bb_entry
ts_event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-09-08 20:00:00-04:00,547.1,552.9465,569.401397,True
2024-09-09 20:00:00-04:00,548.3,553.6945,567.693095,True
2024-09-10 20:00:00-04:00,554.32,554.3105,567.239878,True
2024-09-11 20:00:00-04:00,559.26,555.086,567.225106,True
2024-09-12 20:00:00-04:00,561.55,555.485,567.919553,True


### Get Options Data For Dates that Pass Technical Filter

In [14]:
import databento as db
import pandas as pd

client = db.Historical()

dataset = "OPRA.PILLAR"
schema = "cmbp-1"
tz = "America/New_York"

start = pd.Timestamp("2023-06-06 15:45", tz=tz)
end   = start + pd.Timedelta(minutes=1)

# Generate cache filename for options data
date_str = start.strftime('%Y%m%d')
time_str = start.strftime('%H%M')
cache_file = os.path.join(CACHE_DIR, f"options_{symbol}_{date_str}_{time_str}.parquet")

# Check cache first
if os.path.exists(cache_file):
    print(f"[CACHE HIT] Loading options data for {symbol} on {start.date()} at {start.time()}")
    df_opts = pd.read_parquet(cache_file)
    print(f"  Loaded {len(df_opts)} option quotes")
else:
    print(f"[API] Fetching options for {symbol} on {start.date()} at {start.time()}...")
    data = client.timeseries.get_range(
        dataset=dataset,
        schema=schema,
        symbols=f"{symbol}.OPT",     # ✅ parent symbology format
        stype_in="parent",           # ✅ parent lookup
        start=start,
        end=end,
    )
    
    df_opts = data.to_df(tz=tz).sort_values("ts_event")
    
    # Save to cache
    df_opts.to_parquet(cache_file)
    print(f"[CACHE SAVE] Saved {len(df_opts)} option quotes to cache")

df_opts.head()


[CACHE HIT] Loading options data for SPY on 2023-06-06 at 15:45:00
  Loaded 1458901 option quotes


Unnamed: 0_level_0,ts_event,rtype,publisher_id,instrument_id,action,side,price,size,flags,ts_in_delta,bid_px_00,ask_px_00,bid_sz_00,ask_sz_00,bid_pb_00,ask_pb_00,symbol
ts_recv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2023-06-06 15:45:00.000066500-04:00,2023-06-06 15:44:59.999859712-04:00,177,30,620759706,A,B,1.31,170,194,0,1.31,1.33,170,222,0,0,SPY 230620C00434000
2023-06-06 15:45:00.000113742-04:00,2023-06-06 15:44:59.999903488-04:00,177,30,654313925,A,B,1.36,191,194,0,1.36,1.37,191,280,0,0,SPY 230608P00428000
2023-06-06 15:45:00.000142456-04:00,2023-06-06 15:44:59.999935744-04:00,177,30,637534763,A,A,2.08,212,194,0,2.06,2.08,446,212,0,0,SPY 230707P00414000
2023-06-06 15:45:00.000146549-04:00,2023-06-06 15:44:59.999940096-04:00,177,30,620759003,A,A,3.13,354,194,0,2.98,3.13,33,354,0,0,SPY 230606C00425000
2023-06-06 15:45:00.000153372-04:00,2023-06-06 15:44:59.999944448-04:00,177,30,654311943,A,B,4.29,212,194,0,4.29,4.34,212,200,0,0,SPY 230616P00429000


In [15]:
sym = df_opts["symbol"]

# Split ROOT and OPRA code (e.g. "AAPL" and "240119P00205000")
root_and_code = sym.str.split(expand=True)
df_opts["root"] = root_and_code[0]
code = root_and_code[1]

# Expiration: YYMMDD in positions 0–5
df_opts["expiration"] = pd.to_datetime(code.str[:6], format="%y%m%d")

# Call/Put flag: single char at position 6
df_opts["call_put"] = code.str[6]

# Strike: remaining digits, usually in 1/1000 dollars
# Example: "00205000" -> 205.000
strike_int = code.str[7:].astype("int32")
df_opts["strike"] = strike_int / 1000.0

# Calculate DTE (Days to Expiry)
# Localize expiration to match ts_event timezone, then normalize both to midnight
expiration_tz = df_opts["expiration"].dt.tz_localize(df_opts["ts_event"].dt.tz)
df_opts["dte"] = (expiration_tz - df_opts["ts_event"].dt.normalize()).dt.days
print(f'df shape: {df_opts.shape}')
df_opts.head()



df shape: (1458901, 22)


Unnamed: 0_level_0,ts_event,rtype,publisher_id,instrument_id,action,side,price,size,flags,ts_in_delta,...,bid_sz_00,ask_sz_00,bid_pb_00,ask_pb_00,symbol,root,expiration,call_put,strike,dte
ts_recv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-06-06 15:45:00.000066500-04:00,2023-06-06 15:44:59.999859712-04:00,177,30,620759706,A,B,1.31,170,194,0,...,170,222,0,0,SPY 230620C00434000,SPY,2023-06-20,C,434.0,14
2023-06-06 15:45:00.000113742-04:00,2023-06-06 15:44:59.999903488-04:00,177,30,654313925,A,B,1.36,191,194,0,...,191,280,0,0,SPY 230608P00428000,SPY,2023-06-08,P,428.0,2
2023-06-06 15:45:00.000142456-04:00,2023-06-06 15:44:59.999935744-04:00,177,30,637534763,A,A,2.08,212,194,0,...,446,212,0,0,SPY 230707P00414000,SPY,2023-07-07,P,414.0,31
2023-06-06 15:45:00.000146549-04:00,2023-06-06 15:44:59.999940096-04:00,177,30,620759003,A,A,3.13,354,194,0,...,33,354,0,0,SPY 230606C00425000,SPY,2023-06-06,C,425.0,0
2023-06-06 15:45:00.000153372-04:00,2023-06-06 15:44:59.999944448-04:00,177,30,654311943,A,B,4.29,212,194,0,...,212,200,0,0,SPY 230616P00429000,SPY,2023-06-16,P,429.0,10


In [None]:
df_opts[['instrument_id', 'action', 'side',
       'price', 'size', 'flags', 'ts_in_delta', 'bid_px_00', 'ask_px_00',
       'bid_sz_00', 'ask_sz_00', 'bid_pb_00', 'ask_pb_00', 'symbol',
       'expiration', 'call_put', 'strike', 'dte']]

       

Unnamed: 0_level_0,instrument_id,action,side,price,size,flags,ts_in_delta,bid_px_00,ask_px_00,bid_sz_00,ask_sz_00,bid_pb_00,ask_pb_00,symbol,expiration,call_put,strike,dte
ts_recv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2023-06-06 15:45:00.000066500-04:00,620759706,A,B,1.31,170,194,0,1.31,1.33,170,222,0,0,SPY 230620C00434000,2023-06-20,C,434.0,14
2023-06-06 15:45:00.000113742-04:00,654313925,A,B,1.36,191,194,0,1.36,1.37,191,280,0,0,SPY 230608P00428000,2023-06-08,P,428.0,2
2023-06-06 15:45:00.000142456-04:00,637534763,A,A,2.08,212,194,0,2.06,2.08,446,212,0,0,SPY 230707P00414000,2023-07-07,P,414.0,31
2023-06-06 15:45:00.000146549-04:00,620759003,A,A,3.13,354,194,0,2.98,3.13,33,354,0,0,SPY 230606C00425000,2023-06-06,C,425.0,0
2023-06-06 15:45:00.000153372-04:00,654311943,A,B,4.29,212,194,0,4.29,4.34,212,200,0,0,SPY 230616P00429000,2023-06-16,P,429.0,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-06-06 15:45:59.999214538-04:00,620759202,A,A,0.01,6190,194,0,,0.01,0,6190,0,0,SPY 230615C00476000,2023-06-15,C,476.0,9
2023-06-06 15:45:59.999515227-04:00,620757892,A,A,0.01,6363,194,0,,0.01,0,6363,0,0,SPY 230615C00477000,2023-06-15,C,477.0,9
2023-06-06 15:45:59.999568159-04:00,654313354,A,B,6.09,587,194,0,6.09,6.11,587,258,0,0,SPY 230818P00419000,2023-08-18,P,419.0,73
2023-06-06 15:45:59.999790423-04:00,637534547,A,A,1.41,1081,194,0,1.39,1.41,6070,1081,0,0,SPY 230721P00397000,2023-07-21,P,397.0,45


In [None]:
# Compute derived columns
df_opts['spread'] = df_opts['ask_px_00'] - df_opts['bid_px_00']
df_opts['mid_price'] = (df_opts['bid_px_00'] + df_opts['ask_px_00']) / 2

# Group by contract_id and aggregate
agg_df = df_opts.groupby('instrument_id').agg({
    'spread': ['mean', 'min', 'max', 'std', 'count'],  # Stats on bid-ask spread
    'mid_price': ['mean', 'std'],  # Price level and volatility]
    'action': [''],
    # 'volume': ['sum', 'mean'],  # Total and avg trade volume (ignores quotes if volume=0)
    # 'trade_price': ['count'],  # Number of trades (non-NaN)
    # 'timestamp': ['min', 'max']  # Time range for the minute
})

# Flatten column names for easier access
agg_df.columns = ['_'.join(col).strip() for col in agg_df.columns.values]
agg_df


Unnamed: 0_level_0,spread_mean,spread_min,spread_max,spread_std,spread_count,mid_price_mean,mid_price_std,action_first
instrument_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
603979777,0.010000,0.01,0.01,0.000000,17,1.205000,0.000000,A
603979778,0.684184,0.66,0.72,0.019045,98,46.862092,0.009523,A
603979779,0.010000,0.01,0.01,0.000000,50,0.035000,0.000000,A
603979780,1.338182,1.16,2.30,0.361619,22,178.129091,0.148393,A
603979781,1.364478,1.03,2.66,0.567382,67,116.625821,0.135746,A
...,...,...,...,...,...,...,...,...
654314145,0.010000,0.01,0.01,0.000000,802,0.505000,0.000000,A
654314146,0.029781,0.01,0.13,0.016124,1097,3.991800,0.015037,A
654314147,0.238067,0.22,0.27,0.008935,269,6.356394,0.003816,A
654314148,0.213627,0.17,0.28,0.023872,102,42.657304,0.023226,A


In [40]:
df_opts_2 = df_opts[(df_opts['dte'] <= 45) & (df_opts['call_put'] == 'P') & (df_opts['price']>10)].sort_values(['dte', 'strike'])
print(f'df shape: {df_opts_2.shape}')
df_opts_2.head()


df shape: (72370, 22)


Unnamed: 0_level_0,ts_event,rtype,publisher_id,instrument_id,action,side,price,size,flags,ts_in_delta,...,bid_sz_00,ask_sz_00,bid_pb_00,ask_pb_00,symbol,root,expiration,call_put,strike,dte
ts_recv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-06-06 15:45:00.002189154-04:00,2023-06-06 15:45:00.001978368-04:00,177,30,654312995,A,A,10.02,105,194,0,...,8,105,0,0,SPY 230606P00438000,SPY,2023-06-06,P,438.0,0
2023-06-06 15:45:00.003543603-04:00,2023-06-06 15:45:00.003333376-04:00,177,30,654312995,A,A,10.02,5,194,0,...,8,5,0,0,SPY 230606P00438000,SPY,2023-06-06,P,438.0,0
2023-06-06 15:45:00.003543603-04:00,2023-06-06 15:45:00.003333376-04:00,177,30,654312995,A,A,10.04,8,194,0,...,8,8,0,0,SPY 230606P00438000,SPY,2023-06-06,P,438.0,0
2023-06-06 15:45:00.042445055-04:00,2023-06-06 15:45:00.042233344-04:00,177,30,654312995,A,A,10.04,5,194,0,...,30,5,0,0,SPY 230606P00438000,SPY,2023-06-06,P,438.0,0
2023-06-06 15:45:00.044174683-04:00,2023-06-06 15:45:00.043963392-04:00,177,30,654312995,A,A,10.06,30,194,0,...,8,30,0,0,SPY 230606P00438000,SPY,2023-06-06,P,438.0,0


In [61]:
df_opts_2.expiration.value_counts().sort_index()

expiration
2023-06-06    11038
2023-06-07     9336
2023-06-08     5205
2023-06-09     5700
2023-06-12     5270
2023-06-13     5240
2023-06-14     5013
2023-06-15     3762
2023-06-16     7253
2023-06-20      690
2023-06-23     3118
2023-06-30     2306
2023-07-07     2094
2023-07-14     2586
2023-07-21     3759
Name: count, dtype: int64

In [58]:
df_opts_2[df_opts_2['instrument_id']==654314066].dte.value_counts()

dte
2    104
Name: count, dtype: int64

In [45]:
instrument_id = df_opts_2.instrument_id.unique()
instrument_id


array([654312995, 654312566, 654313452, 654312281, 654311675, 654313170,
       654314054, 654313009, 654313609, 654311686, 654312718, 654314052,
       654313607, 654311808, 654312716, 654314053, 654313606, 654312256,
       654313159, 654312121, 654311829, 654312717, 654313604, 654312278,
       654313169, 654312715, 654314051, 654312255, 654311807, 654312714,
       654313605, 654313156, 654311826, 654314050, 654312277, 654313166,
       654312712, 654313155, 654312713, 654313603, 654313165, 654312711,
       654314048, 654312306, 654312741, 654313966, 654312777, 654313018,
       654313476, 654313463, 654312726, 654312259, 654312290, 654313608,
       654313626, 654314065, 654313171, 654313181, 654313623, 654313927,
       654313168, 654314062, 654312267, 654313178, 654311837, 654314060,
       654312289, 654311812, 654312722, 654311834, 654314059, 654313621,
       654311815, 654314058, 654312286, 654312721, 654312293, 654312285,
       654311811, 654313611, 654311830, 654314056, 

In [8]:
df_opts = df_opts[(df_opts['dte'] >= 30) & (df_opts['dte'] <= 45) & (df_opts['call_put'] == 'P')].sort_values(['dte', 'strike'])
print(f'df shape: {df_opts.shape}')
df_opts.head()


df shape: (146512, 22)


Unnamed: 0_level_0,ts_event,rtype,publisher_id,instrument_id,action,side,price,size,flags,ts_in_delta,...,bid_sz_00,ask_sz_00,bid_pb_00,ask_pb_00,symbol,root,expiration,call_put,strike,dte
ts_recv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-06-06 15:45:06.138724631-04:00,2023-06-06 15:45:06.138517760-04:00,177,30,637534562,A,A,0.06,5071,194,0,...,622,5071,0,0,SPY 230707P00300000,SPY,2023-07-07,P,300.0,31
2023-06-06 15:45:06.158245424-04:00,2023-06-06 15:45:06.158038784-04:00,177,30,637534562,A,A,0.06,3496,194,0,...,622,3496,0,0,SPY 230707P00300000,SPY,2023-07-07,P,300.0,31
2023-06-06 15:45:06.227109991-04:00,2023-06-06 15:45:06.226903040-04:00,177,30,637534562,A,A,0.06,3880,194,0,...,798,3880,0,0,SPY 230707P00300000,SPY,2023-07-07,P,300.0,31
2023-06-06 15:45:06.291840057-04:00,2023-06-06 15:45:06.291631872-04:00,177,30,637534562,A,A,0.06,3880,194,0,...,622,3880,0,0,SPY 230707P00300000,SPY,2023-07-07,P,300.0,31
2023-06-06 15:45:06.318402457-04:00,2023-06-06 15:45:06.318195200-04:00,177,30,637534562,A,A,0.06,3880,194,0,...,762,3880,0,0,SPY 230707P00300000,SPY,2023-07-07,P,300.0,31


In [9]:
# Get unique timestamps from your filtered options
unique_timestamps = df_opts.index.unique()

# For a specific moment, you can use the exact timestamp
# Since your data is at 2023-06-06 15:45:00-04:00, we'll fetch a narrow window around it
start_time = pd.Timestamp("2023-06-06 15:45:00", tz="US/Eastern")
end_time = pd.Timestamp("2023-06-06 15:46:00", tz="US/Eastern")  # 1 minute window

# Generate cache filename for minute equity data
date_str = start_time.strftime('%Y%m%d')
time_str = start_time.strftime('%H%M')
cache_file = os.path.join(CACHE_DIR, f"equity_minute_{symbol}_{date_str}_{time_str}.parquet")

# Check cache first
if os.path.exists(cache_file):
    print(f"[CACHE HIT] Loading minute equity data for {symbol} on {start_time.date()} at {start_time.time()}")
    equity_df = pd.read_parquet(cache_file)
    print(f"  Loaded {len(equity_df)} minute records")
else:
    print(f"[API] Fetching minute equity data for {symbol} on {start_time.date()} at {start_time.time()}...")

    # Fetch OHLCV data for TSLA at the specific timestamp
    equity_data = client.timeseries.get_range(
        dataset='XNAS.ITCH',  # NASDAQ for TSLA
        symbols=[f'{symbol}'],
        schema='ohlcv-1m',  # 1-minute OHLCV bars
        start=start_time,
        end=end_time,
        stype_in='raw_symbol'
    )

    # Convert to dataframe
    equity_df = equity_data.to_df()
    print(f"[CACHE SAVE] Saved {len(equity_df)} minute records to cache")
    equity_df.to_parquet(cache_file)

print(f"Total: {len(equity_df)} equity records")
equity_df


[CACHE HIT] Loading minute equity data for SPY on 2023-06-06 at 15:45:00
  Loaded 1 minute records
Total: 1 equity records


Unnamed: 0_level_0,rtype,publisher_id,instrument_id,open,high,low,close,volume,symbol
ts_event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-06-06 19:45:00+00:00,33,2,9623,428.02,428.08,427.99,428.005,29023,SPY


In [10]:
import numpy as np
import pandas as pd
from py_vollib.black_scholes.implied_volatility import implied_volatility
from py_vollib.black_scholes.greeks.analytical import delta

r = 0.04  # fixed risk-free rate (4% as decimal for py_vollib)

# 0) Keep only rows that actually have a quote (bid/ask)
quotes = df_opts[df_opts["bid_px_00"].notna() & df_opts["ask_px_00"].notna()].copy()

# 1) Compute mid price per tick
quotes["mid"] = (quotes["bid_px_00"] + quotes["ask_px_00"]) / 2

# 2) Collapse to ONE row per option contract (snapshot at ~3:45 pm)
chain_snapshot = (
    quotes
    .sort_values("ts_event")   # important: so tail(1) is the latest
    .groupby(["symbol", "expiration", "strike", "call_put"])
    .tail(1)                   # last quote for each contract
    .copy()
)
underlying_price = equity_df["close"].iloc[0]   # 15:45 close
chain_snapshot["underlying_last"] = underlying_price


In [11]:
def compute_iv(row):
    price = row["mid"]
    S     = row["underlying_last"]
    K     = row["strike"]
    t     = row["dte"] / 365.0
    flag  = "p" if row["call_put"] == "P" else "c"

    if not (np.isfinite(price) and np.isfinite(S) and np.isfinite(K) and t > 0):
        return np.nan
    if price <= 0 or S <= 0 or K <= 0:
        return np.nan

    try:
        return implied_volatility(price, S, K, t, r, flag)
    except Exception:
        return np.nan


def compute_delta(row):
    sigma = row["iv"]
    if not np.isfinite(sigma):
        return np.nan

    S    = row["underlying_last"]
    K    = row["strike"]
    t    = row["dte"] / 365.0
    flag = "p" if row["call_put"] == "P" else "c"

    return abs(delta(flag, S, K, t, r, sigma))

chain_snapshot["iv"] = chain_snapshot.apply(compute_iv, axis=1)
chain_snapshot["delta"] = chain_snapshot.apply(compute_delta, axis=1)

chain_snapshot.head()

Unnamed: 0_level_0,ts_event,rtype,publisher_id,instrument_id,action,side,price,size,flags,ts_in_delta,...,symbol,root,expiration,call_put,strike,dte,mid,underlying_last,iv,delta
ts_recv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-06-06 15:45:40.419400236-04:00,2023-06-06 15:45:40.419193088-04:00,177,30,637534903,A,A,0.03,8787,194,0,...,SPY 230721P00210000,SPY,2023-07-21,P,210.0,45,0.025,428.005,0.675841,0.000845
2023-06-06 15:45:48.234771493-04:00,2023-06-06 15:45:48.234565888-04:00,177,30,637535222,A,A,0.1,7343,194,0,...,SPY 230714P00305000,SPY,2023-07-14,P,305.0,38,0.095,428.005,0.423233,0.004942
2023-06-06 15:45:49.405008639-04:00,2023-06-06 15:45:49.404801536-04:00,177,30,637534586,A,A,0.11,8065,194,0,...,SPY 230714P00310000,SPY,2023-07-14,P,310.0,38,0.105,428.005,0.409726,0.005582
2023-06-06 15:45:52.217828686-04:00,2023-06-06 15:45:52.217622016-04:00,177,30,637534711,A,A,0.03,4983,194,0,...,SPY 230721P00215000,SPY,2023-07-21,P,215.0,45,0.025,428.005,0.654846,0.000872
2023-06-06 15:45:52.575009729-04:00,2023-06-06 15:45:52.574802688-04:00,177,30,637534770,A,A,7.73,1024,194,0,...,SPY 230707P00432000,SPY,2023-07-07,P,432.0,31,7.595,428.005,0.125211,0.556959


In [12]:
chain_snapshot.delta.describe()

count    364.000000
mean       0.352222
std        0.348218
min        0.000390
25%        0.041216
50%        0.182942
75%        0.767141
max        0.923370
Name: delta, dtype: float64

In [42]:
chain_snapshot['date'] = chain_snapshot['ts_event'].dt.date

candidates = chain_snapshot[
    (chain_snapshot["call_put"] == "P")
    & chain_snapshot["dte"].between(30, 45)
    & chain_snapshot["delta"].abs().between(0.25, 0.35)
].copy()

candidates[["symbol", "expiration", "strike", "dte", "iv", "delta",'mid']].sort_values(
    ["dte", "strike"]
)
candidates

Unnamed: 0_level_0,ts_event,rtype,publisher_id,instrument_id,action,side,price,size,flags,ts_in_delta,...,root,expiration,call_put,strike,dte,mid,underlying_last,iv,delta,date
ts_recv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-06-06 15:45:59.404086577-04:00,2023-06-06 15:45:59.403880704-04:00,177,30,637534990,A,B,3.9,544,194,0,...,SPY,2023-07-14,P,421.0,38,3.91,428.005,0.137318,0.312566,2023-06-06
2023-06-06 15:45:59.563591370-04:00,2023-06-06 15:45:59.563385344-04:00,177,30,637534965,A,A,3.09,165,194,0,...,SPY,2023-07-07,P,420.0,31,3.085,428.005,0.137832,0.282752,2023-06-06
2023-06-06 15:45:59.691432218-04:00,2023-06-06 15:45:59.691225344-04:00,177,30,637535385,A,A,4.17,158,194,0,...,SPY,2023-07-14,P,422.0,38,4.16,428.005,0.135585,0.329953,2023-06-06
2023-06-06 15:45:59.691607327-04:00,2023-06-06 15:45:59.691400704-04:00,177,30,637534556,A,A,3.69,150,194,0,...,SPY,2023-07-07,P,422.5,31,3.68,428.005,0.133218,0.329839,2023-06-06
2023-06-06 15:45:59.695747125-04:00,2023-06-06 15:45:59.695540480-04:00,177,30,637534347,A,A,3.32,252,194,0,...,SPY,2023-07-07,P,421.0,31,3.31,428.005,0.135975,0.300843,2023-06-06
2023-06-06 15:45:59.705935367-04:00,2023-06-06 15:45:59.705729792-04:00,177,30,637534533,A,B,4.67,360,194,0,...,SPY,2023-07-21,P,422.0,45,4.675,428.005,0.136085,0.336219,2023-06-06
2023-06-06 15:45:59.707323626-04:00,2023-06-06 15:45:59.707118080-04:00,177,30,637534545,A,B,3.96,523,194,0,...,SPY,2023-07-21,P,419.0,45,3.965,428.005,0.141265,0.290159,2023-06-06
2023-06-06 15:45:59.725496937-04:00,2023-06-06 15:45:59.725291264-04:00,177,30,637534580,A,B,3.26,359,194,0,...,SPY,2023-07-14,P,418.0,38,3.27,428.005,0.14302,0.265696,2023-06-06
2023-06-06 15:45:59.798637478-04:00,2023-06-06 15:45:59.798430976-04:00,177,30,637534275,A,B,4.18,744,194,0,...,SPY,2023-07-21,P,420.0,45,4.19,428.005,0.13957,0.304888,2023-06-06
2023-06-06 15:45:59.808320450-04:00,2023-06-06 15:45:59.808113920-04:00,177,30,637535167,A,B,3.67,441,194,0,...,SPY,2023-07-14,P,420.0,38,3.68,428.005,0.139154,0.296084,2023-06-06


In [43]:
backtest_candidates = candidates.copy()
backtest_candidates['cost_basis'] = backtest_candidates['underlying_last']*100 - backtest_candidates['mid']*100
backtest_candidates['per_share_premium'] = backtest_candidates['mid']
backtest_candidates['premium'] = backtest_candidates['per_share_premium']*100
backtest_candidates['exit_pct'] = 0.75  # Exit when option is at 25% of original premium (we keep 75% as profit)
backtest_candidates['exit_price_per_share'] = backtest_candidates['per_share_premium']*backtest_candidates['exit_pct']  # Buy back at 25% of premium
backtest_candidates = backtest_candidates[['symbol', 'cost_basis', 'premium', 'exit_pct','exit_price_per_share','date','dte','expiration','mid','strike']]
backtest_candidates

Unnamed: 0_level_0,symbol,cost_basis,premium,exit_pct,exit_price_per_share,date,dte,expiration,mid,strike
ts_recv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-06-06 15:45:59.404086577-04:00,SPY 230714P00421000,42409.5,391.0,0.75,2.9325,2023-06-06,38,2023-07-14,3.91,421.0
2023-06-06 15:45:59.563591370-04:00,SPY 230707P00420000,42492.0,308.5,0.75,2.31375,2023-06-06,31,2023-07-07,3.085,420.0
2023-06-06 15:45:59.691432218-04:00,SPY 230714P00422000,42384.5,416.0,0.75,3.12,2023-06-06,38,2023-07-14,4.16,422.0
2023-06-06 15:45:59.691607327-04:00,SPY 230707P00422500,42432.5,368.0,0.75,2.76,2023-06-06,31,2023-07-07,3.68,422.5
2023-06-06 15:45:59.695747125-04:00,SPY 230707P00421000,42469.5,331.0,0.75,2.4825,2023-06-06,31,2023-07-07,3.31,421.0
2023-06-06 15:45:59.705935367-04:00,SPY 230721P00422000,42333.0,467.5,0.75,3.50625,2023-06-06,45,2023-07-21,4.675,422.0
2023-06-06 15:45:59.707323626-04:00,SPY 230721P00419000,42404.0,396.5,0.75,2.97375,2023-06-06,45,2023-07-21,3.965,419.0
2023-06-06 15:45:59.725496937-04:00,SPY 230714P00418000,42473.5,327.0,0.75,2.4525,2023-06-06,38,2023-07-14,3.27,418.0
2023-06-06 15:45:59.798637478-04:00,SPY 230721P00420000,42381.5,419.0,0.75,3.1425,2023-06-06,45,2023-07-21,4.19,420.0
2023-06-06 15:45:59.808320450-04:00,SPY 230714P00420000,42432.5,368.0,0.75,2.76,2023-06-06,38,2023-07-14,3.68,420.0


In [44]:
def fetch_daily_prices_for_option(symbol, entry_date, expiration_date, client, config):
    """
    Fetch daily OHLC prices for an option from entry date to expiration.

    Args:
        symbol: Option symbol
        entry_date: Entry date (normalized)
        expiration_date: Expiration date (normalized)
        client: Databento client
        config: Configuration dict

    Returns:
        DataFrame with daily OHLC data
    """
    # Generate cache filename for daily option prices
    entry_str = entry_date.strftime('%Y%m%d')
    exp_str = expiration_date.strftime('%Y%m%d')
    cache_file = os.path.join(CACHE_DIR, f"option_daily_{symbol}_{entry_str}_{exp_str}.parquet")

    # Check cache first
    if os.path.exists(cache_file):
        print(f"    [CACHE HIT] Loading daily prices for {symbol}")
        return pd.read_parquet(cache_file)

    # Cache miss - fetch from API
    print(f"    [API] Fetching daily prices for {symbol} from {entry_date.date()} to {expiration_date.date()}")

    start_daily = entry_date + pd.Timedelta(days=1)  # Day after entry
    end_daily = expiration_date + pd.Timedelta(days=1)  # Include expiration day

    daily_data = client.timeseries.get_range(
        dataset='OPRA.PILLAR',
        schema='ohlcv-1d',
        symbols=symbol,
        stype_in='raw_symbol',
        start=start_daily,
        end=end_daily,
    )

    df_daily = daily_data.to_df(tz=config['timezone'])

    # Save to cache
    df_daily.to_parquet(cache_file)
    print(f"    [CACHE SAVE] Saved {len(df_daily)} days to cache")

    return df_daily


def check_profit_target_hit(df_daily, exit_price_per_share, entry_date):
    """
    Check if the exit price target was hit in the daily price data.

    Args:
        df_daily: DataFrame with daily OHLC data (prices are per-share)
        exit_price_per_share: Target price per share to exit at
        entry_date: Entry date to skip (we can't exit same day we entered)

    Returns:
        tuple: (hit_date, daily_row) if hit, (None, None) if not hit
    """
    for check_date, daily_row in df_daily.iterrows():
        # Skip the entry date - we can't exit on the same day we entered
        check_date_normalized = check_date.tz_localize(None) if hasattr(check_date, 'tz_localize') and check_date.tz else check_date
        if check_date_normalized.date() <= entry_date.date():
            continue
            
        daily_low = daily_row['low']
        daily_high = daily_row['high']

        # Check if our exit target (per-share) is within the daily range
        if daily_low <= exit_price_per_share <= daily_high:
            return check_date, daily_row

    return None, None


def create_exit_record(symbol, entry_date, expiration_date, premium, exit_pct,
                       exit_price, exit_reason, check_date, daily_row, cost_basis):
    """
    Create an exit record dictionary.

    Args:
        symbol: Option symbol
        entry_date: Entry date
        expiration_date: Expiration date
        premium: Premium received
        exit_pct: Exit percentage (e.g., 0.25 = exit when decays 25%)
        exit_price: Actual exit price
        exit_reason: Reason for exit
        check_date: Date of exit
        daily_row: Daily price data row
        cost_basis: Cost basis (strike * 100)

    Returns:
        dict: Exit record
    """
    return {
        'symbol': symbol,
        'entry_date': entry_date,
        'exit_date': check_date.tz_localize(None) if hasattr(check_date, 'tz_localize') and check_date.tz else check_date,
        'expiration': expiration_date,
        'cost_basis': cost_basis,
        'premium': premium,
        'exit_pct': exit_pct,
        'exit_price': exit_price,
        'exit_reason': exit_reason,
        'days_held': (check_date.tz_localize(None) - entry_date).days if check_date else None,
        'daily_low': daily_row['low'] if daily_row is not None else None,
        'daily_high': daily_row['high'] if daily_row is not None else None,
    }


def calculate_pnl_metrics(exits_df):
    """
    Calculate P&L metrics for exit results.

    Args:
        exits_df: DataFrame with exit records

    Returns:
        DataFrame with P&L metrics added
    """
    if len(exits_df) > 0:
        exits_df = exits_df.copy()
        exits_df['exit_pnl'] = exits_df['premium'] - exits_df['exit_price']
        exits_df['exit_pnl_pct'] = (exits_df['exit_pnl'] / exits_df['premium']) * 100
        exits_df['roc'] = (exits_df['exit_pnl'] / exits_df['cost_basis']) * 100

    return exits_df


def backtest_exit_strategy(backtest_candidates, client, config):
    """
    Backtest exit strategy for wheel options

    Exit conditions:
    1. Profit target: Exit when option price <= premium * (1 - exit_pct)
       - If daily range contains exit_price_target, assume we exited at that exact price

    Args:
        backtest_candidates: DataFrame with options to backtest
        client: Databento client
        config: Configuration dict

    Returns:
        DataFrame with exit results
    """
    exits = []

    for idx, row in backtest_candidates.iterrows():
        symbol = row['symbol']

        # Normalize dates
        entry_date = pd.Timestamp(row['date']).tz_localize(None)
        expiration_date = pd.Timestamp(row['expiration']).tz_localize(None)

        # Entry details - work with per-share prices for comparison, contract prices for P&L
        premium_per_share = row['mid']
        premium = premium_per_share * 100  # Contract premium (100 shares per contract)
        exit_pct = row['exit_pct']  # e.g., 0.25 = exit when option is at 25% of original premium
        exit_price_per_share = premium_per_share * exit_pct  # Per-share exit price (buy back at this price)
        exit_price_contract = exit_price_per_share * 100  # Contract exit price for P&L
        cost_basis = row['strike'] * 100  # Contract cost basis

        print(f"\nProcessing {symbol}...")
        print(f"  Entry: {entry_date.date()}, Premium: ${premium:.2f} (${premium_per_share:.2f}/share)")
        print(f"  Exit target: ${exit_price_contract:.2f} (${exit_price_per_share:.2f}/share, exit at {exit_pct*100:.0f}% of premium)")

        try:
            # Fetch daily prices
            df_daily = fetch_daily_prices_for_option(symbol, entry_date, expiration_date, client, config)

            # Check for profit target hit (using per-share prices, skipping entry date)
            hit_date, daily_row = check_profit_target_hit(df_daily, exit_price_per_share, entry_date)

            if hit_date:
                # Profit target hit - record exit at contract level
                exit_record = create_exit_record(
                    symbol, entry_date, expiration_date, premium, exit_pct,
                    exit_price_contract, 'profit_target', hit_date, daily_row, cost_basis
                )
                exits.append(exit_record)

                print(f"  ✓ Profit target hit on {hit_date.date()} @ ${exit_price_contract:.2f} (${exit_price_per_share:.2f}/share)")
                print(f"    (Daily range: ${daily_row['low']:.2f} - ${daily_row['high']:.2f} per share)")
            else:
                # Option expired worthless - this is a WIN for CSP sellers!
                # Keep 100% of premium
                exit_record = create_exit_record(
                    symbol, entry_date, expiration_date, premium, exit_pct,
                    0.0, 'expired_worthless', expiration_date, None, cost_basis
                )
                exits.append(exit_record)
                print(f"  🎉 Option expired worthless on {expiration_date.date()} - KEEP 100% PREMIUM!")

        except Exception as e:
            print(f"  ✗ Error: {e}")
            import traceback
            traceback.print_exc()
            continue

    # Create results DataFrame and calculate P&L
    exits_df = pd.DataFrame(exits)
    exits_df = calculate_pnl_metrics(exits_df)

    return exits_df

# Configuration
CONFIG = {
    'timezone': 'America/New_York',
}

# Run backtest
exits_df = backtest_exit_strategy(
    backtest_candidates=backtest_candidates,
    client=client,
    config=CONFIG
)

# Display results
print("\n" + "="*60)
print("BACKTEST RESULTS")
print("="*60)
print(f"\nTotal exits: {len(exits_df)}")

if len(exits_df) > 0:
    print(f"\nExit reasons:")
    print(exits_df['exit_reason'].value_counts())
    print(f"\nP&L Summary:")
    print(exits_df[['exit_pnl', 'exit_pnl_pct', 'roc']].describe())
    
    # Show sample
    print("\nSample exits:")
    print(exits_df[['symbol', 'entry_date', 'exit_date', 'premium', 'exit_price', 
                   'exit_pnl', 'roc', 'exit_reason']].head(10))
else:
    print("\n⚠ No exits recorded - check for errors above")



Processing SPY   230714P00421000...
  Entry: 2023-06-06, Premium: $391.00 ($3.91/share)
  Exit target: $293.25 ($2.93/share, exit at 75% of premium)
    [API] Fetching daily prices for SPY   230714P00421000 from 2023-06-06 to 2023-07-14
    [CACHE SAVE] Saved 291 days to cache
  ✓ Profit target hit on 2023-06-08 @ $293.25 ($2.93/share)
    (Daily range: $2.80 - $3.23 per share)

Processing SPY   230707P00420000...
  Entry: 2023-06-06, Premium: $308.50 ($3.08/share)
  Exit target: $231.37 ($2.31/share, exit at 75% of premium)
    [API] Fetching daily prices for SPY   230707P00420000 from 2023-06-06 to 2023-07-07
    [CACHE SAVE] Saved 313 days to cache
  ✓ Profit target hit on 2023-06-08 @ $231.37 ($2.31/share)
    (Daily range: $2.03 - $2.54 per share)

Processing SPY   230714P00422000...
  Entry: 2023-06-06, Premium: $416.00 ($4.16/share)
  Exit target: $312.00 ($3.12/share, exit at 75% of premium)
    [API] Fetching daily prices for SPY   230714P00422000 from 2023-06-06 to 2023-07-1

In [47]:
exits_df.round(2)

Unnamed: 0,symbol,entry_date,exit_date,expiration,cost_basis,premium,exit_pct,exit_price,exit_reason,days_held,daily_low,daily_high,exit_pnl,exit_pnl_pct,roc
0,SPY 230714P00421000,2023-06-06,2023-06-08 20:00:00,2023-07-14,42100.0,391.0,0.75,293.25,profit_target,2,2.8,3.23,97.75,25.0,0.23
1,SPY 230707P00420000,2023-06-06,2023-06-08 20:00:00,2023-07-07,42000.0,308.5,0.75,231.37,profit_target,2,2.03,2.54,77.13,25.0,0.18
2,SPY 230714P00422000,2023-06-06,2023-06-08 20:00:00,2023-07-14,42200.0,416.0,0.75,312.0,profit_target,2,3.12,3.42,104.0,25.0,0.25
3,SPY 230707P00422500,2023-06-06,2023-06-08 20:00:00,2023-07-07,42250.0,368.0,0.75,276.0,profit_target,2,2.74,2.85,92.0,25.0,0.22
4,SPY 230707P00421000,2023-06-06,2023-06-08 20:00:00,2023-07-07,42100.0,331.0,0.75,248.25,profit_target,2,2.25,2.68,82.75,25.0,0.2
5,SPY 230721P00422000,2023-06-06,2023-06-08 20:00:00,2023-07-21,42200.0,467.5,0.75,350.62,profit_target,2,3.45,4.17,116.88,25.0,0.28
6,SPY 230721P00419000,2023-06-06,2023-06-08 20:00:00,2023-07-21,41900.0,396.5,0.75,297.38,profit_target,2,2.91,3.47,99.12,25.0,0.24
7,SPY 230714P00418000,2023-06-06,2023-06-08 20:00:00,2023-07-14,41800.0,327.0,0.75,245.25,profit_target,2,2.4,2.75,81.75,25.0,0.2
8,SPY 230721P00420000,2023-06-06,2023-06-08 20:00:00,2023-07-21,42000.0,419.0,0.75,314.25,profit_target,2,3.05,3.75,104.75,25.0,0.25
9,SPY 230714P00420000,2023-06-06,2023-06-08 20:00:00,2023-07-14,42000.0,368.0,0.75,276.0,profit_target,2,2.68,3.11,92.0,25.0,0.22


In [46]:
100*(exits_df.exit_pnl.sum()/exits_df.cost_basis.sum())

np.float64(0.2210731504077138)