In [None]:
# Now calculate delta with the actual underlying prices
from py_vollib.black_scholes.greeks.analytical import delta
import numpy as np

def calculate_delta_with_price(row, r=0.05, sigma=0.30, flag='p'):
    """
    Calculate delta using Black-Scholes model with actual underlying price

    Parameters:
    - row: mapping / DataFrame row providing 'underlying_last' (spot price),
      'strike' and 'dte' (days to expiration)
    - r: annualised risk-free rate (default 0.05, i.e. 5%)
    - sigma: annualised volatility (default 0.30) - a flat placeholder until
      historical or implied volatility is available
    - flag: 'p' for put (default), 'c' for call

    Returns:
    - whatever py_vollib's analytical `delta` returns, or np.nan when an input
      is missing / non-numeric or the pricing call fails numerically.

    Only data-related errors are turned into NaN; anything else (for example
    a failed py_vollib import) is raised so it cannot hide behind a column
    of NaNs.
    """
    try:
        S = float(row['underlying_last'])  # Actual stock price at the same timestamp
        K = float(row['strike'])
        t = float(row['dte']) / 365.0  # Convert days to years
    except (KeyError, TypeError, ValueError):
        # Column absent or value not convertible to a number
        return np.nan

    # A missing value in any input makes the result undefined
    if np.isnan(S) or np.isnan(K) or np.isnan(t):
        return np.nan

    try:
        return delta(flag, S, K, t, r, sigma)
    except (TypeError, ValueError, ArithmeticError):
        # Numerical failure inside the pricing routine (e.g. division by zero)
        return np.nan

# Row-wise Black-Scholes delta, priced off the merged underlying price
filtered_df['delta'] = filtered_df.apply(calculate_delta_with_price, axis=1)

# Moneyness indicator: strike divided by the underlying price
filtered_df['moneyness'] = filtered_df['strike'].div(filtered_df['underlying_last'])

# Show the key columns, ordered by days to expiration and then by delta
summary_columns = ['symbol', 'expiration', 'strike', 'dte', 'underlying_last', 'moneyness', 'delta']
filtered_df[summary_columns].sort_values(['dte', 'delta'])

In [None]:
# Merge equity prices with options data
# We'll use the close price from the equity data as the underlying_last price

# Make sure equity_df has ts_event as index if not already
if 'ts_event' in equity_df.columns:
    equity_df = equity_df.set_index('ts_event')

# Extract just the close price and rename for clarity
equity_prices = equity_df[['close']].rename(columns={'close': 'underlying_last'})

# Merge with filtered options data using the timestamp index.
# A previous run of this cell leaves an 'underlying_last' column behind; merging
# again would yield 'underlying_last_x' / 'underlying_last_y' and break the
# lookups below, so any stale copy is dropped first to keep the cell re-runnable.
filtered_df = filtered_df.drop(columns=['underlying_last'], errors='ignore').merge(
    equity_prices,
    left_index=True,
    right_index=True,
    how='left'
)

print(f"Merged data shape: {filtered_df.shape}")
print(f"Missing underlying prices: {filtered_df['underlying_last'].isna().sum()}")
filtered_df[['symbol', 'strike', 'dte', 'underlying_last']].head(10)

In [None]:
import databento as db
import pandas as pd

# Initialize client (make sure your API key is set)
client = db.Historical()

# Request window covering every calendar day that appears in the filtered data.
# The request's `end` is exclusive, so the window must run to midnight *after*
# the last timestamp; using the same date for start and end (which happens
# whenever the options data spans a single day) would describe an empty range.
start_date = unique_timestamps.min().normalize()
end_date = unique_timestamps.max().normalize() + pd.Timedelta(days=1)

# Fetch 1-minute OHLCV bars for AAPL over that window; the bar stamped 15:45:00
# is the one that lines up with the option timestamps.
equity_data = client.timeseries.get_range(
    dataset='XNAS.ITCH',  # NASDAQ for AAPL
    symbols=['AAPL'],
    schema='ohlcv-1m',  # 1-minute OHLCV bars
    start=start_date,
    end=end_date,
    stype_in='raw_symbol'
)

# Convert to dataframe
equity_df = equity_data.to_df()
print(f"Fetched {len(equity_df)} equity records")
equity_df.head(20)

In [None]:
# Distinct event timestamps present in the filtered option rows
unique_timestamps = filtered_df.index.unique()

# The underlying root is the first whitespace-separated token of the option
# symbol (e.g. 'AAPL' from 'AAPL 230707P00180000')
underlying_roots = filtered_df['symbol'].str.split().str[0]
unique_symbols = underlying_roots.unique()

print(f"Unique timestamps: {len(unique_timestamps)}")
print(f"Unique underlying symbols: {unique_symbols}")
print(f"Date range: {unique_timestamps.min()} to {unique_timestamps.max()}")

In [None]:
from py_vollib.black_scholes import black_scholes as bs
from py_vollib.black_scholes.greeks.analytical import delta
import numpy as np

# Keep puts with 30 to 45 days to expiration (both bounds inclusive), ordered by
# dte then strike; .copy() so later column assignments do not touch `df`
is_put = df['call_put'] == 'P'
in_dte_window = df['dte'].between(30, 45)
filtered_df = df[in_dte_window & is_put].sort_values(['dte', 'strike']).copy()

# Function to calculate delta for each option
def calculate_delta(row, r=0.05, sigma=0.30, flag='p'):
    """
    Calculate delta using Black-Scholes model

    Parameters:
    - row: mapping / DataFrame row providing 'underlying_last' (spot price, S),
      'strike' (K) and 'dte' (days to expiration, converted to years as t)
    - r: risk-free rate (default 0.05 or 5%)
    - sigma: annualised volatility (default 0.30) - a flat placeholder; estimate
      it from historical data or implied vol when available
    - flag: 'c' for call, 'p' for put (default)

    Returns:
    - whatever py_vollib's analytical `delta` returns, or np.nan when an input
      is missing / non-numeric or the pricing call fails numerically.

    Only data-related errors are turned into NaN; anything else (for example
    a failed py_vollib import) is raised so it cannot hide behind a column
    of NaNs.
    """
    try:
        S = float(row['underlying_last'])  # Current stock price - adjust column name as needed
        K = float(row['strike'])
        t = float(row['dte']) / 365.0  # Convert days to years
    except (KeyError, TypeError, ValueError):
        # Column absent or value not convertible to a number
        return np.nan

    # A missing value in any input makes the result undefined
    if np.isnan(S) or np.isnan(K) or np.isnan(t):
        return np.nan

    try:
        # Calculate delta
        return delta(flag, S, K, t, r, sigma)
    except (TypeError, ValueError, ArithmeticError):
        # Numerical failure inside the pricing routine (e.g. division by zero)
        return np.nan

# Add delta column
filtered_df['delta'] = filtered_df.apply(calculate_delta, axis=1)

# 'underlying_last' is not part of the options frame itself; it only exists once
# underlying prices have been merged in. Until then every delta is NaN, so say so
# and show the columns that are available instead of failing on a missing column.
if 'underlying_last' not in filtered_df.columns:
    print("No 'underlying_last' column yet: delta is NaN until underlying prices are merged")

# Display results
display_cols = ['expiration', 'strike', 'dte', 'underlying_last', 'delta']
filtered_df[[col for col in display_cols if col in filtered_df.columns]]

In [None]:
# Install py_vollib for options Greeks calculations (run once; %pip installs into the kernel's environment)
# %pip install py_vollib

In [27]:
from pathlib import Path
from dotenv import dotenv_values, load_dotenv
import os

# Location of the .env file that holds the Databento credentials
env_path = Path("/Users/samuelminer/Projects/nissan_options/wheel_strategy/.env")

# Show which keys the file defines (names only, never the values)
parsed_env = dotenv_values(env_path)
print("Parsed keys:", parsed_env.keys())

# Load the file into the process environment, replacing any existing values,
# then confirm the API key is now visible
load_dotenv(env_path, override=True)
key_is_set = bool(os.getenv("DATABENTO_API_KEY"))
print("os.getenv:", key_is_set)


Parsed keys: odict_keys(['DATABENTO_API_KEY'])
os.getenv: True


In [28]:
import sys
sys.executable

from dotenv import load_dotenv
import os

# Load a .env file using python-dotenv's default lookup
load_dotenv()

# Stop here if the Databento key is still not available
assert os.environ.get("DATABENTO_API_KEY"), "DATABENTO_API_KEY still not found"



In [29]:
import os
import pandas as pd
import databento as db
import pandas_market_calendars as mcal

client = db.Historical()


In [38]:
import databento as db
# Historical client (no key passed explicitly)
client = db.Historical()

# Ask the metadata endpoint for the dataset list and preview up to 50 entries
datasets = client.metadata.list_datasets()
dataset_count = len(datasets)
print("Dataset count:", dataset_count)
datasets[:50]


Dataset count: 27


['ARCX.PILLAR',
 'BATS.PITCH',
 'BATY.PITCH',
 'DBEQ.BASIC',
 'EDGA.PITCH',
 'EDGX.PITCH',
 'EPRL.DOM',
 'EQUS.MINI',
 'EQUS.SUMMARY',
 'GLBX.MDP3',
 'IEXG.TOPS',
 'IFEU.IMPACT',
 'IFLL.IMPACT',
 'IFUS.IMPACT',
 'MEMX.MEMOIR',
 'NDEX.IMPACT',
 'OPRA.PILLAR',
 'XASE.PILLAR',
 'XBOS.ITCH',
 'XCHI.PILLAR',
 'XCIS.TRADESBBO',
 'XEEE.EOBI',
 'XEUR.EOBI',
 'XNAS.BASIC',
 'XNAS.ITCH',
 'XNYS.PILLAR',
 'XPSX.ITCH']

In [39]:
# Collect the datasets that publish 1-minute OHLCV bars.
# A failed schema lookup is recorded and reported rather than silently dropped,
# so "not listed" can be told apart from "could not be checked".
supports_1m = []
failed_lookups = {}
for ds in datasets:
    try:
        schemas = client.metadata.list_schemas(ds)
    except Exception as exc:
        failed_lookups[ds] = exc
        continue
    if "ohlcv-1m" in schemas:
        supports_1m.append(ds)

print("Datasets with ohlcv-1m:", supports_1m)
if failed_lookups:
    print("Schema lookup failed for:", {ds: repr(exc) for ds, exc in failed_lookups.items()})


Datasets with ohlcv-1m: ['ARCX.PILLAR', 'BATS.PITCH', 'BATY.PITCH', 'DBEQ.BASIC', 'EDGA.PITCH', 'EDGX.PITCH', 'EPRL.DOM', 'EQUS.MINI', 'GLBX.MDP3', 'IEXG.TOPS', 'IFEU.IMPACT', 'IFLL.IMPACT', 'IFUS.IMPACT', 'MEMX.MEMOIR', 'NDEX.IMPACT', 'OPRA.PILLAR', 'XASE.PILLAR', 'XBOS.ITCH', 'XCHI.PILLAR', 'XCIS.TRADESBBO', 'XEEE.EOBI', 'XEUR.EOBI', 'XNAS.BASIC', 'XNAS.ITCH', 'XNYS.PILLAR', 'XPSX.ITCH']


In [41]:
import pandas as pd
import databento as db
import pandas_market_calendars as mcal

client = db.Historical()
# Request parameters for the 1-minute equity bars
tz = "America/New_York"

dataset = "EQUS.MINI"
schema = "ohlcv-1m"
symbol = "AAPL"

# IMPORTANT: cap end to last completed UTC day
# (midnight UTC of the current day; Timestamp.now("UTC") replaces the
# deprecated Timestamp.utcnow() and yields the same tz-aware value)
available_end = pd.Timestamp.now("UTC").normalize()
available_end


Timestamp('2025-12-12 00:00:00+0000', tz='UTC')

In [42]:
# Window: the day before `available_end`, reaching back 31 calendar days
one_day = pd.Timedelta(days=1)
lookback = pd.Timedelta(days=31)
end_date = (available_end - one_day).date()
start_date = (pd.Timestamp(end_date) - lookback).date()

# NYSE calendar schedule for that window
nyse = mcal.get_calendar("NYSE")
schedule = nyse.schedule(start_date=start_date, end_date=end_date)


/var/folders/6k/0v57cgbd2k37vp0lh44zby640000gn/T/ipykernel_16246/1135243470.py:7: BentoWarning: No data found for the request you submitted.
  store = client.timeseries.get_range(
    

In [117]:
import databento as db
import pandas as pd

# Uses DATABENTO_API_KEY from environment
client = db.Historical()

symbol = "AAPL"
dataset = "EQUS.MINI"     # consolidated US equities (best choice)
schema = "ohlcv-1d"       # DAILY bars
tz = "America/New_York"
# NOTE(review): 252 is the usual trading-days-per-year figure, but the value is
# applied below as *calendar* days - confirm which one is intended.
days = 252*2

# Window ends at midnight UTC one day before today and reaches back `days`
# calendar days. Timestamp.now("UTC") replaces the deprecated Timestamp.utcnow().
end = pd.Timestamp.now("UTC").normalize() - pd.Timedelta(days=1)
start = end - pd.Timedelta(days=days)

data = client.timeseries.get_range(
    dataset=dataset,
    symbols=symbol,
    schema=schema,
    stype_in="raw_symbol",
    start=start,
    end=end,
)


  data = client.timeseries.get_range(


In [130]:
# Convert the fetched records to a DataFrame, passing `tz` for the timestamps
df = data.to_df(tz=tz)
df

Unnamed: 0_level_0,rtype,publisher_id,instrument_id,open,high,low,close,volume,symbol
ts_event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2024-07-25 20:00:00-04:00,35,95,38,219.07,220.020,216.020,217.88,1652109,AAPL
2024-07-28 20:00:00-04:00,35,95,38,217.68,219.280,215.790,217.05,1106068,AAPL
2024-07-29 20:00:00-04:00,35,95,38,218.86,220.340,216.120,218.31,1342250,AAPL
2024-07-30 20:00:00-04:00,35,95,38,220.16,223.890,219.850,223.79,1612752,AAPL
2024-07-31 20:00:00-04:00,35,95,38,223.90,224.820,212.500,219.60,2939539,AAPL
...,...,...,...,...,...,...,...,...,...
2025-12-04 19:00:00-05:00,35,95,38,280.56,281.130,278.050,279.39,1113423,AAPL
2025-12-07 19:00:00-05:00,35,95,38,277.97,279.630,276.170,277.36,1403891,AAPL
2025-12-08 19:00:00-05:00,35,95,38,278.72,280.030,276.920,276.94,961735,AAPL
2025-12-09 19:00:00-05:00,35,95,38,277.25,279.745,276.470,279.11,922187,AAPL


In [135]:
# Label the index 'date' (in place) so later cells can refer to it by that name
df.index.names = ['date']
df.head()


Unnamed: 0_level_0,rtype,publisher_id,instrument_id,open,high,low,close,volume,symbol
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2024-07-25 20:00:00-04:00,35,95,38,219.07,220.02,216.02,217.88,1652109,AAPL
2024-07-28 20:00:00-04:00,35,95,38,217.68,219.28,215.79,217.05,1106068,AAPL
2024-07-29 20:00:00-04:00,35,95,38,218.86,220.34,216.12,218.31,1342250,AAPL
2024-07-30 20:00:00-04:00,35,95,38,220.16,223.89,219.85,223.79,1612752,AAPL
2024-07-31 20:00:00-04:00,35,95,38,223.9,224.82,212.5,219.6,2939539,AAPL


In [161]:
import pandas as pd

# `df` holds the daily OHLCV bars with a 'close' column; 'date' is the name of
# its index, which sort_values accepts as a sort key.

window = 20
k = 2.0  # 2-sigma Bollinger Bands

# Work on a date-ordered copy so `df` itself is left untouched
df_bb = df.copy().sort_values("date")

# Rolling mean and population standard deviation (ddof=0) of the close;
# both stay NaN until a full window of observations is available
roll = df_bb["close"].rolling(window=window, min_periods=window)
df_bb["sma20"] = roll.mean()
df_bb["std20"] = roll.std(ddof=0)

# Bollinger Bands: k standard deviations either side of the moving average
band_offset = k * df_bb["std20"]
df_bb["bb_upper"] = df_bb["sma20"] + band_offset
df_bb["bb_lower"] = df_bb["sma20"] - band_offset

# Optional: Bollinger %B (position of the close inside the band) and
# Bandwidth (band width relative to the moving average)
band_width = df_bb["bb_upper"] - df_bb["bb_lower"]
df_bb["bb_pctb"] = (df_bb["close"] - df_bb["bb_lower"]) / band_width
df_bb["bb_bandwidth"] = band_width / df_bb["sma20"]

# Optional trimmed view for strategy work (kept disabled):
# df_bb_out = df_bb[["symbol", "open", "high", "low", "close", "volume",
#                    "sma20", "bb_upper", "bb_lower", "bb_pctb", "bb_bandwidth"]]
# df_bb_out.tail(30)


In [162]:
# Display the daily bars together with the Bollinger Band columns
df_bb

Unnamed: 0_level_0,rtype,publisher_id,instrument_id,open,high,low,close,volume,symbol,sma20,std20,bb_upper,bb_lower,bb_pctb,bb_bandwidth
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2024-07-25 20:00:00-04:00,35,95,38,219.07,220.020,216.020,217.88,1652109,AAPL,,,,,,
2024-07-28 20:00:00-04:00,35,95,38,217.68,219.280,215.790,217.05,1106068,AAPL,,,,,,
2024-07-29 20:00:00-04:00,35,95,38,218.86,220.340,216.120,218.31,1342250,AAPL,,,,,,
2024-07-30 20:00:00-04:00,35,95,38,220.16,223.890,219.850,223.79,1612752,AAPL,,,,,,
2024-07-31 20:00:00-04:00,35,95,38,223.90,224.820,212.500,219.60,2939539,AAPL,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-12-04 19:00:00-05:00,35,95,38,280.56,281.130,278.050,279.39,1113423,AAPL,274.8840,5.739911,286.363821,263.404179,0.696257,0.083525
2025-12-07 19:00:00-05:00,35,95,38,277.97,279.630,276.170,277.36,1403891,AAPL,275.3170,5.581534,286.480068,264.153932,0.591507,0.081092
2025-12-08 19:00:00-05:00,35,95,38,278.72,280.030,276.920,276.94,961735,AAPL,275.7015,5.412667,286.526834,264.876166,0.557204,0.078529
2025-12-09 19:00:00-05:00,35,95,38,277.25,279.745,276.470,279.11,922187,AAPL,275.8750,5.463293,286.801585,264.948415,0.648033,0.079214


In [163]:
# Keep only the columns the entry rules need and drop the warm-up rows where the
# rolling statistics are still NaN; the trailing .copy() gives an independent
# frame to add columns to (no need to copy all of df_bb first).
df_equity_entry = df_bb[['close', 'sma20', 'bb_lower']].dropna().copy()

# Entry flags: close at or below the moving average / at or below the lower band
df_equity_entry['sma_entry'] = df_equity_entry['close'] <= df_equity_entry['sma20']
df_equity_entry['bb_entry'] = df_equity_entry['close'] <= df_equity_entry['bb_lower']

# Only a cell's last expression is rendered, so the first tally has to be printed
# explicitly or it is computed and thrown away.
print(df_equity_entry[['sma_entry']].value_counts())
df_equity_entry[['bb_entry']].value_counts()


bb_entry
False       311
True         17
Name: count, dtype: int64

In [170]:
# First five dates on which either entry rule fired
entry_mask = df_equity_entry['sma_entry'] | df_equity_entry['bb_entry']
df_equity_entry.loc[entry_mask].index.tolist()[:5]

[Timestamp('2024-09-02 20:00:00-0400', tz='America/New_York'),
 Timestamp('2024-09-03 20:00:00-0400', tz='America/New_York'),
 Timestamp('2024-09-04 20:00:00-0400', tz='America/New_York'),
 Timestamp('2024-09-05 20:00:00-0400', tz='America/New_York'),
 Timestamp('2024-09-08 20:00:00-0400', tz='America/New_York')]

In [197]:
import databento as db
import pandas as pd

client = db.Historical()

# Request settings for the option quote pull
tz = "America/New_York"
dataset = "OPRA.PILLAR"
schema = "cmbp-1"

# One-minute window starting at 15:45 New York time on 2023-06-06
start = pd.Timestamp("2023-06-06 15:45", tz=tz)
end = start + pd.Timedelta(minutes=1)

# "AAPL.OPT" with stype_in="parent" requests by parent symbol rather than by
# individual contract
data = client.timeseries.get_range(
    dataset=dataset,
    schema=schema,
    symbols="AAPL.OPT",
    stype_in="parent",
    start=start,
    end=end,
)

# Materialise as a DataFrame and order rows by event time
df_opts = data.to_df(tz=tz)
df_opts = df_opts.sort_values("ts_event")
df_opts.head()


Unnamed: 0_level_0,ts_event,rtype,publisher_id,instrument_id,action,side,price,size,flags,ts_in_delta,bid_px_00,ask_px_00,bid_sz_00,ask_sz_00,bid_pb_00,ask_pb_00,symbol
ts_recv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2023-06-06 15:45:00.000108703-04:00,2023-06-06 15:44:59.999900672-04:00,177,30,3613,A,B,30.85,65,194,0,30.85,31.1,65,82,0,0,AAPL 241220C00175000
2023-06-06 15:45:00.000224931-04:00,2023-06-06 15:45:00.000014336-04:00,177,30,16777300,A,A,27.55,254,194,0,25.8,27.55,293,254,0,0,AAPL 240119P00205000
2023-06-06 15:45:00.000445184-04:00,2023-06-06 15:45:00.000235008-04:00,177,30,16783012,A,A,16.55,155,194,0,16.2,16.55,115,155,0,0,AAPL 241220P00175000
2023-06-06 15:45:00.000534787-04:00,2023-06-06 15:45:00.000326656-04:00,177,30,1348,A,A,1.83,79,194,0,1.81,1.83,24,79,0,0,AAPL 230623C00182500
2023-06-06 15:45:00.000595483-04:00,2023-06-06 15:45:00.000387328-04:00,177,30,17,A,B,0.67,235,194,0,0.67,0.69,235,382,0,0,AAPL 230616C00185000


In [220]:
# 1-minute OHLCV bars requested by the AAPL parent symbol; `start` / `end` come
# from an earlier cell (15:45 and 15:46)
bar_request = dict(
    dataset="OPRA.PILLAR",
    schema="ohlcv-1m",
    symbols="AAPL.OPT",
    stype_in="parent",
    start=start,
    end=end,
)
data = client.timeseries.get_range(**bar_request)

df_ohlc = data.to_df(tz="America/New_York")


In [221]:
# Preview the first option bars
df_ohlc.head()

Unnamed: 0_level_0,rtype,publisher_id,instrument_id,open,high,low,close,volume,symbol
ts_event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-06-06 15:45:00-04:00,33,26,2062,0.57,0.57,0.57,0.57,1,AAPL 230630C00190000
2023-06-06 15:45:00-04:00,33,24,4576,3.55,3.55,3.55,3.55,1,AAPL 230630C00180000
2023-06-06 15:45:00-04:00,33,29,2865,1.07,1.07,1.07,1.07,1,AAPL 230623C00185000
2023-06-06 15:45:00-04:00,33,24,16778402,1.65,1.65,1.65,1.65,6,AAPL 230616P00177500
2023-06-06 15:45:00-04:00,33,29,16788557,3.95,3.95,3.95,3.95,1,AAPL 230707P00180000


df_ohlc.head()

In [222]:
def parse_opra_symbol(sym: str):
    """
    Split an option symbol such as "AAPL 240119P00205000" into its parts.

    The symbol must consist of exactly two whitespace-separated tokens: the
    root and the contract code. Within the contract code, characters 0-5 are
    the expiration as YYMMDD, character 6 is the call/put letter and the rest
    is the strike in thousandths.

    Returns:
    - (expiration, call_put, strike): the parsed expiration timestamp, the
      call/put letter, and the strike as a float
    """
    _, contract = sym.split()
    expiry = pd.to_datetime(contract[:6], format="%y%m%d")
    right = contract[6]
    strike_price = int(contract[7:]) / 1000
    return expiry, right, strike_price

# Parse every symbol once and build the three new columns in a single frame.
# (`.apply(pd.Series)` constructs one Series per row, which is very slow on a
# full option chain, and it cannot produce the three columns for an empty frame.)
parsed_symbols = pd.DataFrame(
    [parse_opra_symbol(sym) for sym in df_ohlc["symbol"]],
    columns=["expiration", "call_put", "strike"],
)

# assign() returns a new frame, leaving the original df_ohlc object unmodified;
# values are attached by position because the timestamp index repeats.
df_ohlc = df_ohlc.assign(
    **{col: parsed_symbols[col].to_numpy() for col in parsed_symbols.columns}
)

In [228]:
# Work on a copy of the parsed option bars
df = df_ohlc.copy()

# Midnight of the New York calendar day of each bar (timezone dropped so it can
# be compared with the expiration dates)
event_midnight = df.index.tz_convert("America/New_York").tz_localize(None).normalize()
# Midnight of each contract's expiration day
exp_midnight = pd.to_datetime(df["expiration"]).dt.normalize()

# Days to expiration: whole calendar days between the two
df["dte"] = (exp_midnight - event_midnight).dt.days


In [236]:
# Puts with 30 to 45 days to expiration (bounds inclusive), ordered by dte then strike
put_window = (df['call_put'] == 'P') & df['dte'].between(30, 45)
df.loc[put_window].sort_values(['dte', 'strike'])

Unnamed: 0_level_0,rtype,publisher_id,instrument_id,open,high,low,close,volume,symbol,expiration,call_put,strike,dte
ts_event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2023-06-06 15:45:00-04:00,33,29,16788557,3.95,3.95,3.95,3.95,1,AAPL 230707P00180000,2023-07-07,P,180.0,31
2023-06-06 15:45:00-04:00,33,29,16788433,0.79,0.8,0.79,0.8,3,AAPL 230714P00165000,2023-07-14,P,165.0,38
2023-06-06 15:45:00-04:00,33,22,16780008,0.6,0.6,0.6,0.6,5,AAPL 230721P00160000,2023-07-21,P,160.0,45
2023-06-06 15:45:00-04:00,33,26,16789850,1.64,1.64,1.64,1.64,7,AAPL 230721P00170000,2023-07-21,P,170.0,45
2023-06-06 15:45:00-04:00,33,22,16777453,4.57,4.57,4.57,4.57,5,AAPL 230721P00180000,2023-07-21,P,180.0,45
2023-06-06 15:45:00-04:00,33,26,16777453,4.55,4.55,4.55,4.55,7,AAPL 230721P00180000,2023-07-21,P,180.0,45
