In [76]:
import pandas as pd
import numpy as np
from datetime import datetime

pd.set_option("display.max_rows", None)

In [77]:
def parse_option_symbol(symbol):
    """
    example: SPX   240419C00800000
    """
    numbers = symbol.split(" ")[3]
    exp_date = datetime.strptime(numbers[:6], "%y%m%d")  # Convert to datetime object
    action = numbers[6]  # Extract the action ('C' or 'P')
    strike_price = float(numbers[7:]) / 1000  # Extract and convert the strike price
    return exp_date, action, strike_price

In [78]:
options = pd.read_csv(r"..\data\cleaned_options_data.csv")

In [79]:
options["datetime"] = pd.to_datetime(options["ts_recv"], format="%Y-%m-%dT%H:%M:%S.%fZ")
parsed_features = options["symbol"].apply(parse_option_symbol)
# Create new columns by unpacking the parsed features

(
    options["exp_date"],
    options["action"],
    options["strike_price"],
) = zip(*parsed_features)

options = options.rename(
    columns={
        "bid_px_00": "bidp",
        "ask_px_00": "askp",
        "bid_sz_00": "bid_sz",
        "ask_sz_00": "ask_sz",
    }
)

In [80]:
underlying = pd.read_csv(r"..\data\spx_minute_level_data_jan_mar_2024.csv")
underlying.columns = underlying.columns.str.lower()
underlying["date"] = pd.to_datetime(
    underlying["date"], format="%Y%m%d"
) + pd.to_timedelta(underlying["ms_of_day"], unit="ms")
underlying = underlying[["date", "price"]]
print(underlying.shape)

(23850, 2)


In [81]:
print(options.head(5))

                          ts_recv  instrument_id   bidp    askp  bid_sz  \
0  2024-01-02T14:30:02.402838204Z      587228771   27.0   27.70     174   
1  2024-01-02T14:30:02.402844171Z      587211744    1.6    1.75     755   
2  2024-01-02T14:30:02.402848382Z      587211728    4.1    4.40    1566   
3  2024-01-02T14:30:02.410507796Z      587246038  121.1  123.70      15   
4  2024-01-02T14:30:02.410507796Z      587237433   42.7   43.40      75   

   ask_sz                 symbol                      datetime   exp_date  \
0     155  SPX   240119P04700000 2024-01-02 14:30:02.402838204 2024-01-19   
1    1981  SPX   240119P04300000 2024-01-02 14:30:02.402844171 2024-01-19   
2    1125  SPX   240119P04500000 2024-01-02 14:30:02.402848382 2024-01-19   
3      15  SPX   240216P04850000 2024-01-02 14:30:02.410507796 2024-02-16   
4     120  SPX   240216P04650000 2024-01-02 14:30:02.410507796 2024-02-16   

  action  strike_price  
0      P        4700.0  
1      P        4300.0  
2      P   

In [82]:
print(underlying.head(5))

                 date    price
0 2024-01-02 09:31:00  4742.96
1 2024-01-02 09:32:00  4743.24
2 2024-01-02 09:33:00  4744.84
3 2024-01-02 09:34:00  4744.11
4 2024-01-02 09:35:00  4743.49


In [83]:
def get_underlying(date):
    date = pd.Timestamp(date, tz="UTC").astimezone("US/Eastern").replace(tzinfo=None)

    index = underlying["date"].searchsorted(date)
    if index == 0:
        underlying_price = underlying["price"].iloc[0]
    elif index == len(underlying):
        underlying_price = underlying["price"].iloc[-1]
    else:
        before = underlying["date"].iloc[index - 1]
        after = underlying["date"].iloc[index]
        if abs(before - date) < abs(after - date):
            underlying_price = underlying["price"].iloc[index - 1]
        else:
            underlying_price = underlying["price"].iloc[index]
    # print(underlying.iloc[index])
    # print(underlying.iloc[index - 1])
    return underlying_price

In [71]:
curr = None
closest_call = closest_put = None
cc_val = cp_val = 10000

for i, option in options.iterrows():
    option_date = option["datetime"]
    underlying_price = get_underlying(option_date)
    option_date = option_date.date()

    # print(option)
    # print(underlying_price)
    # break

    if curr is None or curr != option_date:
        if curr != option_date:
            print(option_date)
            # TODO: process the closest call and put
            if closest_call is not None and closest_put is not None:
                exp_underlying = get_underlying(option["exp_date"])
                underlying_cost = closest_call["askp"] + closest_put["askp"]
                exp_price = get_underlying(option["exp_date"])
                call_profit = 100 * (underlying_price - closest_call["strike_price"])
                put_profit = 100 * (closest_put["strike_price"] - underlying_price)
                overall = max(call_profit, 0) + max(put_profit, 0) - underlying_cost
                print(
                    f"Date: {option_date}, Underlying: {underlying_price}, Closest Call: {closest_call}, Closest Put: {closest_put}, Overall: {overall}"
                )

        #     # Reset
        curr = option_date
        if option["action"] == "C":
            closest_call = option
            ccval = abs(option["strike_price"] - underlying_price)
            closest_put = None
            cpval = 10000
        else:
            closest_put = option
            cpval = abs(option["strike_price"] - underlying_price)
            closest_call = None
            ccval = 10000
    else:
        if option["action"] == "C":
            cccval = abs(option["strike_price"] - underlying_price)
            if cccval < ccval:
                closest_call = option
                ccval = cccval
        else:
            ccpval = abs(option["strike_price"] - underlying_price)
            if ccpval < cpval:
                closest_put = option
                cpval = ccpval

2024-01-02
2024-01-03
Date: 2024-01-03, Underlying: 0.0, Closest Call: datetime        2024-01-02 16:13:08.265789414
bidp                                     50.8
askp                                     51.5
bid_sz                                    492
ask_sz                                    348
exp_date                  2024-01-19 00:00:00
action                                      C
strike_price                           4740.0
Name: 7291, dtype: object, Closest Put: datetime        2024-01-02 17:21:30.258022903
bidp                                     38.0
askp                                     38.5
bid_sz                                    551
ask_sz                                     75
exp_date                  2024-01-19 00:00:00
action                                      P
strike_price                           4750.0
Name: 11351, dtype: object, Overall: 474910.0
2024-01-04
Date: 2024-01-04, Underlying: 0.0, Closest Call: datetime        2024-01-03 17:02:26.979057821
b

In [84]:
options["date"] = options["datetime"].dt.date
options.head(5)

Unnamed: 0,ts_recv,instrument_id,bidp,askp,bid_sz,ask_sz,symbol,datetime,exp_date,action,strike_price,date
0,2024-01-02T14:30:02.402838204Z,587228771,27.0,27.7,174,155,SPX 240119P04700000,2024-01-02 14:30:02.402838204,2024-01-19,P,4700.0,2024-01-02
1,2024-01-02T14:30:02.402844171Z,587211744,1.6,1.75,755,1981,SPX 240119P04300000,2024-01-02 14:30:02.402844171,2024-01-19,P,4300.0,2024-01-02
2,2024-01-02T14:30:02.402848382Z,587211728,4.1,4.4,1566,1125,SPX 240119P04500000,2024-01-02 14:30:02.402848382,2024-01-19,P,4500.0,2024-01-02
3,2024-01-02T14:30:02.410507796Z,587246038,121.1,123.7,15,15,SPX 240216P04850000,2024-01-02 14:30:02.410507796,2024-02-16,P,4850.0,2024-01-02
4,2024-01-02T14:30:02.410507796Z,587237433,42.7,43.4,75,120,SPX 240216P04650000,2024-01-02 14:30:02.410507796,2024-02-16,P,4650.0,2024-01-02


In [None]:
curr = None
closest_call = closest_put = None
cc_val = cp_val = 10000

for i, option in options.iterrows():
    option_date = option["datetime"]
    underlying_price = get_underlying(option_date)
    option_date = option_date.date()

    # print(option)
    # print(underlying_price)
    # break

    if curr is None or curr != option_date:
        if curr != option_date:
            print(option_date)
            # TODO: process the closest call and put
            if closest_call is not None and closest_put is not None:
                exp_underlying = get_underlying(option["exp_date"])
                underlying_cost = closest_call["askp"] + closest_put["askp"]
                exp_price = get_underlying(option["exp_date"])
                call_profit = 100 * (underlying_price - closest_call["strike_price"])
                put_profit = 100 * (closest_put["strike_price"] - underlying_price)
                overall = max(call_profit, 0) + max(put_profit, 0) - underlying_cost
                print(
                    f"Date: {option_date}, Underlying: {underlying_price}, Closest Call: {closest_call}, Closest Put: {closest_put}, Overall: {overall}"
                )

        #     # Reset
        curr = option_date
        if option["action"] == "C":
            closest_call = option
            ccval = abs(option["strike_price"] - underlying_price)
            closest_put = None
            cpval = 10000
        else:
            closest_put = option
            cpval = abs(option["strike_price"] - underlying_price)
            closest_call = None
            ccval = 10000
    else:
        if option["action"] == "C":
            cccval = abs(option["strike_price"] - underlying_price)
            if cccval < ccval:
                closest_call = option
                ccval = cccval
        else:
            ccpval = abs(option["strike_price"] - underlying_price)
            if ccpval < cpval:
                closest_put = option
                cpval = ccpval