In [36]:
import pandas as pd
import numpy as np
from datetime import datetime

pd.set_option("display.max_rows", None)

In [27]:
def parse_option_symbol(symbol):
    """
    example: SPX   240419C00800000
    """
    numbers = symbol.split(" ")[3]
    exp_date = datetime.strptime(numbers[:6], "%y%m%d")  # Convert to datetime object
    action = numbers[6]  # Extract the action ('C' or 'P')
    strike_price = float(numbers[7:]) / 1000  # Extract and convert the strike price
    return exp_date, action, strike_price

In [28]:
options = pd.read_csv(r"..\data\cleaned_options_data.csv")

In [29]:
options["datetime"] = pd.to_datetime(options["ts_recv"], format="%Y-%m-%dT%H:%M:%S.%fZ")
parsed_features = options["symbol"].apply(parse_option_symbol)
# Create new columns by unpacking the parsed features

(
    options["exp_date"],
    options["action"],
    options["strike_price"],
) = zip(*parsed_features)
options = options[
    [
        "datetime",
        "bid_px_00",
        "ask_px_00",
        "bid_sz_00",
        "ask_sz_00",
        "exp_date",
        "action",
        "strike_price",
    ]
]
options = options.rename(
    columns={
        "bid_px_00": "bidp",
        "ask_px_00": "askp",
        "bid_sz_00": "bid_sz",
        "ask_sz_00": "ask_sz",
    }
)

In [43]:
underlying = pd.read_csv(r"..\data\underlying_data_hour.csv")
underlying.columns = underlying.columns.str.lower()
underlying["date"] = pd.to_datetime(underlying["date"])
print(underlying.shape)

(427, 7)


In [58]:
print(options.head(5))

                       datetime   bidp    askp  bid_sz  ask_sz   exp_date  \
0 2024-01-02 14:30:02.402838204   27.0   27.70     174     155 2024-01-19   
1 2024-01-02 14:30:02.402844171    1.6    1.75     755    1981 2024-01-19   
2 2024-01-02 14:30:02.402848382    4.1    4.40    1566    1125 2024-01-19   
3 2024-01-02 14:30:02.410507796  121.1  123.70      15      15 2024-02-16   
4 2024-01-02 14:30:02.410507796   42.7   43.40      75     120 2024-02-16   

  action  strike_price  
0      P        4700.0  
1      P        4300.0  
2      P        4500.0  
3      P        4850.0  
4      P        4650.0  


In [32]:
print(underlying.head(5))

                        date         open         high          low  \
0  2024-01-02 09:30:00-05:00  4745.200195  4745.200195  4730.950195   
1  2024-01-02 10:30:00-05:00  4736.100098  4742.439941  4731.169922   
2  2024-01-02 11:30:00-05:00  4741.120117  4751.220215  4740.890137   
3  2024-01-02 12:30:00-05:00  4747.549805  4754.330078  4740.919922   
4  2024-01-02 13:30:00-05:00  4744.810059  4747.220215  4733.669922   

         close    adj close     volume  
0  4736.259766  4736.259766          0  
1  4741.120117  4741.120117  292380245  
2  4747.549805  4747.549805  217999404  
3  4744.799805  4744.799805  185937425  
4  4737.020020  4737.020020  194942000  


In [57]:
def get_underlying(date):
    date = pd.Timestamp(date, tz="UTC")
    index = underlying["date"].searchsorted(date)
    if index == 0:
        underlying_price = underlying["adj close"].iloc[0]
    elif index == len(underlying):
        underlying_price = underlying["adj close"].iloc[-1]
    else:
        before = underlying["date"].iloc[index - 1]
        after = underlying["date"].iloc[index]
        if abs(before - date) < abs(after - date):
            underlying_price = underlying["adj close"].iloc[index - 1]
        else:
            underlying_price = underlying["adj close"].iloc[index]
    # print(underlying.iloc[index])
    # print(underlying.iloc[index - 1])
    return underlying_price

In [63]:
curr = None
closest_call = closest_put = None
cc_val = cp_val = 10000

for i, option in options.iterrows():
    option_date = option["datetime"]
    underlying_price = get_underlying(option_date)
    option_date = option_date.date()

    # print(option)
    # print(underlying_price)
    # break

    if curr is None or curr != option_date:
        if curr != option_date:
            print(option_date)
            # TODO: process the closest call and put
            if closest_call is not None and closest_put is not None:
                exp_underlying = get_underlying(option["exp_date"])
                underlying_cost = closest_call["askp"] + closest_put["askp"]
                exp_price = get_underlying(option["exp_date"])
                call_profit = 100 * (underlying_price - closest_call["strike_price"])
                put_profit = 100 * (closest_put["strike_price"] - underlying_price)
                overall = max(call_profit, 0) + max(put_profit, 0) - underlying_cost
                print(
                    f"Date: {option_date}, Underlying: {underlying_price}, Closest Call: {closest_call}, Closest Put: {closest_put}, Overall: {overall}"
                )

        #     # Reset
        curr = option_date
        if option["action"] == "C":
            closest_call = option
            ccval = abs(option["strike_price"] - underlying_price)
            closest_put = None
            cpval = 10000
        else:
            closest_put = option
            cpval = abs(option["strike_price"] - underlying_price)
            closest_call = None
            ccval = 10000
    else:
        if option["action"] == "C":
            cccval = abs(option["strike_price"] - underlying_price)
            if cccval < ccval:
                closest_call = option
                ccval = cccval
        else:
            ccpval = abs(option["strike_price"] - underlying_price)
            if ccpval < cpval:
                closest_put = option
                cpval = ccpval

2024-01-02
2024-01-03
Date: 2024-01-03, Underlying: 4713.2099609375, Closest Call: datetime        2024-01-02 17:05:45.510331181
bidp                                     53.2
askp                                     54.0
bid_sz                                    513
ask_sz                                    369
exp_date                  2024-01-19 00:00:00
action                                      C
strike_price                           4745.0
Name: 10468, dtype: object, Closest Put: datetime        2024-01-02 15:02:18.300953443
bidp                                    139.6
askp                                    140.6
bid_sz                                     74
ask_sz                                    142
exp_date                  2024-06-21 00:00:00
action                                      P
strike_price                           4740.0
Name: 2984, dtype: object, Overall: 2484.40390625
2024-01-04
Date: 2024-01-04, Underlying: 4715.08984375, Closest Call: datetime        2024