# Data Preparation
- Get all the trading days
- Create two sets for training and testing

In [1]:
import datetime as dt
import utils
import pandas as pd
import icharts
from functools import cache


TEST_START = dt.datetime.strptime("2023-01-01", "%Y-%m-%d")
TEST_END = dt.datetime.strptime("2023-12-31", "%Y-%m-%d")
SYMBOL = "NIFTY 50"
IC_SYMBOL = "NIFTY"
INTERVAL = utils.INTERVAL_MIN1
EXCHANGE = utils.EXCHANGE_NSE
pickle_file_name = "ocdf_2024_02_17.pkl"
# pickle_file_name = "test_analyzer_ocdf_2024_02_17.pkl"

pd.set_option("display.max_colwidth", None)
pd.set_option("display.max_rows", 200)

In [2]:
def build_date_range(date_start, date_end, symbol):
    date_range = []
    cur_date = date_start
    while cur_date < date_end:
        if cur_date.weekday() not in [5, 6]:
            has_data, _ = utils.has_data(symbol, cur_date, interval=INTERVAL, exchange=EXCHANGE)
            if has_data:
                date_range.append(cur_date)
        cur_date += dt.timedelta(days=1)
    return date_range

all_dates = pd.DataFrame({"trade_date": build_date_range(TEST_START, TEST_END, SYMBOL)})
all_dates_shuffled = all_dates.sample(frac=1, random_state=42)

train_size = int(0.5 * len(all_dates_shuffled))
train_dates = all_dates_shuffled.iloc[:train_size]
test_dates = all_dates_shuffled.iloc[train_size:]
train_dates = train_dates.sort_values(by="trade_date")
train_dates.set_index("trade_date", inplace=True)
test_dates = test_dates.sort_values(by="trade_date")
test_dates.set_index("trade_date", inplace=True)

train_dates = test_dates

In [3]:
@cache
def get_intraday_data(date):
    return utils.get_data(symbol=SYMBOL, date=date, interval=INTERVAL, exchange=EXCHANGE)

@cache
def get_symbol_first_candle(symbol, trade_date):
    data = utils.get_data(symbol=SYMBOL, date=trade_date, interval=INTERVAL, exchange=EXCHANGE)
    return data.iloc[0].open, data.iloc[0].high, data.iloc[0].low, data.iloc[0].close

@cache
def get_first_candle_close(symbol, trade_date):
    data = utils.get_data(symbol=SYMBOL, date=trade_date, interval=INTERVAL, exchange=EXCHANGE)
    return data.iloc[0].close

@cache
def get_last_trading_day(date):
    return utils.get_last_trading_day(SYMBOL, date, interval=INTERVAL, exchange=utils.EXCHANGE_NSE)

train_dates["previous_trading_day"] = None
train_dates["previous_trading_day"] = train_dates.apply(lambda row: get_last_trading_day(row.name), axis=1)
train_dates["expiry"] = train_dates.apply(lambda row: utils.find_closest_expiry(SYMBOL, row.name), axis=1)
train_dates["first_candle_ohlc"] = train_dates.apply(lambda row: get_symbol_first_candle(IC_SYMBOL, row.name), axis=1)
train_dates["market_open"] = train_dates.apply(lambda row: row["first_candle_ohlc"][0], axis=1)
train_dates["first_candle_high"] = train_dates.apply(lambda row: row["first_candle_ohlc"][1], axis=1)
train_dates["first_candle_low"] = train_dates.apply(lambda row: row["first_candle_ohlc"][2], axis=1)
train_dates["first_candle_close"] = train_dates.apply(lambda row: row["first_candle_ohlc"][3], axis=1)
del train_dates["first_candle_ohlc"]



result = []
for i, row in train_dates.iterrows():
    ocdf = icharts.get_oc_df(IC_SYMBOL, row.expiry, row.previous_trading_day)
    ocdf = ocdf.loc[((row.market_open - 700) < ocdf.index) & ((row.market_open + 700) > ocdf.index)]
    ocdf.loc[:,"trade_date"] = row.name
    for col, val in row.items():
        ocdf[col] = val
    result.append(ocdf)
ocdf = pd.concat(result)

ocdf["previous_trading_candles"] = ocdf.apply(lambda row: get_intraday_data(row.previous_trading_day), axis=1)
ocdf["previous_trading_open"] = ocdf.apply(lambda row: row["previous_trading_candles"].iloc[0].open, axis=1)
ocdf["previous_trading_close"] = ocdf.apply(lambda row: row["previous_trading_candles"].iloc[-1].close, axis=1)
del ocdf["previous_trading_candles"]
# ocdf["previous_day_option_chain_file_path"] = ocdf.apply(lambda row: icharts.get_option_chain_file_path(symbol=SYMBOL, expiry=row.expiry, cur_dt=row.previous_trading_day), axis=1)
# train_dates["pdoc"] = train_dates.apply(lambda row: icharts.get_oc_df(IC_SYMBOL, row.expiry, row.previous_trading_day), axis=1) # pdoc - Previous day option chain
ocdf["first_candle_open"] = ocdf["market_open"]
ocdf["market_open_pt"] = ocdf["market_open"] - ocdf["previous_trading_close"]
ocdf["market_open_pc"] = ocdf["market_open_pt"] / ocdf["previous_trading_open"]
ocdf["first_candle_change_pt"] = ocdf["first_candle_close"] - ocdf["first_candle_open"]
ocdf["first_candle_change_pc"] = ocdf["first_candle_change_pt"] / ocdf["first_candle_open"]
# train_dates[["first_candle_open", "first_candle_close", "market_open_pc", "first_candle_change_pc", "first_candle_change_pt", "market_open_pt", "market_open_pc"]]

get_intraday_data.cache_clear()
get_symbol_first_candle.cache_clear()
get_first_candle_close.cache_clear()
get_last_trading_day.cache_clear()

## Calculate Expected Premium for each strike after market opens

In [4]:

def calculate_expected_premium(r, delta, theta, market_open_pt):
    return delta * market_open_pt + theta

ocdf["ec_ce_pt"] = ocdf.apply(lambda r: calculate_expected_premium(r, r.ce_delta, r.ce_theta, r.market_open_pt), axis=1) # ec - expected points change in premium
ocdf["ec_pe_pt"] = ocdf.apply(lambda r: calculate_expected_premium(r, r.pe_delta, r.pe_theta, r.market_open_pt), axis=1) # ec - expected points change in premium
ocdf["ec_ce_pc"] = ocdf["ec_ce_pt"] / ocdf["ce_ltp"]
ocdf["ec_pe_pc"] = ocdf["ec_pe_pt"] / ocdf["pe_ltp"]
ocdf

Unnamed: 0_level_0,ce_build_up,ce_trend,ce_time,ce_vega,ce_theta,ce_gamma,ce_delta,ce_iv_chg_pc,ce_iv_chg,ce_iv,...,previous_trading_close,first_candle_open,market_open_pt,market_open_pc,first_candle_change_pt,first_candle_change_pc,ec_ce_pt,ec_pe_pt,ec_ce_pc,ec_pe_pc
strike_price,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
17500,LB,Bullish,02-Jan-2023_EOD,0.0370,-0.0852,0.0000,0.9994,-,-,18.42,...,18207.1,18163.20,-43.90,-0.002421,10.05,0.000553,-43.958860,-1.634250,-0.061224,-0.797195
17550,LB,Bullish,02-Jan-2023_EOD,0.0752,-0.1729,0.0000,0.9986,-,-,20.54,...,18207.1,18163.20,-43.90,-0.002421,10.05,0.000553,-44.011440,-1.917520,-0.065689,-0.913105
17600,LB,Bullish,02-Jan-2023_EOD,0.1448,-0.3328,0.0000,0.9971,58.67,7.04,19.04,...,18207.1,18163.20,-43.90,-0.002421,10.05,0.000553,-44.105490,-1.980430,-0.071155,-0.707296
17650,LB,Bullish,02-Jan-2023_EOD,0.2642,-0.6072,0.0001,0.9944,60.21,7.52,20.01,...,18207.1,18163.20,-43.90,-0.002421,10.05,0.000553,-44.261360,-1.876330,-0.077306,-0.625443
17700,SC,Bullish,02-Jan-2023_EOD,0.4572,-1.0507,0.0001,0.9896,23.58,3.00,15.72,...,18207.1,18163.20,-43.90,-0.002421,10.05,0.000553,-44.494140,-2.229330,-0.085689,-0.543739
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22200,LB,Bullish,28-Dec-2023_EOD,7.7269,-7.1584,0.0007,0.1728,0.23,0.03,12.97,...,21773.7,21737.65,-36.05,-0.001660,-11.30,-0.000520,-13.387840,22.046625,-0.371369,0.060402
22250,LB,Bullish,28-Dec-2023_EOD,6.9619,-6.5740,0.0006,0.1473,4.42,0.56,13.22,...,21773.7,21737.65,-36.05,-0.001660,-11.30,-0.000520,-11.884165,24.048470,-0.394823,0.052634
22300,LB,Bullish,28-Dec-2023_EOD,6.0894,-5.7588,0.0005,0.1212,4.50,0.57,13.24,...,21773.7,21737.65,-36.05,-0.001660,-11.30,-0.000520,-10.128060,25.606370,-0.426445,0.051090
22350,LB,Bullish,28-Dec-2023_EOD,5.2266,-4.9354,0.0004,0.0980,4.18,0.53,13.22,...,21773.7,21737.65,-36.05,-0.001660,-11.30,-0.000520,-8.468300,30.032325,-0.461488,0.055879


In [None]:
# ocdf.loc[ocdf.ce_delta > 0, "ce_delta"].index
# x = ocdf.loc['15050']
# # print(f"exp: {x.expiry}, date: {x.cur_date}, ch: {x.market_open_pt}, trade: {x.index}")
# x[["ec_ce_pt", "ec_ce_pc", "ec_pe_pt", "ec_pe_pc", "ce_delta", "ce_theta", "pe_delta", "pe_theta"]]

In [5]:
def ct(fn):
    def wraps(*args, **kwargs):
        t1 = dt.datetime.now()
        fn(*args, **kwargs)
        t2 = dt.datetime.now()
        print(round((t2-t1).total_seconds(), 2))
    return wraps

print(ocdf.shape)
def get_market_open_ohlc(expiry, trade_date, strike_price, option_type):
    def get_premium_df():
        try:
            pr = icharts.get_opt_pre_df(SYMBOL, expiry, trade_date, strike_price, option_type)
        except FileNotFoundError:
            # print(f"ex: {expiry}, sp:{strike_price}, td:{trade_date}")
            return pd.NA
        return pr
    opdf = get_premium_df()
    if type(opdf) == type(pd.NA):
        return (pd.NA,pd.NA,pd.NA,pd.NA,pd.NA)
    date = trade_date.replace(hour=9, minute=15, second=0)
    last_candles = opdf[opdf.index >= date]
    if last_candles.shape[0] > 0:
        last = last_candles.iloc[0]
        return last.open, last.high, last.low, last.close, last.volume
    return (pd.NA,pd.NA,pd.NA,pd.NA,pd.NA)

# print(f"{SYMBOL}, ex: {x.pdoc.iloc[0].expiry}, x.pdoc.iloc[0].cur_date, x.pdoc.iloc[0].strike_price, utils.OPTION_TYPE_CALL")
# get_option_premium_file_path(SYMBOL, x.pdoc.iloc[30].expiry, x.pdoc.iloc[30].cur_date, x.pdoc.iloc[30].name, icharts.OPTION_TYPE_CALL)
# opdf = icharts.get_opt_pre_df(SYMBOL, x.pdoc.iloc[30].expiry, x.pdoc.iloc[30].cur_date, x.pdoc.iloc[30].name, icharts.OPTION_TYPE_PUT)
# get_market_open_ohlc(opdf, dt.datetime.strptime("2023-01-05", "%Y-%m-%d"))

# def inner_set_actual_points_change(r, option_type):
#     try:
#         pr = icharts.get_opt_pre_df(SYMBOL, r.expiry, r.trade_date, r.name, option_type)
#     except FileNotFoundError:
#         return pd.NA
#     return pr

# def set_actual_points_change(row):
#     def inner_set_actual_points_change(r, option_type):
#         try:
#             pr = icharts.get_opt_pre_df(SYMBOL, r.expiry, r.cur_date, r.name, option_type)
#         except FileNotFoundError:
#             return pd.NA
#         return pr

#     row.pdoc["ce_premium"] = row.pdoc.apply(lambda r: inner_set_actual_points_change(r, icharts.OPTION_TYPE_CALL), axis=1)
# row.pdoc["pe_premium"] = row.pdoc.apply(lambda r: inner_set_actual_points_change(r, icharts.OPTION_TYPE_PUT), axis=1)
# row.pdoc["ce_first_candle"] = row.pdoc.apply(lambda r: get_market_open_ohlc(r.ce_premium, r.cur_date), axis=1)
# row.pdoc["ce_first_candle_open"] = row.pdoc.apply(lambda r: r["ce_first_candle"][0], axis=1)
# row.pdoc["ce_first_candle_high"] = row.pdoc.apply(lambda r: r["ce_first_candle"][1], axis=1)
# row.pdoc["ce_first_candle_low"] = row.pdoc.apply(lambda r: r["ce_first_candle"][2], axis=1)
# row.pdoc["ce_first_candle_close"] = row.pdoc.apply(lambda r: r["ce_first_candle"][3], axis=1)
# row.pdoc["ce_first_candle_volume"] = row.pdoc.apply(lambda r: r["ce_first_candle"][4], axis=1)

# row.pdoc["pe_first_candle"] = row.pdoc.apply(lambda r: get_market_open_ohlc(r.pe_premium, r.cur_date), axis=1)
# row.pdoc["pe_first_candle_open"] = row.pdoc.apply(lambda r: r["pe_first_candle"][0], axis=1)
# row.pdoc["pe_first_candle_high"] = row.pdoc.apply(lambda r: r["pe_first_candle"][1], axis=1)
# row.pdoc["pe_first_candle_low"] = row.pdoc.apply(lambda r: r["pe_first_candle"][2], axis=1)
# row.pdoc["pe_first_candle_close"] = row.pdoc.apply(lambda r: r["pe_first_candle"][3], axis=1)
# row.pdoc["pe_first_candle_volume"] = row.pdoc.apply(lambda r: r["pe_first_candle"][4], axis=1)

# row.pdoc["ce_actual_chg_pt"] = row.pdoc["ce_first_candle_close"] - row.pdoc["ce_first_candle_open"]
# row.pdoc["ce_actual_chg_pc"] = row.pdoc["ce_actual_chg_pt"] / row.pdoc["ce_first_candle_open"]
# row.pdoc["pe_actual_chg_pt"] = row.pdoc["pe_first_candle_close"] - row.pdoc["pe_first_candle_open"]
# row.pdoc["pe_actual_chg_pc"] = row.pdoc["pe_actual_chg_pt"] / row.pdoc["pe_first_candle_open"]

# train_dates.apply(set_actual_points_change, axis=1)

ocdf["ce_first_candle"] = ocdf.apply(lambda r: get_market_open_ohlc(r.expiry, r.trade_date, r.name, icharts.OPTION_TYPE_CALL), axis=1)
ocdf["pe_first_candle"] = ocdf.apply(lambda r: get_market_open_ohlc(r.expiry, r.trade_date, r.name, icharts.OPTION_TYPE_PUT), axis=1)

(3444, 74)


In [7]:
ocdf["ce_first_candle_open"] = ocdf.apply(lambda r: r["ce_first_candle"][0], axis=1)
ocdf["ce_first_candle_high"] = ocdf.apply(lambda r: r["ce_first_candle"][1], axis=1)
ocdf["ce_first_candle_low"] = ocdf.apply(lambda r: r["ce_first_candle"][2], axis=1)
ocdf["ce_first_candle_close"] = ocdf.apply(lambda r: r["ce_first_candle"][3], axis=1)
ocdf["ce_first_candle_volume"] = ocdf.apply(lambda r: r["ce_first_candle"][4], axis=1)
ocdf["ce_first_candle_chg_pt"] = ocdf["ce_first_candle_close"] - ocdf["ce_first_candle_open"]
ocdf["ce_first_candle_chg_pc"] = ocdf["ce_first_candle_chg_pt"] / ocdf["ce_first_candle_open"]

ocdf["pe_first_candle_open"] = ocdf.apply(lambda r: r["pe_first_candle"][0], axis=1)
ocdf["pe_first_candle_high"] = ocdf.apply(lambda r: r["pe_first_candle"][1], axis=1)
ocdf["pe_first_candle_low"] = ocdf.apply(lambda r: r["pe_first_candle"][2], axis=1)
ocdf["pe_first_candle_close"] = ocdf.apply(lambda r: r["pe_first_candle"][3], axis=1)
ocdf["pe_first_candle_volume"] = ocdf.apply(lambda r: r["pe_first_candle"][4], axis=1)
ocdf["pe_first_candle_chg_pt"] = ocdf["pe_first_candle_close"] - ocdf["pe_first_candle_open"]
ocdf["pe_first_candle_chg_pc"] = ocdf["pe_first_candle_chg_pt"] / ocdf["pe_first_candle_open"]

ocdf["ce_actual_chg_pt"] = ocdf["ce_first_candle_open"] - ocdf["ce_ltp"]
ocdf["ce_actual_chg_pc"] = ocdf["ce_actual_chg_pt"] / ocdf["ce_ltp"]

ocdf["pe_actual_chg_pt"] = ocdf["pe_first_candle_open"] - ocdf["pe_ltp"]
ocdf["pe_actual_chg_pc"] = ocdf["pe_actual_chg_pt"] / ocdf["pe_ltp"]

ocdf["ce_ac_ex_diff"] = ocdf["ce_actual_chg_pc"] - ocdf["ec_ce_pc"]
ocdf["pe_ac_ex_diff"] = ocdf["pe_actual_chg_pc"] - ocdf["ec_pe_pc"]

In [8]:
ocdf.to_pickle(pickle_file_name)

In [9]:
ocdf = pd.read_pickle(pickle_file_name)

In [None]:
# x = ocdf.iloc[0]
# ocdf["ce_first_candle"]
# ocdf.loc[ocdf.ec_ce_pc.notna(),["ce_ltp", "ec_ce_pt", "ec_ce_pc", "ce_actual_chg_pt", "ce_actual_chg_pc", "ce_first_candle"]]

In [8]:
oc_columns = [
# "strike_price",
"trade_date",
"expiry",
"oc_date",
"market_open",
"market_open_pt",
"market_open_pc",
"previous_trading_open",
"previous_trading_close",
"first_candle_change_pt",
"first_candle_change_pc",
"ec_ce_pt",
"ec_ce_pc",
"ce_actual_chg_pt",
"ce_actual_chg_pc",
"ce_ac_ex_diff",
"ec_pe_pt",
"ec_pe_pc",
"pe_ac_ex_diff",
"pe_actual_chg_pt",
"pe_actual_chg_pc",
"first_candle_open",
"first_candle_high",
"first_candle_low",
"first_candle_close",
"ce_first_candle_chg_pt",
"ce_first_candle_chg_pc",
"pe_first_candle_chg_pt",
"pe_first_candle_chg_pc",
"ce_first_candle_open",
"ce_first_candle_high",
"ce_first_candle_low",
"ce_first_candle_close",
"ce_first_candle_volume",
"pe_first_candle_open",
"pe_first_candle_high",
"pe_first_candle_low",
"pe_first_candle_close",
"pe_first_candle_volume",
"previous_trading_day",
"ce_build_up",
"ce_trend",
"ce_time",
"ce_vega",
"ce_theta",
"ce_gamma",
"ce_delta",
"ce_iv_chg_pc",
"ce_iv_chg",
"ce_iv",
"ce_oi_chg_pc",
"ce_oi_chg",
"ce_oi",
"ce_volume_chg_pc",
"ce_volume_chg",
"ce_volume",
"ce_int_val",
"ce_ext_val",
"ce_ohol",
"ce_ltp_chg_pc",
"ce_ltp_chg",
"ce_ltp",
"ce_vwap",
"ce_bid",
"ce_ask",
"pe_ce_oi",
"pe_ce_oi_chg",
"pe_bid",
"pe_ask",
"pe_vwap",
"pe_ltp",
"pe_ltp_chg",
"pe_ltp_chg_pc",
"pe_ohol",
"pe_int_val",
"pe_ext_val",
"pe_volume",
"pe_volume_chg",
"pe_volume_chg_pc",
"pe_oi",
"pe_oi_chg",
"pe_oi_chg_pc",
"pe_iv",
"pe_iv_chg",
"pe_iv_chg_pc",
"pe_delta",
"pe_gamma",
"pe_theta",
"pe_vega",
"pcr_oi",
"pcr_oi_chg",
"pcr_vol",
"pe_time",
"pe_trend",
"pe_build_up",
]
ocdf.to_csv("final_output.csv", columns=oc_columns)