# Data Preparation
- Get all the trading days
- Create two sets for training and testing

In [1]:
import datetime as dt
import utils
import pandas as pd
import icharts

# Date window scanned for trading days. Despite the TEST_ prefix, the days found
# in this window are later split into both the training and the testing set.
TEST_START = dt.datetime(2023, 1, 1)
TEST_END = dt.datetime(2023, 12, 31)

# The same instrument under two names: SYMBOL is what the `utils` data helpers
# are called with, IC_SYMBOL is what the icharts option-chain loaders are called with.
SYMBOL = "NIFTY 50"
IC_SYMBOL = "NIFTY"

# Candle interval and exchange used for all `utils` data requests below.
INTERVAL = utils.INTERVAL_MIN30
EXCHANGE = utils.EXCHANGE_NSE

# Notebook display settings: never truncate cell contents, show up to 200 rows.
pd.set_option("display.max_rows", 200)
pd.set_option("display.max_colwidth", None)

def build_date_range(date_start, date_end, symbol):
    """List the days in ``[date_start, date_end)`` that have data for ``symbol``.

    Walks forward one calendar day at a time starting at ``date_start`` and
    stopping before ``date_end`` (the end date itself is never included).
    Saturdays and Sundays are skipped without a lookup; every other day is kept
    only when the first element of the pair returned by ``utils.has_data``
    (queried at the 1-minute interval on the notebook's ``EXCHANGE``) is truthy.

    Args:
        date_start: first day to consider (inclusive).
        date_end: day at which to stop (exclusive).
        symbol: symbol forwarded to ``utils.has_data``.

    Returns:
        list of the accepted days, in ascending order.
    """
    one_day = dt.timedelta(days=1)
    trading_days = []
    day = date_start
    while day < date_end:
        # weekday() is 0..6 with 5 = Saturday and 6 = Sunday.
        if day.weekday() < 5:
            available, _ = utils.has_data(
                symbol, day, interval=utils.INTERVAL_MIN1, exchange=EXCHANGE
            )
            if available:
                trading_days.append(day)
        day = day + one_day
    return trading_days

# One row per trading day found in the configured window.
all_dates = pd.DataFrame({"trade_date": build_date_range(TEST_START, TEST_END, SYMBOL)})

# Shuffle all rows with a fixed seed so the split is reproducible, then cut the
# shuffled frame in half: first half for training, the remainder for testing.
all_dates_shuffled = all_dates.sample(frac=1, random_state=42)
train_size = int(0.5 * len(all_dates_shuffled))

# Each half is put back into chronological order and indexed by its trade date.
train_dates = (
    all_dates_shuffled.iloc[:train_size]
    .sort_values(by="trade_date")
    .set_index("trade_date")
)
test_dates = (
    all_dates_shuffled.iloc[train_size:]
    .sort_values(by="trade_date")
    .set_index("trade_date")
)

In [11]:
def get_intraday_data(row):
    """Fetch the ``utils.get_data`` result for the row's previous trading day.

    Args:
        row: a row of the dates frame; its ``previous_trading_day`` value is
            used as the date to load.

    Returns:
        Whatever ``utils.get_data`` returns for ``SYMBOL`` on that day at the
        notebook's ``INTERVAL`` and ``EXCHANGE``.
    """
    previous_day = row.previous_trading_day
    candles = utils.get_data(
        symbol=SYMBOL,
        date=previous_day,
        interval=INTERVAL,
        exchange=EXCHANGE,
    )
    return candles

def get_premium(symbol, expiry, trade_date, option_type):
    """Placeholder: not implemented yet, always returns ``None``.

    TODO: implement the premium lookup; none of the arguments are used so far.
    """
    return None

def get_symbol_open(symbol, trade_date):
    """Return the ``open`` of the first candle of ``symbol`` on ``trade_date``.

    Args:
        symbol: symbol forwarded to ``utils.get_data``.
        trade_date: day whose data is loaded.

    Returns:
        The ``open`` value of the first row returned by ``utils.get_data`` for
        that day at the notebook's ``INTERVAL`` and ``EXCHANGE``.
    """
    # Use the caller's symbol; the global SYMBOL used to be hard-coded here,
    # which silently ignored the argument.
    candles = utils.get_data(symbol=symbol, date=trade_date, interval=INTERVAL, exchange=EXCHANGE)
    return candles.iloc[0].open

def get_last_trading_day(row):
    """Look up the last trading day for the row's trade date.

    Args:
        row: a row of the dates frame; ``row.name`` is its ``trade_date``
            index label.

    Returns:
        The result of ``utils.get_last_trading_day`` for ``SYMBOL`` and
        ``row.name`` (stored below as the previous trading day).
    """
    return utils.get_last_trading_day(SYMBOL, row.name, interval=INTERVAL, exchange=EXCHANGE)

# train_dates only has its index at this point. Row-wise DataFrame.apply needs
# at least one column to iterate over the rows, so seed the column with a
# placeholder before computing it.
train_dates["previous_trading_day"] = None
train_dates["previous_trading_day"] = train_dates.apply(get_last_trading_day, axis=1)

# Candles of the previous trading day and that session's first open / last close.
train_dates["previous_trading_candles"] = train_dates.apply(get_intraday_data, axis=1)
train_dates["previous_trading_open"] = train_dates.apply(lambda row: row["previous_trading_candles"].iloc[0].open, axis=1)
train_dates["previous_trading_close"] = train_dates.apply(lambda row: row["previous_trading_candles"].iloc[-1].close, axis=1)

# Expiry closest to the trade date, as reported by utils.find_closest_expiry.
train_dates["expiry"] = train_dates.apply(lambda row: utils.find_closest_expiry(SYMBOL, row.name), axis=1)

# Option chain as of the PREVIOUS trading day. Both columns use the icharts
# symbol and the previous trading day so that the path and the loaded chain
# describe the same snapshot, and so that no data from the trade date itself
# (unknown at market open) ends up in a "previous_day_*" column.
# NOTE(review): requires the previous-day snapshots to be present on disk.
train_dates["previous_day_option_chain_file_path"] = train_dates.apply(lambda row: icharts.get_option_chain_file_path(symbol=IC_SYMBOL, expiry=row.expiry, cur_dt=row.previous_trading_day), axis=1)
train_dates["previous_day_option_chain"] = train_dates.apply(lambda row: icharts.get_oc_df(IC_SYMBOL, row.expiry, row.previous_trading_day), axis=1)

# Opening price of the index on the trade date. SYMBOL is passed explicitly:
# it is the symbol the price data has always been fetched with.
train_dates["market_open"] = train_dates.apply(lambda row: get_symbol_open(SYMBOL, row.name), axis=1)

In [12]:
# Show the opening price of every training day.
train_dates.loc[:, "market_open"]

trade_date
2023-01-02    18131.70
2023-01-04    18230.65
2023-01-09    17952.55
2023-01-10    18121.30
2023-01-13    17867.50
2023-01-16    18033.15
2023-01-18    18074.30
2023-01-23    18118.45
2023-01-24    18183.95
2023-01-27    17877.20
2023-01-30    17541.95
2023-02-06    17818.55
2023-02-07    17790.10
2023-02-08    17750.30
2023-02-10    17847.55
2023-02-13    17859.10
2023-02-14    17840.35
2023-02-15    17896.60
2023-02-17    17974.85
2023-02-21    17905.80
2023-02-24    17591.35
2023-03-01    17360.10
2023-03-02    17421.50
2023-03-08    17665.75
2023-03-09    17772.05
2023-03-16    16994.65
2023-03-22    17177.45
2023-03-23    17097.40
2023-03-29    16977.30
2023-04-10    17634.90
2023-04-11    17704.80
2023-04-12    17759.55
2023-04-13    17807.30
2023-04-17    17863.00
2023-04-21    17639.75
2023-04-25    17761.55
2023-04-26    17767.30
2023-04-27    17813.10
2023-04-28    17950.40
2023-05-02    18124.80
2023-05-05    18117.30
2023-05-09    18303.40
2023-05-10    18313.60


In [23]:
import json
import icharts_config

def get_oc_df(symbol, expiry, cur_dt):
    """Load one saved option-chain snapshot into a DataFrame indexed by strike.

    The snapshot file is located with ``icharts.get_option_chain_file_path`` and
    parsed as JSON. Each entry of its ``"aaData"`` list becomes one row, with
    the values named positionally after ``icharts_config.oc_columns``.

    Args:
        symbol: symbol forwarded to ``icharts.get_option_chain_file_path``.
        expiry: expiry forwarded to ``icharts.get_option_chain_file_path``.
        cur_dt: snapshot date forwarded to ``icharts.get_option_chain_file_path``.

    Returns:
        pandas.DataFrame with one row per ``"aaData"`` entry, indexed by the
        ``strike_price`` column.

    Raises:
        ValueError: if the snapshot contains no rows.
    """
    file_path = icharts.get_option_chain_file_path(symbol, expiry, cur_dt)
    # Only the read needs the file handle; release it before building the frame.
    with open(file_path, "r") as f:
        data = json.load(f)

    rows = data["aaData"]
    # Check for an empty snapshot BEFORE building the frame: an empty frame has
    # no "strike_price" column, so set_index would fail with a KeyError first.
    # Raise instead of calling exit(), which would stop the notebook kernel.
    if not rows:
        raise ValueError(f"empty option chain: {file_path}")

    records = [
        {icharts_config.oc_columns[i]: value for i, value in enumerate(row)}
        for row in rows
    ]
    return pd.DataFrame(records).set_index("strike_price")

# Try the loader on the first training day, using that row's expiry and its
# trade date (the row's index label).
test_date = train_dates.iloc[0]
get_oc_df(symbol=IC_SYMBOL, expiry=test_date.expiry, cur_dt=test_date.name)

Unnamed: 0_level_0,ce_build_up,ce_trend,ce_time,ce_vega,ce_theta,ce_gamma,ce_delta,ce_iv_chg_pc,ce_iv_chg,ce_iv,...,pe_delta,pe_gamma,pe_theta,pe_vega,pcr_oi,pcr_oi_chg,pcr_vol,pe_time,pe_trend,pe_build_up
strike_price,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
15050,-,-,02-Jan-2023_EOD,0.0,0.0,0.0,1.0,-,-,0.0,...,-0.0009,0.0,-0.6034,0.0529,-,-,-,02-Jan-2023_EOD,Bullish,LU
15100,-,-,02-Jan-2023_EOD,0.0,0.0,0.0,1.0,0.00,-,0.0,...,-0.001,0.0,-0.6021,0.0536,1062.00,-,-,02-Jan-2023_EOD,Bullish,LU
15150,-,-,02-Jan-2023_EOD,0.0,0.0,0.0,1.0,-,-,0.0,...,0.0,0.0,0.0,0.0,-,-,-,02-Jan-2023_EOD,Bullish,SB
15200,-,-,02-Jan-2023_EOD,0.0,0.0,0.0,1.0,-,-,0.0,...,0.0,0.0,0.0,0.0,-,-,-,02-Jan-2023_EOD,Bullish,SB
15250,-,-,02-Jan-2023_EOD,0.0,0.0,0.0,1.0,-,-,0.0,...,0.0,0.0,0.0,0.0,-,-,-,02-Jan-2023_EOD,Bullish,SB
15300,-,-,02-Jan-2023_EOD,0.0,0.0,0.0,1.0,-,-,0.0,...,-0.0012,0.0,-0.6819,0.0641,-,-,-,02-Jan-2023_EOD,Bullish,SB
15350,-,-,02-Jan-2023_EOD,0.0,0.0,0.0,1.0,-,-,0.0,...,0.0,0.0,0.0,0.0,-,-,-,02-Jan-2023_EOD,-,-
15400,-,-,02-Jan-2023_EOD,0.0,0.0,0.0,1.0,-,-,0.0,...,-0.0014,0.0,-0.7614,0.0733,-,-,-,02-Jan-2023_EOD,Bullish,LU
15450,-,-,02-Jan-2023_EOD,0.0,0.0,0.0,1.0,-,-,0.0,...,0.0,0.0,0.0,0.0,-,-,-,02-Jan-2023_EOD,Bearish,LB
15500,-,-,02-Jan-2023_EOD,0.0,0.0,0.0,1.0,-,-,0.0,...,-0.0014,0.0,-0.7576,0.0756,-,-,-,02-Jan-2023_EOD,Bullish,LU
