# Data Preparation
- Get all the trading days
- Create two sets for training and testing

In [5]:
import datetime as dt
import utils
import pandas as pd
import icharts

# Date window scanned for trading days. Despite the TEST_ prefix, both the
# training and the test sets are drawn from this window further down.
TEST_START = dt.datetime(2023, 1, 1)
TEST_END = dt.datetime(2023, 12, 31)

# SYMBOL is passed to the `utils` helpers; IC_SYMBOL is passed to the
# `icharts` option-chain loader.
SYMBOL = "NIFTY 50"
IC_SYMBOL = "NIFTY"

# Candle interval and exchange used for every `utils` data request.
INTERVAL = utils.INTERVAL_MIN1
EXCHANGE = utils.EXCHANGE_NSE

# Notebook display: never truncate cell contents, show up to 200 rows.
pd.set_option("display.max_colwidth", None)
pd.set_option("display.max_rows", 200)

def build_date_range(date_start, date_end, symbol):
    """Return the days in ``[date_start, date_end)`` that have data for ``symbol``.

    Saturdays and Sundays are skipped without querying the data store. Every
    other day is kept only when ``utils.has_data`` reports data for it, using
    the module-level ``INTERVAL`` and ``EXCHANGE``. ``date_end`` itself is
    never included.
    """
    one_day = dt.timedelta(days=1)
    trading_days = []
    day = date_start
    while day < date_end:
        # weekday() numbers Monday as 0, so 5 and 6 are Saturday and Sunday.
        if day.weekday() < 5:
            available, _ = utils.has_data(symbol, day, interval=INTERVAL, exchange=EXCHANGE)
            if available:
                trading_days.append(day)
        day = day + one_day
    return trading_days

# Collect every day with data in the window, then shuffle the rows with a
# fixed seed so the split is reproducible.
all_dates = pd.DataFrame({"trade_date": build_date_range(TEST_START, TEST_END, SYMBOL)})
all_dates_shuffled = all_dates.sample(frac=1, random_state=42)

# First half of the shuffled days becomes the training set, the rest the test
# set; each set is put back in chronological order and indexed by trade_date.
train_size = len(all_dates_shuffled) // 2
train_dates = (
    all_dates_shuffled.iloc[:train_size]
    .sort_values(by="trade_date")
    .set_index("trade_date")
)
test_dates = (
    all_dates_shuffled.iloc[train_size:]
    .sort_values(by="trade_date")
    .set_index("trade_date")
)

In [6]:
def get_intraday_data(row):
    """Fetch the data for SYMBOL on the row's ``previous_trading_day``.

    The request uses the module-level ``INTERVAL`` and ``EXCHANGE`` and returns
    whatever ``utils.get_data`` returns for that day.
    """
    prior_day = row.previous_trading_day
    return utils.get_data(
        symbol=SYMBOL,
        date=prior_day,
        interval=INTERVAL,
        exchange=EXCHANGE,
    )

def get_premium(symbol, expiry, trade_date, option_type):
    """Placeholder: not implemented yet, always returns ``None``."""
    return None

def get_symbol_first_candle(symbol, trade_date):
    """Return ``(open, high, low, close)`` of the first row loaded for ``trade_date``.

    Note: ``symbol`` is accepted but not used; the data is always requested
    for the module-level ``SYMBOL`` with ``INTERVAL`` and ``EXCHANGE``.
    """
    candles = utils.get_data(symbol=SYMBOL, date=trade_date, interval=INTERVAL, exchange=EXCHANGE)
    opening = candles.iloc[0]
    return opening.open, opening.high, opening.low, opening.close

def get_first_candle_close(symbol, trade_date):
    """Return the ``close`` of the first row loaded for ``trade_date``.

    Note: ``symbol` is accepted but not used; the data is always requested
    for the module-level ``SYMBOL`` with ``INTERVAL`` and ``EXCHANGE``.
    """
    candles = utils.get_data(symbol=SYMBOL, date=trade_date, interval=INTERVAL, exchange=EXCHANGE)
    opening = candles.iloc[0]
    return opening.close

def get_last_trading_day(row):
    """Look up the last trading day for the row's index label (``row.name``).

    Delegates to ``utils.get_last_trading_day`` for SYMBOL at INTERVAL. The
    exchange is passed as ``utils.EXCHANGE_NSE`` directly rather than through
    the module-level ``EXCHANGE`` constant.
    """
    trade_date = row.name
    return utils.get_last_trading_day(
        SYMBOL,
        trade_date,
        interval=INTERVAL,
        exchange=utils.EXCHANGE_NSE,
    )

# --- Previous trading day ----------------------------------------------------
# Seed a placeholder column first: after set_index the frame has an index but
# no columns, and DataFrame.apply(axis=1) presumably does not invoke the
# function once per row on a column-less frame.
train_dates["previous_trading_day"] = None
train_dates["previous_trading_day"] = train_dates.apply(get_last_trading_day, axis=1)
train_dates["previous_trading_candles"] = train_dates.apply(get_intraday_data, axis=1)
train_dates["previous_trading_open"] = train_dates.apply(lambda row: row["previous_trading_candles"].iloc[0].open, axis=1)
train_dates["previous_trading_close"] = train_dates.apply(lambda row: row["previous_trading_candles"].iloc[-1].close, axis=1)

# --- Previous day's option chain for the closest expiry ------------------------
train_dates["expiry"] = train_dates.apply(lambda row: utils.find_closest_expiry(SYMBOL, row.name), axis=1)
# NOTE(review): the file path is built with SYMBOL while the chain itself is
# loaded with IC_SYMBOL - confirm both refer to the same instrument.
train_dates["previous_day_option_chain_file_path"] = train_dates.apply(lambda row: icharts.get_option_chain_file_path(symbol=SYMBOL, expiry=row.expiry, cur_dt=row.previous_trading_day), axis=1)
train_dates["pdoc"] = train_dates.apply(lambda row: icharts.get_oc_df(IC_SYMBOL, row.expiry, row.previous_trading_day), axis=1) # pdoc - Previous day option chain

# --- First candle of the trade date --------------------------------------------
# first_candle_ohlc holds an (open, high, low, close) tuple per row; the columns
# below unpack it by position.
train_dates["first_candle_ohlc"] = train_dates.apply(lambda row: get_symbol_first_candle(IC_SYMBOL, row.name), axis=1)
train_dates["market_open"] = train_dates.apply(lambda row: row["first_candle_ohlc"][0], axis=1)
train_dates["first_candle_open"] = train_dates["market_open"]
train_dates["first_candle_high"] = train_dates.apply(lambda row: row["first_candle_ohlc"][1], axis=1)
train_dates["first_candle_low"] = train_dates.apply(lambda row: row["first_candle_ohlc"][2], axis=1)
train_dates["first_candle_close"] = train_dates.apply(lambda row: row["first_candle_ohlc"][3], axis=1)

# --- Derived changes (pt = points, pc = fraction of the base value) -------------
# Overnight gap: today's open versus the previous day's last close.
train_dates["market_open_pt"] = train_dates["market_open"] - train_dates["previous_trading_close"]
# The gap is measured from the previous close, so the previous close is also the
# base of the relative change (this used to divide by the previous day's open).
train_dates["market_open_pc"] = train_dates["market_open_pt"] / train_dates["previous_trading_close"]
# Move inside the first candle, relative to that candle's open.
train_dates["first_candle_change_pt"] = train_dates["first_candle_close"] - train_dates["first_candle_open"]
train_dates["first_candle_change_pc"] = train_dates["first_candle_change_pt"] / train_dates["first_candle_open"]
# train_dates[["first_candle_open", "first_candle_close", "market_open_pc", "first_candle_change_pc", "first_candle_change_pt", "market_open_pt", "market_open_pc"]]

## Calculate the expected premium change for each strike at market open

In [7]:
def calculate_expected_premium(r, delta, theta, market_open_pt):
    """Return the expected change in premium: ``delta * market_open_pt + theta``.

    ``r`` (the option-chain row) is accepted but not used in the calculation.
    """
    delta_component = delta * market_open_pt
    expected_change = delta_component + theta
    return expected_change

def set_expected_change(row):
    """Add expected premium-change columns to the row's option chain (``row["pdoc"]``).

    The chain frame is modified in place and nothing is returned. Columns added:
    ``ec_ce_pt`` / ``ec_pe_pt`` - expected change in points for calls / puts,
    computed per strike from delta, theta and the row's ``market_open_pt``;
    ``ec_ce_pc`` / ``ec_pe_pc`` - the same change divided by ``ce_ltp`` / ``pe_ltp``.
    """
    chain = row["pdoc"]
    gap_points = row.market_open_pt

    def expected_points(delta_col, theta_col):
        # One expected change per strike, from that strike's delta and theta.
        return chain.apply(
            lambda r: calculate_expected_premium(r, r[delta_col], r[theta_col], gap_points),
            axis=1,
        )

    chain["ec_ce_pt"] = expected_points("ce_delta", "ce_theta")
    chain["ec_pe_pt"] = expected_points("pe_delta", "pe_theta")
    chain["ec_ce_pc"] = chain["ec_ce_pt"] / chain["ce_ltp"]
    chain["ec_pe_pc"] = chain["ec_pe_pt"] / chain["pe_ltp"]

# Add the expected-change columns to every training day's option chain.
# set_expected_change mutates each row's "pdoc" frame and returns nothing, so
# the Series produced by apply is discarded.
train_dates.apply(set_expected_change, axis=1)
print("Hi")  # presumably just a marker that the cell ran to completion

Hi


In [5]:
# x = train_dates.iloc[1]
# print(f"exp: {x["pdoc"].iloc[0].expiry}, date: {x["pdoc"].iloc[0].cur_date}, ch: {x.market_open_pt}, trade: {x.name}")
# x.pdoc[["ec_ce_pt", "ec_ce_pc", "ec_pe_pt", "ec_pe_pc", "ce_delta", "ce_theta", "pe_delta", "pe_theta"]]

In [4]:

def get_market_open_ohlc(opdf, date):
    """Return the first candle at or after 09:15:00 on ``date``.

    Args:
        opdf: Candle frame indexed by timestamp with ``open``, ``high``,
            ``low``, ``close`` and ``volume`` columns, or ``pd.NA`` / ``None``
            when no premium data is available.
        date: Datetime whose calendar day is used; its time of day is
            replaced by 09:15:00.

    Returns:
        An ``(open, high, low, close, volume)`` tuple taken from the first row
        whose index is >= 09:15:00 on ``date``, or five ``pd.NA`` values when
        ``opdf`` is missing or has no such row.
    """
    missing = (pd.NA,) * 5
    # pd.NA is a singleton, so an identity check is the reliable way to detect it.
    if opdf is None or opdf is pd.NA:
        return missing

    # Zero the microseconds too; a leftover fraction of a second would push the
    # cutoff past the 09:15:00 candle and skip it.
    market_open = date.replace(hour=9, minute=15, second=0, microsecond=0)
    candles = opdf[opdf.index >= market_open]
    if len(candles) == 0:
        return missing

    first = candles.iloc[0]
    return first.open, first.high, first.low, first.close, first.volume

# print(f"{SYMBOL}, ex: {x.pdoc.iloc[0].expiry}, x.pdoc.iloc[0].cur_date, x.pdoc.iloc[0].strike_price, utils.OPTION_TYPE_CALL")
# get_option_premium_file_path(SYMBOL, x.pdoc.iloc[30].expiry, x.pdoc.iloc[30].cur_date, x.pdoc.iloc[30].name, icharts.OPTION_TYPE_CALL)
# opdf = icharts.get_opt_pre_df(SYMBOL, x.pdoc.iloc[30].expiry, x.pdoc.iloc[30].cur_date, x.pdoc.iloc[30].name, icharts.OPTION_TYPE_PUT)
# get_market_open_ohlc(opdf, dt.datetime.strptime("2023-01-05", "%Y-%m-%d"))

def set_actual_points_change(row):
    """Attach premium candles and first-candle changes to the row's option chain.

    Works in place on ``row.pdoc`` and returns nothing. For calls (``ce``) and
    puts (``pe``) it adds, per strike:

    * ``<side>_premium`` - the frame from ``icharts.get_opt_pre_df`` (``pd.NA``
      when the file is not found);
    * ``<side>_first_candle`` plus ``_open`` / ``_high`` / ``_low`` / ``_close`` /
      ``_volume`` - the tuple from ``get_market_open_ohlc`` and its elements;
    * ``<side>_actual_chg_pt`` / ``<side>_actual_chg_pc`` - first-candle close
      minus open, in points and divided by the open.

    NOTE(review): the first candle is looked up with the chain row's
    ``cur_date`` - confirm that this is the intended date.
    """
    chain = row.pdoc
    sides = (("ce", icharts.OPTION_TYPE_CALL), ("pe", icharts.OPTION_TYPE_PUT))
    candle_fields = ("open", "high", "low", "close", "volume")

    def load_premium(r, option_type):
        # r.name is the chain row's index label, passed on as the strike argument.
        try:
            return icharts.get_opt_pre_df(SYMBOL, r.expiry, r.cur_date, r.name, option_type)
        except FileNotFoundError:
            return pd.NA

    # Pass 1: premium frames, calls first and then puts.
    for side, option_type in sides:
        chain[f"{side}_premium"] = chain.apply(
            lambda r, option_type=option_type: load_premium(r, option_type),
            axis=1,
        )

    # Pass 2: first candle per strike, unpacked into one column per field.
    for side, _ in sides:
        premium_col = f"{side}_premium"
        candle_col = f"{side}_first_candle"
        chain[candle_col] = chain.apply(
            lambda r, premium_col=premium_col: get_market_open_ohlc(r[premium_col], r.cur_date),
            axis=1,
        )
        for pos, field in enumerate(candle_fields):
            chain[f"{candle_col}_{field}"] = chain.apply(
                lambda r, candle_col=candle_col, pos=pos: r[candle_col][pos],
                axis=1,
            )

    # Pass 3: change inside the first candle, in points and relative to its open.
    for side, _ in sides:
        open_col = f"{side}_first_candle_open"
        points_col = f"{side}_actual_chg_pt"
        chain[points_col] = chain[f"{side}_first_candle_close"] - chain[open_col]
        chain[f"{side}_actual_chg_pc"] = chain[points_col] / chain[open_col]

# Attach premium candles and first-candle changes to every training day's
# option chain (each row's "pdoc" frame is modified in place).
# NOTE(review): this loads one premium file per strike and option type for every
# day; the recorded run of this cell ended in a KeyboardInterrupt.
train_dates.apply(set_actual_points_change, axis=1)

KeyboardInterrupt: 

In [14]:
# Inspect the first training day: list the columns currently present on its
# previous-day option chain frame.
x = train_dates.iloc[0]

# x.pdoc[["ce_ltp", "ec_ce_pt", "ec_ce_pc", "ce_actual_chg_pt", "ce_actual_chg_pc", "ce_first_candle"]]
print(x.pdoc.columns)

Index(['ce_build_up', 'ce_trend', 'ce_time', 'ce_vega', 'ce_theta', 'ce_gamma',
       'ce_delta', 'ce_iv_chg_pc', 'ce_iv_chg', 'ce_iv', 'ce_oi_chg_pc',
       'ce_oi_chg', 'ce_oi', 'ce_volume_chg_pc', 'ce_volume_chg', 'ce_volume',
       'ce_int_val', 'ce_ext_val', 'ce_ohol', 'ce_ltp_chg_pc', 'ce_ltp_chg',
       'ce_ltp', 'ce_vwap', 'ce_bid', 'ce_ask', 'pe_ce_oi', 'pe_ce_oi_chg',
       'pe_bid', 'pe_ask', 'pe_vwap', 'pe_ltp', 'pe_ltp_chg', 'pe_ltp_chg_pc',
       'pe_ohol', 'pe_int_val', 'pe_ext_val', 'pe_volume', 'pe_volume_chg',
       'pe_volume_chg_pc', 'pe_oi', 'pe_oi_chg', 'pe_oi_chg_pc', 'pe_iv',
       'pe_iv_chg', 'pe_iv_chg_pc', 'pe_delta', 'pe_gamma', 'pe_theta',
       'pe_vega', 'pcr_oi', 'pcr_oi_chg', 'pcr_vol', 'pe_time', 'pe_trend',
       'pe_build_up', 'expiry', 'cur_date', 'ec_ce_pt', 'ec_pe_pt', 'ec_ce_pc',
       'ec_pe_pc'],
      dtype='object')
