# Candle Gaps
> Do big gaps in candles reflect panic or frenzy in the stock?

In [1]:
import datetime as dt
import utils as ut
import pandas as pd
import icharts as ic
from functools import cache
from constants import *
import config
from matplotlib import pyplot as plt
import numpy as np
import nse_plot as nplt


TEST_START = dt.datetime.strptime("2024-01-01", "%Y-%m-%d")
TEST_END = dt.datetime.strptime("2024-02-29", "%Y-%m-%d")
INTERVAL = ut.INTERVAL_MIN1
EXCHANGE = ut.EXCHANGE_NSE
SYMBOL = "HDFCBANK"
SYMBOL = "NIFTY 50"
IC_SYMBOL = "NIFTY"

pd.set_option("display.max_colwidth", None)
pd.set_option("display.max_rows", None)
pd.set_option("display.precision", 2)
pd.options.display.float_format = lambda x: '%.2f' % x

def build_date_range(date_start, date_end, symbol):
    date_range = []
    cur_date = date_start
    while cur_date < date_end:
        if cur_date.weekday() not in [5, 6]:
            has_data, _ = ut.has_data(symbol, cur_date, interval=INTERVAL, exchange=EXCHANGE)
            if has_data:
                date_range.append(cur_date)
        cur_date += dt.timedelta(days=1)
    return date_range

all_dates = pd.DataFrame({"trade_date": build_date_range(TEST_START, TEST_END, SYMBOL)})
all_dates_shuffled = all_dates.sample(frac=1, random_state=42)

train_size = int(0.5 * len(all_dates_shuffled))
train_dates = all_dates_shuffled.iloc[:train_size]
test_dates = all_dates_shuffled.iloc[train_size:]
# train_dates = all_dates
train_dates = train_dates.sort_values(by="trade_date")
train_dates.set_index("trade_date", inplace=True)
test_dates = test_dates.sort_values(by="trade_date")
test_dates.set_index("trade_date", inplace=True)

def get_intraday_data(date):
    return ut.get_data(symbol=SYMBOL, date=date, interval=INTERVAL, exchange=EXCHANGE)

def get_daily_data(date):
    return ut.get_data(symbol=SYMBOL, date=date, interval=INTERVAL_DAY, exchange=EXCHANGE)

def get_last_trading_day(date):
    return ut.get_last_trading_day(SYMBOL, date, interval=INTERVAL, exchange=ut.EXCHANGE_NSE)

def get_symbol_price(d, t):
    data = ut.get_data(symbol=SYMBOL, date=d, interval=INTERVAL, exchange=EXCHANGE)
    try:
        return data.loc[data.index.time == t].iloc[0].open
    except IndexError:
        return pd.NA
    except AttributeError as e:
        return pd.NA

train_dates["expiry"] = pd.NA
train_dates["expiry"] = train_dates.apply(lambda row: ut.find_nclosest_expiry(SYMBOL, row.name, 1), axis=1)

In [2]:
cdf = []
for idx, row in train_dates.iterrows():
    sdf = get_intraday_data(row.name)
    sdf["expiry"] = row.expiry
    cdf.append(sdf)

cdf = pd.concat(cdf)
# cdf["prev_low"] = cdf.low.shift(1)
# cdf["prev_close"] = cdf.close.shift(1)
# cdf["gap_pt"] = cdf.open - cdf.prev_low
# cdf["gap_pc"] = cdf["gap_pt"] * 100 / cdf.open

cdf["size_pt"] = cdf.close - cdf.open
cdf["size_pc"] = cdf["size_pt"] * 100 / cdf.open
# cdf.loc[cdf.index.time == dt.time(hour=9, minute=15), "size_pc"] = pd.NA
cdf.dropna(inplace=True)

# hist, edges = np.histogram(cdf.size_pc, density=True, bins=200)
nplt.generate_candle_histogram(cdf, bins=100)

## N3gative Gaps

In [12]:
pc_threshold_low = -0.15
pc_threshold_high = -.2
print(cdf.loc[(cdf.size_pc < pc_threshold_low) & (cdf.size_pc > pc_threshold_high)].shape[0] * 100 / cdf.shape[0])
#cdf.loc[cdf.index.date == dt.date(year=2024, month=1, day=5)]

cond = (cdf.size_pc < pc_threshold_low) & (cdf.size_pc > pc_threshold_high) # & (cdf.volume > cdf.volume.quantile(.1))
PC_FACTOR = 10 ** 2
num = 3
diff_pc_key = f"cd{num}_diff_pc"
cdf.loc[cond, f"cd{num}_low"] = cdf.low.shift(1 - num)
cdf.loc[cond, f"cd{num}_diff_pt"] = cdf.low.shift(1 - num) - cdf.open.shift(-1)
cdf.loc[cond, diff_pc_key] = (cdf.low.shift(1 - num) - cdf.open.shift(-1)) * PC_FACTOR / cdf.open.shift(-1)
cdf.loc[cond, f"cd{num}_dd_pc"] = (cdf.high.rolling(window=num - 1).max().shift(1 - num) - cdf.open.shift(-1)) * PC_FACTOR / cdf.open.shift(-1)
# print(cdf.loc[cond].index)
# cdf[cond][["open", "high", "low", "close", "volume", ]]
cdf[cond]

0.0761904761904762


Unnamed: 0_level_0,open,high,low,close,volume,expiry,size_pt,size_pc,cd3_low,cd3_diff_pt,cd3_diff_pc,cd3_dd_pc
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2024-01-24 09:18:00,21191.75,21191.75,21152.35,21156.1,0,2024-01-25,-35.65,-0.17,21147.75,-8.95,-0.04,0.24
2024-01-24 10:03:00,21368.55,21368.55,21330.4,21330.4,0,2024-01-25,-38.15,-0.18,21321.5,-8.0,-0.04,0.13
2024-01-30 09:20:00,21804.3,21804.3,21767.75,21770.75,0,2024-02-01,-33.55,-0.15,21755.85,-13.6,-0.06,0.0
2024-01-30 09:25:00,21764.65,21765.05,21722.85,21723.2,0,2024-02-01,-41.45,-0.19,21710.6,-12.4,-0.06,0.02


In [5]:
print(f"pc down: {cdf.loc[cond & (cdf[diff_pc_key] < 0)].shape[0]/cdf.loc[cond].shape[0]}")
print(f"avg change: {cdf.loc[cond][diff_pc_key].mean()}")
print(f"less than .1% down: {cdf.loc[cond & (cdf[diff_pc_key] < -.03)].shape[0] * 100 /cdf.loc[cond].shape[0]}")

pc down: 0.7142857142857143
avg change: -0.030294642836821976
less than .1% down: 71.42857142857143


In [13]:
ut.bokeh_plot(cdf.loc[cond].size_pc, cdf.loc[cond][diff_pc_key], "size_pc", diff_pc_key)

In [14]:
at_zero = "atm_prm_at_0"
cdf[at_zero] = pd.NA
cdf['atm'] = cdf.open.apply(ut.get_atm_strike)
cdf[at_zero] = cdf[cond].apply(lambda r: ut.get_premium_at(symbol=IC_SYMBOL, expiry=r.expiry, strike_price=r.atm, date=r.name.date(), option_type=OPTION_TYPE_PUT, tm=ut.add_to_time(r.name.time(), 1), get_open=True), axis=1)
at_key = f"atm_prm_at_{num}"
cdf[at_key] = cdf[cond].apply(lambda r: ut.get_premium_at(symbol=IC_SYMBOL, expiry=r.expiry, strike_price=r.atm, date=r.name.date(), option_type=OPTION_TYPE_PUT, tm=ut.add_to_time(r.name.time(), num+1), get_open=False), axis=1)
diff_pc_key = f"prm_pc_chg_{num}"
cdf[diff_pc_key] = (cdf[at_key] - cdf[at_zero]) * 100 / cdf[at_zero]
cdf[cond]
# cdf.loc[cdf.atm.notna()].atm
# cdf.atm
# cdf.atm = cdf[cdf.atm.notna()].atm.astype('int64')

Unnamed: 0_level_0,open,high,low,close,volume,expiry,size_pt,size_pc,cd3_low,cd3_diff_pt,cd3_diff_pc,cd3_dd_pc,atm_prm_at_0,atm,atm_prm_at_3,prm_pc_chg_3
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2024-01-24 09:18:00,21191.75,21191.75,21152.35,21156.1,0,2024-01-25,-35.65,-0.17,21147.75,-8.95,-0.04,0.24,138.1,21200,94.7,-31.43
2024-01-24 10:03:00,21368.55,21368.55,21330.4,21330.4,0,2024-01-25,-38.15,-0.18,21321.5,-8.0,-0.04,0.13,110.3,21350,99.05,-10.2
2024-01-30 09:20:00,21804.3,21804.3,21767.75,21770.75,0,2024-02-01,-33.55,-0.15,21755.85,-13.6,-0.06,0.0,167.85,21800,177.0,5.45
2024-01-30 09:25:00,21764.65,21765.05,21722.85,21723.2,0,2024-02-01,-41.45,-0.19,21710.6,-12.4,-0.06,0.02,176.2,21750,185.85,5.48


In [38]:
((cdf[at_key] - cdf[at_zero]).sum()) * 100 / cdf[at_key].sum()

0.18666698729702555