# Candle Gaps
> Do big gaps in candles reflect panic or frenzy in the stock?

In [1]:
import datetime as dt
import utils as ut
import pandas as pd
import icharts as ic
from functools import cache
from constants import *
import config
from matplotlib import pyplot as plt
import numpy as np
import nse_plot as nplt


# --- Study parameters -------------------------------------------------------
# Date window handed to build_date_range below (which treats the end as exclusive).
TEST_START = dt.datetime.fromisoformat("2023-01-01")
TEST_END = dt.datetime.fromisoformat("2024-02-29")
INTERVAL = ut.INTERVAL_MIN1
EXCHANGE = ut.EXCHANGE_NSE
SYMBOL = "HDFCBANK"

# --- pandas display ---------------------------------------------------------
# Show full cell contents and every row; render floats with two decimals.
pd.set_option("display.max_colwidth", None)
pd.set_option("display.max_rows", None)
pd.set_option("display.precision", 2)
pd.set_option("display.float_format", lambda x: '%.2f' % x)

def build_date_range(date_start, date_end, symbol):
    """Return the days in [date_start, date_end) for which `symbol` has data.

    Steps one calendar day at a time from `date_start`; `date_end` itself is
    excluded. Saturdays and Sundays are skipped without querying. For the
    remaining days the first element returned by `ut.has_data` (queried with
    the module-level INTERVAL and EXCHANGE) decides whether the day is kept.
    """
    one_day = dt.timedelta(days=1)

    def _calendar_days():
        day = date_start
        while day < date_end:
            yield day
            day += one_day

    kept = []
    for day in _calendar_days():
        # weekday(): Monday is 0, so 5 and 6 are Saturday and Sunday.
        if day.weekday() >= 5:
            continue
        available, _ = ut.has_data(symbol, day, interval=INTERVAL, exchange=EXCHANGE)
        if available:
            kept.append(day)
    return kept

# Collect every day with data, then shuffle reproducibly (fixed seed) before splitting.
all_dates = pd.DataFrame({"trade_date": build_date_range(TEST_START, TEST_END, SYMBOL)})
all_dates_shuffled = all_dates.sample(frac=1, random_state=42)

# First half of the shuffled days -> train, remainder -> test.
# (To train on every day instead, build train_dates from all_dates.)
train_size = len(all_dates_shuffled) // 2
train_dates = (
    all_dates_shuffled.iloc[:train_size]
    .sort_values(by="trade_date")
    .set_index("trade_date")
)
test_dates = (
    all_dates_shuffled.iloc[train_size:]
    .sort_values(by="trade_date")
    .set_index("trade_date")
)

def get_intraday_data(date):
    """Return what `ut.get_data` yields for SYMBOL on `date` at the module-level INTERVAL."""
    query = dict(symbol=SYMBOL, date=date, interval=INTERVAL, exchange=EXCHANGE)
    return ut.get_data(**query)

def get_daily_data(date):
    """Return what `ut.get_data` yields for SYMBOL on `date` at INTERVAL_DAY.

    NOTE(review): INTERVAL_DAY is not defined in this notebook; presumably it
    arrives through `from constants import *` -- confirm.
    """
    query = dict(symbol=SYMBOL, date=date, interval=INTERVAL_DAY, exchange=EXCHANGE)
    return ut.get_data(**query)

def get_last_trading_day(date):
    """Delegate to `ut.get_last_trading_day` for SYMBOL relative to `date`.

    Uses the module-level INTERVAL and EXCHANGE so that this helper follows
    the same configuration as the other data accessors in this notebook
    (EXCHANGE is currently ut.EXCHANGE_NSE, so the result is unchanged).
    """
    return ut.get_last_trading_day(SYMBOL, date, interval=INTERVAL, exchange=EXCHANGE)

def get_symbol_price(d, t):
    """Return the `open` of SYMBOL's first candle on day `d` whose time equals `t`.

    Returns `pd.NA` when no candle matches (IndexError) or when the fetched
    object lacks the attributes used here (AttributeError).
    """
    data = ut.get_data(symbol=SYMBOL, date=d, interval=INTERVAL, exchange=EXCHANGE)
    try:
        candles_at_t = data.loc[data.index.time == t]
        first_candle = candles_at_t.iloc[0]
        return first_candle.open
    except (IndexError, AttributeError):
        return pd.NA

In [2]:
# Stack the intraday candles of every training day into a single frame.
cdf = pd.concat([get_intraday_data(trade_date) for trade_date in train_dates.index])

# Candle body: close minus open, in points and as 10000 * body / open.
cdf["size_pt"] = cdf.close - cdf.open
cdf["size_pc"] = cdf["size_pt"] * 10000 / cdf.open

# Blank the 09:15 candle's size so the dropna below removes that row
# (together with any other row that has a missing value).
cdf.loc[cdf.index.time == dt.time(hour=9, minute=15), "size_pc"] = pd.NA
cdf = cdf.dropna()

nplt.generate_candle_histogram(cdf, bins=100)

## Negative Gaps

In [3]:
# Fraction of candles whose size_pc is below the threshold.
# size_pc is 10000 * (close - open) / open, so -30 corresponds to a 0.30% drop.
pc_threshold = -30
print(len(cdf[cdf.size_pc < pc_threshold]) / len(cdf))

0.0003603261900246539


Unnamed: 0_level_0,open,high,low,close,volume,size_pt,size_pc
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-01-03 09:16:00+05:30,,,,,,,
2022-01-03 09:17:00+05:30,1484.9,1486.0,1483.65,1485.15,24106.0,0.25,1.68
2022-01-03 09:18:00+05:30,1485.3,1485.3,1482.8,1484.05,17319.0,-1.25,-8.42
2022-01-03 09:19:00+05:30,1483.85,1488.85,1483.85,1488.1,25257.0,4.25,28.64
2022-01-03 09:20:00+05:30,1488.1,1489.35,1487.05,1488.95,13295.0,0.85,5.71


In [28]:
# Show every candle recorded on 2024-01-05.
# NOTE(review): the saved output of this cell lists columns (cd3_diff, cd3_dd)
# that no visible cell creates -- it was probably produced by an earlier
# kernel state; re-run after Restart & Run All to refresh it.
cdf[cdf.index.date == dt.date(2024, 1, 5)]

Unnamed: 0_level_0,open,high,low,close,volume,size_pt,size_pc,cd3_diff,cd3_dd,cd3_low,cd3_diff_pt,cd3_diff_pc
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2024-01-05 09:16:00+05:30,1697.95,1704.9,1697.3,1700.8,322090,2.85,16.78,,,,,
2024-01-05 09:17:00+05:30,1700.45,1701.45,1699.65,1700.8,91447,0.35,2.06,,,,,
2024-01-05 09:18:00+05:30,1700.95,1701.0,1694.8,1694.8,92276,-6.15,-36.16,-10.03,8.23,1693.1,-1.7,-0.0
2024-01-05 09:19:00+05:30,1695.05,1696.2,1694.05,1694.05,82662,-1.0,-5.9,,,,,
2024-01-05 09:20:00+05:30,1694.4,1694.4,1693.1,1694.3,61026,-0.1,-0.59,,,,,
2024-01-05 09:21:00+05:30,1694.3,1694.35,1691.85,1692.2,49308,-2.1,-12.39,,,,,
2024-01-05 09:22:00+05:30,1692.2,1692.45,1689.3,1690.1,67892,-2.1,-12.41,,,,,
2024-01-05 09:23:00+05:30,1690.05,1690.5,1688.7,1690.25,74202,0.2,1.18,,,,,
2024-01-05 09:24:00+05:30,1689.85,1690.25,1686.65,1686.65,84316,-3.2,-18.94,,,,,
2024-01-05 09:25:00+05:30,1687.4,1687.9,1686.05,1687.2,84025,-0.2,-1.19,,,,,


In [5]:
# Candles whose body fell below the threshold (size_pc is 10000 * body / open).
cond = (cdf.size_pc < pc_threshold)
# The cd3_*_pc columns are true percentages (x100), unlike size_pc (x10000).
PC_FACTOR = 10 ** 2

# cdf stacks non-consecutive training days, so a plain shift(-2) on the last
# candles of a day would read the first candles of a later day. Look ahead
# within each calendar day only; candles with no 2-ahead candle get NaN.
by_day = cdf.groupby(cdf.index.date)
low_2_ahead = by_day["low"].shift(-2)
# Highest high of the next two candles; NaN when either one is missing,
# matching the original rolling(window=2).max().shift(-2) semantics.
high_next_2 = np.maximum(by_day["high"].shift(-1), by_day["high"].shift(-2))

cdf.loc[cond, "cd3_low"] = low_2_ahead
cdf.loc[cond, "cd3_diff_pt"] = low_2_ahead - cdf.close
cdf.loc[cond, "cd3_diff_pc"] = (low_2_ahead - cdf.close) * PC_FACTOR / cdf.close
cdf.loc[cond, "cd3_dd_pc"] = (high_next_2 - cdf.close) * PC_FACTOR / cdf.close
cdf[cond]

Unnamed: 0_level_0,open,high,low,close,volume,size_pt,size_pc,cd3_low,cd3_diff_pt,cd3_diff_pc,cd3_dd_pc
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2023-01-27 09:16:00+05:30,1625.65,1627.5,1618.2,1618.2,389657,-7.45,-45.83,1590.25,-27.95,-1.73,0.03
2023-01-27 09:17:00+05:30,1618.6,1618.65,1598.45,1599.9,372186,-18.7,-115.53,1600.45,0.55,0.03,0.9
2023-01-27 09:28:00+05:30,1622.55,1622.9,1616.65,1617.55,137578,-5.0,-30.82,1607.0,-10.55,-0.65,0.1
2023-01-30 09:18:00+05:30,1589.9,1590.1,1582.2,1584.2,149224,-5.7,-35.85,1587.0,2.8,0.18,1.04
2023-01-30 09:40:00+05:30,1619.45,1619.45,1611.0,1611.3,54967,-8.15,-50.33,1608.3,-3.0,-0.19,0.26
2023-02-02 09:19:00+05:30,1606.25,1606.45,1600.45,1600.75,80634,-5.5,-34.24,1602.4,1.65,0.1,0.32
2023-02-02 09:27:00+05:30,1616.0,1617.95,1610.4,1610.9,41168,-5.1,-31.56,1606.85,-4.05,-0.25,0.04
2023-02-23 09:16:00+05:30,1615.55,1615.55,1608.65,1608.9,66017,-6.65,-41.16,1605.85,-3.05,-0.19,0.05
2023-02-23 11:30:00+05:30,1605.95,1605.95,1600.5,1600.95,101698,-5.0,-31.13,1594.2,-6.75,-0.42,0.01
2023-04-11 09:17:00+05:30,1658.95,1659.0,1652.15,1652.55,71077,-6.4,-38.58,1654.35,1.8,0.11,0.24


In [6]:
# Scatter the size of each flagged candle against the move to the low two candles later.
ut.bokeh_plot(
    cdf.loc[cond, "size_pc"],
    cdf.loc[cond, "cd3_diff_pc"],
    "size_pc",
    "cd3_diff_pc",
)