In [78]:
import pandas as pd
import numpy as np

In [79]:
# Load the bar data and index it by its timestamp column.
df = pd.read_csv("./demo_data.csv")
df = df.set_index("datetime")
# Parse the index labels into real timestamps so `.hour` / `.minute` work later.
df.index = pd.to_datetime(df.index)
df.head()

Unnamed: 0_level_0,open,high,low,close,volume
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2026-01-23 14:15:00+05:30,1302.1,1309.0,1295.0,1305.8,266637
2026-01-23 14:20:00+05:30,1305.3,1314.8,1303.5,1308.0,261105
2026-01-23 14:25:00+05:30,1308.0,1308.8,1304.0,1308.2,133692
2026-01-23 14:30:00+05:30,1307.9,1308.9,1302.2,1307.7,123654
2026-01-23 14:35:00+05:30,1307.7,1308.2,1299.4,1301.7,141716


#### Creating features from this data

In [80]:
# Rolling 10-bar extremes of the close, taken on the close lagged by one bar
# so each row only sees closes from earlier bars.
prior_close = df["close"].shift()
df["resistance"] = prior_close.rolling(10).max()
df["support"] = prior_close.rolling(10).min()

In [81]:
# Calculate True Range (Wilder's definition): the largest of
#   high - low, |high - prev_close|, |low - prev_close|
df['prev_close'] = df['close'].shift(1)
high_low = df['high'] - df['low']
tr_candidates = pd.concat(
    [
        high_low,
        (df['high'] - df['prev_close']).abs(),
        (df['low'] - df['prev_close']).abs(),
    ],
    axis=1,
)
# Column-wise max instead of a Python-level row-by-row apply. NaN candidates
# are skipped, so the first bar (no prev_close) falls back to high - low,
# exactly as the builtin max() did. The mask keeps TR undefined when the bar's
# own high/low is missing, which is also what max() returned in that case.
df['tr'] = tr_candidates.max(axis=1).where(high_low.notna())
# ATR: simple 14-bar mean of TR, lagged one bar so the current bar's range is excluded.
df['atr'] = df['tr'].shift().rolling(14).mean()

In [82]:
# Session-timing features derived from the bar timestamp.
df["hour"] = df.index.hour

# Trading-session bounds, expressed as minutes since midnight.
market_open_minutes = 9 * 60 + 15    # 09:15 -> 555
market_close_minutes = 15 * 60 + 30  # 15:30 -> 930
total_trading_minutes = market_close_minutes - market_open_minutes  # 375 (6h 15min)

# Bar timestamp as minutes since midnight.
df['current_minutes'] = df['hour'] * 60 + df.index.minute

# Fraction of the session elapsed: 0 at the open, 1 at the close.
elapsed_minutes = df['current_minutes'] - market_open_minutes
df['session_progress'] = elapsed_minutes / total_trading_minutes

# 0/1 flags for roughly the first and last third of the session (~2 hours each).
df['is_early_session'] = df['session_progress'].lt(0.33).astype(int)
df['is_late_session'] = df['session_progress'].gt(0.67).astype(int)

# 'current_minutes' is only an intermediate; drop it if it should not be a feature:
# df = df.drop('current_minutes', axis=1)

In [83]:
# High-low range of the previous candle as a fraction of that candle's close.
bar_range_pct = (df['high'] - df['low']) / df['close']
df['hl_ratio'] = bar_range_pct.shift(1)


In [84]:
# Distance of the current open above the (lagged) support level, as a fraction of the open.
open_price = df['open']
df['open_to_support_dist'] = (open_price - df['support']) / open_price

In [85]:
df = df.copy()  # re-bind to a fresh copy before adding more columns
# The sample rows shown after loading are 5 minutes apart, so 20 bars = 100 minutes.
lookback = 20

# Previous bar's volume relative to the mean volume of the `lookback` bars ending at it.
prior_volume = df["volume"].shift()
df["volume_ma"] = prior_volume.rolling(window=lookback).mean()
df["volume_ratio"] = prior_volume / df["volume_ma"]

In [86]:
def calculate_rsi(data, period=14):
    """Simple-moving-average RSI of ``data``, lagged by one bar.

    Parameters
    ----------
    data : pd.Series
        Price series (e.g. closes).
    period : int, default 14
        Number of lagged price changes averaged for the gain and loss legs.

    Returns
    -------
    pd.Series
        RSI aligned to ``data``'s index. The value at row ``t`` uses only
        price changes up to row ``t - 1``. The first ``period + 1`` rows are
        NaN because they do not have ``period`` real changes behind them.
        A window with no losses yields 100; a window with no movement at all
        yields NaN (0 / 0).
    """
    delta = data.diff().shift(1)  # change completed on the previous bar
    # clip() keeps NaN as NaN. `where(cond, 0)` turned the two leading NaN
    # deltas into zero-change bars, so the first RSI values were produced
    # from fewer than `period` real observations.
    gain = delta.clip(lower=0).rolling(window=period).mean()
    loss = (-delta).clip(lower=0).rolling(window=period).mean()
    rs = gain / loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

# 14-period RSI of the close; calculate_rsi lags the price changes by one bar internally.
df["rsi_14"] = calculate_rsi(df["close"], period=14)

In [87]:
def calculate_macd(data, fast=12, slow=26, signal=9):
    """MACD line, signal line and histogram of ``data``, lagged by one bar.

    Parameters
    ----------
    data : pd.Series
        Price series (e.g. closes).
    fast, slow : int
        Spans of the fast and slow exponential moving averages.
    signal : int
        Span of the EMA applied to the MACD line.

    Returns
    -------
    tuple of pd.Series
        ``(macd_line, signal_line, macd_histogram)``, each aligned to
        ``data``'s index. The input is shifted one row first, so row ``t``
        only reflects prices up to row ``t - 1``.
    """
    lagged = data.shift(1)

    def _ema(series, span):
        # Recursive (adjust=False) exponential moving average.
        return series.ewm(span=span, adjust=False).mean()

    macd_line = _ema(lagged, fast) - _ema(lagged, slow)
    signal_line = _ema(macd_line, signal)
    return macd_line, signal_line, macd_line - signal_line

macd_line, macd_signal, macd_hist = calculate_macd(df["close"])
df["macd"] = macd_line
df["macd_signal"] = macd_signal
df["macd_hist"] = macd_hist

# Scale by the PREVIOUS close (not the current one) to get price-relative values.
last_close = df["close"].shift(1)
df["macd_norm"] = df["macd"] / last_close
df["macd_hist_norm"] = df["macd_hist"] / last_close

In [88]:
# Close lagged by one bar: everything below is built from bars before the current one.
last_close = df["close"].shift()

# Simple moving averages of the lagged close (building blocks, not direct features).
for window in (5, 10, 50, 200):
    df[f"{window}ma"] = last_close.rolling(window).mean()

# Relative features: spreads expressed as a fraction of the lagged close.
df["5_10_ma_diff_pct"] = (df["5ma"] - df["10ma"]) / last_close
df["close_5ma_diff_pct"] = (last_close - df["5ma"]) / last_close
df["close_10ma_diff_pct"] = (last_close - df["10ma"]) / last_close
df["golden_cross_pct"] = (df["50ma"] - df["200ma"]) / last_close

# Lagged close versus the longer averages.
df["close_50ma_diff_pct"] = (last_close - df["50ma"]) / last_close
df["close_200ma_diff_pct"] = (last_close - df["200ma"]) / last_close

# MA momentum: fractional change of the average over the stated number of bars.
df["5ma_roc"] = df["5ma"].pct_change(periods=5)
df["50ma_roc"] = df["50ma"].pct_change(periods=10)


In [89]:
# Per-unit-volume measures of the previous bar.
# NOTE(review): a zero-volume bar would make these inf/NaN; confirm the data has none.
prev_bar = df[["open", "high", "low", "close", "volume"]].shift(1)
df["volume_pressure_prev"] = (prev_bar["close"] - prev_bar["open"]) / prev_bar["volume"]
df["range_per_volume_prev"] = (prev_bar["high"] - prev_bar["low"]) / prev_bar["volume"]


In [90]:
# Opening gap: current open versus the previous close, as a fraction of that close.
last_close = df["close"].shift(1)
df["gap_pct"] = (df["open"] - last_close) / last_close


In [91]:
# Signed body-to-range ratio of the PREVIOUS candle:
# +1 = opened at the low and closed at the high, -1 = the reverse.
prev_bar = df[["open", "high", "low", "close"]].shift()
df["candle_type"] = (prev_bar["close"] - prev_bar["open"]) / (prev_bar["high"] - prev_bar["low"])
# candle_type is already lagged one bar, so these describe the candles
# two and three bars back respectively.
df["candle_type_lag1"] = df["candle_type"].shift(1)
df["candle_type_lag2"] = df["candle_type"].shift(2)


In [92]:
# Open's position relative to the S/R levels.
# NOTE(review): resistance/support/atr are already built from lagged data; the
# extra shift(1) means every feature here describes the PREVIOUS bar's open,
# not the current one. Confirm that is intended.
atr = df['atr']
channel_width = df['resistance'] - df['support']

# Signed distance of the open beyond each level, in ATR units.
df['open_above_resistance'] = ((df['open'] - df['resistance']) / atr).shift(1)
df['open_below_support'] = ((df['support'] - df['open']) / atr).shift(1)

# Where the open sits inside the channel:
# 0 = at support, 1 = at resistance, >1 = above, <0 = below
df['open_sr_position'] = ((df['open'] - df['support']) / channel_width).shift(1)

# Channel width in ATR units.
df['sr_range_atr'] = (channel_width / atr).shift(1)

In [93]:
# Breakout / breakdown features. Every column is lagged one bar, so a row
# describes what the previous bar did.
atr = df['atr']
above_resistance = df['close'] - df['resistance']
below_support = df['support'] - df['close']

# Strength: distance closed beyond the level in ATR units, floored at 0.
df['resistance_breakout_strength'] = (above_resistance / atr).clip(lower=0).shift(1)
df['support_breakdown_strength'] = (below_support / atr).clip(lower=0).shift(1)

# Conviction: the same distance relative to the bar's own range, bounded to
# [-1, 1]. Zero-range bars become NaN instead of dividing by zero.
rng = (df['high'] - df['low']).replace(0, np.nan)
df['resistance_breakout_conviction'] = (above_resistance / rng).clip(-1, 1).shift(1)
df['support_breakdown_conviction'] = (below_support / rng).clip(-1, 1).shift(1)

# Volume: previous bar's volume relative to its trailing 20-bar mean.
# NOTE(review): this appears to be the same quantity as df['volume_ratio']
# (lookback = 20); consider keeping only one of the two.
df['breakout_volume_ratio'] = (
    df['volume'] / df['volume'].rolling(20).mean()
).shift(1)

# Pressure / velocity: how many of the last 10 completed bars touched a level.
prev_high = df['high'].shift(1)
prev_low = df['low'].shift(1)
prev_resistance = df['resistance'].shift(1)
prev_support = df['support'].shift(1)
# A comparison against NaN evaluates to False, which made warm-up windows
# report 0 touches. Mask undefined comparisons so the count stays NaN until
# 10 real comparisons are available.
touched_resistance = (prev_high >= prev_resistance).astype(float).where(
    prev_high.notna() & prev_resistance.notna()
)
touched_support = (prev_low <= prev_support).astype(float).where(
    prev_low.notna() & prev_support.notna()
)
df['resistance_touch_count'] = touched_resistance.rolling(10).sum()
df['support_touch_count'] = touched_support.rolling(10).sum()

In [94]:
# df = df.dropna()

In [95]:
# Preview the first rows; the engineered columns are NaN here because of the shift/rolling warm-up.
df.head()

Unnamed: 0_level_0,open,high,low,close,volume,resistance,support,prev_close,tr,atr,...,open_below_support,open_sr_position,sr_range_atr,resistance_breakout_strength,support_breakdown_strength,resistance_breakout_conviction,support_breakdown_conviction,breakout_volume_ratio,resistance_touch_count,support_touch_count
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2026-01-23 14:15:00+05:30,1302.1,1309.0,1295.0,1305.8,266637,,,,14.0,,...,,,,,,,,,,
2026-01-23 14:20:00+05:30,1305.3,1314.8,1303.5,1308.0,261105,,,1305.8,11.3,,...,,,,,,,,,,
2026-01-23 14:25:00+05:30,1308.0,1308.8,1304.0,1308.2,133692,,,1308.0,4.8,,...,,,,,,,,,,
2026-01-23 14:30:00+05:30,1307.9,1308.9,1302.2,1307.7,123654,,,1308.2,6.7,,...,,,,,,,,,,
2026-01-23 14:35:00+05:30,1307.7,1308.2,1299.4,1301.7,141716,,,1307.7,8.8,,...,,,,,,,,,,


In [96]:
# Print every column's value on the most recent bar, one "name : value" line each.
# Positional .iloc[-1] per column keeps that column's own dtype (ints print as
# ints) and does not depend on the last index label being unique, unlike a
# label lookup recomputed on every iteration.
for column in df.columns:
    print(f"{column} : {df[column].iloc[-1]}")

open : 1391.9
high : 1392.0
low : 1390.4
close : 1391.5
volume : 8256
resistance : 1395.5
support : 1385.7
prev_close : 1392.0
tr : 1.599999999999909
atr : 3.6571428571428277
hour : 14
current_minutes : 850
session_progress : 0.7866666666666666
is_early_session : 0
is_late_session : 1
hl_ratio : 0.0024425287356322494
open_to_support_dist : 0.004454342984409832
volume_ma : 15882.6
volume_ratio : 0.43481545842620223
rsi_14 : 64.06250000000001
macd : 4.017361113312518
macd_signal : 3.7206001811015397
macd_hist : 0.2967609322109781
macd_norm : 0.002886035282552096
macd_hist_norm : 0.0002131903248642084
5ma : 1393.7199999999998
10ma : 1391.25
50ma : 1380.604
200ma : 1399.1405
5_10_ma_diff_pct : 0.0017744252873561782
close_5ma_diff_pct : -0.0012356321839079023
close_10ma_diff_pct : 0.0005387931034482759
golden_cross_pct : -0.01331645114942524
close_50ma_diff_pct : 0.008186781609195372
close_200ma_diff_pct : -0.005129669540229867
5ma_roc : 0.003557078874983466
50ma_roc : 0.002863447963261878
