## 1_DATA_SET_PREPROCESSING

In [18]:
#imports
import numpy as np
import pandas as pd
import matplotlib as plt
import ta 

In [26]:
#load the dataset
df = pd.read_csv(r'D:\mehrs\VSCodeProjects\prediction\Dataset\BHAVCOPY_DATA.csv')
df.dropna(inplace=True)
df['BH_TIMESTAMP'] = pd.to_datetime(df['BH_TIMESTAMP'])
df = df.sort_values(by=['BH_SYMBOL', 'BH_TIMESTAMP'])

In [27]:
# feature engineering
df['%Change_Open_Close'] = (df['BH_CLOSE']-df['BH_OPEN'])/df['BH_OPEN']
df['Volatility_Day'] = (df['BH_HIGH']-df['BH_LOW'])/df['BH_OPEN']
df['Gap_Up_Down'] = (df['BH_OPEN']-df['BH_PREVCLOSE'])/df['BH_PREVCLOSE']
df['2d_Return'] = (df['BH_CLOSE'].shift(2)-df['BH_CLOSE'])/df['BH_CLOSE']

# volumne and delivery based
df['Volume_Spike'] = (df['DEL_QTY'])/df.groupby('BH_SYMBOL')['BH_TOTTRDQTY'].transform(lambda x: x.rolling(window=5, min_periods=1).mean())
df['Delivery_Change'] = df.groupby('BH_SYMBOL')['DEL_TRADED_RATIO'].transform(lambda x: x.rolling(window=3, min_periods=1).mean())

# Moving Averages
df['MA_5'] = df.groupby('BH_SYMBOL')['BH_CLOSE'].transform(lambda x: x.rolling(window=5).mean())
df['MA_10'] = df.groupby('BH_SYMBOL')['BH_CLOSE'].transform(lambda x: x.rolling(window=10).mean())
df['MA_20'] = df.groupby('BH_SYMBOL')['BH_CLOSE'].transform(lambda x: x.rolling(window=20).mean())

# RSI (14-day)
df['RSI_14'] = df.groupby('BH_SYMBOL')['BH_CLOSE'].transform(lambda x: ta.momentum.RSIIndicator(x, window=14).rsi())

# MACD (12, 26, 9)
df['MACD'] = df.groupby('BH_SYMBOL')['BH_CLOSE'].transform(lambda x: ta.trend.MACD(x, window_slow=26, window_fast=12, window_sign=9).macd())
df['MACD_signal'] = df.groupby('BH_SYMBOL')['BH_CLOSE'].transform(lambda x: ta.trend.MACD(x, window_slow=26, window_fast=12, window_sign=9).macd_signal())

# Bollinger Bands (20d, 2 std)
bb = df.groupby('BH_SYMBOL')['BH_CLOSE'].transform(lambda x: ta.volatility.BollingerBands(x, window=20, window_dev=2))
df['BB_upper'] = df.groupby('BH_SYMBOL')['BH_CLOSE'].transform(lambda x: ta.volatility.BollingerBands(x, window=20, window_dev=2).bollinger_hband())
df['BB_lower'] = df.groupby('BH_SYMBOL')['BH_CLOSE'].transform(lambda x: ta.volatility.BollingerBands(x, window=20, window_dev=2).bollinger_lband())
df['BB_pct'] = (df['BH_CLOSE'] - df['BB_lower']) / (df['BB_upper'] - df['BB_lower'])

In [24]:
# Simple target will price go up or down
df['TARGET'] = df['BH_CLOSE'].shift(2)>df['BH_CLOSE']

In [None]:
# clean and sort data
df.dropna(inplace=True)
df = df.sort_values(by=['BH_SYMBOL', 'BH_TIMESTAMP']).reset_index(drop=True)

## 2_MODEL_TRAINING

In [None]:
#imports
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import precision_score
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.callbacks import EarlyStopping
from tensorflow.python.client import device_lib

In [None]:
import tensorflow as tf
print("Num GPUs Available:", len(tf.config.list_physical_devices('GPU')))

In [35]:
# Define your predictors
predictors = [
    '%Change_Open_Close', 'Volatility_Day', 'Gap_Up_Down', 'Volume_Spike', 'Delivery_Change',
    'MA_5', 'MA_10', 'MA_20', 'RSI_14', 'MACD', 'MACD_signal',
    'BB_upper', 'BB_lower', 'BB_pct'
]

# Define target: probability of price going up after 2 days
df['Target'] = (df['2d_Return'] > 0).astype(int)  # Binary for now


In [36]:
#drop NaNs snad scale
df.dropna(subset=predictors + ['Target'], inplace=True)

# Normalize features
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(df[predictors])

In [37]:
def create_sequences(data, target, window_size=10):
    X, y = [], []
    for i in range(window_size, len(data)):
        X.append(data[i-window_size:i])
        y.append(target[i])
    return np.array(X), np.array(y)

X, y = create_sequences(scaled_features, df['Target'].values)


In [38]:
# 80% Train, 10% Validation, 10% Test
train_size = int(len(X) * 0.8)
val_size = int(len(X) * 0.1)

X_train, y_train = X[:train_size], y[:train_size]
X_val, y_val = X[train_size:train_size+val_size], y[train_size:train_size+val_size]
X_test, y_test = X[train_size+val_size:], y[train_size+val_size:]


In [39]:
model = Sequential()
model.add(LSTM(units=64, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dropout(0.2))
model.add(LSTM(units=32))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


In [40]:

print(device_lib.list_local_devices())

early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

model.fit(X_train, y_train, validation_data=(X_val, y_val),
          epochs=50, batch_size=64, callbacks=[early_stop])


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50


<keras.callbacks.History at 0x2ae962e97b0>

In [41]:
model.save('lstm_model')



INFO:tensorflow:Assets written to: lstm_model\assets


INFO:tensorflow:Assets written to: lstm_model\assets


In [42]:
y_pred_proba = model.predict(X_test)
y_pred = (y_pred_proba > 0.5).astype(int)

precision = precision_score(y_test, y_pred)
print(f"Precision Score: {precision:.4f}")


Precision Score: 0.7873


In [43]:
df_predictions = pd.DataFrame({
    'Actual': y_test,
    'Predicted_Class': y_pred.flatten(),
    'Predicted_Prob': y_pred_proba.flatten()
})

print(df_predictions.head(10))


   Actual  Predicted_Class  Predicted_Prob
0       0                0        0.011363
1       0                0        0.127538
2       0                0        0.040892
3       1                1        0.542518
4       1                1        0.950686
5       0                0        0.280021
6       0                0        0.214639
7       0                0        0.190380
8       0                0        0.064768
9       0                0        0.061640
