In [1]:
import pandas as pd
import os

In [7]:
df = pd.read_csv('train_processed.csv')

PreProcessing

In [8]:
df = df.dropna()
df = df.drop(['Stock Splits', 'Dividends'], axis=1)

In [10]:
import pandas as pd
import matplotlib.pyplot as plt

df_anamoly = df[abs(df['Close']-df['Open'])>=1000]

original_anamoly_index = df_anamoly.index

df = df.drop(original_anamoly_index) # Drop the initial anomalies using the original index
df = df.reset_index(drop=True)

for i in range(len(df)):
    maxx=max({df['Open'][i],df['Close'][i],df['High'][i],df['Low'][i]})
    minn=min({df['Open'][i],df['Close'][i],df['High'][i],df['Low'][i]})
    if(maxx/minn>=1.25):
        df_anamoly=pd.concat([df_anamoly,df.iloc[i:i+1]]) # Concatenate new anomalies
    if(df['Volume'][i]==0 and (df['Open'][i]==df['Close'][i])):
        df_anamoly=pd.concat([df_anamoly,df.iloc[i:i+1]]) # Concatenate new anomalies

# Get the indices from the updated df for the new anomalies
new_anamoly_index = df[df.index.isin(df_anamoly.index)].index

df = df.drop(new_anamoly_index)
df = df.reset_index(drop=True)

In [23]:
def calculate_rsi(data, window=20):
    delta = data['Close'].diff()
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)

    avg_gain = gain.rolling(window=window, min_periods=1).mean()
    avg_loss = loss.rolling(window=window, min_periods=1).mean()

    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))

    return rsi

In [32]:
df['MA20'] = df['Close'].rolling(window=20).mean()
df['STD20'] = df['Close'].rolling(window=20).std()
df['RSI']=calculate_rsi(df)
df['Bollinger_UB']=df['MA20'] + 3 * df['STD20']
df['Bollinger_LB']=df['MA20'] - 3 * df['STD20']
# Identify anomalies
df['Anomaly_via_bollinger'] = (df['Close'] >df['Bollinger_UB'] ) | (df['Close'] <df['Bollinger_LB'] )

# df.head()

In [30]:
import numpy as np

In [37]:
overbought_threshold = 70
oversold_threshold = 30

df['Anomaly_via_rsi'] = np.where(
    (df['RSI'] > overbought_threshold), 'Overbought',
    np.where(df['RSI'] < oversold_threshold, 'Oversold', 'Normal')
)

df['Price_Change'] = df['Close'].diff()
df['RSI_Change'] = df['RSI'].diff()

df['Bullish_Divergence'] = np.where(
    (df['Price_Change'] > 0) & (df['RSI_Change'] < 0), 'Bullish Divergence', 'No Divergence'
)

df['Bearish_Divergence'] = np.where(
    (df['Price_Change'] < 0) & (df['RSI_Change'] > 0), 'Bearish Divergence', 'No Divergence'
)

df['RSI_Sharp_Change'] = np.where(df['RSI'].diff().abs() > 10, 'Sharp RSI Change', 'Normal')

df['Combined_Anomaly_via_rsi'] = np.where(
    (df['Anomaly_via_rsi'] != 'Normal') |
    (df['Bullish_Divergence'] == 'Bullish Divergence') |
    (df['Bearish_Divergence'] == 'Bearish Divergence') |
    (df['RSI_Sharp_Change'] == 'Sharp RSI Change'),
    True,
    False
)

In [69]:
# df['Class']=df['Combined_Anomaly_via_rsi']|df['Anomaly_via_bollinger']
df['Class']=df['Anomaly_via_bollinger']

Processing Done

---


Model Making starts

In [70]:
df_temp = df[['Close','Open','High','Low','Volume','Class']]

In [71]:
from sklearn.model_selection import train_test_split

X = df_temp.drop('Class', axis=1)  # Features
y = df_temp['Class']  # Target variable

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=69)

print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)

X_train shape: (14624, 5)
y_train shape: (14624,)
X_test shape: (3657, 5)
y_test shape: (3657,)


In [72]:
from tensorflow import keras
from tensorflow.keras import layers

In [76]:
inputs = keras.Input(shape=(X_train.shape[1],))
hidden_layer1 = keras.layers.Dense(10, activation="relu")(inputs)
hidden_layer2 = keras.layers.Dense(10, activation="sigmoid")(hidden_layer1)
outputs = keras.layers.Dense(1, activation="relu")(hidden_layer2)
model = keras.Model(inputs=inputs, outputs=outputs)

In [77]:
model.summary()
model.compile(optimizer='adam',loss='mse')

In [78]:
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10
[1m366/366[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.1993 - val_loss: 0.0055
Epoch 2/10
[1m366/366[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0039 - val_loss: 0.0055
Epoch 3/10
[1m366/366[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0036 - val_loss: 0.0055
Epoch 4/10
[1m366/366[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0051 - val_loss: 0.0055
Epoch 5/10
[1m366/366[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0032 - val_loss: 0.0055
Epoch 6/10
[1m366/366[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0026 - val_loss: 0.0055
Epoch 7/10
[1m366/366[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0032 - val_loss: 0.0055
Epoch 8/10
[1m366/366[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.0037 - val_loss: 0.0055
Epoch 9/10
[1m366/366[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x7c61a27764a0>

In [79]:
test_loss = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss:.4f}")

[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0040
Test Loss: 0.0033


In [80]:
model.save("path_to_model.h5")

