# 📊 Smart Trade Classifier (using Zerodha Data)

This AI-powered project analyzes my past Zerodha trades and classifies each trade as **Good** (clearly profitable), **Neutral** (small gain or loss), or **Risky** (a sizeable loss), based on its realized P&L.  
I built this to better understand patterns in my own trading behavior.



In [17]:
# Data handling
import pandas as pd
import numpy as np
# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
# Machine Learning
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import pandas as pd

# Load the cleaned P&L workbook (path is relative to the notebook's working
# directory) and preview its first five rows.
df = pd.read_excel(io="pnlclean.xlsx")
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,Symbol,Quantity,Buy Value,Sell Value,Realized P&L,Realized P&L Pct.
0,,BANKNIFTY2361543400PE,25,832.5,1000.0,167.5,20.1201
1,,BANKNIFTY2361543500PE,100,5445.0,5647.5,202.5,3.719
2,,BANKNIFTY2361543600PE,50,2793.75,3630.0,836.25,29.9329
3,,BANKNIFTY2361543700PE,100,7727.5,7572.5,-155.0,-2.0058
4,,BANKNIFTY2361543800PE,25,1725.0,3022.5,1297.5,75.2174


In [18]:
def _label_from_pnl(pnl):
    """Map one trade's realized P&L to a class label.

    Values above 300 are 'Good', values below -200 are 'Risky', and
    everything else (both bounds included, as well as NaN) is 'Neutral'.
    """
    if pnl > 300:
        return 'Good'
    if pnl < -200:
        return 'Risky'
    return 'Neutral'


df['Label'] = df['Realized P&L'].apply(_label_from_pnl)

In [19]:
# Preview the first rows to check the newly added 'Label' column.
df.head()

Unnamed: 0.1,Unnamed: 0,Symbol,Quantity,Buy Value,Sell Value,Realized P&L,Realized P&L Pct.,Label
0,,BANKNIFTY2361543400PE,25,832.5,1000.0,167.5,20.1201,Neutral
1,,BANKNIFTY2361543500PE,100,5445.0,5647.5,202.5,3.719,Neutral
2,,BANKNIFTY2361543600PE,50,2793.75,3630.0,836.25,29.9329,Good
3,,BANKNIFTY2361543700PE,100,7727.5,7572.5,-155.0,-2.0058,Neutral
4,,BANKNIFTY2361543800PE,25,1725.0,3022.5,1297.5,75.2174,Good


In [20]:
# Keep only the columns used from here on; every column not listed
# (including the absolute 'Realized P&L') is dropped.
df = df.loc[:, [
    'Symbol',
    'Quantity',
    'Buy Value',
    'Sell Value',
    'Realized P&L Pct.',
    'Label',
]]
df.head(n=5)

Unnamed: 0,Symbol,Quantity,Buy Value,Sell Value,Realized P&L Pct.,Label
0,BANKNIFTY2361543400PE,25,832.5,1000.0,20.1201,Neutral
1,BANKNIFTY2361543500PE,100,5445.0,5647.5,3.719,Neutral
2,BANKNIFTY2361543600PE,50,2793.75,3630.0,29.9329,Good
3,BANKNIFTY2361543700PE,100,7727.5,7572.5,-2.0058,Neutral
4,BANKNIFTY2361543800PE,25,1725.0,3022.5,75.2174,Good


In [21]:
# Parse contract details out of the trading symbol.
symbol_text = df['Symbol'].str

# Strike: the five digits that sit immediately before the PE/CE marker.
# NOTE(review): the pattern only captures exactly five digits — confirm that
# every traded symbol has a five-digit strike.
df['Strike'] = symbol_text.extract(r'(\d{5})(?=PE|CE)', expand=False).astype(float)

# Option type: the 'PE' or 'CE' marker at the very end of the symbol.
df['Option_Type'] = symbol_text.extract(r'(PE|CE)$', expand=False)

df.head(n=5)

Unnamed: 0,Symbol,Quantity,Buy Value,Sell Value,Realized P&L Pct.,Label,Strike,Option_Type
0,BANKNIFTY2361543400PE,25,832.5,1000.0,20.1201,Neutral,43400.0,PE
1,BANKNIFTY2361543500PE,100,5445.0,5647.5,3.719,Neutral,43500.0,PE
2,BANKNIFTY2361543600PE,50,2793.75,3630.0,29.9329,Good,43600.0,PE
3,BANKNIFTY2361543700PE,100,7727.5,7572.5,-2.0058,Neutral,43700.0,PE
4,BANKNIFTY2361543800PE,25,1725.0,3022.5,75.2174,Good,43800.0,PE


In [22]:
from sklearn.preprocessing import LabelEncoder

# Replace the 'CE'/'PE' strings with integer codes. LabelEncoder numbers the
# classes in sorted order, so for this data 'CE' becomes 0 and 'PE' becomes 1.
le = LabelEncoder().fit(df['Option_Type'])
df['Option_Type'] = le.transform(df['Option_Type'])
df.tail(n=5)

Unnamed: 0,Symbol,Quantity,Buy Value,Sell Value,Realized P&L Pct.,Label,Strike,Option_Type
255,BANKNIFTY24MAR47000CE,15,6328.5,4237.5,-33.041,Risky,47000.0,0
256,NIFTY2370619450CE,100,2832.5,2952.5,4.2365,Neutral,19450.0,0
257,NIFTY2381719500CE,150,4740.0,3325.0,-29.8523,Risky,19500.0,0
258,NIFTY23JUL19600PE,50,2657.5,2527.5,-4.8918,Neutral,19600.0,1
259,NIFTY23JUL19700PE,50,3172.5,3252.5,2.5217,Neutral,19700.0,1


In [23]:
# Target: the Good / Neutral / Risky class assigned to each trade.
y = df['Label']

# Features: the numeric columns fed to the classifier.
# NOTE(review): 'Label' was derived from 'Realized P&L', and in the previewed
# rows that value equals 'Sell Value' - 'Buy Value'. Supplying both columns
# therefore likely leaks the outcome into the features, and 'Sell Value' is
# not known before a trade is closed — confirm whether it belongs here.
feature_columns = ['Quantity', 'Buy Value', 'Sell Value', 'Strike', 'Option_Type']
X = df[feature_columns]

In [24]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the trades for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)
df.tail(n=5)

Unnamed: 0,Symbol,Quantity,Buy Value,Sell Value,Realized P&L Pct.,Label,Strike,Option_Type
255,BANKNIFTY24MAR47000CE,15,6328.5,4237.5,-33.041,Risky,47000.0,0
256,NIFTY2370619450CE,100,2832.5,2952.5,4.2365,Neutral,19450.0,0
257,NIFTY2381719500CE,150,4740.0,3325.0,-29.8523,Risky,19500.0,0
258,NIFTY23JUL19600PE,50,2657.5,2527.5,-4.8918,Neutral,19600.0,1
259,NIFTY23JUL19700PE,50,3172.5,3252.5,2.5217,Neutral,19700.0,1


In [25]:
from sklearn.ensemble import RandomForestClassifier

In [26]:
# Train a 100-tree random forest on the training split; the fixed seed makes
# the fitted model reproducible.
model = RandomForestClassifier(random_state=42, n_estimators=100)
model.fit(X=X_train, y=y_train)

In [27]:
# Predict a class label for every held-out trade in the test split.
y_pred = model.predict(X_test)

In [31]:
from sklearn.metrics import classification_report, confusion_matrix

# Print the confusion matrix (rows = true class, columns = predicted class,
# in sorted label order: Good, Neutral, Risky), followed by the per-class
# precision / recall / F1 report for the held-out test split.
for evaluation in (
    confusion_matrix(y_test, y_pred),
    classification_report(y_test, y_pred),
):
    print(evaluation)


[[20  1  1]
 [ 6  6  3]
 [ 2  4  9]]
              precision    recall  f1-score   support

        Good       0.71      0.91      0.80        22
     Neutral       0.55      0.40      0.46        15
       Risky       0.69      0.60      0.64        15

    accuracy                           0.67        52
   macro avg       0.65      0.64      0.63        52
weighted avg       0.66      0.67      0.66        52



In [33]:
# Score a single hypothetical trade with the fitted model.
new_trade = pd.DataFrame([{
    'Quantity': 50,
    'Buy Value': 1500.0,
    'Sell Value': 1600.0,   # Needed because the model was trained with 'Sell Value' as a feature (a dummy value if unknown)
    'Strike': 43500,
    'Option_Type': 1        # Encoded option type: 0 = CE, 1 = PE (matches the encoded values in the df.tail() output), so this row is a PE
}])

# Pass the columns in the same order that was used to train the model.
pred = model.predict(new_trade[['Quantity', 'Buy Value', 'Sell Value', 'Strike', 'Option_Type']])
print("📊 Prediction for upcoming trade:", pred[0])


📊 Prediction for upcoming trade: Risky


In [34]:
# Score a second, smaller hypothetical trade with the fitted model.
new_trade = pd.DataFrame([{
    'Quantity': 50,
    'Buy Value': 300.0,
    'Sell Value': 325.0,   # Needed because the model was trained with 'Sell Value' as a feature (a dummy value if unknown)
    'Strike': 43500,
    'Option_Type': 1        # Encoded option type: 0 = CE, 1 = PE (matches the encoded values in the df.tail() output), so this row is a PE
}])

# Pass the columns in the same order that was used to train the model.
pred = model.predict(new_trade[['Quantity', 'Buy Value', 'Sell Value', 'Strike', 'Option_Type']])
print("📊 Prediction for upcoming trade:", pred[0])

📊 Prediction for upcoming trade: Neutral
