In [35]:
import pandas as pd

# Load CSV
df = pd.read_csv("../data/Raw/trades_sample.csv")

# Show first few rows
df.head()

df.info()



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   trade_id     10 non-null     int64  
 1   timestamp    10 non-null     object 
 2   instrument   10 non-null     object 
 3   side         10 non-null     object 
 4   qty          10 non-null     int64  
 5   entry_price  10 non-null     float64
 6   exit_price   10 non-null     float64
 7   pnl          10 non-null     float64
 8   notes        10 non-null     object 
dtypes: float64(3), int64(2), object(4)
memory usage: 852.0+ bytes


In [36]:
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   trade_id     10 non-null     int64  
 1   timestamp    10 non-null     object 
 2   instrument   10 non-null     object 
 3   side         10 non-null     object 
 4   qty          10 non-null     int64  
 5   entry_price  10 non-null     float64
 6   exit_price   10 non-null     float64
 7   pnl          10 non-null     float64
 8   notes        10 non-null     object 
dtypes: float64(3), int64(2), object(4)
memory usage: 852.0+ bytes


In [37]:
df.describe()


Unnamed: 0,trade_id,qty,entry_price,exit_price,pnl
count,10.0,10.0,10.0,10.0,10.0
mean,5.5,9000.2,466.41877,466.56911,8.55
std,3.02765,5676.109795,980.876645,981.198787,16.657414
min,1.0,1.0,1.071,1.072,-15.0
25%,3.25,10000.0,1.07325,1.073925,-5.625
50%,5.5,10000.0,1.1735,1.1735,12.5
75%,7.75,10000.0,1.275625,1.27605,20.25
max,10.0,20000.0,2330.0,2336.0,36.0


In [38]:
# Phase 2: Data Cleaning & Processing

# 1. Convert timestamp to datetime
df['timestamp'] = pd.to_datetime(df['timestamp'])

# 2. Standardize categorical fields
df['side'] = df['side'].str.capitalize()
df['instrument'] = df['instrument'].str.upper()

# 3. Create derived columns
df['return_pct'] = (df['exit_price'] - df['entry_price']) / df['entry_price'] * 100

# Ensure trades are in chronological order
df = df.sort_values('timestamp')

# Holding time in minutes (time since previous trade)
df['holding_time'] = df['timestamp'].diff().dt.total_seconds().fillna(0) / 60

# 4. Quick validation
print(df.info())
df.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   trade_id      10 non-null     int64         
 1   timestamp     10 non-null     datetime64[ns]
 2   instrument    10 non-null     object        
 3   side          10 non-null     object        
 4   qty           10 non-null     int64         
 5   entry_price   10 non-null     float64       
 6   exit_price    10 non-null     float64       
 7   pnl           10 non-null     float64       
 8   notes         10 non-null     object        
 9   return_pct    10 non-null     float64       
 10  holding_time  10 non-null     float64       
dtypes: datetime64[ns](1), float64(5), int64(2), object(3)
memory usage: 1012.0+ bytes
None


Unnamed: 0,trade_id,timestamp,instrument,side,qty,entry_price,exit_price,pnl,notes,return_pct,holding_time
0,1,2024-06-12 09:31:00,EURUSD,Buy,10000,1.074,1.0752,12.0,chased breakout fomo,0.111732,0.0
1,2,2024-06-12 10:15:00,EURUSD,Sell,10000,1.075,1.0765,-15.0,stopped out quickly,0.139535,44.0
2,3,2024-06-12 11:40:00,GBPUSD,Buy,10000,1.2745,1.2768,23.0,followed plan,0.180463,85.0
3,4,2024-06-12 13:05:00,GBPUSD,Sell,10000,1.276,1.2738,22.0,news volatility,-0.172414,85.0
4,5,2024-06-12 14:20:00,XAUUSD,Buy,1,2325.0,2320.5,-4.5,held too long revenge,-0.193548,75.0


In [39]:
# Phase 3: Bias Detection

def detect_bias(row):
    note = str(row['notes']).lower()
    bias_list = []

    # Rule-based detection from notes
    if "fomo" in note or "chased" in note:
        bias_list.append("FOMO")
    if "revenge" in note or "held too long" in note:
        bias_list.append("Revenge Trading")
    if "stopped out quickly" in note:
        bias_list.append("Fear / Early Exit")
    if "overtraded" in note or "too many" in note:
        bias_list.append("Overtrading")
    if "news" in note or "volatility" in note:
        bias_list.append("News Reaction")

    # Behavioral rules
    if row['holding_time'] < 5:
        bias_list.append("Impatience")
    if row['return_pct'] < -0.5:
        bias_list.append("Lack of Stop Loss")

    return ", ".join(bias_list) if bias_list else "No clear bias"

# Apply to dataframe
df['bias_detected'] = df.apply(detect_bias, axis=1)

# Show results
df[['trade_id', 'notes', 'bias_detected']]


Unnamed: 0,trade_id,notes,bias_detected
0,1,chased breakout fomo,"FOMO, Impatience"
1,2,stopped out quickly,Fear / Early Exit
2,3,followed plan,No clear bias
3,4,news volatility,News Reaction
4,5,held too long revenge,Revenge Trading
5,6,overconfident after loss,No clear bias
6,7,re-entry calmer,No clear bias
7,8,good setup,No clear bias
8,9,overtraded tired,Overtrading
9,10,followed rules,No clear bias


In [40]:
import os
from pathlib import Path
import pandas as pd

# show where we are and what's inside data/
print("Current working dir:", os.getcwd())
print("Top-level folders:", os.listdir("."))

# show data folder contents if available
if os.path.isdir("data"):
    print("\n/data contents:", os.listdir("data"))
    if os.path.isdir("data/Processed"):
        print("Found: data/Processed")
    if os.path.isdir("data/Raw"):
        print("Found: data/Raw ->", os.listdir("data/Raw"))
else:
    print("\nNo 'data' folder found at project root.")

# decide where to save:
if os.path.isdir("data/Processed"):
    out_dir = Path("data/Processed")
elif os.path.isdir("data/Raw/Processed"):
    out_dir = Path("data/Raw/Processed")
else:
    # create data/Processed at project root (safe)
    out_dir = Path("data/Processed")
    out_dir.mkdir(parents=True, exist_ok=True)
    print("\nCreated directory:", out_dir)

# load df if not already loaded (optional - safe reload)
if 'df' not in globals():
    df = pd.read_csv("../data/Raw/trades_sample.csv")  # adjust if needed
    print("Loaded df from ../data/Raw/trades_sample.csv")

# save file
out_path = out_dir / "trades_with_bias.csv"
df.to_csv(out_path, index=False)
print("\nSaved file to:", out_path)

# list contents of the output folder to verify
print("\nContents of", out_dir, ":", os.listdir(out_dir))


Current working dir: /Users/nikhilsudan/Desktop/tradebias_ai/notebooks
Top-level folders: ['step1_read_csv.ipynb', 'main.py']

No 'data' folder found at project root.

Created directory: data/Processed

Saved file to: data/Processed/trades_with_bias.csv

Contents of data/Processed : ['trades_with_bias.csv']
