# Trader Performance vs Market Sentiment Analysis
Structured quantitative analysis of trader behavior across Fear and Greed market regimes.

## Setup
This notebook expects both CSV files to be in its working directory (the directory the kernel is started from):
- `fear_greed.csv`
- `historical_trader_data.csv`

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Global plotting configuration for every figure in this notebook:
# first select matplotlib's built-in "default" style, then apply seaborn's
# default theme on top of it.
plt.style.use("default")
sns.set()


## Load Data

In [None]:
# Input files, given relative to the kernel's working directory.
SENTIMENT_CSV = "fear_greed.csv"
TRADES_CSV = "historical_trader_data.csv"

# Load the raw sentiment labels and the raw trade history.
sentiment = pd.read_csv(SENTIMENT_CSV)
trades = pd.read_csv(TRADES_CSV)

# Sanity check: print the (rows, columns) shape of each raw frame.
for label, shape in (("Sentiment shape:", sentiment.shape), ("Trades shape:", trades.shape)):
    print(label, shape)


## Data Cleaning

In [None]:
# --- Sentiment cleaning ---------------------------------------------------
# Parse the date column; unparseable values become NaT instead of raising.
sentiment['date'] = pd.to_datetime(sentiment['date'], errors='coerce')
# Calendar-day key used to join sentiment onto trades.
sentiment['date_only'] = sentiment['date'].dt.date

# --- Trades cleaning ------------------------------------------------------
# NOTE(review): no explicit format/dayfirst is passed, so pandas infers the
# layout. If 'Timestamp IST' is day-first (dd-mm-yyyy), ambiguous dates could
# be misread - confirm against the raw file.
# NOTE(review): the column name suggests IST timestamps; whether the sentiment
# dates use the same timezone is not visible here - confirm.
trades['Timestamp IST'] = pd.to_datetime(trades['Timestamp IST'], errors='coerce')
trades['date_only'] = trades['Timestamp IST'].dt.date

# Force numeric dtypes; anything non-numeric becomes NaN.
trades['Closed PnL'] = pd.to_numeric(trades['Closed PnL'], errors='coerce')
trades['Size USD'] = pd.to_numeric(trades['Size USD'], errors='coerce')

# --- Merge datasets -------------------------------------------------------
# Build a lookup with exactly one row per calendar day before joining:
#   * rows whose date failed to parse (NaT) are dropped, because pandas
#     matches null join keys to each other and would otherwise pair every
#     NaT trade with every NaT sentiment row;
#   * duplicate days are collapsed (first occurrence wins) so a repeated
#     date cannot duplicate trade rows.
sentiment_by_day = (
    sentiment[['date_only', 'classification']]
    .dropna(subset=['date_only'])
    .drop_duplicates(subset='date_only', keep='first')
    .rename(columns={'classification': 'sentiment'})
)

# Left join keeps every trade; trades without a matching day get NaN sentiment.
df = trades.merge(
    sentiment_by_day,
    on='date_only',
    how='left',
    validate='many_to_one',
)

# A many-to-one left join must not change the number of trade rows.
assert len(df) == len(trades), "merge changed the number of trade rows"

print("Merged shape:", df.shape)
df.head()


## Feature Engineering

In [None]:
# Boolean flag: True when a trade closed with strictly positive PnL
# (missing PnL compares as False).
df['win'] = df['Closed PnL'].gt(0)

# Total, average and spread of closed PnL for each (day, sentiment) pair.
# Rows with a missing group key are excluded by groupby's default behaviour.
daily_summary = (
    df.groupby(['date_only', 'sentiment'])['Closed PnL']
      .agg(['sum', 'mean', 'std'])
      .reset_index()
)

daily_summary.head()


## PnL Distribution Across Sentiment

In [None]:
# Box plot of per-trade closed PnL, one box per sentiment label.
fig, ax = plt.subplots(figsize=(8, 5))
sns.boxplot(data=df, x='sentiment', y='Closed PnL', ax=ax)
ax.set_title("PnL Distribution: Fear vs Greed")
# Slant the category labels so longer names do not collide.
plt.setp(ax.get_xticklabels(), rotation=20)
fig.tight_layout()
plt.show()


## Trade Frequency by Sentiment

In [None]:
# Seed for the bootstrap that seaborn uses to draw the bar error intervals;
# without it the figure differs slightly on every run.
BOOTSTRAP_SEED = 42

# Number of trades per (day, sentiment) pair. Rows with a missing group key
# are excluded by groupby's default behaviour.
trade_freq = (
    df.groupby(['date_only', 'sentiment'])
      .size()
      .reset_index(name='trades')
)

# Each bar is the mean of the daily trade counts for that sentiment label
# (barplot's default estimator), so the y-axis is labelled accordingly.
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(x='sentiment', y='trades', data=trade_freq, seed=BOOTSTRAP_SEED, ax=ax)
ax.set_title("Trade Frequency Across Sentiment")
ax.set_ylabel("Mean trades per day")
# Slant the category labels so longer names do not collide.
plt.setp(ax.get_xticklabels(), rotation=20)
fig.tight_layout()
plt.show()


## Position Size Behavior

In [None]:
# Box plot of per-trade position size in USD, one box per sentiment label.
fig, ax = plt.subplots(figsize=(8, 5))
sns.boxplot(data=df, x='sentiment', y='Size USD', ax=ax)
ax.set_title("Position Size vs Sentiment")
# Slant the category labels so longer names do not collide.
plt.setp(ax.get_xticklabels(), rotation=20)
fig.tight_layout()
plt.show()


## Trader Segmentation (Frequent vs Casual)

In [None]:
# An account is labelled "Frequent" when it has strictly more than this many
# rows in df; every other account is "Casual".
FREQUENT_TRADE_THRESHOLD = 50

# Rows per account, then the set of accounts above the threshold.
trade_counts = df['Account'].value_counts()
active_accounts = trade_counts[trade_counts > FREQUENT_TRADE_THRESHOLD].index

# Vectorised membership test instead of a per-row Python lambda; rows with a
# missing account are not in active_accounts and therefore end up "Casual".
df['activity'] = (
    df['Account']
    .isin(active_accounts)
    .map({True: 'Frequent', False: 'Casual'})
)

# Compare the per-trade closed PnL distribution of the two segments.
fig, ax = plt.subplots(figsize=(8, 5))
sns.boxplot(x='activity', y='Closed PnL', data=df, ax=ax)
ax.set_title("Performance: Frequent vs Casual Traders")
fig.tight_layout()
plt.show()


## Simple Predictive Model

In [None]:
# Reproducibility and evaluation settings for the model below.
MODEL_SEED = 42        # fixes the train/test shuffle
TEST_FRACTION = 0.2    # share of rows held out for evaluation

# Keep only rows where both the target source and the feature are present.
model_df = df.dropna(subset=['Closed PnL', 'Size USD']).copy()
# Binary target: True when the trade closed with strictly positive PnL.
model_df['profit'] = model_df['Closed PnL'] > 0

# Single-feature design matrix (position size) and the target.
X = model_df[['Size USD']]
y = model_df['profit']

# Hold out part of the data so the reported accuracy is not measured on the
# same rows the model was fitted on.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=MODEL_SEED
)

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Accuracy of always predicting the more common class in the held-out set;
# the model is only informative if it beats this number.
positive_rate = y_test.mean()
baseline_accuracy = max(positive_rate, 1 - positive_rate)

# "Model accuracy" is now the held-out (test) accuracy.
print("Model accuracy:", model.score(X_test, y_test))
print("Baseline accuracy:", baseline_accuracy)
