# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# load datasets (both CSV paths are relative)
fear, trades = (
    pd.read_csv(csv_name)
    for csv_name in ("fear_greed_index.csv", "trades.csv")
)

# basic checks: report only, nothing is cleaned or dropped here
print("Fear shape:", fear.shape)
print("Trades shape:", trades.shape)

# per-column null counts
print("\nMissing values (Fear):", fear.isna().sum(), sep="\n")
print("\nMissing values (Trades):", trades.isna().sum(), sep="\n")

# count of rows that repeat an earlier row in full
print("\nDuplicates (Fear):", fear.duplicated().sum())
print("Duplicates (Trades):", trades.duplicated().sum())

# Parse the trade timestamps and derive a calendar-date key from them.
trades["timestamp"] = pd.to_datetime(trades["timestamp"])
trades["date"] = trades["timestamp"].dt.date

# Reduce the sentiment dates to the same plain-date representation so the
# two frames share a join key.
fear["date"] = pd.to_datetime(fear["date"]).dt.date

# Left join: every trade row is kept; sentiment columns are NaN where no
# row in `fear` has the same date.
data = pd.merge(trades, fear, how="left", on="date")

# Summed pnl for each (date, trader_id) pair, as a flat table.
daily_pnl = (
    data.groupby(["date", "trader_id"])
    .agg(daily_pnl=("pnl", "sum"))
    .reset_index()
)

# Share of each trader's rows with strictly positive pnl.
win_rate = (
    data["pnl"]
    .gt(0)
    .groupby(data["trader_id"])
    .mean()
    .rename("win_rate")
    .reset_index()
)

# Mean position_size per trader.
avg_trade_size = (
    data.groupby("trader_id")
    .agg(avg_trade_size=("position_size", "mean"))
    .reset_index()
)

# Raw leverage column, kept under its own name for the histogram.
leverage_dist = data["leverage"]

# Number of rows on each date.
trades_per_day = data.groupby(by="date").size()

# Fraction of rows for each distinct value of `side`.
side_column = data["side"]
long_short_ratio = side_column.value_counts(normalize=True)

# sentiment buckets
# Bins are [0, 40] -> Fear, (40, 60] -> Neutral, (60, 100] -> Greed.
# include_lowest=True closes the first interval on the left; without it a
# reading of exactly 0 falls outside every bin and is labelled NaN.
# Missing values and values outside 0-100 still come out as NaN.
data["sentiment_bucket"] = pd.cut(
    data["value"],
    bins=[0, 40, 60, 100],
    labels=["Fear", "Neutral", "Greed"],
    include_lowest=True,
)

# performance by sentiment
# One row per sentiment bucket:
#   avg_pnl      - mean pnl
#   win_rate     - share of rows with pnl strictly above 0
#   avg_leverage - mean leverage
#   trades       - number of non-null pnl values (rows with NaN pnl are
#                  not counted)
# The grouping key is categorical (it comes from pd.cut), so `observed` is
# passed explicitly: observed=False keeps a row for every bucket, including
# buckets with no rows, and does not depend on the pandas default, which
# differs between versions.
sentiment_perf = data.groupby("sentiment_bucket", observed=False).agg(
    avg_pnl=("pnl", "mean"),
    win_rate=("pnl", lambda x: (x > 0).mean()),
    avg_leverage=("leverage", "mean"),
    trades=("pnl", "count"),
)

print("\nPerformance by sentiment:")
print(sentiment_perf)

# leverage segment
# Median split: "Low" is leverage <= median, "High" is leverage > median,
# which is the same assignment a two-quantile qcut produces. The split is
# built with explicit bin edges because qcut raises "Bin edges must be
# unique" when the median coincides with the minimum or maximum (e.g. more
# than half of the rows share the lowest leverage). Rows with missing
# leverage stay NaN.
lev_median = data["leverage"].quantile(0.5)
data["lev_segment"] = pd.cut(
    data["leverage"],
    bins=[-np.inf, lev_median, np.inf],
    labels=["Low", "High"],
)

# frequency segment
# A trader is "Frequent" when their row count is strictly above the median
# count across traders, otherwise "Infrequent".
trade_counts = data.groupby("trader_id").size()
median_trades = trade_counts.median()

# Label each trader once, then broadcast the label onto the rows. This
# avoids a Python-level Series lookup per row, and rows whose trader_id is
# missing (absent from the groupby result) get NaN instead of raising
# KeyError.
freq_labels = pd.Series(
    np.where(trade_counts > median_trades, "Frequent", "Infrequent"),
    index=trade_counts.index,
)
data["freq_segment"] = data["trader_id"].map(freq_labels)

# consistency segment
# A trader is "Consistent" when the standard deviation of their pnl is
# strictly below the median standard deviation across traders.
pnl_std = data.groupby("trader_id")["pnl"].std()
median_std = pnl_std.median()

# Label each trader once, then broadcast the label onto the rows. A trader
# whose std is NaN (e.g. only one pnl value) fails the `<` comparison and
# is labelled "Inconsistent", as before. Rows whose trader_id is missing
# get NaN instead of raising KeyError.
consistency_labels = pd.Series(
    np.where(pnl_std < median_std, "Consistent", "Inconsistent"),
    index=pnl_std.index,
)
data["consistency"] = data["trader_id"].map(consistency_labels)

# Bar chart of mean pnl for each sentiment bucket.
sentiment_perf["avg_pnl"].plot.bar(title="Average PnL by Sentiment")
plt.show()

# Histogram of the leverage column, 30 bins.
plt.hist(leverage_dist, bins=30)
plt.gca().set_title("Leverage Distribution")
plt.show()

# Line chart of the row count on each date.
trades_per_day.plot(title="Trades Per Day")
plt.show()

# Closing notes: a heading preceded by a blank line, then one sentence per
# output line.
print(
    "\nKey insights:",
    "Compare average pnl across sentiment buckets.",
    "Observe leverage changes to see behavior shifts.",
    "Trade counts show activity variation across sentiment.",
    sep="\n",
)

print(
    "\nStrategy ideas:",
    "Reduce leverage during Fear periods.",
    "Increase trading only for high win-rate traders during Greed.",
    sep="\n",
)

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# create profitability label: 1 when pnl is strictly positive, else 0
data["profit_bucket"] = np.where(data["pnl"] > 0, 1, 0)

feature_cols = ["value", "leverage", "position_size"]

# Train only on rows where every feature and the pnl are present. A NaN pnl
# would otherwise be labelled 0 (a loss) by the comparison above, and NaN
# features make fit() fail on scikit-learn releases without missing-value
# support in random forests.
model_rows = data.dropna(subset=feature_cols + ["pnl"])
features = model_rows[feature_cols]
target = model_rows["profit_bucket"]

# 80/20 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

# Seed the forest as well; with only the split seeded, the fitted trees
# and therefore the report change from run to run.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

preds = model.predict(X_test)

print("\nModel evaluation:")
print(classification_report(y_test, preds))