In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from scipy.stats import ttest_ind
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

# Apply matplotlib's "default" style sheet to the figures drawn in this notebook.
plt.style.use("default")

In [7]:
# Read both input CSVs from the working directory: the Fear & Greed index
# readings and the historical trade log.
trades_df = pd.read_csv("historical_data.csv")
sentiment_df = pd.read_csv("fear_greed_index.csv")

# Report (rows, columns) of each frame as a quick sanity check on the load.
for label, frame in (
    ("Sentiment Data Shape:", sentiment_df),
    ("Trader Data Shape:", trades_df),
):
    print(label, frame.shape)

Sentiment Data Shape: (2644, 4)
Trader Data Shape: (211224, 16)


In [12]:
# Show the sentiment frame as the cell output (pandas abbreviates the
# rendering with an ellipsis row instead of printing every record).
sentiment_df

Unnamed: 0,timestamp,value,classification,date
0,1517463000,30,Fear,2018-02-01
1,1517549400,15,Extreme Fear,2018-02-02
2,1517635800,40,Fear,2018-02-03
3,1517722200,24,Extreme Fear,2018-02-04
4,1517808600,11,Extreme Fear,2018-02-05
...,...,...,...,...
2639,1745818200,54,Neutral,2025-04-28
2640,1745904600,60,Greed,2025-04-29
2641,1745991000,56,Greed,2025-04-30
2642,1746077400,53,Neutral,2025-05-01


In [14]:
# Preview the first 10 rows of the sentiment data.
sentiment_df.head(10)

Unnamed: 0,timestamp,value,classification,date
0,1517463000,30,Fear,2018-02-01
1,1517549400,15,Extreme Fear,2018-02-02
2,1517635800,40,Fear,2018-02-03
3,1517722200,24,Extreme Fear,2018-02-04
4,1517808600,11,Extreme Fear,2018-02-05
5,1517895000,8,Extreme Fear,2018-02-06
6,1517981400,36,Fear,2018-02-07
7,1518067800,30,Fear,2018-02-08
8,1518154200,44,Fear,2018-02-09
9,1518240600,54,Neutral,2018-02-10


In [15]:
# Preview the last 10 rows of the sentiment data.
sentiment_df.tail(10)

Unnamed: 0,timestamp,value,classification,date
2634,1745386200,72,Greed,2025-04-23
2635,1745472600,63,Greed,2025-04-24
2636,1745559000,60,Greed,2025-04-25
2637,1745645400,65,Greed,2025-04-26
2638,1745731800,61,Greed,2025-04-27
2639,1745818200,54,Neutral,2025-04-28
2640,1745904600,60,Greed,2025-04-29
2641,1745991000,56,Greed,2025-04-30
2642,1746077400,53,Neutral,2025-05-01
2643,1746163800,67,Greed,2025-05-02


In [20]:
# Re-check the (rows, columns) of both frames before any cleaning is applied.
sentiment_shape, trades_shape = sentiment_df.shape, trades_df.shape
print("Sentiment Data Shape:", sentiment_shape)
print("Trader Data Shape:", trades_shape)

Sentiment Data Shape: (2644, 4)
Trader Data Shape: (211224, 16)


In [None]:
# Parse the date-like columns into pandas datetimes so the `.dt` accessor can
# be used on them in the next step.
# The sentiment CSV's calendar-day column is lowercase `date` (see the frame
# preview above); reading it as "Date" raised a KeyError. The parsed values
# are stored under "Date", the key name the rest of the notebook uses.
sentiment_df["Date"] = pd.to_datetime(sentiment_df["date"])
# NOTE(review): assumes historical_data.csv has a parseable `time` column;
# its header is not displayed anywhere in this notebook, so confirm.
trades_df["time"] = pd.to_datetime(trades_df["time"])

In [None]:
# Reduce both timestamps to plain calendar days (datetime.date) so trades and
# sentiment readings share a "Date" key they can be joined on.
# Each source goes through pd.to_datetime first: already-parsed datetime
# values pass through unchanged, and it lets this cell be re-run after "Date"
# has been converted to date objects (a bare `.dt` raises on such a column).
trades_df["Date"] = pd.to_datetime(trades_df["time"]).dt.date
sentiment_df["Date"] = pd.to_datetime(sentiment_df["Date"]).dt.date

In [None]:
# Lower-case the sentiment labels so later comparisons such as == "fear"
# do not depend on the capitalisation used in the raw file.
# The source column is lowercase `classification` (see the frame preview
# above); reading "Classification" raised a KeyError. The normalized labels
# are written to "Classification", the name used by the cells below.
sentiment_df["Classification"] = sentiment_df["classification"].str.lower()


In [None]:
# Ordinal score per (lower-cased) sentiment label.
# The raw data also contains "Extreme Fear" and "Neutral" rows; with only
# fear/greed in the map those rows silently became NaN. The scores for
# "fear" (-1) and "greed" (1) are unchanged.
# NOTE(review): "extreme greed" does not appear in the previewed rows; it is
# included on the assumption that the index has a symmetric top bucket.
sentiment_map = {
    "extreme fear": -2,
    "fear": -1,
    "neutral": 0,
    "greed": 1,
    "extreme greed": 2,
}
sentiment_df["sentiment_score"] = sentiment_df["Classification"].map(sentiment_map)

In [None]:
# Keep only trades that have a value in every field the metrics below use.
required_cols = ["closedPnL", "size", "leverage"]
trades_df = trades_df.dropna(subset=required_cols)


In [None]:
# Attach to each trade the sentiment row of the same calendar day.
# Inner join: rows whose "Date" has no match in the other frame are dropped.
merged_df = trades_df.merge(sentiment_df, how="inner", on="Date")

print("Merged Data Shape:", merged_df.shape)

In [None]:
# A trade counts as a win when its closed PnL is strictly positive.
merged_df["is_win"] = merged_df["closedPnL"].gt(0)

In [None]:
# Closed PnL divided by trade size. Zero sizes are turned into NaN first so
# the result is NaN for those rows rather than an infinite value.
nonzero_size = merged_df["size"].replace(0, np.nan)
merged_df["pnl_per_unit"] = merged_df["closedPnL"].div(nonzero_size)

In [None]:
# Closed PnL divided by leverage. Zero leverage is turned into NaN first so
# the result is NaN for those rows rather than an infinite value.
nonzero_leverage = merged_df["leverage"].replace(0, np.nan)
merged_df["risk_adjusted_pnl"] = merged_df["closedPnL"].div(nonzero_leverage)

In [None]:
# Numeric trade direction: +1 for "buy", -1 for "sell"; any other value in
# `side` maps to NaN.
side_codes = {"buy": 1, "sell": -1}
merged_df["side_encoded"] = merged_df["side"].map(side_codes)

In [None]:
# Flag trades that go against the prevailing mood: buying while the market is
# fearful, or selling while it is greedy.
# Labels are matched by substring so the "extreme fear" / "extreme greed"
# regimes count too; an exact == "fear" / == "greed" test skipped them and
# left those trades in the non-contrarian group. Missing labels count as
# neither fearful nor greedy (na=False).
# NOTE(review): assumes `side` holds lowercase "buy"/"sell"; confirm against
# the raw trade file.
sentiment_label = merged_df["Classification"]
is_fearful = sentiment_label.str.contains("fear", regex=False, na=False)
is_greedy = sentiment_label.str.contains("greed", regex=False, na=False)

merged_df["contrarian"] = (
    (is_fearful & (merged_df["side"] == "buy")) |
    (is_greedy & (merged_df["side"] == "sell"))
)

In [None]:
# Box plot of per-trade closed PnL, one box per sentiment label.
fig, ax = plt.subplots()
sns.boxplot(data=merged_df, x="Classification", y="closedPnL", ax=ax)
ax.set_title("Trader PnL by Market Sentiment")
plt.show()

In [None]:
# Box plot of per-trade leverage, one box per sentiment label.
fig, ax = plt.subplots()
sns.boxplot(data=merged_df, x="Classification", y="leverage", ax=ax)
ax.set_title("Leverage Usage by Sentiment")
plt.show()

In [None]:
# Share of winning trades per sentiment label (mean of the boolean is_win flag).
win_rate = merged_df["is_win"].groupby(merged_df["Classification"]).mean()
print("\nWin Rate by Sentiment:\n", win_rate)

In [None]:
# Two-sample t-test without the equal-variance assumption (equal_var=False)
# on per-trade closed PnL. Only rows labelled exactly "fear" or exactly
# "greed" are compared; NaN values are skipped (nan_policy="omit").
pnl_by_trade = merged_df["closedPnL"]
label_by_trade = merged_df["Classification"]

fear_pnl = pnl_by_trade[label_by_trade == "fear"]
greed_pnl = pnl_by_trade[label_by_trade == "greed"]

t_stat, p_value = ttest_ind(fear_pnl, greed_pnl, equal_var=False, nan_policy="omit")

In [None]:
# Print the t-statistic and p-value held in t_stat / p_value.
print("\nT-Test Results (Fear vs Greed):")
for label, value in (("T-Statistic:", t_stat), ("P-Value:", p_value)):
    print(label, value)

In [None]:
# Build one row per account: trade count, total and mean closed PnL, win
# rate, mean leverage and mean leverage-adjusted PnL.
per_account = merged_df.groupby("account")
trader_metrics = per_account.agg(
    total_trades=pd.NamedAgg(column="closedPnL", aggfunc="count"),
    total_pnl=pd.NamedAgg(column="closedPnL", aggfunc="sum"),
    avg_pnl=pd.NamedAgg(column="closedPnL", aggfunc="mean"),
    win_rate=pd.NamedAgg(column="is_win", aggfunc="mean"),
    avg_leverage=pd.NamedAgg(column="leverage", aggfunc="mean"),
    risk_adj_pnl=pd.NamedAgg(column="risk_adjusted_pnl", aggfunc="mean"),
).reset_index()  # move `account` from the index back to a regular column

print("\nTrader Metrics Sample:")
print(trader_metrics.head())

In [None]:
# Segment accounts into three groups with k-means on four per-account metrics.
# Missing metric values are replaced by 0, then every feature is standardized
# so that no single metric dominates the distance computation because of its
# scale.
feature_cols = ["total_trades", "avg_pnl", "win_rate", "avg_leverage"]
features = trader_metrics[feature_cols].fillna(0)

scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# "auto", so leaving it unset makes the resulting labels depend on the
# installed version and emits a FutureWarning on the releases in between.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
trader_metrics["cluster"] = kmeans.fit_predict(scaled_features)

In [None]:
# Mean of every metric per cluster.
# `account` is an identifier rather than a metric, so it is dropped before
# averaging. Calling mean() with it still present raises TypeError on
# pandas >= 2.0 when the column is non-numeric (NOTE(review): its dtype is
# not shown in this notebook). numeric_only=True guards against any other
# non-numeric column.
cluster_summary = (
    trader_metrics
    .drop(columns=["account"])
    .groupby("cluster")
    .mean(numeric_only=True)
)
print("\nCluster Summary:\n", cluster_summary)

In [None]:
# Tag every trade with its account's cluster label (left join keeps all
# trades), then average closed PnL for each (cluster, sentiment) pair.
account_clusters = trader_metrics[["account", "cluster"]]
cluster_sentiment_df = pd.merge(
    merged_df, account_clusters, how="left", on="account"
)

cluster_sentiment_perf = (
    cluster_sentiment_df
    .groupby(["cluster", "Classification"])["closedPnL"]
    .mean()
)

print("\nCluster Performance by Sentiment:\n", cluster_sentiment_perf)

In [None]:
# Average closed PnL for contrarian versus non-contrarian trades.
# The result is reindexed to both labels so the lookups below cannot raise
# KeyError when one group has no trades; the missing group prints as NaN.
contrarian_perf = (
    merged_df.groupby("contrarian")["closedPnL"]
    .mean()
    .reindex([False, True])
)

print("\nContrarian Strategy Performance:")
print("Non-Contrarian Avg PnL:", contrarian_perf[False])
print("Contrarian Avg PnL:", contrarian_perf[True])

In [None]:
# NOTE(review): these are fixed, hand-written statements. The cell prints
# them verbatim and does not derive them from any computed result.
insight_lines = (
    "\nKEY INSIGHTS:",
    "- Market sentiment strongly impacts trader leverage and profitability",
    "- Greed phases show higher leverage and higher downside risk",
    "- Fear phases reward disciplined and contrarian traders",
    "- Certain trader clusters consistently outperform across sentiments",
    "- Sentiment-aware risk management can improve returns and reduce drawdowns",
)
print(*insight_lines, sep="\n")