In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# Location of the merged trader/sentiment CSV.
# NOTE(review): absolute path (looks like a Colab mount) — adjust when running elsewhere.
MERGED_DATA_CSV = '/content/merged_trader_sentiment_data.csv'
merged_df = pd.read_csv(MERGED_DATA_CSV)

In [5]:
# Preview the first five rows of the loaded frame.
merged_df.head(n=5)

Unnamed: 0,account,symbol,execution_price,size_tokens,size_usd,side,Timestamp IST,Start Position,Direction,closedPnL,...,time,Date,direction,is_profitable,timestamp,value,classification,date,Classification,implied_leverage
0,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.98,16.0,127.68,BUY,02-12-2024 22:50,986.524596,Buy,0.0,...,2024-10-27 03:33:20,2024-10-27,1,False,1730007000,74,Greed,2024-10-27,Greed,0.129424
1,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9855,144.09,1150.63,BUY,02-12-2024 22:50,1002.518996,Buy,0.0,...,2024-10-27 03:33:20,2024-10-27,1,False,1730007000,74,Greed,2024-10-27,Greed,1.147739
2,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9874,142.98,1142.04,BUY,02-12-2024 22:50,1146.558564,Buy,0.0,...,2024-10-27 03:33:20,2024-10-27,1,False,1730007000,74,Greed,2024-10-27,Greed,0.996059
3,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9894,8.73,69.75,BUY,02-12-2024 22:50,1289.488521,Buy,0.0,...,2024-10-27 03:33:20,2024-10-27,1,False,1730007000,74,Greed,2024-10-27,Greed,0.054091
4,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.99,1.41,11.27,BUY,02-12-2024 22:50,1298.215466,Buy,0.0,...,2024-10-27 03:33:20,2024-10-27,1,False,1730007000,74,Greed,2024-10-27,Greed,0.008681


# Statistical Tests

In [8]:
import numpy as np
from scipy.stats import ttest_ind, f_oneway

# Split the trades into two frames by their "Classification" label.
# Rows carrying any other label end up in neither frame.
is_fear = merged_df["Classification"].eq("Fear")
is_greed = merged_df["Classification"].eq("Greed")
fear_df = merged_df.loc[is_fear]
greed_df = merged_df.loc[is_greed]


T-Test: Profitability (closedPnL)

In [9]:


# Two-sample t-test on closedPnL, Fear vs Greed; equal_var=False selects
# Welch's variant, which does not assume equal group variances.
welch_pnl = ttest_ind(
    fear_df["closedPnL"], greed_df["closedPnL"], equal_var=False
)
t_pnl, p_pnl = welch_pnl.statistic, welch_pnl.pvalue

t_pnl, p_pnl


(np.float64(-5.864971964610066), np.float64(4.521489314640529e-09))

In [10]:
# Report whether the closedPnL t-test p-value falls below the 0.05 threshold.
if p_pnl < 0.05:
    pnl_verdict = "Significant difference"
else:
    pnl_verdict = "No significant difference"
print(pnl_verdict)


Significant difference


T-Test: Implied Leverage

In [11]:
# Same Welch t-test as for PnL, applied to implied_leverage (Fear vs Greed).
welch_lev = ttest_ind(
    fear_df["implied_leverage"], greed_df["implied_leverage"], equal_var=False
)
t_lev, p_lev = welch_lev.statistic, welch_lev.pvalue

t_lev, p_lev


(np.float64(1.6710192804461648), np.float64(0.09472024627666556))

ANOVA

In [12]:
# One-way ANOVA on closedPnL across the two sentiment groups.
pnl_by_sentiment = [fear_df["closedPnL"], greed_df["closedPnL"]]
anova_pnl = f_oneway(*pnl_by_sentiment)

anova_pnl


F_onewayResult(statistic=np.float64(45.04466845607577), pvalue=np.float64(1.9320239996241292e-11))

In [14]:
# Summarise whether PnL differs between sentiment regimes at the 0.05 level.
# NOTE(review): this reads p_pnl (the Welch t-test p-value), not anova_pnl.pvalue —
# confirm which test this conclusion is meant to summarise.
if p_pnl < 0.05:
    regime_verdict = "Sentiment regime matters"
else:
    regime_verdict = "Behavior changes more than outcomes"
print(regime_verdict)

Sentiment regime matters


# MACHINE LEARNING

In [15]:
from sklearn.model_selection import StratifiedKFold, KFold, cross_val_score
from sklearn.metrics import (
    accuracy_score, f1_score, roc_auc_score,
    r2_score, mean_absolute_error
)
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor


Target & Features


In [17]:
# Encode market sentiment as a numeric feature (Fear = 0, Greed = 1).
# Series.map leaves any label absent from the mapping as NaN.
SENTIMENT_CODES = {"Fear": 0, "Greed": 1}
merged_df["sentiment_binary"] = merged_df["Classification"].map(SENTIMENT_CODES)


In [18]:
# Predictor columns shared by the classification and regression models.
features = [
    "sentiment_binary",
    "size_usd",
    "implied_leverage",
    "Fee",
]

# Classification target: 1 when the trade's closedPnL is strictly positive, else 0.
merged_df["profitable"] = merged_df["closedPnL"].gt(0).astype(int)

# Design matrix plus the two targets (win/loss flag and raw PnL).
X, y_cls, y_reg = (
    merged_df[features],
    merged_df["profitable"],
    merged_df["closedPnL"],
)


# XGBoost – Classification

In [21]:
!pip install xgboost




In [22]:
from sklearn.model_selection import StratifiedKFold, cross_validate
from xgboost import XGBClassifier, XGBRegressor

# Cross-validation splitter for the classification task. It is defined here because
# no other cell in the notebook creates `cv_cls`, so a fresh-kernel run would
# otherwise stop with a NameError.
# NOTE(review): 5 shuffled, stratified folds with seed 42 is an assumption — confirm
# it matches the setup that produced the scores shown in the saved outputs.
cv_cls = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

xgb_clf = XGBClassifier(
    n_estimators=300,
    max_depth=5,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    eval_metric="logloss",
    random_state=42
)

# Evaluate all three metrics in one pass over the folds: each fold's model is
# fitted once and scored three times, instead of being refitted once per metric.
xgb_clf_scores = cross_validate(
    xgb_clf,
    X,
    y_cls,
    cv=cv_cls,
    scoring=("accuracy", "f1", "roc_auc"),
)

# Per-fold score arrays; later cells call .mean() on these names.
xgb_acc = xgb_clf_scores["test_accuracy"]
xgb_f1 = xgb_clf_scores["test_f1"]
xgb_auc = xgb_clf_scores["test_roc_auc"]


In [23]:
# Mean cross-validated accuracy, F1 and ROC-AUC, in that order.
tuple(fold_scores.mean() for fold_scores in (xgb_acc, xgb_f1, xgb_auc))


(np.float64(0.6448269668303197),
 np.float64(0.498273969689811),
 np.float64(0.7080825227416051))

# XGBoost – Regression

In [24]:
from sklearn.model_selection import KFold, cross_validate

# Cross-validation splitter for the regression task. It is defined here because
# no other cell in the notebook creates `cv_reg`, so a fresh-kernel run would
# otherwise stop with a NameError.
# NOTE(review): 5 shuffled folds with seed 42 is an assumption — confirm it matches
# the setup that produced the scores shown in the saved outputs.
cv_reg = KFold(n_splits=5, shuffle=True, random_state=42)

xgb_reg = XGBRegressor(
    n_estimators=400,
    max_depth=5,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42
)

# Score both metrics from a single fit per fold rather than refitting per metric.
xgb_reg_scores = cross_validate(
    xgb_reg,
    X,
    y_reg,
    cv=cv_reg,
    scoring=("r2", "neg_mean_absolute_error"),
)

# Per-fold score arrays; later cells call .mean() on these names.
xgb_r2 = xgb_reg_scores["test_r2"]
# scikit-learn reports MAE negated (higher is better), so flip the sign back.
xgb_mae = -xgb_reg_scores["test_neg_mean_absolute_error"]


In [25]:
# Mean cross-validated R² and MAE, in that order.
tuple(fold_scores.mean() for fold_scores in (xgb_r2, xgb_mae))


(np.float64(0.0001827368254136985), np.float64(105.06967443405881))

FINAL RESULTS TABLE

In [27]:
# One-row summary of the classifier's mean cross-validated scores.
clf_summary_row = {
    "Model": "XGBoost Classifier",
    "CV Accuracy": xgb_acc.mean(),
    "CV F1 Score": xgb_f1.mean(),
    "CV ROC-AUC": xgb_auc.mean(),
}
xgb_classification_results = pd.DataFrame([clf_summary_row])

xgb_classification_results


Unnamed: 0,Model,CV Accuracy,CV F1 Score,CV ROC-AUC
0,XGBoost Classifier,0.644827,0.498274,0.708083


In [28]:
# One-row summary of the regressor's mean cross-validated scores.
reg_summary_row = {
    "Model": "XGBoost Regressor",
    "CV R² Score": xgb_r2.mean(),
    "CV MAE": xgb_mae.mean(),
}
xgb_regression_results = pd.DataFrame([reg_summary_row])

xgb_regression_results


Unnamed: 0,Model,CV R² Score,CV MAE
0,XGBoost Regressor,0.000183,105.069674


In [29]:
# Combined table: one row per model, with None where a metric does not apply
# to that task (classification metrics for the regressor and vice versa).
mean_acc, mean_f1, mean_auc = xgb_acc.mean(), xgb_f1.mean(), xgb_auc.mean()
mean_r2, mean_mae = xgb_r2.mean(), xgb_mae.mean()

final_results = pd.DataFrame(
    {
        "Model": ["XGBoost Classifier", "XGBoost Regressor"],
        "Task": ["Classification (Win/Loss)", "Regression (PnL)"],
        "Accuracy": [mean_acc, None],
        "F1 Score": [mean_f1, None],
        "ROC-AUC": [mean_auc, None],
        "R² Score": [None, mean_r2],
        "MAE": [None, mean_mae],
    }
)

final_results


Unnamed: 0,Model,Task,Accuracy,F1 Score,ROC-AUC,R² Score,MAE
0,XGBoost Classifier,Classification (Win/Loss),0.644827,0.498274,0.708083,,
1,XGBoost Regressor,Regression (PnL),,,,0.000183,105.069674
