In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings
warnings.filterwarnings("ignore")


In [3]:
os.makedirs('csv_files', exist_ok=True)
os.makedirs('outputs', exist_ok=True)


In [6]:
from google.colab import drive
drive.mount('/content/drive')


ModuleNotFoundError: No module named 'google'

In [None]:
fg = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/fear_greed_index.csv')
fg.head()


In [None]:
tr = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/historical_data.csv')
tr.head()


In [None]:
fg['date'] = pd.to_datetime(fg['date'])

In [None]:
fg['classification'] = fg['classification'].str.strip().str.title()
fg['classification'].value_counts()


In [None]:
fg['date'] = fg['date'].dt.date


In [None]:
tr.columns = [c.strip().lower().replace(' ', '_') for c in tr.columns]
tr['timestamp_ist'] = pd.to_datetime(tr['timestamp_ist'], format="%d-%m-%Y %H:%M")
tr.head()

In [None]:
tr['date'] = tr['timestamp_ist'].dt.date

In [None]:
num_cols = ['execution_price','size_tokens','size_usd','closed_pnl','fee','start_position']
for col in num_cols:
    tr[col] = pd.to_numeric(tr[col], errors='coerce')


In [None]:
tr.isna().sum().to_csv('csv_files/missing_values.csv')


In [None]:
tr = tr.dropna(subset=['execution_price','size_tokens'])


In [None]:
tr = tr[tr['size_tokens'] > 0]
tr = tr[tr['execution_price'] > 0]
tr = tr[tr['size_usd'] > 0]


In [None]:
tr['risk_exposure'] = tr['execution_price'] * tr['size_tokens']


In [None]:
tr['is_long'] = (tr['direction'] == 'long').astype(int)
tr['is_short'] = (tr['direction'] == 'short').astype(int)


In [None]:
tr['profit_flag'] = (tr['closed_pnl'] > 0).astype(int)


In [None]:
tr.to_csv('csv_files/cleaned_hyperliquid.csv', index=False)


In [None]:
fg['date'] = pd.to_datetime(fg['date']).dt.date

In [None]:
df = tr.merge(fg[['date','classification']], how='left', on='date')

df.rename(columns={'classification':'sentiment'}, inplace=True)

print(df.columns)
df['sentiment'].value_counts()


In [None]:
df['sentiment'] = df['sentiment'].fillna('Unknown')


In [None]:
df.to_csv('csv_files/merged_data.csv', index=False)

In [None]:
plt.figure(figsize=(6,4))
df['sentiment'].value_counts().plot(kind='bar')
plt.title("Number of Trades in Fear vs Greed")
plt.xlabel("Sentiment")
plt.ylabel("Count of Trades")
plt.savefig('outputs/trade_count_sentiment.png')
plt.show()

In [None]:
agg = df.groupby('sentiment').agg(
    trades=('account','count'),
    avg_size_usd=('size_usd','mean'),
    avg_size_tokens=('size_tokens','mean'),
    avg_pnl=('closed_pnl','mean'),
    win_rate=('closed_pnl', lambda x: (x > 0).mean()),
    volume_usd=('size_usd','sum')
).reset_index()


In [None]:
agg.to_csv('csv_files/sentiment_metrics.csv', index=False)


In [None]:
plt.figure(figsize=(7,4))
sns.barplot(x='sentiment', y='avg_size_usd', data=agg)
plt.title("Average Trade Size (USD) in Fear vs Greed")
plt.savefig('outputs/avg_size_usd.png')
plt.show()


In [None]:
plt.figure(figsize=(7,4))
sns.barplot(x='sentiment', y='avg_pnl', data=agg)
plt.title("Average PnL in Fear vs Greed")
plt.savefig('outputs/avg_pnl.png')
plt.show()


In [None]:
daily = df.groupby(['date','sentiment']).agg(
    total_pnl=('closed_pnl','sum'),
    total_volume_usd=('size_usd','sum'),
    total_volume_tokens=('size_tokens','sum'),
    avg_pnl=('closed_pnl','mean'),
    win_rate=('closed_pnl', lambda x: (x > 0).mean())
).reset_index()


In [None]:
plt.figure(figsize=(12,4))
plt.plot(daily['date'], daily['total_pnl'])
plt.title("Daily Total PnL")
plt.savefig('outputs/daily_pnl.png')
plt.show()


In [None]:
print(df['size_usd'].describe())
print(df['closed_pnl'].describe())


In [None]:
from scipy.stats import mannwhitneyu

In [None]:
from scipy.stats import mannwhitneyu

fear_size = df[df['sentiment']=='Fear']['size_usd'].dropna()
greed_size = df[df['sentiment']=='Greed']['size_usd'].dropna()

# default is two-sided test
stat_size, p_size = mannwhitneyu(fear_size, greed_size, alternative='two-sided')

fear_pnl = df[df['sentiment']=='Fear']['closed_pnl'].dropna()
greed_pnl = df[df['sentiment']=='Greed']['closed_pnl'].dropna()
stat_pnl, p_pnl = mannwhitneyu(fear_pnl, greed_pnl, alternative='two-sided')

print("Size USD Mann-Whitney p-value:", p_size)
print("Closed PnL Mann-Whitney p-value:", p_pnl)

# Save results
pd.DataFrame([{
    'metric':'size_usd','stat':stat_size,'p_value':p_size
},{
    'metric':'closed_pnl','stat':stat_pnl,'p_value':p_pnl
}]).to_csv('csv_files/stat_test_results.csv', index=False)


In [None]:
acct_day = df.groupby(['account','date','sentiment']).agg(
    n_trades=('account','count'),
    sum_size_usd=('size_usd','sum'),
    mean_size_usd=('size_usd','mean'),
    sum_size_tokens=('size_tokens','sum'),
    avg_pnl=('closed_pnl','mean'),
    total_pnl=('closed_pnl','sum'),
    win_rate=('closed_pnl', lambda x: (x > 0).mean()),
    buy_fraction=('side', lambda x: (x.str.lower() == 'buy').mean() if x.dtype == object else np.nan),
    long_fraction=('direction', lambda x: (x.str.lower() == 'long').mean() if x.dtype == object else np.nan)
).reset_index()

acct_day.to_csv('csv_files/account_day_features.csv', index=False)


In [None]:
plt.figure(figsize=(12,5))
plt.plot(daily['date'], daily['total_pnl'])
plt.title("Daily Total PnL Over Time")
plt.xlabel("Date")
plt.ylabel("PnL")
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig("outputs/daily_total_pnl.png")
plt.show()


In [None]:
plt.figure(figsize=(12,5))
plt.plot(daily['date'], daily['total_volume_usd'])
plt.title("Daily Trading Volume (USD)")
plt.xlabel("Date")
plt.ylabel("Volume in USD")
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig("outputs/daily_volume_usd.png")
plt.show()


In [None]:
plt.figure(figsize=(7,4))
sns.barplot(data=agg, x="sentiment", y="avg_size_usd")
plt.title("Average Trade Size (USD) in Fear vs Greed")
plt.savefig('outputs/avg_size_usd_by_sentiment.png')
plt.show()


In [None]:
plt.figure(figsize=(7,4))
sns.barplot(data=agg, x="sentiment", y="avg_pnl")
plt.title("Average PnL in Fear vs Greed")
plt.savefig('outputs/avg_pnl_by_sentiment.png')
plt.show()


In [None]:
plt.figure(figsize=(7,4))
sns.barplot(x='sentiment', y='win_rate', data=agg)
plt.title("Win Rate in Fear vs Greed Periods")
plt.savefig('outputs/win_rate_by_sentiment.png')
plt.show()


In [None]:
plt.figure(figsize=(8,5))
sns.histplot(df['closed_pnl'], bins=50, kde=True)
plt.title("PnL Distribution")
plt.xlabel("Closed PnL")
plt.savefig("outputs/pnl_distribution.png")
plt.show()
