In [5]:
# ============================================
# DATA SCIENCE ASSIGNMENT – WEB3 TRADING TEAM
# Author: <Rohan Jambusaria>
# Google Colab Notebook
# ============================================

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# -----------------------------
# 1. PROJECT DIRECTORY SETUP
# -----------------------------
# Root of the project tree. CSV exports and rendered outputs each live in
# their own sub-folder directly beneath it.
ROOT_DIR = "ds_yourname"
CSV_DIR = ROOT_DIR + "/csv_files"
OUT_DIR = ROOT_DIR + "/outputs"

# Create both folders up front (missing parents included). Re-running the
# cell is harmless because already-existing folders are accepted.
for _folder in (CSV_DIR, OUT_DIR):
    os.makedirs(_folder, exist_ok=True)

# -----------------------------
# 2. LOAD DATASETS (SAFE MODE)
# -----------------------------
import os
import pandas as pd

# Locations of the two input CSVs (both sit at the filesystem root).
TRADE_FILE = "/historical_data.csv"
SENTIMENT_FILE = "/fear_greed_index.csv"

# Fail fast, before anything is read, if either input is absent. The trade
# file is checked first, and each input has its own upload hint.
_required_inputs = (
    (TRADE_FILE, "‚ùå historical_data.csv not found. Please upload it to Colab."),
    (SENTIMENT_FILE, "‚ùå fear_greed_index.csv not found. Please upload it to Colab."),
)
for _input_path, _upload_hint in _required_inputs:
    if not os.path.exists(_input_path):
        raise FileNotFoundError(_upload_hint)

# Both files are present: load the trade history first, then the sentiment index.
trades = pd.read_csv(TRADE_FILE)
sentiment = pd.read_csv(SENTIMENT_FILE)

print("‚úÖ Files loaded successfully")


# -----------------------------
# 3. DATA CLEANING & PREP
# -----------------------------

# ---- Trader Data ----
# Normalise the headers (lower-case, spaces -> underscores) so every column
# can be addressed by one consistent name below.
trades.columns = trades.columns.str.lower().str.replace(" ", "_")

# Interpret 'timestamp' as epoch milliseconds; unparseable values become NaT
# instead of raising.
# NOTE(review): this assumes the column carries full millisecond precision.
# If the export rounded it, many trades collapse onto a handful of calendar
# dates — confirm against the raw file.
trades['timestamp'] = pd.to_datetime(trades['timestamp'], unit='ms', errors='coerce')
trades['date'] = trades['timestamp'].dt.date

# Coerce the numeric fields; bad cells turn into NaN and are removed below.
trades['closed_pnl'] = pd.to_numeric(trades['closed_pnl'], errors='coerce')
trades['size_usd'] = pd.to_numeric(trades['size_usd'], errors='coerce')

# Drop rows that lack a usable PnL, size, or timestamp. A row without a
# timestamp has no date, and a group-by on 'date' skips such rows by default,
# so keeping them would make the cleaned export disagree with every
# per-date aggregate built from it.
cleaned_trades = trades.dropna(subset=['timestamp', 'closed_pnl', 'size_usd'])

cleaned_trades.to_csv(f"{CSV_DIR}/cleaned_trades.csv", index=False)

# ---- Sentiment Data ----
# Reduce the sentiment dates to plain calendar dates (the same type as
# trades['date']) and title-case the class labels so they are spelled uniformly.
sentiment['date'] = pd.to_datetime(sentiment['date']).dt.date
sentiment['classification'] = sentiment['classification'].str.title()

# -----------------------------
# 4. DAILY TRADER METRICS
# -----------------------------
# One output row per calendar date: summed and mean closed PnL, summed USD
# size, and the number of non-null size entries (the trade count).
_daily_aggregations = {
    "total_pnl": pd.NamedAgg(column="closed_pnl", aggfunc="sum"),
    "avg_pnl": pd.NamedAgg(column="closed_pnl", aggfunc="mean"),
    "trade_volume": pd.NamedAgg(column="size_usd", aggfunc="sum"),
    "trade_count": pd.NamedAgg(column="size_usd", aggfunc="count"),
}
_trades_by_date = cleaned_trades.groupby('date')
daily_metrics = _trades_by_date.agg(**_daily_aggregations).reset_index()

# Persist the per-day table next to the other CSV exports.
_daily_metrics_path = f"{CSV_DIR}/daily_trader_metrics.csv"
daily_metrics.to_csv(_daily_metrics_path, index=False)

# -----------------------------
# 5. MERGE WITH MARKET SENTIMENT
# -----------------------------
# Attach the sentiment label and index value to each day's metrics. The
# inner join keeps only dates present in both tables.
_sentiment_columns = sentiment[['date', 'classification', 'value']]
merged_data = daily_metrics.merge(_sentiment_columns, how='inner', on='date')

_merged_path = f"{CSV_DIR}/sentiment_merged_data.csv"
merged_data.to_csv(_merged_path, index=False)

# -----------------------------
# 6. EXPLORATORY DATA ANALYSIS
# -----------------------------

# ---- PnL vs Sentiment ----
def _save_boxplot_by_sentiment(data, column, title, ylabel, path):
    """Save a box plot of *column* grouped by sentiment class to *path*.

    The axes are created here and handed to pandas explicitly. Without an
    ``ax`` argument, ``DataFrame.boxplot(by=...)`` opens a figure of its
    own, so a preceding ``plt.figure()`` would be left open and empty.

    Args:
        data: Frame holding *column* and a 'classification' column.
        column: Name of the numeric column to plot.
        title: Title shown above the axes.
        ylabel: Label for the y axis.
        path: Destination image file.
    """
    fig, ax = plt.subplots()
    data.boxplot(column=column, by='classification', ax=ax)
    ax.set_title(title)
    fig.suptitle("")  # remove the automatic "grouped by" super-title
    ax.set_xlabel("Market Sentiment")
    ax.set_ylabel(ylabel)
    fig.savefig(path)
    plt.close(fig)  # close exactly the figure that was drawn on


# ---- PnL vs Sentiment ----
_save_boxplot_by_sentiment(
    merged_data,
    column='total_pnl',
    title="Trader PnL Distribution by Market Sentiment",
    ylabel="Total PnL",
    path=f"{OUT_DIR}/pnl_vs_sentiment.png",
)

# ---- Volume vs Sentiment ----
_save_boxplot_by_sentiment(
    merged_data,
    column='trade_volume',
    title="Trading Volume by Market Sentiment",
    ylabel="Trade Volume (USD)",
    path=f"{OUT_DIR}/volume_vs_sentiment.png",
)

# ---- Sentiment Frequency ----
# Bar chart of how many merged trading days fall into each sentiment class.
_freq_fig, _freq_ax = plt.subplots()
merged_data['classification'].value_counts().plot(kind='bar', ax=_freq_ax)
_freq_ax.set_title("Fear vs Greed Market Distribution")
_freq_ax.set_xlabel("Sentiment")
_freq_ax.set_ylabel("Number of Trading Days")
_freq_fig.savefig(f"{OUT_DIR}/fear_greed_distribution.png")
plt.close(_freq_fig)

# -----------------------------
# 7. KEY INSIGHTS (TEXT OUTPUT)
# -----------------------------
# Split the merged days into two coarse buckets by substring match on the
# label, so compound classes such as "Extreme Greed" land in the same bucket
# as their plain counterpart. na=False treats a missing label as "no match";
# without it a NaN in the mask makes the boolean indexing raise.
_fear_days = merged_data[merged_data['classification'].str.contains("Fear", na=False)]
_greed_days = merged_data[merged_data['classification'].str.contains("Greed", na=False)]

# Mean daily PnL and mean daily volume for each bucket. A bucket with no
# rows yields NaN.
insights = {
    "Fear_Avg_PnL": _fear_days['total_pnl'].mean(),
    "Greed_Avg_PnL": _greed_days['total_pnl'].mean(),
    "Fear_Avg_Volume": _fear_days['trade_volume'].mean(),
    "Greed_Avg_Volume": _greed_days['trade_volume'].mean(),
}

# One row per insight, keyed by its name, with a single 'Value' column.
insights_df = pd.DataFrame.from_dict(insights, orient='index', columns=['Value'])
insights_df.to_csv(f"{CSV_DIR}/summary_insights.csv")

print("‚úÖ Analysis Complete")
print("üìÅ All outputs saved inside:", ROOT_DIR)

# ============================================
# 8. EXPORT FINAL OUTPUTS AS CSV (VISIBLE RESULTS)
# ============================================

# ---- 1. Save Full Merged Dataset ----
# ---- 1. Save Full Merged Dataset ----
merged_data.to_csv(
    f"{CSV_DIR}/final_sentiment_trader_dataset.csv",
    index=False
)

# ---- 2. Create Summary Table ----
# One row per sentiment class: mean and median daily PnL, mean daily volume,
# mean daily trade count, and the number of days observed in that class.
summary_table = merged_data.groupby('classification').agg(
    avg_daily_pnl=('total_pnl', 'mean'),
    median_daily_pnl=('total_pnl', 'median'),
    avg_trade_volume=('trade_volume', 'mean'),
    avg_trade_count=('trade_count', 'mean'),
    trading_days=('date', 'count')
).reset_index()

# Save summary CSV in BOTH folders
summary_table.to_csv(
    f"{CSV_DIR}/sentiment_summary_metrics.csv",
    index=False
)

summary_table.to_csv(
    f"{OUT_DIR}/sentiment_summary_metrics.csv",
    index=False
)

# ---- 3. Display Output in Notebook (for verification) ----
print("‚úÖ Final CSV files successfully created\n")
print("üìÑ Summary Metrics Preview:\n")
try:
    display(summary_table)
except NameError:
    # `display` is only defined when running under IPython/Colab; fall back
    # to a plain-text rendering so the script also runs as an ordinary file.
    print(summary_table)

# ---- 4. Confirm Files Exist ----
print("\nüìÅ Files saved in csv_files/:")
print(os.listdir(CSV_DIR))

print("\nüìÅ Files saved in outputs/:")
print(os.listdir(OUT_DIR))



‚úÖ Files loaded successfully
‚úÖ Analysis Complete
üìÅ All outputs saved inside: ds_yourname
‚úÖ Final CSV files successfully created

üìÑ Summary Metrics Preview:



Unnamed: 0,classification,avg_daily_pnl,median_daily_pnl,avg_trade_volume,avg_trade_count,trading_days
0,Extreme Greed,176965.5,176965.5,39406770.0,6962.0,1
1,Fear,6699925.0,6699925.0,704158500.0,133871.0,1
2,Greed,1063206.0,155.5034,38501220.0,12096.333333,3
3,Neutral,158742.4,158742.4,21843230.0,7141.0,1



üìÅ Files saved in csv_files/:
['sentiment_merged_data.csv', 'final_sentiment_trader_dataset.csv', 'daily_trader_metrics.csv', 'sentiment_summary_metrics.csv', 'summary_insights.csv', 'cleaned_trades.csv']

üìÅ Files saved in outputs/:
['pnl_vs_sentiment.png', 'volume_vs_sentiment.png', 'sentiment_summary_metrics.csv', 'fear_greed_distribution.png']


<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>