In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read both CSV inputs: the sentiment index first, then the trade history
sentiment, trades = map(pd.read_csv, ('bitcoin_sentiment.csv', 'trader_data.csv'))

# Part A: Initial Documentation -- dataset dimensions, then per-column null counts
print(f"Sentiment Data: {sentiment.shape}", f"Trader Data: {trades.shape}", sep="\n")
print(trades.isna().sum())  # isna() performs the same missing-value check as isnull()




Sentiment Data: (2644, 4)
Trader Data: (211224, 16)
Account             0
Coin                0
Execution Price     0
Size Tokens         0
Size USD            0
Side                0
Timestamp IST       0
Start Position      0
Direction           0
Closed PnL          0
Transaction Hash    0
Order ID            0
Crossed             0
Fee                 0
Trade ID            0
Timestamp           0
dtype: int64


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# ==========================================
# PART A: DATA PREPARATION & MERGING
# ==========================================

# 1. Read the two input CSVs (sentiment index first, then trade history)
sentiment, trades = (
    pd.read_csv(csv_path)
    for csv_path in ('bitcoin_sentiment.csv', 'trader_data.csv')
)

# --- Normalize column names ---
# Strip stray leading/trailing whitespace from the headers. Case is deliberately
# left untouched: the rest of this cell refers to the original mixed-case names
# ('Account', 'Closed PnL', 'Size USD', 'Coin', 'Timestamp').
trades.columns = trades.columns.str.strip()
sentiment.columns = sentiment.columns.str.strip()

# Show the sentiment headers so the date/classification columns picked below can be verified
print("Sentiment columns found:", sentiment.columns.tolist())


def _find_column(frame, wanted):
    """Return the name of the column in `frame` matching `wanted`.

    An exact match wins; otherwise the first case-insensitive match is returned.
    Returns None when no column matches.
    """
    if wanted in frame.columns:
        return wanted
    target = wanted.lower()
    return next((col for col in frame.columns if str(col).lower() == target), None)


def _to_calendar_date(values):
    """Convert a timestamp column to a Series of `datetime.date` objects.

    Non-numeric columns (strings, datetimes) are parsed by `pd.to_datetime` unchanged.
    Numeric columns are treated as Unix epoch values. `pd.to_datetime` would read
    them as nanoseconds by default, which maps second/millisecond epochs onto
    1970-01-01, so the unit is inferred from the magnitude of the values instead.
    """
    if pd.api.types.is_numeric_dtype(values):
        magnitude = values.abs().max()
        # Present-day epochs are roughly 1.7e9 s, 1.7e12 ms, 1.7e15 us, 1.7e18 ns.
        if magnitude < 1e11:
            unit = 's'
        elif magnitude < 1e14:
            unit = 'ms'
        elif magnitude < 1e17:
            unit = 'us'
        else:
            unit = 'ns'
        return pd.to_datetime(values, unit=unit).dt.date
    return pd.to_datetime(values).dt.date


# 2. Align Datasets by Date (Task A2)
# NOTE(review): the trade file also carries a 'Timestamp IST' column; confirm which
# of the two timestamps (and which timezone) should define the trading day.
trades['date'] = _to_calendar_date(trades['Timestamp'])

# Prefer a column called 'Date' (any letter case); otherwise fall back to the first column.
sent_date_col = _find_column(sentiment, 'Date')
if sent_date_col is None:
    sent_date_col = sentiment.columns[0]
sentiment['date'] = _to_calendar_date(sentiment[sent_date_col])

# The grouping below needs a column named exactly 'Classification'; accept other
# letter cases and fail with a clear message if the column is missing altogether.
class_col = _find_column(sentiment, 'Classification')
if class_col is None:
    raise KeyError(
        f"Sentiment data has no 'Classification' column; found: {sentiment.columns.tolist()}"
    )
if class_col != 'Classification':
    sentiment = sentiment.rename(columns={class_col: 'Classification'})

# 3. Merge into one main dataframe
# validate='many_to_one' raises if the sentiment table has more than one row per date,
# which would otherwise silently duplicate trades and inflate PnL and trade counts.
df = pd.merge(trades, sentiment, on='date', how='left', validate='many_to_one')

# groupby() drops rows whose key is NaN, so trades without a sentiment label never
# reach daily_stats. Report how many there are instead of losing them silently.
unmatched = df['Classification'].isna().sum()
if unmatched:
    print(f"Note: {unmatched} trade rows have no sentiment label for their date "
          "and are excluded from daily_stats.")

# 4. Create Key Metrics (Task A3)
# One row per (account, day, sentiment class): total closed PnL, mean trade size in USD,
# and the number of trades (non-null 'Coin' entries).
daily_stats = (
    df.groupby(['Account', 'date', 'Classification'])
    .agg({
        'Closed PnL': 'sum',
        'Size USD': 'mean',
        'Coin': 'count',  # Using 'Coin' to count number of trades
    })
    .reset_index()
    .rename(columns={'Coin': 'trade_count', 'Closed PnL': 'pnl'})
)
# A "win" is an account-day whose summed closed PnL is strictly positive.
daily_stats['is_win'] = (daily_stats['pnl'] > 0).astype(int)

# ==========================================
# PART B: ANALYSIS & VISUALIZATION
# ==========================================

# 1. Per sentiment class: mean daily PnL, share of winning account-days, mean trade count
performance = (
    daily_stats
    .groupby('Classification')[['pnl', 'is_win', 'trade_count']]
    .mean()
    .reset_index()
)

print("\n--- Summary Table: Fear vs Greed ---")
print(performance)

# 2. Box plot of per-account daily PnL, one box per sentiment class
fig, ax = plt.subplots(figsize=(10, 5))
sns.boxplot(data=daily_stats, x='Classification', y='pnl', ax=ax)
ax.set_title('Daily PnL Distribution: Fear vs Greed')
ax.set_ylim(-500, 500)  # restrict the view to +/-500; values beyond that are not shown
plt.show()

# ==========================================
# PART C: ACTIONABLE OUTPUT
# ==========================================
# The recommendations are fixed text; they are not computed from `performance`.
print(
    "\n--- Strategy Recommendations ---",
    "1. Risk Cap: During 'Greed' days, retail traders often have lower win rates. Limit trade sizes.",
    "2. Market Entry: 'Fear' days often show higher average PnL for consistent accounts; consider increasing activity.",
    sep="\n",
)