# Notebook 2: Trader Behavior Analysis - Fear vs Greed

**Objective:** Analyze how trader performance and behavior differ during Fear vs Greed market sentiment periods.

**Key Questions:**
1. Do traders perform better during Fear or Greed periods?
2. How does leverage usage differ across sentiment states?
3. What are the risk patterns in different market sentiments?
4. Are there exploitable patterns for smarter trading strategies?

---

## Step 1: Import Libraries and Load Data

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import warnings
# Silence every warning category for the rest of the session
warnings.simplefilter('ignore')

# Plot defaults: seaborn white-grid theme and a wide 14x6 figure size
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (14, 6)})

print("‚úÖ Libraries imported successfully!")

In [None]:
# Read the merged trade/sentiment CSV produced by notebook_1 and turn the
# 'date' column into real datetimes in the same step
df = pd.read_csv('csv_files/merged_trader_sentiment.csv').assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

# Quick sanity report: row count, covered date span, first rows
first_day, last_day = df['date'].min(), df['date'].max()
print(f"Dataset loaded: {len(df):,} trades")
print(f"Date range: {first_day} to {last_day}")
display(df.head())

## Step 2: Create Binary Sentiment Categories

In [None]:
# Create a simplified binary category: Fear vs Greed
# Fear = "Fear" + "Extreme Fear"
# Greed = "Greed" + "Extreme Greed"

def categorize_sentiment(classification):
    """Collapse a sentiment classification label into 'Fear', 'Greed' or 'Neutral'.

    Any label containing "fear" (e.g. "Fear", "Extreme Fear") maps to 'Fear',
    any label containing "greed" maps to 'Greed', and everything else maps to
    'Neutral'. Matching ignores letter case.

    Args:
        classification: The raw label. Non-string values such as NaN or None
            (e.g. rows without a sentiment match) are accepted.

    Returns:
        One of the strings 'Fear', 'Greed' or 'Neutral'.
    """
    # Missing values arrive as float NaN / None; a substring test on them
    # would raise TypeError, so route them to the catch-all bucket instead.
    if not isinstance(classification, str):
        return 'Neutral'

    label = classification.casefold()
    if 'fear' in label:
        return 'Fear'
    if 'greed' in label:
        return 'Greed'
    return 'Neutral'

# Tag every trade with its collapsed sentiment bucket
df['sentiment_binary'] = df['classification'].map(categorize_sentiment)

# Report how the trades split across the buckets
bucket_labels = df['sentiment_binary']
n_fear = bucket_labels.eq('Fear').sum()
n_greed = bucket_labels.eq('Greed').sum()

print("=== Binary Sentiment Distribution ===")
print(bucket_labels.value_counts())
print(f"\nFear trades: {n_fear:,}")
print(f"Greed trades: {n_greed:,}")

## Step 3: Performance Comparison - Fear vs Greed

In [None]:
# Per-sentiment PnL summary: total, mean, median, spread, plus a trade count
# taken from the number of non-null 'Account' entries
by_sentiment = df.groupby('sentiment_binary')
performance_by_sentiment = pd.DataFrame({
    'Total PnL': by_sentiment['Closed PnL'].sum(),
    'Avg PnL': by_sentiment['Closed PnL'].mean(),
    'Median PnL': by_sentiment['Closed PnL'].median(),
    'Std Dev': by_sentiment['Closed PnL'].std(),
    'Trade Count': by_sentiment['Account'].count(),
}).round(4)

banner = "=" * 70
print(banner)
print("üìä PERFORMANCE METRICS BY SENTIMENT")
print(banner)
display(performance_by_sentiment)

# Win rate = share of trades with strictly positive closed PnL, in percent
is_win = df['Closed PnL'] > 0
win_rate_fear = is_win[df['sentiment_binary'] == 'Fear'].mean() * 100
win_rate_greed = is_win[df['sentiment_binary'] == 'Greed'].mean() * 100

print(f"\nüéØ Win Rate During Fear: {win_rate_fear:.2f}%")
print(f"üéØ Win Rate During Greed: {win_rate_greed:.2f}%")

In [None]:
# Visualization: mean and total closed PnL per sentiment bucket, side by side
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

pnl_groups = df.groupby('sentiment_binary')['Closed PnL']
sentiment_avg_pnl = pnl_groups.mean()
sentiment_total_pnl = pnl_groups.sum()

# One entry per panel: the data, its titles, how a bar label is formatted and
# how far the label is nudged away from the bar tip
panel_specs = [
    {
        'data': sentiment_avg_pnl,
        'title': 'Average PnL by Market Sentiment',
        'ylabel': 'Average Closed PnL ($)',
        'label': lambda v: f'${v:.2f}',
        'nudge': lambda v: 0.05 if v > 0 else -0.05,
    },
    {
        'data': sentiment_total_pnl,
        'title': 'Total PnL by Market Sentiment',
        'ylabel': 'Total Closed PnL ($)',
        'label': lambda v: f'${v:,.0f}',
        'nudge': lambda v: v * 0.02,
    },
]

for ax, spec in zip(axes, panel_specs):
    series = spec['data']

    # Negative bars in red, everything else in green
    colors = ['red' if x < 0 else 'green' for x in series.values]
    ax.bar(series.index, series.values, color=colors, alpha=0.7, edgecolor='black')
    ax.axhline(0, color='black', linestyle='--', linewidth=1)
    ax.set_title(spec['title'], fontsize=14, fontweight='bold')
    ax.set_xlabel('Sentiment')
    ax.set_ylabel(spec['ylabel'])
    ax.grid(axis='y', alpha=0.3)

    # Value label at each bar tip: above positive bars, below the rest
    for position, value in enumerate(series.values):
        ax.text(position, value + spec['nudge'](value), spec['label'](value),
                ha='center', va='bottom' if value > 0 else 'top', fontweight='bold')

plt.tight_layout()
plt.savefig('outputs/pnl_by_sentiment.png', dpi=300, bbox_inches='tight')
plt.show()

print("\n‚úÖ Chart saved to outputs/pnl_by_sentiment.png")

## Step 4: Win Rate Analysis

In [None]:
# Calculate win/loss breakdown by sentiment.
# A trade counts as a "Win" only when Closed PnL > 0; every other trade
# (negative, zero or missing PnL) is counted under "Loss".
is_win = df['Closed PnL'] > 0
sentiment_outcomes = (
    df.groupby(['sentiment_binary', is_win])
      .size()
      .unstack(fill_value=0)
      # Guarantee both outcome columns exist, in False/True order, even when
      # the data holds no wins or no non-wins; otherwise the two-name column
      # assignment below fails with a length mismatch.
      .reindex(columns=[False, True], fill_value=0)
)
sentiment_outcomes.columns = ['Loss', 'Win']
sentiment_outcomes['Total'] = sentiment_outcomes['Loss'] + sentiment_outcomes['Win']
sentiment_outcomes['Win Rate %'] = (sentiment_outcomes['Win'] / sentiment_outcomes['Total'] * 100).round(2)

print("=== Win/Loss Breakdown by Sentiment ===")
display(sentiment_outcomes)

# Visualization
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Plot 1: Win Rate Comparison
win_rates = sentiment_outcomes['Win Rate %']
colors_wr = ['#ff6b6b', '#4ecdc4', '#95e1d3']
bars = axes[0].bar(win_rates.index, win_rates.values, color=colors_wr, alpha=0.8, edgecolor='black')
axes[0].set_title('Win Rate by Market Sentiment', fontsize=14, fontweight='bold')
axes[0].set_xlabel('Sentiment')
axes[0].set_ylabel('Win Rate (%)')
# The y-limits are fixed here, so make sure they always include the 50%
# reference line drawn below, even when every win rate is well under 50%.
axes[0].set_ylim([0, max(win_rates.max(), 50) * 1.2])
axes[0].axhline(50, color='gray', linestyle='--', linewidth=1, label='50% Break-even')
axes[0].legend()
axes[0].grid(axis='y', alpha=0.3)

# Add value labels on top of each bar
for bar in bars:
    height = bar.get_height()
    axes[0].text(bar.get_x() + bar.get_width()/2., height,
                f'{height:.2f}%', ha='center', va='bottom', fontweight='bold')

# Plot 2: Stacked bar chart of raw win / loss counts
sentiment_outcomes[['Win', 'Loss']].plot(kind='bar', stacked=True, ax=axes[1], 
                                         color=['green', 'red'], alpha=0.7, edgecolor='black')
axes[1].set_title('Win vs Loss Count by Sentiment', fontsize=14, fontweight='bold')
axes[1].set_xlabel('Sentiment')
axes[1].set_ylabel('Number of Trades')
axes[1].legend(title='Outcome', loc='upper right')
axes[1].set_xticklabels(axes[1].get_xticklabels(), rotation=0)
axes[1].grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.savefig('outputs/win_rate_analysis.png', dpi=300, bbox_inches='tight')
plt.show()

print("\n‚úÖ Chart saved to outputs/win_rate_analysis.png")

## Step 5: Statistical Significance Test

In [None]:
# Perform a t-test to check if the PnL difference between Fear and Greed is
# statistically significant. Missing PnL values are dropped first.
fear_pnl = df[df['sentiment_binary'] == 'Fear']['Closed PnL'].dropna()
greed_pnl = df[df['sentiment_binary'] == 'Greed']['Closed PnL'].dropna()

# Welch's variant (equal_var=False): nothing in this notebook establishes that
# the two groups share a variance, which the default pooled-variance test
# assumes.
t_stat, p_value = stats.ttest_ind(fear_pnl, greed_pnl, equal_var=False)

print("="*70)
print("üìà STATISTICAL SIGNIFICANCE TEST (T-Test)")
print("="*70)
print(f"Fear PnL Mean: ${fear_pnl.mean():.4f}")
print(f"Greed PnL Mean: ${greed_pnl.mean():.4f}")
print(f"\nT-Statistic: {t_stat:.4f}")
print(f"P-Value: {p_value:.6f}")

# Conventional 5% significance threshold
if p_value < 0.05:
    print("\n‚úÖ Result: STATISTICALLY SIGNIFICANT (p < 0.05)")
    print("   The difference in performance between Fear and Greed periods is significant.")
else:
    print("\n‚ö†Ô∏è Result: NOT STATISTICALLY SIGNIFICANT (p >= 0.05)")
    print("   The difference in performance might be due to random chance.")

## Step 6: Trading Volume Analysis

In [None]:
# Analyze trading volume (Size USD) by sentiment: total, mean and median
# trade size plus the number of non-null sizes per bucket
volume_analysis = df.groupby('sentiment_binary')['Size USD'].agg(['sum', 'mean', 'median', 'count'])
volume_analysis.columns = ['Total Volume', 'Avg Trade Size', 'Median Trade Size', 'Trade Count']

print("=== Trading Volume Analysis by Sentiment ===")
display(volume_analysis)

# Visualization
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Plot 1: Average Trade Size (bars follow the sorted group labels)
avg_size = df.groupby('sentiment_binary')['Size USD'].mean()
axes[0].bar(avg_size.index, avg_size.values, color=['#ffd93d', '#6bcf7f', '#4a90e2'], alpha=0.7, edgecolor='black')
axes[0].set_title('Average Trade Size by Sentiment', fontsize=14, fontweight='bold')
axes[0].set_xlabel('Sentiment')
axes[0].set_ylabel('Average Trade Size (USD)')
axes[0].grid(axis='y', alpha=0.3)

for i, v in enumerate(avg_size.values):
    axes[0].text(i, v, f'${v:,.0f}', ha='center', va='bottom', fontweight='bold')

# Plot 2: Trade Count
# value_counts() orders by frequency; sort by label so the bars and the
# positional colour list line up with Plot 1 and each sentiment keeps the
# same colour in both panels.
trade_counts = df['sentiment_binary'].value_counts().sort_index()
axes[1].bar(trade_counts.index, trade_counts.values, color=['#ffd93d', '#6bcf7f', '#4a90e2'], alpha=0.7, edgecolor='black')
axes[1].set_title('Number of Trades by Sentiment', fontsize=14, fontweight='bold')
axes[1].set_xlabel('Sentiment')
axes[1].set_ylabel('Trade Count')
axes[1].grid(axis='y', alpha=0.3)

for i, v in enumerate(trade_counts.values):
    axes[1].text(i, v, f'{v:,}', ha='center', va='bottom', fontweight='bold')

plt.tight_layout()
plt.savefig('outputs/volume_analysis.png', dpi=300, bbox_inches='tight')
plt.show()

print("\n‚úÖ Chart saved to outputs/volume_analysis.png")

## Step 7: Risk Analysis - PnL Distribution

In [None]:
# Compare the PnL distributions per sentiment with a box plot and a violin plot
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Plot 1: Box plot (pandas groups by 'sentiment_binary')
df.boxplot(column='Closed PnL', by='sentiment_binary', ax=axes[0], 
           patch_artist=True, grid=False)
axes[0].set_title('PnL Distribution by Sentiment (Boxplot)', fontsize=14, fontweight='bold')
axes[0].set_xlabel('Sentiment')
axes[0].set_ylabel('Closed PnL ($)')
axes[0].axhline(0, color='red', linestyle='--', linewidth=1)
axes[0].get_figure().suptitle('')  # Remove the automatic "grouped by" title

# Plot 2: Violin plot
# Pin the category order to the sorted labels so both panels list the
# sentiments in the same left-to-right order; without `order`, seaborn would
# follow the order in which labels first appear in the data.
sentiment_order = sorted(df['sentiment_binary'].unique())
sns.violinplot(data=df, x='sentiment_binary', y='Closed PnL', order=sentiment_order,
               ax=axes[1], palette='Set2')
axes[1].set_title('PnL Distribution by Sentiment (Violin Plot)', fontsize=14, fontweight='bold')
axes[1].set_xlabel('Sentiment')
axes[1].set_ylabel('Closed PnL ($)')
axes[1].axhline(0, color='red', linestyle='--', linewidth=1)
axes[1].grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.savefig('outputs/pnl_distribution_by_sentiment.png', dpi=300, bbox_inches='tight')
plt.show()

print("\n‚úÖ Chart saved to outputs/pnl_distribution_by_sentiment.png")

## Step 8: Trading Side Analysis (Buy vs Sell)

In [None]:
# Mean closed PnL for every (sentiment, side) pair, with the sides spread
# across the columns
mean_pnl_by_pair = df.groupby(['sentiment_binary', 'Side'])['Closed PnL'].mean()
side_sentiment_pnl = mean_pnl_by_pair.unstack()

print("=== Average PnL by Sentiment and Trading Side ===")
display(side_sentiment_pnl)

# Grouped bar chart: one cluster per sentiment, one bar per side
fig, ax = plt.subplots(figsize=(12, 6))
bar_style = {
    'color': ['#ff6b6b', '#51cf66'],
    'alpha': 0.8,
    'edgecolor': 'black',
}
side_sentiment_pnl.plot(kind='bar', ax=ax, **bar_style)

ax.set_title('Average PnL by Sentiment and Trading Side (Buy vs Sell)', fontsize=14, fontweight='bold')
ax.set_xlabel('Sentiment')
ax.set_ylabel('Average Closed PnL ($)')
ax.axhline(0, color='black', linestyle='--', linewidth=1)
ax.legend(title='Side', loc='upper right')
# Keep the sentiment tick labels horizontal
ax.set_xticklabels(ax.get_xticklabels(), rotation=0)
ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.savefig('outputs/buy_sell_sentiment_analysis.png', dpi=300, bbox_inches='tight')
plt.show()

print("\n‚úÖ Chart saved to outputs/buy_sell_sentiment_analysis.png")

## Step 9: Time-based Trends

In [None]:
# Sum closed PnL per (date, sentiment), then lay the sentiments out as
# columns; dates on which a sentiment has no trades contribute 0
daily_pnl = df.groupby(['date', 'sentiment_binary'], as_index=False)['Closed PnL'].sum()
daily_pnl_pivot = (
    daily_pnl
    .pivot(index='date', columns='sentiment_binary', values='Closed PnL')
    .fillna(0)
)

# Running total per sentiment, drawn only for the buckets present in the data
fig, ax = plt.subplots(figsize=(16, 6))

line_colors = {'Fear': 'red', 'Greed': 'green', 'Neutral': 'blue'}
for bucket, line_color in line_colors.items():
    if bucket not in daily_pnl_pivot.columns:
        continue
    daily_pnl_pivot[bucket].cumsum().plot(ax=ax, label=bucket, color=line_color, linewidth=2)

ax.set_title('Cumulative PnL Over Time by Sentiment', fontsize=14, fontweight='bold')
ax.set_xlabel('Date')
ax.set_ylabel('Cumulative PnL ($)')
ax.axhline(0, color='black', linestyle='--', linewidth=1)
ax.legend(loc='best')
ax.grid(alpha=0.3)

plt.tight_layout()
plt.savefig('outputs/cumulative_pnl_over_time.png', dpi=300, bbox_inches='tight')
plt.show()

print("\n‚úÖ Chart saved to outputs/cumulative_pnl_over_time.png")

## Step 10: Key Insights Summary

In [None]:
# Key-insights report. All figures are computed first, then printed section by
# section; win_rate_fear / win_rate_greed come from the Step 3 cell.
fear_mask = df['sentiment_binary'] == 'Fear'
greed_mask = df['sentiment_binary'] == 'Greed'
fear_closed = df.loc[fear_mask, 'Closed PnL']
greed_closed = df.loc[greed_mask, 'Closed PnL']

fear_avg, greed_avg = fear_closed.mean(), greed_closed.mean()
fear_std, greed_std = fear_closed.std(), greed_closed.std()
fear_count, greed_count = fear_mask.sum(), greed_mask.sum()
total_trades = len(df)

if fear_avg > greed_avg:
    better = "Fear"
else:
    better = "Greed"

if fear_std > greed_std:
    riskier = 'Fear'
else:
    riskier = 'Greed'

rule = "=" * 70
print(rule)
print("üéØ KEY INSIGHTS - FEAR VS GREED TRADING ANALYSIS")
print(rule)

print("\n1Ô∏è‚É£ PROFITABILITY COMPARISON:")
print(f"   ‚Ä¢ Fear periods: Average PnL = ${fear_avg:.4f}")
print(f"   ‚Ä¢ Greed periods: Average PnL = ${greed_avg:.4f}")
print(f"   ‚Ä¢ Better performance during: {better}")

print("\n2Ô∏è‚É£ WIN RATE COMPARISON:")
print(f"   ‚Ä¢ Fear: {win_rate_fear:.2f}% win rate")
print(f"   ‚Ä¢ Greed: {win_rate_greed:.2f}% win rate")
print(f"   ‚Ä¢ Difference: {abs(win_rate_fear - win_rate_greed):.2f}%")

print("\n3Ô∏è‚É£ TRADE VOLUME:")
print(f"   ‚Ä¢ Fear: {fear_count:,} trades ({fear_count/total_trades*100:.1f}%)")
print(f"   ‚Ä¢ Greed: {greed_count:,} trades ({greed_count/total_trades*100:.1f}%)")

print("\n4Ô∏è‚É£ RISK PROFILE:")
print(f"   ‚Ä¢ Fear volatility (std dev): ${fear_std:.2f}")
print(f"   ‚Ä¢ Greed volatility (std dev): ${greed_std:.2f}")
print(f"   ‚Ä¢ Higher risk during: {riskier}")

print("\n5Ô∏è‚É£ STRATEGIC RECOMMENDATION:")
# A sentiment "leads" only when it is ahead on BOTH average PnL and win rate
fear_leads = fear_avg > greed_avg and win_rate_fear > win_rate_greed
greed_leads = greed_avg > fear_avg and win_rate_greed > win_rate_fear
if fear_leads:
    print("   ‚úÖ Fear periods show better profitability AND win rate")
    print("   üí° Strategy: Increase position sizes during Fear periods")
elif greed_leads:
    print("   ‚úÖ Greed periods show better profitability AND win rate")
    print("   üí° Strategy: Increase position sizes during Greed periods")
else:
    print("   ‚ö†Ô∏è Mixed signals - profitability and win rate differ")
    print("   üí° Strategy: Requires deeper analysis of specific conditions")

print("\n" + rule)
print("üìä Analysis complete! All charts saved to outputs/ folder")
print(rule)

## Step 11: Export Summary Report Data

In [None]:
# Assemble the report as (metric, formatted value) pairs, then write it out.
# The averages, win rates, counts and std devs come from earlier cells.
fear_total_pnl = df.loc[df['sentiment_binary'] == 'Fear', 'Closed PnL'].sum()
greed_total_pnl = df.loc[df['sentiment_binary'] == 'Greed', 'Closed PnL'].sum()

report_rows = [
    ('Average PnL - Fear', f"${fear_avg:.4f}"),
    ('Average PnL - Greed', f"${greed_avg:.4f}"),
    ('Win Rate - Fear', f"{win_rate_fear:.2f}%"),
    ('Win Rate - Greed', f"{win_rate_greed:.2f}%"),
    ('Trade Count - Fear', f"{fear_count:,}"),
    ('Trade Count - Greed', f"{greed_count:,}"),
    ('Volatility (Std) - Fear', f"${fear_std:.2f}"),
    ('Volatility (Std) - Greed', f"${greed_std:.2f}"),
    ('Total PnL - Fear', f"${fear_total_pnl:,.2f}"),
    ('Total PnL - Greed', f"${greed_total_pnl:,.2f}"),
]

summary_data = {
    'Metric': [metric for metric, _ in report_rows],
    'Value': [value for _, value in report_rows],
}

summary_df = pd.DataFrame(summary_data)
summary_df.to_csv('csv_files/analysis_summary.csv', index=False)

print("‚úÖ Summary data exported to csv_files/analysis_summary.csv")
display(summary_df)

## 🎉 Analysis Complete!

All analysis has been completed successfully. The following outputs have been generated:

**Charts in `outputs/` folder:**
- pnl_by_sentiment.png
- win_rate_analysis.png
- volume_analysis.png
- pnl_distribution_by_sentiment.png
- buy_sell_sentiment_analysis.png
- cumulative_pnl_over_time.png

**Data files in `csv_files/` folder:**
- merged_trader_sentiment.csv
- analysis_summary.csv

**Next Steps:**
1. Review all charts in the outputs folder
2. Complete the ds_report.pdf with key findings
3. Update README.md with project documentation
4. Upload to Google Colab and GitHub