# 0.1 — Exploratory Data Analysis
A deep dive into distributions, patterns, and trader behaviour.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import warnings
# Suppress every warning so library noise does not clutter the cell output.
warnings.filterwarnings('ignore')

# Root directory; each trader's files are looked up beneath BASE/<trader key>/.
BASE = '.'

# One row per trader profile: (key used in paths, display label, plot colour).
_PROFILES = (
    ('calm_trader', 'Calm', '#4CAF50'),
    ('loss_averse_trader', 'Loss Averse', '#FF9800'),
    ('overtrader', 'Overtrader', '#F44336'),
    ('revenge_trader', 'Revenge', '#2196F3'),
)

# Ordered trader keys, plus key -> label and key -> colour lookups.
TRADERS = [key for key, _, _ in _PROFILES]
LABELS = {key: label for key, label, _ in _PROFILES}
COLORS = {key: colour for key, _, colour in _PROFILES}


In [None]:
# Read each trader's engineered CSV into a dict keyed by trader key,
# parsing the 'timestamp' column as datetimes.
dfs = {
    trader: pd.read_csv(
        f'{BASE}/{trader}/data/engineered/{trader}_engineered.csv',
        parse_dates=['timestamp'],
    )
    for trader in TRADERS
}

# Quick overview: column dtypes and rounded describe() output per trader.
for trader, frame in dfs.items():
    print(f"\n── {trader} ──")
    print(frame.dtypes)
    print(frame.describe().round(2))


## 1. Balance Curves

In [None]:
# Single axes with one balance line per trader, plotted against the
# DataFrame index (labelled as the trade number).
fig, ax = plt.subplots(figsize=(14, 5))
for trader in TRADERS:
    frame = dfs[trader]
    ax.plot(
        frame.index,
        frame['balance'],
        label=LABELS[trader],
        color=COLORS[trader],
        linewidth=1.2,
    )

# Dashed horizontal reference at 10000, labelled as the starting balance.
ax.axhline(10000, color='gray', linestyle='--', linewidth=0.8, label='Start ($10K)')

ax.set_title('Account Balance Over Trades', fontsize=14)
ax.set_xlabel('Trade #')
ax.set_ylabel('Balance ($)')
ax.legend()
ax.grid(alpha=0.3)

plt.tight_layout()
plt.show()


## 2. P&L Distributions

In [None]:
# 2x2 grid: one profit/loss histogram per trader.
fig, axes = plt.subplots(2, 2, figsize=(14, 8))
for ax, trader in zip(axes.flat, TRADERS):
    # Clip to [-5000, 5000] so extreme trades pile up in the edge bins
    # instead of stretching the x-axis.
    clipped_pnl = dfs[trader]['profit_loss'].clip(-5000, 5000)
    ax.hist(
        clipped_pnl,
        bins=80,
        color=COLORS[trader],
        alpha=0.8,
        edgecolor='white',
        linewidth=0.3,
    )
    # Vertical line at zero separates losses from gains.
    ax.axvline(0, color='black', linewidth=1)
    ax.set_title(f'{LABELS[trader]} — P&L Distribution (clipped ±$5K)')
    ax.set_xlabel('P&L ($)')
    ax.set_ylabel('Frequency')
    ax.grid(alpha=0.3)

plt.tight_layout()
plt.show()


## 3. Win Rate by Asset

In [None]:
# 2x2 grid: per-asset win rate for each trader, highest first.
fig, axes = plt.subplots(2, 2, figsize=(14, 8))
for ax, trader in zip(axes.flat, TRADERS):
    # Mean of the 'win' column per asset, sorted descending, then scaled to percent.
    ranked = dfs[trader].groupby('asset')['win'].mean().sort_values(ascending=False)
    win_pct = ranked * 100

    bars = ax.bar(
        win_pct.index,
        win_pct.values,
        color=COLORS[trader],
        alpha=0.85,
        edgecolor='white',
    )
    # Dashed 50% reference line.
    ax.axhline(50, color='gray', linestyle='--', linewidth=1)
    ax.set_title(f'{LABELS[trader]} — Win Rate by Asset')
    ax.set_ylabel('Win Rate (%)')
    ax.set_ylim(0, 100)

    # Write each bar's value just above its top, centred horizontally.
    for bar, pct in zip(bars, win_pct.values):
        x_centre = bar.get_x() + bar.get_width()/2
        ax.text(x_centre, pct + 1, f'{pct:.1f}%', ha='center', fontsize=8)

    ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()


## 4. Trade Volume by Hour

In [None]:
# 2x2 grid: number of trades per value of the 'hour' column, one panel per trader.
fig, axes = plt.subplots(2, 2, figsize=(14, 8))
for ax, trader in zip(axes.flat, TRADERS):
    trades_per_hour = dfs[trader].groupby('hour').size()
    ax.bar(
        trades_per_hour.index,
        trades_per_hour.values,
        color=COLORS[trader],
        alpha=0.85,
    )
    ax.set_title(f'{LABELS[trader]} — Trades by Hour')
    ax.set_xlabel('Hour')
    ax.set_ylabel('Trade Count')
    ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()


## 5. Buy vs Sell Ratio

In [None]:
# One pie per trader showing the share of each value in the 'side' column.
fig, axes = plt.subplots(1, 4, figsize=(14, 4))
for ax, t in zip(axes, TRADERS):
    # value_counts() orders by frequency, so the most frequent side would
    # always receive the first colour and the colour-to-side mapping could
    # flip from one trader to the next. Sorting by label gives every pie the
    # same wedge order, so a given colour means the same side in all subplots.
    counts = dfs[t]['side'].value_counts().sort_index()
    ax.pie(
        counts,
        labels=counts.index,
        autopct='%1.1f%%',
        colors=['#4CAF50', '#F44336'],
        startangle=90,
    )
    ax.set_title(LABELS[t])
plt.suptitle('Buy vs Sell Distribution', fontsize=13)
plt.tight_layout()
plt.show()


## 6. Correlation Heatmap (Engineered Features)

In [None]:
import matplotlib.pyplot as plt

# Columns included in the correlation matrix.
num_cols = [
    'quantity', 'entry_price', 'exit_price', 'profit_loss', 'balance',
    'year', 'month', 'day', 'hour', 'minute', 'price_change', 'price_change_pct',
    'asset_encoded', 'side_encoded', 'win',
]

# 2x2 grid: one correlation heatmap per trader on a fixed [-1, 1] colour scale.
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
tick_positions = range(len(num_cols))
for ax, trader in zip(axes.flat, TRADERS):
    corr_matrix = dfs[trader][num_cols].corr()
    heatmap = ax.imshow(corr_matrix, cmap='RdBu_r', vmin=-1, vmax=1)

    # Label both axes with the column names; x labels are rotated to fit.
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(num_cols, rotation=45, ha='right', fontsize=7)
    ax.set_yticks(tick_positions)
    ax.set_yticklabels(num_cols, fontsize=7)

    ax.set_title(f'{LABELS[trader]} — Correlation Matrix')
    plt.colorbar(heatmap, ax=ax)

plt.tight_layout()
plt.show()


## 7. Summary Stats

In [None]:
def _summary_row(label, frame):
    """Return one summary-table row (as a dict) for a single trader's frame."""
    pnl = frame['profit_loss']
    return {
        'Trader': label,
        'Total Trades': len(frame),
        'Win Rate (%)': round(frame['win'].mean()*100, 2),
        'Avg P&L': round(pnl.mean(), 2),
        'Std P&L': round(pnl.std(), 2),
        'Max P&L': round(pnl.max(), 2),
        'Min P&L': round(pnl.min(), 2),
        # Balance recorded on the last row of the frame.
        'Final Balance': round(frame['balance'].iloc[-1], 2),
    }


rows = [_summary_row(LABELS[trader], dfs[trader]) for trader in TRADERS]

# Bare trailing expression so the notebook renders the table.
pd.DataFrame(rows).set_index('Trader')
