In [None]:
# Imports
import json
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Shared seaborn look for every figure produced below
sns.set_theme(style='whitegrid')
# Directory that receives the generated PNG/CSV artefacts (created on demand)
out_dir = Path('notebooks') / 'output'
out_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Load draw_stats.json (produced by the harness)
stats_path = Path('draw_stats.json')
if not stats_path.exists():
    raise FileNotFoundError(f'Expected {stats_path} in repo root — run the harness first')
# JSON text is UTF-8; decode it explicitly instead of relying on the platform's
# locale encoding, so non-ASCII team names load the same way everywhere.
with open(stats_path, 'r', encoding='utf-8') as fh:
    stats = json.load(fh)
# 'total_runs' is optional (None when absent); 'teams' is required.
total_runs = stats.get('total_runs')
teams_data = stats['teams']
# Build DataFrames: group_df (teams x groups) and pair_df (teams x teams).
# Each per-team dict becomes one row; combinations missing from the JSON are filled with 0.
group_df = pd.DataFrame({team: info['group_pct'] for team, info in teams_data.items()}).T.fillna(0)
pair_df = pd.DataFrame({team: info['pair_pct'] for team, info in teams_data.items()}).T.fillna(0)
# Sort rows and columns of both frames for a reproducible layout
group_df = group_df.reindex(index=sorted(group_df.index), columns=sorted(group_df.columns))
pair_df = pair_df.reindex(index=sorted(pair_df.index), columns=sorted(pair_df.columns))
print('Loaded', len(group_df), 'teams, total_runs =', total_runs)

In [None]:
# Heatmap: group occupancy (teams x groups) — large vertical figure
# The figure height scales with the number of teams, with a 6-inch minimum.
fig, ax = plt.subplots(figsize=(12, max(6, 0.18 * len(group_df))))
sns.heatmap(
    group_df,
    cmap='viridis',
    cbar_kws={'label': 'Percent of runs'},
    linewidths=0.05,
    # Label every team row; seaborn's default 'auto' mode may skip labels
    # it considers too dense, which would leave rows unidentified.
    yticklabels=True,
    ax=ax,
)
ax.set_xlabel('Group')
ax.set_ylabel('Team')
# Derive the group range shown in the title from the data instead of hard-coding it.
group_labels = [str(g) for g in group_df.columns]
group_span = f' ({group_labels[0]}..{group_labels[-1]})' if group_labels else ''
ax.set_title('Group occupancy percentage per team' + group_span)
fig.tight_layout()
out_file = out_dir / 'group_occupancy_heatmap.png'
fig.savefig(out_file, dpi=150)
print('Saved', out_file)
plt.show()

In [None]:
# Summarise each team's three most frequent groups and export them as CSV
top_groups = {}
for team in group_df.index:
    ranked = group_df.loc[team].sort_values(ascending=False)
    top_groups[team] = list(ranked.head(3).items())
# Flatten {team: [(group, pct), ...]} into one record per (team, group) pair
tg_rows = [
    {'team': team, 'group': grp, 'pct': share}
    for team, picks in top_groups.items()
    for grp, share in picks
]
tg_df = pd.DataFrame(tg_rows)
tg_out = out_dir / 'top_groups_per_team.csv'
tg_df.to_csv(tg_out, index=False)
print('Wrote top groups CSV to', tg_out)
tg_df.head()

In [None]:
# Pairing heatmap for a selectable subset of teams
def pair_heatmap_for(selection, figsize=(8, 6), annot=True, fmt='.1f'):
    """Plot, save and show a heatmap of pairing percentages among selected teams.

    Parameters
    ----------
    selection : iterable of str
        Team names to include. Names not present in ``pair_df.index`` are
        skipped; repeated names are collapsed, keeping first-seen order.
    figsize : tuple, default (8, 6)
        Figure size passed to matplotlib.
    annot : bool, default True
        Whether to write the value inside each cell.
    fmt : str, default '.1f'
        Format string used for the cell annotations.

    Raises
    ------
    ValueError
        If none of the requested teams is present in ``pair_df``.

    Side effects
    ------------
    Writes ``pair_heatmap_<team>_<team>...png`` into ``out_dir`` at 150 dpi,
    prints the saved path and displays the figure.
    """
    # dict.fromkeys de-duplicates while preserving the caller's order, so a
    # repeated name does not produce duplicated rows/columns in the matrix.
    sel = [s for s in dict.fromkeys(selection) if s in pair_df.index]
    if not sel:
        raise ValueError('No selected teams found in data')
    # Rows are guaranteed by the filter above, columns are not: a team that never
    # appears as anyone's opponent has no column. Reindex instead of .loc so such
    # a team shows up as 0 rather than raising KeyError.
    mat = pair_df.loc[sel].reindex(columns=sel, fill_value=0)  # percent values
    fig, ax = plt.subplots(figsize=figsize)
    sns.heatmap(
        mat,
        annot=annot,
        fmt=fmt,
        cmap='rocket',
        linewidths=0.3,
        cbar_kws={'label': 'Percent of runs'},
        ax=ax,
    )
    ax.set_title('Pairing frequency among selected teams (%)')
    fig.tight_layout()
    # Spaces become underscores; path separators are replaced as well so a team
    # name can never redirect the output into a different directory.
    name_parts = [s.replace(' ', '_').replace('/', '-').replace('\\', '-') for s in sel]
    out_file = out_dir / ('pair_heatmap_' + '_'.join(name_parts) + '.png')
    fig.savefig(out_file, dpi=150)
    print('Saved', out_file)
    plt.show()

# Demo: pick the teams whose group percentages are spread most unevenly
var = group_df.var(axis=1).sort_values(ascending=False)
# Keep the eight highest-variance team names as a plain list
top_by_var = var.index[:8].tolist()
print('Top-8 teams by group distribution variance (good candidates for pair-heatmap):')
print(top_by_var)
# Render the pairing heatmap for that subset
pair_heatmap_for(top_by_var)

In [None]:
# Top-5 opponents for every team (CSV)
def top_opponents_df(k=5, pairs=None):
    """Return the ``k`` most frequent opponents of every team in long format.

    Parameters
    ----------
    k : int, default 5
        Maximum number of opponents listed per team.
    pairs : pandas.DataFrame, optional
        Team x team matrix of pairing percentages. Defaults to the
        notebook-level ``pair_df``.

    Returns
    -------
    pandas.DataFrame
        Columns ``team``, ``opponent``, ``pct``; one row per (team, opponent),
        ordered by team (in ``pairs.index`` order) and then by descending
        percentage. The columns are present even when there are no rows.
    """
    if pairs is None:
        pairs = pair_df
    rows = []
    for t in pairs.index:
        # A team cannot be its own opponent: drop the (zero-filled) diagonal
        # entry so it can never be reported in the ranking.
        candidates = pairs.loc[t].drop(labels=t, errors='ignore')
        # Stable sort keeps tied opponents in column order, so the CSV is reproducible.
        top = candidates.sort_values(ascending=False, kind='stable').head(k)
        for opp, pct in top.items():
            rows.append({'team': t, 'opponent': opp, 'pct': pct})
    return pd.DataFrame(rows, columns=['team', 'opponent', 'pct'])
# Export the five most frequent opponents per team, then preview the first rows
tops_out = out_dir / 'top5_opponents_per_team.csv'
tops = top_opponents_df(k=5)
tops.to_csv(tops_out, index=False)
print('Wrote top-5 opponents CSV to', tops_out)
tops.head(10)

## Next steps
- If the large JSONL arrives, we can add a small aggregator cell that consumes it and either merges the results into `draw_stats.json` or writes a new aggregated file.
- We can also produce interactive dashboards (Plotly/Altair) or export a few selected PNGs for reporting.
- Tell me which team(s) or pairings you'd like highlighted and I will add dedicated plots.