# Player Replacement Finder - Siamese Neural Network
## All Positions Version (Per-90 Normalized)

Finds similar players across all positions using multi-season data.

In [None]:
%pip install torch torchvision tqdm mplsoccer -q

In [None]:
import pandas as pd
import numpy as np
import random
import json
from pathlib import Path
from typing import Optional, Dict, List, Tuple, Any
import pickle

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
from mplsoccer import Radar, FontManager, grid
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

# ==================== CONFIGURATION ====================
# Seed Python, NumPy and PyTorch up front so repeated runs of the notebook
# start from the same random state.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)
    # Request deterministic cuDNN kernels and disable its autotuner.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Training hyperparameters
BATCH_SIZE = 64          # pairs per mini-batch in both data loaders
LEARNING_RATE = 0.001    # initial Adam learning rate
WEIGHT_DECAY = 1e-5      # Adam weight decay
EPOCHS = 50              # upper bound on epochs (early stopping may end sooner)
EMB_DIM = 64             # size of the learned player embedding
TRAIN_PAIRS = 50000      # number of player pairs sampled for training
VAL_PAIRS = 10000        # number of player pairs sampled for validation
VAL_SPLIT = 0.2          # fraction of players held out for validation pairs
PATIENCE = 10            # epochs without validation improvement before stopping

# Device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")
print(f"Random seed: {SEED}")

# Paths
# Everything is resolved relative to the current working directory.
SIAMESE_DIR = Path(".")
DATA_DIR = SIAMESE_DIR / "data"
MODELS_DIR = SIAMESE_DIR / "models"
OUTPUTS_DIR = SIAMESE_DIR / "outputs"

# Output folders are created on demand; DATA_DIR is expected to exist already.
MODELS_DIR.mkdir(parents=True, exist_ok=True)
OUTPUTS_DIR.mkdir(parents=True, exist_ok=True)

## 1. Load Data

In [None]:
allpos_file = DATA_DIR / "players_allpos_multiseason.csv"
forwards_file = DATA_DIR / "forwards_multiseason.csv"

# Candidate datasets in order of preference: (csv path, source tag, banner).
_DATASETS = (
    (allpos_file, "allpos_multiseason", "ALL POSITIONS"),
    (forwards_file, "forwards_multiseason", "Forwards only"),
)

# Load the first dataset that exists on disk; fail loudly when none does.
for _csv_path, _source_tag, _banner in _DATASETS:
    if not _csv_path.exists():
        continue
    df = pd.read_csv(_csv_path)
    SOURCE = _source_tag
    print(f"{_banner}: {len(df)} players")
    break
else:
    raise FileNotFoundError("No data found! Run 00_process_allpos.ipynb first.")

print(f"Columns: {len(df.columns)}")
if 'Pos' in df.columns:
    print("\nPosition breakdown:")
    print(df['Pos'].value_counts())

In [None]:
def find_col(names: List[str]) -> Optional[str]:
    """Return the first column of the global ``df`` that matches one of *names*.

    Matching is case-insensitive. Candidates are tried in the order given, and
    for each candidate the columns are scanned left to right, so an earlier
    candidate always wins over a later one.

    Args:
        names: Candidate column names, most preferred first.

    Returns:
        The column label exactly as it appears in ``df``, or ``None`` when no
        candidate matches.
    """
    # Lower-case every column label once instead of once per candidate.
    lowered = [(col, str(col).lower()) for col in df.columns]
    for candidate in names:
        target = candidate.lower()
        for col, key in lowered:
            if key == target:
                return col
    return None

# Resolve the dataset's actual column labels for the fields used below.
# find_col() returns None when no candidate matches, so any of these may be
# None for an unexpected CSV layout.
PLAYER_COL = find_col(['player', 'Player', 'name'])
TEAM_COL = find_col(['squad', 'Squad', 'team', 'Team'])
AGE_COL = find_col(['age', 'Age'])
POS_COL = find_col(['pos', 'Pos', 'position'])
# Number of full 90-minute periods played; used as the per-90 denominator.
NINETIES_COL = find_col(['90s', '90s_x', '90s_y'])

print(f"Player: {PLAYER_COL}, Team: {TEAM_COL}, Age: {AGE_COL}, Pos: {POS_COL}, 90s: {NINETIES_COL}")

In [None]:
def load_features(filepath: Path) -> Tuple[List[str], Dict[str, str]]:
    """Parse a feature list file made of ``raw_name # readable name`` lines.

    Blank lines and lines starting with ``#`` are skipped. A line is kept only
    when it contains a ``#`` with non-empty text on both sides; everything
    after the first ``#`` is the readable name. Lines without a ``#`` are
    ignored.

    Args:
        filepath: Path of the UTF-8 text file to read.

    Returns:
        ``(features, feature_names)``: the raw column names in file order, and
        a mapping from raw column name to readable label.
    """
    ordered: List[str] = []
    labels: Dict[str, str] = {}
    with open(filepath, 'r', encoding='utf-8') as handle:
        for raw_line in handle:
            entry = raw_line.strip()
            if not entry or entry.startswith('#'):
                continue
            # partition() splits on the first '#'; sep is '' when none exists.
            column, sep, label = entry.partition('#')
            column, label = column.strip(), label.strip()
            if sep and column and label:
                ordered.append(column)
                labels[column] = label
    return ordered, labels

allpos_features = DATA_DIR / 'clustering_features_allpos.txt'
multiseason_features = DATA_DIR / 'clustering_features_multiseason.txt'

# Use the all-positions feature list when present, else the multi-season one.
_features_path = next(
    (path for path in (allpos_features, multiseason_features) if path.exists()),
    None,
)
if _features_path is None:
    raise FileNotFoundError("No features file found!")
FEATURES, FEATURE_NAMES = load_features(_features_path)

# Drop any listed feature that the loaded dataset does not actually contain.
_available_columns = set(df.columns)
FEATURES = [name for name in FEATURES if name in _available_columns]
print(f"Loaded {len(FEATURES)} features")

# Columns that are already rates/ratios and are therefore NOT divided by the
# 90s column during per-90 conversion.
RATE_FEATURES = (
    ['Per 90 Minutes']
    + [f'Per 90 Minutes.{k}' for k in range(1, 10)]
    + ['Standard.2', 'Standard.3', 'Standard.4', 'Standard.5', 'Standard.6', 'Standard.11']
    + ['Total.2', 'Short.2', 'Medium.2', 'Long.2']
    + ['Take-Ons.2', 'Take-Ons.4', 'Aerial Duels.2', 'Challenges.2', 'Expected.4']
)

def get_readable_name(feature: str) -> str:
    """Return the readable label for *feature*, or *feature* itself if unknown."""
    try:
        return FEATURE_NAMES[feature]
    except KeyError:
        return feature

In [None]:
# Convert counting stats to per-90 values; rate stats are left untouched.
df_per90 = df.copy()
if NINETIES_COL and NINETIES_COL in df.columns:
    # 0 -> NaN so players with no minutes produce NaN (filled with 0 below)
    # instead of +/-inf.
    nineties = df[NINETIES_COL].replace(0, np.nan)
    # Never divide the minutes column by itself. The detected column may be
    # '90s', '90s_x' or '90s_y', so exclude whichever one was actually found
    # in addition to the literal '90s'.
    _not_per90 = set(RATE_FEATURES) | {'90s', NINETIES_COL}
    for feat in FEATURES:
        if feat not in _not_per90:
            df_per90[feat] = df[feat] / nineties
    df_per90[FEATURES] = df_per90[FEATURES].fillna(0)
    print("Converted to per-90 format")
else:
    # Make the fallback visible: without a 90s column the features stay as
    # raw totals and similarities will be driven by playing time.
    print("No 90s column found - features left as raw totals")

In [None]:
# Feature matrix: one row per player, one column per selected feature,
# with missing values treated as 0.
X = df_per90[FEATURES].fillna(0).to_numpy()
# Standardise every feature to zero mean / unit variance.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)
print(f"Feature matrix: {X_scaled.shape}")

## 2. Ground Truth Similarity

In [None]:
print("Computing similarity matrix...")
# Cosine similarity lies in [-1, 1]; shift and halve it into [0, 1] so the
# targets share the range of the network's sigmoid output.
ground_truth = (cosine_similarity(X_scaled) + 1) / 2
print(f"Shape: {ground_truth.shape}")

## 3. Dataset

In [None]:
class PairDataset(Dataset):
    """Fixed set of randomly sampled player pairs with similarity targets.

    All pairs are drawn once in the constructor from *indices* using a private
    ``RandomState(seed)``. Each item is ``(features[i], features[j], target)``
    where ``target`` is a 1-element float tensor holding ``sim_matrix[i, j]``.
    """

    def __init__(self, features, sim_matrix, indices, n_pairs=10000, seed=SEED):
        self.features = torch.FloatTensor(features)
        self.sim = sim_matrix
        self.indices = indices
        self.n = len(indices)

        sampler = np.random.RandomState(seed)
        self.pairs = []
        self.labels = []
        for _ in range(n_pairs):
            # Two distinct positions within `indices`, mapped back to row ids.
            first, second = sampler.choice(self.n, 2, replace=False)
            left, right = self.indices[first], self.indices[second]
            self.pairs.append((left, right))
            self.labels.append(sim_matrix[left, right])

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        left, right = self.pairs[idx]
        target = torch.FloatTensor([self.labels[idx]])
        return self.features[left], self.features[right], target

# Split PLAYERS (not pairs) so validation pairs only involve held-out players.
all_indices = np.arange(len(X_scaled))
train_indices, val_indices = train_test_split(
    all_indices,
    test_size=VAL_SPLIT,
    random_state=SEED,
)
print(f"Train players: {len(train_indices)}, Val players: {len(val_indices)}")

# Different seeds so the two pair samplers do not replay the same draws.
train_data = PairDataset(
    X_scaled, ground_truth, train_indices, n_pairs=TRAIN_PAIRS, seed=SEED
)
val_data = PairDataset(
    X_scaled, ground_truth, val_indices, n_pairs=VAL_PAIRS, seed=SEED + 1
)
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_data, batch_size=BATCH_SIZE)
print(f"Train pairs: {len(train_data)}, Val pairs: {len(val_data)}")

## 4. Network

In [None]:
class SiameseNet(nn.Module):
    """Siamese network: a shared encoder plus a similarity head.

    ``enc`` maps a feature vector to an ``emb_dim`` embedding. ``fc`` takes the
    concatenation of |e1 - e2|, e1 * e2 and (e1 + e2) / 2 and outputs a score
    in (0, 1) through a sigmoid.
    """

    def __init__(self, in_dim, emb_dim=64):
        super().__init__()
        encoder_layers = [
            nn.Linear(in_dim, 256), nn.BatchNorm1d(256), nn.ReLU(), nn.Dropout(0.3),
            nn.Linear(256, 128), nn.BatchNorm1d(128), nn.ReLU(), nn.Dropout(0.2),
            nn.Linear(128, emb_dim),
        ]
        self.enc = nn.Sequential(*encoder_layers)

        head_layers = [
            nn.Linear(emb_dim * 3, 128), nn.ReLU(), nn.Dropout(0.2),
            nn.Linear(128, 64), nn.ReLU(),
            nn.Linear(64, 1), nn.Sigmoid(),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x1, x2):
        """Return ``(score, e1, e2)`` for a batch of player pairs."""
        e1 = self.enc(x1)
        e2 = self.enc(x2)
        # All three pair features are symmetric in (e1, e2).
        gap = torch.abs(e1 - e2)
        product = e1 * e2
        midpoint = (e1 + e2) / 2
        score = self.fc(torch.cat([gap, product, midpoint], dim=1))
        return score, e1, e2

    def embed(self, x):
        """Return embeddings of *x* as a NumPy array.

        Switches the module to eval mode (and leaves it there) and runs
        without gradient tracking.
        """
        self.eval()
        with torch.no_grad():
            vectors = self.enc(x)
        return vectors.cpu().numpy()

# One input unit per selected feature; the model is moved to the configured
# device (CUDA when available, otherwise CPU).
model = SiameseNet(len(FEATURES), EMB_DIM).to(device)
print(f"Network: {len(FEATURES)} features -> {EMB_DIM}D embeddings")

## 5. Train

In [None]:
# Regress the predicted pair score onto the [0, 1] ground-truth similarity.
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
# Halve the learning rate when the validation loss has not improved for 5 epochs.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5, factor=0.5)

best_loss = float('inf')       # lowest validation loss seen so far
patience_counter = 0           # consecutive epochs without improvement
train_losses, val_losses = [], []  # per-epoch history, plotted afterwards

print(f"Training {EPOCHS} epochs (patience: {PATIENCE})...\n")
for epoch in range(EPOCHS):
    # ---- training pass ----
    model.train()
    t_loss = 0.0
    for x1, x2, y in train_loader:
        x1, x2, y = x1.to(device), x2.to(device), y.to(device)
        optimizer.zero_grad()
        pred, _, _ = model(x1, x2)
        loss = criterion(pred, y)
        loss.backward()
        optimizer.step()
        t_loss += loss.item()
    # Mean of the per-batch losses (each batch counts equally).
    t_loss /= len(train_loader)
    
    # ---- validation pass (no gradients, dropout/batch-norm in eval mode) ----
    model.eval()
    v_loss = 0.0
    with torch.no_grad():
        for x1, x2, y in val_loader:
            x1, x2, y = x1.to(device), x2.to(device), y.to(device)
            pred, _, _ = model(x1, x2)
            v_loss += criterion(pred, y).item()
    v_loss /= len(val_loader)
    
    train_losses.append(t_loss)
    val_losses.append(v_loss)
    scheduler.step(v_loss)
    
    # Checkpoint on improvement; otherwise count towards early stopping.
    if v_loss < best_loss:
        best_loss = v_loss
        patience_counter = 0
        torch.save(model.state_dict(), MODELS_DIR / 'siamese_allpos_best.pth')
    else:
        patience_counter += 1
        if patience_counter >= PATIENCE:
            print(f"\nEarly stopping at epoch {epoch + 1}!")
            break
    
    # Progress line on the first epoch and every fifth epoch after that.
    if (epoch + 1) % 5 == 0 or epoch == 0:
        print(f"Epoch {epoch + 1:3d} | Train: {t_loss:.4f} | Val: {v_loss:.4f}")

print(f"\nBest validation loss: {best_loss:.4f}")

In [None]:
# Plot the per-epoch training and validation loss, save it and show it.
curve_fig, curve_ax = plt.subplots(figsize=(10, 4))
curve_ax.plot(train_losses, label='Train')
curve_ax.plot(val_losses, label='Val')
curve_ax.set_xlabel('Epoch')
curve_ax.set_ylabel('Loss')
curve_ax.set_title(f'Training Curve - All Positions ({SOURCE})')
curve_ax.legend()
curve_ax.grid(alpha=0.3)
curve_fig.savefig(OUTPUTS_DIR / 'training_curve_allpos.png', dpi=150)
plt.show()

## 6. Embeddings

In [None]:
# Restore the best checkpoint written during training, then embed every player.
best_model_path = MODELS_DIR / 'siamese_allpos_best.pth'
try:
    # map_location lets a checkpoint saved on a GPU load on a CPU-only machine;
    # weights_only restricts unpickling to tensors and plain containers.
    best_state = torch.load(best_model_path, map_location=device, weights_only=True)
except TypeError:
    # Older PyTorch releases do not accept the weights_only keyword.
    best_state = torch.load(best_model_path, map_location=device)
# Kept outside the try so an error raised here is never mistaken for the
# "unsupported keyword" case above.
model.load_state_dict(best_state)

embeddings = model.embed(torch.FloatTensor(X_scaled).to(device))
print(f"Embeddings: {embeddings.shape}")

## 7. Replacement Finder

In [None]:
class ReplacementFinder:
    """Rank players by embedding similarity to a chosen player.

    ``sim`` is the pairwise cosine similarity of the embeddings rescaled from
    [-1, 1] to [0, 1]. Rows of ``sim`` are aligned with the row positions of
    ``df``.
    """

    def __init__(self, df, df_per90, emb, player_col, team_col, age_col, pos_col, features, X_scaled):
        self.df = df
        self.df_per90 = df_per90
        self.player_col = player_col
        self.team_col = team_col
        self.age_col = age_col
        self.pos_col = pos_col
        self.features = features
        self.X_scaled = X_scaled
        # Player name -> row position. When a name occurs more than once the
        # last occurrence wins.
        self.idx = {n: i for i, n in enumerate(df[player_col])}
        self.sim = (cosine_similarity(emb) + 1) / 2

    def find(self, name, n=10, exclude_team=True, max_age=None, position=None):
        """Return the *n* most similar players to *name* as a DataFrame.

        Args:
            name: Full or partial player name (case-insensitive).
            n: Number of rows to return.
            exclude_team: Drop players from the target's own team. Ignored
                when no team column is known.
            max_age: Keep only players whose age is <= this value.
            position: Keep only players whose position string contains this
                pattern (passed to ``Series.str.contains``).

        Returns:
            A copy of the player table with an added ``similarity`` column,
            sorted by descending similarity, or ``None`` (after printing a
            message) when the player is not found.
        """
        idx = self._get_idx(name)
        if idx is None:
            print(f"'{name}' not found")
            return None
        target = self.df.iloc[idx]
        res = self.df.copy()
        res['similarity'] = self.sim[idx]
        # Never recommend the player as their own replacement.
        res = res[res[self.player_col] != target[self.player_col]]
        # Guard on team_col: without it the lookup would raise KeyError(None).
        if exclude_team and self.team_col:
            res = res[res[self.team_col] != target[self.team_col]]
        if max_age is not None and self.age_col:
            res = res[res[self.age_col] <= max_age]
        if position and self.pos_col:
            res = res[res[self.pos_col].str.contains(position, na=False)]
        return res.nlargest(n, 'similarity')

    def _get_idx(self, name):
        """Return the row position for *name*, or ``None`` if nothing matches.

        An exact case-insensitive match is preferred; otherwise the first
        player whose name contains the query is used. Blank or non-string
        queries match nothing, and non-string names in the table (e.g. NaN
        from an empty cell) are skipped.
        """
        if not isinstance(name, str) or not name.strip():
            return None
        needle = name.strip().lower()
        partial = None
        for player, i in self.idx.items():
            if not isinstance(player, str):
                continue
            candidate = player.lower()
            if candidate == needle:
                return i
            if partial is None and needle in candidate:
                partial = i
        return partial

# Build the lookup object over the full player table and the learned
# embeddings; its constructor computes the pairwise similarity matrix.
finder = ReplacementFinder(df, df_per90, embeddings, PLAYER_COL, TEAM_COL, AGE_COL, POS_COL, FEATURES, X_scaled)
print("Finder ready!")

## 8. Visualizations (mplsoccer)

In [None]:
# Radar chart parameters for mplsoccer
RADAR_STATS = {
    'Per 90 Minutes': 'Goals',
    'Per 90 Minutes.1': 'Assists',
    'Per 90 Minutes.5': 'xG',
    'Per 90 Minutes.6': 'xAG',
    'Take-Ons.1': 'Dribbles',
    'Carries.3': 'Prog Carries',
    'KP': 'Key Passes',
    'PrgP': 'Prog Passes',
    'Tackles': 'Tackles',
    'Int': 'Interceptions'
}

RADAR_FEATURES = [f for f in RADAR_STATS.keys() if f in FEATURES]
RADAR_LABELS = [RADAR_STATS[f] for f in RADAR_FEATURES]

# Compute min/max ranges for each stat (5th and 95th percentile)
RADAR_LOW = [np.percentile(df_per90[f].dropna(), 5) for f in RADAR_FEATURES]
RADAR_HIGH = [np.percentile(df_per90[f].dropna(), 95) for f in RADAR_FEATURES]

print(f"Radar chart: {len(RADAR_FEATURES)} stats")
for i, label in enumerate(RADAR_LABELS):
    print(f"  {label}: {RADAR_LOW[i]:.2f} - {RADAR_HIGH[i]:.2f}")

In [None]:
def radar_chart(player1: str, player2: str, save_path: Optional[str] = None):
    """Draw an mplsoccer radar comparing two players on the radar stats.

    Args:
        player1: Name (or part of the name) of the first player, drawn green.
        player2: Name (or part of the name) of the second player, drawn red.
        save_path: When given, the figure is also written to this path.

    Returns:
        A dict with both players' name/team/position/values, the axis labels
        and the similarity score, or ``None`` (after printing a message) when
        either player cannot be found.
    """
    first = finder._get_idx(player1)
    second = finder._get_idx(player2)
    if first is None or second is None:
        print("Player not found")
        return None

    row_a, row_b = df.iloc[first], df.iloc[second]
    name1, name2 = row_a[PLAYER_COL], row_b[PLAYER_COL]
    team1, team2 = row_a[TEAM_COL], row_b[TEAM_COL]
    pos1 = row_a[POS_COL] if POS_COL else ''
    pos2 = row_b[POS_COL] if POS_COL else ''
    similarity = finder.sim[first, second]

    # Per-90 values of each player on the radar axes.
    v1 = df_per90.iloc[first][RADAR_FEATURES].values.tolist()
    v2 = df_per90.iloc[second][RADAR_FEATURES].values.tolist()

    radar = Radar(
        params=RADAR_LABELS,
        min_range=RADAR_LOW,
        max_range=RADAR_HIGH,
        round_int=[False] * len(RADAR_LABELS),
        num_rings=4,
        ring_width=1,
        center_circle_radius=1,
    )

    # Figure layout: title strip, radar, endnote strip.
    fig, axs = grid(figheight=14, grid_height=0.915, title_height=0.06,
                    endnote_height=0.025, title_space=0, endnote_space=0,
                    grid_key='radar', axis=False)
    radar_ax = axs['radar']
    title_ax = axs['title']

    radar.setup_axis(ax=radar_ax)
    radar.draw_circles(ax=radar_ax, facecolor='#f0f0f0', edgecolor='#cdcdcd')
    radar.draw_radar_compare(
        v1, v2, ax=radar_ax,
        kwargs_radar={'facecolor': '#006B3F', 'alpha': 0.6},
        kwargs_compare={'facecolor': '#CE1126', 'alpha': 0.6},
    )
    radar.draw_range_labels(ax=radar_ax, fontsize=10)
    radar.draw_param_labels(ax=radar_ax, fontsize=12)

    # Title strip: player 1 on the left, player 2 on the right, score centred.
    title_items = [
        (0.01, 0.65, name1,
         dict(fontsize=20, fontweight='bold', ha='left', color='#006B3F')),
        (0.01, 0.25, f"{team1} | {pos1}",
         dict(fontsize=14, ha='left', color='#333333')),
        (0.99, 0.65, name2,
         dict(fontsize=20, fontweight='bold', ha='right', color='#CE1126')),
        (0.99, 0.25, f"{team2} | {pos2}",
         dict(fontsize=14, ha='right', color='#333333')),
        (0.5, 0.5, f"Similarity: {similarity:.1%}",
         dict(fontsize=16, ha='center', color='#555555', fontweight='bold')),
    ]
    for x_pos, y_pos, text, text_style in title_items:
        title_ax.text(x_pos, y_pos, text, va='center', **text_style)

    axs['endnote'].text(0.5, 0.5, 'Data: FBref | Player Replacement Finder',
                        fontsize=10, ha='center', va='center', color='#666666')

    if save_path:
        fig.savefig(save_path, dpi=150, bbox_inches='tight', facecolor='white')

    plt.show()

    return {
        'player1': {'name': name1, 'team': team1, 'position': pos1, 'values': v1},
        'player2': {'name': name2, 'team': team2, 'position': pos2, 'values': v2},
        'labels': RADAR_LABELS,
        'similarity': float(similarity),
    }

print("radar_chart() ready - using mplsoccer!")

print("radar_chart() ready - using mplsoccer!")

In [None]:
def pizza_chart(player: str, save_path: Optional[str] = None):
    """Draw an mplsoccer pizza chart of one player's percentile ranks.

    For each radar stat the percentile is the share of all players whose
    per-90 value is strictly below this player's, rounded to an integer.

    Args:
        player: Name (or part of the name) of the player.
        save_path: When given, the figure is also written to this path.

    Returns:
        A dict with the player's name, team, position, percentiles and the
        slice labels, or ``None`` (after printing a message) when the player
        cannot be found.
    """
    from mplsoccer import PyPizza

    idx = finder._get_idx(player)
    if idx is None:
        print("Player not found")
        return None

    profile = df.iloc[idx]
    name = profile[PLAYER_COL]
    team = profile[TEAM_COL]
    pos = profile[POS_COL] if POS_COL else ''

    def _percentile_rank(feature, value):
        # Share of the population strictly below `value`, as a whole percent.
        population = df_per90[feature].dropna().values
        pct = (np.sum(population < value) / len(population)) * 100
        return int(round(pct))

    player_values = df_per90.iloc[idx][RADAR_FEATURES].values
    percentiles = [
        _percentile_rank(feat, val)
        for feat, val in zip(RADAR_FEATURES, player_values)
    ]

    n_slices = len(RADAR_LABELS)
    baker = PyPizza(
        params=RADAR_LABELS,
        background_color="#f4f4f4",
        straight_line_color="#000000",
        straight_line_lw=1,
        last_circle_color="#000000",
        last_circle_lw=1,
        other_circle_lw=0,
        inner_circle_size=20,
    )

    value_box = dict(edgecolor="#000000", facecolor="#006B3F",
                     boxstyle="round,pad=0.2", lw=1)
    fig, ax = baker.make_pizza(
        percentiles,
        figsize=(10, 10),
        color_blank_space="same",
        slice_colors=["#006B3F"] * n_slices,
        value_colors=["#ffffff"] * n_slices,
        value_bck_colors=["#006B3F"] * n_slices,
        blank_alpha=0.4,
        kwargs_slices=dict(edgecolor="#000000", zorder=2, linewidth=1),
        kwargs_params=dict(color="#000000", fontsize=12, va="center"),
        kwargs_values=dict(color="#ffffff", fontsize=12, fontweight='bold',
                           bbox=value_box),
    )

    # Header (name, team | position) and footer credit.
    fig.text(0.515, 0.975, f"{name}", size=22, ha="center", fontweight='bold', color="#006B3F")
    fig.text(0.515, 0.945, f"{team} | {pos}", size=14, ha="center", color="#666666")
    fig.text(0.515, 0.025, "Percentile Rank vs All Players | Data: FBref",
             size=10, ha="center", color="#666666")

    if save_path:
        fig.savefig(save_path, dpi=150, bbox_inches='tight', facecolor='#f4f4f4')

    plt.show()

    return {
        'name': name,
        'team': team,
        'position': pos,
        'percentiles': percentiles,
        'labels': RADAR_LABELS,
    }

print("pizza_chart() ready!")

print("pizza_chart() ready!")

In [None]:
def find_replacement(name: str, n: int = 10, max_age: Optional[int] = None,
                     position: Optional[str] = None, exclude_team: bool = True):
    """Find replacements for a player and display a compact results table.

    Args:
        name: Full or partial player name.
        n: Number of candidates to return.
        max_age: Optional upper age bound for candidates.
        position: Optional position filter.
        exclude_team: Whether to drop players from the target's own team.

    Returns:
        The full result DataFrame from ``finder.find`` (all columns plus
        ``similarity``), or ``None`` when the player is not found.
    """
    res = finder.find(name, n=n, exclude_team=exclude_team,
                      max_age=max_age, position=position)
    if res is None:
        return None
    # Build the column list explicitly so 'similarity' is always last, even
    # when the position and/or age column is unavailable.
    info_cols = [
        col for col in (PLAYER_COL, TEAM_COL, POS_COL, AGE_COL)
        if col is not None and col in res.columns
    ]
    cols = [*info_cols, 'similarity']
    # `display` is not imported in this file; it is provided by the notebook.
    display(res[cols].reset_index(drop=True))
    return res

In [None]:
def explain_similarity(player1: str, player2: str, top_n: int = 10):
    """Print and return a per-feature breakdown of two players' similarity.

    The headline figure is the embedding-based similarity from ``finder.sim``.
    The per-feature ``Contribution`` is each feature's term in the cosine
    similarity of the two players' standardised feature vectors, so it
    explains the input-space similarity rather than the learned one.

    Args:
        player1: Name (or part of the name) of the first player.
        player2: Name (or part of the name) of the second player.
        top_n: Number of highest-contribution features to print.

    Returns:
        A DataFrame with one row per feature (readable name, raw column name,
        both players' per-90 values and the contribution), or ``None`` (after
        printing a message) when either player cannot be found.
    """
    idx1 = finder._get_idx(player1)
    idx2 = finder._get_idx(player2)

    if idx1 is None or idx2 is None:
        print("Player not found")
        return None

    name1 = df.iloc[idx1][PLAYER_COL]
    name2 = df.iloc[idx2][PLAYER_COL]
    pos1 = df.iloc[idx1][POS_COL] if POS_COL else ''
    pos2 = df.iloc[idx2][POS_COL] if POS_COL else ''
    similarity = finder.sim[idx1, idx2]

    v1 = df_per90.iloc[idx1][FEATURES].values
    v2 = df_per90.iloc[idx2][FEATURES].values
    v1_scaled = X_scaled[idx1]
    v2_scaled = X_scaled[idx2]

    # Element-wise terms of the cosine similarity; the epsilon guards against
    # an all-zero standardised vector.
    norm1 = np.linalg.norm(v1_scaled)
    norm2 = np.linalg.norm(v2_scaled)
    contributions = (v1_scaled * v2_scaled) / (norm1 * norm2 + 1e-8)

    analysis = pd.DataFrame({
        'Feature': [get_readable_name(f) for f in FEATURES],
        'Raw': FEATURES,
        name1: v1,
        name2: v2,
        'Contribution': contributions
    })

    print("=" * 70)
    print(f"{name1} ({pos1}) vs {name2} ({pos2})")
    print(f"Overall Similarity: {similarity:.1%}")
    print("=" * 70)
    print(f"\nTop {top_n} matching features:")
    for _, row in analysis.nlargest(top_n, 'Contribution').iterrows():
        # ':+.3f' prints the real sign; a literal '+' prefix would render
        # negative contributions as '+-0.012'.
        print(f"  {row['Feature']:30s} | {row[name1]:6.2f} vs {row[name2]:6.2f} | {row['Contribution']:+.3f}")

    return analysis

## 9. Example Usage

In [None]:
# find_replacement("Semenyo")

In [None]:
# radar_chart("Semenyo", "Elanga")

In [None]:
# pizza_chart("Kudus")

In [None]:
# explain_similarity("Semenyo", "Elanga")

## 10. Export for Web App

In [None]:
# Save embeddings
df_embeddings = df.copy()
for i in range(EMB_DIM):
    df_embeddings[f'emb_{i}'] = embeddings[:, i]
df_embeddings.to_csv(DATA_DIR / f"players_{SOURCE}_embeddings.csv", index=False)

# Save finder pickle
with open(MODELS_DIR / 'finder_allpos.pkl', 'wb') as f:
    pickle.dump({
        'model': model.state_dict(),
        'scaler': scaler,
        'features': FEATURES,
        'feature_names': FEATURE_NAMES,
        'embeddings': embeddings,
        'similarity': finder.sim,
        'source': SOURCE,
        'seed': SEED,
        'emb_dim': EMB_DIM,
        'radar_features': RADAR_FEATURES,
        'radar_labels': RADAR_LABELS,
        'radar_low': RADAR_LOW,
        'radar_high': RADAR_HIGH
    }, f)

print("Saved model and embeddings!")

In [None]:
def _json_safe(value):
    """Return *value* as a plain JSON-serialisable scalar (missing -> None)."""
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return None
    if isinstance(value, np.generic):
        return value.item()
    return value


def export_for_supabase(top_k: int = 50):
    """Write players, radar stats and top-k similarities as JSON files.

    Three files are written to ``DATA_DIR``: ``export_players.json``,
    ``export_stats.json`` and ``export_similarities.json``. Player ids are row
    positions, so they line up with the ids in the similarity records.
    Missing cells are exported as ``null`` rather than the non-standard
    ``NaN`` token.

    Args:
        top_k: Number of most similar players exported per player.
    """
    # Players table
    players = []
    for player_id, (_, row) in enumerate(df.iterrows()):
        age = _json_safe(row[AGE_COL]) if AGE_COL else None
        players.append({
            'id': player_id,
            'name': _json_safe(row[PLAYER_COL]),
            'squad': _json_safe(row[TEAM_COL]),
            'position': _json_safe(row[POS_COL]) if POS_COL else None,
            'age': int(age) if age is not None else None,
            'nation': _json_safe(row.get('Nation', None)),
            'league': _json_safe(row.get('League', None)),
        })

    # Stats (per-90 values for radar)
    stats = []
    for player_id, (_, row) in enumerate(df_per90.iterrows()):
        for j, feat in enumerate(RADAR_FEATURES):
            stats.append({
                'player_id': player_id,
                'stat_name': RADAR_LABELS[j],
                'stat_key': feat,
                'value': round(float(row[feat]), 3) if pd.notna(row[feat]) else 0,
                'min_range': round(float(RADAR_LOW[j]), 3),
                'max_range': round(float(RADAR_HIGH[j]), 3)
            })

    # Similarities (top_k per player)
    similarities = []
    for i in range(len(df)):
        sim_scores = finder.sim[i]
        ranked = np.argsort(sim_scores)[::-1]
        # Remove the player explicitly: with tied scores the player's own row
        # is not guaranteed to sort last, so slicing it off by position could
        # keep the player and drop a real neighbour.
        top_indices = ranked[ranked != i][:top_k]
        for rank, j in enumerate(top_indices, 1):
            similarities.append({
                'player_id': i,
                'similar_player_id': int(j),
                'similarity': round(float(sim_scores[j]), 4),
                'rank': rank
            })

    # Save to JSON
    with open(DATA_DIR / 'export_players.json', 'w', encoding='utf-8') as f:
        json.dump(players, f)
    with open(DATA_DIR / 'export_stats.json', 'w', encoding='utf-8') as f:
        json.dump(stats, f)
    with open(DATA_DIR / 'export_similarities.json', 'w', encoding='utf-8') as f:
        json.dump(similarities, f)

    print(f"Exported:")
    print(f"  - {len(players)} players")
    print(f"  - {len(stats)} stat records")
    print(f"  - {len(similarities)} similarity records")
    
# export_for_supabase()