In [None]:
import re
import pandas as pd
from typing import List
from pathlib import Path
from src.viz import make_pitch, add_title
from src.metrics import team_length, team_width, team_centroid

# Locations of the Metrica sample-game raw tracking exports, one file per team
# (adjust these to wherever your downloads live).
HOME_TRACKING_CSV = "/Users/jacksimonson/Documents/repos/football-viz/data/raw/metrica/Sample_Game_1_RawTrackingData_Home_Team.csv"
AWAY_TRACKING_CSV = "/Users/jacksimonson/Documents/repos/football-viz/data/raw/metrica/Sample_Game_1_RawTrackingData_Away_Team.csv"

# header=2 tells pandas to take the column names from the third line of each
# file; the two lines above it are not used as data.
trk_home_wide = pd.read_csv(HOME_TRACKING_CSV, header=2)
trk_away_wide = pd.read_csv(AWAY_TRACKING_CSV, header=2)
def _entity_from_label(label: str):
    """Map a normalized X-column label to an ``(entity_type, player)`` pair."""
    if label == "ball":
        return "ball", "ball"
    # 'player11' -> '11'; labels without any digits keep the label itself so the
    # player id is never None.
    return "player", re.sub(r"\D+", "", label) or label


def metrica_paired_wide_to_long(df: pd.DataFrame, team_label: str) -> pd.DataFrame:
    """
    Convert Metrica 'paired' wide tracking data into a tidy long player table.

    In the wide layout each tracked entity occupies two adjacent columns: an X
    column named like 'Player11' (or 'Ball') followed by a Y column whose name
    is not used (typically 'Unnamed: N'). Column names are matched
    case-insensitively after stripping whitespace.

    Args:
        df: Wide tracking table. Must contain a 'frame' column; 'period' and a
            time column ('time_s', 'time [s]', 'time[s]' or 'time') are
            optional metadata. The input is not modified.
        team_label: Team name; stored lower-cased in the 'team' column.

    Returns:
        One row per (frame, player) with columns
        ``frame, [time_s], team, player, x, y``. 'time_s' is present only when
        the input has a time column. Ball rows are parsed but dropped, because
        the result feeds team-shape metrics. Rows with a missing frame, x or y
        are removed. 'player' is the digit string taken from the column label
        (or the label itself when it has no digits).

    Raises:
        ValueError: If no non-empty (x, y) column pair could be parsed.
        KeyError: If the input has no 'frame' column.
    """
    df = df.copy()
    df.columns = [str(c).strip().lower() for c in df.columns]

    # Normalize whichever time header the file uses to 'time_s' so it is kept
    # as metadata instead of being silently dropped.
    if "time_s" not in df.columns:
        for alias in ("time [s]", "time[s]", "time"):
            if alias in df.columns:
                df = df.rename(columns={alias: "time_s"})
                break

    col_names = list(df.columns)
    id_cols = [c for c in ("period", "frame", "time_s") if c in col_names]
    id_pos = [col_names.index(c) for c in id_cols]

    # Coordinate pairs start right after the last metadata column. With no
    # metadata at all, fall back to the typical layout (period, frame, time).
    start_idx = max(id_pos) + 1 if id_pos else 3

    team = team_label.lower()
    out = []
    # Walk the columns in (X-named, Y-unnamed) pairs. Positional access keeps
    # this working even if labels repeat; a trailing unpaired column is ignored.
    for i in range(start_idx, len(col_names) - 1, 2):
        x_raw = df.iloc[:, i]
        y_raw = df.iloc[:, i + 1]

        # skip completely empty pairs (unused roster slots)
        if x_raw.isna().all() and y_raw.isna().all():
            continue

        entity_type, player = _entity_from_label(col_names[i])

        pair = df.iloc[:, id_pos + [i, i + 1]].copy()
        pair.columns = id_cols + ["x", "y"]
        pair["team"] = team
        pair["entity_type"] = entity_type
        pair["player"] = player

        # enforce numeric types where possible; unparseable values become NaN
        for c in ("frame", "time_s", "x", "y"):
            if c in pair.columns:
                pair[c] = pd.to_numeric(pair[c], errors="coerce")

        out.append(pair)

    if not out:
        raise ValueError(
            "Could not parse any (x,y) pairs. "
            f"First 25 columns were: {list(df.columns)[:25]}"
        )

    long = pd.concat(out, ignore_index=True)
    if "frame" not in long.columns:
        raise KeyError("Tracking data has no 'frame' column.")

    # keep only players for team-level shape metrics
    long_players = long[long["entity_type"] == "player"].copy()

    # final column order
    cols_final = ["frame"]
    if "time_s" in long_players.columns:
        cols_final.append("time_s")
    cols_final += ["team", "player", "x", "y"]
    return long_players[cols_final].dropna(subset=["frame", "x", "y"])

# 2) Convert each team's wide table to tidy long form and stack them
home_long = metrica_paired_wide_to_long(trk_home_wide, "home")
away_long = metrica_paired_wide_to_long(trk_away_wide, "away")
trk_long = pd.concat([home_long, away_long], ignore_index=True)

# 3) Pick a valid frame and compute metrics
# Row 100 is an arbitrary choice: any frame number that exists in the data works.
frame = int(trk_long["frame"].iloc[100])
f_home = trk_long.query("team == 'home' and frame == @frame")

# Pitch dimensions in meters, used both for scaling and for drawing the pitch.
PITCH_LENGTH_M = 105.0
PITCH_WIDTH_M = 68.0

# Scale the slice to meters; x and y are multiplied by the pitch length and
# width respectively (assumes they are normalized fractions of the pitch —
# TODO confirm against the data provider's format notes).
f_scaled = f_home.copy()
f_scaled["x_m"] = f_scaled["x"] * PITCH_LENGTH_M
f_scaled["y_m"] = f_scaled["y"] * PITCH_WIDTH_M

# Team-shape metrics are computed on the meter-scaled columns so the results
# are in meters rather than in the raw coordinate units.
# (team_length / team_width / team_centroid are imported at the top of the file.)
L = team_length(f_scaled["x_m"], f_scaled["y_m"])
W = team_width(f_scaled["x_m"], f_scaled["y_m"])
cx, cy = team_centroid(f_scaled["x_m"], f_scaled["y_m"])

# Plot player positions for that frame.
# NOTE(review): the raw x/y columns are plotted here, presumably because the
# "metricasports" pitch uses the provider's native coordinates with the meter
# dimensions only setting the aspect ratio — confirm against make_pitch.
pitch, fig, ax = make_pitch(
    pitch_type="metricasports",
    pitch_length=PITCH_LENGTH_M,
    pitch_width=PITCH_WIDTH_M,
)
ax.scatter(f_scaled["x"], f_scaled["y"], s=120, alpha=0.85)
add_title(fig, f"Metrica → meters (105×68) — frame {frame}")