In [7]:
import asyncio
import csv
import json
from typing import Any, Dict, List, Optional, Set
from playwright.async_api import async_playwright

def deep_find_keys(node: Any, targets: Set[str], found: Dict[str, Any]) -> None:
    """Walk `node` depth-first, storing the first value seen for each wanted key.

    Args:
        node: Arbitrary nested structure; only dicts and lists are descended
            into, anything else is ignored.
        targets: Key names to look for.
        found: Output mapping, mutated in place. A key already present is
            never overwritten.
    """
    # Every target already has a value: nothing left to search for.
    if targets.issubset(found):
        return
    if isinstance(node, list):
        for item in node:
            deep_find_keys(item, targets, found)
        return
    if not isinstance(node, dict):
        return
    for key, value in node.items():
        if key not in found and key in targets:
            found[key] = value
        # Keep descending even after a hit: other targets may be nested inside.
        deep_find_keys(value, targets, found)

def get_in(obj: Any, path: List[str], default=None):
    """Follow `path` through nested dicts and return the value at its end.

    Returns `default` as soon as a step lands on a non-dict or on a missing
    key. An empty `path` returns `obj` itself.
    """
    node = obj
    for key in path:
        if not isinstance(node, dict) or key not in node:
            return default
        node = node[key]
    return node

def full_name(entry: dict) -> Optional[str]:
    """Build a display name for a player entry.

    Tries the name stored under "playerStats" first, then the one under
    "player". When neither has a givenName or surname, falls back to the raw
    "playerName" value converted to a string; returns None if that is missing
    or falsy as well.
    """
    name_paths = (
        ["playerStats", "player", "playerName"],
        ["player", "player", "playerName"],
    )
    for name_path in name_paths:
        parts = [
            get_in(entry, name_path + ["givenName"]),
            get_in(entry, name_path + ["surname"]),
        ]
        present = [part for part in parts if part]
        if present:
            return " ".join(present).strip()

    # No structured name anywhere: use whichever raw playerName is truthy.
    raw = get_in(entry, name_paths[0]) or get_in(entry, name_paths[1])
    if not raw:
        return None
    return str(raw).strip()

def flatten_row(entry: dict, side_label: str) -> dict:
    """Turn one nested player entry into a flat dict of CSV-ready fields.

    Args:
        entry: One element of a team's player-stats list.
        side_label: Value stored under "teamSide" (callers pass "home" or
            "away").

    Returns:
        A dict holding identity fields, the stat columns (None where the
        source has no value) and whichever of the four extended-stat fields
        are present in the source.
    """
    stats = get_in(entry, ["playerStats", "stats"], {}) or {}
    extended = stats.get("extendedStats") or {}
    clearances = stats.get("clearances") or {}

    player_id = get_in(entry, ["playerStats", "player", "playerId"])
    if not player_id:
        player_id = get_in(entry, ["player", "player", "playerId"])
    team_id = entry.get("teamId") or get_in(entry, ["playerStats", "teamId"])

    # Identity columns.
    row = {
        "name": full_name(entry),
        "number": get_in(entry, ["player", "jumperNumber"]),
        "position": get_in(entry, ["player", "player", "position"]),
        "playerId": player_id,
        "teamId": team_id,
        "teamSide": side_label,
    }

    # Core box score.
    box_score = (
        "goals", "behinds", "kicks", "handballs",
        "disposals", "marks", "hitouts", "tackles",
    )
    row.update((field, stats.get(field)) for field in box_score)

    # Clearances live in their own nested dict.
    clearance_fields = ("centreClearances", "stoppageClearances", "totalClearances")
    row.update((field, clearances.get(field)) for field in clearance_fields)

    # Inside/rebound 50s, frees and possessions.
    territory_fields = (
        "inside50s", "rebound50s",
        "freesFor", "freesAgainst",
        "contestedPossessions", "uncontestedPossessions",
    )
    row.update((field, stats.get(field)) for field in territory_fields)

    # Accept either spelling: metres / meters.
    if "metresGained" in stats:
        row["metresGained"] = stats.get("metresGained")
    else:
        row["metresGained"] = stats.get("metersGained")

    # Extras, then efficiency and fantasy numbers.
    other_fields = (
        "marksInside50", "contestedMarks", "onePercenters", "bounces",
        "intercepts", "turnovers", "scoreInvolvements", "goalAssists",
        "shotsAtGoal",
        "disposalEfficiency", "dreamTeamPoints", "ratingPoints",
    )
    row.update((field, stats.get(field)) for field in other_fields)

    # Time on ground and update timestamp sit beside, not inside, "stats".
    row["timeOnGroundPercentage"] = get_in(entry, ["playerStats", "timeOnGroundPercentage"])
    row["rowLastUpdated"] = get_in(entry, ["playerStats", "lastUpdated"])

    # Extended stats are copied only when the source actually has them.
    extended_fields = ("effectiveKicks", "kickEfficiency", "kickToHandballRatio", "effectiveDisposals")
    row.update({field: extended[field] for field in extended_fields if field in extended})
    return row

def write_csv(rows: List[dict], path: str):
    """Write `rows` to `path` as UTF-8 CSV with a fixed leading column order.

    The known columns always come first, in the order listed below; any other
    key found in a row is appended in first-seen order. A value a row lacks
    is written as an empty string. With no rows only the header is written.
    """
    known = [
        "name", "number", "position", "playerId", "teamId", "teamSide",
        "goals", "behinds", "kicks", "handballs", "disposals", "marks", "hitouts", "tackles",
        "centreClearances", "stoppageClearances", "totalClearances",
        "inside50s", "rebound50s",
        "freesFor", "freesAgainst",
        "contestedPossessions", "uncontestedPossessions",
        "marksInside50", "contestedMarks", "onePercenters", "bounces", "intercepts", "turnovers",
        "scoreInvolvements", "goalAssists", "shotsAtGoal",
        "disposalEfficiency", "metresGained", "dreamTeamPoints", "ratingPoints",
        "timeOnGroundPercentage", "rowLastUpdated",
        "effectiveKicks", "kickEfficiency", "kickToHandballRatio", "effectiveDisposals",
    ]
    # Collect unexpected keys once each, preserving the order they appear in.
    listed = set(known)
    extra = []
    for row in rows:
        for key in row:
            if key not in listed:
                listed.add(key)
                extra.append(key)
    header = known + extra

    with open(path, "w", newline="", encoding="utf-8") as handle:
        writer = csv.DictWriter(handle, fieldnames=header)
        writer.writeheader()
        writer.writerows({col: row.get(col, "") for col in header} for row in rows)

async def scrape_match_to_csv(url: str, base: Optional[str] = None):
    """Scrape an AFL Match Centre URL and write three player-stats CSVs.

    Loads `url` in headless Chromium, records every JSON/XHR/fetch response
    body that parses as JSON, and searches those payloads for the
    "homeTeamPlayerStats" and "awayTeamPlayerStats" entries. Writes
    `<base>_homeTeamPlayerStats.csv`, `<base>_awayTeamPlayerStats.csv` and
    `<base>_allPlayers.csv` relative to the current directory, then prints a
    summary dict with the row counts and file names.

    Args:
        url: Match page URL.
        base: Filename prefix. Defaults to the last URL path segment,
            prefixed with "match" when that segment is all digits.
    """
    if base is None:
        tail = url.rstrip("/").split("/")[-1] or "afl_match"
        base = f"match{tail}" if tail.isdigit() else tail

    captured: List[Any] = []
    # Strong references to in-flight handler tasks. The event loop keeps only
    # weak references, so an unreferenced task may be garbage-collected
    # before it finishes reading its response body.
    pending: Set[asyncio.Task] = set()

    async with async_playwright() as pw:
        browser = await pw.chromium.launch(headless=True)
        try:
            context = await browser.new_context(
                user_agent=(
                    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                    "(KHTML, like Gecko) Chrome/124 Safari/537.36"
                ),
                locale="en-AU",
            )
            page = await context.new_page()

            async def grab(resp):
                # Best effort by design: a body that cannot be read or is not
                # valid JSON is simply skipped.
                try:
                    ct = (resp.headers or {}).get("content-type", "")
                    rtype = resp.request.resource_type
                    if ("application/json" in ct.lower()) or (rtype in ("xhr", "fetch")):
                        txt = await resp.text()
                        if txt:
                            captured.append(json.loads(txt))
                except Exception:
                    pass

            def on_response(resp):
                task = asyncio.create_task(grab(resp))
                pending.add(task)
                task.add_done_callback(pending.discard)

            page.on("response", on_response)
            await page.goto(url, wait_until="domcontentloaded", timeout=60_000)
            try:
                await page.wait_for_load_state("networkidle", timeout=30_000)
            except Exception:
                # The page may never go idle; carry on with what was captured.
                pass
            await asyncio.sleep(5)

            # Give handlers that are still reading a body a bounded chance to
            # finish, so their payloads are in `captured` before it is searched.
            if pending:
                _, unfinished = await asyncio.wait(set(pending), timeout=10)
                for task in unfinished:
                    task.cancel()
        finally:
            # Close the browser even when navigation or capture fails.
            await browser.close()

    targets = {"homeTeamPlayerStats", "awayTeamPlayerStats"}
    found: Dict[str, Any] = {}
    for blob in captured:
        deep_find_keys(blob, targets, found)
        if targets.issubset(found.keys()):
            break

    home = found.get("homeTeamPlayerStats") or []
    away = found.get("awayTeamPlayerStats") or []

    home_rows = [flatten_row(x, "home") for x in home]
    away_rows = [flatten_row(x, "away") for x in away]
    all_rows = home_rows + away_rows

    home_csv = f"{base}_homeTeamPlayerStats.csv"
    away_csv = f"{base}_awayTeamPlayerStats.csv"
    all_csv = f"{base}_allPlayers.csv"

    write_csv(home_rows, home_csv)
    write_csv(away_rows, away_csv)
    write_csv(all_rows, all_csv)

    print({"homeCount": len(home_rows), "awayCount": len(away_rows), "csv": [home_csv, away_csv, all_csv]})


In [8]:
# Match Centre page to scrape.
URL = "https://www.afl.com.au/afl/matches/7150"
BASE = "match7150"  # filenames will start with this


In [9]:
# Top-level await: valid in an IPython/Jupyter cell, not in a plain script.
await scrape_match_to_csv(URL, base=BASE)


  @classmethod


Error: BrowserType.launch: Executable doesn't exist at /Users/user/Library/Caches/ms-playwright/chromium_headless_shell-1181/chrome-mac/headless_shell
╔════════════════════════════════════════════════════════════╗
║ Looks like Playwright was just installed or updated.       ║
║ Please run the following command to download new browsers: ║
║                                                            ║
║     playwright install                                     ║
║                                                            ║
║ <3 Playwright Team                                         ║
╚════════════════════════════════════════════════════════════╝

In [12]:
# ONE-CELL NOTEBOOK: Scrape AFL Match Centre (7150) and export player stats to CSVs

# --- Install deps into the current Jupyter kernel and fetch Chromium ---
import sys, subprocess, asyncio, csv, json, os
from typing import Any, Dict, List, Optional, Set

def _sh(*args):
    try:
        subprocess.run(args, check=True)
    except Exception as e:
        print("Command failed (continuing):", " ".join(args), "\n", e)

# Install with the interpreter running this kernel (sys.executable) so the
# packages land in the environment the imports below will use.
_sh(sys.executable, "-m", "pip", "install", "-q", "playwright", "pandas")
# Download the Chromium build that Playwright launches.
_sh(sys.executable, "-m", "playwright", "install", "chromium")

# --- Imports after install ---
import pandas as pd
from playwright.async_api import async_playwright

# --- Utils ---
def deep_find_keys(node: Any, targets: Set[str], found: Dict[str, Any]) -> None:
    """Depth-first search recording the first value met for each key in `targets`.

    Only dicts and lists are descended into. `found` is filled in place and
    an existing entry is never replaced.
    """
    # Stop as soon as every target has been located.
    if targets.issubset(found):
        return
    if isinstance(node, list):
        for element in node:
            deep_find_keys(element, targets, found)
        return
    if not isinstance(node, dict):
        return
    for name, child in node.items():
        if name not in found and name in targets:
            found[name] = child
        # A matched value can itself contain other targets, so recurse anyway.
        deep_find_keys(child, targets, found)

def get_in(obj: Any, path: List[str], default=None):
    """Return the value reached by applying each key of `path` to nested dicts.

    `default` is returned when a step meets a non-dict or a missing key; an
    empty `path` yields `obj` unchanged.
    """
    current = obj
    for step in path:
        if not isinstance(current, dict) or step not in current:
            return default
        current = current[step]
    return current

def full_name(entry: dict) -> Optional[str]:
    """Return "given surname" for a player entry, or None when no name exists.

    The name under "playerStats" wins over the one under "player". If neither
    has a givenName or surname, the raw "playerName" value (from either
    place) is stringified instead.
    """
    name_paths = (
        ["playerStats", "player", "playerName"],
        ["player", "player", "playerName"],
    )
    for name_path in name_paths:
        parts = [
            get_in(entry, name_path + ["givenName"]),
            get_in(entry, name_path + ["surname"]),
        ]
        present = [part for part in parts if part]
        if present:
            return " ".join(present).strip()

    # Last resort: whichever raw playerName value is truthy.
    raw = get_in(entry, name_paths[0]) or get_in(entry, name_paths[1])
    if not raw:
        return None
    return str(raw).strip()

def flatten_row(entry: dict, side_label: str) -> dict:
    """Flatten one nested player entry into a single-level dict.

    Args:
        entry: One element of a team's player-stats list.
        side_label: Value stored under "teamSide" (callers pass "home" or
            "away").

    Returns:
        Identity fields plus the stat columns (None where the source has no
        value), followed by whichever extended-stat fields are present.
    """
    stats = get_in(entry, ["playerStats", "stats"], {}) or {}
    extended = stats.get("extendedStats") or {}
    clearances = stats.get("clearances") or {}

    player_id = get_in(entry, ["playerStats", "player", "playerId"])
    if not player_id:
        player_id = get_in(entry, ["player", "player", "playerId"])
    team_id = entry.get("teamId") or get_in(entry, ["playerStats", "teamId"])

    # Identity columns.
    row = {
        "name": full_name(entry),
        "number": get_in(entry, ["player", "jumperNumber"]),
        "position": get_in(entry, ["player", "player", "position"]),
        "playerId": player_id,
        "teamId": team_id,
        "teamSide": side_label,
    }

    # Core box score.
    box_score = (
        "goals", "behinds", "kicks", "handballs",
        "disposals", "marks", "hitouts", "tackles",
    )
    row.update((field, stats.get(field)) for field in box_score)

    # Clearances are read from their own nested dict.
    clearance_fields = ("centreClearances", "stoppageClearances", "totalClearances")
    row.update((field, clearances.get(field)) for field in clearance_fields)

    # Inside/rebound 50s, frees and possessions.
    territory_fields = (
        "inside50s", "rebound50s",
        "freesFor", "freesAgainst",
        "contestedPossessions", "uncontestedPossessions",
    )
    row.update((field, stats.get(field)) for field in territory_fields)

    # Either spelling is accepted: metres / meters.
    if "metresGained" in stats:
        row["metresGained"] = stats.get("metresGained")
    else:
        row["metresGained"] = stats.get("metersGained")

    # Extras, then efficiency and fantasy numbers.
    other_fields = (
        "marksInside50", "contestedMarks", "onePercenters", "bounces",
        "intercepts", "turnovers", "scoreInvolvements", "goalAssists",
        "shotsAtGoal",
        "disposalEfficiency", "dreamTeamPoints", "ratingPoints",
    )
    row.update((field, stats.get(field)) for field in other_fields)

    # These two are read from "playerStats" directly rather than from "stats".
    row["timeOnGroundPercentage"] = get_in(entry, ["playerStats", "timeOnGroundPercentage"])
    row["rowLastUpdated"] = get_in(entry, ["playerStats", "lastUpdated"])

    # Extended stats are added only when present in the source.
    extended_fields = ("effectiveKicks", "kickEfficiency", "kickToHandballRatio", "effectiveDisposals")
    row.update({field: extended[field] for field in extended_fields if field in extended})
    return row

def write_csv(rows: List[dict], path: str):
    """Save `rows` as a UTF-8 CSV at `path`.

    Columns start with the fixed list below; keys outside that list follow in
    the order they are first encountered. Missing values become empty
    strings, and an empty `rows` still produces the header line.
    """
    known = [
        "name", "number", "position", "playerId", "teamId", "teamSide",
        "goals", "behinds", "kicks", "handballs", "disposals", "marks", "hitouts", "tackles",
        "centreClearances", "stoppageClearances", "totalClearances",
        "inside50s", "rebound50s",
        "freesFor", "freesAgainst",
        "contestedPossessions", "uncontestedPossessions",
        "marksInside50", "contestedMarks", "onePercenters", "bounces", "intercepts", "turnovers",
        "scoreInvolvements", "goalAssists", "shotsAtGoal",
        "disposalEfficiency", "metresGained", "dreamTeamPoints", "ratingPoints",
        "timeOnGroundPercentage", "rowLastUpdated",
        "effectiveKicks", "kickEfficiency", "kickToHandballRatio", "effectiveDisposals",
    ]
    # Track already-listed names in a set; `extra` keeps first-seen order.
    listed = set(known)
    extra = []
    for row in rows:
        for key in row:
            if key not in listed:
                listed.add(key)
                extra.append(key)
    header = known + extra

    with open(path, "w", newline="", encoding="utf-8") as handle:
        writer = csv.DictWriter(handle, fieldnames=header)
        writer.writeheader()
        writer.writerows({col: row.get(col, "") for col in header} for row in rows)

# --- Async scraper (works in Jupyter) ---
async def scrape_match_to_csv(url: str, base: Optional[str] = None, out_dir: str = "/Users/user"):
    """Scrape an AFL Match Centre URL and write three player-stats CSVs.

    Loads `url` in headless Chromium, records every JSON/XHR/fetch response
    body that parses as JSON, and searches those payloads for the
    "homeTeamPlayerStats" and "awayTeamPlayerStats" entries.

    Args:
        url: Match page URL.
        base: Filename prefix. Defaults to the last URL path segment,
            prefixed with "match" when that segment is all digits.
        out_dir: Directory the CSVs are written to. The default keeps the
            previously hard-coded location.

    Returns:
        A dict with "homeCount", "awayCount" and "csv" (the three output
        paths in the order home, away, all players).
    """
    if base is None:
        tail = url.rstrip("/").split("/")[-1] or "afl_match"
        base = f"match{tail}" if tail.isdigit() else tail

    captured: List[Any] = []
    # Strong references to in-flight handler tasks. The event loop keeps only
    # weak references, so an unreferenced task may be garbage-collected
    # before it finishes reading its response body.
    pending: Set[asyncio.Task] = set()

    async with async_playwright() as pw:
        browser = await pw.chromium.launch(headless=True)
        try:
            context = await browser.new_context(
                user_agent=(
                    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                    "(KHTML, like Gecko) Chrome/124 Safari/537.36"
                ),
                locale="en-AU",
            )
            page = await context.new_page()

            async def grab(resp):
                # Best effort by design: a body that cannot be read or is not
                # valid JSON is simply skipped.
                try:
                    ct = (resp.headers or {}).get("content-type", "")
                    rtype = resp.request.resource_type
                    if ("application/json" in ct.lower()) or (rtype in ("xhr", "fetch")):
                        txt = await resp.text()
                        if txt:
                            captured.append(json.loads(txt))
                except Exception:
                    pass

            def on_response(resp):
                task = asyncio.create_task(grab(resp))
                pending.add(task)
                task.add_done_callback(pending.discard)

            page.on("response", on_response)

            await page.goto(url, wait_until="domcontentloaded", timeout=60_000)
            try:
                await page.wait_for_load_state("networkidle", timeout=30_000)
            except Exception:
                # The page may never go idle; carry on with what was captured.
                pass
            await asyncio.sleep(5)

            # Give handlers that are still reading a body a bounded chance to
            # finish, so their payloads are in `captured` before it is searched.
            if pending:
                _, unfinished = await asyncio.wait(set(pending), timeout=10)
                for task in unfinished:
                    task.cancel()
        finally:
            # Close the browser even when navigation or capture fails.
            await browser.close()

    targets = {"homeTeamPlayerStats", "awayTeamPlayerStats"}
    found: Dict[str, Any] = {}
    for blob in captured:
        deep_find_keys(blob, targets, found)
        if targets.issubset(found.keys()):
            break

    home = found.get("homeTeamPlayerStats") or []
    away = found.get("awayTeamPlayerStats") or []

    home_rows = [flatten_row(x, "home") for x in home]
    away_rows = [flatten_row(x, "away") for x in away]
    all_rows = home_rows + away_rows

    home_csv = os.path.join(out_dir, f"{base}_homeTeamPlayerStats.csv")
    away_csv = os.path.join(out_dir, f"{base}_awayTeamPlayerStats.csv")
    all_csv = os.path.join(out_dir, f"{base}_allPlayers.csv")

    write_csv(home_rows, home_csv)
    write_csv(away_rows, away_csv)
    write_csv(all_rows, all_csv)

    return {"homeCount": len(home_rows), "awayCount": len(away_rows), "csv": [home_csv, away_csv, all_csv]}

# --- Configure & RUN (supports top-level await in Jupyter) ---
URL  = "https://www.afl.com.au/afl/matches/7150"
BASE = "match7150"

# If the notebook already has an event loop, just await; otherwise try to run it.
try:
    loop = asyncio.get_running_loop()
    # Running loop (Jupyter) -> use top-level await
    result = await scrape_match_to_csv(URL, base=BASE)  # type: ignore  # top-level await is supported in IPython
except RuntimeError:
    # No running loop (plain Python) -> run normally
    result = asyncio.run(scrape_match_to_csv(URL, base=BASE))

print("Done:", result)

# --- Quick preview ---
for p in result["csv"]:
    if os.path.exists(p):
        print("Saved:", p)

if os.path.exists(f"/Users/user/{BASE}_allPlayers.csv"):
    df = pd.read_csv(f"/Users/user/{BASE}_allPlayers.csv")
    display(df.head(10))
    display(df.groupby("teamSide").size().to_frame("players"))


You are using a frozen ffmpeg browser which does not receive updates anymore on mac12. Please update to the latest version of your operating system to test up-to-date browsers.
Done: {'homeCount': 23, 'awayCount': 23, 'csv': ['/Users/user/match7150_homeTeamPlayerStats.csv', '/Users/user/match7150_awayTeamPlayerStats.csv', '/Users/user/match7150_allPlayers.csv']}
Saved: /Users/user/match7150_homeTeamPlayerStats.csv
Saved: /Users/user/match7150_awayTeamPlayerStats.csv
Saved: /Users/user/match7150_allPlayers.csv


Unnamed: 0,name,number,position,playerId,teamId,teamSide,goals,behinds,kicks,handballs,...,disposalEfficiency,metresGained,dreamTeamPoints,ratingPoints,timeOnGroundPercentage,rowLastUpdated,effectiveKicks,kickEfficiency,kickToHandballRatio,effectiveDisposals
0,Karl Amon,10,WR,CD_I297354,CD_T80,home,0.0,0.0,15.0,12.0,...,96.3,353.0,98.0,10.1,83.0,2025-08-07T12:22:27.011+0000,15.0,100.0,1.3,26.0
1,Tom Barrass,37,FB,CD_I990290,CD_T80,home,0.0,0.0,6.0,2.0,...,100.0,128.0,29.0,6.4,86.0,2025-08-07T12:22:27.011+0000,6.0,100.0,3.0,8.0
2,Josh Battle,24,BPL,CD_I998134,CD_T80,home,0.0,1.0,19.0,7.0,...,80.8,464.0,91.0,11.8,92.0,2025-08-07T12:22:27.011+0000,14.0,73.7,2.7,21.0
3,Luke Breust,22,SUB,CD_I280744,CD_T80,home,0.0,1.0,3.0,1.0,...,75.0,59.0,19.0,-1.0,26.0,2025-08-07T12:22:27.011+0000,2.0,66.7,3.0,3.0
4,Mabior Chol,18,CHF,CD_I994077,CD_T80,home,3.0,2.0,10.0,4.0,...,64.3,220.0,88.0,11.1,75.0,2025-08-07T12:22:27.011+0000,5.0,50.0,2.5,9.0
5,Massimo D'Ambrosio,16,WL,CD_I1005144,CD_T80,home,0.0,0.0,13.0,11.0,...,83.3,289.0,84.0,9.3,80.0,2025-08-07T12:22:27.011+0000,9.0,69.2,1.2,20.0
6,Calsher Dear,35,INT,CD_I1032100,CD_T80,home,1.0,2.0,5.0,3.0,...,50.0,115.0,36.0,2.5,56.0,2025-08-07T12:22:27.011+0000,1.0,20.0,1.7,4.0
7,Jack Ginnivan,33,FPL,CD_I1012857,CD_T80,home,0.0,0.0,13.0,13.0,...,57.7,311.0,78.0,8.5,84.0,2025-08-07T12:22:27.011+0000,5.0,38.5,1.0,15.0
8,Jack Gunston,19,HFFL,CD_I291351,CD_T80,home,4.0,0.0,8.0,3.0,...,81.8,215.0,77.0,12.0,85.0,2025-08-07T12:22:27.011+0000,6.0,75.0,2.7,9.0
9,Blake Hardwick,15,BPR,CD_I993794,CD_T80,home,0.0,0.0,7.0,4.0,...,81.8,123.0,41.0,8.1,89.0,2025-08-07T12:22:27.011+0000,5.0,71.4,1.8,9.0


Unnamed: 0_level_0,players
teamSide,Unnamed: 1_level_1
away,23
home,23


In [1]:
# ONE-CELL: AFL matchPlays -> CSV/NDJSON (handles token + 401)
# Requires: requests (and pandas only for the optional preview)

import json, csv, os, sys
from typing import Any, Dict, Iterable, List, Tuple, Set

import requests

# Endpoint POSTed to for a token; the token is then sent as the
# "x-media-mis-token" header when fetching URL.
TOKEN_URL = "https://api.afl.com.au/cfs/afl/WMCTok"
URL       = "https://sapi.afl.com.au/afl/matchPlays/CD_M20250142207"
BASE      = "CD_M20250142207"  # change if you want different filenames

# ---------------- Token + HTTP ----------------
def get_afl_token(timeout: float = 15.0) -> str:
    """POST to TOKEN_URL and return the "token" field of the JSON reply.

    Args:
        timeout: Timeout passed to `requests.post`.

    Raises:
        RuntimeError: If the reply has no truthy "token" value.

    An error HTTP status is raised by `raise_for_status()`.
    """
    response = requests.post(TOKEN_URL, timeout=timeout)
    response.raise_for_status()
    token = response.json().get("token")
    if token:
        return token
    raise RuntimeError("No 'token' in WMCTok response")

def fetch_json_with_token(url: str, token: str, timeout: float = 30.0) -> Any:
    """GET `url` with the token header and return the decoded JSON body.

    Args:
        url: Endpoint to fetch.
        token: Value sent as the "x-media-mis-token" header.
        timeout: Timeout passed to `requests.get`.

    Raises:
        RuntimeError: On a 401 response.

    Any other error status is raised by `raise_for_status()`.
    """
    browser_ua = (
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/124 Safari/537.36"
    )
    request_headers = {
        "Accept": "application/json, text/plain, */*",
        "User-Agent": browser_ua,
        "x-media-mis-token": token,
        # Origin/Referer mimic the site's own calls and can help avoid 401s.
        "Origin": "https://www.afl.com.au",
        "Referer": "https://www.afl.com.au/",
    }
    response = requests.get(url, headers=request_headers, timeout=timeout)
    unauthorized = response.status_code == 401
    if unauthorized:
        raise RuntimeError("401 Unauthorized: token expired/invalid (re-run to refresh the token)")
    response.raise_for_status()
    return response.json()

# -------------- Discovery (find play list) --------------
def _iter_nodes(root: Any) -> Iterable[Any]:
    stack, seen = [root], set()
    while stack:
        cur = stack.pop()
        ident = id(cur)
        if ident in seen:
            continue
        seen.add(ident)
        yield cur
        if isinstance(cur, dict):
            for v in cur.values():
                if isinstance(v, (dict, list)):
                    stack.append(v)
        elif isinstance(cur, list):
            for v in cur:
                if isinstance(v, (dict, list)):
                    stack.append(v)

def find_event_list(root: Any) -> Tuple[List[Dict[str, Any]], str]:
    """Locate the list of event dicts inside a payload.

    Returns `(events, where_found)`:
      * `root` itself when it is a non-empty list starting with a dict ("$");
      * otherwise the first of the well-known top-level keys holding such a
        list ("$.<key>");
      * otherwise the nested list whose first element's key names contain the
        most hint substrings ("<heuristic>"), earliest visited winning ties;
      * `([], "<not found>")` when no candidate exists.
    """
    def is_dict_list(value: Any) -> bool:
        return isinstance(value, list) and bool(value) and isinstance(value[0], dict)

    if is_dict_list(root):
        return root, "$"

    if isinstance(root, dict):
        for key in ("plays", "matchPlays", "events", "data", "chains"):
            candidate = root.get(key)
            if is_dict_list(candidate):
                return candidate, f"$.{key}"

    hints = ("period", "quarter", "qtr", "time", "display", "event", "type", "team", "player", "x", "y", "score")
    best = None
    best_score = -1
    for node in _iter_nodes(root):
        if not is_dict_list(node):
            continue
        joined_keys = " ".join(node[0].keys()).lower()
        score = sum(1 for hint in hints if hint in joined_keys)
        # Strictly greater: on equal scores the earlier candidate is kept.
        if score > best_score:
            best, best_score = node, score

    if best is None:
        return [], "<not found>"
    return best, "<heuristic>"

# ---------------- Flattening ----------------
def flatten_dict(d: Dict[str, Any], parent: str = "", sep: str = ".") -> Dict[str, Any]:
    """Collapse nested dicts into one level with `sep`-joined key paths.

    Non-empty lists made only of dicts are stored as a JSON string; every
    other list (including an empty one) becomes its items joined with "|".
    Scalars are copied unchanged.
    """
    flat: Dict[str, Any] = {}
    for key, value in d.items():
        full_key = key if not parent else f"{parent}{sep}{key}"
        if isinstance(value, dict):
            flat.update(flatten_dict(value, full_key, sep))
            continue
        if not isinstance(value, list):
            flat[full_key] = value
            continue
        only_dicts = bool(value) and all(isinstance(item, dict) for item in value)
        if only_dicts:
            flat[full_key] = json.dumps(value, ensure_ascii=False)
        else:
            flat[full_key] = "|".join(map(str, value))
    return flat

def choose_columns(flat_rows: List[Dict[str, Any]]) -> List[str]:
    """Return the CSV column order for `flat_rows`.

    Preferred names that occur in at least one row come first, in the order
    listed below; all remaining keys follow in sorted order.
    """
    preferred = [
        "id",
        "period", "period.number", "quarter", "qtr",
        "time", "clock", "displayTime", "period.displayTime", "period.secondsRemaining",
        "type", "eventType", "playType", "subType", "result", "outcome",
        "x", "y", "position.x", "position.y",
        "teamId", "team.id", "team.abbrev", "team.name",
        "playerId", "player.id", "player.name", "player.playerName.givenName", "player.playerName.surname",
        "homeScore", "awayScore", "score", "scoreValue",
    ]
    present = set()
    for row in flat_rows:
        present.update(row.keys())
    head = [name for name in preferred if name in present]
    tail = sorted(present.difference(head))
    return head + tail

# ---------------- Writing ----------------
def write_csv(rows: List[Dict[str, Any]], path: str) -> None:
    """Write flattened rows to `path` as UTF-8 CSV.

    Column order comes from `choose_columns`; a value a row lacks is written
    as an empty string. With no rows the file is created (or truncated)
    empty, without a header line.
    """
    if not rows:
        with open(path, "w", encoding="utf-8"):
            pass
        return

    header = choose_columns(rows)
    with open(path, "w", newline="", encoding="utf-8") as handle:
        writer = csv.DictWriter(handle, fieldnames=header, extrasaction="ignore")
        writer.writeheader()
        writer.writerows({name: row.get(name, "") for name in header} for row in rows)

def write_ndjson(rows: List[Dict[str, Any]], path: str) -> None:
    """Write `rows` to `path` as newline-delimited JSON, one object per line.

    Non-ASCII characters are written as-is (UTF-8) rather than escaped.
    """
    with open(path, "w", encoding="utf-8") as handle:
        handle.writelines(f"{json.dumps(row, ensure_ascii=False)}\n" for row in rows)

# ---------------- Run ----------------
# Fetch a fresh token, then the payload at URL. Either call raises on
# failure, which stops the cell before any file is written.
token = get_afl_token()
data  = fetch_json_with_token(URL, token)

# Keep the untouched payload next to the derived files.
raw_path = f"{BASE}_raw.json"
with open(raw_path, "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False, indent=2)

# `where` records how the event list was located ("$", "$.<key>",
# "<heuristic>" or "<not found>").
events, where = find_event_list(data)
flat_rows = [flatten_dict(e) for e in events]

# The CSV gets the flattened rows; the NDJSON keeps the original nested events.
csv_path   = f"{BASE}_plays.csv"
ndjson_path= f"{BASE}_plays.ndjson"
write_csv(flat_rows, csv_path)
write_ndjson(events, ndjson_path)

print("Found events at:", where)
print("Rows:", len(flat_rows))
print("Wrote:\n ", raw_path, "\n ", csv_path, "\n ", ndjson_path)

# Optional preview if pandas is installed
# Any failure here is swallowed on purpose: the preview is not essential.
try:
    import pandas as pd
    if os.path.exists(csv_path):
        display(pd.read_csv(csv_path).head(10))
except Exception:
    pass


Found events at: <heuristic>
Rows: 8
Wrote:
  CD_M20250142207_raw.json 
  CD_M20250142207_plays.csv 
  CD_M20250142207_plays.ndjson


Unnamed: 0,x,y,teamId,playerId,behindInfo,description,displayOrder,disposal,periodSeconds,shotAtGoal
0,-62,-15,CD_T80,CD_I1005144,,Loose Ball Get Crumb,1967,,1598,
1,-65,-16,CD_T80,CD_I1005144,,Kick,1968,effective,1600,
2,-40,-16,CD_T80,CD_I1017094,,Uncontested Mark,1969,,1601,
3,-38,-16,CD_T80,CD_I1017094,,Kick,1970,effective,1606,
4,-15,-59,CD_T80,CD_I994077,,Uncontested Mark,1971,,1608,
5,-23,-60,CD_T80,CD_I994077,,Kick,1972,effective,1618,
6,-43,-48,CD_T80,CD_I297354,,Uncontested Mark,1973,,1620,
7,-43,-48,CD_T80,CD_I297354,,Kick,1974,effective,1622,
