In [None]:
# ==========================================
# FRANCE PASS NETWORK (STARTING XI ONLY)
# GEPHI-READY | DIRECTED | WC 2018 FINAL
# ==========================================

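# Install dependencies
# NOTE(review): networkx is installed here but never imported in this cell — confirm it is still needed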
!pip install pandas requests networkx

# -------------------------------
# Mount Google Drive
# -------------------------------
# Colab-only step: exposes the user's Drive under /content/drive so the
# CSVs written to SAVE_DIR (defined below) end up in Drive.
from google.colab import drive
drive.mount('/content/drive')

import os
import requests
import pandas as pd
from collections import Counter

# Output folder for the exported node/edge CSVs. exist_ok=True makes
# re-running the cell safe when the folder already exists.
SAVE_DIR = "/content/drive/MyDrive/gephi_france_pass_network_XI"
os.makedirs(SAVE_DIR, exist_ok=True)

# -------------------------------
# Load StatsBomb events
# France vs Croatia – WC 2018 Final
# -------------------------------
MATCH_ID = 8658
EVENTS_URL = f"https://raw.githubusercontent.com/statsbomb/open-data/master/data/events/{MATCH_ID}.json"

# The timeout stops the cell from hanging forever if the host stalls, and
# raise_for_status() reports HTTP errors (e.g. 404) right here instead of
# letting them show up as a confusing JSON decode failure on an error page.
response = requests.get(EVENTS_URL, timeout=30)
response.raise_for_status()

# The payload is a JSON array with one object per match event
events = response.json()
print(f"Loaded {len(events):,} events")

# -------------------------------
# Find France's Starting XI
# -------------------------------
# Take the first "Starting XI" event recorded for France (None if absent)
france_lineup_event = next(
    (
        event
        for event in events
        if event.get("type", {}).get("name") == "Starting XI"
        and event.get("team", {}).get("name") == "France"
    ),
    None,
)

# Without such an event the squad stays an empty set
if france_lineup_event is None:
    starting_xi = set()
else:
    starting_xi = {
        slot["player"]["name"]
        for slot in france_lineup_event["tactics"]["lineup"]
    }

print("France Starting XI:")
for name in sorted(starting_xi):
    print(" -", name)

# -------------------------------
# Keep France passes exchanged between two starters
# -------------------------------
# NOTE(review): passes are not filtered on their outcome here — confirm
# whether unsuccessful passes should count towards the network.
passes = [
    event
    for event in events
    if event.get("type", {}).get("name") == "Pass"
    and event.get("team", {}).get("name") == "France"
]

# Pair each pass's player with its listed recipient; a missing name
# comes back as None and is dropped by the membership test below.
pass_pairs = (
    (
        event.get("player", {}).get("name"),
        event.get("pass", {}).get("recipient", {}).get("name"),
    )
    for event in passes
)
rows = [
    (src, dst)
    for src, dst in pass_pairs
    if src in starting_xi and dst in starting_xi
]

df_passes = pd.DataFrame(rows, columns=["passer", "receiver"])
print(f"Passes among Starting XI: {len(df_passes):,}")

# -------------------------------
# Nodes table: one row per starter, numeric Id
# -------------------------------
players = sorted(starting_xi)

# Ids are 1-based and follow the alphabetical order of the player names
player2id = dict(zip(players, range(1, len(players) + 1)))

nodes_df = pd.DataFrame({
    "Id": [player2id[name] for name in players],
    "Label": players,
    "Team": "France",
})

# -------------------------------
# Edges table: directed, weighted by pass count
# -------------------------------
# Count how many times each (passer id, receiver id) pair occurs
edge_counter = Counter(
    (player2id[src], player2id[dst])
    for src, dst in zip(df_passes["passer"], df_passes["receiver"])
)

# One record per distinct ordered pair; every edge is marked "Directed"
edge_records = [
    (source_id, target_id, "Directed", weight)
    for (source_id, target_id), weight in edge_counter.items()
]
edges_df = pd.DataFrame(
    edge_records,
    columns=["Source", "Target", "Type", "Weight"],
)

# Heaviest edges first
edges_df = edges_df.sort_values("Weight", ascending=False)

# -------------------------------
# Write the node and edge tables as CSV for Gephi
# -------------------------------
nodes_path, edges_path = (
    os.path.join(SAVE_DIR, filename)
    for filename in ("france_XI_nodes.csv", "france_XI_edges.csv")
)

# index=False keeps the pandas row index out of the exported files
for table, destination in ((nodes_df, nodes_path), (edges_df, edges_path)):
    table.to_csv(destination, index=False)

print("\nSaved Gephi-ready files:", nodes_path, edges_path, sep="\n")

# -------------------------------
# Quick checks on the exported tables
# -------------------------------
node_count = len(nodes_df)
edge_count = len(edges_df)

print("\nNodes (should be 11):", node_count)
print("Edges:", edge_count)
print("\nTop edges:")
print(edges_df.iloc[:5])  # first five rows of the edge table

Mounted at /content/drive
Loaded 2,978 events
France Starting XI:
 - Antoine Griezmann
 - Benjamin Pavard
 - Blaise Matuidi
 - Hugo Lloris
 - Kylian Mbappé Lottin
 - Lucas Hernández Pi
 - N''Golo Kanté
 - Olivier Giroud
 - Paul Pogba
 - Raphaël Varane
 - Samuel Yves Umtiti
Passes among Starting XI: 194

Saved Gephi-ready files:
/content/drive/MyDrive/gephi_france_pass_network_XI/france_XI_nodes.csv
/content/drive/MyDrive/gephi_france_pass_network_XI/france_XI_edges.csv

Nodes (should be 11): 11
Edges: 76

Top edges:
    Source  Target      Type  Weight
40       9       5  Directed       8
27       4       8  Directed       7
23       9       2  Directed       6
35       6       1  Directed       6
67       8       1  Directed       6
