In [9]:
import pandas as pd
import numpy as np
import networkx as nx

# === 1. LOAD DATA ===
df = pd.read_csv(r"/Users/marclambertes/Python/Matches/Men/2024-2025/Eredivisie 2024-2025/AZ 2-1 Ajax.csv")

# === 2. FILTER ONE TEAM (AJAX) ===
OneTeam = df.loc[df['contestantId'] == 'd0zdg647gvgc95xdtk1vpbkys'].reset_index()

# === 3. CREATE TIME COLUMN ===
OneTeam["newsecond"] = 60 * OneTeam["timeMin"] + OneTeam["timeSec"]
OneTeam.sort_values(by='newsecond', inplace=True)

# === 4. IDENTIFY PASSERS AND RECIPIENTS ===
OneTeam['passer'] = OneTeam['playerName']
OneTeam['recipient'] = OneTeam['passer'].shift(-1)

# === 5. FILTER FOR PASSES ===
Passes = OneTeam[OneTeam['typeId'] == 1]
Completions = Passes[Passes['outcome'] == 1].copy()

# === 6. FILTER PASSES BEFORE FIRST SUBSTITUTION ===
Subs = OneTeam[OneTeam['typeId'] == 18]
SubTimes = Subs["newsecond"]
SubOne = SubTimes.min()
Completions = Completions[Completions['newsecond'] < SubOne]

# === 7. CALCULATE AVERAGE LOCATIONS FOR NETWORK ===
average_locs_and_count = Completions.groupby('passer').agg({'x': ['mean'], 'y': ['mean', 'count']})
average_locs_and_count.columns = ['x', 'y', 'count']

# === 8. PASSING LINKS BETWEEN PLAYERS ===
passes_between = Completions.groupby(['passer', 'recipient']).id.count().reset_index()
passes_between.rename(columns={'id': 'pass_count'}, inplace=True)
passes_between = passes_between.merge(average_locs_and_count, left_on='passer', right_index=True)
passes_between = passes_between.merge(average_locs_and_count, left_on='recipient', right_index=True, suffixes=['', '_end'])
passes_between = passes_between[passes_between['pass_count'] > 2]

# === 9. BUILD NETWORK GRAPH ===
G = nx.DiGraph()
for _, row in passes_between.iterrows():
    G.add_edge(row['passer'], row['recipient'], weight=row['pass_count'])
G_undirected = G.to_undirected()

# === 10. CALCULATE NETWORK METRICS ===
in_degree = dict(G.in_degree(weight='weight'))
out_degree = dict(G.out_degree(weight='weight'))
betweenness = nx.betweenness_centrality(G, weight='weight', normalized=True)
closeness = nx.closeness_centrality(G)
eigenvector = nx.eigenvector_centrality_numpy(G, weight='weight')
degree_centrality = nx.degree_centrality(G)
pagerank = nx.pagerank(G, weight='weight')
harmonic_centrality = nx.harmonic_centrality(G)
katz_centrality = nx.katz_centrality_numpy(G, weight='weight')
load_centrality = nx.load_centrality(G, weight='weight')
hubs, authorities = nx.hits(G, max_iter=1000, normalized=True)

# === 11. GLOBAL NETWORK METRICS ===
density = nx.density(G)
reciprocity = nx.reciprocity(G)
assortativity = nx.degree_assortativity_coefficient(G)
avg_clustering = nx.average_clustering(G, weight='weight')
communities = list(nx.community.label_propagation_communities(G_undirected))
modularity = nx.community.quality.modularity(G_undirected, communities)
avg_path_length = nx.average_shortest_path_length(G, weight='weight') if nx.is_strongly_connected(G) else None
graph_radius = nx.radius(G_undirected) if nx.is_connected(G_undirected) else None
graph_periphery = list(nx.periphery(G_undirected)) if nx.is_connected(G_undirected) else None
graph_center = list(nx.center(G_undirected)) if nx.is_connected(G_undirected) else None
articulation_points = list(nx.articulation_points(G_undirected))
bridges = list(nx.bridges(G_undirected))

# === 12. COMBINE INTO PLAYER-LEVEL METRICS DATAFRAME ===
metrics = pd.DataFrame({
    'In-Degree': in_degree,
    'Out-Degree': out_degree,
    'Betweenness': betweenness,
    'Closeness': closeness,
    'Eigenvector': eigenvector,
    'Degree Centrality': degree_centrality,
    'PageRank': pagerank,
    'Harmonic Centrality': harmonic_centrality,
    'Katz Centrality': katz_centrality,
    'Load Centrality': load_centrality,
    'Hubs': hubs,
    'Authorities': authorities
}).fillna(0).sort_values(by='Betweenness', ascending=False)

# === 13. PRINT GLOBAL METRICS ===
print("Global Metrics:")
print(f"Network Density: {density:.4f}")
print(f"Network Reciprocity: {reciprocity:.4f}")
print(f"Network Assortativity: {assortativity:.4f}")
print(f"Modularity: {modularity:.4f}")
if avg_path_length is not None:
    print(f"Average Path Length: {avg_path_length:.4f}")
print(f"Graph Radius: {graph_radius}")
print(f"Graph Periphery: {graph_periphery}")
print(f"Graph Center: {graph_center}")
print(f"Number of Articulation Points: {len(articulation_points)}")
print(f"Number of Bridges: {len(bridges)}")

print("\nPlayer-Level Metrics:")
print(metrics)

# === 14. EXPORT PLAYER METRICS ===
metrics.to_csv('pass_network_metrics_expanded.csv', index=True)

# === 15. EXTRACT endX and endY FROM QUALIFIERS ===
type_cols = [col for col in df.columns if '/qualifierId' in col]
df['endX'] = 0.0
df['endY'] = 0.0

for i in range(len(df)):
    for j in range(len(type_cols)):
        if df.at[i, type_cols[j]] == 140:
            df.at[i, 'endX'] = df.at[i, f'qualifier/{j}/value']
        elif df.at[i, type_cols[j]] == 141:
            df.at[i, 'endY'] = df.at[i, f'qualifier/{j}/value']

# === 16. MERGE endX/endY INTO PASSES ===
Completions = Completions.merge(df[['id', 'endX', 'endY']], on='id', how='left')

# === 17. ENSURE NUMERIC COLUMNS ===
cols_to_convert = ['x', 'y', 'endX', 'endY']
for col in cols_to_convert:
    Completions[col] = pd.to_numeric(Completions[col], errors='coerce')

Completions.dropna(subset=['x', 'y', 'endX', 'endY'], inplace=True)

# === 18. CALCULATE PASS LENGTH, RECEIVER VALUE, AND TOTAL VALUE ===
Completions['passer_betweenness'] = Completions['passer'].map(betweenness)
Completions['recipient_betweenness'] = Completions['recipient'].map(betweenness)

# Euclidean pass length
Completions['pass_length'] = np.sqrt(
    (Completions['endX'] - Completions['x']) ** 2 +
    (Completions['endY'] - Completions['y']) ** 2
)

# Receiver-only value
Completions['receiver_value'] = Completions['pass_length'] * Completions['recipient_betweenness']

# Combined value (passer + recipient)
Completions['centrality_pass_value'] = Completions['pass_length'] * (
    (Completions['passer_betweenness'] + Completions['recipient_betweenness']) / 2
)

# === 19. DISPLAY TOP VALUED PASSES ===
top_valued_passes = Completions.sort_values(by='centrality_pass_value', ascending=False)
print("\nTop Centrality-Weighted Passes (with Receiver Value):")
print(top_valued_passes[['passer', 'recipient', 'pass_length',
                         'passer_betweenness', 'recipient_betweenness',
                         'receiver_value', 'centrality_pass_value']].head(10))

# === 20. EXPORT TO EXCEL ===
Completions.to_excel('centrality_weighted_passes_with_receiver.xlsx', index=False)


  A = nx.adjacency_matrix(G, nodelist=nodelist, weight=weight).todense().T
  A = nx.adjacency_matrix(G, nodelist=list(G), dtype=float)


Global Metrics:
Network Density: 0.3727
Network Reciprocity: 0.7805
Network Assortativity: 0.0260
Modularity: 0.0000
Graph Radius: 2
Graph Periphery: ['C. Akpom', 'W. Weghorst', 'D. Klaassen']
Graph Center: ['B. van den Boomen', 'A. Gaaei', 'J. Šutalo', 'R. Pasveer', 'S. Berghuis', 'J. Hato', 'Y. Baas', 'K. Taylor']
Number of Articulation Points: 0
Number of Bridges: 0

Player-Level Metrics:
                   In-Degree  Out-Degree  Betweenness  Closeness  Eigenvector  \
B. van den Boomen         24          34     0.296296   0.581818     0.299266   
J. Hato                   23          26     0.131481   0.492308     0.266832   
S. Berghuis               16          12     0.122222   0.492308     0.144472   
Y. Baas                   56          59     0.105556   0.640000     0.561288   
A. Gaaei                  21          19     0.066667   0.492308     0.216022   
R. Pasveer                31          34     0.055556   0.581818     0.373406   
K. Taylor                 15          

In [13]:
import pandas as pd
import numpy as np
import networkx as nx

# === 1. LOAD MATCH DATA ===
df = pd.read_csv(r"/Users/marclambertes/Python/Matches/Men/2024-2025/Eredivisie 2024-2025/AZ 2-1 Ajax.csv")

# === 2. FILTER ONE TEAM (Ajax) ===
OneTeam = df[df['contestantId'] == 'd0zdg647gvgc95xdtk1vpbkys'].reset_index()

# === 3. CREATE TIME COLUMN ===
OneTeam["newsecond"] = 60 * OneTeam["timeMin"] + OneTeam["timeSec"]
OneTeam.sort_values(by='newsecond', inplace=True)

# === 4. IDENTIFY PASSERS AND RECIPIENTS ===
OneTeam['passer'] = OneTeam['playerName']
OneTeam['recipient'] = OneTeam['passer'].shift(-1)

# === 5. FILTER COMPLETED PASSES ===
Passes = OneTeam[OneTeam['typeId'] == 1]
Completions = Passes[Passes['outcome'] == 1].copy()

# === 6. REMOVE PASSES AFTER FIRST SUBSTITUTION ===
Subs = OneTeam[OneTeam['typeId'] == 18]
SubOne = Subs["newsecond"].min()
Completions = Completions[Completions['newsecond'] < SubOne]

# === 7. AVERAGE LOCATIONS FOR PASS NETWORK ===
average_locs = Completions.groupby('passer').agg({'x': ['mean'], 'y': ['mean', 'count']})
average_locs.columns = ['x', 'y', 'count']

# === 8. PASSING LINKS ===
passes_between = Completions.groupby(['passer', 'recipient']).id.count().reset_index()
passes_between.rename(columns={'id': 'pass_count'}, inplace=True)
passes_between = passes_between.merge(average_locs, left_on='passer', right_index=True)
passes_between = passes_between.merge(average_locs, left_on='recipient', right_index=True, suffixes=['', '_end'])
passes_between = passes_between[passes_between['pass_count'] > 2]

# === 9. BUILD NETWORK GRAPH ===
G = nx.DiGraph()
for _, row in passes_between.iterrows():
    G.add_edge(row['passer'], row['recipient'], weight=row['pass_count'])

# === 10. CALCULATE CENTRALITY METRICS ===
betweenness = nx.betweenness_centrality(G, weight='weight', normalized=True)

# === 11. ADD endX / endY FROM QUALIFIERS ===
type_cols = [col for col in df.columns if '/qualifierId' in col]
df['endX'] = 0.0
df['endY'] = 0.0
for i in range(len(df)):
    for j in range(len(type_cols)):
        if df.at[i, type_cols[j]] == 140:
            df.at[i, 'endX'] = df.at[i, f'qualifier/{j}/value']
        elif df.at[i, type_cols[j]] == 141:
            df.at[i, 'endY'] = df.at[i, f'qualifier/{j}/value']

# === 12. MERGE endX / endY INTO COMPLETIONS ===
Completions = Completions.merge(df[['id', 'endX', 'endY']], on='id', how='left')

# === 13. CLEAN COORDINATES (Ensure Numeric) ===
for col in ['x', 'y', 'endX', 'endY']:
    df[col] = pd.to_numeric(df[col], errors='coerce')
    Completions[col] = pd.to_numeric(Completions[col], errors='coerce')

df.dropna(subset=['x', 'y', 'endX', 'endY'], inplace=True)
Completions.dropna(subset=['x', 'y', 'endX', 'endY'], inplace=True)

# === 14. CENTRALITY SCORES PER PASS ===
Completions['passer_betweenness'] = Completions['passer'].map(betweenness)
Completions['recipient_betweenness'] = Completions['recipient'].map(betweenness)

# === 15. CALCULATE PASS LENGTH ===
Completions['pass_length'] = np.sqrt(
    (Completions['endX'] - Completions['x']) ** 2 +
    (Completions['endY'] - Completions['y']) ** 2
)

# === 16. CENTRALITY-BASED VALUE ===
Completions['receiver_value'] = Completions['pass_length'] * Completions['recipient_betweenness']
Completions['centrality_pass_value'] = Completions['pass_length'] * (
    (Completions['passer_betweenness'] + Completions['recipient_betweenness']) / 2
)

# === 17. LOAD MATRIX-BASED xT GRID ===
xT = pd.read_csv("xT_grid.csv", header=None)
xT = np.array(xT)
xT_rows, xT_cols = xT.shape
print(f"xT grid shape: {xT_rows} rows × {xT_cols} cols")

# === 18. BIN COORDINATES INTO xT ZONES (df contains full events) ===
df['x1_bin'] = pd.cut(df['x'], bins=xT_cols, labels=False)
df['y1_bin'] = pd.cut(df['y'], bins=xT_rows, labels=False)
df['x2_bin'] = pd.cut(df['endX'], bins=xT_cols, labels=False)
df['y2_bin'] = pd.cut(df['endY'], bins=xT_rows, labels=False)

df.dropna(subset=['x1_bin', 'y1_bin', 'x2_bin', 'y2_bin'], inplace=True)
df[['x1_bin', 'y1_bin', 'x2_bin', 'y2_bin']] = df[['x1_bin', 'y1_bin', 'x2_bin', 'y2_bin']].astype(int)

# === 19. LOOKUP xT VALUES FROM MATRIX ===
df['start_zone_value'] = df[['y1_bin', 'x1_bin']].apply(lambda x: xT[x[0]][x[1]], axis=1)
df['end_zone_value'] = df[['y2_bin', 'x2_bin']].apply(lambda x: xT[x[0]][x[1]], axis=1)
df['xt_value'] = df['end_zone_value'] - df['start_zone_value']

# === 20. MERGE xT VALUE INTO COMPLETIONS ===
Completions = Completions.merge(df[['id', 'xt_value']], on='id', how='left')

# === 21. DISPLAY TOP PASSES ===
top_passes = Completions.sort_values(by='centrality_pass_value', ascending=False)
print("\nTop Passes (Centrality + xT):")
print(top_passes[['passer', 'recipient', 'pass_length',
                  'passer_betweenness', 'recipient_betweenness',
                  'receiver_value', 'centrality_pass_value', 'xt_value']].head(10))

# === 22. EXPORT TO EXCEL ===
Completions.to_excel("passes_with_centrality_and_xt_matrix.xlsx", index=False)


xT grid shape: 8 rows × 12 cols

Top Passes (Centrality + xT):
                passer    recipient  pass_length  passer_betweenness  \
125  B. van den Boomen      J. Hato    67.239349            0.296296   
56   B. van den Boomen  S. Berghuis    58.640941            0.296296   
54   B. van den Boomen      J. Hato    43.830697            0.296296   
233  B. van den Boomen      Y. Baas    43.111599            0.296296   
128  B. van den Boomen      Y. Baas    40.958516            0.296296   
187          K. Taylor  S. Berghuis    91.930898            0.055556   
87   B. van den Boomen   R. Pasveer    42.837834            0.296296   
245  B. van den Boomen    J. Šutalo    49.857798            0.296296   
38   B. van den Boomen     A. Gaaei    40.428208            0.296296   
80   B. van den Boomen  S. Berghuis    34.693083            0.296296   

     recipient_betweenness  receiver_value  centrality_pass_value  xt_value  
125               0.131481        8.840729              14.381750 