In [91]:
from statsbombpy import sb
import pandas as pd
import numpy as np

In [92]:
df = sb.matches(competition_id=11, season_id=27)

In [93]:
df.columns

Index(['match_id', 'match_date', 'kick_off', 'competition', 'season',
       'home_team', 'away_team', 'home_score', 'away_score', 'match_status',
       'match_status_360', 'last_updated', 'last_updated_360', 'match_week',
       'competition_stage', 'stadium', 'referee', 'home_managers',
       'away_managers', 'data_version', 'shot_fidelity_version',
       'xy_fidelity_version'],
      dtype='object')

In [94]:
teams1 = df['home_team'].unique()
teams2 = df['away_team'].unique()

In [95]:
teams = set(teams1) | set(teams2)

In [96]:
def get_team_id_matches(team, df):
    idvalues = df[(df['home_team'] == team) | (df['away_team'] == team)]['match_id'].unique()
    return idvalues

In [100]:
def get_players(team, df):
    #given a team, it will return its players
    idvalues = get_team_id_matches(team, df)
    players = set()
    for id in idvalues:
        lineup = sb.lineups(match_id=id)[team]
        players.update(lineup['player_name'].unique())
    print(players)
    return players

In [101]:
lineups = {}

for team in teams:
    lineups[f'lineup_{team}'] = set() 
    lineups[f'lineup_{team}'].update(get_players(team, df=df))  

{'Mickaël Ciani', 'Francesco Bardi', 'Óscar Esau Duarte Gaitán', 'Joan Jordán Moreno', 'Mamadou Sylla Diallo', 'Marco Asensio Willemsen', 'Gerard Moreno Balaguero', 'Víctor Guillermo Álvarez Delgado', 'Roberto Antonio Correa Silva', 'Juan Rafael Fuentes Hernández', 'Lluís López Mármol', 'Antonio José Raillo Arenas', 'Jorge Franco Alviz', 'Álvaro González Soberón', 'José Alberto Cañas Ruiz Herrera', 'Víctor Sánchez Mata', 'Anaitz Arbilla Zabala', 'Francisco Montañés Claverías', 'Giedrius Arlauskis', 'Rubén Duarte Sánchez', 'Javier López Rodríguez', 'Andrés Tomás Prieto Albert', 'Enzo Pablo Roco Roco', 'Hernán Arsenio Pérez González', 'Abraham González Casanova', 'Pau López Sabata', 'Papa Kouly Diop', 'Salvador Sevilla López', 'Felipe Salvador Caicedo Corozo'}
{'Ignacio Camacho Barnola', 'Arthur Etienne Boka', 'Aarón Escandell Banacloche', 'Juan Carlos Pérez López', 'Idriss Carlos Kameni', 'Nordin Amrabat', 'Marcos Alberto Angeleri', 'Francisco Guillermo Ochoa Magaña', 'Juan Pablo Añor A

In [124]:
def get_events(team_ids):
    events = pd.DataFrame()
    for id in team_ids:
        match_events = sb.events(match_id=id)
        events = pd.concat([events, match_events], axis=0, ignore_index=True)
    return events

In [126]:
def calculate_passes_ij(player1, player2, events_df):
    #calculates probability of pass from player 1 to player 2 based on an estimator
    #relevant events (pass)
    pass_events = events_df[events_df['type']=='Pass']
    
    #relevant passes (players)
    pass_events_ij = pass_events[(pass_events['player'] == player1) & (pass_events['pass_recipient'] == player2)]
    total_i_pass = pass_events[pass_events['player']==player1]

    #length
    passes_ij = len(pass_events_ij)
    passes_i = len(total_i_pass)


    return passes_ij/passes_i if passes_i >0 else 0

In [127]:
def calculate_pass_matrix(team, df, lineups, team_events):
    # First, we get the IDs of the matches that the team took part in
    #team_ids = get_team_id_matches(team, df)
    
    # Now, we get the players
    players = lineups[f'lineup_{team}']
    
    # Initialize an empty pass matrix for the given team
    num_players = len(players)
    pass_matrix = np.zeros((num_players, num_players))

    #i want to keep track og which players represent the [i][j] values
    player_index_map = {player: i for i, player in enumerate(players)}

    
    # Now, we get all of the events relevant to the team
    #team_events = get_events(team_ids)

    print(f"Building pass matrix for '{team}'...")
    for i, player1 in enumerate(players):
        for j, player2 in enumerate(players):
            if player1 != player2:
                pass_matrix[i, j] = calculate_passes_ij(player1, player2, team_events)
        
        #print(f"Finished processing passes for {player1}. Access pass proportions with index {i}")

    print("Pass matrix construction complete.")
    #print(f"Pass Matrix for '{team}':")
    
    return pass_matrix, player_index_map

In [128]:
import warnings
#para que no aparezca el warning
warnings.filterwarnings("ignore", category=UserWarning, module='statsbombpy')

In [129]:
pass_matrices = {}
player_index_map = {}

In [131]:
for team in teams:
    team_ids = get_team_id_matches(team, df)
    team_events = get_events(team_ids)
    pass_matrices[team], player_index_map[team] = calculate_pass_matrix(team, df, lineups, team_events)

Building pass matrix for 'Espanyol'...
Pass matrix construction complete.
Building pass matrix for 'Málaga'...
Pass matrix construction complete.
Building pass matrix for 'Barcelona'...
Pass matrix construction complete.
Building pass matrix for 'Getafe'...
Pass matrix construction complete.
Building pass matrix for 'Granada'...
Pass matrix construction complete.
Building pass matrix for 'RC Deportivo La Coruña'...
Pass matrix construction complete.
Building pass matrix for 'Eibar'...
Pass matrix construction complete.
Building pass matrix for 'Villarreal'...
Pass matrix construction complete.
Building pass matrix for 'Celta Vigo'...
Pass matrix construction complete.
Building pass matrix for 'Valencia'...
Pass matrix construction complete.
Building pass matrix for 'Real Betis'...
Pass matrix construction complete.
Building pass matrix for 'Sevilla'...
Pass matrix construction complete.
Building pass matrix for 'Atlético Madrid'...
Pass matrix construction complete.
Building pass matri

In [132]:
def pass_effectiveness(matrix, player_index_maps):
    data = []

    #keys have the team values
    for team in matrix.keys():
        # we get the pass matrix of the team
        pass_matrix = matrix[team]
        # now the players  index for the team
        player_index_map = player_index_maps[team]
        
        # go over each player in the pass matrix
        for i in range(len(pass_matrix)):
            # sum  values for the current player
            e = np.sum(pass_matrix[i])
            
            # now we will get the player name from the index to be able to link the pass effectivenesss
            player_name = [name for name, idx in player_index_map.items() if idx == i][0]
            
            # add this data to a list
            data.append({'team': team, 'player_name': player_name, 'pass_effectiveness': e})

    # add all data to a df
    df = pd.DataFrame(data, columns=['team', 'player_name', 'pass_effectiveness'])
    
    return df

In [133]:
df_pass_effectiveness = pass_effectiveness(pass_matrices, player_index_map)

In [134]:
df_pass_effectiveness

Unnamed: 0,team,player_name,pass_effectiveness
0,Espanyol,Antonio José Raillo Arenas,0.938053
1,Espanyol,Jorge Franco Alviz,0.851163
2,Espanyol,Salvador Sevilla López,0.924699
3,Espanyol,Mickaël Ciani,0.885417
4,Espanyol,Juan Rafael Fuentes Hernández,0.939828
...,...,...,...
607,Real Sociedad,Igor Zubeldia Elorza,1.000000
608,Real Sociedad,Alberto De La Bella Madureño,0.882426
609,Real Sociedad,Jonathas Cristian de Jesus,0.875000
610,Real Sociedad,Sergio Canales Madrazo,0.880626


In [135]:
df_pass_effectiveness_sorted = df_pass_effectiveness.sort_values(by='pass_effectiveness', ascending=False)

In [145]:
df_pass_effectiveness_sorted[(df_pass_effectiveness_sorted['pass_effectiveness'] < 1.0) & 
                             (df_pass_effectiveness_sorted['pass_effectiveness'] > 0.0)]


Unnamed: 0,team,player_name,pass_effectiveness
504,Real Madrid,Marcos Llorente Moreno,1.000000
253,Celta Vigo,Diego Alende López,1.000000
564,Rayo Vallecano,Luis Carlos Fariña Olivera,1.000000
223,Villarreal,Rodrigo Hernández Cascante,0.986667
86,Barcelona,Arda Turan,0.982372
...,...,...,...
350,Sevilla,Gaël Kakuta,0.533333
267,Valencia,Carlos Carbonell Gil,0.500000
43,Málaga,Hachim Mastour,0.500000
392,Atlético Madrid,Guilherme Magdalena Siqueira,0.428571
