In [268]:
## imports

import pandas as pd
import numpy as np

from tqdm import tqdm

In [242]:
# other imports
from itscalledsoccer.client import AmericanSoccerAnalysis
asa_client = AmericanSoccerAnalysis()

Gathering all players
Gathering all teams
Gathering all stadia
Gathering all managers
Gathering all referees
Finished initializing client


In [3]:
## Getting the data

In [3]:
stadia = asa_client.get_stadia(leagues='mls')

In [4]:
stadia.columns.to_list()

['stadium_id',
 'stadium_name',
 'capacity',
 'year_built',
 'roof',
 'turf',
 'street',
 'city',
 'province',
 'country',
 'postal_code',
 'latitude',
 'longitude',
 'field_x',
 'field_y',
 'competition']

In [5]:
refrees = asa_client.get_referees(leagues='mls')
refrees.columns.to_list()

['referee_id', 'referee_name', 'nationality', 'birth_date', 'competition']

In [6]:
managers = asa_client.get_managers(leagues='mls')
managers.columns.to_list()

['manager_id', 'manager_name', 'nationality', 'competition']

In [7]:
teams = asa_client.get_teams(leagues='mls')
teams.columns.to_list()

['team_id', 'team_name', 'team_short_name', 'team_abbreviation', 'competition']

In [8]:
players = asa_client.get_players(leagues='mls')
players.columns.to_list()

['player_id',
 'player_name',
 'birth_date',
 'nationality',
 'primary_broad_position',
 'primary_general_position',
 'secondary_broad_position',
 'secondary_general_position',
 'season_name',
 'height_ft',
 'height_in',
 'weight_lb',
 'competition']

In [9]:
games = asa_client.get_games(leagues='mls')
games.columns.to_list()

['game_id',
 'date_time_utc',
 'home_score',
 'away_score',
 'home_team_id',
 'away_team_id',
 'referee_id',
 'stadium_id',
 'home_manager_id',
 'away_manager_id',
 'expanded_minutes',
 'season_name',
 'matchday',
 'attendance',
 'knockout_game',
 'last_updated_utc',
 'extra_time',
 'penalties',
 'home_penalties',
 'away_penalties']

In [48]:
games['season_name']

0       2023
1       2023
2       2023
3       2023
4       2023
        ... 
4411    2013
4412    2013
4413    2013
4414    2013
4415    2013
Name: season_name, Length: 4416, dtype: int64

## Filtering out the players of interest

* Selecting only the wingers

In [126]:
# midfielders, primary position
players_midfield = players[players['primary_broad_position']== 'MF']

In [88]:
# wingers filteration, skip for now
# players_wingers = players_midfield[players_midfield['primary_general_position'] == 'W']

In [89]:
# strict wingers who do not have a secondary position defined, skip for now
# players_wingers = players_wingers[players_wingers['secondary_broad_position'].isnull()]

In [127]:
# select the wingers who players in sesons 2020 to 2023
start_season = 2020
end_season = 2023
desired_range = [str(yr) for yr in list(range(start_season, end_season+1))]

players_midfield = players_midfield[players_midfield['season_name'].apply(lambda seasons: set(seasons).issuperset(set(desired_range)))]

In [130]:
players_midfield

Unnamed: 0,player_id,player_name,birth_date,nationality,primary_broad_position,primary_general_position,secondary_broad_position,secondary_general_position,season_name,height_ft,height_in,weight_lb,competition
805,0Oq632k7Q6,Danny Leyva,2003-05-05,USA,MF,CM,,DM,"[2019, 2020, 2021, 2022, 2023]",5.0,10.0,139.0,mls
850,0Oq6lrerM6,Gastón Giménez,1991-07-27,Argentina,MF,DM,,,"[2020, 2021, 2022, 2023]",6.0,2.0,181.0,mls
854,0Oq6lyNdM6,Wil Trapp,1993-01-15,USA,MF,DM,,,"[2013, 2014, 2015, 2016, 2017, 2018, 2019, 202...",5.0,8.0,152.0,mls
862,0Oq6w947Q6,Michael Baldisimo,2000-04-13,Canada,MF,CM,,DM,"[2020, 2021, 2022, 2023]",5.0,6.0,154.0,mls
867,0Oq6wel7Q6,Cameron Duke,2001-02-13,USA,MF,CM,,,"[2020, 2021, 2022, 2023]",5.0,7.0,135.0,mls
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3451,XVqKYp2DM0,Yuya Kubo,1993-12-24,Japan,MF,CM,,,"[2020, 2021, 2022, 2023]",5.0,10.0,159.0,mls
3478,xW5prbllQg,Artur,1996-03-11,Brazil,MF,DM,,,"[2017, 2018, 2019, 2020, 2021, 2022, 2023]",5.0,11.0,150.0,mls
3481,xW5prK4BQg,Ariel Lassiter,1994-09-27,Costa Rica,MF,W,,,"[2015, 2016, 2017, 2018, 2020, 2021, 2022, 2023]",5.0,10.0,154.0,mls
3552,zeQZJN9jQK,Lucas Zelarayán,1992-06-20,Argentina,MF,AM,,,"[2020, 2021, 2022, 2023]",5.0,8.0,154.0,mls


In [260]:
list(players_midfield.player_id.unique())

['0Oq632k7Q6',
 '0Oq6lrerM6',
 '0Oq6lyNdM6',
 '0Oq6w947Q6',
 '0Oq6wel7Q6',
 '0Oq6wjlzQ6',
 '0Oq6wv26Q6',
 '0Oq6wxj6Q6',
 '0x5gXlrq7O',
 '2lqRk3lnQr',
 '2lqRkYZnQr',
 '2lqRkZXDQr',
 '2lqRR4Rxqr',
 '2lqRRvNxqr',
 '2lqRRZy2qr',
 '2vQ14B6Kqr',
 '2vQ1LYxbqr',
 '2vQ1LYY0qr',
 '2vQ1Nv7LMr',
 '315VXoL7Q9',
 '4JMA97YkMK',
 '4JMAaaxkMK',
 '4JMAoJgYqK',
 '4wM42Belqj',
 '7vQ73NxYQD',
 '7vQ7BmAzqD',
 '7vQ7BYdeqD',
 '7vQ7rmxEqD',
 '7vQ7rPpOqD',
 '7vQ7rvNDqD',
 '7VqGj3RA5v',
 '7VqGjBpz5v',
 '9vQ22OOoQK',
 '9vQ22pVaQK',
 '9vQ2d3X75K',
 '9vQ2dAKY5K',
 'a35rKZZwqL',
 'a35rlyZ4QL',
 'a35ryXO2ML',
 'aDQ0PjnGQE',
 'aDQ0PwXWQE',
 'BLMv1GlQxe',
 'BLMvlPy3qx',
 'e7Mz9DgGQr',
 'e7MzKKJ5r0',
 'EGMP1jAA5a',
 'eV5D9A9qKn',
 'eV5DLDVJ5K',
 'eV5DLedE5K',
 'eVq3a0ZgMW',
 'eVq3a8XyMW',
 'eVq3apWvMW',
 'eVq3jYZv5W',
 'eVq3xk1VQW',
 'eVq3xXDDQW',
 'gjMNv7PpMK',
 'gjMNvjYvMK',
 'gOMn66nAMw',
 'gOMnR96lMw',
 'gOMnRPxXMw',
 'gOMnRxreMw',
 'gpMOa6zrqz',
 'gpMOoOpOMz',
 'gpMOyGlqzy',
 'jYQJOaZMGR',
 'KAqB9KxB5b',
 'KAqBgZVQ

### Extracting the xgoals per game of one player using his player_id

In [221]:
desired_range

['2020', '2021', '2022', '2023']

In [222]:
player_xgoals = asa_client.get_player_xgoals(leagues = 'mls', player_ids = "0Oq632k7Q6", season_name=desired_range, split_by_games = True)

In [223]:
player_xgoals.columns.to_list()

['player_id',
 'game_id',
 'team_id',
 'general_position',
 'minutes_played',
 'shots',
 'shots_on_target',
 'goals',
 'xgoals',
 'xplace',
 'goals_minus_xgoals',
 'key_passes',
 'primary_assists',
 'xassists',
 'primary_assists_minus_xassists',
 'xgoals_plus_xassists',
 'points_added',
 'xpoints_added']

In [224]:
player_xgoals

Unnamed: 0,player_id,game_id,team_id,general_position,minutes_played,shots,shots_on_target,goals,xgoals,xplace,goals_minus_xgoals,key_passes,primary_assists,xassists,primary_assists_minus_xassists,xgoals_plus_xassists,points_added,xpoints_added
0,0Oq632k7Q6,0Oq660Agq6,jYQJ19EqGR,DM,7,0,0,0,0.0,0.0,0.0,0,0,0.0000,0.0000,0.0000,0,0.0
1,0Oq632k7Q6,0Oq6BXX2q6,jYQJ19EqGR,FB,11,0,0,0,0.0,0.0,0.0,0,0,0.0000,0.0000,0.0000,0,0.0
2,0Oq632k7Q6,0Oq6OArxQ6,jYQJ19EqGR,DM,12,0,0,0,0.0,0.0,0.0,0,0,0.0000,0.0000,0.0000,0,0.0
3,0Oq632k7Q6,4JMAVBBD5K,jYQJ19EqGR,ST,4,0,0,0,0.0,0.0,0.0,0,0,0.0000,0.0000,0.0000,0,0.0
4,0Oq632k7Q6,4JMAyJnYMK,pzeQZ6xQKw,CM,15,0,0,0,0.0,0.0,0.0,1,0,0.2624,-0.2624,0.2624,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,0Oq632k7Q6,wvq9RmW1QW,jYQJ19EqGR,DM,96,0,0,0,0.0,0.0,0.0,0,0,0.0000,0.0000,0.0000,0,0.0
60,0Oq632k7Q6,XVqKBxVO50,jYQJ19EqGR,DM,73,0,0,0,0.0,0.0,0.0,2,0,0.2379,-0.2379,0.2379,0,0.0
61,0Oq632k7Q6,xW5pEgVYqg,pzeQZ6xQKw,AM,8,0,0,0,0.0,0.0,0.0,0,0,0.0000,0.0000,0.0000,0,0.0
62,0Oq632k7Q6,xW5pVBdYQg,jYQJ19EqGR,DM,48,0,0,0,0.0,0.0,0.0,0,0,0.0000,0.0000,0.0000,0,0.0


### Filtering out the player matches where he played less than 10 mins and did not play as CM

In [225]:
#condition1 = player_xgoals['general_position'] == 'CM'
condition2 = player_xgoals['minutes_played']>=10

In [226]:
player_xgoals=player_xgoals[condition2]

In [227]:
player_xgoals

Unnamed: 0,player_id,game_id,team_id,general_position,minutes_played,shots,shots_on_target,goals,xgoals,xplace,goals_minus_xgoals,key_passes,primary_assists,xassists,primary_assists_minus_xassists,xgoals_plus_xassists,points_added,xpoints_added
1,0Oq632k7Q6,0Oq6BXX2q6,jYQJ19EqGR,FB,11,0,0,0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0,0.0
2,0Oq632k7Q6,0Oq6OArxQ6,jYQJ19EqGR,DM,12,0,0,0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0,0.0
4,0Oq632k7Q6,4JMAyJnYMK,pzeQZ6xQKw,CM,15,0,0,0,0.0,0.0,0.0,1,0,0.2624,-0.2624,0.2624,0,0.0
5,0Oq632k7Q6,7vQ7nmaGqD,jYQJ19EqGR,FB,38,2,0,0,0.0521,-0.0521,-0.0521,1,0,0.0494,-0.0494,0.1015,0,0.0475
6,0Oq632k7Q6,7vQ7nmJLqD,jYQJ19EqGR,FB,47,1,0,0,0.1048,-0.1048,-0.1048,1,0,0.031,-0.031,0.1358,0,0.1618
7,0Oq632k7Q6,7VqG8GdxQv,pzeQZ6xQKw,CM,91,2,2,0,0.0706,0.4976,-0.0706,2,0,0.148,-0.148,0.2186,0,0.0505
9,0Oq632k7Q6,9vQ2B0GrQK,pzeQZ6xQKw,DM,10,0,0,0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0,0.0
10,0Oq632k7Q6,9Yqd3nrL5v,jYQJ19EqGR,CM,57,0,0,0,0.0,0.0,0.0,1,0,0.0465,-0.0465,0.0465,0,0.0
11,0Oq632k7Q6,9Yqd8B08qv,jYQJ19EqGR,DM,84,1,1,0,0.0299,0.0785,-0.0299,0,0,0.0,0.0,0.0299,0,0.0257
12,0Oq632k7Q6,9YqdZE4mMv,pzeQZ6xQKw,ST,16,0,0,0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0,0.0


In [229]:
# adding the season name to the above dataset

player_xgoals = pd.merge(player_xgoals, games, on='game_id', how='left')

In [230]:
player_xgoals.columns

Index(['player_id', 'game_id', 'team_id', 'general_position', 'minutes_played',
       'shots', 'shots_on_target', 'goals', 'xgoals', 'xplace',
       'goals_minus_xgoals', 'key_passes', 'primary_assists', 'xassists',
       'primary_assists_minus_xassists', 'xgoals_plus_xassists',
       'points_added', 'xpoints_added', 'date_time_utc', 'home_score',
       'away_score', 'home_team_id', 'away_team_id', 'referee_id',
       'stadium_id', 'home_manager_id', 'away_manager_id', 'expanded_minutes',
       'season_name', 'matchday', 'attendance', 'knockout_game',
       'last_updated_utc', 'extra_time', 'penalties', 'home_penalties',
       'away_penalties'],
      dtype='object')

In [235]:
temp1 = player_xgoals[['player_id','minutes_played', 'xgoals','season_name']].sort_values(by='season_name')

In [236]:
## second player df

In [247]:
player2_xgoals = asa_client.get_player_xgoals(leagues = 'mls', player_ids = "0Oq6lrerM6", season_name=desired_range, split_by_games = True)

In [250]:
condition2 = player2_xgoals['minutes_played']>=10

In [251]:
player2_xgoals=player2_xgoals[condition2]

In [252]:
player2_xgoals = pd.merge(player2_xgoals, games, on='game_id', how='left')

In [253]:
temp2 = player2_xgoals[['player_id','xgoals','season_name']].sort_values(by='season_name')

In [258]:
temp2.shape

(96, 3)

In [232]:
## function to provide the specific metric for each player as above 

In [403]:
def filter_metric(players_df, games_df, client=asa_client, method='get_player_xgoals', start_season=2020, end_season=2023):
    '''
    returns df of player specific metric in the given season start to end ranges
    returned df contains the metric and the season_name column
    '''
    desired_range = [str(yr) for yr in list(range(start_season, end_season+1))]
    players_df = players_df[players_df['season_name'].apply(lambda seasons: set(seasons).issuperset(set(desired_range)))]    
    
    # getting the specific metric
    player_id_list = list(players_df.player_id.unique())
    
    dfs = []
    
    # get the method based on the client and the method name specified as arguments
    if hasattr(client, method):
        func = getattr(client, method)
    
    # columns to keep based on provided method
    
    if method == 'get_player_xgoals':
        columns_to_keep = ['player_id', 'minutes_played','xgoals', 'goals', 'season_name']
    if method == 'get_player_xpass':
        columns_to_keep = ['player_id', 'minutes_played', 'attempted_passes', 'pass_completion_percentage','season_name']
    if method == 'get_player_goals_added' or method == 'get_goalkeeper_goals_added':
        columns_to_keep = ['player_id', 'minutes_played', 'data', 'season_name']   
    if method == 'get_goalkeeper_xgoals':
        columns_to_keep = ['player_id', 'minutes_played', 'xgoals_gk_faced', 'saves', 'goals_conceded','season_name']         
        
    # iterate to get the dfs per player id
    for player_id in tqdm(player_id_list):
        print(f"Getting the metrics for {player_id}") 
        df = func(leagues = 'mls', player_ids = player_id, season_name=desired_range, split_by_games = True)
        
        # filter to meet minimum play time
        df = df[df['minutes_played']>=10]
        
        # add season name to the df
        df = pd.merge(df, games_df, on='game_id', how='left')
        
        # keep only wanted columns
        df = df[columns_to_keep].sort_values(by='season_name')
        
        dfs.append(df)
    
    # concatenate and return result
    df_concat = pd.concat(dfs, ignore_index=True)
    
    return df_concat

In [309]:
temp = filter_metric(players_midfield, games, asa_client, 'get_player_xgoals', start_season=2020, end_season=2023)

  6%|████▉                                                                             | 7/115 [00:00<00:01, 63.76it/s]

Getting the metrics for 0Oq632k7Q6
Getting the metrics for 0Oq6lrerM6
Getting the metrics for 0Oq6lyNdM6
Getting the metrics for 0Oq6w947Q6
Getting the metrics for 0Oq6wel7Q6
Getting the metrics for 0Oq6wjlzQ6
Getting the metrics for 0Oq6wv26Q6
Getting the metrics for 0Oq6wxj6Q6
Getting the metrics for 0x5gXlrq7O
Getting the metrics for 2lqRk3lnQr
Getting the metrics for 2lqRkYZnQr
Getting the metrics for 2lqRkZXDQr
Getting the metrics for 2lqRR4Rxqr
Getting the metrics for 2lqRRvNxqr


 21%|████████████████▉                                                                | 24/115 [00:00<00:01, 74.99it/s]

Getting the metrics for 2lqRRZy2qr
Getting the metrics for 2vQ14B6Kqr
Getting the metrics for 2vQ1LYxbqr
Getting the metrics for 2vQ1LYY0qr
Getting the metrics for 2vQ1Nv7LMr
Getting the metrics for 315VXoL7Q9
Getting the metrics for 4JMA97YkMK
Getting the metrics for 4JMAaaxkMK
Getting the metrics for 4JMAoJgYqK
Getting the metrics for 4wM42Belqj
Getting the metrics for 7vQ73NxYQD
Getting the metrics for 7vQ7BmAzqD
Getting the metrics for 7vQ7BYdeqD
Getting the metrics for 7vQ7rmxEqD
Getting the metrics for 7vQ7rPpOqD
Getting the metrics for 7vQ7rvNDqD
Getting the metrics for 7VqGj3RA5v


 37%|██████████████████████████████▎                                                  | 43/115 [00:00<00:00, 82.56it/s]

Getting the metrics for 7VqGjBpz5v
Getting the metrics for 9vQ22OOoQK
Getting the metrics for 9vQ22pVaQK
Getting the metrics for 9vQ2d3X75K
Getting the metrics for 9vQ2dAKY5K
Getting the metrics for a35rKZZwqL
Getting the metrics for a35rlyZ4QL
Getting the metrics for a35ryXO2ML
Getting the metrics for aDQ0PjnGQE
Getting the metrics for aDQ0PwXWQE
Getting the metrics for BLMv1GlQxe
Getting the metrics for BLMvlPy3qx
Getting the metrics for e7Mz9DgGQr
Getting the metrics for e7MzKKJ5r0
Getting the metrics for EGMP1jAA5a
Getting the metrics for eV5D9A9qKn
Getting the metrics for eV5DLDVJ5K


 53%|██████████████████████████████████████████▉                                      | 61/115 [00:00<00:00, 82.24it/s]

Getting the metrics for eV5DLedE5K
Getting the metrics for eVq3a0ZgMW
Getting the metrics for eVq3a8XyMW
Getting the metrics for eVq3apWvMW
Getting the metrics for eVq3jYZv5W
Getting the metrics for eVq3xk1VQW
Getting the metrics for eVq3xXDDQW
Getting the metrics for gjMNv7PpMK
Getting the metrics for gjMNvjYvMK
Getting the metrics for gOMn66nAMw
Getting the metrics for gOMnR96lMw
Getting the metrics for gOMnRPxXMw
Getting the metrics for gOMnRxreMw
Getting the metrics for gpMOa6zrqz
Getting the metrics for gpMOoOpOMz
Getting the metrics for gpMOyGlqzy
Getting the metrics for jYQJOaZMGR


 70%|████████████████████████████████████████████████████████▎                        | 80/115 [00:00<00:00, 83.28it/s]

Getting the metrics for KAqB9KxB5b
Getting the metrics for KAqBgZVQbg
Getting the metrics for KAqBjWZBqb
Getting the metrics for KPqj7jY4Q6
Getting the metrics for KPqj7OVPQ6
Getting the metrics for KPqjKWwPQ6
Getting the metrics for KPqjodRYQ6
Getting the metrics for KPqjodW4Q6
Getting the metrics for KPqjzJ2q6v
Getting the metrics for kRQa207YMK
Getting the metrics for KXMe4l1Q64
Getting the metrics for KXMe87ZPQ6
Getting the metrics for KXMeAPkM64
Getting the metrics for ljqE2EWkQx
Getting the metrics for ljqE2PwVQx


 77%|██████████████████████████████████████████████████████████████▋                  | 89/115 [00:01<00:00, 80.86it/s]

Getting the metrics for ljqEjLaEQx
Getting the metrics for NWMWeDYqlz
Getting the metrics for NWMWlEAD5l
Getting the metrics for NWMWlX6K5l
Getting the metrics for Oa5wAK9WQ1
Getting the metrics for Oa5wVgVYM1
Getting the metrics for Oa5wVzeWM1
Getting the metrics for Oa5wYKEXQ1
Getting the metrics for p6qbenoB50
Getting the metrics for p6qbeZ1p50
Getting the metrics for p6qbOeRXQ0
Getting the metrics for p6qbOkywQ0
Getting the metrics for Pk5L1BXPqO
Getting the metrics for Pk5Lgax7MO
Getting the metrics for raMyAGEBMd


 93%|██████████████████████████████████████████████████████████████████████████▍     | 107/115 [00:01<00:00, 76.86it/s]

Getting the metrics for raMyAZR2Md
Getting the metrics for raMyrZKR5d
Getting the metrics for Vj58YwDDq8
Getting the metrics for Vj58zZ2Q8n
Getting the metrics for vzqo70PBqa
Getting the metrics for vzqo78BJqa
Getting the metrics for vzqorrRk5a
Getting the metrics for wvq9bOBQWn
Getting the metrics for wvq9p10l5W
Getting the metrics for wvq9pBZm5W
Getting the metrics for wvq9wYNMWn
Getting the metrics for XVqKeBkKM0
Getting the metrics for XVqKeYLNM0
Getting the metrics for XVqKWAZBq0
Getting the metrics for XVqKWZp2q0


100%|████████████████████████████████████████████████████████████████████████████████| 115/115 [00:01<00:00, 78.02it/s]

Getting the metrics for XVqKYp2DM0
Getting the metrics for xW5prbllQg
Getting the metrics for xW5prK4BQg
Getting the metrics for zeQZJN9jQK
Getting the metrics for zeQZJX6KQK





In [310]:
temp

Unnamed: 0,player_id,minutes_played,xgoals,goals,season_name
0,0Oq632k7Q6,19,0.0000,0,2020
1,0Oq632k7Q6,49,0.0000,0,2020
2,0Oq632k7Q6,16,0.0283,0,2021
3,0Oq632k7Q6,99,0.0316,0,2021
4,0Oq632k7Q6,33,0.0000,0,2021
...,...,...,...,...,...
10494,zeQZJX6KQK,11,0.0000,0,2023
10495,zeQZJX6KQK,10,0.0000,0,2023
10496,zeQZJX6KQK,46,0.0000,0,2023
10497,zeQZJX6KQK,61,0.0000,0,2023


### Discrimination ability of a metric within a season computation

In [404]:
# Discrimination - Most of the variability between player metrics reflects the true variation in player ability.
# and not chance variation or noise from small sample sizes.

# we compare the average intrinsic variability of a metric to the total between player variation in this metric.


def discrimination_season(df, metric='xgoals', season = 2020):
    ''' returns discrimination ability of the metric in the specified season''' 
    # Filter rows with non-null values in the specified column
    data = df[['season_name', 'player_id', metric]].dropna(subset=[metric])
    # Filter rows based on the specified season
    data = data[data['season_name'] == season]
    
    # Calculate sample intra_variance and sample cross_variance
    data['cross_variance'] = data[metric].var(ddof=1)
    result_data = data.groupby('player_id').agg(
        intra_variance=pd.NamedAgg(column=metric, aggfunc=lambda x: x.var(ddof=1)),
        cross_variance=pd.NamedAgg(column='cross_variance', aggfunc='mean')
    )
    
    # Calculate and return the discrimination score
    return 1 - (result_data['intra_variance'].mean() / result_data['cross_variance'].mean())
    
    # return result_data

In [433]:
samp_var = discrimination_season(temp, metric='xgoals', season = 2023)

In [434]:
samp_var

0.18161304268285272

## Stability

In [386]:
# Stability helps in understanding a player's metric across seasons
# it could be used for future acquisitions. 
# If we have a stable metric we have more confidence that this year's performance will be predictive of next year's performance.
# A metric can become unstable particularly if it is context dependent, (eg players performance varies due to team changes)
# Or, if a player's skill set tends to change year to year.

# We define stability as a metric which describes how much we expect a single player metric to vary over time after removing chanve variability.
# The metric specifically targets the sensitivity of a metric to change in context or intrinsic player skill over time.

In [440]:
def stability_scores(df, metric='xgoals'):
    '''
    returns the stability metric 
    '''
    
    btw_season_variance = df.groupby('player_id').agg(
       bs_variance=pd.NamedAgg(column=metric, aggfunc=lambda x: x.var(ddof=1))
    )['bs_variance'].mean()
    
    within_season_player_variance = df.groupby(['player_id', 'season_name']).agg(
        wsp_variance=pd.NamedAgg(column=metric, aggfunc=lambda x: x.var(ddof=1))
    )['wsp_variance'].mean()
    
    overall_variance = df[metric].var(ddof=1)
    
    stability = 1 - ((btw_season_variance - within_season_player_variance) / (overall_variance - within_season_player_variance))
    
    return stability
        
        

In [441]:
stability_scores(temp)

0.6782845968515967

In [395]:
players[players['player_name'] == 'Lionel Messi']

vzqowm7qap

Unnamed: 0,player_id,player_name,birth_date,nationality,primary_broad_position,primary_general_position,secondary_broad_position,secondary_general_position,season_name,height_ft,height_in,weight_lb,competition
3279,vzqowm7qap,Lionel Messi,1987-06-24,Argentina,FW,ST,,W,2023,5.0,7.0,159.0,mls


In [398]:
player_xpass = asa_client.get_player_xpass(leagues ='mls', player_ids = 'vzqowm7qap', split_by_games = True)

In [399]:
player_xpass.columns.to_list()

['player_id',
 'game_id',
 'team_id',
 'general_position',
 'minutes_played',
 'attempted_passes',
 'pass_completion_percentage',
 'xpass_completion_percentage',
 'passes_completed_over_expected',
 'passes_completed_over_expected_p100',
 'avg_distance_yds',
 'avg_vertical_distance_yds',
 'share_team_touches']

In [402]:
player_xpass

Unnamed: 0,player_id,game_id,team_id,general_position,minutes_played,attempted_passes,pass_completion_percentage,xpass_completion_percentage,passes_completed_over_expected,passes_completed_over_expected_p100,avg_distance_yds,avg_vertical_distance_yds,share_team_touches
0,vzqowm7qap,0Oq6607zq6,zeQZkL1MKw,AM,102,70,0.7429,0.7328,0.7021,1.003,17.8246,6.1974,0.1092
1,vzqowm7qap,0x5gEZrjQ7,zeQZkL1MKw,CB,41,20,0.7,0.7489,-0.9779,-4.8895,16.0699,8.5618,0.0497
2,vzqowm7qap,gjMNAk04QK,zeQZkL1MKw,W,97,46,0.7391,0.7738,-1.5965,-3.4707,19.1503,4.9709,0.0796
3,vzqowm7qap,gOMnEB4JQw,zeQZkL1MKw,ST,34,18,0.6667,0.6663,0.0068,0.0377,19.6895,8.5828,0.0365
4,vzqowm7qap,gpMO0VR75z,zeQZkL1MKw,ST,37,19,0.7368,0.8155,-1.4947,-7.8667,15.1092,3.7292,0.0354
5,vzqowm7qap,odMX0jroMY,zeQZkL1MKw,ST,98,41,0.878,0.8288,2.0181,4.9223,17.8723,5.3542,0.0911


In [280]:
player_goals_added = asa_client.get_player_goals_added(leagues='mls', split_by_games = True)

HTTPError: 524 Server Error:  for url: https://app.americansocceranalysis.com/api/v1/mls/players/goals-added?split_by_games=True

In [21]:
player_goals_added.columns.to_list()

['player_id', 'team_id', 'general_position', 'minutes_played', 'data']

In [22]:
player_goals_added['data']

0       [{'action_type': 'Dribbling', 'goals_added_raw...
1       [{'action_type': 'Dribbling', 'goals_added_raw...
2       [{'action_type': 'Dribbling', 'goals_added_raw...
3       [{'action_type': 'Dribbling', 'goals_added_raw...
4       [{'action_type': 'Dribbling', 'goals_added_raw...
                              ...                        
2198    [{'action_type': 'Dribbling', 'goals_added_raw...
2199    [{'action_type': 'Dribbling', 'goals_added_raw...
2200    [{'action_type': 'Dribbling', 'goals_added_raw...
2201    [{'action_type': 'Dribbling', 'goals_added_raw...
2202    [{'action_type': 'Dribbling', 'goals_added_raw...
Name: data, Length: 2203, dtype: object

In [23]:
player_salaries = asa_client.get_player_salaries(leagues='mls')

In [24]:
player_salaries.columns.to_list()

['player_id',
 'team_id',
 'season_name',
 'position',
 'base_salary',
 'guaranteed_compensation',
 'mlspa_release']

In [None]:
gk_xgoals = asa_client.get_goalkeeper_xgoals(leagues='mls', split_by_games = True)

In [26]:
gk_xgoals.columns.to_list()

['player_id',
 'team_id',
 'minutes_played',
 'shots_faced',
 'goals_conceded',
 'saves',
 'share_headed_shots',
 'xgoals_gk_faced',
 'goals_minus_xgoals_gk',
 'goals_divided_by_xgoals_gk']

In [27]:
gk_goals_added = asa_client.get_goalkeeper_goals_added(leagues='mls')

In [28]:
gk_goals_added.columns.to_list()

['player_id', 'team_id', 'minutes_played', 'data']

In [29]:
team_xgoals = asa_client.get_team_xgoals(leagues='mls')

In [30]:
team_xgoals.columns.to_list()

['team_id',
 'count_games',
 'shots_for',
 'shots_against',
 'goals_for',
 'goals_against',
 'goal_difference',
 'xgoals_for',
 'xgoals_against',
 'xgoal_difference',
 'goal_difference_minus_xgoal_difference',
 'points',
 'xpoints']

In [31]:
team_xpass = asa_client.get_team_xpass(leagues='mls')

In [32]:
team_xpass.columns.to_list()

['team_id',
 'count_games',
 'attempted_passes_for',
 'pass_completion_percentage_for',
 'xpass_completion_percentage_for',
 'passes_completed_over_expected_for',
 'passes_completed_over_expected_p100_for',
 'avg_vertical_distance_for',
 'attempted_passes_against',
 'pass_completion_percentage_against',
 'xpass_completion_percentage_against',
 'passes_completed_over_expected_against',
 'passes_completed_over_expected_p100_against',
 'avg_vertical_distance_against',
 'passes_completed_over_expected_difference',
 'avg_vertical_distance_difference']

In [33]:
team_goals_added = asa_client.get_team_goals_added(leagues='mls')

In [34]:
team_goals_added.columns.to_list()

['team_id', 'minutes', 'data']

In [35]:
# team_salaries = asa_client.get_team_salaries(leagues = 'mls')

In [36]:
game_xgoals = asa_client.get_game_xgoals(leagues="mls")

In [37]:
game_xgoals.columns.to_list()

['game_id',
 'date_time_utc',
 'home_team_id',
 'home_goals',
 'home_team_xgoals',
 'home_player_xgoals',
 'away_team_id',
 'away_goals',
 'away_team_xgoals',
 'away_player_xgoals',
 'goal_difference',
 'team_xgoal_difference',
 'player_xgoal_difference',
 'final_score_difference',
 'home_xpoints',
 'away_xpoints']

In [38]:
player_xgoals[player_xgoals['player_id'] == '9z5ka8adQA']

Unnamed: 0,player_id,team_id,general_position,minutes_played,shots,shots_on_target,goals,xgoals,xplace,goals_minus_xgoals,key_passes,primary_assists,xassists,primary_assists_minus_xassists,xgoals_plus_xassists,points_added,xpoints_added
596,9z5ka8adQA,lgpMOvnQzy,AM,7057,190,72,23,26.4695,-0.3245,-3.4695,171,21,18.3749,2.6251,44.8444,12.0358,17.9115


In [39]:
player_xgoals[player_xgoals['player_id'] == 'vzqowm7qap']

Unnamed: 0,player_id,team_id,general_position,minutes_played,shots,shots_on_target,goals,xgoals,xplace,goals_minus_xgoals,key_passes,primary_assists,xassists,primary_assists_minus_xassists,xgoals_plus_xassists,points_added,xpoints_added
2115,vzqowm7qap,zeQZkL1MKw,ST,409,20,7,1,2.5464,-1.0855,-1.5464,6,2,1.4902,0.5098,4.0366,0.0061,1.6225


In [40]:
# Get all players named "Andre"
messi_df = asa_client.get_players(names="Lionel")
zlatan_df = asa_client.get_players(names="Zlatan")
ryan_gauld_df = asa_client.get_players(names="gauld")

In [41]:
ryan_gauld_df

Unnamed: 0,player_id,player_name,birth_date,nationality,primary_broad_position,primary_general_position,secondary_broad_position,secondary_general_position,season_name,height_ft,height_in,weight_lb,competition
1513,9z5ka8adQA,Ryan Gauld,1995-12-16,Scotland,MF,AM,,,"[2021, 2022, 2023]",5.0,7.0,150.0,mls


In [42]:
messi_df

Unnamed: 0,player_id,player_name,birth_date,nationality,primary_broad_position,primary_general_position,secondary_broad_position,secondary_general_position,season_name,height_ft,height_in,weight_lb,competition
3279,vzqowm7qap,Lionel Messi,1987-06-24,Argentina,FW,ST,,W,2023,5.0,7.0,159.0,mls


In [43]:
zlatan_df

Unnamed: 0,player_id,player_name,birth_date,nationality,primary_broad_position,primary_general_position,secondary_broad_position,secondary_general_position,season_name,height_ft,height_in,weight_lb,competition
2157,gpMOoayMzy,Zlatan Ibrahimovic,1981-10-03,Sweden,FW,ST,,,"[2018, 2019]",6.0,5.0,209.0,mls


In [44]:
asa_games = asa_client.get_games(leagues='mls')

In [45]:
asa_games.shape

(4416, 20)

In [46]:
asa_games.season_name.unique()

array([2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015, 2014, 2013],
      dtype=int64)

In [47]:
asa_xgoals = asa_client.get_player_xgoals(leagues ='mls')

In [48]:
asa_xgoals[asa_xgoals['player_id'] == 'vzqowm7qap']

Unnamed: 0,player_id,team_id,general_position,minutes_played,shots,shots_on_target,goals,xgoals,xplace,goals_minus_xgoals,key_passes,primary_assists,xassists,primary_assists_minus_xassists,xgoals_plus_xassists,points_added,xpoints_added
2115,vzqowm7qap,zeQZkL1MKw,ST,409,20,7,1,2.5464,-1.0855,-1.5464,6,2,1.4902,0.5098,4.0366,0.0061,1.6225


In [49]:
asa_xgoals[asa_xgoals['player_id'] == 'gpMOoayMzy']

Unnamed: 0,player_id,team_id,general_position,minutes_played,shots,shots_on_target,goals,xgoals,xplace,goals_minus_xgoals,key_passes,primary_assists,xassists,primary_assists_minus_xassists,xgoals_plus_xassists,points_added,xpoints_added
1151,gpMOoayMzy,kaDQ0wRqEv,ST,5388,275,116,53,41.1103,6.148,11.8897,84,13,11.0938,1.9062,52.2041,22.974,22.4662


In [50]:
asa_xgoals[asa_xgoals['player_id'] == '9z5ka8adQA']

Unnamed: 0,player_id,team_id,general_position,minutes_played,shots,shots_on_target,goals,xgoals,xplace,goals_minus_xgoals,key_passes,primary_assists,xassists,primary_assists_minus_xassists,xgoals_plus_xassists,points_added,xpoints_added
596,9z5ka8adQA,lgpMOvnQzy,AM,7057,190,72,23,26.4695,-0.3245,-3.4695,171,21,18.3749,2.6251,44.8444,12.0358,17.9115


In [51]:
asa_games

Unnamed: 0,game_id,date_time_utc,home_score,away_score,home_team_id,away_team_id,referee_id,stadium_id,home_manager_id,away_manager_id,expanded_minutes,season_name,matchday,attendance,knockout_game,last_updated_utc,extra_time,penalties,home_penalties,away_penalties
0,KAqBod7VQb,2023-12-09 21:00:00 UTC,2,1,mvzqoLZQap,eVq3ya6MWO,gjMNk4v5Kp,2vQ1eawQrA,KPqjO38Q6v,NWMWxKe5lz,102,2023,45,20802,True,2023-12-09 23:14:03 UTC,,,,
1,eVq3lwWjQW,2023-12-03 02:30:00 UTC,2,0,eVq3ya6MWO,YgOMngl5wN,2lqRymD5r0,7vQ7xbOMD1,NWMWxKe5lz,vzqoAjJMap,106,2023,44,22221,True,2023-12-06 12:05:10 UTC,,,,
2,Oa5w6ZmYM1,2023-12-02 23:00:00 UTC,2,3,NWMWlBK5lz,mvzqoLZQap,vzqoWjkqap,Oa5wKLY514,vzqozWB5ap,KPqjO38Q6v,132,2023,44,25513,True,2023-12-06 11:59:11 UTC,1.0,,,
3,jYQJyN0DqG,2023-11-27 02:30:00 UTC,0,1,jYQJ19EqGR,eVq3ya6MWO,EGMPLlxMaY,9Yqda07QvJ,odMXxreMYL,NWMWxKe5lz,101,2023,43,33649,True,2023-11-29 22:26:50 UTC,,,,
4,Xj5YzbNGqb,2023-11-27 00:00:00 UTC,1,0,YgOMngl5wN,Z2vQ1xlqrA,wvq9vKlQWn,0x5g6ojM7O,vzqoAjJMap,N6MmpyLQEG,99,2023,43,20319,True,2023-11-29 22:23:05 UTC,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4411,xW5pyO8Jqg,2013-03-03 03:30:00 UTC,0,1,jYQJ19EqGR,APk5LGOMOW,zeQZl7D5Kw,9Yqda07QvJ,KXMeab1M64,315VKkEq9x,101,2013,1,38998,False,,,,,
4412,KPqjK3VYQ6,2013-03-03 01:30:00 UTC,1,0,mKAqBBmqbg,pzeQZ6xQKw,vzqoWjkqap,eVq3alGMWO,a35rDN4QL6,gjMNk8v5Kp,100,2013,1,18075,False,,,,,
4413,kRQaolgj5K,2013-03-03 01:00:00 UTC,2,0,YgOMngl5wN,EKXMeX3Q64,EGMPPnAMaY,0x5g6ojM7O,4JMA4X9QKg,vzqoAjJMap,95,2013,1,20019,False,,,,,
4414,9vQ2ZGnmQK,2013-03-02 23:30:00 UTC,1,0,lgpMOvnQzy,kRQabn8MKZ,Oa5w9R9Q14,ljqEDnO5x0,ljqEWzAMx0,gjMNk0Y5Kp,97,2013,1,21000,False,,,,,
