In [1]:
import pandas as pd
from statsbombpy import sb
import plotly.express as px
from datetime import datetime, timedelta
import matplotlib.pyplot as plt

In [2]:
def preprocessing(euro_competition_id,euro_season_id,match_id):
    euro_matches = sb.matches(competition_id=euro_competition_id, season_id=euro_season_id)
    events_data=sb.events(match_id=match_id)
    score={euro_matches[euro_matches.match_id==match_id].home_team.values[0]:
           euro_matches[euro_matches.match_id==match_id].home_score.values[0],
           euro_matches[euro_matches.match_id==match_id].away_team.values[0]:
           euro_matches[euro_matches.match_id==match_id].away_score.values[0]}
    events_data.sort_values(["minute","timestamp"],inplace=True)
    parsed_time=events_data["timestamp"].apply(lambda x: datetime.strptime(x,"%H:%M:%S.%f"))
    time_differences = parsed_time.diff().dt.total_seconds()
    events_data["time_differences"]=[x if ((x>0)&(x<60*5)) else 0 for x in time_differences]
    return events_data,score

In [3]:
def enrich_data(events_data,team,other_team,score):
    kpi_summary = []
    team_events = events_data[events_data["team"] == team]

    # Total shots
    shots = len(team_events[team_events["type"] == "Shot"])
    # Total xg
    shot_statsbomb_xg=team_events["shot_statsbomb_xg"].sum()
    # Total passes
    passes = len(team_events[team_events["type"] == "Pass"])

    # Pass accuracy
    completed_passes = len(team_events[(team_events["type"] == "Pass") & (team_events["pass_outcome"].isnull())])
    pass_accuracy = (completed_passes / passes) * 100

    # Total duels won
    duels_won = len(team_events[(team_events["type"] == "Duel") & (team_events["duel_outcome"] == "Won")])

    # Total tackles
    tackles = len(team_events[team_events["type"] == "Tackle"])

    # Total interceptions
    interceptions = len(team_events[team_events["type"] == "Interception"])

    # Total clearances
    clearances = len(team_events[team_events["type"] == "Clearance"])

    # Percentage of possession 
    team_possession=events_data[(events_data['possession_team']==team)& (events_data['type']!="Pressure")].duration.sum()
    other_team_possession=events_data[(events_data['possession_team']==other_team)& (events_data['type']!="Pressure")].duration.sum()
    possession = team_possession/(team_possession+other_team_possession)
    kpi_summary.append({
        "team": team,
        "score": score[team],
        "shots": shots,
        'shot_statsbomb_xg':shot_statsbomb_xg,
        "passes": passes,
        "pass_accuracy": pass_accuracy,
        "duels_won": duels_won,
        "tackles": tackles,
        "interceptions": interceptions,
        "clearances": clearances,
        "possession": possession
    })
    return kpi_summary

In [4]:
def get_summary(events_data:pd.DataFrame,score:dict):# Calculate KPIs for each team
    kpi_summary=pd.DataFrame()
    teams = events_data["team"].unique()
    

    for team in teams:
        other_team=[opponent for opponent in teams if opponent != team][0]
        df_temp=pd.DataFrame(enrich_data(events_data,team,other_team,score))
        kpi_summary=pd.concat([kpi_summary, df_temp], ignore_index=True)
    return  kpi_summary

In [5]:
competitions = sb.competitions()
womens_euro_competition = competitions[competitions['competition_name'] == "UEFA Women's Euro"]
womens_euro_2022 = womens_euro_competition[womens_euro_competition['season_name'] == '2022']




In [12]:
kpi_summary_df=pd.DataFrame()
euro_competition_id=womens_euro_2022.competition_id.unique()[0]
euro_season_id=womens_euro_2022.season_id.unique()[0]
for match_id in sb.matches(competition_id=euro_competition_id, season_id=euro_season_id).match_id.unique() :
    events_data,score=preprocessing(euro_competition_id,euro_season_id,match_id)
    df_temp=get_summary(events_data,score)
    kpi_summary_df=pd.concat([kpi_summary_df, df_temp], ignore_index=True)




In [14]:
kpi_summary_df.groupby("team").mean()

Unnamed: 0_level_0,score,shots,shot_statsbomb_xg,passes,pass_accuracy,duels_won,tackles,interceptions,clearances,possession
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Austria Women's,0.75,15.0,0.950931,396.5,68.642141,4.0,0.0,13.25,21.25,0.467063
Belgium Women's,0.75,6.0,0.583729,451.25,74.072363,6.75,0.0,20.0,31.75,0.457269
Denmark Women's,0.333333,11.333333,1.310782,403.333333,66.782358,7.333333,0.0,22.666667,19.333333,0.398139
England Women's,3.666667,18.0,2.634094,559.166667,80.159468,5.833333,0.0,17.833333,21.5,0.591982
France Women's,2.0,21.4,2.657386,554.4,81.282006,7.2,0.0,19.8,16.0,0.545218
Germany Women's,2.333333,17.833333,1.943465,496.333333,74.334376,5.5,0.0,16.5,22.833333,0.535319
Iceland Women's,1.0,13.0,1.859393,338.0,63.326419,3.666667,0.0,21.333333,26.666667,0.393394
Italy Women's,0.666667,18.333333,1.199016,479.666667,77.277434,5.666667,0.0,19.333333,15.333333,0.564004
Netherlands Women's,2.0,13.75,1.272733,516.0,80.008482,7.5,0.0,14.75,26.25,0.523759
Northern Ireland,0.333333,5.333333,0.306272,274.666667,65.705053,3.666667,0.0,16.333333,25.0,0.310591


In [None]:
womens_euro_2022.competition_id.values

array([53])