In [1]:
import pandas as pd
from statsbombpy import sb

In [2]:
def get_data(
    match_ids,
    data_dir="/Users/borgwardt/Documents/repos/open-data/data/three-sixty",
    verbose=True,
):
    """Load the events of several matches and attach their 360 frames.

    For every match id the events are fetched with ``sb.events`` and
    left-joined (event ``id`` == 360 ``event_uuid``) with the JSON file
    ``<data_dir>/<match_id>.json``.

    Parameters
    ----------
    match_ids : iterable
        Match ids to load.
    data_dir : str or path-like, optional
        Directory holding the ``<match_id>.json`` 360 files. The default is
        the location that used to be hard-coded in this function.
        TODO: replace the absolute default with a configurable project path.
    verbose : bool, optional
        When True (the default, matching the previous behaviour) the
        event-type counts of every match are printed.

    Returns
    -------
    pandas.DataFrame
        All matches stacked on top of each other with a fresh 0..n-1 index;
        an empty frame when ``match_ids`` is empty.
    """
    # Collect the per-match frames and concatenate once at the end; calling
    # pd.concat inside the loop re-copies the accumulated frame every time.
    merged_frames = []
    for match_id in match_ids:
        event_data = sb.events(match_id=match_id)
        if verbose:
            print(event_data.type.value_counts())
        df_360 = pd.read_json(f"{data_dir}/{match_id}.json")
        merged_frames.append(
            pd.merge(
                event_data,
                df_360,
                how="left",
                left_on="id",
                right_on="event_uuid",
            )
        )

    # pd.concat raises on an empty list, so keep the old "empty frame" result.
    if not merged_frames:
        return pd.DataFrame()
    return pd.concat(merged_frames, ignore_index=True)



def preprocess_data(df_raw):
    """Return the events ordered per match with an ``event_time`` column.

    Rows are sorted by ``match_id`` and then ``index``. ``event_time`` is
    ``minute * 60 + second``. Finally ``reset_index`` moves the row labels of
    the sorted frame into a column and installs a fresh 0..n-1 index.
    ``df_raw`` itself is left untouched.
    """
    ordered = df_raw.sort_values(by=["match_id", "index"])
    # .values sidesteps index alignment; both columns come from `ordered`.
    seconds = ordered["minute"].values * 60 + ordered["second"].values
    return ordered.assign(event_time=seconds).reset_index()


def _count_events(events, event_type):
    """Return how many rows of ``events`` have ``type == event_type``."""
    return int((events["type"] == event_type).sum())


def create_kpis(df_match):
    """Summarise the events of a match as one KPI row per team.

    Parameters
    ----------
    df_match : pandas.DataFrame
        Event table. The columns read here are ``team``, ``type``,
        ``shot_outcome``, ``shot_statsbomb_xg``, ``pass_outcome`` and
        ``duration``.

    Returns
    -------
    pandas.DataFrame
        Indexed by team, with the columns ``goals_scored``,
        ``goals_conceded``, ``shot_statsbomb_xg_scored``,
        ``shot_statsbomb_xg_conceded``, ``shots``, ``passes``,
        ``pass_accuracy`` (percent), ``interceptions``, ``clearances`` and
        ``possession`` (share between 0 and 1). An empty frame is returned
        when ``df_match`` contains no team.

    Notes
    -----
    * The "conceded" figures and the possession denominator use the first
      *other* team found in ``df_match``. When there is no other team they
      are computed from an empty selection (0 conceded, possession 1.0)
      instead of raising ``IndexError``.
    * ``pass_accuracy`` is NaN for a team without passes instead of raising
      ``ZeroDivisionError``; ``possession`` is NaN when neither team has any
      counted duration.
    """
    teams = list(df_match.team.unique())
    if not teams:
        return pd.DataFrame()

    kpi_rows = []
    for team in teams:
        team_events = df_match[df_match.team == team]
        opponents = [t for t in teams if t != team]
        if opponents:
            other_team_events = df_match[df_match.team == opponents[0]]
        else:
            # Single-team input: use an empty slice with the same columns.
            other_team_events = df_match.iloc[0:0]

        # Goals: shots whose outcome is "Goal".
        goals_scored = int((team_events["shot_outcome"] == "Goal").sum())
        goals_conceded = int(
            (other_team_events["shot_outcome"] == "Goal").sum()
        )

        # Expected goals: sum of the xG column (missing values are skipped).
        xg_scored = team_events["shot_statsbomb_xg"].sum()
        xg_conceded = other_team_events["shot_statsbomb_xg"].sum()

        # Passes: a pass counts as completed when `pass_outcome` is missing.
        is_pass = team_events["type"] == "Pass"
        passes = int(is_pass.sum())
        completed_passes = int(
            (is_pass & team_events["pass_outcome"].isnull()).sum()
        )
        if passes:
            pass_accuracy = (completed_passes / passes) * 100
        else:
            pass_accuracy = float("nan")

        # Possession: share of summed event durations; "Pressure" events are
        # left out of both totals.
        team_seconds = team_events[
            team_events["type"] != "Pressure"
        ]["duration"].sum()
        other_seconds = other_team_events[
            other_team_events["type"] != "Pressure"
        ]["duration"].sum()
        total_seconds = team_seconds + other_seconds
        if total_seconds:
            possession = team_seconds / total_seconds
        else:
            possession = float("nan")

        kpi_rows.append(
            {
                "goals_scored": goals_scored,
                "goals_conceded": goals_conceded,
                "shot_statsbomb_xg_scored": xg_scored,
                "shot_statsbomb_xg_conceded": xg_conceded,
                "shots": _count_events(team_events, "Shot"),
                "passes": passes,
                "pass_accuracy": pass_accuracy,
                "interceptions": _count_events(team_events, "Interception"),
                "clearances": _count_events(team_events, "Clearance"),
                "possession": possession,
            }
        )

    # Build the summary once instead of concatenating inside the loop.
    return pd.DataFrame(kpi_rows, index=teams)


def color_cells(row):
    """Return one ``"color: <name>"`` style string per cell of ``row``.

    The colour is the same for every cell and is derived from the entries
    ``high_is_good``, ``Team Values``, ``Average`` and ``STD``: both sides of
    the comparison are multiplied by ``high_is_good``, and the team value is
    compared against ``Average -/+ 0.25 * STD``.

    * below the lower bound -> ``red``
    * above the upper bound -> ``green``
    * otherwise             -> ``orange``
    """
    n_cells = len(row)
    if n_cells == 0:
        # Nothing to style, so the key lookups below are skipped as well.
        return []

    direction = row["high_is_good"]
    tolerance = 0.25 * row["STD"]
    signed_value = direction * row["Team Values"]

    if signed_value < direction * (row["Average"] - tolerance):
        color = "red"
    elif signed_value > direction * (row["Average"] + tolerance):
        color = "green"
    else:
        color = "orange"

    return [f"color: {color}"] * n_cells

In [3]:
# Narrow the competition list down to the "UEFA Women's Euro" rows of season
# "2022" and keep the first competition id / season id found there.
competitions = sb.competitions()

is_womens_euro = competitions["competition_name"] == "UEFA Women's Euro"
womens_euro_competition = competitions.loc[is_womens_euro]

is_season_2022 = womens_euro_competition["season_name"] == "2022"
womens_euro_2022 = womens_euro_competition.loc[is_season_2022]

euro_competition_id = womens_euro_2022["competition_id"].unique()[0]
euro_season_id = womens_euro_2022["season_id"].unique()[0]



In [4]:
# Match ids of every match in the selected competition and season.
euro_matches = sb.matches(
    competition_id=euro_competition_id,
    season_id=euro_season_id,
)
match_ids = euro_matches["match_id"]

In [5]:

# Load the events of every match and left-join the 360 frames from the local
# JSON files (event `id` == 360 `event_uuid`).
# The per-match frames are collected in a list and concatenated once: calling
# pd.concat inside the loop re-copies the accumulated frame on every match.
# NOTE(review): this repeats the body of get_data() minus its print; it is
# kept inline so the cell produces no extra output.
# TODO: replace the absolute path with a configurable data directory.
THREE_SIXTY_DIR = "/Users/borgwardt/Documents/repos/open-data/data/three-sixty"

merged_frames = []
for match_id in match_ids:
    event_data = sb.events(match_id=match_id)
    df_360 = pd.read_json(f"{THREE_SIXTY_DIR}/{match_id}.json")
    df_merged = pd.merge(
        event_data, df_360, how="left", left_on="id", right_on="event_uuid"
    )
    merged_frames.append(df_merged)

# pd.concat raises on an empty list; fall back to an empty frame as before.
event_data_tot = (
    pd.concat(merged_frames, ignore_index=True)
    if merged_frames
    else pd.DataFrame()
)



In [6]:
# One row per tactics event (events whose `tactics` value is a dict) holding
# the ids of the lineup players whose position id lies strictly between the
# two bounds below, i.e. 3, 4 or 5.
# NOTE(review): presumably these position ids are the centre-back slots -
# confirm against the data provider's position table.
# Only the tactics rows are visited and the frame is built once, instead of
# running iterrows over every event and calling pd.concat per hit.
POSITION_ID_LOWER = 2  # exclusive
POSITION_ID_UPPER = 6  # exclusive

has_tactics = event_data_tot["tactics"].map(
    lambda tactics: isinstance(tactics, dict)
)
center_back_records = [
    {
        "match_id": tactics_match_id,
        "team": tactics_team,
        "index": tactics_event_index,
        "center_id": [
            player["player"]["id"]
            for player in tactics["lineup"]
            if POSITION_ID_LOWER < player["position"]["id"] < POSITION_ID_UPPER
        ],
    }
    for tactics_match_id, tactics_team, tactics_event_index, tactics in (
        event_data_tot.loc[
            has_tactics, ["match_id", "team", "index", "tactics"]
        ].itertuples(index=False, name=None)
    )
]
# Explicit columns keep the schema stable even when no tactics event exists.
center_back = pd.DataFrame(
    center_back_records, columns=["match_id", "team", "index", "center_id"]
)

In [7]:
# Attach each `center_id` list to its originating event (same match_id, index
# and team), then order the events by match, team and event index.
df_center = event_data_tot.merge(
    center_back,
    how="left",
    on=["match_id", "index", "team"],
)
df_center = df_center.sort_values(by=["match_id", "team", "index"])

In [8]:
# Carry the most recent `center_id` list forward onto the following rows.
# The result is assigned back: ffill(inplace=True) on a selected column is
# chained assignment, which acts on a temporary under pandas Copy-on-Write
# and leaves df_center unchanged.
# NOTE(review): the fill runs over the whole frame, so it relies on every
# (match_id, team) block starting with its own tactics event - confirm.
df_center["center_id"] = df_center["center_id"].ffill()

In [9]:
# Sort the events by match_id/index and add the `event_time` column
# (minute * 60 + second) via preprocess_data.
df_preprocessed = preprocess_data(df_center)

In [10]:
# Unique team names per match, as one array per match_id.
grouped_teams = df_preprocessed.groupby("match_id")["team"].unique()

# Spread each array over two columns and turn match_id back into a column.
# Exactly two teams per match are expected; set_axis raises otherwise.
teams_df_1 = (
    grouped_teams.apply(pd.Series)
    .set_axis(["team_1", "team_2"], axis=1)
    .reset_index()
)

# Same rows with the two team labels swapped, so that after stacking every
# team appears once in the first team column with the other team beside it.
teams_df_2 = teams_df_1.rename(columns={"team_1": "team_2", "team_2": "team_1"})

# pd.concat aligns on column names; the final labels are positional.
df_teams = pd.concat([teams_df_2, teams_df_1]).set_axis(
    ["match_id", "team", "opponent"], axis=1
)

In [11]:
# Add the `opponent` column by looking up each event's (match_id, team) pair.
# NOTE: this rebinds df_preprocessed; re-running the cell merges a second time.
df_preprocessed = pd.merge(
    df_preprocessed,
    df_teams,
    how="left",
    on=["match_id", "team"],
)

In [12]:
# Flag the rows where the play pattern switches to "From Goal Kick": the
# current row has that pattern and the previous row does not.
play_pattern = df_preprocessed["play_pattern"]
previous_play_pattern = play_pattern.shift(1)
mask = previous_play_pattern.ne("From Goal Kick") & play_pattern.eq(
    "From Goal Kick"
)

# Keep only the flagged rows.
result = df_preprocessed.loc[mask]

In [23]:
# Lookup table of goal-kick starts, ordered by time for merge_asof.
# `event_time` is the join key and `goal_kick_time` carries the same value
# through the merge. The team of the flagged row is stored under `opponent`,
# so the later join pairs each event with goal kicks of the *other* team.
goal_kick_seconds = result["minute"].values * 60 + result["second"].values
df_goal_kick = pd.DataFrame(
    data={
        "event_time": goal_kick_seconds,
        "opponent": result["team"].values,
        "match_id": result["match_id"].values,
        "goal_kick_time": goal_kick_seconds,
    },
    index=result.index,
).sort_values(by=["event_time"])

# merge_asof needs its `on` key sorted on the left side as well.
df_preprocessed = df_preprocessed.sort_values(by="event_time")

In [26]:
# For every event, pick the latest goal-kick row at or before its event_time
# that has the same match_id and `opponent` value.
# NOTE: this rebinds df_preprocessed; re-running the cell merges a second time.
df_preprocessed = pd.merge_asof(
    left=df_preprocessed,
    right=df_goal_kick,
    on="event_time",
    by=["match_id", "opponent"],
    direction="backward",
)

In [28]:
# Seconds between the event and its matched goal kick.
df_preprocessed["delta_goal_kick"] = df_preprocessed["event_time"].sub(
    df_preprocessed["goal_kick_time"]
)

In [30]:
# True where the row's player_id is contained in the row's center_id list.
# Zipping the two columns avoids materialising a Series for every row, which
# apply(axis=1) does. Rows whose center_id is not a list (e.g. NaN) count as
# False instead of raising TypeError.
mask = pd.Series(
    [
        isinstance(center_ids, (list, tuple)) and player_id in center_ids
        for player_id, center_ids in zip(
            df_preprocessed["player_id"], df_preprocessed["center_id"]
        )
    ],
    index=df_preprocessed.index,
    dtype=bool,
)

# Keep the flagged rows and only the columns used afterwards; a single .loc
# call replaces the chained df[mask][cols] selection.
df_delta_goal_kick = df_preprocessed.loc[
    mask, ["center_id", "player_id", "location", "delta_goal_kick"]
]

In [35]:
# Show the rows with delta_goal_kick below 5 (seconds).
df_delta_goal_kick.loc[df_delta_goal_kick["delta_goal_kick"].lt(5)]


Unnamed: 0,center_id,player_id,location,delta_goal_kick
4240,"[32216, 10125]",32216.0,"[57.2, 52.3]",3.0
4260,"[32216, 10125]",32216.0,"[53.3, 49.6]",4.0
9078,"[10119, 225394]",225394.0,"[49.0, 16.8]",3.0
10439,"[15789, 87185, 10088]",15789.0,"[44.5, 74.5]",4.0
12238,"[10165, 10155]",10165.0,"[62.9, 57.6]",4.0
...,...,...,...,...
99616,"[10271, 10125]",10125.0,"[30.7, 41.1]",1.0
99651,"[10271, 10125]",10271.0,"[30.9, 19.4]",3.0
99662,"[10271, 10125]",10271.0,"[33.0, 18.0]",4.0
99894,"[4642, 18999]",18999.0,"[63.9, 16.0]",3.0
