In [None]:
import matplotlib.pyplot as plt
from mplsoccer import Pitch, Sbopen
import numpy as np
import pandas as pd

In [None]:
# Named colour palette (hex codes) shared by the plots in this notebook.
col = dict(
    gray="#878787",
    violet="#976cf4",
    turquoise="#23969a",
    coral="#c86f6d",
    ocher="#b98327",
)

In [None]:
# Create the Sbopen parser (reused by every later cell) and load the table
# of available competitions through its competition() method.
parser = Sbopen()
df_competitions = parser.competition()

In [None]:
# Competition / season to analyse; both must match a row of df_competitions.
competition_name = "UEFA Women's Euro"
season_name = "2022"

# Boolean row selector for the chosen competition and season.
bind = df_competitions["competition_name"].eq(competition_name) & df_competitions["season_name"].eq(season_name)

# Take the IDs from the first matching row (IndexError if nothing matches).
competition_id = df_competitions.loc[bind, "competition_id"].to_numpy()[0]
season_id = df_competitions.loc[bind, "season_id"].to_numpy()[0]

print(
    f"{competition_name} {season_name}",
    f"Competition ID: {competition_id}",
    f"Season ID: {season_id}",
    sep="\n",
)

In [None]:
# Load the match table for the selected competition and season through the
# parser's match() method.
df_matches = parser.match(competition_id=competition_id, season_id=season_id)

In [None]:
# Team and player under analysis. Later cells compare these strings against
# the team-name and player_name columns, so they must match the data exactly.
team_name = "Germany Women's"
player_name = "Lena Oberdorf"

In [None]:
# True for every match in which the selected team is the home or the away side.
bind_matches = df_matches[["home_team_name", "away_team_name"]].eq(team_name).any(axis=1)

In [None]:
# IDs of all matches the selected team played in (home or away).
match_ids = df_matches.loc[bind_matches, "match_id"].to_numpy()
print(f"{team_name} matchen in {competition_name} {season_name}")
for match_id in match_ids:
    # Select the fixture row once and read both team names from it.
    fixture = df_matches[df_matches["match_id"] == match_id]
    home_team_name = fixture["home_team_name"].unique()[0]
    away_team_name = fixture["away_team_name"].unique()[0]
    print(f"Match ID: {match_id}: {home_team_name} - {away_team_name}")

In [None]:
# Resolve the player's ID from the lineup of the LAST entry in match_ids
# (index -1). The lookup raises IndexError if the player is not in that lineup.
df_lineup = parser.lineup(match_ids[-1])
player_id = df_lineup.loc[df_lineup["player_name"] == player_name, "player_id"].to_numpy()[0]
print(f"Player name: {player_name}", f"Player ID: {player_id}", sep="\n")


### Analysis of the passes

In [None]:
# Choose the match to analyse: keep exactly one assignment uncommented.
# match_id = 3835322  # Germany vs Denmark
# match_id = 3835330  # Germany vs Spain
# match_id = 3835322  # Germany vs Finland
# NOTE(review): the Denmark and Finland lines carry the same ID - verify.

match_id = 3844385  # Germany vs Austria
# match_id = 3845507  # Germany vs France
# match_id = 3847567  # Germany vs England

# parser.event returns several frames; only the first (the events) is used here.
df = parser.event(match_id)[0]

# The event frame is expected to contain exactly two team names.
team1, team2 = df["team_name"].unique()
oppenent_team_name = team2 if team1 == team_name else team1
print(
    f"Team 1: {team1}",
    f"Team 2: {team2}",
    f"Opponent team: {oppenent_team_name}",
    sep="\n",
)

In [None]:
# Keep only "normal" passes: rows of type "Pass" whose sub type is empty,
# i.e. no corners, free kicks, throw-ins etc.
df_passes = df[(df["type_name"] == "Pass") & (df["sub_type_name"].isna())]

# Passes of the analysed player. The variable names still say "oberdorf"
# because later cells use them, but the selection follows player_id.
bind_passes_oberdorf = (df_passes["team_name"] == team_name) & (df_passes["player_id"] == player_id)
df_passes_oberdorf = df_passes[bind_passes_oberdorf]
nrof_passes = df_passes_oberdorf.shape[0]
# A pass counts as "forward" when it ends at a larger x than it started from.
nrof_passes_forward = int((df_passes_oberdorf["end_x"] > df_passes_oberdorf["x"]).sum())
# Report under the configured player name instead of a hard-coded surname.
print(f"Number of passes by {player_name}: {nrof_passes}")
print(f"Number of passes forward: {nrof_passes_forward}")

# The same two counts for every player of the team who appears in the event
# data. Counts are computed once per team with groupby instead of re-filtering
# the full frame for each player.
team_events = df[(df["team_name"] == team_name) & (df["player_id"].notna())]
team_players = team_events.drop_duplicates(subset="player_id")  # keeps first-appearance order
team_passes = df_passes[df_passes["team_name"] == team_name]
passes_by_player = team_passes.groupby("player_id").size()
forward_by_player = team_passes[team_passes["end_x"] > team_passes["x"]].groupby("player_id").size()

for match_player_id, match_player_name in zip(team_players["player_id"], team_players["player_name"]):
    # Players without any normal pass are still listed, with a count of 0.
    nrof_passes_match_player = int(passes_by_player.get(match_player_id, 0))
    nrof_passes_forward_match_player = int(forward_by_player.get(match_player_id, 0))
    print(f"Player name: {match_player_name}: {nrof_passes_match_player} ({nrof_passes_forward_match_player})")

In [None]:
# Scatter the start location of each pass by the analysed player on a
# grid-layout pitch (the grid returns a dict of axes; the pitch axis is used).
pitch = Pitch(line_color="black")
fig, ax = pitch.grid(
    grid_height=0.9,
    title_height=0.06,
    endnote_height=0.04,
    title_space=0,
    endnote_space=0,
    axis=False,
)
pass_start = df_passes_oberdorf[["x", "y"]]
pitch.scatter(pass_start["x"], pass_start["y"], ax=ax["pitch"], s=225, alpha=0.4, color="blue")

In [None]:
# Draw every normal pass of the analysed player as an arrow from its start
# location to its end location.
pitch = Pitch(pitch_type='statsbomb', line_color='black')
fig, ax = pitch.draw(figsize=(16, 11), constrained_layout=True, tight_layout=False)
fig.set_facecolor('#22312b')
pitch.arrows(
    df_passes_oberdorf.x,
    df_passes_oberdorf.y,
    df_passes_oberdorf.end_x,
    df_passes_oberdorf.end_y,
    ax=ax,
    width=2,
    headwidth=10,
    headlength=10,
    color='blue',
    # df_passes_oberdorf is not filtered by outcome, so the arrows cover all
    # passes, not only completed ones; the label now says so.
    label='passes')

### Passing network analysis

In [None]:
# Event number (value of the "index" column) of the team's first substitution.
# Both conditions go into one mask so it is aligned with df itself.
team_subs = df.loc[(df["type_name"] == "Substitution") & (df["team_name"] == team_name)]
if team_subs.empty:
    # No substitution by this team: keep every event of the match.
    sub = df["index"].max() + 1
else:
    sub = team_subs.iloc[0]["index"]

# Passes by the team before that substitution with an empty outcome_name
# (treated here as completed), excluding throw-ins. The cut-off is compared
# against the "index" column that `sub` was read from, not against the
# DataFrame's row labels (df.index), which are a different numbering.
mask_germany = (
    (df["type_name"] == "Pass")
    & (df["team_name"] == team_name)
    & (df["index"] < sub)
    & (df["outcome_name"].isnull())
    & (df["sub_type_name"] != "Throw-in")
)

# Keep only the columns needed for the network; copy so the column
# assignments below act on an independent frame.
df_pass = df.loc[mask_germany, ['x', 'y', 'end_x', 'end_y', "player_name", "pass_recipient_name"]].copy()

# Reduce both name columns to the last whitespace-separated token (the surname).
df_pass["player_name"] = df_pass["player_name"].apply(lambda full_name: str(full_name).split()[-1])
df_pass["pass_recipient_name"] = df_pass["pass_recipient_name"].apply(lambda full_name: str(full_name).split()[-1])

In [None]:
# One record per passer: average location and number of passes made.
# Records are collected in a list and turned into a DataFrame once, instead
# of enlarging an empty frame cell by cell.
node_records = []
for name in df_pass["player_name"].unique():
    passes_made = df_pass.loc[df_pass["player_name"] == name]
    passes_received = df_pass.loc[df_pass["pass_recipient_name"] == name]
    node_records.append(
        {
            "player_name": name,
            # The node position is the mean over pass start points and
            # reception (pass end) points of this player.
            "x": np.mean(np.concatenate([passes_made["x"].to_numpy(), passes_received["end_x"].to_numpy()])),
            "y": np.mean(np.concatenate([passes_made["y"].to_numpy(), passes_received["end_y"].to_numpy()])),
            # Number of passes made (non-null x values, as before).
            "no": int(passes_made["x"].count()),
        }
    )

# Explicit columns keep the frame usable even when there are no passes at all.
scatter_df = pd.DataFrame(node_records, columns=["player_name", "x", "y", "no"])

# Scale the marker size so the most active passer gets the largest circle (1500).
scatter_df['marker_size'] = (scatter_df['no'] / scatter_df['no'].max() * 1500)

In [None]:
# Count the passes for every (passer, recipient) pair.
lines_df = (
    df_pass.groupby(["player_name", "pass_recipient_name"])["x"]
    .count()
    .reset_index(name="pass_count")
)
# Keep only pairs at or above the threshold; try other values to see how the
# network changes.
lines_df = lines_df.loc[lines_df["pass_count"] >= 3]

In [None]:
# Draw the pitch. Alternative grid layout, kept for reference:
# pitch = Pitch(line_color='grey')
# fig, ax = pitch.grid(grid_height=0.9, title_height=0.06, axis=False,
#                      endnote_height=0.04, title_space=0, endnote_space=0)

pitch = Pitch(pitch_type="statsbomb", line_color="grey")
fig, ax = pitch.draw(figsize=(16, 11), constrained_layout=True, tight_layout=False)

# One circle per player at the average location, sized by marker_size.
pitch.scatter(
    scatter_df["x"],
    scatter_df["y"],
    s=scatter_df["marker_size"],
    color=col["violet"],
    edgecolors="grey",
    linewidth=1,
    alpha=1,
    zorder=3,
    ax=ax,
)

# Write each player's name centred on their circle.
for node in scatter_df.itertuples(index=False):
    pitch.annotate(
        node.player_name,
        xy=(node.x, node.y),
        c="black",
        va="center",
        ha="center",
        weight="bold",
        size=16,
        zorder=4,
        ax=ax,
    )

# fig.suptitle("Player location - Germany", fontsize = 30)
plt.show()

In [None]:
# Passing network: player nodes plus one line per frequent passer/recipient pair.
nw_color = col.get("ocher")
pitch = Pitch(pitch_type='statsbomb', line_color='grey')
fig, ax = pitch.draw(figsize=(16, 11), constrained_layout=True, tight_layout=False)

# Nodes: one circle per player at the average location, with the name on top.
pitch.scatter(scatter_df.x, scatter_df.y, s=scatter_df.marker_size, color=nw_color, edgecolors='grey', linewidth=1, alpha=1, ax=ax, zorder = 3)
for i, row in scatter_df.iterrows():
    pitch.annotate(row.player_name, xy=(row.x, row.y), c='black', va='center', ha='center', weight = "bold", size=16, ax=ax, zorder = 4)

# Node position lookup built once, instead of scanning scatter_df four times
# for every edge.
node_xy = {
    name: (x, y)
    for name, x, y in zip(scatter_df["player_name"], scatter_df["x"], scatter_df["y"])
}
# The largest pair count is the same for every edge, so compute it once.
max_pass_count = lines_df['pass_count'].max()

for i, row in lines_df.iterrows():
    player1 = row["player_name"]
    player2 = row['pass_recipient_name']
    # scatter_df only has players who made at least one pass. A recipient
    # without a pass of their own has no node, so that edge is skipped rather
    # than failing the whole cell.
    if player1 not in node_xy or player2 not in node_xy:
        continue
    # Line runs between the two players' average locations.
    player1_x, player1_y = node_xy[player1]
    player2_x, player2_y = node_xy[player2]
    num_passes = row["pass_count"]
    # The more passes, the wider the line (the busiest pair gets width 10).
    line_width = (num_passes / max_pass_count * 10)
    pitch.lines(player1_x, player1_y, player2_x, player2_y,
                alpha=1, lw=line_width, zorder=2, color=nw_color, ax = ax)

# fig.suptitle(f"Germany Passing Network against {oppenent_team_name.split()[0]}", fontsize = 24)
# NOTE(review): the file name is spelled "Nework"; kept unchanged because other
# material may reference this exact file.
fig.savefig("PassingNework.svg", format="svg")
plt.show()

### Defensive actions

In [None]:
# List the distinct event types present in the loaded match; the cells below
# filter on some of these values.
df["type_name"].unique()

In [None]:
# "Duel" events of the selected player in the loaded match (descriptive columns only).
df.loc[
    df["type_name"].eq("Duel") & df["player_name"].eq(player_name),
    ["player_name", "type_name", "sub_type_name", "play_pattern_name", "outcome_name"],
]

In [None]:
# "Interception" events of the selected player in the loaded match (descriptive columns only).
df.loc[
    df["type_name"].eq("Interception") & df["player_name"].eq(player_name),
    ["player_name", "type_name", "sub_type_name", "play_pattern_name", "outcome_name"],
]

In [None]:
# "Carry" events of the selected player in the loaded match (descriptive columns only).
df.loc[
    df["type_name"].eq("Carry") & df["player_name"].eq(player_name),
    ["player_name", "type_name", "sub_type_name", "play_pattern_name", "outcome_name"],
]


In [None]:
# "Pressure" events of the selected player in the loaded match (descriptive columns only).
df.loc[
    df["type_name"].eq("Pressure") & df["player_name"].eq(player_name),
    ["player_name", "type_name", "sub_type_name", "play_pattern_name", "outcome_name"],
]

In [None]:
# "Block" events of the selected player in the loaded match (descriptive columns only).
df.loc[
    df["type_name"].eq("Block") & df["player_name"].eq(player_name),
    ["player_name", "type_name", "sub_type_name", "play_pattern_name", "outcome_name"],
]


In [None]:
# "Ball Recovery" events of the selected player in the loaded match (descriptive columns only).
df.loc[
    df["type_name"].eq("Ball Recovery") & df["player_name"].eq(player_name),
    ["player_name", "type_name", "sub_type_name", "play_pattern_name", "outcome_name"],
]

In [None]:
# Collect the selected player's duels and interceptions over all of the team's
# matches. Per-match frames are gathered in lists and concatenated once, rather
# than calling pd.concat inside the loop starting from an empty DataFrame.
duel_frames = []
interception_frames = []
# A dedicated loop variable keeps the `match_id` chosen for the single-match
# analysis from being overwritten.
for team_match_id in match_ids:
    # parser.event returns several frames; only the first (the events) is used.
    df_idx = parser.event(team_match_id)[0]
    df_idx_player = df_idx[df_idx["player_name"] == player_name]
    duel_frames.append(df_idx_player[df_idx_player["type_name"] == "Duel"])
    interception_frames.append(df_idx_player[df_idx_player["type_name"] == "Interception"])

# pd.concat raises on an empty list, so fall back to an empty frame when the
# team has no matches.
df_duel = pd.concat(duel_frames, axis=0) if duel_frames else pd.DataFrame()
df_interception = pd.concat(interception_frames, axis=0) if interception_frames else pd.DataFrame()


In [None]:
# (rows, columns) of the collected duel events: the row count is the number
# of duels gathered over all matches in match_ids.
df_duel.shape

In [None]:
# Descriptive columns of every collected duel event.
df_duel.loc[
    :,
    ["player_name", "type_name", "sub_type_name", "play_pattern_name", "outcome_name"],
]

In [None]:
# A duel is treated as successful when its outcome is "Won" or
# "Success In Play"; everything else (including a missing outcome) is not.
# NOTE(review): other success-like outcome labels may exist in the data -
# confirm this list is complete for the intended definition.
bind_successful_duel = df_duel["outcome_name"].isin(["Won", "Success In Play"])

# Split once instead of re-filtering for every coordinate.
duels_successful = df_duel[bind_successful_duel]
duels_other = df_duel[~bind_successful_duel]

event_color = col.get("ocher")
pitch = Pitch(pitch_type='statsbomb', line_color='grey')
fig, ax = pitch.draw(figsize=(16, 11), constrained_layout=True, tight_layout=False)
# Successful duels are opaque, all others are faded.
pitch.scatter(duels_successful["x"], duels_successful["y"], alpha = 1, s = 225, color=event_color, ax=ax)
pitch.scatter(duels_other["x"], duels_other["y"], alpha = .2, s = 225, color=event_color, ax=ax)
fig.savefig("Duels.svg", format="svg")
# plt.show() matches the other plotting cells; fig.show() only emits a warning
# when the active backend is a non-GUI one.
plt.show()

In [None]:
# Locations of all collected interceptions of the selected player.
event_color = col["ocher"]
pitch = Pitch(line_color="grey", pitch_type="statsbomb")
fig, ax = pitch.draw(figsize=(16, 11), tight_layout=False, constrained_layout=True)
pitch.scatter(
    df_interception["x"],
    df_interception["y"],
    s=225,
    alpha=0.4,
    color=event_color,
    ax=ax,
)
# Ask matplotlib for equal scaling on both axes; the returned limits are the cell output.
ax.axis("equal")

In [None]:
# (rows, columns) of the duels flagged by bind_successful_duel: the row count
# is the number of successful duels.
df_duel[bind_successful_duel].shape