In [None]:
import pandas as pd
import numpy as np
import seaborn as sns

from statsbombpy import sb
from statsbombpy.api_client import NoAuthWarning

from warnings import filterwarnings
filterwarnings('ignore', category=NoAuthWarning)

In [None]:
# sb.competitions()

In [None]:
# Fixture list for competition 2 / season 44, ordered chronologically
# with a fresh 0..n-1 index.
invincibles_df = (
    sb.matches(competition_id=2, season_id=44)
    .sort_values('match_date')
    .reset_index(drop=True)
)

In [None]:
# join the elements of a list-like into one comma-separated string
# (e.g. [61.0, 40.1] -> '61.0,40.1') so it can be split into columns
list_to_string = lambda x: ','.join([str(i) for i in x])


def _split_xy(points, x_name, y_name):
    '''
    Expand a Series of coordinate lists into two string columns.

    > points: Series whose non-null values are list-likes of coordinates
    > x_name, y_name: names given to the first and second coordinate

    Returns a dataframe indexed like `points`; rows without a
    coordinate list hold NaN in both columns.
    '''
    present = points.dropna()
    if present.empty:
        # nothing to split (and `.str` is unavailable on an all-NaN column),
        # so start from an empty frame and let `reindex` create the columns
        coords = pd.DataFrame()
    else:
        coords = present.apply(list_to_string).str.split(',', expand=True)

    # always expose exactly the first two coordinates, aligned to the input
    coords = coords.reindex(index=points.index, columns=[0, 1])
    coords.columns = [x_name, y_name]
    return coords


def preprocessing_events_df(
    events_df,
    o_cols=['player', 'location', 'position', 'type', 'pass_end_location', 'shot_outcome', 'dribble_outcome', 'pass_cross', 'shot_statsbomb_xg'],
    o_attrs=['Pass', 'Shot', 'Dribble', 'Cross'],
    team='Arsenal'
    ):
    '''
    Return dataframe that contains the offense-related columns in
    `o_cols` for the events whose type is listed in `o_attrs`

    > events_df: play-by-play dataframe of team formations,
                 match start/finish, and on-ball actions
    > o_cols: event columns to keep; any that are absent from
              `events_df` are returned as all-NaN columns
    > o_attrs: event types to keep (matched against the `type` column)
    > team: only events of this team are kept

    The `location` and `pass_end_location` list columns are replaced by
    `location_x`/`location_y` and `pass_end_x`/`pass_end_y`. The
    coordinates are returned as strings (NaN where unavailable), so
    callers cast them with `.astype('float64')` before plotting.
    Passes flagged by `pass_cross` are relabelled with type 'Cross'.
    '''

    # on-ball events (a player is attached) of the requested team;
    # `reindex` instead of `[o_cols]` so that a column missing from this
    # match's events comes back as NaN rather than raising KeyError
    on_ball = events_df['player_id'].notna() & (events_df['team'] == team)
    offensive_df = events_df.loc[on_ball].reindex(columns=list(o_cols))

    # select specific offensive actions (types)
    offensive_df = offensive_df[offensive_df['type'].isin(o_attrs)]

    # split x,y coordinates of where the event happened
    offensive_df = offensive_df.join(
        _split_xy(offensive_df['location'], 'location_x', 'location_y')
    )

    # pass end points only exist for passes; other rows are left as NaN
    is_pass = offensive_df['type'] == 'Pass'
    offensive_df = offensive_df.join(
        _split_xy(offensive_df.loc[is_pass, 'pass_end_location'], 'pass_end_x', 'pass_end_y')
    )

    # update type column to include crosses (done after the pass split so
    # that crosses keep their pass end coordinates)
    if 'pass_cross' in offensive_df.columns:
        offensive_df['type'] = np.where(offensive_df['pass_cross'] == 1, 'Cross', offensive_df['type'])

    # return dataframe with desired events
    return offensive_df.drop(columns=['location', 'pass_end_location'])

In [None]:
# Preprocess every match's events lazily, then stack them into a single
# frame with a fresh 0..n-1 index.
match_event_frames = (
    preprocessing_events_df(sb.events(match_id=match_id))
    for match_id in invincibles_df['match_id']
)
master_df = pd.concat(match_event_frames).reset_index(drop=True)

In [None]:
# Inspect the combined offensive events of all matches.
master_df

In [None]:
# Group the events by (player, event type) so each player's actions of one
# type can be pulled out as their own frame.
grouped_df = master_df.groupby(['player', 'type'])

In [None]:
# Nested lookup: player name -> event type -> that player's events of that type.
player_dict = {name: {} for name in master_df['player'].unique()}

for (player_name, event_type), player_events in grouped_df:
    player_dict[player_name][event_type] = player_events

In [None]:
# All shot events recorded for Thierry Henry.
th_shot = player_dict['Thierry Henry']['Shot']

In [None]:
import matplotlib.patheffects as path_effects
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.colors import LinearSegmentedColormap
from scipy.ndimage import gaussian_filter

from mplsoccer import Pitch, VerticalPitch, FontManager
from mplsoccer.statsbomb import read_event, EVENT_SLUG

# Shot coordinates as floats: the preprocessing step leaves them as strings
# (they come out of a string split), so they are cast before binning.
df = th_shot[['location_x', 'location_y']].astype('float64')

In [None]:
# Gaussian-smoothed heatmap of the shot locations held in `df`.
# Background on the technique: Tom Decroos, author of
# `matplotsoccer <https://github.com/TomDecroos/matplotsoccer>`_,
# asked whether it was possible to plot a Gaussian smoothed heatmap,
# which are available in matplotsoccer. This cell follows that example.

# setup pitch
pitch = Pitch(pitch_type='statsbomb', line_zorder=2,
              pitch_color='#22312b', line_color='#efefef')
# draw
fig, ax = pitch.draw(figsize=(6.6, 4.125))
fig.set_facecolor('#22312b')
# count the shots per bin, then blur the counts with a Gaussian filter (sigma=1)
bin_statistic = pitch.bin_statistic(df.location_x, df.location_y, statistic='count', bins=(24, 25))
bin_statistic['statistic'] = gaussian_filter(bin_statistic['statistic'], 1)
pcm = pitch.heatmap(bin_statistic, ax=ax, cmap='hot', edgecolors='#22312b')
# Add the colorbar and format off-white
cbar = fig.colorbar(pcm, ax=ax, shrink=0.6)
cbar.outline.set_edgecolor('#efefef')
cbar.ax.yaxis.set_tick_params(color='#efefef')
# recolour the colorbar tick labels so they stay readable on the dark figure
plt.setp(plt.getp(cbar.ax.axes, 'yticklabels'), color='#efefef')
plt.show()

In [None]:
# Pull a single match's events to explore the raw columns.
# The match is picked explicitly (last row of `invincibles_df`): the loop
# variable of a list comprehension is not visible outside the comprehension,
# so a bare `idx` here is undefined after Restart Kernel -> Run All.
sample_match_id = invincibles_df['match_id'].iloc[-1]
temp_df = sb.events(match_id=sample_match_id)
arsenal_temp = temp_df[temp_df['team'] == 'Arsenal']
# temp_df['shot_statsbomb_xg'].value_counts()


In [None]:
# Tally of Arsenal's shot outcomes in the sampled match.
arsenal_temp['shot_outcome'].value_counts()

In [None]:
# List the event columns available in the sampled match.
arsenal_temp.columns

In [None]:
def gather_team_data(match_id, events_df, team=None):
    '''
    Return a one-row dataframe of summary statistics, indexed by `match_id`

    > match_id: label used as the index of the returned row
    > events_df: play-by-play dataframe of a single match
    > team: optional team name; when given, only that team's events are
            counted, otherwise every row of `events_df` is used (so pass
            either pre-filtered events or a team name to get one side only)

    Columns: `xG` (sum of `shot_statsbomb_xg`), `shots`, `passes`,
    `dribbles` (rows of that `type`) and `goals` (rows whose
    `shot_outcome` is 'Goal'). A column that is absent from `events_df`
    contributes zero instead of raising KeyError.
    '''
    if team is not None:
        events_df = events_df[events_df['team'] == team]

    def count_where(column, value):
        # number of rows whose `column` equals `value`; 0 if the column is absent
        if column not in events_df:
            return 0
        return int((events_df[column] == value).sum())

    if 'shot_statsbomb_xg' in events_df:
        xg = events_df['shot_statsbomb_xg'].astype('float64').sum()
    else:
        xg = 0.0

    # dict-of-dict + transpose keeps the original layout: one row per match id
    return pd.DataFrame(
        {match_id : {
            'xG' : xg,
            'shots' : count_where('type', 'Shot'),
            'passes' : count_where('type', 'Pass'),
            'dribbles' : count_where('type', 'Dribble'),
            'goals' : count_where('shot_outcome', 'Goal')
        }}
    ).T
    

# Per-match summary for Arsenal only. `sb.events` returns both teams' events,
# so they are filtered to Arsenal before aggregating; otherwise the xG, shot,
# pass, dribble and goal totals would include the opponent's actions.
match_summaries = []
for match_id in invincibles_df['match_id']:
    match_events = sb.events(match_id=match_id)
    arsenal_events = match_events[match_events['team'] == 'Arsenal']
    match_summaries.append(gather_team_data(match_id, arsenal_events))

arsenal_summary_statistics = pd.concat(match_summaries)

In [None]:
# One row per match id with the xG, shots, passes, dribbles and goals columns.
arsenal_summary_statistics

In [None]:
# Fonts for the figure text: mplsoccer's default font, plus Rubik Mono One
# loaded from the Google Fonts repository for the titles.
rubik_mono_one_url = 'https://github.com/google/fonts/blob/main/ofl/rubikmonoone/RubikMonoOne-Regular.ttf?raw=true'

fm = FontManager()
fm_rubik = FontManager(rubik_mono_one_url)

In [None]:
# Keep only the shot events from the combined frame and display them.
is_shot = master_df['type'].eq('Shot')
arsenal_shot_df = master_df.loc[is_shot]
arsenal_shot_df

In [None]:
# Half-pitch shot map of every shot in `arsenal_shot_df`, with density plots
# of the shot coordinates on the right and bottom margins.
vertical_pitch = VerticalPitch(half=True, pad_top=0.05, pad_right=0.05, pad_bottom=0.05,
                               pad_left=0.05, line_zorder=2)

fig, axs = vertical_pitch.jointgrid(figheight=10, left=None, bottom=None,  # center aligned
                                    grid_width=0.95, marginal=0.1,
                                    # setting up the heights/space so it takes up 95% of the figure
                                    grid_height=0.80,
                                    title_height=0.1, endnote_height=0.03,
                                    title_space=0.01, endnote_space=0.01,
                                    axis=False,  # turn off title/ endnote/ marginal axes
                                    # here we filter out the left and top marginal axes
                                    ax_top=False, ax_bottom=True,
                                    ax_left=False, ax_right=True)

# the coordinates are stored as strings, so cast them once and reuse the
# numeric series for both the scatter and the marginal plots
shot_x = arsenal_shot_df['location_x'].astype('float64')
shot_y = arsenal_shot_df['location_y'].astype('float64')
shot_xg = arsenal_shot_df['shot_statsbomb_xg'].astype('float64')

# typical shot map where the scatter points vary by the expected goals value
# using alpha for transparency as there are a lot of shots stacked around the six-yard box
sc_team2 = vertical_pitch.scatter(shot_x, shot_y, s=shot_xg * 700,
                                  alpha=0.5, ec='black', color='#db0007', ax=axs['pitch'])
# kdeplots on the marginals
# remember to flip the coordinates y=x, x=y for the marginals when using vertical orientation
# (`fill=True` replaces the deprecated `shade=True` keyword)
team2_hist_x = sns.kdeplot(y=shot_x, ax=axs['right'], color='#db0007', fill=True)
team2_hist_y = sns.kdeplot(x=shot_y, ax=axs['bottom'], color='#db0007', fill=True)
# txt1 = axs['pitch'].text(x=40, y=80, s='Arsenal', fontproperties=fm_rubik.prop, color=pitch.line_color,
#                          ha='center', va='center', fontsize=60)

# titles and endnote
axs['title'].text(0.5, 0.7, "Arsenal Shooting Chart", color='#db0007',
                  fontproperties=fm_rubik.prop, fontsize=18, ha='center', va='center')
axs['title'].text(0.5, 0.3, "2003/04", color='#db0007',
                  fontproperties=fm_rubik.prop, fontsize=12, ha='center', va='center')

plt.show()