**Disclaimer**: this notebook's compatibility with StatsBomb event data 4.0.0 was last checked on January 3rd, 2021

In [1]:
%load_ext autoreload
%autoreload 2
import os
import warnings
import pandas as pd
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
import tqdm


In [2]:
import socceraction.spadl as spadl
import socceraction.spadl.statsbomb as statsbomb
# import .statsbomb as statsbomb
# from .statsbomb import statsbomb as statsbomb

import socceraction.atomic.spadl as atomicspadl

## Set up the StatsBombLoader

In [3]:
# Default: read the free, public StatsBomb open data straight from GitHub.
free_open_data_remote = "https://raw.githubusercontent.com/statsbomb/open-data/master/data/"
SBL = statsbomb.StatsBombLoader(getter="remote", root=free_open_data_remote)

# Alternative: read from a local folder holding StatsBomb data instead.
# Uncomment the two lines below and point `datafolder` at that folder.
# datafolder = "../data-epl"  # example of a local folder with StatsBomb data
# SBL = statsbomb.StatsBombLoader(root=datafolder, getter="local")

# Echo the loader so the cell output shows which loader object is active.
SBL

<socceraction.spadl.statsbomb.StatsBombLoader at 0x236fef8f7c0>

## Select competitions to load and convert

In [4]:
# Ask the loader for its competition table, then display the distinct
# competition names it contains.
competitions = SBL.competitions()
set(competitions["competition_name"])

{'Champions League',
 "FA Women's Super League",
 'FIFA World Cup',
 'La Liga',
 'NWSL',
 'Premier League',
 "Women's World Cup"}

In [5]:
# Name of the competition to load and convert. According to the competition
# names listed by the previous cell, the choices are: "Champions League",
# "FA Women's Super League", "FIFA World Cup", "La Liga" (the Messi data),
# "NWSL", "Premier League" and "Women's World Cup".
# Set to None to keep every available competition.
selected_competition_name = "Champions League"

if selected_competition_name is None:
    selected_competitions = competitions
else:
    selected_competitions = competitions[
        competitions.competition_name == selected_competition_name
    ]

# Display the selected (competition, season) rows.
selected_competitions

Unnamed: 0,season_id,competition_id,competition_name,country_name,competition_gender,season_name
0,4,16,Champions League,Europe,male,2018/2019
1,1,16,Champions League,Europe,male,2017/2018
2,2,16,Champions League,Europe,male,2016/2017
3,27,16,Champions League,Europe,male,2015/2016
4,26,16,Champions League,Europe,male,2014/2015
5,25,16,Champions League,Europe,male,2013/2014
6,24,16,Champions League,Europe,male,2012/2013
7,23,16,Champions League,Europe,male,2011/2012
8,22,16,Champions League,Europe,male,2010/2011
9,21,16,Champions League,Europe,male,2009/2010


In [6]:
# Fetch the games of every selected (competition, season) row and stack them
# into one frame with a fresh 0..n-1 index.
# NOTE(review): the output recorded for this cell is a KeyError mentioning
# 'venue' and 'referee_id'. Neither column is selected in this cell, so the
# error presumably originates inside SBL.games -- confirm against the installed
# socceraction version.
games_per_season = [
    SBL.games(season.competition_id, season.season_id)
    for season in selected_competitions.itertuples()
]
games = pd.concat(games_per_season, sort=True).reset_index(drop=True)

# Preview the teams, date and score of each game.
games[["home_team_id", "away_team_id", "game_date", "home_score", "away_score"]]

KeyError: "['venue', 'referee_id'] not in index"

## Load and convert match data

In [None]:
# Wrap the game rows in a tqdm progress bar; games are loaded one at a time.
games_verbose = tqdm.tqdm(list(games.itertuples()), desc="Loading game data")
teams, players = [], []
# Atomic-SPADL actions of every game, keyed by game_id.
atomic_actions = {}
for game in games_verbose:
    # load data
    teams.append(SBL.teams(game.game_id))
    players.append(SBL.players(game.game_id))
    events = SBL.events(game.game_id)

    # convert data: events -> SPADL actions -> atomic-SPADL actions.
    # `actions` is only a per-game intermediate: it is overwritten on every
    # iteration, so after the loop it holds the actions of the last game only.
    actions = statsbomb.convert_to_actions(events, game.home_team_id)
    atomic_actions[game.game_id] = atomicspadl.convert_to_atomic(actions)

# The per-game team frames are concatenated; keep a single row per team_id.
teams = pd.concat(teams).drop_duplicates("team_id").reset_index(drop=True)
# The per-game player frames are concatenated as-is (no de-duplication here).
players = pd.concat(players).reset_index(drop=True)

In [None]:
# set(games_verbose.game.game_id)

## Store converted atomic-SPADL data in an HDF5 (.h5) file

In [None]:

datafolder = "../../static/data/Statsbomb/data"

# Create the data folder if it doesn't exist. The path is several levels deep,
# so use os.makedirs: os.mkdir raises FileNotFoundError when a parent folder is
# missing. exist_ok=True also tolerates the folder appearing in the meantime.
if not os.path.exists(datafolder):
    os.makedirs(datafolder, exist_ok=True)
    print(f"Directory {datafolder} created.")

spadl_h5 = os.path.join(datafolder, "atomic-spadl-statsbomb.h5")

# Store all spadl data in h5-file
with pd.HDFStore(spadl_h5) as spadlstore:
    # Metadata tables.
    spadlstore["competitions"] = selected_competitions
    spadlstore["games"] = games
    spadlstore["teams"] = teams
    # One row per player ...
    spadlstore["players"] = players[['player_id', 'player_name', 'nickname']].drop_duplicates(subset='player_id')
    # ... and the per-game player columns (line-up and minutes played).
    spadlstore["player_games"] = players[['player_id', 'game_id', 'team_id', 'is_starter', 'starting_position_id', 'starting_position_name', 'minutes_played']]

    # One key per game for the converted atomic actions.
    for game_id, game_atomic_actions in atomic_actions.items():
        spadlstore[f"atomic_actions/game_{game_id}"] = game_atomic_actions

    # Lookup tables provided by socceraction, stored next to the actions so the
    # plotting cell can merge them in by reading from the same file.
    spadlstore["results"] = spadl.results_df()
    spadlstore["bodyparts"] = spadl.bodyparts_df()
    spadlstore["atomic_actiontypes"] = atomicspadl.actiontypes_df()

## Plot the spadl data
Extra library required: ```pip install matplotsoccer```

In [None]:
# game_id of the match to plot; it must be one of the games stored in the h5-file.
selected_game_id = 7525

with pd.HDFStore(spadl_h5) as spadlstore:
    # Enrich the games table with competition info and home/away team columns.
    games = (
        spadlstore["games"]
        .merge(spadlstore["competitions"], how='left')
        .merge(spadlstore["teams"].add_prefix('home_'), how='left')
        .merge(spadlstore["teams"].add_prefix('away_'), how='left'))
    game = games[games.game_id == selected_game_id]
    # Alternative: select a game by competition and team names, e.g.
    # game = games[(games.competition_name == "FIFA World Cup")
    #              & (games.away_team_name == "England")
    #              & (games.home_team_name == "Belgium")]

    # Fail with a clear message instead of an IndexError on `.values[0]` when
    # the requested game was not stored (e.g. another competition was selected).
    if game.empty:
        raise ValueError(
            f"Game {selected_game_id} is not stored in {spadl_h5}; "
            "choose a game_id from games.game_id."
        )
    game_id = game.game_id.values[0]

    # Load the atomic actions of that game and attach the readable names from
    # the lookup tables (action type, body part, player, team).
    atomic_actions = spadlstore[f"atomic_actions/game_{game_id}"]
    atomic_actions = (
        atomic_actions
        .merge(spadlstore["atomic_actiontypes"], how="left")
        .merge(spadlstore["bodyparts"], how="left")
        .merge(spadlstore["players"], how="left")
        .merge(spadlstore["teams"], how="left")
    )

# use nickname if available else use full name
# A missing nickname (None/NaN) or an empty string falls back to player_name;
# a plain truthiness test would keep NaN, because NaN is truthy.
nickname = atomic_actions["nickname"]
has_nickname = nickname.notna() & (nickname != "")
atomic_actions["player_name"] = nickname.where(has_nickname, atomic_actions["player_name"])
atomic_actions = atomic_actions.drop(columns="nickname")

# Preview the first actions of the selected game.
atomic_actions[:5]

In [None]:
import matplotsoccer
import matplotlib.pyplot as plt


def nice_time(row):
    """Format an action's period and clock as '{minute}m{second}s'.

    Every period before the action's own period is counted as 45 minutes;
    `row.time_seconds` supplies the minutes and seconds within the period.
    """
    minute = int((row.period_id - 1) * 45 + row.time_seconds // 60)
    second = int(row.time_seconds % 60)
    return f"{minute}m{second}s"


# Row of the selected game; only used for the printed headline below.
g = game.iloc[0]

images = []
for shot in list(atomic_actions[(atomic_actions.type_name == "goal")].index):
    # Take the goal and up to 8 actions before it. The start is clamped at 0:
    # a negative start would be read as an offset from the end of the frame
    # and select the wrong rows (usually none) for goals among the first 8 actions.
    a = atomic_actions[max(shot - 8, 0):shot + 1].copy()

    # Atomic actions carry a location (x, y) and a displacement (dx, dy);
    # derive the start/end coordinates passed to the plot.
    a["start_x"] = a.x
    a["start_y"] = a.y
    a["end_x"] = a.x + a.dx
    a["end_y"] = a.y + a.dy

    # Headline minute is taken from the first action of the plotted window.
    minute = int((a.period_id.values[0] - 1) * 45 + a.time_seconds.values[0] // 60)
    game_info = f"{g.game_date} {g.home_team_name} {g.home_score}-{g.away_score} {g.away_team_name} {minute + 1}'"
    print(game_info)

    a["nice_time"] = a.apply(nice_time, axis=1)
    labels = a[["nice_time", "type_name", "player_name", "team_name"]]

    images.append(matplotsoccer.actions(
                                    location=a[["start_x", "start_y", "end_x", "end_y"]],
                                    action_type=a.type_name,
                                    team=a.team_name,
                                    label=labels,
                                    labeltitle=["time", "actiontype", "player_name", "team"],
                                    zoom=False,
                                    figsize=6,
                                    show=False,
                                    show_legend=False
                                ))

    # Fetch the current figure only after plotting. Fetching it before the call
    # returns whatever figure was current beforehand (a new empty one on the
    # first pass), which presumably is not the figure this goal is drawn on.
    fig = plt.gcf()
    # NOTE(review): the same file name is reused on every iteration, so only the
    # last goal's image remains on disk -- use a per-goal name if all are needed.
    fig.savefig('tessstttyyy.png')

images