In [1]:
import os
import warnings
import pandas as pd
# Show every column when a DataFrame is displayed (no column truncation).
pd.set_option('display.max_columns', None)
# Silence pandas PerformanceWarning for the rest of the session.
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
# Silence warnings whose message starts with this text; presumably raised by the
# StatsBomb client when no credentials are given -- TODO confirm the emitter.
warnings.filterwarnings(action="ignore", message="credentials were not supplied. open data access only")
import tqdm
from socceraction.data.statsbomb import StatsBombLoader
import socceraction.spadl as spadl

In [2]:
# Remote loader created with empty credentials (both user and password are None).
open_data_creds = dict(user=None, passwd=None)
SBL = StatsBombLoader(getter="remote", creds=open_data_creds)

# List the distinct competition names returned by the loader.
competitions = SBL.competitions()
set(competitions["competition_name"])

{'1. Bundesliga',
 'African Cup of Nations',
 'Champions League',
 'Copa del Rey',
 "FA Women's Super League",
 'FIFA U20 World Cup',
 'FIFA World Cup',
 'Indian Super league',
 'La Liga',
 'Liga Profesional',
 'Ligue 1',
 'Major League Soccer',
 'NWSL',
 'North American League',
 'Premier League',
 'Serie A',
 'UEFA Euro',
 'UEFA Europa League',
 "UEFA Women's Euro",
 "Women's World Cup"}

In [7]:
# Keep only the rows for the La Liga 2019/2020 season.
is_la_liga = competitions["competition_name"].eq("La Liga")
is_season_2019_2020 = competitions["season_name"].eq("2019/2020")
selected_competitions = competitions.loc[is_la_liga & is_season_2019_2020]
selected_competitions

Unnamed: 0,season_id,competition_id,competition_name,country_name,competition_gender,season_name
37,42,11,La Liga,Spain,male,2019/2020


In [8]:
# Fetch the games of every selected (competition, season) pair and stack them
# into a single frame.
games_per_competition = []
for row in selected_competitions.itertuples():
    games_per_competition.append(SBL.games(row.competition_id, row.season_id))
games = pd.concat(games_per_competition)

# Show the fixture essentials only.
fixture_columns = ["home_team_id", "away_team_id", "game_date", "home_score", "away_score"]
games[fixture_columns]

Unnamed: 0,home_team_id,away_team_id,game_date,home_score,away_score
0,217,322,2020-02-22 16:00:00,5,0
1,217,205,2020-06-16 22:00:00,2,0
2,209,217,2020-06-27 17:00:00,2,2
3,217,220,2019-12-18 20:00:00,0,0
4,217,222,2019-09-24 21:00:00,2,1
5,222,217,2020-07-05 22:00:00,1,4
6,221,217,2019-11-02 16:00:00,3,1
7,217,1043,2019-12-07 21:00:00,5,2
8,210,217,2019-12-14 16:00:00,2,2
9,217,221,2020-02-02 21:00:00,2,1


In [10]:
# Per-game accumulators: team and player frames are concatenated after the loop,
# converted actions are kept in a dict keyed by game_id.
teams, players = [], []
actions = {}

# Keyword arguments shared by every conversion call.
conversion_options = {"xy_fidelity_version": 1, "shot_fidelity_version": 1}

games_verbose = tqdm.tqdm(list(games.itertuples()), desc="Loading game data")
for game in games_verbose:
    # Load the raw data for this game: teams, players, then events.
    teams.append(SBL.teams(game.game_id))
    players.append(SBL.players(game.game_id))
    events = SBL.events(game.game_id)

    # Convert the events into actions, oriented by the home team.
    actions[game.game_id] = spadl.statsbomb.convert_to_actions(
        events,
        home_team_id=game.home_team_id,
        **conversion_options,
    )

# One row per team; player rows are kept per game (no de-duplication here).
teams = pd.concat(teams)
teams = teams.drop_duplicates(subset="team_id")
players = pd.concat(players)

  eventsdf["under_pressure"] = eventsdf["under_pressure"].fillna(False).astype(bool)
  eventsdf["counterpress"] = eventsdf["counterpress"].fillna(False).astype(bool)
  eventsdf["under_pressure"] = eventsdf["under_pressure"].fillna(False).astype(bool)
  eventsdf["counterpress"] = eventsdf["counterpress"].fillna(False).astype(bool)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  events['extra'].fillna({}, inplace=True)
  eventsdf["under_pressure"] = eventsdf["under_pressure"].fillna(False).astype(bool)
  eventsdf["counterpress"] = eventsdf["counterpress"].fillna(False).astype(bool)
  eventsdf["under_pressure"] = eventsdf["under_pressure"].fillna(False).astyp

In [None]:
datafolder = "../data-fifa"

# Create the data folder if it doesn't exist. makedirs also creates missing
# parents and, with exist_ok=True, tolerates the folder appearing concurrently.
if not os.path.exists(datafolder):
    os.makedirs(datafolder, exist_ok=True)
    print(f"Directory {datafolder} created.")

spadl_h5 = os.path.join(datafolder, "spadl-statsbomb.h5")

# Columns persisted for the player tables.
player_columns = ['player_id', 'player_name', 'nickname']
player_game_columns = [
    'player_id', 'game_id', 'team_id', 'is_starter',
    'starting_position_id', 'starting_position_name', 'minutes_played',
]

# Store all spadl data in h5-file.
# mode="w" recreates the file on every run: with the default append mode,
# "actions/game_*" keys written by an earlier run (e.g. for a different
# competition) would stay in the file even though "games" no longer lists them.
with pd.HDFStore(spadl_h5, mode="w") as spadlstore:
    spadlstore["competitions"] = selected_competitions
    spadlstore["games"] = games
    spadlstore["teams"] = teams
    spadlstore["players"] = players[player_columns].drop_duplicates(subset='player_id')
    spadlstore["player_games"] = players[player_game_columns]
    # One key per game holding that game's converted actions.
    for game_id, game_actions in actions.items():
        spadlstore[f"actions/game_{game_id}"] = game_actions

In [None]:
pip install matplotsoccer

In [None]:
# The store written by this notebook contains the La Liga 2019/2020 games only,
# so the previous "FIFA World Cup, Belgium v England" filter matched no row and
# `game.game_id.values[0]` raised IndexError. Select a fixture that is present
# in the loaded games instead (home team 217 v away team 220, 2019-12-18).
COMPETITION_NAME = "La Liga"
HOME_TEAM_ID = 217
AWAY_TEAM_ID = 220

# Read-only access is enough here and protects the stored data.
with pd.HDFStore(spadl_h5, mode="r") as spadlstore:
    # Enrich the games with competition info and home/away team columns.
    games = (
        spadlstore["games"]
        .merge(spadlstore["competitions"], how='left')
        .merge(spadlstore["teams"].add_prefix('home_'), how='left')
        .merge(spadlstore["teams"].add_prefix('away_'), how='left'))

    game = games[(games.competition_name == COMPETITION_NAME)
                  & (games.home_team_id == HOME_TEAM_ID)
                  & (games.away_team_id == AWAY_TEAM_ID)]
    if game.empty:
        # Fail with an explanation rather than an opaque IndexError below.
        raise ValueError(
            f"No stored game for competition {COMPETITION_NAME!r} with "
            f"home_team_id={HOME_TEAM_ID} and away_team_id={AWAY_TEAM_ID}."
        )
    game_id = game.game_id.values[0]

    # Attach the lookup tables (action type, result, body part) and the
    # player / team details to the stored actions of the selected game.
    actions = (
        spadlstore[f"actions/game_{game_id}"]
        .merge(spadl.actiontypes_df(), how="left")
        .merge(spadl.results_df(), how="left")
        .merge(spadl.bodyparts_df(), how="left")
        .merge(spadlstore["players"], how="left")
        .merge(spadlstore["teams"], how="left")
    )

# use nickname if available else use full name
# A missing nickname may be None, NaN or ""; NaN is truthy, so a plain
# truthiness test would overwrite a valid player_name with NaN.
nickname = actions["nickname"]
has_nickname = nickname.notna() & (nickname != "")
actions["player_name"] = nickname.where(has_nickname, actions["player_name"])
actions = actions.drop(columns="nickname")
actions.head()