# Packages

In [1]:
# Install statsbombpy into the running kernel's environment (no version is pinned here).
%pip install statsbombpy


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3.12 install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
import json
import time
from pathlib import Path
from datetime import datetime
import ast

import pandas as pd
import geopandas as gpd
import numpy as np
from statsbombpy import sb

In [3]:
# Project layout, resolved from the parent of the current working directory
# (assumes the notebook is launched from a sub-folder of the project).
PROJECT_ROOT = Path.cwd().resolve().parents[0]
DATA_DIR = PROJECT_ROOT.joinpath("data")
RAW_DIR = DATA_DIR.joinpath("raw")              # untouched downloads
PROCESSED_DIR = DATA_DIR.joinpath("processed")  # derived datasets

# Getting the competition ids

In [4]:
# Select the ids of every competition that has a 2015/2016 season, excluding
# the Champions League. The catalogue is fetched once and reused for both the
# selection and the sanity check below, instead of calling sb.competitions()
# again for every sub-expression.
season_id = 27 # for 2015/16 season
competitions_catalogue = sb.competitions()

is_target_season = competitions_catalogue["season_name"] == "2015/2016"
is_not_champions_league = competitions_catalogue["competition_name"] != "Champions League"
competitions = competitions_catalogue.loc[
    is_target_season & is_not_champions_league, "competition_id"
]

# Sanity check: display the names of the competitions selected for `season_id`.
competitions_catalogue.loc[
    competitions_catalogue["competition_id"].isin(competitions)
    & (competitions_catalogue["season_id"] == season_id),
    "competition_name",
]



1      1. Bundesliga
43           La Liga
60           Ligue 1
64    Premier League
66           Serie A
Name: competition_name, dtype: object

# Getting all the match ids for the relevant competitions

In [5]:
# Map each selected competition id to the list of its match ids for `season_id`.
match_ids = {
    competition_id: list(
        sb.matches(competition_id=competition_id, season_id=season_id)["match_id"]
    )
    for competition_id in competitions
}



# Getting all the events for all match ids

In [7]:
# Download the shot events of every selected match.
# The per-match frames are collected in a list and concatenated once at the
# end: calling pd.concat inside the loop re-copies all previously accumulated
# rows on every iteration, which becomes quadratic in the number of matches.
shot_frames = []

for match_id_list in match_ids.values():
    for match_id in match_id_list:
        shot_events = sb.events(match_id, filters={"type.name": "Shot"})
        shot_frames.append(shot_events)

# pd.concat raises ValueError on an empty list, so fall back to an empty
# DataFrame when no match was found (the result the incremental version gave).
if shot_frames:
    events = pd.concat(shot_frames, axis=0, ignore_index=True)
else:
    events = pd.DataFrame()



# Selecting the relevant columns and saving the data

In [14]:
# Columns kept for the raw shot dataset: event/match identifiers, timing,
# team and player context, and the shot-specific attributes.
shot_columns = [
    "id", "index", "match_id", "period",
    "timestamp", "second", "minute", "team",
    "team_id", "player", "player_id", "position",
    "play_pattern", "type", "under_pressure",
    "shot_deflected", "shot_open_goal", "shot_type",
    "shot_statsbomb_xg", "shot_freeze_frame",
    "location", "shot_outcome", "shot_body_part",
    "shot_first_time", "shot_technique",
    "shot_one_on_one",
]

# .loc with a list of labels raises KeyError if any listed column is missing
# from `events`, so an unexpected schema is reported here rather than silently
# producing an incomplete file.
football_data = events.loc[:, shot_columns]

# pandas does not create missing parent directories when writing, so make sure
# the output folder exists before saving.
RAW_DIR.mkdir(parents=True, exist_ok=True)

# The same frame is written twice: as CSV and as a pickle of the DataFrame.
# NOTE(review): any non-scalar cell values are written to the CSV as text;
# the pickle keeps the original Python objects — confirm which file downstream
# notebooks read.
football_data.to_csv(RAW_DIR / "shot_events_2015_16.csv", index=False)
football_data.to_pickle(RAW_DIR / "shot_events_2015_16.pkl")