In [None]:
!pip install statsbombpy kloppy polars pyarrow

### StatsBomb Open Event Data

[Hudl StatsBomb](https://www.hudl.com/en_gb/products/statsbomb) is a big data provider that offers free data for research purposes.

They also offer an API, usable through their `statsbombpy` Python package. 


In [1]:
import numpy as np

from kloppy import statsbomb
import warnings
from statsbombpy.api_client import NoAuthWarning

# Ignore every warning of category NoAuthWarning for the rest of the session.
# NOTE(review): presumably statsbombpy emits this when no API credentials are
# configured (open-data access only) — confirm against the statsbombpy docs.
warnings.filterwarnings('ignore', category=NoAuthWarning)
from statsbombpy import sb

### World Cup

The World Cup has `competition_id` 43, and each World Cup has its own `season_id` too, namely `3` for World Cup 2018 and `106` for World Cup 2022.

Using the StatsBomb Python package we can load the matches easily into a DataFrame. 

In [None]:
# competition_id=43 selects the World Cup; season_id=106 selects the 2022 edition.
matches = sb.matches(
    competition_id=43,
    season_id=106,
)
matches

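### Using Kloppy we can directly load this data

First we use Polars to select the matches Cameroon played in; after that we load the first of those matches with Kloppy.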

In [None]:
import polars as pl

# Raise the table display limits to 64 rows and 100 columns.
pl.Config.set_tbl_rows(64)
pl.Config.set_tbl_cols(100)

# Keep only the matches in which Cameroon played, either as home or away team.
is_cameroon_home = pl.col("home_team") == "Cameroon"
is_cameroon_away = pl.col("away_team") == "Cameroon"

cm_matches = pl.from_pandas(matches).filter(is_cameroon_home | is_cameroon_away)
cm_matches

match_id,match_date,kick_off,competition,season,home_team,away_team,home_score,away_score,match_status,match_status_360,last_updated,last_updated_360,match_week,competition_stage,stadium,referee,home_managers,away_managers,data_version,shot_fidelity_version,xy_fidelity_version
i64,str,str,str,str,str,str,i64,i64,str,str,str,str,i64,str,str,str,str,str,str,str,str
3857259,"""2022-11-28""","""12:00:00.000""","""International - FIFA World Cup""","""2022""","""Cameroon""","""Serbia""",3,3,"""available""","""available""","""2023-05-02T06:22:00.966145""","""2023-06-20T16:12:11.659469""",2,"""Group Stage""","""Al Janoub Stadium ""","""Mohamed Abdulla Hassan Mohd""","""Rigobert Song Bahanag""","""Dragan Stojković""","""1.1.0""","""2""","""2"""
3857290,"""2022-11-24""","""12:00:00.000""","""International - FIFA World Cup""","""2022""","""Switzerland""","""Cameroon""",1,0,"""available""","""available""","""2023-02-28T18:50:54.855427""","""2023-04-27T00:35:35.819237""",1,"""Group Stage""","""Al Janoub Stadium ""","""Facundo Tello Figueroa""","""Murat Yakin""","""Rigobert Song Bahanag""","""1.1.0""","""2""","""2"""
3857280,"""2022-12-02""","""21:00:00.000""","""International - FIFA World Cup""","""2022""","""Cameroon""","""Brazil""",1,0,"""available""","""available""","""2023-02-19T06:21:58.854769""","""2023-04-26T23:48:44.801077""",3,"""Group Stage""","""Lusail Stadium""","""Ismail Elfath""","""Rigobert Song Bahanag""","""Adenor Leonardo Bacchi""","""1.1.0""","""2""","""2"""


In [31]:
# Take the match_id from the first row of the filtered Cameroon matches.
match_id = cm_matches["match_id"][0]

# Build the raw GitHub URLs of the StatsBomb open-data files for this match:
# events, lineups and 360 data.
event_url = f"https://raw.githubusercontent.com/statsbomb/open-data/master/data/events/{match_id}.json"
lineup_url = f"https://raw.githubusercontent.com/statsbomb/open-data/master/data/lineups/{match_id}.json"
three_sixty_url = f"https://raw.githubusercontent.com/statsbomb/open-data/master/data/three-sixty/{match_id}.json"

# Load the three files into a single kloppy dataset.
statsbomb_dataset = statsbomb.load(
    event_data=event_url,
    lineup_data=lineup_url,
    three_sixty_data=three_sixty_url,
)
statsbomb_dataset

<EventDataset record_count=3388>