In [47]:
import pandas as pd
import numpy as np

In [48]:
# Downloads the StatsBomb open-data index of freely available competitions
# (including their seasons and leagues) into a DataFrame:
competitions = pd.read_json(
    'https://raw.githubusercontent.com/statsbomb/open-data/master/data/competitions.json'
)

In [49]:
# Adds a 'path' column holding, for every competition/season row, the URL of the
# file that lists its matches.
# The URLs are built column-wise: the previous loop took `i` from
# `competitions.index` (labels) but read the row with `competitions.iloc[i]`
# (positions), which is only correct for a default 0..n-1 index and also
# materialised a full row Series on every iteration.
path_list = [
    f'https://raw.githubusercontent.com/statsbomb/open-data/master/data/matches/{comp_id}/{season_id}.json'
    for comp_id, season_id in zip(competitions['competition_id'], competitions['season_id'])
]

competitions['path'] = path_list

In [50]:
# Downloads the match list of every competition/season and collects the resulting
# frames in a single dictionary keyed by '<competition_name> <season_name>'.
# Competitions whose name contains 'World Cup' are skipped.
# The three needed columns are walked in parallel instead of calling
# `competitions.iloc[i]` with `i` taken from `competitions.index`: that mixed index
# labels with positions and rebuilt the same row Series several times per iteration.
matches_dict = {}
for competition_name, season_name, matches_path in zip(
    competitions['competition_name'], competitions['season_name'], competitions['path']
):
    if 'World Cup' in competition_name:
        continue
    matches_dict[f'{competition_name} {season_name}'] = pd.read_json(matches_path)

In [51]:
# Adds to every match frame a 'path' column with the URL of that match's event file
# (nothing is downloaded here; only the URLs are built from the match ids):
events_url_template = 'https://raw.githubusercontent.com/statsbomb/open-data/master/data/events/{}.json'
for comp in matches_dict.values():
    comp['path'] = comp['match_id'].map(events_url_template.format)

In [52]:
# Function that gets the winner of a match:
def get_winner(match):
    """Return the winner of a match based on its final score.

    Parameters
    ----------
    match : mapping
        Must provide the keys 'home_score', 'away_score', 'home_team' and
        'away_team'.

    Returns
    -------
    The value stored under 'home_team' or 'away_team' for the side with the
    higher score, or the string 'draw' when both scores are equal.

    Raises
    ------
    ValueError
        If the two scores cannot be ordered (for example when one of them is
        NaN). Previously this case fell through all three comparisons and
        failed with an UnboundLocalError on the return statement.
    """
    home_score = match['home_score']
    away_score = match['away_score']

    if home_score > away_score:
        return match['home_team']
    if home_score < away_score:
        return match['away_team']
    if home_score == away_score:
        return 'draw'

    # None of the comparisons held, e.g. NaN compared with a number.
    raise ValueError(
        f'Cannot determine the winner: scores {home_score!r} and {away_score!r} are not comparable'
    )

In [53]:
# Function that gets the team that has been leading before the shot:
def leader_function(shots, home_team, away_team):
    """Return, for every shot, which team was leading just before that shot.

    Parameters
    ----------
    shots : pandas.DataFrame
        Needs the columns 'outcome' and 'team'. Rows are processed in their
        stored order, so they are expected to be in match order.
    home_team, away_team :
        Values compared against the 'team' column to attribute goals.

    Returns
    -------
    list
        One entry per row of `shots`: `home_team`, `away_team` or the string
        'draw', reflecting the score built up from all *earlier* rows.

    Notes
    -----
    A row counts as a goal when its outcome is 'Goal' or 'Own Goal'. The goal is
    credited to the home side if the row's team equals `home_team`, otherwise to
    the away side; own goals are therefore credited to whichever team is stored
    in the row's 'team' field.

    The rows are walked positionally. The earlier implementation compared index
    labels with positional `.iloc` counters, which only worked for a default
    0..n-1 index and produced wrong leaders or an IndexError otherwise.
    """
    leader_list = []
    if shots.empty:
        return leader_list

    goals_home_team = 0
    goals_away_team = 0

    for outcome, team in zip(shots['outcome'], shots['team']):
        # First record the standing before this shot ...
        if goals_home_team > goals_away_team:
            leader = home_team
        elif goals_away_team > goals_home_team:
            leader = away_team
        else:
            leader = 'draw'
        leader_list.append(leader)

        # ... then let the shot itself count for the following rows.
        if outcome == 'Goal' or outcome == 'Own Goal':
            if team == home_team:
                goals_home_team += 1
            else:
                goals_away_team += 1

    return leader_list

In [54]:
# Function that gets a frame of all shots of a match:
def get_shots(match):
    """Build a frame with one row per shot or own goal of a match.

    The event file is read with `pd.read_json(match['path'])`. Every event is
    visited in order:

    * events whose type name is 'Own Goal Against' become a row with outcome
      'Own Goal' and the string 'Own Goal' in the xG column;
    * events carrying shot data become a row with the shot's outcome name and
      its 'statsbomb_xg' value;
    * all other events are ignored.

    Each row also stores the event's minute, its period (column 'half') and the
    name of the possession team (column 'team').

    Parameters
    ----------
    match : mapping
        Must provide 'home_team', 'away_team' and 'path'.

    Returns
    -------
    pandas.DataFrame
        Columns 'minute', 'half', 'outcome', 'xG', 'team', plus 'leader' (from
        `leader_function`), 'length_half1' (largest minute among period-1
        events) and 'length_match' (largest minute among all events).
    """
    home_team = match['home_team']
    away_team = match['away_team']
    events = pd.read_json(match['path'])
    shot_events = events[events.shot.notnull()]
    shot_event_ids = shot_events.shot.keys()
    records = []

    for event_id in events.shot.keys():
        if events['type'][event_id]['name'] == 'Own Goal Against':
            # Own goals carry no shot data; mark both outcome and xG accordingly.
            source = events
            outcome = 'Own Goal'
            xg_value = 'Own Goal'
        elif event_id in shot_event_ids:
            source = shot_events
            shot_info = source['shot'][event_id]
            outcome = shot_info['outcome']['name']
            xg_value = shot_info['statsbomb_xg']
        else:
            continue

        records.append([
            source['minute'][event_id],
            source['period'][event_id],
            outcome,
            xg_value,
            source['possession_team'][event_id]['name'],
        ])

    column_names = ['minute', 'half', 'outcome', 'xG', 'team']
    shots = pd.DataFrame(records, columns=column_names).reset_index(drop=True)
    shots['leader'] = leader_function(shots, home_team, away_team)
    first_half_events = events[events.period == 1]
    shots['length_half1'] = first_half_events.minute.max()
    shots['length_match'] = events.minute.max()
    return shots

In [55]:
# Builds, for every match of every loaded competition, a record holding the match
# metadata, its winner and the frame of its shots. Records are keyed by
# '<home team> - <away team>, <competition key>':
events_dict = {}
for comp_key, comp in matches_dict.items():
    for match_idx in comp.home_team.keys():
        home_name = comp.home_team[match_idx]['home_team_name']
        away_name = comp.away_team[match_idx]['away_team_name']
        match_key = f'''{home_name} - {away_name}, {comp_key}'''

        # Register the record first, then fill it through a local alias.
        match_info = events_dict[match_key] = {}
        match_info.update({
            'competition': comp.competition[match_idx]['competition_name'],
            'season': comp.season[match_idx]['season_name'],
            'match_date': comp.match_date[match_idx],
            'home_team': home_name,
            'away_team': away_name,
            'home_score': comp.home_score[match_idx],
            'away_score': comp.away_score[match_idx],
            'path': comp.path[match_idx],
        })
        # Both helpers read the fields stored above from the record itself.
        match_info['winner'] = get_winner(match_info)
        match_info['data'] = get_shots(match_info)

In [56]:
# Function that converts values of a dict to prepare them for conversion to a .json-file.
import datetime

def _convert_value(value):
    """Return `value` in a form that `json.dump` can serialise.

    Dicts are handed to `myconverter` (converted in place), lists are converted
    element by element in place, tuples are rebuilt from their converted items,
    and NumPy scalars, NumPy arrays and datetimes are replaced by plain Python
    values. Anything else is returned unchanged.
    """
    if isinstance(value, dict):
        return myconverter(value)
    if isinstance(value, list):
        value[:] = [_convert_value(item) for item in value]
        return value
    if isinstance(value, tuple):
        return tuple(_convert_value(item) for item in value)
    # The NumPy scalar handling is adapted from
    # https://stackoverflow.com/questions/50916422/python-typeerror-object-of-type-int64-is-not-json-serializable/50916741
    if isinstance(value, np.bool_):
        return bool(value)
    if isinstance(value, np.integer):
        return int(value)
    if isinstance(value, np.floating):
        return float(value)
    if isinstance(value, np.ndarray):
        # tolist() yields nested Python lists; object arrays may still contain
        # values that need converting, so run the result through again.
        return _convert_value(value.tolist())
    if isinstance(value, datetime.datetime):
        return str(value)
    return value


def myconverter(dictionary):
    """Convert the values of a dict, in place, so it can be written as JSON.

    NumPy integers, floats and booleans become `int`, `float` and `bool`, NumPy
    arrays become lists, and `datetime.datetime` values become their string
    representation. Nested dicts (including dict subclasses), lists and tuples
    are processed recursively, so NumPy values inside a list are converted too.

    Parameters
    ----------
    dictionary : dict
        The mapping to convert. It is modified in place; keys are left untouched.

    Returns
    -------
    dict
        The same `dictionary` object that was passed in.
    """
    # Snapshot the items so that reassigning values cannot disturb the iteration.
    for key, value in list(dictionary.items()):
        dictionary[key] = _convert_value(value)
    return dictionary

In [60]:
# Saving the frame as a .json-file:
import json

# Every DataFrame is turned into a plain dict first, because the combined
# structure is written with json.dump below.
if type(competitions) is pd.DataFrame:
    competitions = competitions.to_dict()

# Replace the match frames in place (existing keys only, so the order is kept).
matches_dict.update({
    name: frame.to_dict()
    for name, frame in matches_dict.items()
    if type(frame) is pd.DataFrame
})

# Same for any frame stored inside the per-match records.
for match_info in events_dict.values():
    match_info.update({
        field: content.to_dict()
        for field, content in match_info.items()
        if type(content) is pd.DataFrame
    })

# Bundle everything and convert the remaining non-JSON values.
statsbomb_data = myconverter({
    'competitions': competitions,
    'matches': matches_dict,
    'events': events_dict,
})

with open('statsbomb_data.json', 'w') as file:
    json.dump(statsbomb_data, file)