In [1]:
import glob
import json
import pandas as pd

teams = ['homeTeam', 'awayTeam']

goals = []
red_cards = []
yellow_red_cards = []
unknown_actions = []

json_files = glob.glob("/Users/petebrown/Developer/update-from-bbc/bbc-json/matches/*.json")

for json_file in json_files:
    with open(json_file, 'r') as f:
        data = json.load(f)

        if 'payload' in data.keys():
            if 'matchData' in data['payload'][0]['body'].keys():
                try:
                    data_root = data['payload'][0]['body']['matchData'][0]['tournamentDatesWithEvents']
                    match_keys = data_root.keys()
                except:
                    print(f'Skipping {json_file} - no tournamentDatesWithEvents key')
                    pass
            else:
                print(f'Skipping {json_file} - no matchData key')
                continue
        else:
            data_root = data['matchData'][0]['tournamentDatesWithEvents']
            match_keys = data_root.keys()

        match_keys = sorted(match_keys)

        for k in match_keys:
            events = data_root[k][0]['events']
            for event in events:
                game_date = event['startTime']
                for team in teams:
                    t = event[team]['name']['full']
                    s = pd.json_normalize(event[team]['scores'])
                    if 'playerActions' not in event[team].keys():
                        continue
                    else:
                        plrs = event[team]['playerActions']
                        for plr in plrs:
                            p = pd.json_normalize(plr['name'])
                            
                            actions = plr['actions']

                            for action in actions:
                                a = pd.json_normalize(action)
                                
                                act = pd.concat([a, p, s], axis=1)
                                act['team'] = team
                                act['team_name'] = t
                                act['date'] = game_date
                                if act['type'][0] == 'goal':
                                    goals.append(act)
                                elif act['type'][0] == 'red-card':
                                    red_cards.append(act)
                                elif act['type'][0] == 'yellow-red-card':
                                    yellow_red_cards.append(act)
                                else:
                                    unknown_actions.append(act['type'][0])
                                    print(f'Unknown action: {act["type"][0]}')

goals_df = pd.concat(goals, axis=0, ignore_index=True).drop_duplicates()
red_cards_df = pd.concat(red_cards, axis=0, ignore_index=True).drop_duplicates()
yellow_red_cards_df = pd.concat(yellow_red_cards, axis=0, ignore_index=True).drop_duplicates()

Skipping /Users/petebrown/Developer/update-from-bbc/bbc-json/matches/2015-07-01.json - no tournamentDatesWithEvents key
Skipping /Users/petebrown/Developer/update-from-bbc/bbc-json/matches/2018-06-01.json - no tournamentDatesWithEvents key
Skipping /Users/petebrown/Developer/update-from-bbc/bbc-json/matches/2015-06-01.json - no tournamentDatesWithEvents key
Skipping /Users/petebrown/Developer/update-from-bbc/bbc-json/matches/2022-06-01.json - no tournamentDatesWithEvents key
Skipping /Users/petebrown/Developer/update-from-bbc/bbc-json/matches/2017-07-01.json - no tournamentDatesWithEvents key
Skipping /Users/petebrown/Developer/update-from-bbc/bbc-json/matches/2020-07-01.json - no tournamentDatesWithEvents key
Skipping /Users/petebrown/Developer/update-from-bbc/bbc-json/matches/2012-06-01.json - no tournamentDatesWithEvents key
Skipping /Users/petebrown/Developer/update-from-bbc/bbc-json/matches/2020-06-01.json - no tournamentDatesWithEvents key
Skipping /Users/petebrown/Developer/upda

In [2]:
goals_df = goals_df.sort_values(['date', 'timeElapsed', 'addedTime'])

goals_df['home_goal'] =  goals_df['team'].apply(lambda x: x=='homeTeam').astype(int)
goals_df['away_goal'] =  goals_df['team'].apply(lambda x: x=='awayTeam').astype(int)
goals_df['home_score'] = goals_df.groupby('date')['home_goal'].cumsum()
goals_df['away_score'] = goals_df.groupby('date')['away_goal'].cumsum()

goals_df['trfc_score'] = goals_df.apply(lambda x: x['home_score'] if x['team'] == 'homeTeam' and x['team_name'] == 'Tranmere Rovers' else x['away_score'], axis=1)
goals_df['opp_score'] = goals_df.apply(lambda x: x['home_score'] if x['team'] == 'homeTeam' and x['team_name'] != 'Tranmere Rovers' else x['away_score'], axis=1)

goals_df

Unnamed: 0,type,timeElapsed,addedTime,penalty,ownGoal,displayTime,first,full,abbreviation,last,...,aggregateGoalsAway,team,team_name,date,home_goal,away_goal,home_score,away_score,trfc_score,opp_score
862,goal,34,0,False,False,34',Zoumana,Zoumana Bakayogo,Bakayogo,Bakayogo,...,,homeTeam,Tranmere Rovers,2012-08-18T15:00:00+01:00,1,0,1,0,1,0
863,goal,57,0,False,False,57',Jean-Louis,Jean-Louis Akpa Akpro,Akpa Akpro,Akpa Akpro,...,,homeTeam,Tranmere Rovers,2012-08-18T15:00:00+01:00,1,0,2,0,2,0
864,goal,65,0,False,False,65',Jean-Louis,Jean-Louis Akpa Akpro,Akpa Akpro,Akpa Akpro,...,,homeTeam,Tranmere Rovers,2012-08-18T15:00:00+01:00,1,0,3,0,3,0
865,goal,88,0,True,False,88',Michael,Michael Symes,Symes,Symes,...,,awayTeam,Leyton Orient,2012-08-18T15:00:00+01:00,0,1,3,1,1,1
872,goal,12,0,False,False,12',Andy,Andy Robinson,Robinson,Robinson,...,,awayTeam,Tranmere Rovers,2012-08-21T19:45:00+01:00,0,1,0,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
966,goal,13,0,False,False,13',Omari,Omari Patrick,Patrick,Patrick,...,,homeTeam,Tranmere Rovers,2024-08-24T12:30:00+01:00,1,0,1,0,1,0
194,goal,38,0,False,False,38',Jordan,Jordan Ayew,J Ayew,Ayew,...,,homeTeam,Leicester City,2024-08-27T19:45:00+01:00,1,0,1,0,0,1
195,goal,51,0,True,False,51',Stephy,Stephy Mavididi,Mavididi,Mavididi,...,,homeTeam,Leicester City,2024-08-27T19:45:00+01:00,1,0,2,0,0,2
196,goal,71,0,False,False,71',Wilfred,Wilfred Ndidi,Ndidi,Ndidi,...,,homeTeam,Leicester City,2024-08-27T19:45:00+01:00,1,0,3,0,0,3


In [3]:
three_plus = goals_df.query("trfc_score==0 and opp_score>=3")['date']

goals_df[goals_df['date'].isin(three_plus)].groupby(['date', 'team_name']).agg(
    {'home_score': 'max', 'away_score': 'max', 'fullTime': 'max'}) \
    .query('team_name=="Tranmere Rovers"')

Unnamed: 0_level_0,Unnamed: 1_level_0,home_score,away_score,fullTime
date,team_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2014-03-08T15:00:00+00:00,Tranmere Rovers,3,1,1
2014-03-22T15:00:00+00:00,Tranmere Rovers,3,2,2
2015-11-24T19:45:00+00:00,Tranmere Rovers,4,1,1
2018-01-01T15:00:00+00:00,Tranmere Rovers,5,2,2
2022-03-05T15:00:00+00:00,Tranmere Rovers,3,2,2


In [4]:
red_cards_df = pd.concat(red_cards, axis=0)
red_cards_df

Unnamed: 0,type,timeElapsed,addedTime,displayTime,first,full,abbreviation,last,score,halfTime,fullTime,extraTime,shootout,aggregate,aggregateGoalsAway,team,team_name,date
0,red-card,90,4,90'+4,Rushian,Rushian Hepburn-Murphy,Hepburn-Murphy,Hepburn-Murphy,2,0,2,,,,,homeTeam,Tranmere Rovers,2019-08-03T15:00:00+01:00
0,red-card,45,0,45',James,James Wallace,Wallace,Wallace,1,1,1,,,,,homeTeam,Tranmere Rovers,2014-01-01T15:00:00+00:00
0,red-card,20,0,20',Richard,Richard Stearman,Stearman,Stearman,1,0,1,,,,,awayTeam,Wolverhampton Wanderers,2014-01-01T15:00:00+00:00
0,red-card,45,0,45',Kieran,Kieran Agard,Agard,Agard,1,1,1,,,,,homeTeam,Milton Keynes Dons,2019-11-02T15:00:00+00:00
0,red-card,6,0,6',Liam,Liam Palmer,Palmer,Palmer,0,0,0,,,,,homeTeam,Tranmere Rovers,2013-02-15T19:45:00+00:00
0,red-card,36,0,36',Jean-Louis,Jean-Louis Akpa Akpro,Akpa Akpro,Akpa Akpro,0,0,0,,,,,homeTeam,Tranmere Rovers,2013-02-02T15:00:00+00:00
0,red-card,38,0,38',Bradley,Bradley Wood,Wood,Wood,2,1,2,,,,,awayTeam,Lincoln City,2016-03-25T15:00:00+00:00
0,red-card,64,0,64',Ian,Ian Goodison,Goodison,Goodison,0,0,0,,,,,homeTeam,Tranmere Rovers,2013-10-19T15:00:00+01:00
0,red-card,90,3,90'+3,Elijah,Elijah Adebayo,Adebayo,Adebayo,0,0,0,,,,,awayTeam,Stevenage,2019-02-09T15:00:00+00:00
0,red-card,90,3,90'+3,Elijah,Elijah Adebayo,Adebayo,Adebayo,0,0,0,,,,,awayTeam,Stevenage,2019-02-09T15:00:00+00:00


In [5]:
yellow_red_cards_df = pd.concat(yellow_red_cards, axis=0).drop_duplicates()

yellow_red_cards_df

Unnamed: 0,type,timeElapsed,addedTime,displayTime,first,full,abbreviation,last,score,halfTime,fullTime,extraTime,shootout,aggregate,aggregateGoalsAway,team,team_name,date
0,yellow-red-card,76,0,76',Emmanuel,Emmanuel Monthe,Monthe,Monthe,1,0,1,,,,,awayTeam,Walsall,2023-01-14T15:00:00+00:00
0,yellow-red-card,62,0,62',Darren,Darren Carter,Carter,Carter,1,1,1,,,,,awayTeam,Forest Green Rovers,2016-03-19T15:00:00+00:00
0,yellow-red-card,72,0,72',Conor,Conor McAleny,McAleny,McAleny,1,0,1,,,,,homeTeam,Salford City,2023-12-26T15:00:00+00:00
0,yellow-red-card,45,3,45'+3,Connor,Connor Jennings,Jennings,Jennings,0,0,0,,,,,homeTeam,Tranmere Rovers,2019-10-05T15:00:00+01:00
0,yellow-red-card,64,0,64',Ben,Ben Tollitt,Tollitt,Tollitt,2,2,2,,,,,homeTeam,Tranmere Rovers,2016-11-12T15:00:00+00:00
0,yellow-red-card,75,0,75',Ben,Ben Nugent,Nugent,Nugent,0,0,0,,,,,awayTeam,Stevenage,2019-02-09T15:00:00+00:00
0,yellow-red-card,28,0,28',Elliot,Elliot Watt,Watt,Watt,1,1,1,,,,,homeTeam,Bradford City,2022-04-15T15:00:00+01:00
0,yellow-red-card,90,7,90'+7,Reggie,Reggie Lambe,Lambe,Lambe,1,1,1,,,,,homeTeam,Mansfield Town,2015-04-14T19:45:00+01:00
0,yellow-red-card,84,0,84',Conor,Conor Clifford,Clifford,Clifford,0,0,0,,,,,homeTeam,Boreham Wood,2016-08-16T19:45:00+01:00
0,yellow-red-card,52,0,52',Dominic,Dominic Bernard,Bernard,Bernard,0,0,0,,,,,awayTeam,Forest Green Rovers,2023-11-11T15:00:00+00:00


In [6]:
trfc_goals = goals_df[goals_df.team_name=='Tranmere Rovers'].copy()

trfc_goals = trfc_goals[['date', 'full', 'timeElapsed', 'addedTime', 'penalty', 'ownGoal']]

trfc_goals = trfc_goals \
    .rename(columns={
        'date': 'game_date',
        'full': 'player_name',
        'timeElapsed': 'goal_min',
        'addedTime': 'added_time',
        'ownGoal': 'own_goal'
    }) \
    .sort_values(['game_date', 'goal_min', 'added_time']).reset_index(drop=True)

trfc_goals['game_date'] = trfc_goals['game_date'].str.split('T').str[0]

trfc_goals['penalty'] = trfc_goals['penalty'].astype(int)

trfc_goals['own_goal'] = trfc_goals['own_goal'].astype(int)

trfc_goals['goal_no'] = trfc_goals.groupby(['game_date', 'player_name']).cumcount() + 1

trfc_goals = trfc_goals[['game_date', 'player_name', 'goal_no', 'goal_min', 'added_time', 'penalty', 'own_goal']]

trfc_goals

Unnamed: 0,game_date,player_name,goal_no,goal_min,added_time,penalty,own_goal
0,2012-08-18,Zoumana Bakayogo,1,34,0,0,0
1,2012-08-18,Jean-Louis Akpa Akpro,1,57,0,0,0
2,2012-08-18,Jean-Louis Akpa Akpro,2,65,0,0,0
3,2012-08-21,Andy Robinson,1,12,0,0,0
4,2012-08-21,Andy Robinson,2,19,0,0,0
...,...,...,...,...,...,...,...
499,2024-08-13,Harvey Saunders,1,3,0,0,0
500,2024-08-13,Josh Williams,1,45,0,0,0
501,2024-08-13,Omari Patrick,1,71,0,0,0
502,2024-08-20,Josh Davison,1,11,0,0,0


In [7]:
trfc_goals.to_csv('bbc-json-goals.csv', index=False)

In [None]:
goals_against = goals_df[goals_df.team_name!='Tranmere Rovers'].copy()

goals_against = goals_against[['date', 'full', 'timeElapsed', 'addedTime', 'penalty', 'ownGoal']]

goals_against = goals_against \
    .rename(columns={
        'date': 'game_date',
        'full': 'player_name',
        'timeElapsed': 'goal_min',
        'addedTime': 'added_time',
        'ownGoal': 'own_goal'
    }) \
    .sort_values(['game_date', 'goal_min', 'added_time']).reset_index(drop=True)

goals_against['game_date'] = goals_against['game_date'].str.split('T').str[0]

goals_against['penalty'] = goals_against['penalty'].astype(int)

goals_against['own_goal'] = goals_against['own_goal'].astype(int)

goals_against['goal_no'] = goals_against.groupby(['game_date', 'player_name']).cumcount() + 1

goals_against = goals_against[['game_date', 'player_name', 'goal_no', 'goal_min', 'added_time', 'penalty', 'own_goal']]

goals_against

Unnamed: 0,game_date,player_name,goal_no,goal_min,added_time,penalty,own_goal
0,2012-08-18,Zoumana Bakayogo,1,34,0,0,0
1,2012-08-18,Jean-Louis Akpa Akpro,1,57,0,0,0
2,2012-08-18,Jean-Louis Akpa Akpro,2,65,0,0,0
3,2012-08-21,Andy Robinson,1,12,0,0,0
4,2012-08-21,Andy Robinson,2,19,0,0,0
...,...,...,...,...,...,...,...
499,2024-08-13,Harvey Saunders,1,3,0,0,0
500,2024-08-13,Josh Williams,1,45,0,0,0
501,2024-08-13,Omari Patrick,1,71,0,0,0
502,2024-08-20,Josh Davison,1,11,0,0,0


In [10]:
goals_df.to_csv('bbc-json-goals-df.csv', index=False)

In [12]:
goals_against = goals_df[goals_df.team_name!='Tranmere Rovers'].copy()

goals_against = goals_against[['date', 'full', 'timeElapsed', 'addedTime', 'penalty', 'ownGoal']]

goals_against = goals_against \
    .rename(columns={
        'date': 'game_date',
        'full': 'player_name',
        'timeElapsed': 'goal_min',
        'addedTime': 'added_time',
        'ownGoal': 'own_goal'
    }) \
    .sort_values(['game_date', 'goal_min', 'added_time']).reset_index(drop=True)

goals_against['game_date'] = goals_against['game_date'].str.split('T').str[0]

goals_against['penalty'] = goals_against['penalty'].astype(int)

goals_against['own_goal'] = goals_against['own_goal'].astype(int)

goals_against['goal_no'] = goals_against.groupby(['game_date', 'player_name']).cumcount() + 1

goals_against = goals_against[['game_date', 'player_name', 'goal_no', 'goal_min', 'added_time', 'penalty', 'own_goal']]

goals_against.to_csv('bbc-json-goals-against.csv', index=False)