In [36]:
import requests
import datetime

def request_json(url):
    r = requests.get(url)
    return r.json()

In [37]:
def today():
    return datetime.datetime.now().strftime('%Y-%m-%d')

In [38]:
game_date = today()

In [39]:
api_base = 'https://web-cdn.api.bbci.co.uk/wc-poll-data/container'

In [40]:
def get_fixtures(start_date, end_date=None, print_url=False):
    if not end_date:
        end_date = start_date
    fixtures = f'{api_base}/sport-data-scores-fixtures?selectedEndDate={end_date}&selectedStartDate={start_date}&todayDate={today()}&urn=urn%3Abbc%3Asportsdata%3Afootball%3Ateam%3Atranmere-rovers&useSdApi=false'
    if print_url:
        print(fixtures)
    return request_json(fixtures)['eventGroups']

In [41]:
fixture_info = get_fixtures(game_date, print_url=True)

https://web-cdn.api.bbci.co.uk/wc-poll-data/container/sport-data-scores-fixtures?selectedEndDate=2024-12-14&selectedStartDate=2024-12-14&todayDate=2024-12-14&urn=urn%3Abbc%3Asportsdata%3Afootball%3Ateam%3Atranmere-rovers&useSdApi=false


In [42]:
def get_resource_id(fixture_info):
    return fixture_info['secondaryGroups'][0]['events'][0]['tipoTopicId']

bbc_resource_id = get_resource_id(fixture_info[0])

bbc_resource_id

'c2054gey5n1t'

In [43]:
def get_match_id(fixture_info):
    return fixture_info['secondaryGroups'][0]['events'][0]['id']

bbc_match_id = get_match_id(fixture_info[0])

bbc_match_id

's-332awqf84nix3nx72koa8uo7o'

In [44]:
def get_match_stats(event_id):
    match_stats = f'{api_base}/match-stats?globalContainerPolling=true&urn=urn%3Abbc%3Asportsdata%3Afootball%3Aevent%3A{event_id}'
    return request_json(match_stats)

match_stats = get_match_stats(bbc_match_id)

In [45]:
def get_match_info(resource_id, game_date, match_id):
    match_info = f'{api_base}/live-header?assetId={resource_id}&endDateTime={game_date}&globalContainerPolling=true&isInternational=true&liveExperienceCrowdCount=true&showMSI=false&showMedia=true&sportDataEventUrn=urn%3Abbc%3Asportsdata%3Afootball%3Aevent%3A{match_id}&sportDiscipline=football&startDateTime={game_date}&uasEnv=live'
    return request_json(match_info)

match_info = get_match_info(bbc_resource_id, game_date, bbc_match_id)

In [46]:
def get_sameday_fixtures(event_id):
    sameday_fixtures = f'{api_base}/football-on-the-day-events?globalContainerPolling=true&matchUrn=urn%3Abbc%3Asportsdata%3Afootball%3Aevent%3A{event_id}'
    return request_json(sameday_fixtures)

sameday_fixtures = get_sameday_fixtures(bbc_match_id)

In [47]:
def get_table(date_today, event_id):
    table = f'{api_base}/football-table?globalContainerPolling=true&matchDate={date_today}&matchUrn=urn%3Abbc%3Asportsdata%3Afootball%3Aevent%3A{event_id}'
    return request_json(table)

table = get_table(today(), bbc_match_id)

In [48]:
def get_lineups(event_id):
    line_ups = f'{api_base}/match-lineups?globalContainerPolling=true&urn=urn%3Abbc%3Asportsdata%3Afootball%3Aevent%3A{event_id}'
    return request_json(line_ups)

lineups = get_lineups(bbc_match_id)

In [49]:
def get_commentary_url(match_id, page_no):
    return f'https://www.bbc.com/wc-data/container/stream?globalContainerPolling=true&liveTextStreamId={match_id}&pageNumber={page_no}&pageSize=20&pageUrl=%2Fsport%2Ffootball%2Flive%2Fc0mn93jz28nt&type=football'

def get_commentary(match_id, page_no=1):
    commentary = get_commentary_url(match_id, page_no)

    all_commentary = []
    page_1 = request_json(commentary)
    if not 'error' in page_1.keys():
        all_commentary.append(page_1)
        
        n_pages = page_1['page']['total']
        for i in range(2, n_pages+1):
            commentary = get_commentary_url(match_id, i)
            all_commentary.append(request_json(commentary))

        return all_commentary

commentary = get_commentary(bbc_match_id)

In [50]:
def get_match_json(game_date):
    fixture_info = get_fixtures(game_date)[0]

    bbc_resource_id = get_resource_id(fixture_info)

    bbc_match_id = get_match_id(fixture_info)

    match_stats = get_match_stats(bbc_match_id)

    match_info = get_match_info(bbc_resource_id, game_date, bbc_match_id)

    lineups = get_lineups(bbc_match_id)

    table = get_table(today(), bbc_match_id)

    sameday_fixtures = get_sameday_fixtures(bbc_match_id)

    commentary = get_commentary(bbc_match_id)

    return {
        'fixture_info': fixture_info,
        'match_stats': match_stats,
        'match_info': match_info,
        'lineups': lineups,
        'table': table,
        'sameday_fixtures': sameday_fixtures,
        'commentary': commentary
    }

In [51]:
# game_date = '2024-10-08'

fixture_info = get_fixtures(game_date)[0]

bbc_resource_id = get_resource_id(fixture_info)

bbc_match_id = get_match_id(fixture_info)

match_stats = get_match_stats(bbc_match_id)

match_info = get_match_info(bbc_resource_id, game_date, bbc_match_id)

lineups = get_lineups(bbc_match_id)

table = get_table(today(), bbc_match_id)

sameday_fixtures = get_sameday_fixtures(bbc_match_id)

commentary = get_commentary(bbc_match_id)

In [52]:
import json
import os

def save_match_json(game_date):
    bbc_json = get_match_json(game_date)

    print(f"~~~~~~ Saving data for {game_date} ~~~~~~")

    for key, value in bbc_json.items():
        dir = f"./bbc-json/{key}"
        os.makedirs(dir, exist_ok=True)
        
        filename = f"{dir}/{game_date}.json"
        
        with open(filename, 'w') as outfile:
            json.dump(value, outfile)
            print(f"Saved {filename}" )

In [53]:
save_match_json(game_date)

~~~~~~ Saving data for 2024-12-14 ~~~~~~
Saved ./bbc-json/fixture_info/2024-12-14.json
Saved ./bbc-json/match_stats/2024-12-14.json
Saved ./bbc-json/match_info/2024-12-14.json
Saved ./bbc-json/lineups/2024-12-14.json
Saved ./bbc-json/table/2024-12-14.json
Saved ./bbc-json/sameday_fixtures/2024-12-14.json
Saved ./bbc-json/commentary/2024-12-14.json


In [54]:
import pandas as pd

def name_json_file(date):
    return f'./bbc-json/table/{date}.json'

def request_json(file):
    with open(file) as f:
        data = json.load(f)
    return data

def get_divs(data):
    if not 'error' in data.keys():
        return data['tournaments'][0]['stages'][0]['rounds']
    else:
        return None

def find_tranmere_div(divs):
    if not divs:
        return None
    for i in range(len(divs)):
        teams = divs[i]['participants']

        for team in teams:
            team_name = team['name']
            if team_name == 'Tranmere Rovers':
                return divs[i]
            
def get_league_name(data):
    if 'error' in data.keys():
        return None
    else:
        return data['tournaments'][0]['disambiguatedName']

def get_cup_division(div):
    if div and 'name' in div.keys():
        return div['name']
    else:
        return None

def get_league_df(div):
    if div:
        df = pd.json_normalize(div['participants'])
        return df

def add_league_name(df, name):
    if not df.empty:
        df['league_name'] = name
        return df

def add_cup_division(df, cup_div):
    if not df.empty:
        df['cup_division'] = cup_div
        return df

def reduce_league_df(df):
    if not df.empty:
        return df[['league_name', 'cup_division', 'rank', 'name', 'matchesPlayed', 'wins', 'draws', 'losses', 'goalsScoredFor', 'goalsScoredAgainst', 'goalDifference', 'points']]

def rename_columns(df):
    if not df.empty:
        return df.rename(columns={
            'name': 'team_name',
            'matchesPlayed' : 'p',
            'wins': 'w',
            'draws': 'd',
            'losses': 'l',
            'goalsScoredFor': 'gf',
            'goalsScoredAgainst': 'ga',
            'goalDifference': 'gd'
        })

# game_date = '2024-10-08'

file = name_json_file(game_date)

data = request_json(file)

divs = get_divs(data)

div = find_tranmere_div(divs)

league_name = get_league_name(data)

cup_div = get_cup_division(div)

df = get_league_df(div)

df = add_league_name(df, league_name)

df = add_cup_division(df, cup_div)

df = reduce_league_df(df)

df = rename_columns(df)

df

Unnamed: 0,league_name,cup_division,rank,team_name,p,w,d,l,gf,ga,gd,points
0,League Two,,1,Walsall,19,11,5,3,34,19,15,38
1,League Two,,2,Port Vale,20,10,6,4,26,19,7,36
2,League Two,,3,Doncaster Rovers,20,9,7,4,28,22,6,34
3,League Two,,4,Crewe Alexandra,19,8,8,3,22,16,6,32
4,League Two,,5,Grimsby Town,20,10,2,8,27,28,-1,32
5,League Two,,6,AFC Wimbledon,19,9,4,6,30,16,14,31
6,League Two,,7,Milton Keynes Dons,18,9,4,5,30,19,11,31
7,League Two,,8,Chesterfield,20,7,8,5,33,23,10,29
8,League Two,,9,Notts County,19,7,7,5,25,21,4,28
9,League Two,,10,Gillingham,19,8,3,8,19,16,3,27


In [55]:
import pandas as pd
import json
from typing import Dict, List, Optional

def name_json_file(date: str) -> str:
    """ Return the path to the JSON file for the given date. """
    return f'./bbc-json/table/{date}.json'

def read_json_file(file: str) -> Dict:
    """ Load and return the JSON data from a file. """
    try:
        with open(file) as f:
            return json.load(f)
    except FileNotFoundError:
        raise Exception(f"File {file} not found.")
    except json.JSONDecodeError:
        raise Exception(f"Error decoding JSON from file {file}.")

def get_divs(data: Dict) -> List[Dict]:
    """Extract the division information from the JSON data."""
    try:
        return data['tournaments'][0]['stages'][0]['rounds']
    except (KeyError, IndexError):
        raise Exception("Unexpected JSON structure for divisions.")

def find_team_div(divs: List[Dict], team_name: str) -> Optional[Dict]:
    """Find and return the division containing the specified team."""
    for div in divs:
        teams = div.get('participants', [])
        if any(team.get('name') == team_name for team in teams):
            return div
    return None

def get_league_name(data: Dict) -> str:
    """Extract the league name"""
    return data['tournaments'][0]['name']

def get_cup_division(div: Dict) -> Optional[str]:
    """Extract the cup division name."""
    try:
        return div['name']
    except KeyError:
        return None

def get_league_df(div: Dict) -> pd.DataFrame:
    """Normalize the JSON participants data into a pandas DataFrame."""
    return pd.json_normalize(div['participants'])

def process_league_df(df: pd.DataFrame, league_name: str, cup_division: str) -> pd.DataFrame:
    """Add league name, reduce columns, and rename columns for the DataFrame."""
    df['league_name'] = league_name
    df['cup_div'] = cup_division
    df = df[['league_name', 'cup_div', 'rank', 'name', 'matchesPlayed', 'wins', 'draws', 'losses', 'goalsScoredFor', 'goalsScoredAgainst', 'goalDifference', 'points']]
    return df.rename(columns={
        'name': 'team_name',
        'matchesPlayed' : 'p',
        'wins': 'w',
        'draws': 'd',
        'losses': 'l',
        'goalsScoredFor': 'gf',
        'goalsScoredAgainst': 'ga',
        'goalDifference': 'gd'
    })

def process_league_table(game_date: str, team_name: str='Tranmere Rovers') -> pd.DataFrame:
    """Process league data for a specific team and date."""
    file = name_json_file(game_date)
    data = read_json_file(file)
    if not 'error' in data.keys():
        divs = get_divs(data)
        div = find_team_div(divs, team_name)

        if div:
            league_name = get_league_name(data)
            cup_div = get_cup_division(div)
            df = get_league_df(div)
            return process_league_df(df, league_name, cup_div)
        else:
            return pd.DataFrame()

In [56]:
import glob

def get_file_list(directory: str) -> List[str]:
    """Return a list of JSON files in the specified directory."""
    return glob.glob(f'./bbc-json/{directory}/*.json')

def get_date_from_filename(filename: str) -> str:
    """Extract the date from a JSON filename."""
    return filename.split('/')[-1].split('.')[0]

In [57]:
files = get_file_list('table')

all_tables = []
for file in sorted(files):
    date = get_date_from_filename(file)
    table = process_league_table(date)
    try:
        table['game_date'] = date
        all_tables.append(table)
    except:
        print(f"No table data for {date}")

all_tables_df = pd.concat(all_tables)

all_tables_df = all_tables_df.sort_values(by=['game_date', 'league_name', 'rank'])

all_tables_df.to_csv('./data/league_tables.csv', index=False)

No table data for 2024-11-02


In [58]:
def process_match_stats(data: Dict, game_date: str) -> pd.DataFrame:
    """
        Process match stats for all files in the specified directory. 
    """
    both_teams = []

    try:
        for team in ['homeTeam', 'awayTeam']:
            team_name = data[team]['name']['fullName']
            team_venue = data[team]['alignment']

            team_stats = pd.json_normalize(data[team]['stats'])
            team_stats.columns = team_stats.columns.str.replace('.total', '')

            default_cols = team_stats.columns.tolist()

            team_stats['game_date'] = game_date
            team_stats['team_name'] = team_name
            team_stats['team_venue'] = team_venue

            df_cols = ['game_date', 'team_name', 'team_venue'] + default_cols
            team_stats = team_stats[df_cols]

            both_teams.append(team_stats)

        both_teams_df = pd.concat(both_teams)

        return both_teams_df

    except Exception as e:
        print(f"Error processing {game_date}: {e}")

    
files = get_file_list('match_stats')
all_match_stats = []

for file in sorted(files):
    game_date = get_date_from_filename(file)
    data = read_json_file(file)
    match_stats = process_match_stats(data, game_date)
    all_match_stats.append(match_stats)
if all_match_stats:
    all_match_stats_df = pd.concat(all_match_stats).sort_values(by=['game_date', 'team_venue'], ascending=[True, False]).reset_index(drop=True)

# all_match_stats_df = process_match_stats()

all_match_stats_df.to_csv('./data/match_stats.csv', index=False)

Error processing 2020-03-10: 'stats'
Error processing 2021-09-14: 'stats'
Error processing 2022-03-26: 'stats'


In [59]:
def extract_player_id(long_id: str) -> str:
    """Extract the player ID from a long URN."""
    return long_id.split(':')[-1]

In [60]:
def process_lineups_df(data):
    """
        Process the lineups JSON data into a DataFrame.
    """
    both_teams = []

    for team in ['homeTeam', 'awayTeam']:
        try:
            team_name = data[team]['name']['fullName']
            team_venue = data[team]['alignment']
            formation = data[team]['formation']['value'].replace(' ', '')
            team_manager = data[team]['manager']['name']['full']
            players = data[team]['players']
            roles = ['starters', 'substitutes']

            for role in roles:
                player_list = players[role]

                for player in player_list:
                    surname = player['name']['last']
                    forename = player['name']['first']
                    short_name = player['name']['short']
                    full_name = f"{forename} {surname}"
                    shirt_no = player['shirtNumber']
                    position = player['position']

                    if 'formationPlace' in player.keys():
                        formation_place = player['formationPlace']
                    else:
                        formation_place = None

                    is_captain = player['isCaptain']
                    cards = player['cards']

                    yellow_card = 0
                    min_yc = None
                    red_card = 0
                    min_rc = None

                    if len(cards) > 0:
                        for card in cards:
                            if card['type'] == 'Yellow Card':
                                yellow_card = 1
                                min_yc = card['timeLabel']['value'].replace("'", '')
                            elif card['type'] == 'Red Card':
                                red_card = 1
                                min_rc = card['timeLabel']['value'].replace("'", '')

                    if 'substitutes' in player.keys():
                        substitutes = player['substitutes']
                    else:
                        substitutes = None

                    sub_off_period = None
                    sub_off_min = None
                    sub_off_reason = None
                    sub_replacement_id = None
                    sub_replacement_name = None

                    if 'substitutedOff' in player.keys():
                        sub_off = player['substitutedOff']
                        sub_off_period = sub_off['periodId']
                        sub_off_min = sub_off['timeMin']
                        sub_off_reason = sub_off['reason']
                        sub_replacement_id = sub_off['playerOnUrn']
                        sub_replacement_id = extract_player_id(sub_replacement_id)
                        sub_replacement_name = sub_off['playerOnName']

                    sub_on_period =  None
                    sub_on_min = None
                    sub_on_reason = None
                    sub_replaced_id = None
                    sub_replaced_name = None

                    if 'substitutedOn' in player.keys():
                        sub_on = player['substitutedOn']
                        sub_on_period = sub_on['periodId']
                        sub_off_min = sub_on['timeMin']
                        sub_off_reason = sub_on['reason']
                        sub_replaced_id = sub_on['playerOffUrn']
                        sub_replaced_id = extract_player_id(sub_replaced_id)
                        sub_replaced_name = sub_on['playerOffName']

                    player_data = {
                        'game_date': game_date,
                        'team_name': team_name,
                        'team_venue': team_venue,
                        'formation': formation,
                        'team_manager': team_manager,
                        'surname': surname,
                        'forename': forename,
                        'player_name': full_name,
                        'short_name': short_name,
                        'shirt_no': shirt_no,
                        'position': position,
                        'formation_place': formation_place,
                        'is_captain': is_captain,
                        'yellow_card': yellow_card,
                        'min_yc': min_yc,
                        'red_card': red_card,
                        'min_rc': min_rc,
                        'sub_off_period': sub_off_period,
                        'sub_off_min': sub_off_min,
                        'sub_off_reason': sub_off_reason,
                        'sub_replacement_id': sub_replacement_id,
                        'sub_replacement_name': sub_replacement_name,
                        'sub_on_period': sub_on_period,
                        'sub_on_min': sub_on_min,
                        'sub_on_reason': sub_on_reason,
                        'sub_replaced_id': sub_replaced_id,
                        'sub_replaced_name': sub_replaced_name
                    }

                    both_teams.append(player_data)
            
            return pd.DataFrame(both_teams).sort_values(by=['game_date', 'team_venue', 'shirt_no'], ascending = [True, False, True]).reset_index(drop=True)
                    
        except Exception as e:
            print(f"Error processing {team} on {game_date}: {e}")

files = get_file_list('lineups')

all_lineups = []

for file in sorted(files):
    game_date = get_date_from_filename(file)

    lineups = read_json_file(file)

    both_teams = []

    for team in ['homeTeam', 'awayTeam']:

        try:

            team_name = lineups[team]['name']['fullName']

            team_venue = lineups[team]['alignment']

            formation = lineups[team]['formation']['value'].replace(' ', '')

            team_manager = lineups[team]['manager']['name']['full']

            players = lineups[team]['players']

            roles = ['starters', 'substitutes']

            for role in roles:

                player_list = players[role]

                for player in player_list:

                    surname = player['name']['last']
                    
                    forename = player['name']['first']

                    short_name = player['name']['short']

                    full_name = f"{forename} {surname}"

                    shirt_no = player['shirtNumber']

                    position = player['position']

                    if 'formationPlace' in player.keys():
                        formation_place = player['formationPlace']
                    else:
                        formation_place = None

                    is_captain = player['isCaptain']

                    cards = player['cards']

                    yellow_card = 0
                    min_yc = None

                    red_card = 0
                    min_rc = None
                    
                    if len(cards) > 0:
                        for card in cards:
                            if card['type'] == 'Yellow Card':
                                yellow_card = 1
                                min_yc = card['timeLabel']['value'].replace("'", '')
                            elif card['type'] == 'Red Card':
                                red_card = 1
                                min_rc = card['timeLabel']['value'].replace("'", '')
                            

                    if 'substitutes' in player.keys():
                        substitutes = player['substitutes']
                    else:
                        substitutes = None

                    sub_off_period = None
                    sub_off_min = None
                    sub_off_reason = None
                    sub_replacement_id = None
                    sub_replacement_name = None

                    if 'substitutedOff' in player.keys():
                        sub_off = player['substitutedOff']
                        sub_off_period = sub_off['periodId']
                        sub_off_min = sub_off['timeMin']
                        sub_off_reason = sub_off['reason']
                        sub_replacement_id = sub_off['playerOnUrn']
                        sub_replacement_id = extract_player_id(sub_replacement_id)
                        sub_replacement_name = sub_off['playerOnName']

                    sub_on_period = None
                    sub_on_min = None
                    sub_on_reason = None
                    sub_replaced_id = None
                    sub_replaced_name = None

                    if 'substitutedOn' in player.keys():
                        sub_on = player['substitutedOn']
                        sub_on_period = sub_on['periodId']
                        sub_off_min = sub_on['timeMin']
                        sub_off_reason = sub_on['reason']
                        sub_replaced_id = sub_on['playerOffUrn']
                        sub_replaced_id = extract_player_id(sub_replaced_id)
                        sub_replaced_name = sub_on['playerOffName']


                    player_data = {
                        'game_date': game_date,
                        'team_name': team_name,
                        'team_venue': team_venue,
                        'formation': formation,
                        'team_manager': team_manager,
                        'surname': surname,
                        'forename': forename,
                        'short_name': short_name,
                        'shirt_no': shirt_no,
                        'position': position,
                        'formation_place': formation_place,
                        'is_captain': is_captain,
                        'yellow_card': yellow_card,
                        'min_yc': min_yc,
                        'red_card': red_card,
                        'min_rc': min_rc,
                        'sub_off_period': sub_off_period,
                        'sub_off_min': sub_off_min,
                        'sub_off_reason': sub_off_reason,
                        'sub_replacement_id': sub_replacement_id,
                        'sub_replacement_name': sub_replacement_name,
                        'sub_on_period': sub_on_period,
                        'sub_on_min': sub_on_min,
                        'sub_on_reason': sub_on_reason,
                        'sub_replaced_id': sub_replaced_id,
                        'sub_replaced_name': sub_replaced_name
                    }

                    both_teams.append(player_data)

        except Exception as e:
            print(f"Error processing {team} on {game_date}: {e}")

    both_teams_df = pd.DataFrame(both_teams)

    all_lineups.append(both_teams_df)

all_lineups_df = pd.concat(all_lineups).sort_values(by=['game_date', 'team_venue', 'shirt_no'], ascending = [True, False, True]).reset_index(drop=True)

all_lineups_df.to_csv('./data/lineups.csv', index=False)

In [61]:
def process_officials(data: Dict) -> pd.DataFrame:
    if 'officials' in data.keys() and len(data['officials']) > 0:
        match_officials = []
        for official in data['officials']:
            
            if 'shortFirstName' in official.keys():
                forename = official['shortFirstName']
            else:
                forename = official['firstName']

            if 'shortLastName' in official.keys():
                surname = official['shortLastName']
            else:
                surname = official['lastName']

            name = f"{forename} {surname}"

            role = official['type']

            official_data = {
                'game_date': game_date,
                'surname': surname,
                'forename': forename,
                'name': name,
                'role': role
            }

            match_officials.append(official_data)
        return pd.DataFrame(match_officials)
    else:
        return pd.DataFrame()

files = get_file_list('lineups')

all_officials = []

for file in sorted(files):

    game_date = get_date_from_filename(file)

    officials = read_json_file(file)

    match_officials = process_officials(officials)

    all_officials.append(match_officials)

all_officials_df = pd.concat(all_officials)

all_officials_df.to_csv('./data/officials.csv', index=False)

In [62]:
files = get_file_list('match_info')

all_match_info = []

all_score_data = []

all_goals = []

all_assists = []

for file in sorted(files):

        data = read_json_file(file)

        game_date = get_date_from_filename(file)

        if 'attendance' in data['sportDataEvent'].keys():
            try:
                attendance = data['sportDataEvent']['attendance']['value']
            except:
                if game_date == '2024-08-20':
                    attendance = '671'
                elif game_date == '2024-09-27':
                    attendance = '7281'
                elif game_date == '2024-10-01':
                    attendance = '5120'
                elif game_date == '2024-10-08':
                    attendance = '919'
                else:
                    attendance = None
                    print(f"No attendance for {game_date}")
        else:
            attendance = None

        game_date = data['startDateTime'].split('T')[0]

        competition = data['sportDataEvent']['tournament']['name']

        competition_long = data['sportDataEvent']['tournament']['disambiguatedName']

        comp_stage = data['sportDataEvent']['stage']['name']

        if 'round' in data['sportDataEvent'].keys():
            comp_round = data['sportDataEvent']['round']['name']
        else:
            comp_round = None

        if 'stage' in data['sportDataEvent'].keys():
            comp_stage = data['sportDataEvent']['stage']['name']
        else:
            comp_stage = None

        ko_time = data['sportDataEvent']['time']['displayTimeUK']

        venue = data['sportDataEvent']['venue']['name']

        match_data = {
            'game_date': game_date,
            'competition': competition,
            'competition_long': competition_long,
            'comp_stage': comp_stage,
            'comp_round': comp_round,
            'ko_time': ko_time,
            'venue': venue,
            'attendance': attendance
        }

        all_match_info.append(match_data)

        ## ASSISTS
        
        if 'groupedActions' in data['sportDataEvent'].keys():
            grouped_actions = data['sportDataEvent']['groupedActions']

            for action in grouped_actions:
                group_name = action['groupName']['fullName']

                if group_name == 'Assists':

                    team_actions = {
                        'homeTeamActions': data['sportDataEvent']['home']['fullName'],
                        'awayTeamActions': data['sportDataEvent']['away']['fullName']
                    }

                    for team in team_actions:
                        if team in action.keys():
                            team_assists = action[team]

                            for assist in team_assists:
                                assist_info = assist.split(' (')
                                assist_player = assist_info[0]
                                assist_min = assist_info[1].replace(')', '').replace("'", '').split(',')
                                for min in assist_min:
                                    assist_min = min.strip()
                                    assist_min_inj = None
                                    if '+' in min:
                                        inj_assist = min.split('+')
                                        assist_min = inj_assist[0]
                                        assist_min_inj = inj_assist[1]
                                    assist_data = {
                                        'game_date': game_date,
                                        'team_name': team_actions[team],
                                        'assist_player': assist_player,
                                        'assist_min': assist_min,
                                        'assist_min_inj': assist_min_inj
                                    }
                                    all_assists.append(assist_data)

        ## SCORES
        
        teams = ['home', 'away']
        
        for team in teams:
            team_info = data['sportDataEvent'][team]

            team_venue = team

            team_name = team_info['fullName']

            ht_score = team_info['runningScores']['halftime']

            ft_score = team_info['runningScores']['fulltime']

            if 'extratime' in team_info['runningScores'].keys():
                et_score = team_info['runningScores']['extratime']
            else:
                et_score = None
            
            if 'penaltyShootoutScore' in team_info['runningScores'].keys():
                pens_score = team_info['runningScores']['penaltyShootoutScore']
            else:
                pens_score = None

            scores_data = {
                'game_date': game_date,
                'team_name': team_name,
                'team_venue': team_venue,
                'ht_score': ht_score,
                'ft_score': ft_score,
                'et_score': et_score,
                'pens_score': pens_score
            }

            all_score_data.append(scores_data)

            ## GOALS
            
            if 'actions' in team_info.keys():
                actions = team_info['actions']

                for action in actions:

                    if action['actionType'] == 'goal':
                        
                        player_name = action['playerName']

                        bbc_player_id = action['playerUrn']
                        player_id = extract_player_id(bbc_player_id)

                        goals = action['actions']

                        for goal in goals:

                            goal_min = goal['timeLabel']['value'].replace("'", '').replace(" ET", '')

                            goal_min_inj = None
                            if '+' in goal_min:
                                inj_goal = goal_min.split('+')
                                goal_min = inj_goal[0]
                                goal_min_inj = inj_goal[1]

                            goal_type = goal['type']

                            goal_data = {
                                'game_date': game_date,
                                'team_name': team_name,
                                'player_name': player_name,
                                'bbc_player_id': player_id,
                                'goal_min': goal_min,
                                'goal_min_inj': goal_min_inj,
                                'goal_type': goal_type
                            }
                            all_goals.append(goal_data)

No attendance for 2024-10-12
No attendance for 2024-10-19
No attendance for 2024-10-22
No attendance for 2024-11-02
No attendance for 2024-11-09
No attendance for 2024-11-22
No attendance for 2024-11-26


In [63]:
all_matches_df = pd.DataFrame(all_match_info).sort_values(by='game_date').reset_index(drop=True)

all_matches_df.to_csv('./data/match_info.csv', index=False)

In [64]:
all_scores_df = pd.DataFrame(all_score_data)

all_scores_df.to_csv('./data/scores.csv', index=False)

In [65]:
goals_df = pd.DataFrame(all_goals)

goals_df.to_csv('./data/goals.csv', index=False)

In [66]:
assists_df = pd.DataFrame(all_assists)

assists_df.to_csv('./data/assists.csv', index=False)

In [67]:
files = get_file_list('sameday_fixtures')

all_sameday_fixtures = []

all_sameday_scorers = []

for file in sorted(files):
        fixtures = read_json_file(file)

        game_date = get_date_from_filename(file)

        games = fixtures['events']

        for game in games:

                if game['status'] not in ['Cancelled', 'Postponed']:

                        teams = ['home', 'away']

                        home_team = game['home']['fullName']

                        away_team = game['away']['fullName']

                
                        for team in teams:
                                team_info = game[team]

                                team_name = team_info['fullName']

                                ht_score = team_info['runningScores']['halftime']

                                ft_score = team_info['runningScores']['fulltime']

                                if 'penaltyShootoutScore' in team_info['runningScores'].keys():
                                        pen_score = team_info['runningScores']['penaltyShootoutScore']
                                else:
                                        pen_score = None

                                scores_data = {
                                        'game_date': game_date,
                                        'home_team': home_team,
                                        'away_team': away_team,
                                        'team_name': team_name,
                                        'ht_score': ht_score,
                                        'ft_score': ft_score,
                                        'pen_score': pen_score
                                }

                                all_sameday_fixtures.append(scores_data)

                                ## GOALS
                                
                                if 'actions' in team_info.keys():
                                        actions = team_info['actions']

                                        for action in actions:

                                                if action['actionType'] == 'goal':
                                                
                                                        player_name = action['playerName']

                                                        bbc_player_id = action['playerUrn']
                                                        player_id = extract_player_id(bbc_player_id)

                                                        goals = action['actions']

                                                        for goal in goals:

                                                                goal_min = goal['timeLabel']['value'].replace("'", '')

                                                                goal_min_inj = None
                                                                if '+' in goal_min:
                                                                        inj_goal = goal_min.split('+')
                                                                        goal_min = inj_goal[0]
                                                                        goal_min_inj = inj_goal[1]

                                                                goal_type = goal['type']

                                                                goal_data = {
                                                                        'game_date': game_date,
                                                                        'team_name': team_name,
                                                                        'player_name': player_name,
                                                                        'bbc_player_id': player_id,
                                                                        'goal_min': goal_min,
                                                                        'goal_min_inj': goal_min_inj,
                                                                        'goal_type': goal_type
                                                                }
                                                                all_sameday_scorers.append(goal_data)

KeyError: 'halftime'

In [33]:
all_sameday_fixtures_df = pd.DataFrame(all_sameday_fixtures).sort_values(by=['game_date', 'home_team']).reset_index(drop=True)

all_sameday_fixtures_df.to_csv('./data/sameday_fixtures.csv', index=False)

In [34]:
all_sameday_scorers_df = pd.DataFrame(all_sameday_scorers).sort_values(by=['game_date', 'team_name', 'goal_min']).reset_index(drop=True)

all_sameday_scorers_df.to_csv('./data/sameday_scorers.csv', index=False)

In [35]:
files = get_file_list('commentary')

all_commentary = []

for file in sorted(files):

    game_date = get_date_from_filename(file)

    pages = read_json_file(file)

    for page in pages:
        comms = page['results']

        for comm in comms:

            comm_min = comm['dates']['time'].replace("'", '')

            if '+' in comm_min:
                inj_comm = comm_min.split('+')
                comm_min = inj_comm[0]
                comm_min_inj = inj_comm[1]
            else:
                comm_min_inj = None
            
            comm_text = comm['content']['model']['blocks'][0]['model']['blocks'][0]['model']['text']

            headline = comm['headline']

            if headline:
                headline = headline['model']['blocks'][0]['model']['text']

            comm_data = {
                'game_date': game_date,
                'comm_min': comm_min,
                'comm_min_inj': comm_min_inj,
                'comm_text': comm_text,
                'headline': headline
            }

            all_commentary.append(comm_data)

all_commentary_df = pd.DataFrame(all_commentary).sort_values(by=['game_date', 'comm_min']).reset_index(drop=True)

all_commentary_df.to_csv('./data/commentary.csv', index=False)