In [4]:
import requests
import json
import pandas as pd
import pprint
from tqdm import tqdm
import time

In [5]:
def get_keys(path):
    """Read the file at ``path`` and return its parsed JSON content.

    Parameters
    ----------
    path : str or path-like
        Location of a JSON file.

    Returns
    -------
    The decoded JSON document (whatever ``json`` produces for the file).
    """
    with open(path) as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

In [6]:
# Read the API credentials from a local secrets file and build the
# authentication header passed to the requests.get calls in this notebook.
keys = get_keys("../temp/.secret/football-data.json")
headers = { 
    'X-Auth-Token': keys["X-Auth-Token"] 
}

In [82]:
# Endpoint for matchday 2 of competition code BL1; the bare name on the last
# line echoes the URL as the cell output.
uri = "https://api.football-data.org/v4/competitions/BL1//matches?matchday=2"
uri

'https://api.football-data.org/v4/competitions/BL1//matches?matchday=2'

In [88]:
# Sanity check: fetch matchday 2 of competition code PD and print every match record.
uri = "https://api.football-data.org/v4/competitions/PD//matches?matchday=2"
# A timeout keeps the cell from hanging indefinitely if the server never answers.
response = requests.get(uri, headers=headers, timeout=30)
# Surface HTTP errors directly instead of as a KeyError on 'matches' below.
response.raise_for_status()
for match in response.json()['matches']:
    print(match)

{'area': {'id': 2224, 'name': 'Spain', 'code': 'ESP', 'flag': 'https://crests.football-data.org/760.svg'}, 'competition': {'id': 2014, 'name': 'Primera Division', 'code': 'PD', 'type': 'LEAGUE', 'emblem': 'https://crests.football-data.org/PD.png'}, 'season': {'id': 1577, 'startDate': '2023-08-13', 'endDate': '2024-05-26', 'currentMatchday': 29, 'winner': None}, 'id': 438491, 'utcDate': '2023-08-18T17:30:00Z', 'status': 'FINISHED', 'matchday': 2, 'stage': 'REGULAR_SEASON', 'group': None, 'lastUpdated': '2023-10-09T15:20:25Z', 'homeTeam': {'id': 89, 'name': 'RCD Mallorca', 'shortName': 'Mallorca', 'tla': 'MAL', 'crest': 'https://crests.football-data.org/89.png'}, 'awayTeam': {'id': 94, 'name': 'Villarreal CF', 'shortName': 'Villarreal', 'tla': 'VIL', 'crest': 'https://crests.football-data.org/94.png'}, 'score': {'winner': 'AWAY_TEAM', 'duration': 'REGULAR', 'fullTime': {'home': 0, 'away': 1}, 'halfTime': {'home': 0, 'away': 0}}, 'odds': {'msg': 'Activate Odds-Package in User-Panel to ret

In [7]:
# URL pieces; a request URL is base_url + comp + <competition code> + matchday + <matchday number>.
base_url = "https://api.football-data.org/"
comp = 'v4/competitions/'
matchday = "//matches?matchday="

# Competition code -> [display name, number of matchdays to request].
# Ligue 1 is 34 so that it agrees with the `leagues` table used when the
# DataFrame is built (which reads only 34 Ligue 1 matchdays).
matchdays = {"PL": ["Premier League", 38], 
            "BL1": ["Bundesliga", 34],
            "FL1": ["Ligue 1", 34],
            "PD": ["La Liga", 38],
            "SA": ["Serie A", 38]}

# Display name -> list of matchday payloads, one entry per requested matchday
# (entry k holds matchday k + 1, so later cells can index by position).
match_list = {}
count = 1
for key, value in matchdays.items():
    league_name, n_matchdays = value[0], value[1]
    print(f"Getting data for {league_name}")
    match_list[league_name] = []
    for day in range(1, n_matchdays + 1):
        uri = f"{base_url}{comp}{key}{matchday}{day}"
        print(f"Getting data from {uri}")
        # Pause for a minute before request 1, 11, 21, ... (this includes the
        # very first request) -- presumably to respect the API's request quota;
        # TODO confirm against the account's rate limit.
        if count % 10 == 1:
            time.sleep(60)
        try:
            response = requests.get(uri, headers=headers, timeout=30)
            # Treat HTTP error statuses as failures rather than storing the
            # error body as if it were a matchday payload.
            response.raise_for_status()
            payload = response.json()
        except (requests.exceptions.RequestException, ValueError) as err:
            print("matchday data not available")
            print(f"\t{err}")
            # Store an empty placeholder so positions in the list still line up
            # with matchday numbers.
            payload = {'filters': {'matchday': str(day)}, 'matches': []}
        match_list[league_name].append(payload)
        count += 1
    print()

Getting data for Premier League
Getting data from https://api.football-data.org/v4/competitions/PL//matches?matchday=1
Getting data from https://api.football-data.org/v4/competitions/PL//matches?matchday=2
Getting data from https://api.football-data.org/v4/competitions/PL//matches?matchday=3
Getting data from https://api.football-data.org/v4/competitions/PL//matches?matchday=4
Getting data from https://api.football-data.org/v4/competitions/PL//matches?matchday=5
Getting data from https://api.football-data.org/v4/competitions/PL//matches?matchday=6
Getting data from https://api.football-data.org/v4/competitions/PL//matches?matchday=7
Getting data from https://api.football-data.org/v4/competitions/PL//matches?matchday=8
Getting data from https://api.football-data.org/v4/competitions/PL//matches?matchday=9
Getting data from https://api.football-data.org/v4/competitions/PL//matches?matchday=10
Getting data from https://api.football-data.org/v4/competitions/PL//matches?matchday=11
Getting d

In [95]:
# Pretty-print the first stored Premier League payload (index 0) to inspect its structure.
pprint.pprint(match_list['Premier League'][0])

{'competition': {'code': 'PL',
                 'emblem': 'https://crests.football-data.org/PL.png',
                 'id': 2021,
                 'name': 'Premier League',
                 'type': 'LEAGUE'},
 'filters': {'matchday': '1', 'season': '2023'},
 'matches': [{'area': {'code': 'ENG',
                       'flag': 'https://crests.football-data.org/770.svg',
                       'id': 2072,
                       'name': 'England'},
              'awayTeam': {'crest': 'https://crests.football-data.org/65.png',
                           'id': 65,
                           'name': 'Manchester City FC',
                           'shortName': 'Man City',
                           'tla': 'MCI'},
              'competition': {'code': 'PL',
                              'emblem': 'https://crests.football-data.org/PL.png',
                              'id': 2021,
                              'name': 'Premier League',
                              'type': 'LEAGUE'},
          

In [97]:
# Disabled: would write the scraped payloads to ../temp/matches.json.
# NOTE(review): str(match_list) produces a Python repr (single quotes, None),
# which json.load cannot read back; json.dump(match_list, f_out) would be
# needed if this is re-enabled and the file is meant to be JSON.
# with open("../temp/matches.json", "w") as f_out:
#   f_out.write(str(match_list))

In [8]:
def get_match_points(home_score: int, away_score: int) -> tuple[int, int]:
    """Translate a final score into points earned by each side.

    Parameters
    ----------
    home_score : int
        Goals scored by the home team.
    away_score : int
        Goals scored by the away team.

    Returns
    -------
    tuple[int, int]
        ``(home_points, away_points)``: ``(1, 1)`` for equal scores,
        ``(3, 0)`` when the home score is higher, ``(0, 3)`` otherwise.
    """
    # Equality is checked first, then a home lead; anything else counts for the away side.
    if home_score == away_score:
        return 1, 1
    if home_score > away_score:
        return 3, 0
    return 0, 3

In [9]:
def get_goal_difference(home_score: int, away_score: int) -> tuple[int, int]:
    """Compute each side's goal difference for a single match.

    Parameters
    ----------
    home_score : int
        Goals scored by the home team.
    away_score : int
        Goals scored by the away team.

    Returns
    -------
    tuple[int, int]
        ``(home_gd, away_gd)`` where ``home_gd = home_score - away_score``
        and ``away_gd`` is its negation.
    """
    home_gd = home_score - away_score
    away_gd = -home_gd
    return home_gd, away_gd

In [10]:
# League display name -> [number of matchdays, matches per matchday].
# The DataFrame-building loop uses index 0 as the matchday count and
# index 1 as the number of matches read from each matchday.
leagues = {'Premier League': [38, 10], 
          'Bundesliga': [34, 9], 
          'Ligue 1': [34, 9], 
          'La Liga': [38, 10], 
          'Serie A': [38, 10]}

In [12]:
columns = ['league', 'season', 'league_id', 'season_start_date', 'season_end_date', 
           'matchday', 'match_id', 'match_date', 'home_team', 'away_team', 
           'home_score', 'away_score', 'home_team_points_earned', 'away_team_points_earned', 
           'home_team_gd', 'away_team_gd', 'match_referee']

# (league, zero-based matchday index) pairs that are skipped outright.
# No data available for matchday 23 of Serie A :(
SKIPPED_MATCHDAYS = {('Serie A', 22)}

# Rows are collected in a plain list and converted to a DataFrame once at the
# end, which avoids growing the DataFrame one row at a time.
rows = []

for league, total_matchdays in leagues.items():

    print(f"Getting data for {league}...")
    # total_matchdays is [number of matchdays, matches per matchday].
    n_matchdays, n_matches = total_matchdays[0], total_matchdays[1]

    # Season, league id and the season start/end dates are read once per league
    # from the first stored matchday. FIRST_MATCHDAY / LAST_MATCHDAY hold the
    # season's startDate / endDate fields.
    firstday_data = match_list[league][0]
    SEASON = int(firstday_data['filters']['season'])
    LEAGUE_ID = firstday_data['competition']['id']
    FIRST_MATCHDAY = firstday_data['matches'][0]['season']['startDate']
    LAST_MATCHDAY = firstday_data['matches'][0]['season']['endDate']

    # Slicing instead of indexing up to a fixed count tolerates a league with
    # fewer stored matchdays than expected.
    for mday, league_data in enumerate(match_list[league][:n_matchdays]):
        if (league, mday) in SKIPPED_MATCHDAYS:
            continue
        # Payloads without any matches cannot contribute rows; report and move on.
        if not league_data.get('matches'):
            print(f"\tNo matches in the payload for matchday {mday + 1}, skipping...")
            continue

        matchday = int(league_data['filters']['matchday'])

        # At most n_matches entries are read, but a shorter list is accepted too.
        for match_data in league_data['matches'][:n_matches]:
            # Only matches whose status is FINISHED are recorded.
            if match_data['status'] != 'FINISHED':
                continue

            match_id = match_data['id']
            matchdate = match_data['utcDate']
            home_team = match_data['homeTeam']['shortName']
            away_team = match_data['awayTeam']['shortName']
            home_score = match_data['score']['fullTime']['home']
            away_score = match_data['score']['fullTime']['away']

            # The referee list may be empty or absent; fall back to an empty name.
            referees = match_data.get('referees') or []
            match_referee = referees[0]['name'] if referees else ''

            home_team_points, away_team_points = get_match_points(home_score=home_score, away_score=away_score)
            home_team_gd, away_team_gd = get_goal_difference(home_score=home_score, away_score=away_score)

            rows.append([league, SEASON, LEAGUE_ID, FIRST_MATCHDAY, LAST_MATCHDAY,
                         matchday, match_id, matchdate, home_team, away_team,
                         home_score, away_score, home_team_points, away_team_points,
                         home_team_gd, away_team_gd, match_referee])

df = pd.DataFrame(rows, columns=columns)

print("Successfully completed adding finished matches data for top 5 leagues...")

Getting data for Premier League...
Getting data for Bundesliga...
Getting data for Ligue 1...
Getting data for La Liga...
Getting data for Serie A...
Successfully completed adding finished matches data for top 5 leagues...


In [13]:
# Display the assembled matches DataFrame as the cell output.
df

Unnamed: 0,league,season,league_id,season_start_date,season_end_date,matchday,match_id,match_date,home_team,away_team,home_score,away_score,home_team_points_earned,away_team_points_earned,home_team_gd,away_team_gd,match_referee
0,Premier League,2023,2021,2023-08-11,2024-05-19,1,435943,2023-08-11T19:00:00Z,Burnley,Man City,0,3,0,3,-3,3,Craig Pawson
1,Premier League,2023,2021,2023-08-11,2024-05-19,1,435944,2023-08-12T12:00:00Z,Arsenal,Nottingham,2,1,3,0,1,-1,Michael Oliver
2,Premier League,2023,2021,2023-08-11,2024-05-19,1,435945,2023-08-12T14:00:00Z,Bournemouth,West Ham,1,1,1,1,0,0,Peter Bankes
3,Premier League,2023,2021,2023-08-11,2024-05-19,1,435946,2023-08-12T14:00:00Z,Brighton Hove,Luton Town,4,1,3,0,3,-3,David Coote
4,Premier League,2023,2021,2023-08-11,2024-05-19,1,435947,2023-08-12T14:00:00Z,Everton,Fulham,0,1,0,3,-1,1,Stuart Attwell
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1314,Serie A,2023,2019,2023-08-19,2024-05-26,29,444536,2024-03-16T19:45:00Z,Frosinone,Lazio,2,3,0,3,-1,1,Antonio Rapuano
1315,Serie A,2023,2019,2023-08-19,2024-05-26,29,444539,2024-03-17T11:30:00Z,Juventus,Genoa,0,0,1,1,0,0,Antonio Giua
1316,Serie A,2023,2019,2023-08-19,2024-05-26,29,444537,2024-03-17T14:00:00Z,Verona,Milan,1,3,0,3,-2,2,Maurizio Mariani
1317,Serie A,2023,2019,2023-08-19,2024-05-26,29,444541,2024-03-17T17:00:00Z,Roma,Sassuolo,1,0,3,0,1,-1,Gianluca Manganiello
