# API data for the current Premier League season

## Set environment

In [2]:
# --- standard library ---
import json
import os
from datetime import datetime
from pathlib import Path

# --- third-party ---
import numpy as np
import pandas as pd
import requests
import xlsxwriter
from dotenv import load_dotenv

# allow up to 500 columns when a wide frame is displayed
pd.set_option("display.max_columns", 500)

# run date as a YYYYMMDD stamp
date = datetime.now().strftime("%Y%m%d")

# load environment variables from the .env file two directories up
load_dotenv('../../.env')

True

## API call

### Key columns to keep

In [3]:
# Columns retained from the normalised API response, in display order
keep_cols = [
    # match identity and scheduling
    'id', 'utcDate', 'status', 'matchday',
    # the two sides
    'homeTeam.shortName', 'awayTeam.shortName',
    # outcome and full-time goals
    'score.winner', 'score.fullTime.home', 'score.fullTime.away',
    # derived "home-away" scoreline string (added after the API call)
    'score',
]

In [4]:
# Short names as returned by the API -> simpler labels used in this notebook.
# Names not listed here are left unchanged by the later .replace() calls.
club_names = dict([
    ('Brighton Hove', 'Brighton'),
    ('Leeds United', 'Leeds'),
    ('Leicester City', 'Leicester'),
    ('Wolverhampton', 'Wolves'),
])


### API data call

Add scoreline to dataframe

In [5]:
# The API key comes from the environment so it never appears in the notebook
api_key = os.environ['API_AUTH_KEY']
# HTTPS so the X-Auth-Token header is not sent in clear text
uri = 'https://api.football-data.org/v4/competitions/PL/matches?status=FINISHED'
headers = {'X-Auth-Token': api_key}

# a timeout stops the cell from hanging indefinitely on a stalled connection
r = requests.get(uri, headers=headers, timeout=30)
# surface HTTP errors (bad key, rate limit, ...) here instead of failing
# later while trying to parse an error payload
r.raise_for_status()
data = r.json()

# one row per match; nested fields are flattened into dotted column names
df = pd.json_normalize(data, record_path='matches')

df['utcDate'] = pd.to_datetime(df['utcDate'])
# sort by kick-off time
df = df.sort_values(by='utcDate', ascending=True).reset_index(drop=True)
# rename team names using club_names dict
df['homeTeam.shortName'] = df['homeTeam.shortName'].replace(club_names)
df['awayTeam.shortName'] = df['awayTeam.shortName'].replace(club_names)

# add score column as a "home-away" string
df['score'] = df['score.fullTime.home'].astype(str) + '-' + df['score.fullTime.away'].astype(str)

# keep only columns in keep_cols; .copy() gives an independent frame so that
# later cells can add columns without a SettingWithCopyWarning
df = df[keep_cols].copy()

df.head()


Unnamed: 0,id,utcDate,status,matchday,homeTeam.shortName,awayTeam.shortName,score.winner,score.fullTime.home,score.fullTime.away,score
0,435943,2023-08-11 19:00:00+00:00,FINISHED,1,Burnley,Man City,AWAY_TEAM,0,3,0-3
1,435944,2023-08-12 12:00:00+00:00,FINISHED,1,Arsenal,Nottingham,HOME_TEAM,2,1,2-1
2,435945,2023-08-12 14:00:00+00:00,FINISHED,1,Bournemouth,West Ham,DRAW,1,1,1-1
3,435946,2023-08-12 14:00:00+00:00,FINISHED,1,Brighton,Luton Town,HOME_TEAM,4,1,4-1
4,435947,2023-08-12 14:00:00+00:00,FINISHED,1,Everton,Fulham,AWAY_TEAM,0,1,0-1


In [6]:
# create function to get home_result and away_result
def get_result_home(row):
    """Return the home side's result for one match row.

    Reads ``row['score.winner']``: 'HOME_TEAM' gives 'W', 'AWAY_TEAM'
    gives 'L', and any other value gives 'D'.
    """
    outcome_for_home = {'HOME_TEAM': 'W', 'AWAY_TEAM': 'L'}
    return outcome_for_home.get(row['score.winner'], 'D')

def get_result_away(row):
    """Return the away side's result for one match row.

    Reads ``row['score.winner']``: 'AWAY_TEAM' gives 'W', 'HOME_TEAM'
    gives 'L', and any other value gives 'D'.
    """
    outcome_for_away = {'HOME_TEAM': 'L', 'AWAY_TEAM': 'W'}
    return outcome_for_away.get(row['score.winner'], 'D')

# derive each side's W/D/L from 'score.winner', one value per match row
df['home_result'], df['away_result'] = (
    df.apply(get_result_home, axis=1),
    df.apply(get_result_away, axis=1),
)

df.head()

Unnamed: 0,id,utcDate,status,matchday,homeTeam.shortName,awayTeam.shortName,score.winner,score.fullTime.home,score.fullTime.away,score,home_result,away_result
0,435943,2023-08-11 19:00:00+00:00,FINISHED,1,Burnley,Man City,AWAY_TEAM,0,3,0-3,L,W
1,435944,2023-08-12 12:00:00+00:00,FINISHED,1,Arsenal,Nottingham,HOME_TEAM,2,1,2-1,W,L
2,435945,2023-08-12 14:00:00+00:00,FINISHED,1,Bournemouth,West Ham,DRAW,1,1,1-1,D,D
3,435946,2023-08-12 14:00:00+00:00,FINISHED,1,Brighton,Luton Town,HOME_TEAM,4,1,4-1,W,L
4,435947,2023-08-12 14:00:00+00:00,FINISHED,1,Everton,Fulham,AWAY_TEAM,0,1,0-1,L,W


In [9]:
# create league table function
def create_league_table(df, homeTeam, awayTeam, home_result, away_result, matchday, team):
    """Build a league table, plus head-to-head columns for one focus team.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per match. Besides the columns named by the next four
        arguments it must contain 'matchday', 'score.fullTime.home',
        'score.fullTime.away' and 'score' (a "home-away" string).
    homeTeam, awayTeam : str
        Names of the columns holding the home and away team names.
    home_result, away_result : str
        Names of the columns holding each side's result code
        ('W', 'D' or 'L'); any other code is ignored for the standings.
    matchday : int
        Only matches with ``df['matchday'] <= matchday`` are counted.
    team : str
        Focus team for the head-to-head columns.

    Returns
    -------
    pandas.DataFrame
        One row per team, sorted by points, goal difference and goals for
        (all descending), with a 1-based 'position' column. Columns:
        team, played, won, drawn, lost, goals_for, goals_against,
        goal_difference, points, form (last five results, newest first),
        win_%, goals_score_pg, goals_conceded_pg, goals_diff_pg,
        clean_sheets, points_pg, team_home_score (scoreline when the focus
        team hosted this team), team_away_score (scoreline when the focus
        team visited this team, focus team's goals first),
        focus_team_points (points the focus team took off this team),
        remaining_points (starts at 6, minus 3 per meeting played),
        %_taken ('' when the two sides have not met yet), position.
    """
    # points per result code; also defines which codes count as a played game
    points_for = {'W': 3, 'D': 1, 'L': 0}
    tally_column = {'W': 'won', 'D': 'drawn', 'L': 'lost'}

    # Every club in the full fixture list. Home sides come first (in order of
    # appearance); clubs that have so far only played away are appended so
    # they are not dropped from the table.
    teams = list(pd.concat([df[homeTeam], df[awayTeam]]).unique())

    # filter by matchday
    played_matches = df[df['matchday'] <= matchday]

    # per-team running totals, keyed by team name
    stats = {
        name: {
            'played': 0, 'won': 0, 'drawn': 0, 'lost': 0,
            'goals_for': 0, 'goals_against': 0, 'points': 0, 'form': '',
            'clean_sheets': 0,
            'team_home_score': '', 'team_away_score': '',
            'focus_team_points': 0, 'remaining_points': 6,
        }
        for name in teams
    }

    def _record(name, result, scored, conceded):
        """Add one match to a team's standings; unknown result codes are skipped."""
        if result not in points_for:
            return
        totals = stats[name]
        totals['played'] += 1
        totals[tally_column[result]] += 1
        totals['points'] += points_for[result]
        totals['goals_for'] += scored
        totals['goals_against'] += conceded
        totals['form'] += result

    fixtures = zip(
        played_matches[homeTeam], played_matches[awayTeam],
        played_matches[home_result], played_matches[away_result],
        played_matches['score.fullTime.home'], played_matches['score.fullTime.away'],
        played_matches['score'],
    )
    for home, away, h_res, a_res, h_goals, a_goals, score in fixtures:
        _record(home, h_res, h_goals, a_goals)
        _record(away, a_res, a_goals, h_goals)

        # clean sheets where goals conceded = 0
        if a_goals == 0:
            stats[home]['clean_sheets'] += 1
        if h_goals == 0:
            stats[away]['clean_sheets'] += 1

        # head-to-head: focus team at home, stored on the visiting team's row
        if home == team:
            opponent = stats[away]
            opponent['team_home_score'] += score
            opponent['focus_team_points'] += points_for.get(h_res, 0)
            opponent['remaining_points'] -= 3
        # head-to-head: focus team away, stored on the hosting team's row
        if away == team:
            opponent = stats[home]
            # Swap the two sides of "home-away" so the focus team's goals come
            # first. Swapping the parts (not the characters) keeps double-digit
            # scores intact: '10-1' -> '1-10'.
            opponent['team_away_score'] += '-'.join(reversed(score.split('-')))
            opponent['focus_team_points'] += points_for.get(a_res, 0)
            opponent['remaining_points'] -= 3

    def _column(key):
        """Collect one accumulator across all teams, in table order."""
        return [stats[name][key] for name in teams]

    # assemble the frame in the established column order
    df_points = pd.DataFrame({'team': teams})
    for key in ('played', 'won', 'drawn', 'lost', 'goals_for', 'goals_against'):
        df_points[key] = _column(key)
    df_points['goal_difference'] = df_points['goals_for'] - df_points['goals_against']
    df_points['points'] = _column('points')
    # keep the last five results and show the most recent first
    df_points['form'] = [results[-5:][::-1] for results in _column('form')]

    games = df_points['played']
    # win % to 1 decimal place (NaN for a team with no games played)
    df_points['win_%'] = (df_points['won'] / games * 100).round(1)
    # per-game averages to 2 decimal places
    df_points['goals_score_pg'] = (df_points['goals_for'] / games).round(2)
    df_points['goals_conceded_pg'] = (df_points['goals_against'] / games).round(2)
    df_points['goals_diff_pg'] = (df_points['goal_difference'] / games).round(2)
    df_points['clean_sheets'] = _column('clean_sheets')
    df_points['points_pg'] = (df_points['points'] / games).round(2)

    # head-to-head columns against the focus team
    df_points['team_home_score'] = _column('team_home_score')
    df_points['team_away_score'] = _column('team_away_score')
    df_points['focus_team_points'] = _column('focus_team_points')
    df_points['remaining_points'] = _column('remaining_points')
    # % of the points contested so far that the focus team took; 0/0 (no
    # meeting yet) yields NaN, which is shown as an empty string
    df_points['%_taken'] = (
        df_points['focus_team_points'] / (6 - df_points['remaining_points']) * 100
    ).round()
    df_points['%_taken'] = df_points['%_taken'].fillna('')

    # sort, reindex, add a position column
    df_points = df_points.sort_values(by=['points', 'goal_difference', 'goals_for'], ascending=False)
    df_points = df_points.reset_index(drop=True)
    df_points['position'] = df_points.index + 1

    return df_points

# Build the table up to the most recent matchday present in df,
# with Arsenal as the focus team for the head-to-head columns
max_matchday = df['matchday'].max()

df_league_table = create_league_table(
    df,
    homeTeam='homeTeam.shortName',
    awayTeam='awayTeam.shortName',
    home_result='home_result',
    away_result='away_result',
    matchday=max_matchday,
    team='Arsenal',
)
df_league_table.head(20)

Unnamed: 0,team,played,won,drawn,lost,goals_for,goals_against,goal_difference,points,form,win_%,goals_score_pg,goals_conceded_pg,goals_diff_pg,clean_sheets,points_pg,team_home_score,team_away_score,focus_team_points,remaining_points,%_taken,position
0,Arsenal,14,10,3,1,29,11,18,33,WWWLW,71.4,2.07,0.79,1.29,6,2.36,,,0,6,,1
1,Liverpool,14,9,4,1,32,14,18,31,WDWDW,64.3,2.29,1.0,1.29,4,2.21,,,0,6,,2
2,Man City,14,9,3,2,36,16,20,30,DDDWW,64.3,2.57,1.14,1.43,4,2.14,1-0,,3,3,100.0,3
3,Aston Villa,14,9,2,3,33,20,13,29,DWWLW,64.3,2.36,1.43,0.93,2,2.07,,,0,6,,4
4,Tottenham,14,8,3,3,28,20,8,27,DLLLW,57.1,2.0,1.43,0.57,4,1.93,2-2,,1,3,33.0,5
5,Newcastle,14,8,2,4,32,14,18,26,WWLWD,57.1,2.29,1.0,1.29,6,1.86,,0-1,0,3,0.0,6
6,Man United,14,8,0,6,16,17,-1,24,LWWWL,57.1,1.14,1.21,-0.07,5,1.71,3-1,,3,3,100.0,7
7,Brighton,14,6,4,4,30,26,4,22,LWDDD,42.9,2.14,1.86,0.29,0,1.57,,,0,6,,8
8,West Ham,14,6,3,5,24,24,0,21,DWWLL,42.9,1.71,1.71,0.0,1,1.5,,,0,6,,9
9,Chelsea,14,5,4,5,25,22,3,19,WLDWL,35.7,1.79,1.57,0.21,3,1.36,,2-2,1,3,33.0,10


In [11]:
# Table as it stood after matchday 3 (fixed cut-off, not the latest matchday),
# with Newcastle as the focus team for the head-to-head columns
max_matchday = 3

df_league_table = create_league_table(
    df,
    homeTeam='homeTeam.shortName',
    awayTeam='awayTeam.shortName',
    home_result='home_result',
    away_result='away_result',
    matchday=max_matchday,
    team='Newcastle',
)
df_league_table.head(20)


Unnamed: 0,team,played,won,drawn,lost,goals_for,goals_against,goal_difference,points,form,win_%,goals_score_pg,goals_conceded_pg,goals_diff_pg,clean_sheets,points_pg,team_home_score,team_away_score,focus_team_points,remaining_points,%_taken,position
0,Man City,3,3,0,0,6,1,5,9,WWW,100.0,2.0,0.33,1.67,2,3.0,,0-1,0,3,0.0,1
1,West Ham,3,2,1,0,7,3,4,7,WWD,66.7,2.33,1.0,1.33,0,2.33,,,0,6,,2
2,Tottenham,3,2,1,0,6,2,4,7,WWD,66.7,2.0,0.67,1.33,2,2.33,,,0,6,,3
3,Liverpool,3,2,1,0,6,3,3,7,WWD,66.7,2.0,1.0,1.0,0,2.33,1-2,,0,3,0.0,4
4,Arsenal,3,2,1,0,5,3,2,7,DWW,66.7,1.67,1.0,0.67,1,2.33,,,0,6,,5
5,Brighton,3,2,0,1,9,5,4,6,LWW,66.7,3.0,1.67,1.33,0,2.0,,,0,6,,6
6,Aston Villa,3,2,0,1,8,6,2,6,WWL,66.7,2.67,2.0,0.67,1,2.0,5-1,,3,3,100.0,7
7,Man United,3,2,0,1,4,4,0,6,WLW,66.7,1.33,1.33,0.0,1,2.0,,,0,6,,8
8,Brentford,3,1,2,0,6,3,3,5,DWD,33.3,2.0,1.0,1.0,1,1.67,,,0,6,,9
9,Chelsea,3,1,1,1,5,4,1,4,WLD,33.3,1.67,1.33,0.33,1,1.33,,,0,6,,10
