# API data for the current Premier League season

## Set environment

In [126]:
import os
from pathlib import Path
import numpy as np
import pandas as pd

pd.set_option("display.max_columns", 500)
from datetime import datetime

from dotenv import load_dotenv

import xlsxwriter

import requests
import json

# Date stamp for this run, formatted as YYYYMMDD
date = f"{datetime.now():%Y%m%d}"

# Read the .env file two directories up; its return value is the cell output
load_dotenv(dotenv_path='../../.env')


True

## API call

### Key columns to keep

In [127]:
# Column labels to retain, in display order
keep_cols = [
    # top-level match fields
    "id", "utcDate", "status", "matchday",
    # nested team fields (dotted labels)
    "homeTeam.shortName", "awayTeam.shortName",
    # nested score fields (dotted labels)
    "score.winner", "score.fullTime.home", "score.fullTime.away",
]

In [128]:
# Lookup of club names to their simplified replacements.
# Names that do not appear as a key here are not covered by the mapping.
club_names = dict([
    ("Brighton Hove", "Brighton"),
    ("Leeds United", "Leeds"),
    ("Leicester City", "Leicester"),
    ("Wolverhampton", "Wolves"),
])


### API data call

Fetch the finished matches from the API and load them, with their scorelines, into a dataframe.

In [129]:
# The API key comes from the environment; a missing variable raises KeyError
# here rather than producing an unauthenticated request.
api_key = os.environ['API_AUTH_KEY']
# HTTPS so the auth token in the header is not sent in clear text.
uri = 'https://api.football-data.org/v4/competitions/PL/matches?status=FINISHED'
headers = {'X-Auth-Token': api_key}

# A timeout stops the cell hanging forever on a stalled connection, and
# raise_for_status surfaces HTTP errors (bad key, rate limit) immediately
# instead of as a confusing KeyError when the payload has no 'matches' entry.
r = requests.get(uri, headers=headers, timeout=30)
r.raise_for_status()
data = r.json()

# Flatten the nested match records; nested keys become dotted column names.
df = pd.json_normalize(data, record_path='matches')

df['utcDate'] = pd.to_datetime(df['utcDate'])
# sort by date
df = df.sort_values(by='utcDate', ascending=True).reset_index(drop=True)
# rename team names using club_names dict
df['homeTeam.shortName'] = df['homeTeam.shortName'].replace(club_names)
df['awayTeam.shortName'] = df['awayTeam.shortName'].replace(club_names)
# keep only columns in keep_cols; copy so that columns added to df later
# write to an independent frame and do not trigger SettingWithCopyWarning
df = df[keep_cols].copy()

df.head()


Unnamed: 0,id,utcDate,status,matchday,homeTeam.shortName,awayTeam.shortName,score.winner,score.fullTime.home,score.fullTime.away
0,416384,2022-08-05 19:00:00+00:00,FINISHED,1,Crystal Palace,Arsenal,AWAY_TEAM,0,2
1,416383,2022-08-06 11:30:00+00:00,FINISHED,1,Fulham,Liverpool,DRAW,2,2
2,416378,2022-08-06 14:00:00+00:00,FINISHED,1,Tottenham,Southampton,HOME_TEAM,4,1
3,416379,2022-08-06 14:00:00+00:00,FINISHED,1,Newcastle,Nottingham,HOME_TEAM,2,0
4,416381,2022-08-06 14:00:00+00:00,FINISHED,1,Leeds,Wolves,HOME_TEAM,2,1


In [134]:
# create function to get home_result and away_result
def get_result_home(row):
    """Return the home side's result letter for one match row.

    Reads ``row['score.winner']``: ``'HOME_TEAM'`` gives ``'W'``,
    ``'AWAY_TEAM'`` gives ``'L'``, and any other value gives ``'D'``.
    """
    winner = row['score.winner']
    if winner == 'HOME_TEAM':
        return 'W'
    if winner == 'AWAY_TEAM':
        return 'L'
    # Everything that is not an explicit home or away win counts as a draw.
    return 'D'

def get_result_away(row):
    """Return the away side's result letter for one match row.

    Reads ``row['score.winner']``: ``'AWAY_TEAM'`` gives ``'W'``,
    ``'HOME_TEAM'`` gives ``'L'``, and any other value gives ``'D'``.
    """
    winner = row['score.winner']
    # Anything other than an explicit home or away win falls through to 'D'.
    return 'L' if winner == 'HOME_TEAM' else ('W' if winner == 'AWAY_TEAM' else 'D')

# Classify every match from both sides by running each classifier row-wise
for result_col, classify in (
    ('home_result', get_result_home),
    ('away_result', get_result_away),
):
    df[result_col] = df.apply(classify, axis=1)

df.head()

Unnamed: 0,id,utcDate,status,matchday,homeTeam.shortName,awayTeam.shortName,score.winner,score.fullTime.home,score.fullTime.away,home_result,away_result
0,416384,2022-08-05 19:00:00+00:00,FINISHED,1,Crystal Palace,Arsenal,AWAY_TEAM,0,2,L,W
1,416383,2022-08-06 11:30:00+00:00,FINISHED,1,Fulham,Liverpool,DRAW,2,2,D,D
2,416378,2022-08-06 14:00:00+00:00,FINISHED,1,Tottenham,Southampton,HOME_TEAM,4,1,W,L
3,416379,2022-08-06 14:00:00+00:00,FINISHED,1,Newcastle,Nottingham,HOME_TEAM,2,0,W,L
4,416381,2022-08-06 14:00:00+00:00,FINISHED,1,Leeds,Wolves,HOME_TEAM,2,1,W,L


In [131]:
# create league table function
def create_league_table(df, homeTeam, awayTeam, home_result, away_result):
    """Build a league table from a frame of match results.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per match. Besides the four columns named by the other
        arguments it must contain ``score.fullTime.home`` and
        ``score.fullTime.away``. Rows are processed in frame order, so the
        frame should be sorted chronologically for ``form`` to be meaningful.
    homeTeam, awayTeam : str
        Names of the columns holding the home and away team.
    home_result, away_result : str
        Names of the columns holding each side's result as ``'W'``, ``'D'``
        or ``'L'``. Any other value is ignored for that side of the match.

    Returns
    -------
    pandas.DataFrame
        One row per team with columns ``team``, ``played``, ``won``,
        ``drawn``, ``lost``, ``goals_for``, ``goals_against``,
        ``goal_difference``, ``points``, ``form`` and ``position``, sorted by
        points, then goal difference, then goals scored (all descending).
        ``form`` holds at most the last five results, most recent first.
    """
    outcome_column = {'W': 'won', 'D': 'drawn', 'L': 'lost'}
    outcome_points = {'W': 3, 'D': 1, 'L': 0}
    columns = [
        'team', 'played', 'won', 'drawn', 'lost',
        'goals_for', 'goals_against', 'goal_difference', 'points', 'form',
    ]

    # Seed the table from BOTH team columns. Seeding from the home column
    # alone drops any club that has only played away so far, along with all
    # of its results. Home teams come first, so their relative order is the
    # same as when seeding from the home column only.
    all_teams = pd.concat([df[homeTeam], df[awayTeam]], ignore_index=True)
    stats = {}
    for team in all_teams.dropna().unique():
        record = dict.fromkeys(columns, 0)
        record['team'] = team
        record['form'] = ''
        stats[team] = record

    fixtures = zip(
        df[homeTeam], df[awayTeam],
        df[home_result], df[away_result],
        df['score.fullTime.home'], df['score.fullTime.away'],
    )
    for home, away, home_res, away_res, home_goals, away_goals in fixtures:
        # Each match updates two teams; goals for/against swap between them.
        sides = (
            (home, home_res, home_goals, away_goals),
            (away, away_res, away_goals, home_goals),
        )
        for team, result, scored, conceded in sides:
            record = stats.get(team)
            # Skip sides with a missing team or a result outside W/D/L.
            if record is None or result not in outcome_column:
                continue
            record['played'] += 1
            record[outcome_column[result]] += 1
            record['points'] += outcome_points[result]
            record['goals_for'] += scored
            record['goals_against'] += conceded
            record['form'] += result

    for record in stats.values():
        record['goal_difference'] = record['goals_for'] - record['goals_against']
        # Keep the last five results, then reverse so the newest comes first.
        record['form'] = record['form'][-5:][::-1]

    df_points = pd.DataFrame(list(stats.values()), columns=columns)

    # sort, reindex and add a 1-based position column
    df_points = df_points.sort_values(
        by=['points', 'goal_difference', 'goals_for'], ascending=False
    ).reset_index(drop=True)
    df_points['position'] = df_points.index + 1

    return df_points

# Build the league table from the match results, naming each column role explicitly
df_league_table = create_league_table(
    df,
    homeTeam='homeTeam.shortName',
    awayTeam='awayTeam.shortName',
    home_result='home_result',
    away_result='away_result',
)
df_league_table.head(20)



Unnamed: 0,team,played,won,drawn,lost,goals_for,goals_against,goal_difference,points,form,position
0,Arsenal,28,22,3,3,66,26,40,69,WWWWW,1
1,Man City,27,19,4,4,67,25,42,61,WWWDW,2
2,Man United,26,15,5,6,41,35,6,50,DLWWD,3
3,Tottenham,28,15,4,9,52,40,12,49,DWLWW,4
4,Newcastle,26,12,11,3,39,19,20,47,WWLLD,5
5,Liverpool,26,12,6,8,47,29,18,42,LWWDW,6
6,Brighton,25,12,6,7,46,31,15,42,WDWLD,7
7,Brentford,27,10,12,5,43,34,9,42,DWLWD,8
8,Fulham,27,11,6,10,38,37,1,39,LLDWW,9
9,Chelsea,27,10,8,9,29,28,1,38,DWWLL,10
