In [66]:
import os
from pathlib import Path
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', 500)
from datetime import datetime

import requests
import json

# Timestamp of this run (local time, minute resolution, e.g. "20221102-2216").
# It is used as the filename prefix of the CSV exports further down.
date = f"{datetime.now():%Y%m%d-%H%M}"
print(date)

20221102-2216


In [67]:
# Columns of the normalised match records that are kept for the analysis.
# Not kept (previously listed here as commented-out entries): season.id,
# season.currentMatchday, homeTeam.id, homeTeam.name, awayTeam.id,
# awayTeam.name, score.winner, score.halfTime.home, score.halfTime.away.
keep_cols = [
    # match metadata
    'id', 'utcDate', 'status', 'matchday',
    # teams
    'homeTeam.shortName', 'awayTeam.shortName',
    # full-time score
    'score.fullTime.home', 'score.fullTime.away',
]

In [68]:
# Fetch all FINISHED matches of competition "PL" from the football-data.org
# v4 API. HTTPS is used so the auth token is not sent in clear text.
uri = 'https://api.football-data.org/v4/competitions/PL/matches?status=FINISHED'

# The API token is a secret: read it from the environment instead of storing
# it in the notebook.
# NOTE(review): the token that used to be hard-coded in this cell should be
# treated as leaked and rotated.
api_token = os.environ.get('FOOTBALL_DATA_API_TOKEN')
if not api_token:
    raise RuntimeError(
        'Set the FOOTBALL_DATA_API_TOKEN environment variable to your '
        'football-data.org API token before running this cell.'
    )
headers = { 'X-Auth-Token': api_token }

# The timeout keeps the cell from hanging indefinitely; raise_for_status()
# surfaces HTTP error responses here instead of as a confusing failure while
# normalising the payload below.
response = requests.get(uri, headers=headers, timeout=30)
response.raise_for_status()
data = response.json()

# One row per match, restricted to the columns of interest. The .copy() makes
# the column assignments below operate on an independent frame.
df = pd.json_normalize(data, record_path=['matches'])
df = df[keep_cols].copy()
df['utcDate'] = pd.to_datetime(df['utcDate'])
# Human-readable "home-away" full-time score, e.g. "2-1".
df['score.fullTime'] = df['score.fullTime.home'].astype(str) + '-' + df['score.fullTime.away'].astype(str)
print(df.shape)
df.head()


(126, 9)


Unnamed: 0,id,utcDate,status,matchday,homeTeam.shortName,awayTeam.shortName,score.fullTime.home,score.fullTime.away,score.fullTime
0,416384,2022-08-05 19:00:00+00:00,FINISHED,1,Crystal Palace,Arsenal,0,2,0-2
1,416383,2022-08-06 11:30:00+00:00,FINISHED,1,Fulham,Liverpool,2,2,2-2
2,416378,2022-08-06 14:00:00+00:00,FINISHED,1,Tottenham,Southampton,4,1,4-1
3,416379,2022-08-06 14:00:00+00:00,FINISHED,1,Newcastle,Nottingham,2,0,2-0
4,416381,2022-08-06 14:00:00+00:00,FINISHED,1,Leeds United,Wolverhampton,2,1,2-1


In [69]:
# Pivot the match list into a home-vs-away results grid: one row per home
# team, one column per away team, each cell holding the full-time score
# string. Pairings with no match in df are shown as '.'.
df_table = df.pivot_table(
    index=['homeTeam.shortName'],
    columns=['awayTeam.shortName'],
    values=['score.fullTime'],
    aggfunc='first',  # scores are strings, so pick the value instead of aggregating numerically
    fill_value='.',
)

# export to csv; to_csv does not create missing directories, so make sure the
# output directory exists first
output_dir = Path('../../output')
output_dir.mkdir(parents=True, exist_ok=True)
df_table.to_csv(output_dir / f'{date}-pivot_.csv')
df_table

Unnamed: 0_level_0,score.fullTime,score.fullTime,score.fullTime,score.fullTime,score.fullTime,score.fullTime,score.fullTime,score.fullTime,score.fullTime,score.fullTime,score.fullTime,score.fullTime,score.fullTime,score.fullTime,score.fullTime,score.fullTime,score.fullTime,score.fullTime,score.fullTime,score.fullTime
awayTeam.shortName,Arsenal,Aston Villa,Bournemouth,Brentford,Brighton Hove,Chelsea,Crystal Palace,Everton,Fulham,Leeds United,Leicester City,Liverpool,Man City,Man United,Newcastle,Nottingham,Southampton,Tottenham,West Ham,Wolverhampton
homeTeam.shortName,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
Arsenal,.,2-1,.,.,.,.,.,.,2-1,.,4-2,3-2,.,.,.,5-0,.,3-1,.,.
Aston Villa,.,.,.,4-0,.,0-2,.,2-1,.,.,.,.,1-1,.,.,.,1-0,.,0-1,.
Bournemouth,0-3,2-0,.,0-0,.,.,.,.,.,.,2-1,.,.,.,.,.,0-1,2-3,.,0-0
Brentford,0-3,.,.,.,2-0,0-0,.,1-1,.,5-2,.,.,.,4-0,.,.,.,.,.,1-1
Brighton Hove,.,.,.,.,.,4-1,.,.,.,1-0,5-2,.,.,.,0-0,0-0,.,0-1,.,.
Chelsea,.,.,.,.,.,.,.,.,.,.,2-1,.,.,1-1,.,.,.,2-2,2-1,3-0
Crystal Palace,0-2,3-1,.,1-1,.,1-2,.,.,.,2-1,.,.,.,.,.,.,1-0,.,.,2-1
Everton,.,.,.,.,.,0-1,3-0,.,.,.,.,0-0,.,1-2,.,1-1,.,.,1-0,.
Fulham,.,3-0,2-2,3-2,2-1,.,.,0-0,.,.,.,2-2,.,.,1-4,.,.,.,.,.
Leeds United,0-1,0-0,.,.,.,3-0,.,1-1,2-3,.,.,.,.,.,.,.,.,.,.,2-1


## Create Matches DataFrame with a row for each team 

In [70]:
# League points awarded for each match result code (W = win, D = draw, L = loss).
points_map = dict(W=3, D=1, L=0)

def get_result(score, score_opp):
    """Classify a match from one team's point of view.

    Args:
        score: goals scored by the team.
        score_opp: goals scored by the opponent.

    Returns:
        'D' if both scores are equal, 'W' if ``score`` is greater than
        ``score_opp``, otherwise 'L'.
    """
    if score == score_opp:
        return 'D'
    if score > score_opp:
        return 'W'
    return 'L'

# Per-match columns that are redundant once every match has been split into
# one row per team; they are dropped at the end of this cell.
drop_cols = ['homeTeam.shortName','awayTeam.shortName', 'score.fullTime.home', 'score.fullTime.away']

# convert each match into two rows (one for each team)
# The team names are duplicated into 'H' and 'A' so that melt can unpivot them
# into (home_away, team) pairs while the original home/away columns remain
# available (as id_vars) for the calculations below.
df['H'] = df['homeTeam.shortName']
df['A'] = df['awayTeam.shortName']

df_matches = pd.melt(
    df,
    id_vars=keep_cols,
    value_vars=["H", "A"],
    var_name="home_away",
    value_name="team",
)

# True on the home team's row of each match. A single mask is used for the
# opponent and both goal columns so they cannot disagree with each other.
is_home = df_matches['home_away'] == 'H'

df_matches['opponent'] = np.where(
    is_home,
    df_matches['awayTeam.shortName'],
    df_matches['homeTeam.shortName'],
)

# full time goals, seen from the perspective of `team`
df_matches["goals_scored"] = np.where(
    is_home,
    df_matches["score.fullTime.home"],
    df_matches["score.fullTime.away"],
)
df_matches["goals_against"] = np.where(
    is_home,
    df_matches["score.fullTime.away"],
    df_matches["score.fullTime.home"],
)

# A plain comprehension (rather than np.vectorize without otypes) also works
# when there are no rows at all.
df_matches["result"] = [
    get_result(scored, conceded)
    for scored, conceded in zip(df_matches["goals_scored"], df_matches["goals_against"])
]
df_matches["points"] = df_matches["result"].map(points_map)

# drop the per-match columns and order chronologically: kick-off time first,
# match id as tie-breaker for simultaneous kick-offs
df_matches = (df_matches
    .drop(drop_cols, axis=1)
    .sort_values(by=['utcDate', 'id'], ascending=True)
    .reset_index(drop=True)
    )

# export to csv; to_csv does not create missing directories, so make sure the
# output directory exists first
output_dir = Path('../../output')
output_dir.mkdir(parents=True, exist_ok=True)
df_matches.to_csv(output_dir / f'{date}-matches_.csv')

df_matches.head(20)

Unnamed: 0,id,utcDate,status,matchday,home_away,team,opponent,goals_scored,goals_against,result,points
0,416384,2022-08-05 19:00:00+00:00,FINISHED,1,H,Crystal Palace,Arsenal,0,2,L,0
1,416384,2022-08-05 19:00:00+00:00,FINISHED,1,A,Arsenal,Crystal Palace,2,0,W,3
2,416383,2022-08-06 11:30:00+00:00,FINISHED,1,H,Fulham,Liverpool,2,2,D,1
3,416383,2022-08-06 11:30:00+00:00,FINISHED,1,A,Liverpool,Fulham,2,2,D,1
4,416378,2022-08-06 14:00:00+00:00,FINISHED,1,H,Tottenham,Southampton,4,1,W,3
5,416378,2022-08-06 14:00:00+00:00,FINISHED,1,A,Southampton,Tottenham,1,4,L,0
6,416379,2022-08-06 14:00:00+00:00,FINISHED,1,H,Newcastle,Nottingham,2,0,W,3
7,416379,2022-08-06 14:00:00+00:00,FINISHED,1,A,Nottingham,Newcastle,0,2,L,0
8,416381,2022-08-06 14:00:00+00:00,FINISHED,1,H,Leeds United,Wolverhampton,2,1,W,3
9,416381,2022-08-06 14:00:00+00:00,FINISHED,1,A,Wolverhampton,Leeds United,1,2,L,0


## Create league table

In [71]:
def league(x):
    """Summarise one team's match rows into a league-table record.

    Args:
        x: DataFrame of match rows for a single team, with the columns
            'result' ('W'/'D'/'L'), 'goals_scored', 'goals_against'
            and 'points'.

    Returns:
        pd.Series indexed by 'Played', 'Wins', 'Draws', 'Losses',
        'Goals For', 'Goals Against', 'Goal Difference' and 'Points'.
    """
    outcome = x['result']
    goals_for = x['goals_scored'].sum()
    goals_against = x['goals_against'].sum()

    return pd.Series({
        'Played': outcome.count(),
        'Wins': outcome.eq('W').sum(),
        'Draws': outcome.eq('D').sum(),
        'Losses': outcome.eq('L').sum(),
        'Goals For': goals_for,
        'Goals Against': goals_against,
        'Goal Difference': goals_for - goals_against,
        'Points': x['points'].sum(),
    })

# calculate form (results of each team's last 5 matches, oldest first)
# tail(5) only means "the 5 most recent" when rows are in chronological
# order, so sort explicitly instead of relying on df_matches' current order.
df_form = (
    df_matches
    .sort_values(by=['utcDate', 'id'])
    .groupby('team')
    .tail(5)
    .groupby('team')['result']
    .agg(''.join)
    .to_frame('Form')
)

# apply league function
# Only the columns league() reads are selected, so the grouping column itself
# is not passed into the function (doing so is deprecated in recent pandas).
stat_cols = ['result', 'goals_scored', 'goals_against', 'points']
df_league = df_matches.groupby('team')[stat_cols].apply(league)
# merge in form
df_league = df_league.merge(df_form, left_index=True, right_index=True)
# Sort by points, then goal difference, then goals scored (the usual league
# tie-breakers, in that order)
df_league = df_league.sort_values(
    by=['Points', 'Goal Difference', 'Goals For'], ascending=False
)
# add column for position: the table is already in final order, so the
# position is simply the 1-based row number
df_league['Position'] = range(1, len(df_league) + 1)

# export to csv; to_csv does not create missing directories, so make sure the
# output directory exists first
output_dir = Path('../../output')
output_dir.mkdir(parents=True, exist_ok=True)
df_league.to_csv(output_dir / f'{date}-league_.csv')

df_league.head(20)


Unnamed: 0_level_0,Played,Wins,Draws,Losses,Goals For,Goals Against,Goal Difference,Points,Form,Position
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Arsenal,12,10,1,1,30,11,19,31,WWWDW,1
Man City,12,9,2,1,37,11,26,29,WWLWW,2
Tottenham,13,8,2,3,26,16,10,26,WWLLW,3
Newcastle,13,6,6,1,24,10,14,24,WDWWW,4
Man United,12,7,2,3,17,16,1,23,WDWDW,5
Chelsea,12,6,3,3,17,15,2,21,WWDDL,6
Fulham,13,5,4,4,22,22,0,19,LDWWD,7
Brighton Hove,12,5,3,4,19,15,4,18,LLDLW,8
Liverpool,12,4,4,4,23,15,8,16,LWWLL,9
Crystal Palace,12,4,4,4,13,16,-3,16,WDWLW,10
