In [1]:
import importlib
from utils import *
# importlib.reload(utils)
import numpy as np
import pandas as pd
from tqdm import tqdm

In [2]:
# Season code that is substituted into the URL template below.
# NOTE(review): "1415" presumably denotes the 2014-15 season; confirm against the data source.
season = "1415"

# URL template for the match-results CSV; the `{}` placeholder is filled
# with `season` via `base_url.format(season)` when the data is loaded.
base_url = "https://www.football-data.co.uk/mmz4281/{}/E0.csv"

In [3]:
# Load the match data for the configured season: the season code is substituted
# into the URL template and the resulting URL is passed to `get_match_data`.
# NOTE(review): `get_match_data` is presumably provided by the `utils` star-import;
# later cells read the "HomeTeam", "AwayTeam", "FTHG" and "FTAG" columns of `df`.
df = get_match_data(base_url.format(season))
# NOTE(review): presumably win-streak information derived from the match data;
# confirm against the definition of `get_winstreak`.
ws = get_winstreak(df)

In [4]:
### Gradient descent / NO MOMENTUM
#
# Fits four strength parameters per team from the full-time scores in `df`
# using plain stochastic gradient steps on random mini-batches of matches.
# Each step has the form `goals / own_param - opposing_param`.
# NOTE(review): this matches the gradient of a Poisson log-likelihood whose
# scoring rate is (attacking param) * (opposing defensive param); confirm that
# this is the intended model.

num_itrs = 10000
lr = 0.0001

# Fixed seed so that Restart & Run All draws the same mini-batches every time.
SEED = 0
batch_rng = np.random.RandomState(SEED)

# Column index of every team in `params`. Derived from the data rather than
# hard-coded so that changing `season` cannot raise a KeyError or mis-size the
# matrix. Alphabetical order assigns the 2014-15 teams the same indices (0-19)
# as the hand-written mapping this replaces.
teams_ind = {
    team: column
    for column, team in enumerate(sorted(set(df["HomeTeam"]) | set(df["AwayTeam"])))
}

### Parameter matrix rows: home off, home def, away off, away def
params = np.ones((4, len(teams_ind)))

for i in tqdm(range(num_itrs)):
    # Mini-batch: a random 5% of the matches, drawn from the seeded generator.
    batch = df.sample(frac=1/20, random_state=batch_rng)

    # Iterate the four needed columns directly instead of building a Series
    # per row with `iterrows`; the row order is the same.
    for home_team, away_team, home_goal, away_goal in zip(
        batch["HomeTeam"], batch["AwayTeam"], batch["FTHG"], batch["FTAG"]
    ):
        home_index = teams_ind[home_team]
        away_index = teams_ind[away_team]

        # update home team params
        params[0,home_index] += lr * (home_goal / params[0,home_index] - params[3,away_index])
        params[1,home_index] += lr * (away_goal / params[1,home_index] - params[2,away_index])

        # update away team params
        # (applied in order: these steps read the home-team values already
        # updated for this match just above)
        params[2,away_index] += lr * (away_goal / params[2,away_index] - params[1,home_index])
        params[3,away_index] += lr * (home_goal / params[3,away_index] - params[0,home_index])


100%|██████████| 10000/10000 [00:23<00:00, 427.77it/s]


In [5]:
# Build the league table from the match results in `df`.
#
# Every match contributes to two rows (home and away team): 3 points and a
# win for the winner plus a loss for the loser, or 1 point and a draw each
# when `winner` reports neither "H" nor "A". Goals for / against / difference
# are accumulated for both sides.
#
# Result: `table`, one row per team, with the columns listed in `columns`;
# all numeric columns are floats.
columns = ["Team", "Points", "W", "D", "L", "GD", "GF", "GA"]

# Take the team list from the data itself so the cell does not depend on a
# mapping defined outside this notebook or on a fixed team count of 20.
team_names = sorted(set(df["HomeTeam"]) | set(df["AwayTeam"]))

# Accumulate in plain dicts and build the frame once at the end; this avoids
# scanning the whole table with a boolean mask for every single increment.
tallies = {team: dict.fromkeys(columns[1:], 0.0) for team in team_names}

for home, away, home_goals, away_goals in zip(
    df["HomeTeam"], df["AwayTeam"], df["FTHG"], df["FTAG"]
):
    result = (home_goals, away_goals)
    win = winner(result)
    home_row = tallies[home]
    away_row = tallies[away]

    # update results
    if win == "H":
        home_row["Points"] += 3.
        home_row["W"] += 1.
        away_row["L"] += 1.
    elif win == "A":
        away_row["Points"] += 3.
        away_row["W"] += 1.
        home_row["L"] += 1.
    else:
        home_row["Points"] += 1.
        away_row["Points"] += 1.
        home_row["D"] += 1.
        away_row["D"] += 1.

    # update goals
    home_row["GF"] += result[0]
    home_row["GA"] += result[1]
    home_row["GD"] += (result[0] - result[1])

    away_row["GF"] += result[1]
    away_row["GA"] += result[0]
    away_row["GD"] += (result[1] - result[0])

table = pd.DataFrame(
    [{"Team": team, **tallies[team]} for team in team_names],
    columns=columns,
)

In [6]:
# Display the league table returned by `standings` for the loaded matches.
# NOTE(review): `standings` is presumably provided by the `utils` star-import;
# confirm it agrees with the `table` built in the previous cell.
standings(df)

Unnamed: 0,Team,Points,W,D,L,GD,GF,GA
1,Chelsea,87.0,26.0,9.0,3.0,41.0,73.0,32.0
2,Man City,79.0,24.0,7.0,7.0,45.0,83.0,38.0
3,Arsenal,75.0,22.0,9.0,7.0,35.0,71.0,36.0
4,Man United,70.0,20.0,10.0,8.0,25.0,62.0,37.0
5,Tottenham,64.0,19.0,7.0,12.0,5.0,58.0,53.0
6,Liverpool,62.0,18.0,8.0,12.0,4.0,52.0,48.0
7,Southampton,60.0,18.0,6.0,14.0,21.0,54.0,33.0
8,Swansea,56.0,16.0,8.0,14.0,-3.0,46.0,49.0
9,Stoke,54.0,15.0,9.0,14.0,3.0,48.0,45.0
10,Crystal Palace,48.0,13.0,9.0,16.0,-4.0,47.0,51.0
