In [1]:
from utils import *
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
from scipy.stats import poisson, chisquare
import pickle

In [2]:
base_url = "https://www.football-data.co.uk/mmz4281/{}/E0.csv"
season = "1415"
df = get_match_data(base_url.format(season)).sort_values(by="MatchWeek")
ws = get_winstreak(df)

In [3]:
train = df[df["MatchWeek"]<29]
test = df[df["MatchWeek"]>=29]

In [4]:
### Train params

params = np.ones((4,20))

num_itrs = 1000
lr = 0.0001

for i in tqdm(range(num_itrs)):
    batch = train.sample(frac=1/20)

    for index, row in batch.iterrows():

        home_index = teams_ind[row["HomeTeam"]]
        away_index = teams_ind[row["AwayTeam"]]
        home_goal = row["FTHG"]
        away_goal = row["FTAG"]

        # update home team params
        params[0,home_index] += lr * (home_goal / params[0,home_index] - params[3,away_index])
        params[1,home_index] += lr * (away_goal / params[1,home_index] - params[2,away_index])

        # update away team params
        params[2,away_index] += lr * (away_goal / params[2,away_index] - params[1,home_index])
        params[3,away_index] += lr * (home_goal / params[3,away_index] - params[0,home_index])

100%|██████████| 1000/1000 [00:04<00:00, 242.72it/s]


In [5]:
team_params = np.ones((4,20))
streak_param = 1

num_itrs = 1000
lr = 0.0001

for i in tqdm(range(num_itrs)):
    batch = train.sample(frac=1/20)

    for index, row in batch.iterrows():

        home_index = teams_ind[row["HomeTeam"]]
        away_index = teams_ind[row["AwayTeam"]]
        home_goal = row["FTHG"]
        away_goal = row["FTAG"]
        week = row["MatchWeek"]
        home_streak = ws.loc[week,row["HomeTeam"]]
        away_streak = ws.loc[week,row["AwayTeam"]]

        # update home team params
        team_params[0,home_index] += lr * (home_goal / team_params[0,home_index] - team_params[3,away_index] * (streak_param ** home_streak))
        team_params[1,home_index] += lr * (away_goal / team_params[1,home_index] - team_params[2,away_index] * (streak_param ** away_streak))
        if home_streak != 0:
            streak_param += lr * (home_goal * home_streak / streak_param - home_streak * team_params[0,home_index] * team_params[3,away_index] * (streak_param ** (home_streak - 1)))

        # update away team params
        team_params[2,away_index] += lr * (away_goal / team_params[2,away_index] - team_params[1,home_index] * (streak_param ** away_streak))
        team_params[3,away_index] += lr * (home_goal / team_params[3,away_index] - team_params[0,home_index] * (streak_param ** home_streak))
        if away_streak != 0:
            streak_param += lr * (away_goal * away_streak / streak_param - away_streak * team_params[2,away_index] * team_params[1,home_index] * (streak_param ** (away_streak - 1)))


100%|██████████| 1000/1000 [00:05<00:00, 189.03it/s]


In [6]:
### Save trained params DON'T UNCOMMENT
# saved_params = [params, team_params, streak_param]
# with open('betting_params.pickle', 'wb') as f:
#     # Pickle the 'data' dictionary using the highest protocol available.
#     pickle.dump(saved_params, f, pickle.HIGHEST_PROTOCOL)

In [7]:
with open('betting_params.pickle', 'rb') as f:
    saved_params = pickle.load(f)

params = saved_params[0]
team_params = saved_params[1]
streak_param = saved_params[2]

In [8]:
current = 1000.
history = [current]

for i, row in tqdm(test.iterrows()):
    home_index = teams_ind[row["HomeTeam"]]
    away_index = teams_ind[row["AwayTeam"]]
    result = row["FTR"]
    
    house_odds = np.array([row["B365H"], row["B365D"], row["B365A"]])
    model_odds = get_odds(params, home_index, away_index)

    if max(house_odds - model_odds) > 0.5 and model_odds[np.argmax(house_odds - model_odds)] < 5:
        

    if model_odds["H"] + 0.1 < house_odds["H"]:
        if result == "H":
            current += (house_odds - 1)
        else:
            current -= 1
    # elif model_odds["D"] + 0.1 < 

    

IndentationError: expected an indented block (<ipython-input-8-536e5d7b3dae>, line 15)

In [10]:
match = df.iloc[360]
print(match)
home_index = teams_ind[match["HomeTeam"]]
away_index = teams_ind[match["AwayTeam"]]

house_odds = np.array([match["B365H"], match["B365D"],match["B365A"]])
model_odds = get_odds(params, home_index, away_index)

max(house_odds - model_odds)

Date          18/05/15
MatchWeek           37
HomeTeam     West Brom
AwayTeam       Chelsea
FTHG                 3
FTAG                 0
FTR                  H
B365H              4.1
B365D              3.5
B365A                2
Name: 368, dtype: object


0.5865119391273399

In [11]:
kelly_criterion(house_odds, model_odds)

array([1.28041946, 1.33526776, 1.29253028])

In [12]:
model_odds

array([7.65112864, 6.17930144, 1.41348806])

In [13]:
1/model_odds

array([0.13069967, 0.16183059, 0.70746972])

In [14]:
((0.70*2) - 1)/(1)

0.3999999999999999

In [15]:
model_odds - (1-model_odds)/(house_odds-1)

array([9.79665401, 8.25102202, 1.82697612])