In [1]:
# Import packages and set options
import pandas as pd
import numpy as np
import math
# Show every column when a wide DataFrame is displayed (canonical option key).
pd.set_option("display.max_columns", None)
# Globally silences pandas' SettingWithCopyWarning. NOTE(review): this hides the
# warning that normally flags chained-indexing writes, so keep it only if needed.
pd.options.mode.chained_assignment = None

In [2]:
# Define poisson function
def poisson(actual, mean):
    """Return the Poisson probability of observing exactly `actual` events.

    Parameters
    ----------
    actual : int
        Number of events (must be a non-negative integer for math.factorial).
    mean : float
        Expected number of events.

    Returns
    -------
    float
        mean**actual * exp(-mean) / actual!
    """
    numerator = mean ** actual * math.exp(-mean)
    denominator = math.factorial(actual)
    return numerator / denominator

In [3]:
# Function to build mean goal values for the typical newly promoted teams
def promoted_team(betting_year):
    """Build pooled scoring averages for a generic newly promoted team.

    For each consecutive pair of season files in the ``back_years`` seasons
    leading up to ``betting_year``, a team counts as newly promoted when it
    appears as a home team in the later file but not in the earlier one.
    Home/away games, goals scored and goals conceded of all such teams are
    summed and turned into per-game averages.

    Reads the module-level names ``columns`` (output column layout),
    ``cols`` (CSV columns to load) and ``back_years`` (look-back window),
    and the files ``data/EPL/<year>.csv``.

    Parameters
    ----------
    betting_year : int
        Season number being bet on; files ``betting_year - back_years``
        through ``betting_year`` are read.

    Returns
    -------
    pandas.DataFrame
        One row indexed ``'newteam'`` with the columns listed in ``columns``.
    """
    def load_season(year):
        season = pd.read_csv('data/EPL/%d.csv' % year, usecols=cols)
        # Some files end with a blank row; without this it would be read as a
        # NaN "team" and could poison the pooled totals with NaN.
        return season.dropna(subset=['HomeTeam', 'AwayTeam'])

    newteaminfo = pd.DataFrame(0, index=['newteam'], columns=columns)

    first_year = betting_year - back_years
    # Carry each season over to the next iteration so every file is read once.
    prev_season = load_season(first_year)
    for year in range(first_year, betting_year):
        new_season = load_season(year + 1)
        promoted = set(new_season['HomeTeam']) - set(prev_season['HomeTeam'])

        home_rows = new_season[new_season['HomeTeam'].isin(promoted)]
        away_rows = new_season[new_season['AwayTeam'].isin(promoted)]

        newteaminfo['home_games'] += len(home_rows)
        newteaminfo['home_goals'] += home_rows['FTHG'].sum()
        newteaminfo['home_conceded'] += home_rows['FTAG'].sum()
        newteaminfo['away_games'] += len(away_rows)
        newteaminfo['away_goals'] += away_rows['FTAG'].sum()
        newteaminfo['away_conceded'] += away_rows['FTHG'].sum()

        prev_season = new_season

    # Per-game averages: alpha = goals scored, beta = goals conceded.
    newteaminfo['alpha_h'] = newteaminfo['home_goals'] / newteaminfo['home_games']
    newteaminfo['beta_h'] = newteaminfo['home_conceded'] / newteaminfo['home_games']
    newteaminfo['alpha_a'] = newteaminfo['away_goals'] / newteaminfo['away_games']
    newteaminfo['beta_a'] = newteaminfo['away_conceded'] / newteaminfo['away_games']
    newteaminfo['total_games'] = newteaminfo['home_games'] + newteaminfo['away_games']
    return newteaminfo

In [4]:
# Creating team list
def make_teamlist(data):
    """Return the alphabetically sorted list of distinct home teams.

    Parameters
    ----------
    data : pandas.DataFrame
        Match data with a ``HomeTeam`` column. Any index is accepted.

    Returns
    -------
    list
        Unique ``HomeTeam`` values in ascending order; missing values
        (e.g. from a blank CSV row) are ignored.
    """
    # unique() avoids the quadratic "not in list" scan and does not depend on
    # the frame having a default 0..n-1 index.
    return sorted(data['HomeTeam'].dropna().unique())

In [5]:
# Some of the CSVs had a blank final row which caused issues:
def clean(data):
    """Keep only the first 380 rows of a season frame.

    Used to discard the blank trailing row found in some CSVs.
    Frames with 380 rows or fewer come back with all their rows.
    """
    season_rows = slice(0, 380)
    return data[season_rows]

In [6]:
# Makes the teaminfo dataframe
def make_teaminfo(data):
    """Build per-team scoring statistics from one season of match data.

    For every home team in ``data`` this counts home/away games, goals
    scored and goals conceded, then derives per-game averages
    (``alpha_*`` = scored, ``beta_*`` = conceded). A final ``'newteam'`` row
    from ``promoted_team`` is appended as a stand-in for promoted teams.

    Reads the module-level names ``columns`` (column layout) and
    ``betting_year`` (passed to ``promoted_team``).

    Parameters
    ----------
    data : pandas.DataFrame
        Season data with HomeTeam, AwayTeam, FTHG and FTAG columns.

    Returns
    -------
    pandas.DataFrame
        Indexed by team name plus ``'newteam'``, with the columns in ``columns``.
    """
    teamlist = make_teamlist(data)
    teaminfo = pd.DataFrame(0, index=teamlist, columns=columns)

    home = data.groupby('HomeTeam')
    away = data.groupby('AwayTeam')
    tallies = {
        'home_games': home.size(),
        'home_goals': home['FTHG'].sum(),
        'home_conceded': home['FTAG'].sum(),
        'away_games': away.size(),
        'away_goals': away['FTAG'].sum(),
        'away_conceded': away['FTHG'].sum(),
    }
    # Whole-column assignment instead of chained indexing
    # (teaminfo[col][team] += ...), whose writes are lost under pandas
    # copy-on-write.
    for name, per_team in tallies.items():
        teaminfo[name] = per_team.reindex(teamlist, fill_value=0)

    teaminfo['alpha_h'] = teaminfo['home_goals'] / teaminfo['home_games']
    teaminfo['beta_h'] = teaminfo['home_conceded'] / teaminfo['home_games']
    teaminfo['alpha_a'] = teaminfo['away_goals'] / teaminfo['away_games']
    teaminfo['beta_a'] = teaminfo['away_conceded'] / teaminfo['away_games']
    teaminfo['total_games'] = teaminfo['home_games'] + teaminfo['away_games']

    newteaminfo = promoted_team(betting_year)
    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    return pd.concat([teaminfo, newteaminfo])

In [7]:
# Functions to get the betting season data ready
def add_columns(season):
    """Add zero-filled probability and expected-value columns to `season`.

    The frame is modified in place and also returned. Columns added:
    p_win, p_draw, p_loss, sum_probs, ev_win, ev_draw, ev_loss (float zeros).
    """
    new_names = ('p_win', 'p_draw', 'p_loss', 'sum_probs',
                 'ev_win', 'ev_draw', 'ev_loss')
    n_rows = len(season)
    for name in new_names:
        season[name] = np.zeros(n_rows)
    return season

def newteam_label(season):
    """Relabel teams missing from the global ``teamlist`` as ``'newteam'``.

    Any HomeTeam or AwayTeam value not found in the module-level ``teamlist``
    is replaced by the string ``'newteam'``. The frame is modified in place
    and also returned.
    """
    for side in ('HomeTeam', 'AwayTeam'):
        unknown = ~season[side].isin(teamlist)
        # One .loc write on the frame itself; the chained form
        # season.HomeTeam[i] = ... is lost under pandas copy-on-write
        # and needs a default 0..n-1 index.
        season.loc[unknown, side] = 'newteam'
    return season
            
def clean_data(season):
    """Prepare a betting season: add result columns, then relabel unknown teams.

    Applies add_columns followed by newteam_label and returns the result.
    """
    return newteam_label(add_columns(season))

In [8]:
# Calculate probabilities of each event and add to season data
def probabilities(season):
    """Fill in home-win / draw / away-win probabilities for every match.

    Home and away goal counts are modelled as independent Poisson variables
    whose means are the home team's ``alpha_h`` and the away team's
    ``alpha_a`` from the module-level ``teaminfo`` frame. Scorelines from
    0-0 up to 10-10 are enumerated, so each match's three probabilities sum
    to slightly less than 1 (stored in ``sum_probs``).

    Uses the module-level ``poisson`` function and ``teaminfo`` frame.
    The frame is modified in place and also returned.

    Parameters
    ----------
    season : pandas.DataFrame
        Matches with HomeTeam and AwayTeam values present in ``teaminfo``.

    Returns
    -------
    pandas.DataFrame
        ``season`` with p_win, p_draw, p_loss and sum_probs set.
    """
    maxscore = 11  # scores 0..10 per side
    goals = range(maxscore)
    n_games = len(season)
    p_win = np.zeros(n_games)
    p_draw = np.zeros(n_games)
    p_loss = np.zeros(n_games)

    fixtures = zip(season['HomeTeam'], season['AwayTeam'])
    for pos, (home, away) in enumerate(fixtures):
        home_mean = teaminfo.at[home, 'alpha_h']
        away_mean = teaminfo.at[away, 'alpha_a']
        home_pmf = np.array([poisson(g, home_mean) for g in goals], dtype=float)
        away_pmf = np.array([poisson(g, away_mean) for g in goals], dtype=float)
        # grid[i, j] = P(home scores i) * P(away scores j)
        grid = np.outer(home_pmf, away_pmf)
        p_win[pos] = np.tril(grid, -1).sum()   # i > j
        p_draw[pos] = np.trace(grid)           # i == j
        p_loss[pos] = np.triu(grid, 1).sum()   # i < j

    # Assign whole columns rather than season[col][row] = ..., which is a
    # chained write that pandas copy-on-write discards.
    season['p_win'] = p_win
    season['p_draw'] = p_draw
    season['p_loss'] = p_loss
    season['sum_probs'] = p_win + p_draw + p_loss
    return season

In [9]:
# Calculate expected values of each bet
def expected_value(season):
    """Compute the expected value of a unit stake on each outcome.

    For each of home win, draw and away win:
    ``ev = p * (odds - 1) - (1 - p)``, using the Bet365 odds columns
    B365H, B365D and B365A with the matching p_win, p_draw, p_loss.
    The frame is modified in place and also returned.
    """
    markets = (('win', 'B365H'), ('draw', 'B365D'), ('loss', 'B365A'))
    for outcome, odds_col in markets:
        prob = season['p_' + outcome]
        # Column-wise arithmetic replaces the per-row chained assignment
        # (season.ev_win[game] = ...), which is lost under copy-on-write.
        season['ev_' + outcome] = prob * (season[odds_col] - 1) - (1 - prob)
    return season

In [10]:
# Function to combine a few of the above
def full_season_run(season):
    """Run the full preparation pipeline on a betting season.

    Applies, in order: clean_data, probabilities, expected_value,
    and returns the resulting frame.
    """
    for step in (clean_data, probabilities, expected_value):
        season = step(season)
    return season

In [11]:
# Carrying out the betting
def place_bets(season, wager=5, starting_bankroll=100):
    """Simulate flat-stake betting over a season.

    For each match the outcome with the highest expected value is backed,
    provided that value exceeds the module-level ``threshold``. Ties are
    resolved in the order home win, draw, away win. A winning bet adds
    ``wager * (odds - 1)`` to the bankroll; a losing bet subtracts ``wager``.

    Parameters
    ----------
    season : pandas.DataFrame
        Matches with ev_win/ev_draw/ev_loss, B365H/B365D/B365A, FTHG, FTAG.
    wager : number, default 5
        Stake placed on every bet.
    starting_bankroll : number, default 100
        Bankroll before the first bet.

    Returns
    -------
    tuple
        ``(wager, starting_bankroll, bankroll, correct, incorrect)`` where
        ``correct``/``incorrect`` count winning/losing bets.
    """
    # (EV column, odds column, "did this bet win?") in tie-break order.
    markets = (
        ('ev_win', 'B365H', lambda home, away: home > away),
        ('ev_draw', 'B365D', lambda home, away: home == away),
        ('ev_loss', 'B365A', lambda home, away: home < away),
    )
    bankroll = starting_bankroll
    correct = 0
    incorrect = 0

    # itertuples walks rows positionally, so any index works.
    for match in season.itertuples(index=False):
        evs = [getattr(match, ev_col) for ev_col, _, _ in markets]
        best_ev = max(evs)
        for (_, odds_col, bet_wins), ev in zip(markets, evs):
            if ev == best_ev and ev > threshold:
                if bet_wins(match.FTHG, match.FTAG):
                    bankroll += wager * (getattr(match, odds_col) - 1)
                    correct += 1
                else:
                    bankroll -= wager
                    incorrect += 1
                break  # at most one bet per match
    return (wager, starting_bankroll, bankroll, correct, incorrect)

In [12]:
# Function to return performance metrics
def bets_return(wager, starting_bankroll, bankroll, correct,incorrect):
    """Print the track record and return on investment for one season.

    ROI is the profit divided by the total amount staked
    (``wager * number_of_bets``). The season label is built from the
    module-level ``betting_year``. When no bets were placed the ROI is
    reported as not available rather than dividing by zero.

    Parameters
    ----------
    wager : number
        Stake per bet.
    starting_bankroll, bankroll : number
        Bankroll before and after the season.
    correct, incorrect : int
        Number of winning and losing bets.
    """
    betcounter = incorrect + correct
    # Zero-pad the closing year so e.g. betting_year 8 reads 2008/09.
    season_label = '{}/{:02d}'.format(2000 + betting_year, (betting_year + 1) % 100)
    print(season_label, 'season:')
    print(correct, 'out of', betcounter, 'bets were correct')
    if betcounter == 0:
        # No stake was placed, so ROI is undefined.
        print('ROI=', 'n/a (no bets placed)')
        return
    ROI = (bankroll - starting_bankroll) / (wager * betcounter)
    print('ROI=', "{:.2%}".format(ROI))

In [13]:
# Set some variables and column lists
# Minimum expected value an outcome must exceed before place_bets backs it.
threshold = 2

# How many seasons to calculate the newly promoted teams' data across.
back_years = 8

# Columns loaded from each season CSV.
cols = [
    'HomeTeam', 'AwayTeam',
    'FTHG', 'FTAG',
    'B365H', 'B365A', 'B365D',
]

# Column layout of the per-team statistics frames.
columns = [
    'home_goals', 'away_goals',
    'home_conceded', 'away_conceded',
    'home_games', 'away_games', 'total_games',
    'alpha_h', 'beta_h', 'alpha_a', 'beta_a',
]

In [15]:
# Carrying out the betting over the seasons in the dataset
# Back-test one season per iteration: the previous season's file supplies the
# team averages, and the betting_year file supplies the matches, odds and results.
# NOTE: `betting_year`, `teamlist` and `teaminfo` are read as globals inside
# make_teaminfo / bets_return, newteam_label and probabilities respectively, so
# these names and the order of the assignments below must be kept.
# The start value 2+back_years makes promoted_team's look-back begin at file 2
# (presumably the earliest season available - TODO confirm).
for betting_year in range(2+back_years,19):
        data_year=betting_year-1
        # make_teaminfo calls promoted_team(betting_year) itself; this result is
        # not read again within this cell.
        newteaminfo=promoted_team(betting_year)
        data=pd.read_csv('data/EPL/%d.csv'%(data_year),usecols=cols)
        season=pd.read_csv('data/EPL/%d.csv'%(betting_year),usecols=cols)
        # Trim both frames to 380 rows (drops the blank trailing row in some CSVs).
        data=clean(data)
        season=clean(season)
        # Teams known from the previous season; anything else becomes 'newteam'.
        teamlist=make_teamlist(data)
        teaminfo=make_teaminfo(data)
        # Adds probability and expected-value columns to the betting season.
        season=full_season_run(season)
        wager, starting_bankroll,bankroll, correct, incorrect = place_bets(season)
        bets_return(wager, starting_bankroll,bankroll, correct, incorrect)

2010/11 season:
0 out of 2 bets were correct
ROI= -100.00%
2011/12 season:
2 out of 17 bets were correct
ROI= 88.24%
2012/13 season:
1 out of 5 bets were correct
ROI= 80.00%
2013/14 season:
1 out of 19 bets were correct
ROI= -55.26%
2014/15 season:
1 out of 2 bets were correct
ROI= 450.00%
2015/16 season:
1 out of 1 bets were correct
ROI= 1000.00%
2016/17 season:
3 out of 25 bets were correct
ROI= 54.00%
2017/18 season:
2 out of 25 bets were correct
ROI= 24.00%
2018/19 season:
2 out of 9 bets were correct
ROI= 233.33%
