In [1]:
import os
import csv
import numpy as np
import matplotlib.pyplot as plot
from scipy.stats import poisson
from scipy.stats import gamma
from scipy.stats import beta
from scipy.stats import norm
import pandas as pd
import itertools
import model_utils
from scipy.stats import dirichlet

In [2]:
def EstimateParameters(fixture_list_1, fixture_list_2, fixture_list_3,
                       teams, beta, thetapriormeans, thetapriorsds,
                       niter=1000, log=False, temp=0):
    
    # xdata and ydata are coordinates and y values of data
    # xmodel are coordinates of model evaluations
    # thetaprior are prior guesses for parameters
    
    # draw initial
    if log:
        if hasattr(thetapriormeans, '__len__'):
            theta = np.zeros(len(thetapriormeans))
            for i in range(len(thetapriormeans)):
                theta[i] = np.exp(np.random.normal(thetapriormeans[i], thetapriorsds[i], 1))
        else:
            theta = np.exp(np.random.normal(thetapriormeans, thetapriorsds, 1))
    else:
        if hasattr(thetapriormeans, '__len__'):
            theta = np.zeros(len(thetapriormeans))
            for i in range(len(thetapriormeans)):
                theta[i] = np.random.normal(thetapriormeans[i], thetapriorsds[i], 1)
            # normalize
            #theta[(len(teams) + 1 - 1)] = -np.sum(theta[1:(len(teams) + 1 - 1)])
            #theta[((2 * len(teams)) + 1 - 1)] = -np.sum(theta[(len(teams) + 1):((2 * len(teams)) + 1 - 1)])
        else:
            theta = np.random.normal(thetapriormeans, thetapriorsds, 1)
    
    if hasattr(thetapriormeans, '__len__'):
        thetaarray = np.zeros((niter, len(thetapriormeans)))
    else:
        thetaarray = np.zeros(niter)
    
    for j in range(niter):
        
        # temperature
        T = np.exp(-temp * ((i + 1) / niter))
        
        if log:
            if hasattr(thetapriormeans, '__len__'):
                thetastar = np.exp(np.log(theta) + np.random.normal(0, np.sqrt(beta), len(theta)))
            else:
                thetastar = np.exp(np.log(theta) + np.random.normal(0, np.sqrt(beta), 1))
        else:
            if hasattr(thetapriormeans, '__len__'):
                ind = np.random.normal(0, np.sqrt(beta), len(theta))
                # normalize
                #ind[(len(teams) + 1 - 1)] = -np.sum(ind[1:(len(teams) + 1 - 1)])
                #ind[((2 * len(teams)) + 1 - 1)] = -np.sum(ind[(len(teams) + 1):((2 * len(teams)) + 1 - 1)])
                thetastar = theta + ind
            else:
                ind = np.random.normal(0, np.sqrt(beta), 1)
                thetastar = theta + ind
        
        # get likelihood for each
        mu = theta[0]
        a = theta[1:(len(teams) + 1)]
        d = theta[(len(teams) + 1):((2 * len(teams)) + 1)]
        alpha = theta[((2 * len(teams)) + 1)]
        Htheta = likelihood_three_seasons(fixture_list_1, fixture_list_2, fixture_list_3,
                                          teams, mu, a, d, alpha)
        
        mu = thetastar[0]
        a = thetastar[1:(len(teams) + 1)]
        d = thetastar[(len(teams) + 1):((2 * len(teams)) + 1)]
        alpha = thetastar[((2 * len(teams)) + 1)]
        Hthetastar = likelihood_three_seasons(fixture_list_1, fixture_list_2, fixture_list_3,
                                              teams, mu, a, d, alpha)
        
        alpha = np.min([0, (1 / T) * (Hthetastar - Htheta)])
        
        # sample uniformly
        u = np.random.uniform(0, 1)
        
        # accept or not
        accept = np.log(u) <= alpha
        
        if accept:
            theta = thetastar
            
        if hasattr(thetapriormeans, '__len__'):
            thetaarray[j, :] = theta
            if (j%10) == 0:
                print('------')
                print('Iteration: ', str(j))
                print('Home coefficient: '+str(thetaarray[j, 0]))
                print('Arsenal attack coefficient: '+str(thetaarray[j, 1]))
        else:
            thetaarray[j] = theta
    
    return thetaarray

# create likelihood eval for one game
def likelihood_one_game(goals_ht, goals_at, form_ht, form_at, mu, a_ht, d_ht, a_at, d_at, alpha):
    lambda_ht = np.exp(mu + a_ht + d_at + (alpha * form_ht))
    lambda_at = np.exp(a_at + d_ht + (alpha * form_at))
    p1 = poisson.pmf(goals_ht, lambda_ht)
    p2 = poisson.pmf(goals_at, lambda_at)
    return(p1 * p2)

# create likelihood eval for single season
def likelihood_season(fixtures_list, teams, mu, a, d, alpha):
    N = np.shape(fixtures_list)[0]
    goals_ht = fixtures_list[:, 2]
    goals_at = fixtures_list[:, 3]
    teams_ht = fixtures_list[:, 0]
    teams_at = fixtures_list[:, 1]
    
    teams_for_season = np.unique(teams_ht)
    
    points = np.zeros((38, 20))
    team_count = np.zeros(20)
    for i in range(N):
        points[team_count[np.where(teams_for_season == teams_ht[i])[0][0].astype(int)].astype(int), np.where(teams_for_season == teams_ht[i])[0][0].astype(int)] = (3 * (goals_ht[i] > goals_at[i])) + (goals_ht[i] == goals_at[i])
        points[team_count[np.where(teams_for_season == teams_at[i])[0][0].astype(int)].astype(int), np.where(teams_for_season == teams_at[i])[0][0].astype(int)] = (3 * (goals_ht[i] < goals_at[i])) + (goals_ht[i] == goals_at[i])
        team_count[np.where(teams_for_season == teams_ht[i])[0][0].astype(int)] += 1
        team_count[np.where(teams_for_season == teams_at[i])[0][0].astype(int)] += 1
    form = np.ones((38, 20)) * 7.5
    for j in range(20):
        form[5:, j] = np.cumsum(points[:, j])[5:] - np.cumsum(points[:, j])[:(38 - 5)]
    
    team_count = np.zeros(20)
    likelihood = np.zeros(N)
    for i in range(N):
        ind_ht = np.where(teams == teams_ht[i])[0][0].astype(int)
        ind_at = np.where(teams == teams_at[i])[0][0].astype(int)
        ind_for_season_ht = np.where(teams_for_season == teams_ht[i])[0][0].astype(int)
        ind_for_season_at = np.where(teams_for_season == teams_at[i])[0][0].astype(int)
        l = likelihood_one_game(goals_ht[i], goals_at[i],
                                form[team_count[ind_for_season_ht].astype(int), ind_for_season_ht].astype(int), form[team_count[ind_for_season_at].astype(int), ind_for_season_at],
                                mu, a[ind_ht], d[ind_ht], a[ind_at], d[ind_at], alpha)
        team_count[np.where(teams_for_season == teams_ht[i])[0][0].astype(int)] += 1
        team_count[np.where(teams_for_season == teams_at[i])[0][0].astype(int)] += 1
        likelihood[i] = l
    
    return(np.sum(np.log(likelihood)))

# likelihood over three seasons - weighted
def likelihood_three_seasons(fixture_list_1, fixture_list_2, fixture_list_3, teams, mu, a, d, alpha):
    likelihood = (0.2 * likelihood_season(fixture_list_1, teams, mu, a, d, alpha)) + (0.3 * likelihood_season(fixture_list_2, teams, mu, a, d, alpha)) + (0.5 * likelihood_season(fixture_list_3, teams, mu, a, d, alpha))
    return(likelihood)

# function to predict probabilities of fixtures
def predict_fixtures(new_fixtures, form, teams, mu, a, d, alpha, uncertainty=False):
    if uncertainty:
        # form is N x 2
        N = np.shape(new_fixtures)[0]
        teams_ht = new_fixtures[:, 0]
        teams_at = new_fixtures[:, 1]
        lambda_1 = np.zeros(N)
        lambda_2 = np.zeros(N)
        for i in range(N):
            muest = np.random.normal(mu[0], mu[1])
            aest = np.zeros(len(teams))
            dest = np.zeros(len(teams))
            for u in range(len(teams)):
                aest[u] = np.random.normal(a[u, 0], a[u, 1])
                dest[u] = np.random.normal(d[u, 0], d[u, 1])
            alphaest = np.random.normal(alpha[0], alpha[1])
            ind_ht = np.where(teams == teams_ht[i])[0][0].astype(int)
            ind_at = np.where(teams == teams_at[i])[0][0].astype(int)
            lambda_1[i] = np.exp(muest + aest[ind_ht] + dest[ind_at] + (alphaest * form[i, 0]))
            lambda_2[i] = np.exp(aest[ind_at] + dest[ind_ht] + (alphaest * form[i, 1]))
    else:
        # form is N x 2
        N = np.shape(new_fixtures)[0]
        teams_ht = new_fixtures[:, 0]
        teams_at = new_fixtures[:, 1]
        lambda_1 = np.zeros(N)
        lambda_2 = np.zeros(N)
        for i in range(N):
            ind_ht = np.where(teams == teams_ht[i])[0][0].astype(int)
            ind_at = np.where(teams == teams_at[i])[0][0].astype(int)
            lambda_1[i] = np.exp(mu + a[ind_ht] + d[ind_at] + (alpha * form[i, 0]))
            lambda_2[i] = np.exp(a[ind_at] + d[ind_ht] + (alpha * form[i, 1]))
    return(lambda_1, lambda_2)

def import_fixture_lists(filename_1, filename_2, filename_3):
    fixture_list_1 = pd.read_csv(filename_1, header=None)
    fixture_list_2 = pd.read_csv(filename_2, header=None)
    fixture_list_3 = pd.read_csv(filename_3, header=None)
    return(fixture_list_1, fixture_list_2, fixture_list_3)

### Optimization of players

#### Probability functions of points scoring

In [3]:
def prob_clean_sheet_for_team(lambda_2):
    return(poisson.pmf(0, lambda_2))

def sample_clean_sheet_for_team(lambda_2):
    return(np.random.poisson(lambda_2) == 0)

def prob_mins_played(x, a, b, a_games):
    return((1 - poisson.cdf(x, a / b)) * np.round(a_games))  # a / b mean of gamma hyperparameter distribution

def sample_mins_played(a, b, a_games, b_games):
    return(np.random.choice([1, 0], 1, p=np.random.dirichlet([a_games, b_games]))[0] * np.min([np.random.poisson(np.random.gamma(a, 1 / b)), 90]))

def sample_goals_and_assists(a, b, c, n):
    d = np.random.dirichlet(np.array([a, b, c]))
    samples = np.random.choice(np.array([0, 1, 2]), n, p=d)
    # return num of goals / assists out of n trials
    return(sum(samples == 0), sum(samples == 1))

def prob_clean_sheet_points(lambda_2, a_mins, b_mins, a_games):
    return(prob_mins_played(60, a_mins, b_mins, a_games) * prob_clean_sheet_for_team(lambda_2))

def prob_mins_points(a_mins, b_mins, a_games):
    return(prob_mins_played(60, a_mins, b_mins, a_games) + prob_mins_played(1, a_mins, b_mins, a_games))

def sample_clean_sheet_points(lambda_2, a_mins, b_mins, a_games, b_games):
    return(int(sample_mins_played(a_mins, b_mins, a_games, b_games) > (60)) * sample_clean_sheet_for_team(lambda_2))

def sample_mins_points(a_mins, b_mins, a_games, b_games):
    return(int(sample_mins_played(a_mins, b_mins, a_games, b_games) > (60)) + int(sample_mins_played(a_mins, b_mins, a_games, b_games) > (1)))

In [4]:
def ComputeExpectedPoints(fixtures_list, form, teams, all_players_params, all_teams_params, uncertainty=False, zerooutbottom=0):
    
    # param data sets are pd Data Frames
    
    # Without bonus for now and goals conceded points
    
    # sum over n to nmax
    Nmax = 11
    Niter = 250
    if uncertainty:
        
        points = np.zeros((Niter, len(all_players_params.index)))
        
        # mean and std of team hyperparameters
        mu = (all_teams_params.as_matrix())[0, :]
        a = (all_teams_params.as_matrix())[1:(len(teams) + 1), :]
        d = (all_teams_params.as_matrix())[(len(teams) + 1):((2 * len(teams)) + 1), :]
        alpha = (all_teams_params.as_matrix())[((2 * len(teams)) + 1), :]
            
        for l in range(Niter):
            
            # sample lambdas for team performance - sample from team hyperparameters
            lambdas = predict_fixtures(fixtures_list, form, teams, mu, a, d, alpha, uncertainty=True)
            
            for i in range(len(all_players_params.index)):  # sample performance of teams from poisson model, and players also 
                h_games = np.where(fixtures_list[:, 0] == all_players_params.loc[all_players_params.index[i], 'team'])[0]
                a_games = np.where(fixtures_list[:, 1] == all_players_params.loc[all_players_params.index[i], 'team'])[0]
                if (len(h_games) > 0 or len(a_games) > 0):  # players in championship wont get any points this season!
                    if all_players_params.loc[all_players_params.index[i], 'position'] == "GKP":
                        scaling = np.array([6., 3., 4.])
                    if all_players_params.loc[all_players_params.index[i], 'position'] == "DEF":
                        scaling = np.array([6., 3., 4.])
                    if all_players_params.loc[all_players_params.index[i], 'position'] == "MID":
                        scaling = np.array([5., 3., 1.])
                    if all_players_params.loc[all_players_params.index[i], 'position'] == "FWD":
                        scaling = np.array([4., 3., 0.])
                    for k in range(len(h_games)):  # over all home games
                        n = np.random.poisson(lambdas[0][h_games[k]])  # sample total goal number
                        goa, assi = sample_goals_and_assists(all_players_params.loc[all_players_params.index[i], 'a_goals'],
                                                             all_players_params.loc[all_players_params.index[i], 'b_goals'],
                                                             all_players_params.loc[all_players_params.index[i], 'c_goals'],
                                                             n)
                        points[l, i] += ((goa * scaling[0]) + (assi * scaling[1]))  # goals and assists
                        points[l, i] += ((scaling[2] * sample_clean_sheet_points(lambdas[1][h_games[k]], all_players_params.loc[all_players_params.index[i], 'a_mins'], all_players_params.loc[all_players_params.index[i], 'b_mins'], all_players_params.loc[all_players_params.index[i], 'a_games'], all_players_params.loc[all_players_params.index[i], 'b_games'])) +
                                               (sample_mins_points(all_players_params.loc[all_players_params.index[i], 'a_mins'], all_players_params.loc[all_players_params.index[i], 'b_mins'], all_players_params.loc[all_players_params.index[i], 'a_games'], all_players_params.loc[all_players_params.index[i], 'b_games'])))
                    for k in range(len(a_games)):  # over all away games
                        n = np.random.poisson(lambdas[1][a_games[k]])  # sample total goal number
                        goa, assi = sample_goals_and_assists(all_players_params.loc[all_players_params.index[i], 'a_goals'],
                                                             all_players_params.loc[all_players_params.index[i], 'b_goals'],
                                                             all_players_params.loc[all_players_params.index[i], 'c_goals'],
                                                             n)
                        points[l, i] += ((goa * scaling[0]) + (assi * scaling[1]))  # goals and assists
                        points[l, i] += ((scaling[2] * sample_clean_sheet_points(lambdas[0][a_games[k]], all_players_params.loc[all_players_params.index[i], 'a_mins'], all_players_params.loc[all_players_params.index[i], 'b_mins'], all_players_params.loc[all_players_params.index[i], 'a_games'], all_players_params.loc[all_players_params.index[i], 'b_games'])) +
                                               (sample_mins_points(all_players_params.loc[all_players_params.index[i], 'a_mins'], all_players_params.loc[all_players_params.index[i], 'b_mins'], all_players_params.loc[all_players_params.index[i], 'a_games'], all_players_params.loc[all_players_params.index[i], 'b_games'])))
            
            print('---')
            print('Realisation ', l)
            print('Top Points Scorers: ', all_players_params.loc[all_players_params.index[np.argsort(points[l, :])[-5:].astype(int)], 'player'],
                  ' with ', np.sort(points[l, :])[-5:], ' points')

        if zerooutbottom > 0:
            for l in range(Niter):
                points[l, np.argsort(points[l, :])[:zerooutbottom]] = 0  # to account for bench players
        
        expected_points = np.mean(points, axis=0)
        sd_points = np.std(points, axis=0)
        
        return(expected_points, sd_points)
    
    else:
        
        expected_points = np.zeros(len(all_players_params.index))
        
        # get mean lambdas for team performance
        mu = (all_teams_params.as_matrix())[0, 0]
        a = (all_teams_params.as_matrix())[1:(len(teams) + 1), 0]
        d = (all_teams_params.as_matrix())[(len(teams) + 1):((2 * len(teams)) + 1), 0]
        alpha = (all_teams_params.as_matrix())[((2 * len(teams)) + 1), 0]
        lambdas = predict_fixtures(fixtures_list, form, teams, mu, a, d, alpha)
        
        for i in range(len(all_players_params.index)):  # use mean performance of teams, and use mean of players model
            h_games = np.where(fixtures_list[:, 0] == all_players_params.loc[all_players_params.index[i], 'team'])[0]
            a_games = np.where(fixtures_list[:, 1] == all_players_params.loc[all_players_params.index[i], 'team'])[0]
            if (len(h_games) > 0 or len(a_games) > 0):  # players in championship wont get any points this season!
                if all_players_params.loc[all_players_params.index[i], 'position'] == "GKP":
                    scaling = np.array([6., 3., 4.])
                if all_players_params.loc[all_players_params.index[i], 'position'] == "DEF":
                    scaling = np.array([6., 3., 4.])
                if all_players_params.loc[all_players_params.index[i], 'position'] == "MID":
                    scaling = np.array([5., 3., 1.])
                if all_players_params.loc[all_players_params.index[i], 'position'] == "FWD":
                    scaling = np.array([4., 3., 0.])
                mean_goals = all_players_params.loc[all_players_params.index[i], 'a_goals'] / (all_players_params.loc[all_players_params.index[i], 'a_goals'] + all_players_params.loc[all_players_params.index[i], 'b_goals'] + all_players_params.loc[all_players_params.index[i], 'c_goals'])
                mean_assists = all_players_params.loc[all_players_params.index[i], 'b_goals'] / (all_players_params.loc[all_players_params.index[i], 'a_goals'] + all_players_params.loc[all_players_params.index[i], 'b_goals'] + all_players_params.loc[all_players_params.index[i], 'c_goals'])
                for k in range(len(h_games)):  # over all home games
                    for j in range(1, Nmax):
                        expected_points[i] += ((poisson.pmf(j, lambdas[0][h_games[k]]) * j * scaling[0] * mean_goals) +
                                               (poisson.pmf(j, lambdas[0][h_games[k]]) * j * scaling[1] * mean_assists))
                    expected_points[i] += ((scaling[2] * prob_clean_sheet_points(lambdas[1][h_games[k]], all_players_params.loc[all_players_params.index[i], 'a_mins'], all_players_params.loc[all_players_params.index[i], 'b_mins'], all_players_params.loc[all_players_params.index[i], 'a_games'])) +
                                           (prob_mins_points(all_players_params.loc[all_players_params.index[i], 'a_mins'], all_players_params.loc[all_players_params.index[i], 'b_mins'], all_players_params.loc[all_players_params.index[i], 'a_games'])))
                for k in range(len(a_games)):  # over all away games
                    for j in range(1, Nmax):
                        expected_points[i] += ((poisson.pmf(j, lambdas[1][a_games[k]]) * j * scaling[0] * mean_goals) +
                                               (poisson.pmf(j, lambdas[1][a_games[k]]) * j * scaling[1] * mean_assists))
                    expected_points[i] += ((scaling[2] * prob_clean_sheet_points(lambdas[0][a_games[k]], all_players_params.loc[all_players_params.index[i], 'a_mins'], all_players_params.loc[all_players_params.index[i], 'b_mins'], all_players_params.loc[all_players_params.index[i], 'a_games'])) +
                                           (prob_mins_points(all_players_params.loc[all_players_params.index[i], 'a_mins'], all_players_params.loc[all_players_params.index[i], 'b_mins'], all_players_params.loc[all_players_params.index[i], 'a_games'])))

        return(expected_points)

### Historical fitting

In [5]:
form = np.ones((380, 2)) * 5

all_players_params = pd.read_csv("all_players_params.csv")
all_teams_params = pd.read_csv("all_teams_params.csv", header=None)
teams = pd.read_csv("all_teams.csv", header=None).values[:, 0]

In [6]:
# create fixture list this season to predict
fixture_list_this_season = []
for i, fix in enumerate(list(itertools.permutations(((pd.read_csv("teams_20192020.csv", header=None)).as_matrix())[:, 0], 2))):
    fixture_list_this_season.append(list(fix))
fixture_list_this_season = np.array(fixture_list_this_season)

  This is separate from the ipykernel package so we can avoid doing imports until


In [7]:
# small fixture list - use for optimization for any particular gameweek
#fixture_list_this_season = (pd.read_csv("prem_results_20182019.csv", header=None).as_matrix())[:10, :]
#print(fixture_list_this_season)

#type in current form here
#form=....

In [None]:
C, S = ComputeExpectedPoints(fixture_list_this_season, form, teams, all_players_params, all_teams_params, uncertainty=True)

  from ipykernel import kernelapp as app
  app.launch_new_instance()


---
Realisation  0
Top Points Scorers:  516    Christian Eriksen
276          Jamie Vardy
329      Raheem Sterling
128    César Azpilicueta
310        Mohamed Salah
Name: player, dtype: object  with  [166. 170. 170. 182. 216.]  points
---
Realisation  1
Top Points Scorers:  31     Pierre-Emerick Aubameyang
519                Heung-Min Son
134                  Eden Hazard
329              Raheem Sterling
310                Mohamed Salah
Name: player, dtype: object  with  [179. 184. 193. 209. 228.]  points
---
Realisation  2
Top Points Scorers:  310        Mohamed Salah
128    César Azpilicueta
306           Sadio Mané
134          Eden Hazard
329      Raheem Sterling
Name: player, dtype: object  with  [165. 173. 177. 203. 212.]  points
---
Realisation  3
Top Points Scorers:  134         Eden Hazard
306          Sadio Mané
329     Raheem Sterling
175    Luka Milivojevic
310       Mohamed Salah
Name: player, dtype: object  with  [169. 176. 176. 187. 211.]  points
---
Realisation  4
Top Po

---
Realisation  34
Top Points Scorers:  772    Alisson Ramses Becker
522               Harry Kane
339            Sergio Agüero
134              Eden Hazard
310            Mohamed Salah
Name: player, dtype: object  with  [176. 183. 190. 194. 215.]  points
---
Realisation  35
Top Points Scorers:  267      Harry Maguire
306         Sadio Mané
329    Raheem Sterling
134        Eden Hazard
310      Mohamed Salah
Name: player, dtype: object  with  [162. 177. 192. 206. 213.]  points
---
Realisation  36
Top Points Scorers:  339            Sergio Agüero
772    Alisson Ramses Becker
840             Raúl Jiménez
310            Mohamed Salah
329          Raheem Sterling
Name: player, dtype: object  with  [182. 186. 188. 245. 267.]  points
---
Realisation  37
Top Points Scorers:  337         Leroy Sané
315    Virgil van Dijk
329    Raheem Sterling
134        Eden Hazard
310      Mohamed Salah
Name: player, dtype: object  with  [168. 171. 185. 223. 253.]  points
---
Realisation  38
Top Points Score

In [51]:
# add expected points and std to data frame
all_players_params['Expected Points 2019/2020'] = C
all_players_params['Std Points 2019/2020'] = S
all_players_params['Lower 95% Confidence Points 2019/2020'] = np.fmax(0, C - (2 * S))
all_players_params['Upper 95% Confidence Points 2019/2020'] = C + (2 * S)

In [None]:
# save to csv
frame = all_players_params.loc[all_players_params.index[all_players_params['last_season'] == 3], :]
frameref = frame[['player', 'position', 'team', 'Expected Points 2019/2020',
                  'Std Points 2019/2020', 'Lower 95% Confidence Points 2019/2020',
                  'Upper 95% Confidence Points 2019/2020']]
frameref.to_csv("pre_draft_expected_points_20192020.csv")

In [52]:
# extract best current players
frame = all_players_params.loc[all_players_params.index[all_players_params['last_season'] == 3], :]
frame.sort_values(by=['Expected Points 2019/2020'], ascending=False)

Unnamed: 0,ID,a_games,a_goals,a_mins,b_games,b_goals,b_mins,c_goals,last_season,player,position,team,Expected Points 2019/2020,Std Points 2019/2020,Lower 95% Confidence Points 2019/2020,Upper 95% Confidence Points 2019/2020
310,310,31.6,21.6,2543.5,0.8,10.6,33.6,41.8,3,Mohamed Salah,MID,Liverpool,245.5,30.496721,184.506558,306.493442
329,329,28.2,14.9,2205.2,4.2,13.6,30.2,54.9,3,Raheem Sterling,MID,Man City,193.1,15.902516,161.294969,224.905031
134,134,30.2,12.6,2229.1,2.2,10.3,32.2,33.8,3,Eden Hazard,MID,Chelsea,177.3,23.392520,130.514960,224.085040
306,306,28.2,15.0,2239.7,4.2,4.9,30.2,54.1,3,Sadio Mané,MID,Liverpool,165.0,15.633298,133.733404,196.266596
128,128,31.4,2.1,2745.0,1.0,5.8,33.4,58.8,3,César Azpilicueta,DEF,Chelsea,158.7,11.841030,135.017939,182.382061
322,322,31.3,1.0,2713.5,1.1,1.5,33.3,125.9,3,Ederson Santana de Moraes,GKP,Man City,153.5,11.681181,130.137637,176.862363
522,522,26.6,18.2,2178.7,5.8,4.6,28.6,38.2,3,Harry Kane,FWD,Tottenham,153.3,20.842505,111.614991,194.985009
130,130,26.4,4.1,2281.0,6.0,5.6,28.4,57.0,3,Marcos Alonso,DEF,Chelsea,153.3,9.747307,133.805385,172.794615
516,516,29.6,8.3,2395.4,2.8,10.0,31.6,42.7,3,Christian Eriksen,MID,Tottenham,152.1,20.949702,110.200597,193.999403
828,828,20.0,3.0,1615.5,1.0,4.5,22.0,32.5,3,Matt Doherty,DEF,Wolves,150.9,12.389108,126.121784,175.678216


In [53]:
# extract best current goalkeepers
frame = all_players_params.loc[all_players_params.index[all_players_params['last_season'] == 3], :]
frame = frame.loc[frame.index[frame['position'] == 'GKP'], :]
frame.sort_values(by=['Expected Points 2019/2020'], ascending=False)

Unnamed: 0,ID,a_games,a_goals,a_mins,b_games,b_goals,b_mins,c_goals,last_season,player,position,team,Expected Points 2019/2020,Std Points 2019/2020,Lower 95% Confidence Points 2019/2020,Upper 95% Confidence Points 2019/2020
322,322,31.3,1.0,2713.5,1.1,1.5,33.3,125.9,3,Ederson Santana de Moraes,GKP,Man City,153.5,11.681181,130.137637,176.862363
772,772,20.5,1.0,1755.0,0.5,1.0,22.5,93.0,3,Alisson Ramses Becker,GKP,Liverpool,150.2,8.919641,132.360718,168.039282
775,775,20.0,1.0,1755.0,1.0,1.0,22.0,82.5,3,David de Gea,GKP,Man United,133.3,16.068914,101.162172,165.437828
710,710,19.5,1.0,1665.0,1.5,1.0,21.5,80.5,3,Kepa Arrizabalaga,GKP,Chelsea,132.5,15.441826,101.616347,163.383653
844,844,19.5,1.0,1710.0,1.5,1.0,21.5,73.0,3,Rui Pedro dos Santos Patrício,GKP,Wolves,121.2,12.600000,96.000000,146.400000
196,196,32.2,1.0,2781.0,0.2,1.0,34.2,89.1,3,Jordan Pickford,GKP,Everton,119.8,8.726970,102.346061,137.253939
506,506,28.3,1.0,2502.0,4.1,1.0,30.3,104.0,3,Hugo Lloris,GKP,Tottenham,115.0,15.172343,84.655313,145.344687
65,65,29.7,1.0,2601.0,2.7,1.0,31.7,77.4,3,Mathew Ryan,GKP,Brighton,109.5,14.773287,79.953427,139.046573
259,259,30.2,1.0,2644.5,2.2,1.0,32.2,89.3,3,Kasper Schmeichel,GKP,Leicester,109.5,10.660675,88.178649,130.821351
578,578,31.4,1.0,2754.0,1.0,1.0,33.4,85.4,3,Ben Foster,GKP,Watford,108.4,12.531560,83.336880,133.463120


In [54]:
# extract best current defenders
frame = all_players_params.loc[all_players_params.index[all_players_params['last_season'] == 3], :]
frame = frame.loc[frame.index[frame['position'] == 'DEF'], :]
frame.sort_values(by=['Expected Points 2019/2020'], ascending=False)

Unnamed: 0,ID,a_games,a_goals,a_mins,b_games,b_goals,b_mins,c_goals,last_season,player,position,team,Expected Points 2019/2020,Std Points 2019/2020,Lower 95% Confidence Points 2019/2020,Upper 95% Confidence Points 2019/2020
128,128,31.4,2.1,2745.0,1.0,5.8,33.4,58.8,3,César Azpilicueta,DEF,Chelsea,158.7,11.841030,135.017939,182.382061
130,130,26.4,4.1,2281.0,6.0,5.6,28.4,57.0,3,Marcos Alonso,DEF,Chelsea,153.3,9.747307,133.805385,172.794615
828,828,20.0,3.0,1615.5,1.0,4.5,22.0,32.5,3,Matt Doherty,DEF,Wolves,150.9,12.389108,126.121784,175.678216
315,315,27.8,3.0,2412.9,4.6,3.0,29.8,78.0,3,Virgil van Dijk,DEF,Liverpool,148.4,12.403225,123.593549,173.206451
826,826,19.5,3.0,1629.0,1.5,1.0,21.5,36.0,3,Willy Boly,DEF,Wolves,141.8,9.724197,122.351607,161.248393
341,341,27.9,1.5,2268.2,4.5,3.3,29.9,88.6,3,Kyle Walker,DEF,Man City,141.4,15.913516,109.572967,173.227033
721,721,19.0,3.0,1527.0,2.0,3.5,21.0,37.0,3,Lucas Digne,DEF,Everton,140.5,21.500000,97.500000,183.500000
763,763,18.5,2.0,1606.5,2.5,5.0,20.5,34.0,3,Ricardo Domingos Barbosa Pereira,DEF,Leicester,138.5,17.906703,102.686595,174.313405
316,316,26.4,1.3,2235.0,6.0,8.5,28.4,74.2,3,Andrew Robertson,DEF,Liverpool,134.6,11.740528,111.118944,158.081056
43,43,31.4,3.6,2756.1,1.0,2.4,33.4,52.0,3,Nathan Aké,DEF,Bournemouth,134.1,16.096273,101.907454,166.292546


In [55]:
# extract best current midfielders
frame = all_players_params.loc[all_players_params.index[all_players_params['last_season'] == 3], :]
frame = frame.loc[frame.index[frame['position'] == 'MID'], :]
frame.sort_values(by=['Expected Points 2019/2020'], ascending=False)

Unnamed: 0,ID,a_games,a_goals,a_mins,b_games,b_goals,b_mins,c_goals,last_season,player,position,team,Expected Points 2019/2020,Std Points 2019/2020,Lower 95% Confidence Points 2019/2020,Upper 95% Confidence Points 2019/2020
310,310,31.6,21.6,2543.5,0.8,10.6,33.6,41.8,3,Mohamed Salah,MID,Liverpool,245.5,30.496721,184.506558,306.493442
329,329,28.2,14.9,2205.2,4.2,13.6,30.2,54.9,3,Raheem Sterling,MID,Man City,193.1,15.902516,161.294969,224.905031
134,134,30.2,12.6,2229.1,2.2,10.3,32.2,33.8,3,Eden Hazard,MID,Chelsea,177.3,23.392520,130.514960,224.085040
306,306,28.2,15.0,2239.7,4.2,4.9,30.2,54.1,3,Sadio Mané,MID,Liverpool,165.0,15.633298,133.733404,196.266596
516,516,29.6,8.3,2395.4,2.8,10.0,31.6,42.7,3,Christian Eriksen,MID,Tottenham,152.1,20.949702,110.200597,193.999403
372,372,27.4,9.3,2192.0,5.0,9.9,29.4,42.2,3,Paul Pogba,MID,Man United,145.4,10.883014,123.633971,167.166029
220,220,28.1,8.7,2283.1,4.3,5.2,30.1,32.2,3,Gylfi Sigurdsson,MID,Everton,142.6,20.095771,102.408459,182.791541
175,175,30.8,10.0,2678.7,1.6,2.6,32.8,31.6,3,Luka Milivojevic,MID,Crystal Palace,141.6,18.238421,105.123158,178.076842
563,563,29.9,9.0,2221.1,2.5,4.9,31.9,32.2,3,Richarlison de Andrade,MID,Everton,139.6,13.558761,112.482478,166.717522
519,519,28.4,10.6,1749.1,4.0,7.4,30.4,43.0,3,Heung-Min Son,MID,Tottenham,132.4,15.818976,100.762048,164.037952


In [56]:
# extract best current forwards
frame = all_players_params.loc[all_players_params.index[all_players_params['last_season'] == 3], :]
frame = frame.loc[frame.index[frame['position'] == 'FWD'], :]
frame.sort_values(by=['Expected Points 2019/2020'], ascending=False)

Unnamed: 0,ID,a_games,a_goals,a_mins,b_games,b_goals,b_mins,c_goals,last_season,player,position,team,Expected Points 2019/2020,Std Points 2019/2020,Lower 95% Confidence Points 2019/2020,Upper 95% Confidence Points 2019/2020
522,522,26.6,18.2,2178.7,5.8,4.6,28.6,38.2,3,Harry Kane,FWD,Tottenham,153.3,20.842505,111.614991,194.985009
840,840,20.0,7.5,1601.0,1.0,6.0,22.0,16.5,3,Raúl Jiménez,FWD,Wolves,145.4,16.936351,111.527297,179.272703
276,276,29.9,16.0,2382.4,2.5,4.1,31.9,26.2,3,Jamie Vardy,FWD,Leicester,144.2,18.059900,108.080199,180.319801
339,339,25.0,17.8,1861.5,7.4,8.3,27.0,57.3,3,Sergio Agüero,FWD,Man City,138.8,13.548432,111.703137,165.896863
22,22,28.9,11.7,1952.6,3.5,8.5,30.9,40.2,3,Alexandre Lacazette,FWD,Arsenal,137.7,20.669059,96.361882,179.038118
31,31,22.9,15.0,1722.8,9.5,5.7,24.9,39.7,3,Pierre-Emerick Aubameyang,FWD,Arsenal,132.2,23.919866,84.360268,180.039732
311,311,29.6,11.5,2176.0,2.8,7.4,31.6,55.1,3,Roberto Firmino,FWD,Liverpool,129.3,15.601602,98.096795,160.503205
169,169,27.2,8.7,2328.2,5.2,8.6,29.2,26.9,3,Wilfried Zaha,FWD,Crystal Palace,128.3,11.419720,105.460560,151.139440
592,592,28.3,8.6,2175.0,4.1,5.9,30.3,22.9,3,Salomón Rondón,FWD,Newcastle,122.1,13.736448,94.627104,149.572896
396,396,31.1,9.4,2248.8,1.3,3.8,33.1,26.0,3,Ayoze Pérez,FWD,Newcastle,118.4,15.120847,88.158307,148.641693


### Team predictions

Gives predictions for a team's score on a gameweek

In [58]:
# this gameweek fixtures
#fixture_list_this_gw = (pd.read_csv("prem_results_20182019.csv", header=None).as_matrix())[:10, :]
#print(fixture_list_this_gw)

# this gameweek form for each fixture
#form =...

# players for a specific team
tm_players = ['Mohamed Salah', 'James Milner'] #....
new_players_frame = all_players_params[all_players_params['player'].isin(tm_players)]
niter = 1  # dummy for wrapper function
N = 100
C, S = ComputeExpectedPoints(fixture_list_this_gw, form, teams, new_players_frame, all_teams_params, uncertainty=True,
                             zerooutbottom=3)  # zero out bench players
expected_tm_score = np.sum(C)
sd_tm_score = np.sqrt(np.sum(S ** 2))
print('expected team score: ', expected_tm_score)
print('lower 95% confidence team score: ', max(0, expected_tm_score - (2 * sd_tm_score)))
print('upper 95% confidence team score: ', expected_tm_score + (2 * sd_tm_score))

---
Realisation  0
Top Points Scorers:  298     James Milner
310    Mohamed Salah
Name: player, dtype: object  with  [0. 2.]  points
---
Realisation  1
Top Points Scorers:  310    Mohamed Salah
298     James Milner
Name: player, dtype: object  with  [2. 5.]  points
---
Realisation  2
Top Points Scorers:  298     James Milner
310    Mohamed Salah
Name: player, dtype: object  with  [1. 2.]  points
---
Realisation  3
Top Points Scorers:  298     James Milner
310    Mohamed Salah
Name: player, dtype: object  with  [ 4. 10.]  points
---
Realisation  4
Top Points Scorers:  298     James Milner
310    Mohamed Salah
Name: player, dtype: object  with  [2. 3.]  points
---
Realisation  5
Top Points Scorers:  298     James Milner
310    Mohamed Salah
Name: player, dtype: object  with  [0. 3.]  points
---
Realisation  6
Top Points Scorers:  298     James Milner
310    Mohamed Salah
Name: player, dtype: object  with  [1. 2.]  points
---
Realisation  7
Top Points Scorers:  298     James Milner
310   

  from ipykernel import kernelapp as app
  app.launch_new_instance()


In [None]:
# calculates match probability
def match_prob(exp_p1, sd_p1, exp_p2, sd_p2):
    exp_diff = exp_p1 - exp_p2
    sd_diff = np.sqrt((sd_p1 ** 2) + (sd_p2 ** 2))
    return(1 - norm.cdf(0, exp_diff, sd_diff))

### Current (Updates)

when updating with new data, use mins played after last three games (not mins per season!). This is just a proxy for whether they are getting in team or not

In [48]:
def read_current_data(file):
    ycurrent = pd.read_csv(file)
    return(ycurrent)

def convert_team_marker(teams):
    new_teams = []
    for tm in teams:
        if tm == 'ARS':
            new_teams.append('Arsenal')
        if tm == 'CHE':
            new_teams.append('Chelsea')
        if tm == 'BOU':
            new_teams.append('Bournemouth')
        if tm == 'WHU':
            new_teams.append('West Ham')
        if tm == 'MCI':
            new_teams.append('Man City')
        if tm == 'MUN':
            new_teams.append('Man United')
        if tm == 'LEI':
            new_teams.append('Leicester')
        if tm == 'TOT':
            new_teams.append('Tottenham')
        if tm == 'LIV':
            new_teams.append('Liverpool')
        if tm == 'NEW':
            new_teams.append('Newcastle')
        if tm == 'HUD':
            new_teams.append('Huddersfield')
        if tm == 'FUL':
            new_teams.append('Fulham')
        if tm == 'SOU':
            new_teams.append('Southampton')
        if tm == 'CRY':
            new_teams.append('Crystal Palace')
        if tm == 'CAR':
            new_teams.append('Cardiff')
        if tm == 'EVE':
            new_teams.append('Everton')
        if tm == 'BHA':
            new_teams.append('Brighton')
        if tm == 'BUR':
            new_teams.append('Burnley')
        if tm == 'WOL':
            new_teams.append('Wolves')
        if tm == 'WAT':
            new_teams.append('Watford')
        if tm == 'AVL':
            new_teams.append('Aston Villa')
        if tm == 'MID':
            new_teams.append('Middlesbrough')
        if tm == 'HUL':
            new_teams.append('Hull')
        if tm == 'SWA':
            new_teams.append('Swansea')
        if tm == 'WBA':
            new_teams.append('West Brom')
        if tm == 'STK':
            new_teams.append('Stoke')
        if tm == 'SUN':
            new_teams.append('Sunderland')
        if tm == 'NOR':
            new_teams.append('Norwich')
        if tm == 'SHU':
            new_teams.append('Sheffield United')
    return(new_teams)

In [49]:
# use last year players for initial draft
ycurrent = read_current_data("players_20192020.csv")
ycurrent['Team'] = convert_team_marker(ycurrent['Team'])

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xfc in position 2: invalid start byte

In [None]:
# append their IDs from previous seasons
ycurrent['ID'] = np.zeros(len(ycurrent.index))
cnt = np.max(all_players_params['ID']) + 1
for i in range(len(ycurrent.index)):
    iid = np.where((ycurrent.loc[ycurrent.index[i], 'Player'] == all_players_params['player']) & (ycurrent.loc[ycurrent.index[i], 'Team'] == all_players_params['team']) & (ycurrent.loc[ycurrent.index[i], 'Position'] == all_players_params['position']))[0]
    if  len(iid) > 0:
        ycurrent.loc[ycurrent.index[i], 'ID'] = all_players_params.loc[all_players_params.index[iid[0]], 'ID']
        print(iid[0])
        print(all_players_params.loc[all_players_params.index[iid[0]], 'player'])
    else:
        ycurrent.loc[ycurrent.index[i], 'ID'] = cnt  # if not, add id for next season
        cnt += 1