In [None]:
import os
import csv
import matplotlib.pyplot as plot
import numpy as np
import pandas as pd
from collections import Counter
from scipy.stats import poisson
from scipy.stats import beta
from scipy.stats import dirichlet
%run -i utils.py
%run -i priors.py

### Historical training

In [None]:
def _load_season(csv_path, season_label):
    """Read one raw season CSV and add a 'team_name' column.

    'team_name' is whatever team_code (from utils.py) returns for the frame's
    'team' column and the given season label.
    """
    season_frame = pd.read_csv(csv_path)
    season_frame['team_name'] = team_code(season_frame['team'], season=season_label)
    return season_frame


# One frame per season, oldest first.
y1 = _load_season("../data/players_raw_20162017.csv", "2016/2017")
y2 = _load_season("../data/players_raw_20172018.csv", "2017/2018")
y3 = _load_season("../data/players_raw_20182019.csv", "2018/2019")

In [None]:
# Estimate games played for every player row in each season as
# ceil(total_points / points_per_game).
# points_per_game is floored at 1 before dividing, which avoids division by
# zero; for players averaging below 1 point per game the divisor becomes 1.
for season_frame in (y1, y2, y3):
    ppg_floored = np.maximum(1, season_frame['points_per_game'].values)
    season_frame['Games'] = np.ceil(season_frame['total_points'] / ppg_floored)

In [None]:
# Fraction of the season's available minutes each player was on the pitch:
# minutes / (90 minutes * 38 games).
full_season_minutes = 90. * 38
for season_frame in (y1, y2, y3):
    season_frame['MPerc'] = season_frame['minutes'] / full_season_minutes

In [None]:
# Build "first second" display names for every season table.
for season_frame in (y1, y2, y3):
    season_frame['full_name'] = season_frame['first_name'] + ' ' + season_frame['second_name']

In [None]:
# Tag each table with its season ordinal (1 = oldest, 3 = most recent).
for season_number, season_frame in enumerate((y1, y2, y3), start=1):
    season_frame['Season'] = season_number

In [None]:
# teams goals
# 'TGoals' = total 'goals_scored' over all players sharing the row's
# 'team_name' within the same season table; every player row carries its
# team's total.
#
# Computed with one groupby sum per season and a map back onto the rows,
# instead of re-scanning the whole table once per player.
# The column is kept as float, and rows whose team_name matches no group
# (e.g. a missing name) get 0.
for season_frame in (y1, y2, y3):
    goals_by_team = season_frame.groupby('team_name')['goals_scored'].sum()
    season_frame['TGoals'] = (
        season_frame['team_name'].map(goals_by_team).fillna(0).astype(float)
    )

In [None]:
# Translate the numeric 'element_type' code (1..4) into a position label.
# Code k maps to position_labels[k - 1].
position_labels = np.array(['GKP', 'DEF', 'MID', 'FWD'])

y1p = [position_labels[int(code - 1)] for code in y1["element_type"]]
y2p = [position_labels[int(code - 1)] for code in y2["element_type"]]
y3p = [position_labels[int(code - 1)] for code in y3["element_type"]]

y1['position'] = y1p
y2['position'] = y2p
y3['position'] = y3p

In [None]:
# Stack the y2 and y3 rows (y2 first) into one 2-D array and let player_id
# (from utils.py) derive per-row ID / player / team / position / season
# sequences from it. y1 is not included.
#
# DataFrame.as_matrix() was removed in pandas 1.0; .values returns the same
# ndarray and works on both old and new pandas.
y2_y3_rows = np.concatenate((y2.values, y3.values))
ID, players, teams, positions, season = player_id(y2_y3_rows)

# Sanity check on one arbitrary stacked row: print the row, its ID, then every
# stacked row sharing that ID together with their row positions.
id_sel = 1100
rows_with_same_id = np.where(ID == ID[id_sel])[0].astype(int)
print(y2_y3_rows[id_sel, :])
print(ID[id_sel])
print(y2_y3_rows[rows_with_same_id, :])
print(rows_with_same_id)

In [None]:
# ID holds one entry per stacked row: the y2 rows first, then the y3 rows.
# Split it back onto the two season tables. (y1 gets no ID column; its
# assignment was left disabled: #y1['ID'] = ID[0:len(y1['full_name'])])
n_y2 = len(y2['full_name'])
n_y3 = len(y3['full_name'])
y2['ID'] = ID[:n_y2]
y3['ID'] = ID[n_y2:n_y2 + n_y3]

In [None]:
# Per-season lookup tables with one row per player row and the columns
# (ID, player, team, position, season), cut from the stacked sequences:
# the first len(y2) entries belong to y2, the next len(y3) entries to y3.
y2_row_count = len(y2['full_name'])
y3_row_count = len(y3['full_name'])
id_fields = (ID, players, teams, positions, season)

id_y2 = np.array([field[0:y2_row_count] for field in id_fields]).T
id_y3 = np.array(
    [field[y2_row_count:(y2_row_count + y3_row_count)] for field in id_fields]
).T


### Bayesian update of simplexes

#### Updating functions for player simplexes

In [None]:
# find posteriors after historical data
# Build one row per unique player ID. Player name, team and position come from
# the player's last stacked row (y3 rows follow y2 rows, so this is the most
# recent entry); 'last_season' is the largest season value over the player's rows.
IDunique = np.unique(ID)
playersunique, teamsunique, positionsunique, seasonunique = [], [], [], []
season_values = np.array(season)

for unique_id in IDunique:
    rows_for_id = np.where(unique_id == ID)[0]
    latest_row = rows_for_id[-1]  # most recent team and position
    playersunique.append(players[latest_row])
    teamsunique.append(teams[latest_row])
    positionsunique.append(positions[latest_row])
    # most recent season id player was playing
    seasonunique.append(np.max(season_values[rows_for_id.astype(int)]))

# Identity columns first, then the parameter columns, all initialised to 0.
players_parameters_dict = {
    'ID': IDunique,
    'player': playersunique,
    'team': teamsunique,
    'position': positionsunique,
    'last_season': seasonunique,
}
for parameter_column in ('a_goals', 'b_goals', 'c_goals',
                         'a_mins', 'b_mins',
                         'a_games', 'b_games'):
    players_parameters_dict[parameter_column] = np.zeros(len(IDunique))

players_parameters = pd.DataFrame(data=players_parameters_dict)

In [None]:
# Spot check: show the parameter rows of every player whose team is 'Man City'.
is_man_city = players_parameters['team'] == 'Man City'
players_parameters.loc[is_man_city]

In [None]:
# fill in priors
# Every player starts from the prior of their position. The ga_*/m_*/p_* prior
# values are presumably defined by priors.py (run at the top of the notebook);
# the suffix _g/_m/_d/_f selects goalkeeper/midfielder/defender/forward.
#
# This replaces four copy-pasted assignment branches per season (and an outer
# range(3) loop whose third pass did nothing) with a single lookup table.
prior_columns = ['a_goals', 'b_goals', 'c_goals', 'a_mins', 'b_mins', 'a_games', 'b_games']
priors_by_position = {
    'GKP': [ga_prior_a_g, ga_prior_b_g, ga_prior_c_g, m_prior_a_g, m_prior_b_g, p_prior_a_g, p_prior_b_g],
    'MID': [ga_prior_a_m, ga_prior_b_m, ga_prior_c_m, m_prior_a_m, m_prior_b_m, p_prior_a_m, p_prior_b_m],
    'DEF': [ga_prior_a_d, ga_prior_b_d, ga_prior_c_d, m_prior_a_d, m_prior_b_d, p_prior_a_d, p_prior_b_d],
    'FWD': [ga_prior_a_f, ga_prior_b_f, ga_prior_c_f, m_prior_a_f, m_prior_b_f, p_prior_a_f, p_prior_b_f],
}

# y2 is processed before y3, so a player present in both seasons ends up with
# the prior of their y3 (most recent) position.
for season_frame in (y2, y3):
    # ID -> position for this season. Rows with an unrecognised position are
    # ignored; if an ID occurs more than once, the last recognised row wins.
    position_by_id = {
        player_key: position_label
        for player_key, position_label in zip(season_frame['ID'], season_frame['position'])
        if position_label in priors_by_position
    }
    season_position = players_parameters['ID'].map(position_by_id)

    for position_label, prior_values in priors_by_position.items():
        in_position = (season_position == position_label).values
        for column, prior_value in zip(prior_columns, prior_values):
            players_parameters.loc[in_position, column] = prior_value

In [None]:
# Inspect the row index of the parameter table (one row per unique player ID).
players_parameters.index

In [None]:
# update with historical data - posterior
# For every player, fold each season's observed statistics into the current
# parameters using the update_* helpers (from utils.py). Seasons are applied
# oldest first, so the y3 update starts from the y2 posterior.
#
# scale season importance!
# Each season's counts are multiplied by a weight before the update:
# y2 counts by 0.3 and y3 counts by 0.5.
season_weights = ((y2, 0.3), (y3, 0.5))
games_in_season = 38

for row_label in players_parameters.index:
    player_key = players_parameters.loc[row_label, 'ID']

    for season_frame, weight in season_weights:
        # Positional matches of this player in the season table. Using
        # positions rather than index labels keeps the lookup correct even if
        # the frame does not have a default RangeIndex.
        matches = np.where(player_key == season_frame['ID'].values)[0]
        if len(matches) == 0:
            continue  # player did not appear in this season
        first_match = matches[0]

        games_played = season_frame['Games'].values[first_match]
        gms = games_played * weight
        goa = season_frame['goals_scored'].values[first_match] * weight
        mns = season_frame['minutes'].values[first_match] * weight
        assi = season_frame['assists'].values[first_match] * weight
        # Team goals scaled by the share of the season the player played in:
        # ceil(TGoals / (38 / Games)), then weighted.
        tgoa = np.ceil(
            season_frame['TGoals'].values[first_match] / (games_in_season / games_played)
        ) * weight

        post_a_goals, post_b_goals, post_c_goals = update_goals_and_assists_simplex(
            players_parameters.loc[row_label, 'a_goals'],
            players_parameters.loc[row_label, 'b_goals'],
            players_parameters.loc[row_label, 'c_goals'],
            goa, assi, tgoa)
        post_a_mins, post_b_mins = update_mins_simplex(
            players_parameters.loc[row_label, 'a_mins'],
            players_parameters.loc[row_label, 'b_mins'],
            mns, gms)
        # Second count is the weighted number of games NOT played.
        post_a_played, post_b_played = update_games_played_simplex(
            players_parameters.loc[row_label, 'a_games'],
            players_parameters.loc[row_label, 'b_games'],
            gms, (games_in_season * weight) - gms)

        players_parameters.loc[row_label, 'a_goals'] = post_a_goals
        players_parameters.loc[row_label, 'b_goals'] = post_b_goals
        players_parameters.loc[row_label, 'c_goals'] = post_c_goals
        players_parameters.loc[row_label, 'a_mins'] = post_a_mins
        players_parameters.loc[row_label, 'b_mins'] = post_b_mins
        players_parameters.loc[row_label, 'a_games'] = post_a_played
        players_parameters.loc[row_label, 'b_games'] = post_b_played

In [None]:
# Display the parameter table after the historical update.
players_parameters

In [None]:
# Example of the new minutes-played model (it allows for a player sometimes not
# featuring in a game, or for prior knowledge that they will).
# - Dirichlet + Bernoulli-style draw: does the player feature in this game (1) or not (0)?
# - Gamma + Poisson draw: minutes played, given that the player features.
# The displayed value is the product of the two draws.
play_probabilities = np.random.dirichlet([28.9, 3.5])
played_flag = np.random.choice([1, 0], 1, p=play_probabilities)[0]
minutes_rate = np.random.gamma(2300, 1 / 30)
minutes_if_played = np.random.poisson(minutes_rate)
played_flag * minutes_if_played

The minutes-played parameter does not need to be included in the points-from-scoring term for each team goal: it is already accounted for by dividing by ALL of the team's goals in the season, including those scored while the player was not on the pitch.

In [None]:
# Persist the per-player parameter table as CSV (without the row index).
params_csv_path = "../parameters/all_players_params.csv"
players_parameters.to_csv(params_csv_path, index=False)