In [None]:
import os
import csv
import matplotlib.pyplot as plot
import numpy as np
import pandas as pd
from collections import Counter
from scipy.stats import poisson
from scipy.stats import beta
from scipy.stats import dirichlet
%run -i utils.py
%run -i priors.py

### Historical training

In [None]:
# 2017/2018 table: team ids in 'team' are passed through team_code
# (team_code is defined outside this cell - presumably utils.py; confirm)
y2 = (
    pd.read_csv("../data/players_raw_20172018.csv")
    .assign(team_name=lambda frame: team_code(frame['team'], season="2017/2018"))
)

# 2018/2019 draft table; its 'team_id' values are decoded with the 2019/2020 season
# (original note: this is the set of only this season's players and they have this year's codes)
y3 = (
    pd.read_csv("../data/draft_data/draft_player_raw_20182019.csv")
    .assign(team_name=lambda frame: team_code(frame['team_id'], season="2019/2020"))
)

# current draft table, decoded with the same 2019/2020 season
# (original note: this is the set of only this season's players and they have this year's codes)
y4 = (
    pd.read_csv("../data/draft_data/draft_player_raw.csv")
    .assign(team_name=lambda frame: team_code(frame['team_id'], season="2019/2020"))
)


In [None]:
# make games played
# y2: estimated as total_points / points_per_game, with the denominator floored at 1,
# then rounded up
y2_points_per_game = np.maximum(1, y2['points_per_game'].values)
y2['Games'] = np.ceil(y2['total_points'] / y2_points_per_game)

# y3: one CSV per player, named after the player's '_id'; count the rows whose 'mp'
# value is above zero ('mp' is presumably minutes played - confirm against the data)
match_log_dir = "../data/draft_data/players_20182019/"
games = np.array(
    [(pd.read_csv(match_log_dir + player_id + ".csv")['mp'] > 0).sum() for player_id in y3['_id']],
    dtype=float,
)
y3['Games'] = games

In [None]:
# minutes per game per season: season minutes divided by 90. * 38
# (presumably 90 minutes x 38 fixtures - confirm)
season_minutes = 90. * 38
y2['MPerc'] = y2['minutes'].div(season_minutes)
y3['MPerc'] = y3['total_mins'].div(season_minutes)

In [None]:
# full name: "<first_name> <second_name>", built the same way for all three tables
for season_frame in (y2, y3, y4):
    season_frame['full_name'] = season_frame['first_name'] + ' ' + season_frame['second_name']

In [None]:
# Season - find out if current list of players are new to this season
# y2 and y4 get a constant label; a y3 player gets 3 when their per-player CSV has at
# least one row and 4 when it is empty
y2['Season'] = 2
y4['Season'] = 4
match_log_dir = "../data/draft_data/players_20182019/"
y3['Season'] = np.array(
    [
        3 if len(pd.read_csv(match_log_dir + player_id + ".csv").index) > 0 else 4
        for player_id in y3['_id']
    ],
    dtype=float,
)

In [None]:
# Find out how many available games they were in (i.e. if they were transferred in half way through season)
# assume y2 all available for 38 games
y2['GamesAvailable'] = 38

# y3: the number of rows in the player's per-player CSV
match_log_dir = "../data/draft_data/players_20182019/"
y3['GamesAvailable'] = np.array(
    [len(pd.read_csv(match_log_dir + player_id + ".csv").index) for player_id in y3['_id']],
    dtype=float,
)

In [None]:
# teams goals when on pitch
# y2 (approximate for this season): the team's summed goals_scored, scaled by Games / 38.
# The team total is computed once per team and mapped back onto the player rows,
# instead of re-summing the whole team for every player.
team_goal_totals = y2.groupby('team_name')['goals_scored'].sum()
# a row whose team_name matches no group (e.g. missing) gets 0, as the row-by-row sum did
y2['TGoals'] = y2['team_name'].map(team_goal_totals).fillna(0).astype(float)
y2['TGoals'] = y2['TGoals'] / (38 / y2['Games'])


def _goals_for_while_playing(match_log):
    """Sum, over rows with 'mp' > 0, of ceil((mp / 90) * goals_for)."""
    featured = match_log['mp'] > 0
    return np.sum(np.ceil(featured * (match_log['mp'] / 90) * match_log['goals_for']))


# y3: computed from each player's per-player CSV (named after the player's '_id')
match_log_dir = "../data/draft_data/players_20182019/"
y3['TGoals'] = np.array(
    [_goals_for_while_playing(pd.read_csv(match_log_dir + player_id + ".csv")) for player_id in y3['_id']],
    dtype=float,
)

In [None]:
# Position label per player: the element-type code minus one indexes into this array
position_labels = np.array(['GKP', 'DEF', 'MID', 'FWD'])

# build all three label lists first, then attach them (y2 names its column differently)
y2p = [position_labels[int(code - 1)] for code in y2['element_type']]
y3p = [position_labels[int(code - 1)] for code in y3['element_type_id']]
y4p = [position_labels[int(code - 1)] for code in y4['element_type_id']]

y2['position'] = y2p
y3['position'] = y3p
y4['position'] = y4p

In [None]:
# only focus on players in league in current season (taken from y4)
ID, players, teams, positions, season = (
    y4[column] for column in ('_id', 'full_name', 'team_name', 'position', 'Season')
)

In [None]:
# match them up to players from last year
# Each historical row (y2, y3) gets the ID of the current player with the same
# full_name; rows with no match get NaN.
#
# The name -> ID lookup is built once:
#   * only the first occurrence of a repeated name is kept, so a name shared by several
#     current players resolves to the first of them;
#   * missing names are dropped so they can never match anything.
# Using .map keeps unmatched entries as real missing values rather than letting a mixed
# str/NaN list be coerced to the string 'nan', and avoids indexing ID by position.
name_to_id = pd.Series(ID.values, index=players.values)
name_to_id = name_to_id[name_to_id.index.notna() & ~name_to_id.index.duplicated(keep='first')]

y2['ID'] = y2['full_name'].map(name_to_id)
y3['ID'] = y3['full_name'].map(name_to_id)

# raw arrays kept under their previous names for any code that reads them
y2ids = y2['ID'].values
y3ids = y3['ID'].values

### Bayesian update of simplexes

#### Updating functions for player simplexes

In [None]:
# creating parameter data frame for all current players
# identity columns come straight from the current-season series ...
players_parameters_dict = {
    'ID': ID,
    'player': players,
    'team': teams,
    'position': positions,
    'last_season': season,
}
# ... and every parameter column starts as zeros, one entry per player
parameter_columns = ['a_goals', 'b_goals', 'c_goals', 'a_mins', 'b_mins', 'a_games', 'b_games']
for column in parameter_columns:
    players_parameters_dict[column] = np.zeros(len(ID))
players_parameters = pd.DataFrame(data=players_parameters_dict)

In [None]:
# sanity check: display the parameter rows of a single team
is_man_city = players_parameters['team'] == 'Man City'
players_parameters.loc[is_man_city, :]

In [None]:
# fill in priors
# Each player row receives the prior parameters of its position. The *_prior_* names
# are defined outside this cell (presumably by priors.py - confirm).
prior_columns = ('a_goals', 'b_goals', 'c_goals', 'a_mins', 'b_mins', 'a_games', 'b_games')
position_priors = {
    'GKP': (ga_prior_a_g, ga_prior_b_g, ga_prior_c_g, m_prior_a_g, m_prior_b_g, p_prior_a_g, p_prior_b_g),
    'MID': (ga_prior_a_m, ga_prior_b_m, ga_prior_c_m, m_prior_a_m, m_prior_b_m, p_prior_a_m, p_prior_b_m),
    'DEF': (ga_prior_a_d, ga_prior_b_d, ga_prior_c_d, m_prior_a_d, m_prior_b_d, p_prior_a_d, p_prior_b_d),
    'FWD': (ga_prior_a_f, ga_prior_b_f, ga_prior_c_f, m_prior_a_f, m_prior_b_f, p_prior_a_f, p_prior_b_f),
}
for row_label in players_parameters.index:
    row_priors = position_priors.get(players_parameters.loc[row_label, 'position'])
    if row_priors is None:
        # position is none of the four labels: the row keeps its current values
        continue
    for column, prior_value in zip(prior_columns, row_priors):
        players_parameters.loc[row_label, column] = prior_value

In [None]:
# update with historical data to find posteriors (priors for first game of season)
# scale season importance - first season weights (1 / 3), second season weights (2 / 3)
scale_y2 = (1. / 3.)
scale_y3 = (2. / 3.)

# the two historical tables hold the same statistics under different column names
y2_stat_columns = {'games': 'Games', 'goals': 'goals_scored', 'mins': 'minutes',
                   'assists': 'assists', 'team_goals': 'TGoals', 'games_available': 'GamesAvailable'}
y3_stat_columns = {'games': 'Games', 'goals': 'total_goals', 'mins': 'total_mins',
                   'assists': 'total_assists', 'team_goals': 'TGoals', 'games_available': 'GamesAvailable'}


def _update_from_season(params, row_label, season_frame, stat_columns, scale):
    """Fold one historical season into one player's parameters, in place.

    params       -- parameter DataFrame with the a_/b_/c_ columns; modified in place
    row_label    -- index label of the player's row in ``params``
    season_frame -- historical table with an 'ID' column and the statistic columns
    stat_columns -- maps 'games', 'goals', 'mins', 'assists', 'team_goals' and
                    'games_available' to the column names used by ``season_frame``
    scale        -- weight multiplied into every season statistic before updating

    Nothing happens when no row of ``season_frame`` carries the player's ID; when
    several do, the first is used. The update_* helpers are defined outside this
    cell (presumably utils.py - confirm).
    """
    matches = season_frame.index[(season_frame['ID'] == params.loc[row_label, 'ID']).values]
    if len(matches) == 0:
        return
    # matches holds index labels, so the label goes straight into .loc; looking it up
    # again as a position is only correct for a default 0..n-1 index
    season_label = matches[0]

    gms = season_frame.loc[season_label, stat_columns['games']] * scale
    goa = season_frame.loc[season_label, stat_columns['goals']] * scale
    mns = season_frame.loc[season_label, stat_columns['mins']] * scale
    assi = season_frame.loc[season_label, stat_columns['assists']] * scale
    tgoa = season_frame.loc[season_label, stat_columns['team_goals']] * scale
    gms_available = season_frame.loc[season_label, stat_columns['games_available']] * scale

    post_a_goals, post_b_goals, post_c_goals = update_goals_and_assists_simplex(
        params.loc[row_label, 'a_goals'],
        params.loc[row_label, 'b_goals'],
        params.loc[row_label, 'c_goals'],
        goa, assi, tgoa)
    post_a_mins, post_b_mins = update_mins_simplex(
        params.loc[row_label, 'a_mins'],
        params.loc[row_label, 'b_mins'],
        mns, gms)
    # second count is the (scaled) number of available games the player did not play
    post_a_played, post_b_played = update_games_played_simplex(
        params.loc[row_label, 'a_games'],
        params.loc[row_label, 'b_games'],
        gms, gms_available - gms)

    params.loc[row_label, 'a_goals'] = post_a_goals
    params.loc[row_label, 'b_goals'] = post_b_goals
    params.loc[row_label, 'c_goals'] = post_c_goals
    params.loc[row_label, 'a_mins'] = post_a_mins
    params.loc[row_label, 'b_mins'] = post_b_mins
    params.loc[row_label, 'a_games'] = post_a_played
    params.loc[row_label, 'b_games'] = post_b_played


# per player: the older season (y2) is applied first, then the more recent one (y3)
for row_label in players_parameters.index:
    _update_from_season(players_parameters, row_label, y2, y2_stat_columns, scale_y2)
    _update_from_season(players_parameters, row_label, y3, y3_stat_columns, scale_y3)

In [None]:
# display the full parameter table for inspection
players_parameters

The minutes-played parameter does not need to be included in the points-from-scoring term for each team goal, since playing time is already accounted for by dividing by ALL of the team's goals in the season, including those scored while the player was not on the pitch.

In [None]:
# save parameters: write the table to CSV without the index column
parameters_path = "../parameters/all_players_params.csv"
players_parameters.to_csv(parameters_path, index=False)