In [1]:
import numpy as np
import pandas as pd
import requests
import datetime
import os
import pystan
import re
import matplotlib.pyplot as plt
import glob

import predict_player_points as ppp
import predict_score
import pickle

In [2]:
def get_gameweek_data():
    """
    Call the API to receive data.
    
    Returns:
    Events, phases, teams, elements, element types
    """
    
    data = requests.get('https://fantasy.premierleague.com/api/bootstrap-static/')
    data = data.json()
    
    return pd.DataFrame(data['events']),\
           pd.DataFrame(data['phases']),\
           pd.DataFrame(data['teams']),\
           pd.DataFrame(data['elements']),\
           pd.DataFrame(data['element_types'])

In [3]:
def get_fixtures(teams_df, gameweek):
    
    if type(gameweek) == int:
    
        data = requests.get(f'https://fantasy.premierleague.com/api/fixtures/?event={gameweek}')
        data = pd.DataFrame(data.json())
    if type(gameweek) == list:
        li = []
        for i in gameweek:
            data = requests.get(f'https://fantasy.premierleague.com/api/fixtures/?event={i}')
            data = pd.DataFrame(data.json())
            li.append(data)
        data = pd.concat(li, axis=0, ignore_index=True)
        
    data['team_a_name'] = pd.merge(data, teams_df, left_on='team_a', right_on='id')[['name']]
    data['team_h_name'] = pd.merge(data, teams_df, left_on='team_h', right_on='id')[['name']]
    
    change_team_name_dict = {'Man Utd' : 'Man United'}
    data['team_a_name'] = data['team_a_name'].replace('Man Utd','Man United')
    data['team_h_name'] = data['team_h_name'].replace('Man Utd','Man United')
    
    data['team_a_name'] = data['team_a_name'].replace('Spurs','Tottenham')
    data['team_h_name'] = data['team_h_name'].replace('Spurs','Tottenham')
    return data

In [4]:
def get_fixture_history_data():
    
    df1 = pd.read_csv('./data/2020_21.csv', usecols=[0, 1] + list(range(3, 24)))
    df2 = pd.read_csv('./data/2019_20.csv', usecols=[0, 1] + list(range(3, 24)))
    df3 = pd.read_csv('./data/2018_19.csv', usecols=list(range(23)))
    df4 = pd.read_csv('./data/2017_18.csv', usecols=list(range(23)))


    li = [df1, df2, df3, df4]

    data = pd.concat(li, axis=0, ignore_index=True)
    
    data['Date'] = pd.to_datetime(data['Date'])
    data['time_diff'] = (pd.Timestamp(datetime.datetime.today()) - data['Date']).dt.days
    
    X = data[['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG']]
    
    X = pd.concat([X.iloc[:,[0,1,2]].assign(home=1).rename(columns={'HomeTeam':'team',
                                                                'AwayTeam':'opponent',
                                                                'FTHG':'goals'}),
                   X.iloc[:,[0,1,3]].assign(home=0).rename(columns={'HomeTeam':'team',
                                                                    'AwayTeam':'opponent',
                                                                    'FTAG':'goals'})])
    
    return X, data

In [5]:
def estimate_team_parameters(fixture_df):
    
    fixture_df.rename(columns={'HomeTeam':'team',
                               'AwayTeam':'opponent',
                               'FTHG':'goals'})
    
    print('estimating parameters...')
    params = predict_score.solve_parameters_decay(fixture_df, xi=0.001)
    #as Brentford is not in fixture history...
    params['attack_Brentford'] = 0.9868922096591581 #same as watford
    params['defence_Brentford'] = -0.6796145852623623
    
    return params

In [6]:
fixture_history_data, data = get_fixture_history_data()
try:
    with open('team_parameters.pickle', 'rb') as handle:
        team_parameters = pickle.load(handle)
except:
    team_parameters = estimate_team_parameters(data)
    with open('team_parameters.pickle', 'wb') as handle:
        pickle.dump(team_parameters, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [7]:
events, phases, teams, elements, element_types = get_gameweek_data()

In [8]:
def predict_fixture_scores(fixture_df, params, add_noise=True):
    
    """
    Predicts scores for the gameweek fixture
    """
    score_matrix_list = []
    predicted_score_list = []
    
    for fixure in fixture_df.iterrows():
        
        home_team = fixure[1]['team_h_name']
        away_team = fixure[1]['team_a_name']
        
        score_prob_matrix = predict_score.dixon_coles_simulate_match(params, home_team, away_team, max_goals=6)
        if add_noise:
            score_prob_matrix += np.random.normal(0, 0.01, size=(7,7))
            score_prob_matrix /= score_prob_matrix.sum()
#             y = np.unravel_index(score_prob_matrix.argmax(), score_prob_matrix.shape)
#             y = score_probs[max(x[0]-1, 0):min(x[0]+2,6), max(x[1]-1,0):min(x[1]+2,6)]
#             y = np.unravel_index(score_probs == np.random.choice(y.ravel()), score_probs.ravel().shape)[0]
#             predicted_score = np.unravel_index(y.argmax(), y.shape)
            predicted_score = np.unravel_index(score_prob_matrix.argmax(), score_prob_matrix.shape)
        else:
            predicted_score = np.unravel_index(score_prob_matrix.argmax(), score_prob_matrix.shape)
        
        
        score_matrix_list.append(score_prob_matrix)
        predicted_score_list.append(predicted_score)
    
    return np.array(predicted_score_list) #(h,a)

# Get (score, assist, clean sheet, nothing) probabilities for each player

In [9]:
player_list = glob.glob(f'./data/data/2020-21/players/*')

id_player_dict = {}

for row in player_list:
    idx = int(re.findall('[0-9]+', row)[-1])
    player_name = row.split('/')[-1]
    
    id_player_dict[idx] = player_name
    
element_type_dict = {1 : 'GKP', 2 : 'DEF', 3 : 'MID', 4: 'FWD'}

In [10]:
li = []

for player in player_list:
    idx = int(re.findall('[0-9]+', player)[-1])
    df = pd.read_csv(player + '/gw.csv', usecols=['goals_scored',
                                                  'assists',
                                                  'clean_sheets',
                                                  'minutes',
                                                  'team_a_score',
                                                  'team_h_score',
                                                  'was_home'], nrows=38).assign(player_id=idx)
    
    
    if df.shape[0] == 38:
        df['gw'] = np.arange(1, 39).reshape(-1,1)
        li.append(df)
        
    elif df.shape[0] < 38:
        for _ in range(38 - df.shape[0]):
            df = df.append(pd.Series([0,0,0,0,0,0,False,idx], index=df.columns), ignore_index=True)
        df['gw'] = np.arange(1,39).reshape(-1,1)
        li.append(df)
    else:
        continue
    assert df.shape == (38, 9), 'Not all dfs are the same shape'

data = pd.concat(li, axis=0, ignore_index=True)
data['team_goals'] = data['team_h_score'] * (data['was_home'] == True) + data['team_a_score'] * (data['was_home'] == False)

element_type = pd.read_csv('./data/data/2020-21/players_raw.csv', usecols=['id', 'element_type'])

data = data.merge(element_type, left_on='player_id', right_on='id', how='outer')
data['element_type'] = data['element_type'].map(element_type_dict)

player_names = pd.read_csv('./data/data/2020-21/players_raw.csv', usecols=['id', 'web_name'])
team_names = pd.read_csv('./data/data/2021-22/teams.csv', usecols=['id', 'name'])


INFO:numexpr.utils:NumExpr defaulting to 8 threads.


In [11]:
fwd_player_data = ppp.fit_player_data(data, 'FWD')
mid_player_data = ppp.fit_player_data(data, 'MID')
def_player_data = ppp.fit_player_data(data, 'DEF')
gkp_player_data = ppp.fit_player_data(data, 'GKP')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["neither"] = df["team_goals"] - df["goals_scored"] - df["assists"]
INFO:absl:Starting the local TPU driver.
INFO:absl:Unable to initialize backend 'tpu_driver': Not found: Unable to find driver in registry given worker: local://
INFO:absl:Unable to initialize backend 'gpu': Not found: Could not find registered platform with name: "cuda". Available platform names are: Host Interpreter
INFO:absl:Unable to initialize backend 'tpu': Invalid argument: TpuPlatform is not available.


[11.78484764  6.56325361 19.65189875]
Fitting player model for FWD ...


sample: 100%|██████████| 2500/2500 [00:06<00:00, 411.28it/s, 15 steps of size 3.81e-01. acc. prob=0.86]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["neither"] = df["team_goals"] - df["goals_scored"] - df["assists"]


[ 4.75586104  4.78314131 28.46099765]
Fitting player model for MID ...


sample: 100%|██████████| 2500/2500 [00:12<00:00, 204.62it/s, 15 steps of size 3.13e-01. acc. prob=0.82]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["neither"] = df["team_goals"] - df["goals_scored"] - df["assists"]


[ 1.34095016  1.91843964 34.7406102 ]
Fitting player model for DEF ...


sample: 100%|██████████| 2500/2500 [00:09<00:00, 250.58it/s, 15 steps of size 2.51e-01. acc. prob=0.84]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["neither"] = df["team_goals"] - df["goals_scored"] - df["assists"]


[ 0.04065496  0.08130992 37.87803512]
Fitting player model for GKP ...


sample: 100%|██████████| 2500/2500 [00:05<00:00, 425.20it/s, 1 steps of size 3.26e-02. acc. prob=0.79] 


In [12]:
fwd_player_data['player_name'] = fwd_player_data['player_id'].map(id_player_dict)
mid_player_data['player_name'] = mid_player_data['player_id'].map(id_player_dict)
def_player_data['player_name'] = def_player_data['player_id'].map(id_player_dict)
gkp_player_data['player_name'] = gkp_player_data['player_id'].map(id_player_dict)

In [13]:
fwd_data = pd.merge(fwd_player_data, player_names, left_on='player_id', right_on='id')[['player_id',
                                                                                        'prob_score',
                                                                                         'prob_assist',
                                                                                         'prob_neither', 
                                                                                         'web_name']]
mid_data = pd.merge(mid_player_data, player_names, left_on='player_id', right_on='id')[['player_id',
                                                                                        'prob_score',
                                                                                         'prob_assist',
                                                                                         'prob_neither', 
                                                                                         'web_name']]
def_data = pd.merge(def_player_data, player_names, left_on='player_id', right_on='id')[['player_id',
                                                                                        'prob_score',
                                                                                         'prob_assist',
                                                                                         'prob_neither', 
                                                                                         'web_name']]
gkp_data = pd.merge(gkp_player_data, player_names, left_on='player_id', right_on='id')[['player_id',
                                                                                        'prob_score',
                                                                                         'prob_assist',
                                                                                         'prob_neither', 
                                                                                         'web_name']]

In [14]:
elements['now_cost'].max()

125

In [15]:
fwd_data = pd.merge(fwd_data, elements[['web_name', 'team', 'now_cost']], left_on='web_name', right_on='web_name')
mid_data = pd.merge(mid_data, elements[['web_name', 'team', 'now_cost']], left_on='web_name', right_on='web_name')
def_data = pd.merge(def_data, elements[['web_name', 'team', 'now_cost']], left_on='web_name', right_on='web_name')
gkp_data = pd.merge(gkp_data, elements[['web_name', 'team', 'now_cost']], left_on='web_name', right_on='web_name')

In [16]:
fixtures = get_fixtures(teams, list(range(1,6)))

predicted_score = predict_fixture_scores(fixtures, team_parameters)

fixtures['predicted_a_score'] = predicted_score[:,1]
fixtures['predicted_h_score'] = predicted_score[:,0]

In [17]:
home_fixtures = fixtures[['team_h_name', 'predicted_h_score']].rename(columns={'team_h_name' : 'team',
                                                                       'predicted_h_score' : 'score'})
away_fixtures = fixtures[['team_a_name', 'predicted_a_score']].rename(columns={'team_a_name' : 'team',
                                                                       'predicted_a_score' : 'score'})

In [18]:
team_goals = pd.concat([home_fixtures, away_fixtures], axis=0, ignore_index=True)

In [19]:
team_goals = team_goals.groupby('team', as_index=False).sum().sort_values(by='score', ascending=False)

In [20]:
team_goals = pd.merge(team_goals, team_names, left_on='team', right_on='name')[['team', 'score', 'id']]

In [21]:
fwd_final_data = pd.merge(fwd_data, team_goals, left_on='team', right_on='id').drop(['id', 'team_x'], axis=1)
mid_final_data = pd.merge(mid_data, team_goals, left_on='team', right_on='id').drop(['id', 'team_x'], axis=1)
def_final_data = pd.merge(def_data, team_goals, left_on='team', right_on='id').drop(['id', 'team_x'], axis=1)
gkp_final_data = pd.merge(gkp_data, team_goals, left_on='team', right_on='id').drop(['id', 'team_x'], axis=1)

In [22]:
fwd_final_data['predicted_points'] = fwd_final_data['score'] * (fwd_final_data['prob_score'] * 4 + fwd_final_data['prob_assist'] * 3)
mid_final_data['predicted_points'] = mid_final_data['score'] * (mid_final_data['prob_score'] * 5 + mid_final_data['prob_assist'] * 3)
def_final_data['predicted_points'] = def_final_data['score'] * (def_final_data['prob_score'] * 6 + def_final_data['prob_assist'] * 3)
def_final_data['predicted_points'] = gkp_final_data['score'] * (gkp_final_data['prob_score'] * 6 + gkp_final_data['prob_assist'] * 3)

fwd_final_data = fwd_final_data.assign(position='FWD')
mid_final_data = mid_final_data.assign(position='MID')
def_final_data = def_final_data.assign(position='DEF')
gkp_final_data = gkp_final_data.assign(position='GKP')

In [23]:
final_data = pd.concat([fwd_final_data, mid_final_data, def_final_data], axis=0, ignore_index=True)

In [24]:
final_data.sort_values(by='predicted_points', ascending=False).head(20)

Unnamed: 0,player_id,prob_score,prob_assist,prob_neither,web_name,now_cost,team_y,score,predicted_points,position
33,492,0.352239,0.196286,0.451475,Rodrigo,55,Man City,12,23.973785,FWD
96,287,0.221499,0.164868,0.613633,Torres,70,Man City,12,19.225173,MID
94,284,0.193099,0.179786,0.627116,Foden,80,Man City,12,18.058204,MID
92,275,0.188325,0.175453,0.636222,Mahrez,90,Man City,12,17.615801,MID
32,282,0.257568,0.144474,0.597959,Jesus,85,Man City,12,17.564293,FWD
31,256,0.309582,0.172344,0.518074,Origi,50,Liverpool,10,17.553588,FWD
83,254,0.277331,0.111281,0.611387,Salah,125,Liverpool,10,17.205002,MID
90,271,0.214671,0.106781,0.678548,Gündogan,75,Man City,12,16.724364,MID
91,272,0.143348,0.225279,0.631373,De Bruyne,120,Man City,12,16.710941,MID
89,37,0.14265,0.223733,0.633617,Grealish,80,Man City,12,16.613379,MID


In [25]:
criteria_dict = {'GKP' : 2, 'DEF': 5, 'MID' : 5, 'FWD' : 3, 'price' : 1000}
team_dict = {'GKP' : [], 'DEF' : [], 'MID' : [], 'FWD' : [], 'Money Left' : None, 'Predicted Points' : 0}
