# Set-up

In [19]:
import os

import shap
import pandas as pd
import numpy as np
from keras.models import load_model

In [2]:
# Move two levels up (presumably to the project root) so that the relative
# 'src/...' and 'data/...' paths used below resolve. The guard keeps the cell
# idempotent: re-running it must not climb a further two directories.
if not os.path.isdir('src/models'):
    os.chdir('../..')

In [20]:
from src.models.LSTM.make_predictions import load_retro_data, LSTMPlayerPredictor
from src.models.constants import \
    COLUMNS_TO_DROP_FOR_TRAINING, \
    KICKOFF_MONTH_FEATURES
from src.models.utils import \
    _load_all_historical_data, \
    _map_season_string_to_ordered_numeric, \
    _generate_known_features_for_next_gw, \
    _load_current_season_data, \
    _load_next_fixture_data, \
    _load_model_from_pickle

In [21]:
# Trained network (Keras HDF5 file).
lstm_model = load_model("src/models/pickles/v4_lstm_model.h5")

# Fitted scaler and the list of columns it is applied to (both pickled alongside the model).
COLUMNS_TO_SCALE = _load_model_from_pickle('src/models/pickles/min_max_scalar_columns_v4.pickle')
mms = _load_model_from_pickle('src/models/pickles/min_max_scalar_lstm_v4.pickle')

In [4]:
# Show every column when a wide DataFrame is displayed.
pd.set_option('display.max_columns', None)

# Process data

In [63]:
# Passed to LSTMPlayerPredictor as `previous_gw`.
PREVIOUS_GW = 8

# Passed to LSTMPlayerPredictor as `prediction_season_order`
# (the 2019-20 rows in the loaded data carry season_order 4).
PREDICTION_SEASON_ORDER = 4

# Number of rows per player kept as the model's input window.
N_STEPS_IN = 5

# NOTE: this rebinds the name imported from src.models.constants above,
# so the notebook works on its own list rather than the packaged one.
COLUMNS_TO_DROP_FOR_TRAINING = [
    'name',
    'season',
    'season_order',
    'team_name',
    'team_name_opponent',
]

In [6]:
# Parquet file holding the current-season player data that is combined with the historical data.
current_season_path = 'data/gw_player_data/gw_29_player_data.parquet'

full_data = load_retro_data(current_season_data_filepath=current_season_path)
print(full_data.shape)
full_data.head()

2020-04-06 23:42:14,329 - Loading raw historical FPL data
2020-04-06 23:42:14,519 - Loaded historical data of shape: (67797, 52)
2020-04-06 23:42:14,576 - Creating season order column
2020-04-06 23:42:14,664 - Generating known features for next GW
2020-04-06 23:42:14,960 - Final input shape: (84295, 68)


(84295, 68)


Unnamed: 0,assists,bonus,bps,clean_sheets,creativity,goals_conceded,goals_scored,ict_index,influence,minutes,own_goals,penalties_missed,penalties_saved,red_cards,gw,saves,selected,team_a_score,team_h_score,threat,total_points,transfers_balance,transfers_in,transfers_out,value,was_home,yellow_cards,name,team_name,promoted_side,top_6_last_season,season,position_DEF,position_FWD,position_GK,position_MID,team_name_opponent,promoted_side_opponent,top_6_last_season_opponent,late_kickoff,early_kickoff,kickoff_month_Aug,kickoff_month_Sep,kickoff_month_Oct,kickoff_month_Nov,kickoff_month_Dec,kickoff_month_Jan,kickoff_month_Feb,kickoff_month_Mar,kickoff_month_Apr,kickoff_month_May,season_order,next_match_value,next_match_was_home,next_match_promoted_side_opponent,next_match_top_6_last_season_opponent,next_match_kickoff_month_Aug,next_match_kickoff_month_Sep,next_match_kickoff_month_Oct,next_match_kickoff_month_Nov,next_match_kickoff_month_Dec,next_match_kickoff_month_Jan,next_match_kickoff_month_Feb,next_match_kickoff_month_Mar,next_match_kickoff_month_Apr,next_match_kickoff_month_May,next_match_late_kickoff,next_match_early_kickoff
0,0,0,1,0,0.1,1,0,0.0,0.2,24,0,0,0,0,4,0,0,0,4,0.0,1,0,0,0,4.5,False,0,aaron_connolly,Brighton & Hove Albion,0,0,2019-20,0,1,0,0,Manchester City,0,1,0,0,1,0,0,0,0,0,0,0,0,0,4,4.5,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0,0,1,0,0.3,1,0,2.2,1.0,6,0,0,0,0,5,0,14029,1,1,21.0,1,10589,13500,2911,4.5,True,0,aaron_connolly,Brighton & Hove Albion,0,0,2019-20,0,1,0,0,Burnley,0,0,0,0,0,1,0,0,0,0,0,0,0,0,4,4.5,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,0,0,1,0,4.8,0,0,2.5,2.0,20,0,0,0,0,6,0,22804,0,0,18.0,1,8090,11749,3659,4.5,False,0,aaron_connolly,Brighton & Hove Albion,0,0,2019-20,0,1,0,0,Newcastle United,0,0,1,0,0,1,0,0,0,0,0,0,0,0,4,4.5,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0,0,2,0,0.6,1,0,0.1,0.2,25,0,0,0,0,7,0,32699,0,2,0.0,1,8437,13439,5002,4.5,False,0,aaron_connolly,Brighton & Hove Albion,0,0,2019-20,0,1,0,0,Chelsea,0,1,0,0,0,1,0,0,0,0,0,0,0,0,4,4.5,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0,3,53,1,23.8,0,2,20.1,70.2,79,0,0,0,0,8,0,35026,0,3,107.0,13,2156,5952,3796,4.5,True,0,aaron_connolly,Brighton & Hove Albion,0,0,2019-20,0,1,0,0,Tottenham Hotspur,0,1,0,1,0,0,1,0,0,0,0,0,0,0,4,4.5,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
# Predictor configured from the notebook-level constants.
predictor_settings = {
    'previous_gw': PREVIOUS_GW,
    'prediction_season_order': PREDICTION_SEASON_ORDER,
}
lstm_pred = LSTMPlayerPredictor(**predictor_settings)

In [8]:
# Let the predictor prepare the combined data; the log output below reports the row counts
# before and after players with insufficient GW data are removed.
gw_prediction_data = lstm_pred.prepare_data_for_lstm(full_data=full_data)

2020-04-06 23:42:15,340 - Number of players available for selection: 39707.0
2020-04-06 23:42:15,419 - Player data shape before: (39707, 68)
2020-04-06 23:42:15,510 - Player data shape after removing players with insufficient GW data: (39674, 68)


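Predictions from the current implementation (`LSTMPlayerPredictor.make_player_predictions`), kept as the reference for the manual steps further down: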

In [9]:
# Reference predictions from the packaged predictor. The manual section later in the
# notebook rebinds `final_predictions`, so this frame is only available until then.
final_predictions = lstm_pred.make_player_predictions(gw_prediction_data=gw_prediction_data)

In [10]:
# Preview the first five rows of the predictor's output.
final_predictions.head(n=5)

Unnamed: 0,name,GW_plus_1,GW_plus_2,GW_plus_3,GW_plus_4,GW_plus_5,position_DEF,position_FWD,position_GK,position_MID,team_name,next_match_value,sum
385,mohamed_salah,5.921669,5.696108,5.706554,5.602525,5.620504,0,0,0,1,Liverpool,12.4,28.547361
436,raheem_sterling,5.740761,5.522748,5.531977,5.429527,5.446102,0,0,0,1,Manchester City,12.1,27.671114
305,kevin_de bruyne,5.47166,5.263114,5.270849,5.169618,5.184401,0,0,0,1,Manchester City,10.0,26.359642
479,sergio_agüero,5.098955,4.902833,4.906945,4.807029,4.818515,0,1,0,0,Manchester City,12.2,24.534277
468,sadio_mané,4.910196,4.720442,4.722247,4.623315,4.632875,0,0,0,1,Liverpool,11.8,23.609075


In [11]:
# Preview the first five rows of the prepared (still unscaled) input data.
gw_prediction_data.head(n=5)

Unnamed: 0,assists,bonus,bps,clean_sheets,creativity,goals_conceded,goals_scored,ict_index,influence,minutes,own_goals,penalties_missed,penalties_saved,red_cards,gw,saves,selected,team_a_score,team_h_score,threat,total_points,transfers_balance,transfers_in,transfers_out,value,was_home,yellow_cards,name,team_name,promoted_side,top_6_last_season,season,position_DEF,position_FWD,position_GK,position_MID,team_name_opponent,promoted_side_opponent,top_6_last_season_opponent,late_kickoff,early_kickoff,kickoff_month_Aug,kickoff_month_Sep,kickoff_month_Oct,kickoff_month_Nov,kickoff_month_Dec,kickoff_month_Jan,kickoff_month_Feb,kickoff_month_Mar,kickoff_month_Apr,kickoff_month_May,season_order,next_match_value,next_match_was_home,next_match_promoted_side_opponent,next_match_top_6_last_season_opponent,next_match_kickoff_month_Aug,next_match_kickoff_month_Sep,next_match_kickoff_month_Oct,next_match_kickoff_month_Nov,next_match_kickoff_month_Dec,next_match_kickoff_month_Jan,next_match_kickoff_month_Feb,next_match_kickoff_month_Mar,next_match_kickoff_month_Apr,next_match_kickoff_month_May,next_match_late_kickoff,next_match_early_kickoff
0,0,0,1,0,0.1,1,0,0.0,0.2,24,0,0,0,0,4,0,0,0,4,0.0,1,0,0,0,4.5,False,0,aaron_connolly,Brighton & Hove Albion,0,0,2019-20,0,1,0,0,Manchester City,0,1,0,0,1,0,0,0,0,0,0,0,0,0,4,4.5,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0,0,1,0,0.3,1,0,2.2,1.0,6,0,0,0,0,5,0,14029,1,1,21.0,1,10589,13500,2911,4.5,True,0,aaron_connolly,Brighton & Hove Albion,0,0,2019-20,0,1,0,0,Burnley,0,0,0,0,0,1,0,0,0,0,0,0,0,0,4,4.5,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,0,0,1,0,4.8,0,0,2.5,2.0,20,0,0,0,0,6,0,22804,0,0,18.0,1,8090,11749,3659,4.5,False,0,aaron_connolly,Brighton & Hove Albion,0,0,2019-20,0,1,0,0,Newcastle United,0,0,1,0,0,1,0,0,0,0,0,0,0,0,4,4.5,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0,0,2,0,0.6,1,0,0.1,0.2,25,0,0,0,0,7,0,32699,0,2,0.0,1,8437,13439,5002,4.5,False,0,aaron_connolly,Brighton & Hove Albion,0,0,2019-20,0,1,0,0,Chelsea,0,1,0,0,0,1,0,0,0,0,0,0,0,0,4,4.5,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0,3,53,1,23.8,0,2,20.1,70.2,79,0,0,0,0,8,0,35026,0,3,107.0,13,2156,5952,3796,4.5,True,0,aaron_connolly,Brighton & Hove Albion,0,0,2019-20,0,1,0,0,Tottenham Hotspur,0,1,0,1,0,0,1,0,0,0,0,0,0,0,4,4.5,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Manually apply the prediction steps to `gw_prediction_data`

In [64]:
# Work on an independent copy so the prepared data itself is left untouched.
gw_prediction_data_copy = gw_prediction_data.copy(deep=True)
gw_prediction_data_copy.shape

(39674, 68)

In [65]:
# Always start from the unscaled frame: transforming `gw_prediction_data_copy` in place
# would scale the already-scaled values a second time whenever this cell is re-run.
gw_prediction_data_copy = gw_prediction_data.copy()
gw_prediction_data_copy[COLUMNS_TO_SCALE] = mms.transform(gw_prediction_data[COLUMNS_TO_SCALE])
print(gw_prediction_data_copy.shape)
gw_prediction_data_copy.head()

(39674, 68)


Unnamed: 0,assists,bonus,bps,clean_sheets,creativity,goals_conceded,goals_scored,ict_index,influence,minutes,own_goals,penalties_missed,penalties_saved,red_cards,gw,saves,selected,team_a_score,team_h_score,threat,total_points,transfers_balance,transfers_in,transfers_out,value,was_home,yellow_cards,name,team_name,promoted_side,top_6_last_season,season,position_DEF,position_FWD,position_GK,position_MID,team_name_opponent,promoted_side_opponent,top_6_last_season_opponent,late_kickoff,early_kickoff,kickoff_month_Aug,kickoff_month_Sep,kickoff_month_Oct,kickoff_month_Nov,kickoff_month_Dec,kickoff_month_Jan,kickoff_month_Feb,kickoff_month_Mar,kickoff_month_Apr,kickoff_month_May,season_order,next_match_value,next_match_was_home,next_match_promoted_side_opponent,next_match_top_6_last_season_opponent,next_match_kickoff_month_Aug,next_match_kickoff_month_Sep,next_match_kickoff_month_Oct,next_match_kickoff_month_Nov,next_match_kickoff_month_Dec,next_match_kickoff_month_Jan,next_match_kickoff_month_Feb,next_match_kickoff_month_Mar,next_match_kickoff_month_Apr,next_match_kickoff_month_May,next_match_late_kickoff,next_match_early_kickoff
0,0.0,0.0,0.143939,0,0.000745,0.142857,0.0,0.0,0.001222,0.266667,0,0,0.0,0,0.081081,0.0,0.0,0.0,0.571429,0.0,0.151515,0.566868,0.0,0.0,0.071429,False,0,aaron_connolly,Brighton & Hove Albion,0,0,2019-20,0,1,0,0,Manchester City,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1.5,0.071429,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.143939,0,0.002234,0.142857,0.0,0.07074,0.006112,0.066667,0,0,0.0,0,0.108108,0.0,0.003812,0.142857,0.142857,0.136364,0.151515,0.572801,0.017205,0.002868,0.071429,True,0,aaron_connolly,Brighton & Hove Albion,0,0,2019-20,0,1,0,0,Burnley,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1.5,0.071429,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,0.0,0.0,0.143939,0,0.035741,0.0,0.0,0.080386,0.012225,0.222222,0,0,0.0,0,0.135135,0.0,0.006196,0.0,0.0,0.116883,0.151515,0.571401,0.014974,0.003605,0.071429,False,0,aaron_connolly,Brighton & Hove Albion,0,0,2019-20,0,1,0,0,Newcastle United,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1.5,0.071429,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.151515,0,0.004468,0.142857,0.0,0.003215,0.001222,0.277778,0,0,0.0,0,0.162162,0.0,0.008884,0.0,0.285714,0.0,0.151515,0.571595,0.017128,0.004928,0.071429,False,0,aaron_connolly,Brighton & Hove Albion,0,0,2019-20,0,1,0,0,Chelsea,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1.5,0.071429,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0.0,1.0,0.537879,1,0.177215,0.0,0.5,0.646302,0.429095,0.877778,0,0,0.0,0,0.189189,0.0,0.009516,0.0,0.428571,0.694805,0.515152,0.568076,0.007586,0.00374,0.071429,True,0,aaron_connolly,Brighton & Hove Albion,0,0,2019-20,0,1,0,0,Tottenham Hotspur,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1.5,0.071429,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [66]:
# Keep only the last N_STEPS_IN rows (in frame order) for each player name.
rows_by_player = gw_prediction_data_copy.groupby('name')
gw_prediction_data_copy = rows_by_player.tail(N_STEPS_IN)
print(gw_prediction_data_copy.shape)
gw_prediction_data_copy.head()

(2695, 68)


Unnamed: 0,assists,bonus,bps,clean_sheets,creativity,goals_conceded,goals_scored,ict_index,influence,minutes,own_goals,penalties_missed,penalties_saved,red_cards,gw,saves,selected,team_a_score,team_h_score,threat,total_points,transfers_balance,transfers_in,transfers_out,value,was_home,yellow_cards,name,team_name,promoted_side,top_6_last_season,season,position_DEF,position_FWD,position_GK,position_MID,team_name_opponent,promoted_side_opponent,top_6_last_season_opponent,late_kickoff,early_kickoff,kickoff_month_Aug,kickoff_month_Sep,kickoff_month_Oct,kickoff_month_Nov,kickoff_month_Dec,kickoff_month_Jan,kickoff_month_Feb,kickoff_month_Mar,kickoff_month_Apr,kickoff_month_May,season_order,next_match_value,next_match_was_home,next_match_promoted_side_opponent,next_match_top_6_last_season_opponent,next_match_kickoff_month_Aug,next_match_kickoff_month_Sep,next_match_kickoff_month_Oct,next_match_kickoff_month_Nov,next_match_kickoff_month_Dec,next_match_kickoff_month_Jan,next_match_kickoff_month_Feb,next_match_kickoff_month_Mar,next_match_kickoff_month_Apr,next_match_kickoff_month_May,next_match_late_kickoff,next_match_early_kickoff
0,0.0,0.0,0.143939,0,0.000745,0.142857,0.0,0.0,0.001222,0.266667,0,0,0.0,0,0.081081,0.0,0.0,0.0,0.571429,0.0,0.151515,0.566868,0.0,0.0,0.071429,False,0,aaron_connolly,Brighton & Hove Albion,0,0,2019-20,0,1,0,0,Manchester City,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1.5,0.071429,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.143939,0,0.002234,0.142857,0.0,0.07074,0.006112,0.066667,0,0,0.0,0,0.108108,0.0,0.003812,0.142857,0.142857,0.136364,0.151515,0.572801,0.017205,0.002868,0.071429,True,0,aaron_connolly,Brighton & Hove Albion,0,0,2019-20,0,1,0,0,Burnley,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1.5,0.071429,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,0.0,0.0,0.143939,0,0.035741,0.0,0.0,0.080386,0.012225,0.222222,0,0,0.0,0,0.135135,0.0,0.006196,0.0,0.0,0.116883,0.151515,0.571401,0.014974,0.003605,0.071429,False,0,aaron_connolly,Brighton & Hove Albion,0,0,2019-20,0,1,0,0,Newcastle United,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1.5,0.071429,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.151515,0,0.004468,0.142857,0.0,0.003215,0.001222,0.277778,0,0,0.0,0,0.162162,0.0,0.008884,0.0,0.285714,0.0,0.151515,0.571595,0.017128,0.004928,0.071429,False,0,aaron_connolly,Brighton & Hove Albion,0,0,2019-20,0,1,0,0,Chelsea,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1.5,0.071429,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0.0,1.0,0.537879,1,0.177215,0.0,0.5,0.646302,0.429095,0.877778,0,0,0.0,0,0.189189,0.0,0.009516,0.0,0.428571,0.694805,0.515152,0.568076,0.007586,0.00374,0.071429,True,0,aaron_connolly,Brighton & Hove Albion,0,0,2019-20,0,1,0,0,Tottenham Hotspur,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1.5,0.071429,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [67]:
# 'name' must survive this step because the next cell groups by it.
# Derive a new list instead of calling `.remove('name')` on the constant: the in-place
# removal raised ValueError as soon as the cell was executed a second time.
columns_to_drop = [column for column in COLUMNS_TO_DROP_FOR_TRAINING if column != 'name']
print(columns_to_drop)

# Non-inplace drop (the frame comes from groupby().tail()); errors='ignore' keeps the
# cell re-runnable once the columns are already gone.
gw_prediction_data_copy = gw_prediction_data_copy.drop(
    columns=columns_to_drop,
    errors='ignore'
)
gw_prediction_data_copy.shape

['season', 'season_order', 'team_name', 'team_name_opponent']


(2695, 64)

In [68]:
# Materialise the (name, frame) pairs once, then split them into two parallel lists:
# player names, and each player's rows without the 'name' column.
players_with_frames = list(gw_prediction_data_copy.groupby('name'))

player_list = [player_name for player_name, _ in players_with_frames]
player_data_list = [frame.drop(columns='name') for _, frame in players_with_frames]

In [69]:
# Inspect the rows held for the first player in the list.
player_data_list[0]

Unnamed: 0,assists,bonus,bps,clean_sheets,creativity,goals_conceded,goals_scored,ict_index,influence,minutes,own_goals,penalties_missed,penalties_saved,red_cards,gw,saves,selected,team_a_score,team_h_score,threat,total_points,transfers_balance,transfers_in,transfers_out,value,was_home,yellow_cards,promoted_side,top_6_last_season,position_DEF,position_FWD,position_GK,position_MID,promoted_side_opponent,top_6_last_season_opponent,late_kickoff,early_kickoff,kickoff_month_Aug,kickoff_month_Sep,kickoff_month_Oct,kickoff_month_Nov,kickoff_month_Dec,kickoff_month_Jan,kickoff_month_Feb,kickoff_month_Mar,kickoff_month_Apr,kickoff_month_May,next_match_value,next_match_was_home,next_match_promoted_side_opponent,next_match_top_6_last_season_opponent,next_match_kickoff_month_Aug,next_match_kickoff_month_Sep,next_match_kickoff_month_Oct,next_match_kickoff_month_Nov,next_match_kickoff_month_Dec,next_match_kickoff_month_Jan,next_match_kickoff_month_Feb,next_match_kickoff_month_Mar,next_match_kickoff_month_Apr,next_match_kickoff_month_May,next_match_late_kickoff,next_match_early_kickoff
0,0.0,0.0,0.143939,0,0.000745,0.142857,0.0,0.0,0.001222,0.266667,0,0,0.0,0,0.081081,0.0,0.0,0.0,0.571429,0.0,0.151515,0.566868,0.0,0.0,0.071429,False,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0.071429,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.143939,0,0.002234,0.142857,0.0,0.07074,0.006112,0.066667,0,0,0.0,0,0.108108,0.0,0.003812,0.142857,0.142857,0.136364,0.151515,0.572801,0.017205,0.002868,0.071429,True,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0.071429,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,0.0,0.0,0.143939,0,0.035741,0.0,0.0,0.080386,0.012225,0.222222,0,0,0.0,0,0.135135,0.0,0.006196,0.0,0.0,0.116883,0.151515,0.571401,0.014974,0.003605,0.071429,False,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0.071429,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.151515,0,0.004468,0.142857,0.0,0.003215,0.001222,0.277778,0,0,0.0,0,0.162162,0.0,0.008884,0.0,0.285714,0.0,0.151515,0.571595,0.017128,0.004928,0.071429,False,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0.071429,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0.0,1.0,0.537879,1,0.177215,0.0,0.5,0.646302,0.429095,0.877778,0,0,0.0,0,0.189189,0.0,0.009516,0.0,0.428571,0.694805,0.515152,0.568076,0.007586,0.00374,0.071429,True,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0.071429,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [70]:
# Column dtypes and non-null counts for the first player's rows.
player_data_list[0].info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5 entries, 0 to 4
Data columns (total 63 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   assists                                5 non-null      float64
 1   bonus                                  5 non-null      float64
 2   bps                                    5 non-null      float64
 3   clean_sheets                           5 non-null      int64  
 4   creativity                             5 non-null      float64
 5   goals_conceded                         5 non-null      float64
 6   goals_scored                           5 non-null      float64
 7   ict_index                              5 non-null      float64
 8   influence                              5 non-null      float64
 9   minutes                                5 non-null      float64
 10  own_goals                              5 non-null      int64  
 11  penalties_

In [71]:
# Shape of the first player's rows as a NumPy array: (rows, feature columns).
player_data_list[0].to_numpy().shape

(5, 63)

In [99]:
# Example of flattening player data and reconstructing.
# The column count is read from the frame instead of being hard-coded, so the
# round trip keeps working when the feature set changes.
first_player_frame = player_data_list[0]
n_features = first_player_frame.shape[1]
pd.DataFrame(first_player_frame.values.reshape(1, -1).reshape(-1, n_features))

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62
0,0,0,0.143939,0,0.000744602,0.142857,0.0,0.0,0.00122249,0.266667,0,0,0,0,0.0810811,0,0.0,0.0,0.571429,0.0,0.151515,0.566868,0.0,0.0,0.0714286,False,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0.0714286,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0
1,0,0,0.143939,0,0.0022338,0.142857,0.0,0.0707395,0.00611247,0.0666667,0,0,0,0,0.108108,0,0.00381165,0.142857,0.142857,0.136364,0.151515,0.572801,0.0172053,0.00286804,0.0714286,True,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0.0714286,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
2,0,0,0.143939,0,0.0357409,0.0,0.0,0.0803859,0.0122249,0.222222,0,0,0,0,0.135135,0,0.0061958,0.0,0.0,0.116883,0.151515,0.571401,0.0149737,0.003605,0.0714286,False,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0.0714286,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0
3,0,0,0.151515,0,0.00446761,0.142857,0.0,0.00321543,0.00122249,0.277778,0,0,0,0,0.162162,0,0.00888425,0.0,0.285714,0.0,0.151515,0.571595,0.0171276,0.00492818,0.0714286,False,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0.0714286,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1
4,0,1,0.537879,1,0.177215,0.0,0.5,0.646302,0.429095,0.877778,0,0,0,0,0.189189,0,0.00951649,0.0,0.428571,0.694805,0.515152,0.568076,0.00758564,0.00373998,0.0714286,True,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0.0714286,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0


In [92]:
# Turn each player's frame into a (1, N_STEPS_IN, n_features) block and join the blocks
# along the first axis, giving (n_players, N_STEPS_IN, n_features). The reshape fails
# loudly for any player that does not have exactly N_STEPS_IN rows.
# The frames mix bool, int and float columns, so `.values` produces object arrays;
# cast to float32 so the model is given a proper numeric array.
input_array = np.concatenate(
    [df.values.reshape(1, N_STEPS_IN, df.shape[1]) for df in player_data_list],
    axis=0
).astype(np.float32)

assert input_array.shape[0] == len(player_list), 'Number of players in input array does not equal number of players in original DataFrame'

input_array.shape

(539, 5, 63)

In [93]:
%time
raw_predictions = lstm_model.predict(input_array)
raw_predictions.shape

CPU times: user 3 µs, sys: 1 µs, total: 4 µs
Wall time: 24.1 µs


(539, 5)

In [94]:
# One column per forecast step, followed by the player each row belongs to.
gw_columns = ['GW_plus_1', 'GW_plus_2', 'GW_plus_3', 'GW_plus_4', 'GW_plus_5']
final_predictions = pd.DataFrame(data=raw_predictions, columns=gw_columns).assign(name=player_list)
final_predictions.head()

Unnamed: 0,GW_plus_1,GW_plus_2,GW_plus_3,GW_plus_4,GW_plus_5,name
0,2.077689,2.003415,2.018901,1.96114,1.971908,aaron_connolly
1,2.449217,2.349118,2.338723,2.258069,2.253024,aaron_cresswell
2,0.641854,0.643032,0.693726,0.674569,0.703326,aaron_lennon
3,1.944743,1.867459,1.863612,1.792959,1.79069,aaron_mooy
4,2.673024,2.568603,2.563523,2.484003,2.482691,aaron_ramsdale


In [95]:
# Total over the five forecast columns, added left to right starting from the first one.
gw_columns = ['GW_plus_1', 'GW_plus_2', 'GW_plus_3', 'GW_plus_4', 'GW_plus_5']
first_column, *remaining_columns = gw_columns
final_predictions['sum'] = sum(
    (final_predictions[column] for column in remaining_columns),
    final_predictions[first_column]
)

In [91]:
# Sorting is done here for display only; it is deliberately kept out of the predictions function.
ranked_predictions = final_predictions.sort_values(by='sum', ascending=False)
ranked_predictions.head(n=5)

Unnamed: 0,GW_plus_1,GW_plus_2,GW_plus_3,GW_plus_4,GW_plus_5,name,sum
385,5.92167,5.696109,5.706553,5.602526,5.620504,mohamed_salah,28.547363
436,5.740762,5.522748,5.531977,5.429527,5.446103,raheem_sterling,27.671116
305,5.47166,5.263114,5.270849,5.169618,5.184402,kevin_de bruyne,26.359642
479,5.098956,4.902833,4.906946,4.807029,4.818515,sergio_agüero,24.534279
468,4.910195,4.720442,4.722247,4.623315,4.632874,sadio_mané,23.609074
