In [1]:
# import required libraries
import os
from pathlib import Path

from helpers import *

In [2]:
# path to data directory
# defaults to the original location; set the FPL_DATA_DIR environment variable
# to point the notebook at a different data directory without editing this cell
path = Path(os.environ.get('FPL_DATA_DIR', '/home/jupyter/fpl-prediction/data'))

In [3]:
# current season: data sub-directory and short season label
season_paths = [path.joinpath('2019-20')]
season_names = ['1920']

# existing training set; season is read as text so labels such as '1920' stay strings
df_train = pd.read_csv(path.joinpath('train.csv'), dtype={'season': str}, index_col=0)

# team names and their team codes
teams = pd.read_csv(path.joinpath('teams.csv'))

# prediction set saved by the previous run, the source of the latest week's play_proba values
remaining_season = pd.read_csv(path.joinpath('remaining_season.csv'), index_col=0)

# players dataset for the current season
all_players = build_players(path, season_paths, season_names, teams)

# 2019-20 team market values
# the source table was in a slightly different position for the 2019-20 season
# TODO: check that the 2019-20 values remain the same once the season starts
teams_mv = build_season_mv('1920', 11, range(13,33))

In [4]:
# preview the last rows of the existing training set
df_train.tail()

Unnamed: 0,player,gw,position,minutes,team,opponent_team,relative_market_value_team,relative_market_value_opponent_team,was_home,total_points,season,play_proba
76140,Yoshinori_Muto,15,4,0,Newcastle United,Sheffield United,0.586862,0.196171,False,0,1920,1.0
76141,Youri_Tielemans,15,3,68,Leicester City,Watford,0.880036,0.504425,True,3,1920,1.0
76142,Yves_Bissouma,15,3,0,Brighton and Hove Albion,Arsenal,0.471069,1.481743,False,0,1920,1.0
76143,Çaglar_Söyüncü,15,2,90,Leicester City,Watford,0.880036,0.504425,True,5,1920,1.0
76144,Ørjan_Nyland,15,1,0,Aston Villa,Chelsea,0.409718,1.731113,False,0,1920,1.0


In [5]:
# work out which gameweek to add to the training set:
# one after the highest 2019-20 gameweek already in df_train,
# or gameweek 1 when df_train has no 2019-20 rows yet (max of nothing is NaN)
season_gws = df_train.loc[df_train['season'] == '1920', 'gw']
latest_recorded_gw = season_gws.max()

last_gw = 1 if np.isnan(latest_recorded_gw) else latest_recorded_gw + 1

last_gw

16

In [6]:
# build the training rows for the gameweek being added (current season only)
season_dir, season_label = season_paths[0], season_names[0]
df_latest = build_season(season_dir, season_label, all_players, teams, teams_mv, [last_gw])

In [7]:
# inspect the rows built for the new gameweek
df_latest

Unnamed: 0,player,gw,position,minutes,team,opponent_team,relative_market_value_team,relative_market_value_opponent_team,was_home,total_points,season
0,Aaron_Connolly,16,4,0,Brighton and Hove Albion,Wolverhampton Wanderers,0.453089,0.746215,True,0,1920
1,Aaron_Cresswell,16,2,50,West Ham United,Arsenal,0.699185,1.413491,True,0,1920
2,Aaron_Lennon,16,3,19,Burnley,Tottenham Hotspur,0.406164,2.001099,False,1,1920
3,Aaron_Mooy,16,3,90,Brighton and Hove Albion,Wolverhampton Wanderers,0.453089,0.746215,True,2,1920
4,Aaron_Ramsdale,16,1,90,Bournemouth,Liverpool,0.713784,2.460966,True,3,1920
5,Aaron_Wan-Bissaka,16,2,90,Manchester United,Manchester City,1.560524,2.711234,False,3,1920
6,Abdoulaye_Doucouré,16,3,77,Watford,Crystal Palace,0.433797,0.464142,True,2,1920
7,Adama_Traoré,16,3,90,Wolverhampton Wanderers,Brighton and Hove Albion,0.746215,0.453089,False,2,1920
8,Adam_Idah,16,4,0,Norwich,Sheffield United,0.295212,0.242030,True,0,1920
9,Adam_Lallana,16,3,0,Liverpool,Bournemouth,2.460966,0.713784,False,0,1920


In [8]:
# (rows, columns) of the new gameweek's data
df_latest.shape

(569, 11)

In [9]:
# play_proba recorded for this gameweek in the previous prediction set
# keep a single row per player so that the later merge on 'player' cannot
# multiply rows if a player is listed more than once for the gameweek
last_play_proba = (
    remaining_season
    .loc[remaining_season['gw'] == last_gw, ['player', 'play_proba']]
    .drop_duplicates(subset='player')
)

In [10]:
# preview the player / play_proba pairs selected for this gameweek
last_play_proba.head()

Unnamed: 0,player,play_proba
0,Ahmed_El Mohamady,1.0
1,James_Chester,1.0
2,Neil_Taylor,1.0
3,Kortney_Hause,1.0
4,Jonathan_Kodjia,1.0


In [11]:
# attach the latest week's play_proba to each player row
# left join: every row of df_latest is kept, players without a match get NaN
df_latest = pd.merge(df_latest, last_play_proba, how='left', on='player')

In [12]:
# inspect the new gameweek's rows now that play_proba has been added
df_latest

Unnamed: 0,player,gw,position,minutes,team,opponent_team,relative_market_value_team,relative_market_value_opponent_team,was_home,total_points,season,play_proba
0,Aaron_Connolly,16,4,0,Brighton and Hove Albion,Wolverhampton Wanderers,0.453089,0.746215,True,0,1920,1.00
1,Aaron_Cresswell,16,2,50,West Ham United,Arsenal,0.699185,1.413491,True,0,1920,1.00
2,Aaron_Lennon,16,3,19,Burnley,Tottenham Hotspur,0.406164,2.001099,False,1,1920,1.00
3,Aaron_Mooy,16,3,90,Brighton and Hove Albion,Wolverhampton Wanderers,0.453089,0.746215,True,2,1920,1.00
4,Aaron_Ramsdale,16,1,90,Bournemouth,Liverpool,0.713784,2.460966,True,3,1920,1.00
5,Aaron_Wan-Bissaka,16,2,90,Manchester United,Manchester City,1.560524,2.711234,False,3,1920,1.00
6,Abdoulaye_Doucouré,16,3,77,Watford,Crystal Palace,0.433797,0.464142,True,2,1920,1.00
7,Adama_Traoré,16,3,90,Wolverhampton Wanderers,Brighton and Hove Albion,0.746215,0.453089,False,2,1920,1.00
8,Adam_Idah,16,4,0,Norwich,Sheffield United,0.295212,0.242030,True,0,1920,1.00
9,Adam_Lallana,16,3,0,Liverpool,Bournemouth,2.460966,0.713784,False,0,1920,1.00


In [13]:
# stack the new gameweek's rows under the existing training set and renumber the index
frames = [df_train, df_latest]
df_train_new = pd.concat(frames, axis=0, ignore_index=True)

In [14]:
# preview the last rows of the extended training set
df_train_new.tail()

Unnamed: 0,player,gw,position,minutes,team,opponent_team,relative_market_value_team,relative_market_value_opponent_team,was_home,total_points,season,play_proba
76709,Yoshinori_Muto,16,4,0,Newcastle United,Southampton,0.561705,0.469252,True,0,1920,1.0
76710,Youri_Tielemans,16,3,90,Leicester City,Aston Villa,1.054253,0.522434,False,2,1920,1.0
76711,Yves_Bissouma,16,3,0,Brighton and Hove Albion,Wolverhampton Wanderers,0.453089,0.746215,True,0,1920,1.0
76712,Çaglar_Söyüncü,16,2,90,Leicester City,Aston Villa,1.054253,0.522434,False,2,1920,1.0
76713,Ørjan_Nyland,16,1,0,Aston Villa,Leicester City,0.522434,1.054253,True,0,1920,1.0


In [15]:
# write the extended training set back to train.csv, replacing the existing file
train_csv = path/'train.csv'
df_train_new.to_csv(train_csv)

In [16]:
## build the prediction set
# predictions start from the gameweek after the one just added to the training set
current_gw = last_gw + 1

# read fixtures.csv and keep only fixtures from current_gw onwards
fixtures = pd.read_csv(path/'fixtures.csv')
fixtures = fixtures.loc[fixtures['gw'].ge(current_gw)]

In [17]:
# preview the first ten remaining fixtures
fixtures.head(10)

Unnamed: 0,home_team,away_team,gw
160,Arsenal,Manchester City,17
161,Burnley,Newcastle United,17
162,Chelsea,Bournemouth,17
163,Crystal Palace,Brighton and Hove Albion,17
164,Leicester City,Norwich,17
165,Liverpool,Watford,17
166,Manchester United,Everton,17
167,Sheffield United,Aston Villa,17
168,Southampton,West Ham United,17
169,Wolverhampton Wanderers,Tottenham Hotspur,17


In [18]:
# add team codes for both sides of each fixture
# teams is joined twice (home team first, then away team); after the second join
# the home side's code is in team_code_x and the away side's in team_code_y
fixtures = (
    fixtures
    .merge(teams, how='left', left_on='home_team', right_on='team')
    .merge(teams, how='left', left_on='away_team', right_on='team')
    .loc[:, ['gw', 'home_team', 'away_team', 'team_code_x', 'team_code_y']]
    .rename(index=str,
            columns={'team_code_x':'home_team_code',
                     'team_code_y':'away_team_code'})
)

In [19]:
# preview the fixtures with home and away team codes attached
fixtures.head()

Unnamed: 0,gw,home_team,away_team,home_team_code,away_team_code
0,17,Arsenal,Manchester City,3,43
1,17,Burnley,Newcastle United,90,4
2,17,Chelsea,Bournemouth,8,91
3,17,Crystal Palace,Brighton and Hove Albion,31,36
4,17,Leicester City,Norwich,13,45


In [20]:
# one row per (fixture, home-team player):
# match each fixture's home team code against the players' 2019-20 team,
# keep the fields needed for prediction under generic column names,
# and flag the rows as home fixtures
home_df = (
    fixtures
    .merge(all_players, how='left', left_on='home_team_code', right_on='team_1920')
    .loc[:, ['gw', 'home_team', 'away_team', 'full_name', 'position_1920', 'cost_1920', 'play_proba_1920']]
    .rename(index=str,
            columns={'home_team':'team',
                     'away_team':'opponent_team',
                     'full_name':'player',
                     'position_1920':'position',
                     'cost_1920':'price',
                     'play_proba_1920':'play_proba'})
    .assign(was_home=True)
)

In [21]:
# one row per (fixture, away-team player):
# match each fixture's away team code against the players' 2019-20 team,
# keep the fields needed for prediction under generic column names
# (the away side is 'team', the home side is 'opponent_team'),
# and flag the rows as away fixtures
away_df = (
    fixtures
    .merge(all_players, how='left', left_on='away_team_code', right_on='team_1920')
    .loc[:, ['gw',  'away_team', 'home_team', 'full_name', 'position_1920', 'cost_1920', 'play_proba_1920']]
    .rename(index=str,
            columns={'away_team':'team',
                     'home_team':'opponent_team',
                     'full_name':'player',
                     'position_1920':'position',
                     'cost_1920':'price',
                     'play_proba_1920':'play_proba'})
    .assign(was_home=False)
)

In [22]:
# preview the first rows of the away-side player table
away_df.head()

Unnamed: 0,gw,team,opponent_team,player,position,price,play_proba,was_home
0,17,Manchester City,Arsenal,Aymeric_Laporte,2,63,0,False
1,17,Manchester City,Arsenal,Kyle_Walker,2,58,100,False
2,17,Manchester City,Arsenal,Benjamin_Mendy,2,55,100,False
3,17,Manchester City,Arsenal,Oleksandr_Zinchenko,2,52,100,False
4,17,Manchester City,Arsenal,John_Stones,2,53,0,False


In [23]:
# preview the first rows of the home-side player table
home_df.head()

Unnamed: 0,gw,team,opponent_team,player,position,price,play_proba,was_home
0,17,Arsenal,Manchester City,Shkodran_Mustafi,2,52,100.0,True
1,17,Arsenal,Manchester City,Héctor_Bellerín,2,54,75.0,True
2,17,Arsenal,Manchester City,Sead_Kolasinac,2,52,100.0,True
3,17,Arsenal,Manchester City,Ainsley_Maitland-Niles,2,46,100.0,True
4,17,Arsenal,Manchester City,Sokratis_Papastathopoulos,2,49,,True


In [24]:
# average minutes per player over the most recent gameweeks
# a row counts when it is from the current season, its gameweek is no more than
# five before current_gw, and the player had no injury designation (play_proba is 1)
in_window = df_train_new['gw'] >= current_gw - 5
this_season = df_train_new['season'] == season_names[0]
fully_available = df_train_new['play_proba'] == 1

# player name -> mean minutes over the qualifying rows
recent_mins = (
    df_train_new
    .loc[in_window & this_season & fully_available, ['player', 'minutes']]
    .groupby('player')['minutes']
    .mean()
    .to_dict()
)

recent_mins

{'Aaron_Connolly': 30.0,
 'Aaron_Cresswell': 82.0,
 'Aaron_Lennon': 24.4,
 'Aaron_Mooy': 72.0,
 'Aaron_Ramsdale': 90.0,
 'Aaron_Wan-Bissaka': 90.0,
 'Abdoulaye_Doucouré': 87.4,
 'Adam_Idah': 0.0,
 'Adam_Lallana': 18.4,
 'Adam_Masina': 63.4,
 'Adam_Smith': 55.666666666666664,
 'Adam_Webster': 90.0,
 'Adama_Traoré': 89.5,
 'Addji Keaninkin Marc-Israel_Guehi': 0.0,
 'Adrian_Mariappa': 58.0,
 'Adrián_San Miguel del Castillo': 20.4,
 'Ahmed_El Mohamady': 36.0,
 'Ainsley_Maitland-Niles': 18.0,
 'Akin_Famewo': 0.0,
 'Albian_Ajeti': 7.2,
 'Alex_Iwobi': 61.4,
 'Alex_McCarthy': 90.0,
 'Alex_Oxlade-Chamberlain': 47.4,
 'Alexander_Tettey': 67.5,
 'Alexandre_Lacazette': 69.2,
 'Alfie_Whiteman': 0.0,
 'Alireza_Jahanbakhsh': 4.4,
 'Alisson_Ramses Becker': 86.25,
 'Allan_Saint-Maximin': 86.0,
 'Andre_Gray': 30.6,
 'Andreas_Christensen': 45.0,
 'Andreas_Pereira': 51.4,
 'Andrew_Robertson': 87.0,
 'Andriy_Yarmolenko': 34.4,
 'Andros_Townsend': 68.75,
 'Andy_Carroll': 16.25,
 'Andy_Lonergan': 0.0,
 'Ange

In [25]:
# concatenate home and away players
# (pd.concat is used because DataFrame.append was deprecated in pandas 1.4 and removed in 2.0)
remaining_season_df = pd.concat([home_df, away_df]).reset_index(drop=True)

# 2019-20 market values, used for both the team and the opponent lookup
season_mv = teams_mv[teams_mv['season'] == '1920']

# add the market value of the player's own team
remaining_season_df = remaining_season_df.merge(season_mv,
                                                left_on='team',
                                                right_on='name',
                                                how='left').drop(['name', 'season'], axis=1)

# add the market value of the opponent team
# the second merge suffixes the two value columns: _x (own team) and _y (opponent)
remaining_season_df = remaining_season_df.merge(season_mv,
                                                left_on='opponent_team',
                                                right_on='name',
                                                how='left').drop(['name', 'season'], axis=1)

remaining_season_df = remaining_season_df.rename(
    index=str,
    columns={'relative_market_value_x':'relative_market_value_team',
             'relative_market_value_y':'relative_market_value_opponent_team'})

# add season name
remaining_season_df['season'] = '1920'

# divide cost by 10 for actual cost
remaining_season_df['price'] = remaining_season_df['price']/10

# set availability probability
# 0 = 0% chance, 25 = 25% chance, etc
# 'None', missing (NaN) or '100' = 100% chance
# missing values are handled as well as the string 'None', because a value that
# arrives as NaN would otherwise stay NaN and turn the expected minutes into NaN
remaining_season_df.loc[remaining_season_df['play_proba'] == 'None', 'play_proba'] = 100
remaining_season_df['play_proba'] = (
    remaining_season_df['play_proba'].astype('float').fillna(100) / 100
)

# cast position to integer
remaining_season_df['position'] = remaining_season_df['position'].astype(int)

# add each player's average minutes over the last five gameweeks (recent_mins);
# players with no entry in recent_mins get 0
# assigned directly rather than via fillna(inplace=True) on a column selection,
# which is not guaranteed to update the parent frame
remaining_season_df['minutes'] = remaining_season_df['player'].map(recent_mins).fillna(0)

# multiply minutes by play probability
remaining_season_df['minutes'] = remaining_season_df['minutes'] * remaining_season_df['play_proba']



In [26]:
# preview the first 50 rows of the prediction set
remaining_season_df.head(50)

Unnamed: 0,gw,team,opponent_team,player,position,price,play_proba,was_home,relative_market_value_team,relative_market_value_opponent_team,season,minutes
0,17,Arsenal,Manchester City,Shkodran_Mustafi,2,5.2,1.0,True,1.413491,2.711234,1920,18.0
1,17,Arsenal,Manchester City,Héctor_Bellerín,2,5.4,0.75,True,1.413491,2.711234,1920,32.25
2,17,Arsenal,Manchester City,Sead_Kolasinac,2,5.2,1.0,True,1.413491,2.711234,1920,55.5
3,17,Arsenal,Manchester City,Ainsley_Maitland-Niles,2,4.6,1.0,True,1.413491,2.711234,1920,18.0
4,17,Arsenal,Manchester City,Sokratis_Papastathopoulos,2,4.9,1.0,True,1.413491,2.711234,1920,54.0
5,17,Arsenal,Manchester City,Nacho_Monreal,2,5.0,0.0,True,1.413491,2.711234,1920,0.0
6,17,Arsenal,Manchester City,Laurent_Koscielny,2,5.0,0.0,True,1.413491,2.711234,1920,0.0
7,17,Arsenal,Manchester City,Konstantinos_Mavropanos,2,4.4,1.0,True,1.413491,2.711234,1920,0.0
8,17,Arsenal,Manchester City,Carl_Jenkinson,2,4.5,0.0,True,1.413491,2.711234,1920,0.0
9,17,Arsenal,Manchester City,Rob_Holding,2,4.5,0.75,True,1.413491,2.711234,1920,14.25


In [27]:
# write the new prediction set to remaining_season.csv
remaining_season_csv = path/'remaining_season.csv'
remaining_season_df.to_csv(remaining_season_csv)