In [38]:
# import required libraries
import os
from pathlib import Path

from helpers import *

In [39]:
# path to data directory
# defaults to the original location; set the FPL_DATA_DIR environment variable
# to point the notebook at a different data directory without editing this cell
path = Path(os.environ.get('FPL_DATA_DIR', '/home/jupyter/fpl-prediction/data'))

In [54]:
# current season: its data directory and its short name
season_names = ['1920']
season_paths = [path.joinpath('2019-20')]

# latest training set (first csv column is the index, season is read as a string)
df_train = pd.read_csv(path.joinpath('train.csv'), index_col=0, dtype={'season': str})

# team codes
teams = pd.read_csv(path.joinpath('teams.csv'))

# latest remaining season dataset, for latest week's play_proba values
remaining_season = pd.read_csv(path.joinpath('remaining_season.csv'), index_col=0)

# players dataset for the current season
all_players = build_players(path, season_paths, season_names, teams)

# 2019-20 market value
# table was in slightly different position for 2019-20 season
# (NOTE(review): 11 and range(13, 33) presumably locate that table - confirm against build_season_mv)
# should check that 2019-20 season values remain the same once season starts
teams_mv = build_season_mv(season_names[0], 11, range(13, 33))

In [45]:
# preview the first rows of the training set that was just loaded
df_train.head()

Unnamed: 0,player,gw,position,minutes,team,opponent_team,relative_market_value_team,relative_market_value_opponent_team,was_home,total_points,season
0,Aaron_Cresswell,1,2,0,West Ham United,Chelsea,0.895471,2.243698,False,0,1617
1,Aaron_Lennon,1,3,15,Everton,Tottenham Hotspur,1.057509,1.43369,True,1,1617
2,Aaron_Ramsey,1,3,60,Arsenal,Liverpool,1.944129,1.46586,True,2,1617
3,Abdoulaye_Doucouré,1,3,0,Watford,Southampton,0.7042,0.796805,False,0,1617
4,Abdul Rahman_Baba,1,2,0,Chelsea,West Ham United,2.243698,0.895471,True,0,1617


In [46]:
# work out which gameweek is about to be added to the training set
# gameweeks already recorded for the 1920 season
recorded_gws = df_train.loc[df_train['season'] == '1920', 'gw']
latest_recorded = recorded_gws.max()

# a nan maximum means no rows for the season yet, so start at gameweek 1;
# otherwise move on to the gameweek after the last recorded one
last_gw = 1 if np.isnan(latest_recorded) else latest_recorded + 1

last_gw

1

In [47]:
# build the training rows for the latest gameweek only
# index 0 picks the first (current) entry of the season lists
current_path, current_name = season_paths[0], season_names[0]
df_latest = build_season(current_path, current_name, all_players, teams, teams_mv, [last_gw])

In [48]:
# preview the new gameweek rows; show only the first ten rather than dumping the whole frame
df_latest.head(10)

Unnamed: 0,player,gw,position,minutes,team,opponent_team,relative_market_value_team,relative_market_value_opponent_team,was_home,total_points,season
0,Aaron_Cresswell,1,2,90,West Ham United,Manchester City,0.739196,2.727025,True,0,1920
1,Aaron_Lennon,1,3,6,Burnley,Southampton,0.441799,0.624210,True,1,1920
2,Aaron_Mooy,1,3,0,Brighton and Hove Albion,Watford,0.476156,0.555819,False,0,1920
3,Aaron_Ramsdale,1,1,90,Bournemouth,Sheffield United,0.687124,0.150631,True,2,1920
4,Aaron_Wan-Bissaka,1,2,90,Manchester United,Chelsea,1.679697,1.798870,True,8,1920
5,Abdoulaye_Doucouré,1,3,90,Watford,Brighton and Hove Albion,0.555819,0.476156,True,0,1920
6,Adama_Traoré,1,3,0,Wolverhampton Wanderers,Leicester City,0.673382,0.886283,False,0,1920
7,Adam_Lallana,1,3,0,Liverpool,Norwich,2.297572,0.198300,True,0,1920
8,Adam_Masina,1,2,0,Watford,Brighton and Hove Albion,0.555819,0.476156,True,0,1920
9,Adam_Smith,1,2,90,Bournemouth,Sheffield United,0.687124,0.150631,True,1,1920


In [49]:
# (rows, columns) of the new gameweek rows
df_latest.shape

(526, 11)

In [52]:
# play_proba values stored in the previous prediction set for the gameweek being added
# the gameweek was hard-coded to 1, which selects nothing once remaining_season.csv
# only holds later gameweeks, so use last_gw instead
# keep one row per player so the left-merge on 'player' cannot multiply rows
last_play_proba = (
    remaining_season
    .loc[remaining_season['gw'] == last_gw, ['player', 'play_proba']]
    .drop_duplicates(subset='player')
)

In [53]:
# preview the selected play_proba values (an empty result means no rows matched the gameweek filter)
last_play_proba.head()

Unnamed: 0,player,play_proba


In [16]:
# add latest week's play_proba for each player
# players without a match keep NaN because this is a left join
# drop any play_proba left over from an earlier run of this cell first; otherwise
# re-running would produce play_proba_x / play_proba_y instead of play_proba
df_latest = (
    df_latest
    .drop(columns='play_proba', errors='ignore')
    .merge(last_play_proba, on='player', how='left')
)

In [17]:
# check the merged play_proba column; show only the first ten rows rather than the whole frame
df_latest.head(10)

Unnamed: 0,player,gw,position,minutes,team,opponent_team,relative_market_value_team,relative_market_value_opponent_team,was_home,total_points,season,play_proba
0,Aaron_Cresswell,1,2,90,West Ham United,Manchester City,0.739196,2.727025,True,0,1920,1.00
1,Aaron_Lennon,1,3,6,Burnley,Southampton,0.441799,0.624210,True,1,1920,1.00
2,Aaron_Mooy,1,3,0,Brighton and Hove Albion,Watford,0.476156,0.555819,False,0,1920,
3,Aaron_Ramsdale,1,1,90,Bournemouth,Sheffield United,0.687124,0.150631,True,2,1920,1.00
4,Aaron_Wan-Bissaka,1,2,90,Manchester United,Chelsea,1.679697,1.798870,True,8,1920,1.00
5,Abdoulaye_Doucouré,1,3,90,Watford,Brighton and Hove Albion,0.555819,0.476156,True,0,1920,1.00
6,Adama_Traoré,1,3,0,Wolverhampton Wanderers,Leicester City,0.673382,0.886283,False,0,1920,1.00
7,Adam_Lallana,1,3,0,Liverpool,Norwich,2.297572,0.198300,True,0,1920,1.00
8,Adam_Masina,1,2,0,Watford,Brighton and Hove Albion,0.555819,0.476156,True,0,1920,1.00
9,Adam_Smith,1,2,90,Bournemouth,Sheffield United,0.687124,0.150631,True,1,1920,1.00


In [23]:
# stack the new gameweek rows under the existing training rows, renumbering the index from 0
df_train_new = pd.concat(objs=[df_train, df_latest], axis='index', ignore_index=True)

In [58]:
# write the combined training set back to disk
# this replaces the existing train.csv
train_csv = path.joinpath('train.csv')
df_train_new.to_csv(train_csv)

In [25]:
## build the prediction set, starting from fixtures.csv
# predictions start at the gameweek after the one just added to the training set
current_gw = 1 + last_gw

# load every fixture, then keep only those from current_gw onwards
fixtures = pd.read_csv(path.joinpath('fixtures.csv'))
fixtures = fixtures.loc[fixtures['gw'].ge(current_gw)]

In [26]:
# first ten remaining fixtures (gw >= current_gw)
fixtures.head(10)

Unnamed: 0,home_team,away_team,gw
10,Arsenal,Burnley,2
11,Aston Villa,Bournemouth,2
12,Brighton and Hove Albion,West Ham United,2
13,Everton,Watford,2
14,Norwich,Newcastle United,2
15,Southampton,Liverpool,2
16,Manchester City,Tottenham Hotspur,2
17,Sheffield United,Crystal Palace,2
18,Chelsea,Leicester City,2
19,Wolverhampton Wanderers,Manchester United,2


In [27]:
# look up the team code of the home side, then of the away side
# teams is joined twice, so pandas suffixes its clashing columns:
# _x comes from the first (home) join, _y from the second (away) join
fixture_columns = ['gw', 'home_team', 'away_team', 'team_code_x', 'team_code_y']
fixtures = (
    fixtures
    .merge(teams, left_on='home_team', right_on='team', how='left')
    .merge(teams, left_on='away_team', right_on='team', how='left')
    .loc[:, fixture_columns]
    .rename(index=str,
            columns={'team_code_x': 'home_team_code',
                     'team_code_y': 'away_team_code'})
)

In [28]:
# check that home_team_code / away_team_code were attached to each fixture
fixtures.head()

Unnamed: 0,gw,home_team,away_team,home_team_code,away_team_code
0,2,Arsenal,Burnley,3,90
1,2,Aston Villa,Bournemouth,7,91
2,2,Brighton and Hove Albion,West Ham United,36,21
3,2,Everton,Watford,11,57
4,2,Norwich,Newcastle United,45,4


In [29]:
# fields to keep from the fixture/player join, mapped to their prediction-set names
home_renames = {'home_team': 'team',
                'away_team': 'opponent_team',
                'full_name': 'player',
                'position_1920': 'position',
                'cost_1920': 'price',
                'play_proba_1920': 'play_proba'}

# one row per (remaining fixture, player of the home team) for the current season
home_df = (
    pd.merge(fixtures, all_players,
             how='left',
             left_on='home_team_code',
             right_on='team_1920')
    # keep gw plus the fields being renamed, in that order
    .loc[:, ['gw'] + list(home_renames)]
    .rename(index=str, columns=home_renames)
    # every row in this frame is a home fixture for the player
    .assign(was_home=True)
)

In [30]:
# fields to keep from the fixture/player join, mapped to their prediction-set names
# (here the away side is the player's own team and the home side is the opponent)
away_renames = {'away_team': 'team',
                'home_team': 'opponent_team',
                'full_name': 'player',
                'position_1920': 'position',
                'cost_1920': 'price',
                'play_proba_1920': 'play_proba'}

# one row per (remaining fixture, player of the away team) for the current season
away_df = (
    pd.merge(fixtures, all_players,
             how='left',
             left_on='away_team_code',
             right_on='team_1920')
    # keep gw plus the fields being renamed, in that order
    .loc[:, ['gw'] + list(away_renames)]
    .rename(index=str, columns=away_renames)
    # every row in this frame is an away fixture for the player
    .assign(was_home=False)
)

In [31]:
# look at away players: first rows of away_df (fixtures joined to players via away_team_code)
away_df.head()

Unnamed: 0,gw,team,opponent_team,player,position,price,play_proba,was_home
0,2,Burnley,Arsenal,Ben_Mee,2,50,,False
1,2,Burnley,Arsenal,James_Tarkowski,2,50,100.0,False
2,2,Burnley,Arsenal,Matthew_Lowton,2,45,,False
3,2,Burnley,Arsenal,Phil_Bardsley,2,45,,False
4,2,Burnley,Arsenal,Charlie_Taylor,2,45,,False


In [32]:
# look at home players: first rows of home_df (fixtures joined to players via home_team_code)
home_df.head()

Unnamed: 0,gw,team,opponent_team,player,position,price,play_proba,was_home
0,2,Arsenal,Burnley,Shkodran_Mustafi,2,55,,True
1,2,Arsenal,Burnley,Héctor_Bellerín,2,55,0.0,True
2,2,Arsenal,Burnley,Sead_Kolasinac,2,55,25.0,True
3,2,Arsenal,Burnley,Ainsley_Maitland-Niles,2,50,,True
4,2,Arsenal,Burnley,Sokratis_Papastathopoulos,2,50,,True


In [33]:
# concatenate home and away players
# pd.concat is used instead of DataFrame.append, which was deprecated in pandas 1.4
# and removed in pandas 2.0; ignore_index gives the same fresh 0..n-1 index
remaining_season_df = pd.concat([home_df, away_df], ignore_index=True)

# market values for the current season only, filtered once and reused for both joins
season_mv = teams_mv[teams_mv['season'] == '1920']

# add the market value of each player's own team
remaining_season_df = remaining_season_df.merge(season_mv,
                                                left_on='team',
                                                right_on='name',
                                                how='left').drop(['name', 'season'], axis=1)

# add the market value of the opponent
# the value column now exists on both sides, so pandas suffixes it:
# _x is the existing (own team) value, _y the newly joined (opponent) value
remaining_season_df = remaining_season_df.merge(season_mv,
                                                left_on='opponent_team',
                                                right_on='name',
                                                how='left').drop(['name', 'season'], axis=1)

remaining_season_df = remaining_season_df.rename(
    index=str,
    columns={'relative_market_value_x': 'relative_market_value_team',
             'relative_market_value_y': 'relative_market_value_opponent_team'})

# add season name
remaining_season_df['season'] = '1920'

# divide cost by 10 for actual cost
remaining_season_df['price'] = remaining_season_df['price'] / 10

# set availability probability
# 0 = 0% chance, 25 = 25% chance, etc
# 'None', missing (NaN) or '100' = 100% chance
# the string test alone misses values that are already NaN (for example when 'None'
# was parsed as a missing value on read), which would leave play_proba and minutes
# as NaN; so missing values are filled with 100 as well
play_proba = remaining_season_df['play_proba']
play_proba = play_proba.mask(play_proba == 'None', 100).astype('float').fillna(100)
remaining_season_df['play_proba'] = play_proba / 100

# set minutes equal to 90 multiplied by their play probability for all players, for now
remaining_season_df['minutes'] = 90 * remaining_season_df['play_proba']

# cast position to integer
remaining_season_df['position'] = remaining_season_df['position'].astype(int)

In [34]:
# check the finished prediction set: price, play_proba (0-1), market values, season and minutes
remaining_season_df.head()

Unnamed: 0,gw,team,opponent_team,player,position,price,play_proba,was_home,relative_market_value_team,relative_market_value_opponent_team,season,minutes
0,2,Arsenal,Burnley,Shkodran_Mustafi,2,5.5,1.0,True,1.448866,0.441799,1920,90.0
1,2,Arsenal,Burnley,Héctor_Bellerín,2,5.5,0.0,True,1.448866,0.441799,1920,0.0
2,2,Arsenal,Burnley,Sead_Kolasinac,2,5.5,0.25,True,1.448866,0.441799,1920,22.5
3,2,Arsenal,Burnley,Ainsley_Maitland-Niles,2,5.0,1.0,True,1.448866,0.441799,1920,90.0
4,2,Arsenal,Burnley,Sokratis_Papastathopoulos,2,5.0,1.0,True,1.448866,0.441799,1920,90.0


In [37]:
# write the prediction set to disk
# this is the same file that is read back into remaining_season when the notebook starts
remaining_csv = path.joinpath('remaining_season.csv')
remaining_season_df.to_csv(remaining_csv)