# Creates player data files for a season
## Imports and Data reads

In [107]:
import os

import numpy as np
import pandas as pd
# Turn off pandas' chained-assignment warning for the whole session.
pd.options.mode.chained_assignment = None

# Season folder name inside the vaastav/Fantasy-Premier-League data repository.
season = '2021-22'

# Root URL of that season's raw CSV files.
season_url = f'https://raw.githubusercontent.com/vaastav/Fantasy-Premier-League/master/data/{season}'

# Raw player table (ids, names, position, cost, team code, ...).
players_raw = pd.read_csv(f'{season_url}/players_raw.csv')

# Merged gameweek table; this file is read as ISO-8859-1.
input_merged_gw = pd.read_csv(f'{season_url}/gws/merged_gw.csv', encoding="ISO-8859-1")

## Basic player info
Reads basic info about the player such as ids, names and position

In [108]:
# Keep only the identifying columns of each player and rename them to the
# names used by the rest of the notebook.  Selecting the columns already
# produces a new frame, so players_raw itself is left untouched.
player_info = players_raw[
    ['id', 'first_name', 'second_name', 'element_type', 'now_cost', 'team_code']
].rename(
    columns={'id': 'player_id', 'element_type': 'position', 'now_cost': 'cost'}
)

## Set Opponent Difficulty
Difficulty = the number of league points the team finished the season on. Projected points are used where the season was still ongoing when the values were entered (the 19/20 table and the 21/22 "expected points" table).

In [109]:
# Opponent difficulty lookup, indexed by the value of `opponent_team` (1-20).
# Slot 0 is unused padding so that the ids can be used directly as indices.

#21/22 Season expected points
opp_diff_array = np.array([
    0,   # (unused)
    59,  # Arsenal
    44,  # Aston Villa
    45,  # Brentford
    54,  # Brighton
    39,  # Burnley
    79,  # Chelsea
    50,  # Crystal Palace
    47,  # Everton
    52,  # Leicester
    44,  # Leeds
    81,  # Liverpool
    85,  # Man City
    62,  # Man Utd
    32,  # Newcastle
    28,  # Norwich
    46,  # Southampton
    53,  # Spurs
    34,  # Watford
    64,  # West Ham
    48,  # Wolves
], dtype=float)

#20/21 Season
# opp_diff_array[1] = 61  # Arsenal
# opp_diff_array[2] = 55  # Aston Villa
# opp_diff_array[3] = 41  # Brighton
# opp_diff_array[4] = 39     # Burnley
# opp_diff_array[5] = 67   # Chelsea
# opp_diff_array[6] = 44   # Crystal Palace
# opp_diff_array[7] = 59   # Everton
# opp_diff_array[8] = 28  # Fulham
# opp_diff_array[9] = 66  # Leicester
# opp_diff_array[10] = 59  # Leeds
# opp_diff_array[11] = 69 # Liverpool
# opp_diff_array[12] = 86 # Man City
# opp_diff_array[13] = 74    # Man Utd
# opp_diff_array[14] = 45 # Newcastle
# opp_diff_array[15] = 23 # Sheffield Utd
# opp_diff_array[16] = 43 # Southampton
# opp_diff_array[17] = 62 # Spurs
# opp_diff_array[18] = 26 # West Brom
# opp_diff_array[19] = 65 # West Ham
# opp_diff_array[20] = 45 # Wolves

# 19/20 Season
# opp_diff_array[1] = 54.29  # Arsenal
# opp_diff_array[2] = 33.93  # Aston Villa
# opp_diff_array[3] = 35.38  # Bournemouth
# opp_diff_array[4] = 38     # Brighton
# opp_diff_array[5] = 51.1   # Burnley
# opp_diff_array[6] = 62.9   # Chelsea
# opp_diff_array[7] = 51.1   # Crystal Palace
# opp_diff_array[8] = 48.48  # Everton
# opp_diff_array[9] = 69.44  # Leicester
# opp_diff_array[10] = 107.45# Liverpool
# opp_diff_array[11] = 77.36 # Man City
# opp_diff_array[12] = 59    # Man Utd
# opp_diff_array[13] = 46.86 # Newcastle
# opp_diff_array[14] = 27.52 # Norwich
# opp_diff_array[15] = 58.36 # Sheffield Utd
# opp_diff_array[16] = 44.55 # Southampton
# opp_diff_array[17] = 53.72 # Spurs
# opp_diff_array[18] = 35.38 # Watford
# opp_diff_array[19] = 35.38 # West Ham
# opp_diff_array[20] = 56.34 # Wolves

# 18/19 Season
# opp_diff_array[1] = 70  # Arsenal
# opp_diff_array[2] = 45  # Bournemouth
# opp_diff_array[3] = 36  # Brighton
# opp_diff_array[4] = 40  # Burnley
# opp_diff_array[5] = 34  # Cardiff City
# opp_diff_array[6] = 72  # Chelsea
# opp_diff_array[7] = 49  # Crystal Palace
# opp_diff_array[8] = 54  # Everton
# opp_diff_array[9] = 26  # Fulham
# opp_diff_array[10] = 16 # Huddersfield
# opp_diff_array[11] = 52 # Leicester
# opp_diff_array[12] = 97 # Liverpool
# opp_diff_array[13] = 98 # Man city
# opp_diff_array[14] = 55 # Man Utd
# opp_diff_array[15] = 45 # Newcastle
# opp_diff_array[16] = 39 # Southampton
# opp_diff_array[17] = 71 # Spurs
# opp_diff_array[18] = 50 # Watford
# opp_diff_array[19] = 52 # West Ham
# opp_diff_array[20] = 57 # Wolves

## Create rolling df
Creates a rolling dataframe where each entry contains the summed statistics from the player's previous *n_prev_gws* games (=3 by default). Removes gameweeks 1 to *n_prev_gws* because there are not enough previous games to sum over, and removes rows where the player played 0 minutes.

In [110]:
# Number of previous games whose statistics are summed into each row.
n_prev_gws = 3

# Per-fixture columns kept from the raw gameweek data.
gw_columns = [
    'element', 'GW', 'total_points', 'minutes', 'was_home', 'assists', 'bps',
    'clean_sheets', 'goals_conceded', 'goals_scored', 'ict_index', 'influence',
    'opponent_team', 'own_goals', 'penalties_missed', 'value', 'penalties_saved',
    'yellow_cards', 'red_cards', 'saves', 'selected', 'threat', 'creativity',
    'transfers_balance',
]

# Limit gw info columns and rename them.  'transfers_balance' keeps its name
# here; it becomes 'tran_sum' once it has been summed below.
merged_gw = input_merged_gw[gw_columns].rename(
    columns={'element': 'player_id', 'selected': 'selected_by', 'total_points': 'points'}
)

# Merge player info and gameweek info dfs
df = pd.merge(merged_gw, player_info, on='player_id').fillna(0)

# Set the opp_diff for each game by looking up the opponent's team id
df['opp_diff'] = opp_diff_array[df['opponent_team'].astype(int).to_numpy()]

# Rolling windows are positional, so make the chronological order explicit.
# mergesort is stable: fixtures within the same gameweek keep their input order.
df = df.sort_values('GW', kind='mergesort').reset_index(drop=True)

# Statistic summed over the previous games -> its column name in the output.
# ('selected_by' keeps its name so the CSV schema stays unchanged.)
stat_names = {
    'minutes': 'minutes_sum',
    'bps': 'bps_sum',
    'influence': 'influence_sum',
    'threat': 'threat_sum',
    'ict_index': 'ict_sum',
    'creativity': 'creat_sum',
    'yellow_cards': 'yel_sum',
    'red_cards': 'red_sum',
    'selected_by': 'selected_by',
    'transfers_balance': 'tran_sum',
    'goals_scored': 'goals_sum',
    'assists': 'assists_sum',
    'points': 'points_sum',
    'saves': 'saves_sum',
    'goals_conceded': 'goals_con_sum',
    'clean_sheets': 'clean_sheets_sum',
}

# For every row, sum each statistic over that player's previous n_prev_gws rows
# (the current row is excluded by the shift).  Both the rolling sum and the
# shift run inside each player's group, so one player's totals can never spill
# into the first row of the next player.  Rows with fewer than n_prev_gws
# previous games get 0.
prev_sums = (
    df.groupby('player_id')[list(stat_names)]
    .transform(lambda games: games.rolling(n_prev_gws).sum().shift(1))
    .fillna(0)
    .rename(columns=stat_names)
)

# Constant columns holding the basic player data for the current game
const_cols = ['GW', 'player_id', 'position', 'first_name', 'second_name', 'team_code',
              'points', 'opp_diff', 'was_home', 'minutes', 'value']

# Put the basic columns and the summed statistics side by side.  prev_sums has
# the same index as df, so the rows line up one-to-one; unlike a merge on
# (GW, player_id) this does not duplicate rows when a player has two fixtures
# in the same gameweek.
df = pd.concat([df[const_cols], prev_sums], axis=1)

# Remove gameweeks up to n_prev_gws and games the player did not play in
df = df[(df['GW'] > n_prev_gws) & (df['minutes'] > 0)].copy()

# Season code derived from the season string, e.g. '2021-22' -> 2122
df['season'] = int(season[2:4] + season[-2:])

# Save csv: append to the shared file and write the header row only when the
# file is new or empty, so later seasons do not add header lines in the data.
output_path = "Player_Data_1.csv"
write_header = not os.path.exists(output_path) or os.path.getsize(output_path) == 0
df.to_csv(output_path, mode='a', header=write_header)