<a href="https://colab.research.google.com/github/robert-shepherd/fpl/blob/main/Project_creating_model_history_file.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Creating model history file


---

The purpose of this script is to combine data from previous FPL seasons (18/19 and 19/20) with the latest data available for this season (GW11 for 20/21).

Data sources:
* Latest season: FPL API
* Historic seasons: vaastav's Fantasy-Premier-League GitHub repository: https://github.com/vaastav/Fantasy-Premier-League

Output:
* The combined dataset is downloaded from Colab as `fpl_combined_data.csv` and subsequently saved to: https://raw.githubusercontent.com/robert-shepherd/fpl/main/fpl_combined_data.csv


In [1]:
#Loading libraries
import pandas as pd
import requests
from google.colab import files

## Reading in current-season player history from the API

In [24]:
#Import the bootstrap-static payload from the FPL API
url = 'https://fantasy.premierleague.com/api/bootstrap-static/'
#A timeout stops a stalled connection from hanging the notebook indefinitely
r = requests.get(url, timeout=30)
#Fail here on an HTTP error status rather than parsing an error response as data
r.raise_for_status()
#NOTE: the name `json` is kept because the following cell reads json['elements']
json = r.json()
json.keys()

dict_keys(['events', 'game_settings', 'phases', 'teams', 'total_players', 'elements', 'element_stats', 'element_types'])

In [25]:
#Build a dataframe from the 'elements' entry of the API payload
elements_df = pd.DataFrame(data=json['elements'])

In [33]:
#Fetch every player's gameweek history and stack it into one dataframe.
#Frames are collected in a list and concatenated once: DataFrame.append was
#removed in pandas 2.0, and growing a frame inside the loop re-copies all
#previously collected rows on every iteration.
history_frames = []
for element_id in elements_df['id'] :
    summary_url = f'https://fantasy.premierleague.com/api/element-summary/{element_id}/'
    #A timeout stops one stalled request from hanging the whole loop
    summary_response = requests.get(summary_url, timeout=30)
    #Stop on an HTTP error instead of failing later on a missing 'history' key
    summary_response.raise_for_status()
    #Distinct names are used so the bootstrap cell's `url`, `r` and `json` are not overwritten
    player_history_df = pd.DataFrame(summary_response.json()['history'])

    #Players with no history rows contribute nothing, so their empty frames are skipped
    if not player_history_df.empty :
        history_frames.append(player_history_df)

#No ignore_index: each player's own row labels are preserved in the result.
#pd.concat raises ValueError if no history was collected at all.
all_history_df = pd.concat(history_frames)


In [34]:
#Checking the size of the stacked player history as (rows, columns)
all_history_df.shape

(6799, 31)

In [35]:
#Previewing the first rows of the player history
all_history_df.head()

Unnamed: 0,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,team_a_score,round,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,value,transfers_balance,selected,transfers_in,transfers_out
0,1,2,8,0,False,2020-09-12T11:30:00Z,0.0,3.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,70,0,76656,0,0
1,1,9,19,0,True,2020-09-19T19:00:00Z,2.0,1.0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,69,-16828,68335,995,17823
2,1,23,11,0,False,2020-09-28T19:00:00Z,3.0,1.0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,69,-11451,59793,675,12126
3,1,29,15,0,True,2020-10-04T13:00:00Z,2.0,1.0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,68,-5324,56403,647,5971
4,1,44,12,0,False,2020-10-17T16:30:00Z,1.0,0.0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,68,-4224,53689,616,4840


In [38]:
#Checking the column types of the player history
all_history_df.dtypes

element                int64
fixture                int64
opponent_team          int64
total_points           int64
was_home                bool
kickoff_time          object
team_h_score         float64
team_a_score         float64
round                  int64
minutes                int64
goals_scored           int64
assists                int64
clean_sheets           int64
goals_conceded         int64
own_goals              int64
penalties_saved        int64
penalties_missed       int64
yellow_cards           int64
red_cards              int64
saves                  int64
bonus                  int64
bps                    int64
influence             object
creativity            object
threat                object
ict_index             object
value                  int64
transfers_balance      int64
selected               int64
transfers_in           int64
transfers_out          int64
dtype: object

## Pulling in historic data

In [47]:
#Locations of the merged gameweek files for the two historic seasons
url_1819 = 'https://raw.githubusercontent.com/vaastav/Fantasy-Premier-League/master/data/2018-19/gws/merged_gw.csv'
url_1920 = 'https://raw.githubusercontent.com/vaastav/Fantasy-Premier-League/master/data/2019-20/gws/merged_gw.csv'

#Read both files with the same latin-1 decoding, 2018-19 first
df_1819, df_1920 = (
    pd.read_csv(season_url, encoding='latin-1')
    for season_url in (url_1819, url_1920)
)

In [48]:
#Previewing the first rows of the 2018-19 data
df_1819.head()

Unnamed: 0,name,assists,attempted_passes,big_chances_created,big_chances_missed,bonus,bps,clean_sheets,clearances_blocks_interceptions,completed_passes,creativity,dribbles,ea_index,element,errors_leading_to_goal,errors_leading_to_goal_attempt,fixture,fouls,goals_conceded,goals_scored,ict_index,id,influence,key_passes,kickoff_time,kickoff_time_formatted,loaned_in,loaned_out,minutes,offside,open_play_crosses,opponent_team,own_goals,penalties_conceded,penalties_missed,penalties_saved,recoveries,red_cards,round,saves,selected,tackled,tackles,target_missed,team_a_score,team_h_score,threat,total_points,transfers_balance,transfers_in,transfers_out,value,was_home,winning_goals,yellow_cards,GW
0,Aaron_Cresswell_402,0,0,0,0,0,0,0,0,0,0.0,0,0,402,0,0,5,0,0,0,0.0,402,0.0,0,2018-08-12T12:30:00Z,12 Aug 13:30,0,0,0,0,0,12,0,0,0,0,0,0,1,0,103396,0,0,0,0,4,0.0,0,0,0,0,55,False,0,0,1
1,Aaron_Lennon_83,0,22,0,1,0,6,1,1,17,12.3,0,0,83,0,0,8,1,0,0,3.9,83,10.0,0,2018-08-12T12:30:00Z,12 Aug 13:30,0,0,90,0,1,16,0,0,0,0,2,0,1,0,15138,1,2,0,0,0,17.0,3,0,0,0,50,False,0,0,1
2,Aaron_Mooy_199,0,51,0,0,0,24,0,2,40,18.2,1,0,199,0,0,4,1,3,0,3.8,199,20.2,1,2018-08-11T14:00:00Z,11 Aug 15:00,0,0,90,0,0,6,0,0,0,0,11,0,1,0,192110,1,6,0,3,0,0.0,2,0,0,0,55,True,0,0,1
3,Aaron_Ramsey_14,0,11,0,0,0,7,0,0,7,10.8,1,0,14,0,0,1,0,1,0,2.9,14,9.4,1,2018-08-12T15:00:00Z,12 Aug 16:00,0,0,53,2,0,13,0,0,0,0,1,0,1,0,60423,0,2,0,2,0,9.0,1,0,0,0,75,True,0,0,1
4,Aaron_Wan-Bissaka_145,1,29,1,0,3,38,1,11,19,14.0,2,0,145,0,0,3,0,0,0,6.0,145,46.0,1,2018-08-11T14:00:00Z,11 Aug 15:00,0,0,90,0,0,9,0,0,0,0,7,0,1,0,652304,2,0,0,2,0,0.0,12,0,0,0,40,False,0,0,1


In [49]:
#Checking the column types of the 2018-19 data
df_1819.dtypes

name                                object
assists                              int64
attempted_passes                     int64
big_chances_created                  int64
big_chances_missed                   int64
bonus                                int64
bps                                  int64
clean_sheets                         int64
clearances_blocks_interceptions      int64
completed_passes                     int64
creativity                         float64
dribbles                             int64
ea_index                             int64
element                              int64
errors_leading_to_goal               int64
errors_leading_to_goal_attempt       int64
fixture                              int64
fouls                                int64
goals_conceded                       int64
goals_scored                         int64
ict_index                          float64
id                                   int64
influence                          float64
key_passes 

## Combining data sources

In [78]:
#Keeping only current-season rows whose round is below 12
df_2021 = all_history_df.loc[all_history_df['round'].lt(12)].copy()
#Confirming the highest round left after the filter
max(df_2021['round'])

11

In [79]:
#Retrieving column names from latest season
#(used below to select the same columns from the historic seasons)
#NOTE(review): looks like this must run before 'season' is added to df_2021,
#otherwise `names` would contain a column the historic files lack - confirm
names = df_2021.columns
names

Index(['element', 'fixture', 'opponent_team', 'total_points', 'was_home',
       'kickoff_time', 'team_h_score', 'team_a_score', 'round', 'minutes',
       'goals_scored', 'assists', 'clean_sheets', 'goals_conceded',
       'own_goals', 'penalties_saved', 'penalties_missed', 'yellow_cards',
       'red_cards', 'saves', 'bonus', 'bps', 'influence', 'creativity',
       'threat', 'ict_index', 'value', 'transfers_balance', 'selected',
       'transfers_in', 'transfers_out'],
      dtype='object')

In [80]:
#Restricting each historic season to the columns present in the latest data
df_1819_filtered = df_1819.loc[:, names].copy()
df_1920_filtered = df_1920.loc[:, names].copy()

In [81]:
#Tagging every row with a numeric season label
df_1819_filtered = df_1819_filtered.assign(season=2018)
df_1920_filtered = df_1920_filtered.assign(season=2019)
df_2021 = df_2021.assign(season=2020)

In [95]:
#Converting the latest season's influence, creativity, threat and ict_index
#columns to float in a single astype call
df_2021 = df_2021.astype({
    'influence': float,
    'creativity': float,
    'threat': float,
    'ict_index': float,
})
df_2021.dtypes

element                int64
fixture                int64
opponent_team          int64
total_points           int64
was_home                bool
kickoff_time          object
team_h_score         float64
team_a_score         float64
round                  int64
minutes                int64
goals_scored           int64
assists                int64
clean_sheets           int64
goals_conceded         int64
own_goals              int64
penalties_saved        int64
penalties_missed       int64
yellow_cards           int64
red_cards              int64
saves                  int64
bonus                  int64
bps                    int64
influence            float64
creativity           float64
threat               float64
ict_index            float64
value                  int64
transfers_balance      int64
selected               int64
transfers_in           int64
transfers_out          int64
season                 int64
dtype: object

In [101]:
#Stacking the three seasons row-wise, oldest first
combined_df = pd.concat(
    objs=[df_1819_filtered, df_1920_filtered, df_2021],
    axis=0,
).copy()

## Checking new dataset before outputting

In [107]:
#Previewing the first rows of the combined dataset
combined_df.head()

Unnamed: 0,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,team_a_score,round,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,value,transfers_balance,selected,transfers_in,transfers_out,season
0,402,5,12,0,False,2018-08-12T12:30:00Z,4.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,55,0,103396,0,0,2018
1,83,8,16,3,False,2018-08-12T12:30:00Z,0.0,0.0,1,90,0,0,1,0,0,0,0,0,0,0,0,6,10.0,12.3,17.0,3.9,50,0,15138,0,0,2018
2,199,4,6,2,True,2018-08-11T14:00:00Z,0.0,3.0,1,90,0,0,0,3,0,0,0,0,0,0,0,24,20.2,18.2,0.0,3.8,55,0,192110,0,0,2018
3,14,1,13,1,True,2018-08-12T15:00:00Z,0.0,2.0,1,53,0,0,0,1,0,0,0,0,0,0,0,7,9.4,10.8,9.0,2.9,75,0,60423,0,0,2018
4,145,3,9,12,False,2018-08-11T14:00:00Z,0.0,2.0,1,90,0,1,1,0,0,0,0,0,0,0,3,38,46.0,14.0,0.0,6.0,40,0,652304,0,0,2018


In [108]:
#Checking the column types of the combined dataset

combined_df.dtypes

element                int64
fixture                int64
opponent_team          int64
total_points           int64
was_home                bool
kickoff_time          object
team_h_score         float64
team_a_score         float64
round                  int64
minutes                int64
goals_scored           int64
assists                int64
clean_sheets           int64
goals_conceded         int64
own_goals              int64
penalties_saved        int64
penalties_missed       int64
yellow_cards           int64
red_cards              int64
saves                  int64
bonus                  int64
bps                    int64
influence            float64
creativity           float64
threat               float64
ict_index            float64
value                  int64
transfers_balance      int64
selected               int64
transfers_in           int64
transfers_out          int64
season                 int64
dtype: object

In [112]:
#Writing the combined dataset to a CSV file in the Colab runtime
combined_df.to_csv(path_or_buf='fpl_combined_data.csv')
#Sending that file to the browser as a download
files.download('fpl_combined_data.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>