In [1]:
# This Fantasy Premier League project was inspired by an article published by Paul Corcoran on Medium:
# "A data driven approach to become a better FPL manager" - https://blog.devgenius.io/using-data-to-become-a-better-fpl-manager-2c4d178d6107
# The idea is to scrape data from the Fantasy Premier League API and generate a Streamlit tool to present the data in tabular form

In [4]:
# Import relevant packages
import pandas as pd
import requests
from sklearn.preprocessing import MinMaxScaler # package is scikit-learn
import warnings
import json
# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter also hides warnings raised by pandas and other libraries
# (e.g. deprecation or chained-assignment warnings) - consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

In [5]:
# Connect to the Fantasy Premier League API - this endpoint responds with player specific data
url = 'https://fantasy.premierleague.com/api/bootstrap-static/'
# requests has no default timeout: without one, a stalled connection would block the kernel forever
res = requests.get(url, timeout=30)

In [6]:
# Check the status of the connection
if res.status_code == 200:
    print('The API request was successful.')
else:
    print(f'Error: The API request failed with status code {res.status_code}.')
    # Stop here on a 4xx/5xx response (raises requests.HTTPError) instead of letting
    # the following cells try to parse an error page as player data
    res.raise_for_status()
# This code should print the text "The API request was successful." if the connection made was successful

The API request was successful.


In [7]:
# Parse the response body as JSON and keep the result (a dictionary, see the keys listed in the next cell)
# NOTE(review): this rebinds the name `json`, shadowing the `json` module imported at the top of the
# notebook; a distinct name would be safer, but the following cells read the payload through `json[...]`.
json = res.json()

In [8]:
# Use the .keys() method to return the keys of the dictionary 
# (each key is one top-level section of the API payload)
json.keys()

dict_keys(['events', 'game_settings', 'phases', 'teams', 'total_players', 'elements', 'element_stats', 'element_types'])

In [9]:
# Turn each section of the payload that we are interested in into its own pd.DataFrame:
#   teams_df          <- 'teams'         (the name and id of each team in the current season)
#   elements_types_df <- 'element_types' (mapping for player positions)
#   elements_df       <- 'elements'      (all of the player stats)
teams_df, elements_types_df, elements_df = (
    pd.DataFrame(json[section]) for section in ('teams', 'element_types', 'elements')
)

In [None]:
# Another way to do this
# Grab the data behind the FPL site
# data = requests.get('https://fantasy.premierleague.com/api/bootstrap-static/').json()
# Convert the data into a table
# dataTable = pd.DataFrame(data['elements'])
# List the variables
# dataTable.columns
# Select a few columns and show the top ten players by points per game
# dataTable[['web_name', 'points_per_game', 'total_points']].sort_values('points_per_game', ascending=False).head(10)

In [11]:
# Back to our project
# List the variables available in elements_df (the various player statistics);
# the columns kept for the analysis are picked from this list in the next cell
elements_df.columns

Index(['chance_of_playing_next_round', 'chance_of_playing_this_round', 'code',
       'cost_change_event', 'cost_change_event_fall', 'cost_change_start',
       'cost_change_start_fall', 'dreamteam_count', 'element_type', 'ep_next',
       'ep_this', 'event_points', 'first_name', 'form', 'id', 'in_dreamteam',
       'news', 'news_added', 'now_cost', 'photo', 'points_per_game',
       'second_name', 'selected_by_percent', 'special', 'squad_number',
       'status', 'team', 'team_code', 'total_points', 'transfers_in',
       'transfers_in_event', 'transfers_out', 'transfers_out_event',
       'value_form', 'value_season', 'web_name', 'minutes', 'goals_scored',
       'assists', 'clean_sheets', 'goals_conceded', 'own_goals',
       'penalties_saved', 'penalties_missed', 'yellow_cards', 'red_cards',
       'saves', 'bonus', 'bps', 'influence', 'creativity', 'threat',
       'ict_index', 'starts', 'expected_goals', 'expected_assists',
       'expected_goal_involvements', 'expected_goals_con

In [12]:
# In order to avoid confusion we work on players_df: a copy of elements_df restricted to the
# variables that we are interested in

player_columns = [
    'web_name', 'first_name', 'second_name', 'team', 'element_type', 'now_cost', 'total_points', 'bonus',
    'points_per_game', 'form', 'minutes', 'starts', 'starts_per_90', 'chance_of_playing_next_round',
    'chance_of_playing_this_round', 'selected_by_percent', 'transfers_in', 'goals_scored', 'expected_goals',
    'expected_goals_per_90', 'assists', 'expected_assists', 'expected_assists_per_90',
    'expected_goal_involvements', 'expected_goal_involvements_per_90', 'goals_conceded',
    'goals_conceded_per_90', 'expected_goals_conceded', 'expected_goals_conceded_per_90', 'clean_sheets',
    'clean_sheets_per_90', 'saves', 'saves_per_90', 'penalties_saved',
]

# .copy() gives players_df its own data, so the column assignments below write to a real frame
# rather than to a selection of elements_df (chained assignment)
players_df = elements_df[player_columns].copy()

# Some columns need to be converted from a string to a number (float)

cols_to_convert = ['now_cost', 'total_points', 'bonus', 'points_per_game', 'form', 'minutes', 'starts', 'starts_per_90',
                   'selected_by_percent', 'transfers_in', 'goals_scored', 'expected_goals', 'expected_goals_per_90', 'assists',
                   'expected_assists', 'expected_assists_per_90', 'expected_goal_involvements', 'expected_goal_involvements_per_90',
                   'goals_conceded', 'goals_conceded_per_90', 'expected_goals_conceded', 'expected_goals_conceded_per_90',
                   'clean_sheets', 'clean_sheets_per_90', 'saves', 'saves_per_90', 'penalties_saved']

players_df[cols_to_convert] = players_df[cols_to_convert].astype(float)

# Concat the first and second name variables into a new variable (Player)

players_df['Player'] = players_df['first_name'] + ' ' + players_df['second_name']


In [None]:
# List the variables in the players_df
# players_df.columns

In [13]:
# Display the players_df dataframe (long frames are shown truncated, first and last rows only)
players_df

Unnamed: 0,web_name,first_name,second_name,team,element_type,now_cost,total_points,bonus,points_per_game,form,...,goals_conceded,goals_conceded_per_90,expected_goals_conceded,expected_goals_conceded_per_90,clean_sheets,clean_sheets_per_90,saves,saves_per_90,penalties_saved,Player
0,Balogun,Folarin,Balogun,1,4,44.0,0.0,0.0,0.0,0.0,...,0.0,0.00,0.00,0.00,0.0,0.00,0.0,0.0,0.0,Folarin Balogun
1,Cédric,Cédric,Alves Soares,1,2,39.0,0.0,0.0,0.0,0.0,...,0.0,0.00,0.00,0.00,0.0,0.00,0.0,0.0,0.0,Cédric Alves Soares
2,M.Elneny,Mohamed,Elneny,1,3,44.0,4.0,0.0,4.0,0.0,...,0.0,0.00,0.00,0.00,0.0,0.00,0.0,0.0,0.0,Mohamed Elneny
3,Fábio Vieira,Fábio,Ferreira Vieira,1,3,54.0,21.0,0.0,2.6,0.0,...,2.0,0.76,1.48,0.56,1.0,0.38,0.0,0.0,0.0,Fábio Ferreira Vieira
4,Gabriel,Gabriel,dos Santos Magalhães,1,2,50.0,76.0,6.0,3.6,6.0,...,19.0,0.99,15.88,0.83,6.0,0.31,0.0,0.0,0.0,Gabriel dos Santos Magalhães
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
804,Hesketh,Owen,Hesketh,20,3,45.0,0.0,0.0,0.0,0.0,...,0.0,0.00,0.00,0.00,0.0,0.00,0.0,0.0,0.0,Owen Hesketh
805,Barnett,Ty,Barnett,20,3,45.0,0.0,0.0,0.0,0.0,...,0.0,0.00,0.00,0.00,0.0,0.00,0.0,0.0,0.0,Ty Barnett
806,Griffiths,Harvey,Griffiths,20,3,45.0,0.0,0.0,0.0,0.0,...,0.0,0.00,0.00,0.00,0.0,0.00,0.0,0.0,0.0,Harvey Griffiths
807,Mosquera,Yerson,Mosquera,20,2,40.0,0.0,0.0,0.0,0.0,...,0.0,0.00,0.00,0.00,0.0,0.00,0.0,0.0,0.0,Yerson Mosquera


In [14]:
# The format of the now_cost variable needs to be converted (in line with the Fantasy Premier League site values):
# the raw value is ten times the displayed price, e.g. 44 -> 4.4.
# The price is recomputed from the untouched elements_df column (assignment aligns on the shared index),
# so re-running this cell does not divide by 10 a second time, and it stays numeric instead of
# being formatted to a string and parsed back.
players_df['now_cost'] = (elements_df['now_cost'].astype(float) / 10).round(1)

In [15]:
# Make sure now_cost is stored as a float
cols_to_convert = ['now_cost']
players_df = players_df.astype(dict.fromkeys(cols_to_convert, float))

In [16]:
# Check the data type of each variable
print(players_df.dtypes) 

web_name                              object
first_name                            object
second_name                           object
team                                   int64
element_type                           int64
now_cost                             float64
total_points                         float64
bonus                                float64
points_per_game                      float64
form                                 float64
minutes                              float64
starts                               float64
starts_per_90                        float64
chance_of_playing_next_round         float64
chance_of_playing_this_round         float64
selected_by_percent                  float64
transfers_in                         float64
goals_scored                         float64
expected_goals                       float64
expected_goals_per_90                float64
assists                              float64
expected_assists                     float64
expected_a

In [17]:
# Add a calculated column: ROI, i.e. total points returned per million spent on the player
players_df["ROI_Per_Million"] = players_df["total_points"].div(players_df["now_cost"])

In [18]:
# You can map the team and position names by using the map() method
team_names = teams_df.set_index('id')['name']
position_names = elements_types_df.set_index('id')['singular_name']
# The team ids are read from elements_df (same index as players_df) rather than from the column being
# overwritten, so re-running this cell does not map already-mapped names to NaN
players_df['team'] = elements_df['team'].map(team_names)
players_df['position'] = players_df['element_type'].map(position_names)
# Filter out players who have not played any minutes; .copy() makes the result an independent frame
players_df = players_df[players_df['minutes'] != 0].copy()

In [19]:
# Aggregate the team totals. Named aggregation gives every total its final, user-facing column name
# directly, instead of renaming the columns by position and then renaming them a second time.
team_stats = (
    players_df.groupby('team')
    .agg(
        points=('total_points', 'sum'),
        bonus=('bonus', 'sum'),
        goals=('goals_scored', 'sum'),
        xG=('expected_goals', 'sum'),
        assists=('assists', 'sum'),
        xA=('expected_assists', 'sum'),
    )
    .sort_values('points', ascending=False)
    .reset_index()
    # Add a couple of calculated columns: actual output minus expected output
    .assign(**{
        'goals - xG': lambda totals: totals['goals'] - totals['xG'],
        'assists - xA': lambda totals: totals['assists'] - totals['xA'],
    })
    # Rearrange the columns to make more practical sense
    .loc[:, ['team', 'points', 'bonus', 'goals', 'xG', 'goals - xG', 'assists', 'xA', 'assists - xA']]
)

In [20]:
# Check your work
team_stats

Unnamed: 0,team,points,bonus,goals,xG,goals - xG,assists,xA,assists - xA
0,Liverpool,1201.0,99.0,48.0,50.74,-2.74,49.0,30.68,18.32
1,Arsenal,1175.0,102.0,45.0,45.97,-0.97,42.0,30.56,11.44
2,Aston Villa,1086.0,95.0,46.0,42.01,3.99,45.0,28.35,16.65
3,Spurs,1075.0,102.0,46.0,40.95,5.05,44.0,30.94,13.06
4,Newcastle,1072.0,81.0,47.0,46.97,0.03,38.0,24.89,13.11
5,Man City,1045.0,104.0,50.0,42.56,7.44,45.0,33.09,11.91
6,West Ham,958.0,79.0,35.0,33.91,1.09,31.0,19.23,11.77
7,Man Utd,942.0,65.0,31.0,32.61,-1.61,26.0,20.83,5.17
8,Fulham,942.0,68.0,30.0,31.52,-1.52,27.0,18.95,8.05
9,Brighton,919.0,88.0,40.0,40.03,-0.03,37.0,28.18,8.82


In [21]:
# Split players_df into one frame per playing position
goalkeepers_df = players_df.loc[players_df['position'].eq('Goalkeeper')]
defenders_df = players_df.loc[players_df['position'].eq('Defender')]
midfielders_df = players_df.loc[players_df['position'].eq('Midfielder')]
forwards_df = players_df.loc[players_df['position'].eq('Forward')]


In [23]:
# Subset each position dataframe with specific metrics

# Columns shared by every position table
base_columns = ['Player', 'team', 'position', 'total_points', 'bonus', 'points_per_game', 'form', 'minutes',
                'now_cost', 'ROI_Per_Million']


def select_with_difference(position_df, columns, new_col, actual_col, expected_col):
    """Return the requested columns of a position table plus an actual-minus-expected metric.

    Parameters
    ----------
    position_df : pd.DataFrame
        Players of one position.
    columns : list of str
        Columns of the result, in display order; must include ``new_col``.
    new_col : str
        Name of the calculated column.
    actual_col, expected_col : str
        The metric is ``position_df[actual_col] - position_df[expected_col]``.

    Returns
    -------
    pd.DataFrame
        A new frame; ``position_df`` itself is not modified, so no column is ever
        assigned on a selection of another frame.

    Raises
    ------
    KeyError
        If a requested column does not exist.
    """
    difference = position_df[actual_col] - position_df[expected_col]
    return position_df.assign(**{new_col: difference})[columns]


# Goalkeepers and defenders: goals conceded compared with expected goals conceded
goalkeepers_df = select_with_difference(
    goalkeepers_df,
    base_columns + ['goals_conceded', 'expected_goals_conceded', 'performance_xG_def', 'clean_sheets',
                    'saves', 'saves_per_90'],
    'performance_xG_def', 'goals_conceded', 'expected_goals_conceded',
)
defenders_df = select_with_difference(
    defenders_df,
    base_columns + ['goals_scored', 'expected_goals', 'expected_goals_per_90', 'assists', 'expected_assists',
                    'expected_assists_per_90', 'goals_conceded', 'expected_goals_conceded',
                    'performance_xG_def', 'clean_sheets'],
    'performance_xG_def', 'goals_conceded', 'expected_goals_conceded',
)
# Midfielders and forwards: goals scored compared with expected goals
midfielders_df = select_with_difference(
    midfielders_df,
    base_columns + ['goals_scored', 'expected_goals', 'performance_xG_off', 'expected_goals_per_90', 'assists',
                    'expected_assists', 'expected_assists_per_90', 'expected_goals_conceded'],
    'performance_xG_off', 'goals_scored', 'expected_goals',
)
forwards_df = select_with_difference(
    forwards_df,
    base_columns + ['goals_scored', 'expected_goals', 'performance_xG_off', 'expected_goals_per_90', 'assists',
                    'expected_assists', 'expected_assists_per_90'],
    'performance_xG_off', 'goals_scored', 'expected_goals',
)


In [24]:
# Rename columns in order to make practical sense to the end user.
# Columns whose name is already final (team, position, bonus, form, minutes, saves) are left out of the
# mapping, and the result is re-assigned instead of mutating the frame with inplace=True.
goalkeepers_df = goalkeepers_df.rename(columns={
    'Player': 'player',
    'total_points': 'points',
    'points_per_game': 'points p/g',
    'now_cost': 'now cost',
    'ROI_Per_Million': 'roi per million',
    'goals_conceded': 'conceded',
    'expected_goals_conceded': 'xGA',
    'performance_xG_def': 'conceded - xGA',
    'clean_sheets': 'clean sheets',
    'saves_per_90': 'saves per 90',
})

In [25]:
# Rename columns in order to make practical sense to the end user.
# Columns whose name is already final (team, position, bonus, form, minutes, assists) are left out of the
# mapping, as are the expected_goal_involvements columns, which the defenders table does not contain.
# The result is re-assigned instead of mutating the frame with inplace=True.
defenders_df = defenders_df.rename(columns={
    'Player': 'player',
    'total_points': 'points',
    'points_per_game': 'points p/g',
    'now_cost': 'now cost',
    'ROI_Per_Million': 'roi per million',
    'goals_scored': 'goals',
    'expected_goals': 'xG',
    'expected_goals_per_90': 'xG per 90',
    'expected_assists': 'xA',
    'expected_assists_per_90': 'xA per 90',
    'goals_conceded': 'conceded',
    'expected_goals_conceded': 'xGA',
    'performance_xG_def': 'conceded - xGA',
    'clean_sheets': 'clean sheets',
})

In [26]:
# Rename columns in order to make practical sense to the end user.
# Columns whose name is already final (team, position, bonus, form, minutes, assists) are left out of the
# mapping, and the result is re-assigned instead of mutating the frame with inplace=True.
midfielders_df = midfielders_df.rename(columns={
    'Player': 'player',
    'total_points': 'points',
    'points_per_game': 'points p/g',
    'now_cost': 'now cost',
    'ROI_Per_Million': 'roi per million',
    'goals_scored': 'goals',
    'expected_goals': 'xG',
    'performance_xG_off': 'goals - xG',
    'expected_goals_per_90': 'xG per 90',
    'expected_assists': 'xA',
    'expected_assists_per_90': 'xA per 90',
    'expected_goals_conceded': 'xGA',
})

In [27]:
# Rename columns in order to make practical sense to the end user.
# Columns whose name is already final (team, position, bonus, form, minutes, assists) are left out of the
# mapping, and the result is re-assigned instead of mutating the frame with inplace=True.
forwards_df = forwards_df.rename(columns={
    'Player': 'player',
    'total_points': 'points',
    'points_per_game': 'points p/g',
    'now_cost': 'now cost',
    'ROI_Per_Million': 'roi per million',
    'goals_scored': 'goals',
    'expected_goals': 'xG',
    'performance_xG_off': 'goals - xG',
    'expected_goals_per_90': 'xG per 90',
    'expected_assists': 'xA',
    'expected_assists_per_90': 'xA per 90',
})

In [29]:
# Write every table to its own CSV file (row index omitted), in the order listed
csv_exports = {
    'teams.csv': team_stats,
    'goalkeepers.csv': goalkeepers_df,
    'defenders.csv': defenders_df,
    'midfielders.csv': midfielders_df,
    'forwards.csv': forwards_df,
}
for csv_name, table in csv_exports.items():
    table.to_csv(csv_name, index=False)