In [1]:
import requests, json
from pprint import pprint
import pandas as pd
# show every column when a dataframe is displayed (no column truncation)
pd.set_option('display.max_columns', None)
from tqdm.auto import tqdm
# register tqdm with pandas so that Series/DataFrame gain .progress_apply(),
# which is used below to show a progress bar while fetching per-player data
tqdm.pandas()

  from pandas import Panel


In [2]:
# base url for all FPL API endpoints
base_url = 'https://fantasy.premierleague.com/api/'

# get data from bootstrap-static endpoint
# - timeout: requests waits forever by default, so a stalled connection
#   would otherwise hang the notebook
# - raise_for_status: fail here with a clear HTTP error instead of a
#   confusing JSON/KeyError further down when the API returns an error page
bootstrap_response = requests.get(base_url + 'bootstrap-static/', timeout=30)
bootstrap_response.raise_for_status()
r = bootstrap_response.json()

In [3]:
def get_season_history(player_id, timeout=30):
    '''Get all past season info for a given player_id.

    Sends a GET request to
    https://fantasy.premierleague.com/api/element-summary/{player_id}/
    and turns the 'history_past' list of the JSON response into a dataframe.

    Parameters
    ----------
    player_id : int or str
        Player id; it is converted with str() to build the URL.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout, requests would block indefinitely on a
        stalled connection.

    Returns
    -------
    pandas.DataFrame
        One row per entry in 'history_past', plus a 'player_id' column
        holding the id that was requested. The frame has no rows when
        'history_past' is an empty list.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status code.
    '''

    response = requests.get(
        base_url + 'element-summary/' + str(player_id) + '/',
        timeout=timeout,
    )
    # surface HTTP errors (e.g. rate limiting) immediately instead of
    # failing later while decoding or indexing the payload
    response.raise_for_status()
    summary = response.json()

    # extract 'history_past' data from response into dataframe
    df = pd.json_normalize(summary['history_past'])
    # tag every row with the player it belongs to, so frames from
    # different players can be combined and joined later
    df['player_id'] = player_id

    return df

In [4]:
# Build one dataframe per section of the bootstrap-static payload:
#   'elements'      -> players
#   'teams'         -> teams
#   'element_types' -> positions (player position information)
players, teams, positions = (
    pd.json_normalize(r[section])
    for section in ('elements', 'teams', 'element_types')
)

In [5]:
# Reduce the players table to identifying columns, then replace the numeric
# team and position references with their readable labels.
players = (
    # columns of interest from players df
    players[['id', 'first_name', 'second_name', 'web_name', 'team',
             'element_type']]
    # team name: the player's 'id' becomes 'id_player', the team's 'id'
    # keeps its name and is dropped together with the 'team' key
    .merge(
        teams[['id', 'name']],
        left_on='team',
        right_on='id',
        suffixes=('_player', None)
    )
    .drop(columns=['team', 'id'])
    # player position (short label), dropping the join keys afterwards
    .merge(
        positions[['id', 'singular_name_short']],
        left_on='element_type',
        right_on='id'
    )
    .drop(columns=['element_type', 'id'])
)

players.head()

Unnamed: 0,id_player,first_name,second_name,web_name,name,singular_name_short
0,1,Bernd,Leno,Leno,Arsenal,GKP
1,2,Rúnar Alex,Rúnarsson,Rúnarsson,Arsenal,GKP
2,532,Karl,Hein,Hein,Arsenal,GKP
3,559,Aaron,Ramsdale,Ramsdale,Arsenal,GKP
4,572,Arthur,Okonkwo,Okonkwo,Arsenal,GKP


In [6]:
# fetch the past-season history of every player (one API call per player,
# shown with a tqdm progress bar) and stack the per-player frames into a
# single dataframe
points = pd.concat(
    players['id_player'].progress_apply(get_season_history).tolist()
)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=714.0), HTML(value='')))




In [7]:
# display the combined past-season records (one row per player and season)
points

Unnamed: 0,season_name,element_code,start_cost,end_cost,total_points,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,player_id
0,2018/19,80201.0,50.0,49.0,106.0,2835.0,0.0,0.0,6.0,42.0,0.0,0.0,0.0,0.0,0.0,105.0,5.0,568.0,807.2,0.0,0.0,80.5,1
1,2019/20,80201.0,50.0,48.0,114.0,2649.0,0.0,0.0,7.0,39.0,0.0,1.0,0.0,2.0,0.0,113.0,10.0,591.0,843.2,0.0,0.0,84.1,1
2,2020/21,80201.0,50.0,50.0,131.0,3131.0,0.0,0.0,11.0,37.0,1.0,1.0,0.0,0.0,1.0,86.0,11.0,625.0,702.2,0.0,2.0,70.3,1
0,2020/21,115918.0,45.0,44.0,1.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,8.0,16.6,0.0,0.0,1.7,2
0,2016/17,225321.0,40.0,40.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,559
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,2019/20,441271.0,40.0,39.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,443
1,2020/21,441271.0,45.0,44.0,13.0,573.0,0.0,0.0,0.0,11.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,80.0,100.4,59.7,26.0,18.7,443
0,2020/21,448514.0,50.0,50.0,54.0,1400.0,1.0,1.0,3.0,23.0,0.0,0.0,0.0,1.0,0.0,0.0,6.0,264.0,243.2,234.5,196.0,67.4,470
0,2020/21,481626.0,40.0,39.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,549


In [8]:
# display the players table (id, names, team name, position label)
players

Unnamed: 0,id_player,first_name,second_name,web_name,name,singular_name_short
0,1,Bernd,Leno,Leno,Arsenal,GKP
1,2,Rúnar Alex,Rúnarsson,Rúnarsson,Arsenal,GKP
2,532,Karl,Hein,Hein,Arsenal,GKP
3,559,Aaron,Ramsdale,Ramsdale,Arsenal,GKP
4,572,Arthur,Okonkwo,Okonkwo,Arsenal,GKP
...,...,...,...,...,...,...
709,470,Rayan,Ait Nouri,Ait Nouri,Wolves,DEF
710,549,Christian,Marques,Marques,Wolves,DEF
711,635,Hugo,Bueno,Bueno,Wolves,DEF
712,685,Toti António,Gomes,Toti,Wolves,DEF


In [10]:
# attach each player's web_name, team name and position label to their
# past-season rows, matching players.id_player to points.player_id
points = pd.merge(
    players[['id_player', 'web_name', 'name', 'singular_name_short']],
    points,
    left_on='id_player',
    right_on='player_id'
)

In [11]:
# keep only the rows that belong to the 2020/21 season
new_df = points.loc[points['season_name'].eq('2020/21')]

In [13]:
# show who each 2020/21 row belongs to next to their end cost and total points
new_df.loc[
    :,
    ['id_player', 'web_name', 'name', 'singular_name_short',
     'end_cost', 'total_points']
]

Unnamed: 0,id_player,web_name,name,singular_name_short,end_cost,total_points
2,1,Leno,Arsenal,GKP,50.0,131.0
3,2,Rúnarsson,Arsenal,GKP,44.0,1.0
7,559,Ramsdale,Arsenal,GKP,46.0,123.0
13,28,Steer,Aston Villa,GKP,39.0,0.0
20,30,Martínez,Aston Villa,GKP,53.0,186.0
...,...,...,...,...,...,...
2233,439,Kilman,Wolves,DEF,38.0,58.0
2235,443,Hoever,Wolves,DEF,44.0,13.0
2236,470,Ait Nouri,Wolves,DEF,50.0,54.0
2237,549,Marques,Wolves,DEF,39.0,0.0
