In [222]:
# imports
import requests
import pandas as pd
import numpy as np

In [223]:
# gets data from API
def fetchData(url):
    try:
        r = requests.get(url)
        return r.json()
    except:
        return None

In [224]:
# calls fetchData function for chosen URL
general_info_json = fetchData('https://fantasy.premierleague.com/api/bootstrap-static/')
fixtures_json = fetchData('https://fantasy.premierleague.com/api/fixtures/')
#players_detailed_json = fetchData('https://fantasy.premierleague.com/api/element-summary/{' + element_id + '}/')
#gameweek_live_json = fetchData('https://fantasy.premierleague.com/api/event/{' + event_id + '}/live/')

In [225]:
# gets a list of keys of the json
general_info_json.keys()

dict_keys(['events', 'game_settings', 'phases', 'teams', 'total_players', 'elements', 'element_stats', 'element_types'])

In [226]:
# gets a list of keys of the json
fixtures_json[0].keys()

dict_keys(['code', 'event', 'finished', 'finished_provisional', 'id', 'kickoff_time', 'minutes', 'provisional_start_time', 'started', 'team_a', 'team_a_score', 'team_h', 'team_h_score', 'stats', 'team_h_difficulty', 'team_a_difficulty', 'pulse_id'])

In [227]:
events_df = pd.DataFrame(general_info_json['events'])

In [228]:
# shows how many values are null for each column
events_df.isnull().sum()

id                            0
name                          0
deadline_time                 0
average_entry_score           0
finished                      0
data_checked                  0
highest_scoring_entry        30
deadline_time_epoch           0
deadline_time_game_offset     0
highest_score                30
is_previous                   0
is_current                    0
is_next                       0
chip_plays                    0
most_selected                30
most_transferred_in          30
top_element                  30
top_element_info             30
transfers_made                0
most_captained               30
most_vice_captained          30
dtype: int64

In [229]:
# creates 2 dataframes with rows split based on whether the value in "finished" is True or False
data_checked = events_df.groupby(events_df.finished)
data_checked_df = data_checked.get_group(True)
data_not_checked_df = data_checked.get_group(False)

# https://www.delftstack.com/howto/python-pandas/split-pandas-dataframe/

In [230]:
data_checked_df.isnull().sum()

id                           0
name                         0
deadline_time                0
average_entry_score          0
finished                     0
data_checked                 0
highest_scoring_entry        0
deadline_time_epoch          0
deadline_time_game_offset    0
highest_score                0
is_previous                  0
is_current                   0
is_next                      0
chip_plays                   0
most_selected                0
most_transferred_in          0
top_element                  0
top_element_info             0
transfers_made               0
most_captained               0
most_vice_captained          0
dtype: int64

No missing values in data_checked_df (all missing values moved to data_not_checked_df)

In [231]:
data_not_checked_df.isnull().sum()

id                            0
name                          0
deadline_time                 0
average_entry_score           0
finished                      0
data_checked                  0
highest_scoring_entry        30
deadline_time_epoch           0
deadline_time_game_offset     0
highest_score                30
is_previous                   0
is_current                    0
is_next                       0
chip_plays                    0
most_selected                30
most_transferred_in          30
top_element                  30
top_element_info             30
transfers_made                0
most_captained               30
most_vice_captained          30
dtype: int64

In [232]:
# removes all columns with missing data from data_not_checked_df
try:
    data_not_checked_df.dropna(axis=1, inplace=True)
except:
    pass
data_not_checked_df.isnull().sum()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


id                           0
name                         0
deadline_time                0
average_entry_score          0
finished                     0
data_checked                 0
deadline_time_epoch          0
deadline_time_game_offset    0
is_previous                  0
is_current                   0
is_next                      0
chip_plays                   0
transfers_made               0
dtype: int64

This proves that the removed columns from data_not_checked_df were missing values in certain columns, as those held data which can only exist once the data has been checked (as suspected). Since we have proven this, we can refactor events_df into two new dataframes, one containing the general event data and the other containing detailed data and stats, only for the events which have had the data checked.

In [233]:
# general event data (checked and non-checked data)
general_events_info_df = events_df[['id', 'name', 'deadline_time', 'is_previous', 'is_current', 'is_next', 'finished', 'data_checked']]

In [234]:
general_events_info_df.isnull().sum()

id               0
name             0
deadline_time    0
is_previous      0
is_current       0
is_next          0
finished         0
data_checked     0
dtype: int64

No missing data as data can exist without the data being checked and updated once the event is finished.

In [235]:
# detailed data and stats (checked data only)
general_events_stats_df = data_checked_df[['id', 'average_entry_score', 'deadline_time_epoch', 'deadline_time_game_offset', 'chip_plays', 'transfers_made']]

In [236]:
general_events_stats_df.isnull().sum()

id                           0
average_entry_score          0
deadline_time_epoch          0
deadline_time_game_offset    0
chip_plays                   0
transfers_made               0
dtype: int64

We now have the two dataframes as planned with no missing data.

In [263]:
phases_df = pd.DataFrame(general_info_json['phases'])
phases_df.isnull().sum()

id             0
name           0
start_event    0
stop_event     0
dtype: int64

In [266]:
teams_df = pd.DataFrame(general_info_json['teams'])
teams_df.isnull().sum()

code                      0
draw                      0
form                     20
id                        0
loss                      0
name                      0
played                    0
points                    0
position                  0
short_name                0
strength                  0
team_division            20
unavailable               0
win                       0
strength_overall_home     0
strength_overall_away     0
strength_attack_home      0
strength_attack_away      0
strength_defence_home     0
strength_defence_away     0
pulse_id                  0
dtype: int64

In [267]:
def calculateTeamForm():
    return

In [238]:
#creates data frame
elements_df = pd.DataFrame(general_info_json['elements'])

In [239]:
elements_types_df = pd.DataFrame(general_info_json['element_types'])

In [240]:
elements_df['position'] = elements_df.element_type.map(elements_types_df.set_index('id').singular_name)

In [241]:
elements_df.columns

Index(['chance_of_playing_next_round', 'chance_of_playing_this_round', 'code',
       'cost_change_event', 'cost_change_event_fall', 'cost_change_start',
       'cost_change_start_fall', 'dreamteam_count', 'element_type', 'ep_next',
       'ep_this', 'event_points', 'first_name', 'form', 'id', 'in_dreamteam',
       'news', 'news_added', 'now_cost', 'photo', 'points_per_game',
       'second_name', 'selected_by_percent', 'special', 'squad_number',
       'status', 'team', 'team_code', 'total_points', 'transfers_in',
       'transfers_in_event', 'transfers_out', 'transfers_out_event',
       'value_form', 'value_season', 'web_name', 'minutes', 'goals_scored',
       'assists', 'clean_sheets', 'goals_conceded', 'own_goals',
       'penalties_saved', 'penalties_missed', 'yellow_cards', 'red_cards',
       'saves', 'bonus', 'bps', 'influence', 'creativity', 'threat',
       'ict_index', 'influence_rank', 'influence_rank_type', 'creativity_rank',
       'creativity_rank_type', 'threat_rank'

In [242]:
fantasy_stats_df = elements_df[['first_name', 'second_name','team','position','selected_by_percent','now_cost','transfers_in','transfers_out','value_season', 'total_points']]

In [243]:
#player_cost_df = elements_df[['id','second_name','cost_change_event','cost_change_event_fall','cost_change_start','now_cost']]
#chance_of_playing_df = elements_df[['id','second_name','chance_of_playing_next_round','chance_of_playing_this_round']]

In [244]:
#final_team = {'GK':0,'GK2':0,'DEF1':0,'DEF2':0,'DEF3':0,'DEF4':0,'DEF5':0,'MID1':0,'MID2':0,'MID3':0,'MID4':0,'MID5':0,'FOR1':0,'FOR2':0,'FOR3':0,}

In [245]:
choosing_team_df = elements_df[['id','first_name','second_name','team','position','now_cost','status','total_points','value_season']]

In [246]:
# removes all players who currently are not 100% available for the next game
choosing_team_df = choosing_team_df[choosing_team_df.status == 'a']

# removes all players who currently has no points
choosing_team_df = choosing_team_df[choosing_team_df.total_points != '0.0']

In [247]:
#choosing_team_df.replace({'element_type' : { 1 : 'GK', 2 : 'DEF', 3 : 'MID' , 4 : 'FOR'}})

In [248]:
# best value GKs
choosing_team_df[choosing_team_df.position == 'Goalkeeper'].sort_values(by=['value_season'], ascending = False).head(10)

Unnamed: 0,id,first_name,second_name,team,position,now_cost,status,total_points,value_season
73,80,David,Raya Martin,3,Goalkeeper,46,a,32,7.0
115,69,Robert,Sánchez,4,Goalkeeper,46,a,31,6.7
490,353,Hugo,Lloris,17,Goalkeeper,55,a,36,6.5
610,475,José,Malheiro de Sá,20,Goalkeeper,50,a,32,6.4
186,143,Edouard,Mendy,6,Goalkeeper,61,a,39,6.4
465,334,Alex,McCarthy,16,Goalkeeper,45,a,28,6.2
194,146,Vicente,Guaita,7,Goalkeeper,45,a,28,6.2
378,270,David,de Gea,13,Goalkeeper,51,a,30,5.9
357,257,Ederson,Santana de Moraes,12,Goalkeeper,60,a,35,5.8
328,231,Alisson,Ramses Becker,11,Goalkeeper,60,a,35,5.8


In [249]:
# best value DEFs
choosing_team_df[choosing_team_df.position == 'Defender'].sort_values(by=['value_season'], ascending = False).head(10)

Unnamed: 0,id,first_name,second_name,team,position,now_cost,status,total_points,value_season
103,56,Shane,Duffy,4,Defender,44,a,40,9.1
84,91,Ethan,Pinnock,3,Defender,46,a,40,8.7
69,76,Pontus,Jansson,3,Defender,47,a,40,8.5
356,256,João Pedro Cavaco,Cancelo,12,Defender,63,a,50,7.9
188,527,Trevoh,Chalobah,6,Defender,48,a,38,7.9
106,59,Lewis,Dunk,4,Defender,50,a,38,7.6
163,119,César,Azpilicueta,6,Defender,62,a,42,6.8
506,370,Sergio,Reguilón,17,Defender,51,a,34,6.7
166,122,Marcos,Alonso,6,Defender,59,a,39,6.6
362,262,Rúben Santos,Gato Alves Dias,12,Defender,62,a,40,6.5


In [250]:
# best value MIDs
choosing_team_df[choosing_team_df.position == 'Midfielder'].sort_values(by=['value_season'], ascending = False).head(10)

Unnamed: 0,id,first_name,second_name,team,position,now_cost,status,total_points,value_season
243,482,Andros,Townsend,8,Midfielder,57,a,45,7.9
572,419,Saïd,Benrahma,19,Midfielder,66,a,47,7.1
169,125,Mateo,Kovacic,6,Midfielder,52,a,35,6.7
542,399,Ismaila,Sarr,18,Midfielder,63,a,42,6.7
244,484,Demarai,Gray,8,Midfielder,58,a,38,6.6
330,233,Mohamed,Salah,11,Midfielder,128,a,83,6.5
193,144,Conor,Gallagher,7,Midfielder,57,a,36,6.3
501,365,Pierre-Emile,Højbjerg,17,Midfielder,49,a,29,5.9
268,210,Youri,Tielemans,9,Midfielder,64,a,37,5.8
395,289,Mason,Greenwood,13,Midfielder,76,a,43,5.7


In [251]:
# best value FORs
choosing_team_df[choosing_team_df.position == 'Forward'].sort_values(by=['value_season'], ascending = False).head(10)

Unnamed: 0,id,first_name,second_name,team,position,now_cost,status,total_points,value_season
566,413,Michail,Antonio,19,Forward,81,a,50,6.2
421,307,Allan,Saint-Maximin,14,Forward,68,a,39,5.7
263,205,Jamie,Vardy,9,Forward,106,a,56,5.3
110,63,Neal,Maupay,4,Forward,66,a,34,5.2
71,78,Ivan,Toney,3,Forward,64,a,33,5.2
549,450,Emmanuel,Dennis,18,Forward,52,a,26,5.0
363,263,Gabriel Fernando,de Jesus,12,Forward,86,a,41,4.8
61,337,Danny,Ings,2,Forward,78,a,37,4.7
615,583,Hee-Chan,Hwang,20,Forward,56,a,26,4.6
436,315,Teemu,Pukki,15,Forward,59,a,24,4.1


In [252]:
# best value players
choosing_team_df.sort_values(by=['value_season'], ascending = False).head(10)

Unnamed: 0,id,first_name,second_name,team,position,now_cost,status,total_points,value_season
103,56,Shane,Duffy,4,Defender,44,a,40,9.1
84,91,Ethan,Pinnock,3,Defender,46,a,40,8.7
69,76,Pontus,Jansson,3,Defender,47,a,40,8.5
188,527,Trevoh,Chalobah,6,Defender,48,a,38,7.9
356,256,João Pedro Cavaco,Cancelo,12,Defender,63,a,50,7.9
243,482,Andros,Townsend,8,Midfielder,57,a,45,7.9
106,59,Lewis,Dunk,4,Defender,50,a,38,7.6
572,419,Saïd,Benrahma,19,Midfielder,66,a,47,7.1
73,80,David,Raya Martin,3,Goalkeeper,46,a,32,7.0
163,119,César,Azpilicueta,6,Defender,62,a,42,6.8


In [253]:
position_points_value_df = elements_df[['status', 'minutes', 'position', 'total_points', 'value_season']]
position_points_value_df['value'] = position_points_value_df.value_season.astype(float)
position_points_value_df = position_points_value_df[position_points_value_df.status != 'u']
position_points_value_df = position_points_value_df[position_points_value_df.minutes != 0]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [254]:
# total points per position on average
# only including players currently in the premier league and who have played 1 min+
position_points_df_pivot = pd.pivot_table(data=position_points_value_df[['position', 'total_points']],index=['position'])
position_points_df_pivot.sort_values(by=['total_points'], ascending = False)

Unnamed: 0_level_0,total_points
position,Unnamed: 1_level_1
Goalkeeper,20.857143
Forward,17.192982
Defender,14.973154
Midfielder,14.941176


In [255]:
# value per position on average
# only including players currently in the premier league and who have played 1 min+
position_value_df_pivot = pd.pivot_table(data=position_points_value_df[['position', 'value']],index=['position'])
position_value_df_pivot.sort_values(by=['value'], ascending = False)

Unnamed: 0_level_0,value
position,Unnamed: 1_level_1
Goalkeeper,4.153571
Defender,3.03557
Midfielder,2.517112
Forward,2.414035


Goalkeepers seem to get the most points on average (likely skewed due to the lower amount of goalkeepers included compared to other positions) and they are also the best in terms of value.

Forwards on average get a high amount of points but are the worst in terms of value (most high scoring forwards are the most expensive players in the game)

In [256]:
position_points_value_df.groupby('position').size()

position
Defender      149
Forward        57
Goalkeeper     28
Midfielder    187
dtype: int64

The count of players per position shows how little goalkeepers are included in this compared to other positions

In [257]:
# best value FORs
choosing_team_df[choosing_team_df.position == 'Goalkeeper'].sort_values(by=['total_points'], ascending = False).head(2)

Unnamed: 0,id,first_name,second_name,team,position,now_cost,status,total_points,value_season
186,143,Edouard,Mendy,6,Goalkeeper,61,a,39,6.4
490,353,Hugo,Lloris,17,Goalkeeper,55,a,36,6.5


In [258]:
choosing_team_df[choosing_team_df.position == 'Defender'].sort_values(by=['total_points'], ascending = False).head(5)

Unnamed: 0,id,first_name,second_name,team,position,now_cost,status,total_points,value_season
356,256,João Pedro Cavaco,Cancelo,12,Defender,63,a,50,7.9
163,119,César,Azpilicueta,6,Defender,62,a,42,6.8
334,237,Trent,Alexander-Arnold,11,Defender,75,a,41,5.5
326,229,Virgil,van Dijk,11,Defender,66,a,41,6.2
103,56,Shane,Duffy,4,Defender,44,a,40,9.1


In [259]:
choosing_team_df[choosing_team_df.position == 'Midfielder'].sort_values(by=['total_points'], ascending = False).head(5)

Unnamed: 0,id,first_name,second_name,team,position,now_cost,status,total_points,value_season
330,233,Mohamed,Salah,11,Midfielder,128,a,83,6.5
495,359,Heung-Min,Son,17,Midfielder,101,a,51,5.0
327,230,Sadio,Mané,11,Midfielder,119,a,49,4.1
572,419,Saïd,Benrahma,19,Midfielder,66,a,47,7.1
243,482,Andros,Townsend,8,Midfielder,57,a,45,7.9


In [260]:
choosing_team_df[choosing_team_df.position == 'Forward'].sort_values(by=['total_points'], ascending = False).head(3)

Unnamed: 0,id,first_name,second_name,team,position,now_cost,status,total_points,value_season
263,205,Jamie,Vardy,9,Forward,106,a,56,5.3
566,413,Michail,Antonio,19,Forward,81,a,50,6.2
363,263,Gabriel Fernando,de Jesus,12,Forward,86,a,41,4.8
