In [1]:
# imports
import requests
import pandas as pd
import appconstants
import managejson
from api import fetchData
from data import getPlayerGameweekData
pd.options.mode.chained_assignment = None  # default='warn'

In [2]:
# gets data from API
'''
def fetchData(url):
    try:
        r = requests.get(url)
        return r.json()
    except:
        return None
'''

'\ndef fetchData(url):\n    try:\n        r = requests.get(url)\n        return r.json()\n    except:\n        return None\n'

In [3]:
# calls fetchData function for chosen URL
general_info_json = fetchData('https://fantasy.premierleague.com/api/bootstrap-static/', None)
fixtures_json = fetchData('https://fantasy.premierleague.com/api/fixtures/', None)
#players_detailed_json = fetchData('https://fantasy.premierleague.com/api/element-summary/{' + element_id + '}/', None)
#gameweek_live_json = fetchData('https://fantasy.premierleague.com/api/event/{' + event_id + '}/live/')

manager_basic_info = fetchData('https://fantasy.premierleague.com/api/entry/882831/', None)
manager_history = fetchData('https://fantasy.premierleague.com/api/entry/123457/history/', None)

In [4]:
# gets a list of keys of the json
general_info_json.keys()

dict_keys(['events', 'game_settings', 'phases', 'teams', 'total_players', 'elements', 'element_stats', 'element_types'])

In [5]:
events_df = pd.DataFrame(general_info_json['events'])

In [6]:
# shows how many values are null for each column
events_df.isnull().sum()

id                           0
name                         0
deadline_time                0
average_entry_score          0
finished                     0
data_checked                 0
highest_scoring_entry        8
deadline_time_epoch          0
deadline_time_game_offset    0
highest_score                8
is_previous                  0
is_current                   0
is_next                      0
cup_leagues_created          0
h2h_ko_matches_created       0
chip_plays                   0
most_selected                8
most_transferred_in          8
top_element                  8
top_element_info             8
transfers_made               0
most_captained               8
most_vice_captained          8
dtype: int64

In [7]:
# creates 2 dataframes with rows split based on whether the value in "finished" is True or False
data_checked = events_df.groupby(events_df.finished)
data_checked_df = data_checked.get_group(True)
data_not_checked_df = data_checked.get_group(False)

# https://www.delftstack.com/howto/python-pandas/split-pandas-dataframe/

In [8]:
data_checked_df.isnull().sum()

id                           0
name                         0
deadline_time                0
average_entry_score          0
finished                     0
data_checked                 0
highest_scoring_entry        0
deadline_time_epoch          0
deadline_time_game_offset    0
highest_score                0
is_previous                  0
is_current                   0
is_next                      0
cup_leagues_created          0
h2h_ko_matches_created       0
chip_plays                   0
most_selected                0
most_transferred_in          0
top_element                  0
top_element_info             0
transfers_made               0
most_captained               0
most_vice_captained          0
dtype: int64

No missing values in data_checked_df (all missing values moved to data_not_checked_df)

In [9]:
data_not_checked_df.isnull().sum()

id                           0
name                         0
deadline_time                0
average_entry_score          0
finished                     0
data_checked                 0
highest_scoring_entry        8
deadline_time_epoch          0
deadline_time_game_offset    0
highest_score                8
is_previous                  0
is_current                   0
is_next                      0
cup_leagues_created          0
h2h_ko_matches_created       0
chip_plays                   0
most_selected                8
most_transferred_in          8
top_element                  8
top_element_info             8
transfers_made               0
most_captained               8
most_vice_captained          8
dtype: int64

In [10]:
events_general_info_df = events_df[['id', 'name', 'is_previous', 'is_current', 'is_next', 'finished', 'data_checked']]
events_general_info_df.isnull().sum()

id              0
name            0
is_previous     0
is_current      0
is_next         0
finished        0
data_checked    0
dtype: int64

In [11]:
events_deadline_df = events_df[['id', 'deadline_time', 'deadline_time_epoch', 'deadline_time_game_offset']]
events_general_info_df.isnull().sum()

id              0
name            0
is_previous     0
is_current      0
is_next         0
finished        0
data_checked    0
dtype: int64

In [12]:
events_user_scored_df = events_df[['id', 'average_entry_score', 'highest_scoring_entry', 'highest_score']]
events_user_scored_df.isnull().sum()

id                       0
average_entry_score      0
highest_scoring_entry    8
highest_score            8
dtype: int64

In [13]:
try:
    events_user_scored_df.dropna(inplace=True, axis=0)
except:
    pass
events_user_scored_df.isnull().sum()

id                       0
average_entry_score      0
highest_scoring_entry    0
highest_score            0
dtype: int64

In [14]:
events_user_management_df = events_df[['id', 'chip_plays', 'most_selected', 'most_transferred_in', 'top_element', 'top_element_info', 'transfers_made', 'most_captained', 'most_vice_captained']]
events_user_management_df.isnull().sum()

id                     0
chip_plays             0
most_selected          8
most_transferred_in    8
top_element            8
top_element_info       8
transfers_made         0
most_captained         8
most_vice_captained    8
dtype: int64

In [15]:
try:
    events_user_management_df.dropna(inplace=True, axis=0)
except:
    pass
events_user_management_df.isnull().sum()

id                     0
chip_plays             0
most_selected          0
most_transferred_in    0
top_element            0
top_element_info       0
transfers_made         0
most_captained         0
most_vice_captained    0
dtype: int64

In [16]:
game_settings_full_dict = general_info_json.get('game_settings')
game_settings_full_dict.keys()

dict_keys(['league_join_private_max', 'league_join_public_max', 'league_max_size_public_classic', 'league_max_size_public_h2h', 'league_max_size_private_h2h', 'league_max_ko_rounds_private_h2h', 'league_prefix_public', 'league_points_h2h_win', 'league_points_h2h_lose', 'league_points_h2h_draw', 'league_ko_first_instead_of_random', 'cup_start_event_id', 'cup_stop_event_id', 'cup_qualifying_method', 'cup_type', 'squad_squadplay', 'squad_squadsize', 'squad_team_limit', 'squad_total_spend', 'ui_currency_multiplier', 'ui_use_special_shirts', 'ui_special_shirt_exclusions', 'stats_form_days', 'sys_vice_captain_enabled', 'transfers_cap', 'transfers_sell_on_fee', 'league_h2h_tiebreak_stats', 'timezone'])

In [17]:
#def moveKey(dictionary, key, newDictionary):
#    if key in dictionary:
#        newDictionary[key] = dictionary.get(key)
#        del dictionary[key]

In [18]:
#game_settings_league_dict = {}
#for key in ['league_join_private_max', 'league_join_public_max', 'league_max_size_public_classic', 'league_max_size_public_h2h', 'league_max_size_private_h2h', 'league_max_size_private_h2h', 'league_max_ko_rounds_private_h2h', 'league_prefix_public', 'league_points_h2h_win', 'league_points_h2h_lose', 'league_points_h2h_draw', 'league_ko_first_instead_of_random', ]:
#    moveKey(game_settings_full_dict, key, game_settings_league_dict)

#game_settings_full_dict.keys(), game_settings_league_dict.keys()

In [19]:
phases_df = pd.DataFrame(general_info_json['phases'])
phases_df.isnull().sum()

id             0
name           0
start_event    0
stop_event     0
dtype: int64

In [20]:
teams_df = pd.DataFrame(general_info_json['teams'])
teams_df.isnull().sum()

code                      0
draw                      0
form                     20
id                        0
loss                      0
name                      0
played                    0
points                    0
position                  0
short_name                0
strength                  0
team_division            20
unavailable               0
win                       0
strength_overall_home     0
strength_overall_away     0
strength_attack_home      0
strength_attack_away      0
strength_defence_home     0
strength_defence_away     0
pulse_id                  0
dtype: int64

We are not dropping the 'form' column as that is something which will be useful in our models, despite the missing values (we will calculate our own values)

In [21]:
# Need to work out the best way to calculate form of a team.
def calculateTeamForm():
    return

In [22]:

try:
    teams_df.drop('team_division', inplace=True, axis=1)
except:
    pass
teams_df.isnull().sum()

code                      0
draw                      0
form                     20
id                        0
loss                      0
name                      0
played                    0
points                    0
position                  0
short_name                0
strength                  0
unavailable               0
win                       0
strength_overall_home     0
strength_overall_away     0
strength_attack_home      0
strength_attack_away      0
strength_defence_home     0
strength_defence_away     0
pulse_id                  0
dtype: int64

In [23]:
teams_results_df = teams_df[['code', 'position', 'played', 'win', 'draw', 'loss', 'points']]

In [24]:
teams_name_dict = teams_df[['code', 'id', 'name']].set_index('code')['name'].to_dict

In [25]:
teams_short_name_dict = teams_df[['code', 'short_name']].set_index('code')['short_name'].to_dict

In [26]:
teams_rating_df = teams_df[['code', 'strength', 'strength_attack_home', 'strength_defence_home', 'strength_overall_home', 'strength_attack_away', 'strength_defence_away', 'strength_overall_away']]

In [27]:
teams_unavailable_dict = teams_df[['code', 'unavailable']].set_index('code')['unavailable'].to_dict

In [28]:
teams_pulse_id_dict = teams_df[['code', 'pulse_id']].set_index('code')['pulse_id'].to_dict

In [29]:
total_players = general_info_json.get('total_players')

In [30]:
elements_df = pd.DataFrame(general_info_json['elements'])

In [31]:
elements_df.keys()

Index(['chance_of_playing_next_round', 'chance_of_playing_this_round', 'code',
       'cost_change_event', 'cost_change_event_fall', 'cost_change_start',
       'cost_change_start_fall', 'dreamteam_count', 'element_type', 'ep_next',
       'ep_this', 'event_points', 'first_name', 'form', 'id', 'in_dreamteam',
       'news', 'news_added', 'now_cost', 'photo', 'points_per_game',
       'second_name', 'selected_by_percent', 'special', 'squad_number',
       'status', 'team', 'team_code', 'total_points', 'transfers_in',
       'transfers_in_event', 'transfers_out', 'transfers_out_event',
       'value_form', 'value_season', 'web_name', 'minutes', 'goals_scored',
       'assists', 'clean_sheets', 'goals_conceded', 'own_goals',
       'penalties_saved', 'penalties_missed', 'yellow_cards', 'red_cards',
       'saves', 'bonus', 'bps', 'influence', 'creativity', 'threat',
       'ict_index', 'influence_rank', 'influence_rank_type', 'creativity_rank',
       'creativity_rank_type', 'threat_rank'

In [32]:
player_id_df = elements_df[['id', 'code', 'first_name', 'second_name', 'web_name', 'photo']]
player_id_df.isnull().sum()

id             0
code           0
first_name     0
second_name    0
web_name       0
photo          0
dtype: int64

In [33]:
player_team_position_df = elements_df[['id', 'team_code', 'element_type']]
player_team_position_df.isnull().sum()

id              0
team_code       0
element_type    0
dtype: int64

In [34]:
player_cost_df = elements_df[['id', 'now_cost', 'cost_change_start', 'cost_change_start_fall', 'cost_change_event', 'cost_change_event_fall']]
player_cost_df.isnull().sum()

id                        0
now_cost                  0
cost_change_start         0
cost_change_start_fall    0
cost_change_event         0
cost_change_event_fall    0
dtype: int64

In [35]:
player_status_df = elements_df[['id', 'status']]
player_status_df.isnull().sum()

id        0
status    0
dtype: int64

In [36]:
player_points_df = elements_df[['id', 'event_points', 'total_points', 'bonus', 'bps']]
player_points_df.isnull().sum()

id              0
event_points    0
total_points    0
bonus           0
bps             0
dtype: int64

In [37]:
player_user_transfers_df = elements_df[['id', 'transfers_in', 'transfers_in_event', 'transfers_out', 'transfers_out_event', 'selected_by_percent']]
player_user_transfers_df.isnull().sum()

id                     0
transfers_in           0
transfers_in_event     0
transfers_out          0
transfers_out_event    0
selected_by_percent    0
dtype: int64

In [38]:
player_stats_df = elements_df[['id', 'minutes', 'goals_scored', 'assists', 'clean_sheets', 'goals_conceded', 'own_goals', 'penalties_saved', 'penalties_missed', 'yellow_cards', 'red_cards', 'saves']]
player_stats_df.isnull().sum()

id                  0
minutes             0
goals_scored        0
assists             0
clean_sheets        0
goals_conceded      0
own_goals           0
penalties_saved     0
penalties_missed    0
yellow_cards        0
red_cards           0
saves               0
dtype: int64

In [39]:
player_value_df = elements_df[['id', 'value_form', 'value_season', 'points_per_game']]
player_value_df.isnull().sum()

id                 0
value_form         0
value_season       0
points_per_game    0
dtype: int64

In [40]:
player_ep_ict_df = elements_df[['id', 'ep_next', 'ep_this', 'influence', 'influence_rank', 'creativity', 'creativity_rank', 'creativity_rank_type', 'threat', 'threat_rank', 'threat_rank_type', 'ict_index', 'ict_index_rank', 'ict_index_rank_type']]
player_ep_ict_df.isnull().sum()

id                      0
ep_next                 0
ep_this                 0
influence               0
influence_rank          0
creativity              0
creativity_rank         0
creativity_rank_type    0
threat                  0
threat_rank             0
threat_rank_type        0
ict_index               0
ict_index_rank          0
ict_index_rank_type     0
dtype: int64

In [41]:
player_set_piece_df = elements_df[['id', 'team_code', 'corners_and_indirect_freekicks_order', 'direct_freekicks_order', 'penalties_order']]
player_set_piece_df.isnull().sum(), player_set_piece_df.notnull().sum()

(id                                        0
 team_code                                 0
 corners_and_indirect_freekicks_order    628
 direct_freekicks_order                  632
 penalties_order                         648
 dtype: int64,
 id                                      714
 team_code                               714
 corners_and_indirect_freekicks_order     86
 direct_freekicks_order                   82
 penalties_order                          66
 dtype: int64)

Most of the data in this dataframe is missing, however, the data that we have stored is useful.

In [42]:
player_set_piece_df.corners_and_indirect_freekicks_order.unique(), player_set_piece_df.direct_freekicks_order.unique(), player_set_piece_df.penalties_order.unique()

(array([nan,  1.,  3.,  4.,  2.,  5.,  6.]),
 array([nan,  4.,  3.,  2.,  1.,  5.,  6.]),
 array([nan,  2.,  3.,  1.,  4.,  5.,  6.]))

From the unique values shown, we can see that there is a good amount of useful data. The missing data is due to the unlikelyhood of more than 5-7 players being missing from a game. To confirm that there is a decent set piece order stored for every team, we will create bar charts to show the spread of data.

In [43]:
player_set_piece_df.corners_and_indirect_freekicks_order.describe()

count    86.000000
mean      2.744186
std       1.364991
min       1.000000
25%       2.000000
50%       3.000000
75%       4.000000
max       6.000000
Name: corners_and_indirect_freekicks_order, dtype: float64

In [44]:
player_set_piece_df.direct_freekicks_order.describe()

count    82.000000
mean      2.756098
std       1.427775
min       1.000000
25%       2.000000
50%       3.000000
75%       4.000000
max       6.000000
Name: direct_freekicks_order, dtype: float64

In [45]:
player_set_piece_df.penalties_order.describe()

count    66.000000
mean      2.515152
std       1.267856
min       1.000000
25%       1.250000
50%       2.000000
75%       3.000000
max       6.000000
Name: penalties_order, dtype: float64

As we can see from these dataframe stats, every team has at least 1 player who they prefer to take a set piece over other players. Also, the mean values show that for most of the teams there is a decent sized order of preference. For that reason, we will keep the data we have, just change the way it is stored so that it is grouped by the team (soon).

In [46]:
try:
    player_set_piece_df.fillna(0, inplace=True)
except:
    pass
player_set_piece_df.isnull().sum()

id                                      0
team_code                               0
corners_and_indirect_freekicks_order    0
direct_freekicks_order                  0
penalties_order                         0
dtype: int64

In [47]:
news_df = elements_df[['id', 'news', 'news_added']]
news_df.isnull().sum()

id              0
news            0
news_added    163
dtype: int64

News is important, however, should only be used if recent and up to date as old news will skew the predictions. We will remove any data which does not have a timestamp.

In [48]:
try:
    news_df.dropna(inplace=True, axis=0)
except:
    pass
news_df.isnull().sum()

id            0
news          0
news_added    0
dtype: int64

In [49]:
player_dreamteam_df = elements_df[['id', 'dreamteam_count', 'in_dreamteam']]
player_dreamteam_df.isnull().sum()

id                 0
dreamteam_count    0
in_dreamteam       0
dtype: int64

In [50]:
chance_of_playing_df = elements_df[['id', 'status', 'chance_of_playing_this_round', 'chance_of_playing_next_round']]
chance_of_playing_df.isnull().sum()

id                                0
status                            0
chance_of_playing_this_round    165
chance_of_playing_next_round    163
dtype: int64

I suspect that the missing data in this dataframe is due to the status of players, we can check this by finding out the types of status and splitting the dataframe. 

In [51]:
chance_of_playing_df.status.unique()

array(['a', 'u', 'd', 'i', 's', 'n'], dtype=object)

In [52]:
status_grouped_df = chance_of_playing_df.groupby(chance_of_playing_df.status)
a_status_df = status_grouped_df.get_group('a') # available
u_status_df = status_grouped_df.get_group('u') # unavailable
i_status_df = status_grouped_df.get_group('i') # injured
d_status_df = status_grouped_df.get_group('d') # returning from injury soon
n_status_df = status_grouped_df.get_group('n') # other
s_status_df = status_grouped_df.get_group('s') # suspended

Now that the dataframes have been split, we can check which have the most missing data to see if one group is having a large affent on the amount, or whether it is random missing data.

In [53]:
a_status_df.isnull().sum(), u_status_df.isnull().sum(), i_status_df.isnull().sum(), d_status_df.isnull().sum(), n_status_df.isnull().sum(), s_status_df.isnull().sum()

(id                                0
 status                            0
 chance_of_playing_this_round    163
 chance_of_playing_next_round    163
 dtype: int64,
 id                              0
 status                          0
 chance_of_playing_this_round    0
 chance_of_playing_next_round    0
 dtype: int64,
 id                              0
 status                          0
 chance_of_playing_this_round    0
 chance_of_playing_next_round    0
 dtype: int64,
 id                              0
 status                          0
 chance_of_playing_this_round    2
 chance_of_playing_next_round    0
 dtype: int64,
 id                              0
 status                          0
 chance_of_playing_this_round    0
 chance_of_playing_next_round    0
 dtype: int64,
 id                              0
 status                          0
 chance_of_playing_this_round    0
 chance_of_playing_next_round    0
 dtype: int64)

This is the majority of the missing data, as suspected, the status plays a part in this. I assume it is difficult to gather predictions on whether or not an available player is going to play without further calculations.

For now, we can use this dataframe to store the chance of availability of players who are not 100% available (status != a)

In [54]:
# removing all rows of 100% available players.
chance_of_playing_df = chance_of_playing_df[chance_of_playing_df.status != 'a']
chance_of_playing_df.isnull().sum()

id                              0
status                          0
chance_of_playing_this_round    2
chance_of_playing_next_round    0
dtype: int64

As there is still a small amount of missing data which cannot be predicted accurately, it is easier to just presume the player will not be available, to avaid losing points in the event.

In [55]:
try:
    chance_of_playing_df.fillna(0, inplace=True)
except:
    pass
chance_of_playing_df.isnull().sum()

id                              0
status                          0
chance_of_playing_this_round    0
chance_of_playing_next_round    0
dtype: int64

In [56]:
element_stats_df = pd.DataFrame(general_info_json['element_stats'])
element_stats_df.isnull().sum()

label    0
name     0
dtype: int64

In [57]:
element_types_df = pd.DataFrame(general_info_json['element_types'])
element_types_df.isnull().sum()

id                      0
plural_name             0
plural_name_short       0
singular_name           0
singular_name_short     0
squad_select            0
squad_min_play          0
squad_max_play          0
ui_shirt_specific       0
sub_positions_locked    0
element_count           0
dtype: int64

In [58]:
# gets a list of keys of the json
fixtures_json[0].keys()

dict_keys(['code', 'event', 'finished', 'finished_provisional', 'id', 'kickoff_time', 'minutes', 'provisional_start_time', 'started', 'team_a', 'team_a_score', 'team_h', 'team_h_score', 'stats', 'team_h_difficulty', 'team_a_difficulty', 'pulse_id'])

In [59]:
fixtures_df = pd.DataFrame(fixtures_json)
fixtures_df.head()

Unnamed: 0,code,event,finished,finished_provisional,id,kickoff_time,minutes,provisional_start_time,started,team_a,team_a_score,team_h,team_h_score,stats,team_h_difficulty,team_a_difficulty,pulse_id
0,2210441,,False,False,171,,0,False,,5,,2,,[],2,3,66512
1,2210591,,False,False,321,,0,False,,11,,2,,[],4,3,66662
2,2210534,,False,False,264,,0,False,,9,,6,,[],3,5,66605
3,2210592,,False,False,322,,0,False,,7,,8,,[],2,2,66663
4,2210593,,False,False,323,,0,False,,6,,10,,[],4,2,66664


In [60]:
fixtures_event_code_df = fixtures_df[['id', 'code', 'event']]
fixtures_event_code_df.isnull().sum()

id       0
code     0
event    9
dtype: int64

In [61]:
fixtures_timings_df = fixtures_df[['id', 'kickoff_time', 'minutes', 'started', 'finished', 'finished_provisional']]
fixtures_timings_df.isnull().sum()

id                      0
kickoff_time            9
minutes                 0
started                 9
finished                0
finished_provisional    0
dtype: int64

In [62]:
fixtures_teams_df = fixtures_df[['id', 'team_h', 'team_a']]
fixtures_teams_df.isnull().sum()

id        0
team_h    0
team_a    0
dtype: int64

In [63]:
fixtures_scores_df = fixtures_df[['id', 'team_h_score', 'team_a_score']]
fixtures_scores_df.isnull().sum()

id               0
team_h_score    92
team_a_score    92
dtype: int64

In [64]:
try:
    fixtures_scores_df.dropna(inplace=True, axis=0)
except:
    pass
fixtures_scores_df.isnull().sum()

id              0
team_h_score    0
team_a_score    0
dtype: int64

In [65]:
fixtures_difficulty_df = fixtures_df[['id', 'team_h_difficulty', 'team_a_difficulty']]
fixtures_difficulty_df.isnull().sum()

id                   0
team_h_difficulty    0
team_a_difficulty    0
dtype: int64

In [66]:
gameweek_fdr = fixtures_df[['event', 'team_h', 'team_h_difficulty', 'team_a', 'team_a_difficulty']]

In [67]:
fdr_dict_temp = {'event': [], 'team_id': [], 'fdr': []}
for team in teams_df['id']:
    for event in events_df['id']:
        fdr_temp = fixtures_df[['event', 'team_h', 'team_h_difficulty', 'team_a', 'team_a_difficulty']]
        fdr_temp = fdr_temp[(fdr_temp['event'].isin([event, event + 1, event + 2])) & ((fdr_temp['team_h'] == team) | (fdr_temp['team_a'] == team))]
        fdr_count_temp = 0
        fixture_count_temp = 0
        for fixture in fdr_temp.values:
            fixture_count_temp += 1
            if fixture[1] == team:
                fdr_count_temp += fixture[2]
            else:
                fdr_count_temp += fixture[4]
        fdr_dict_temp.get('event').append(event)
        fdr_dict_temp.get('team_id').append(team)
        try:
            fdr_dict_temp.get('fdr').append(round(fdr_count_temp/fixture_count_temp, 1))
        except:
            fdr_dict_temp.get('fdr').append(0)

team_fdr = pd.DataFrame(fdr_dict_temp).sort_values(by=['event', 'team_id'], ignore_index=True)
team_fdr

Unnamed: 0,event,team_id,fdr
0,1,1,3.7
1,1,2,2.0
2,1,3,2.7
3,1,4,2.0
4,1,5,3.3
...,...,...,...
755,38,16,3.0
756,38,17,2.0
757,38,18,5.0
758,38,19,3.0


In [68]:
team_form_dict_temp = {'event': [], 'team_id': [], 'form': []}
data_checked_events_temp = events_df['id']
data_checked_events_temp = data_checked_events_temp[events_df['data_checked'] == True]
for team in teams_df['id']:
    for event in data_checked_events_temp:
        form_temp = fixtures_df[['event', 'team_h', 'team_h_score', 'team_a', 'team_a_score']]
        form_temp = form_temp[(form_temp['event'].isin([event, event - 1, event - 2, event - 3, event - 4])) & ((form_temp['team_h'] == team) | (form_temp['team_a'] == team))]
        form_count_temp = 0
        for fixture in form_temp.values:
            if fixture[1] == team:
                if fixture[2] > fixture[4]:
                    form_count_temp += 1
                elif fixture[2] < fixture[4]:
                    form_count_temp -= 1
            else:
                if fixture[2] < fixture[4]:
                    form_count_temp += 1
                elif fixture[2] > fixture[4]:
                    form_count_temp -= 1
        team_form_dict_temp.get('event').append(event)
        team_form_dict_temp.get('team_id').append(team)
        team_form_dict_temp.get('form').append(form_count_temp)

team_form = pd.DataFrame(team_form_dict_temp).sort_values(by=['event', 'team_id'], ignore_index=True)
team_form['team_id'] = team_form.team_id.map(teams_df.set_index('id').name)
team_form

Unnamed: 0,event,team_id,form
0,1,Arsenal,-1
1,1,Aston Villa,-1
2,1,Brentford,1
3,1,Brighton,1
4,1,Burnley,-1
...,...,...,...
595,30,Southampton,-1
596,30,Spurs,3
597,30,Watford,-1
598,30,West Ham,0


We can adapt the form calculation to include difficulty of game.

Home win = + Home difficulty

Home lose = - Away difficulty

Home draw = - 3 + Home difficulty

Away win = + Away difficulty

Away lose = - Home difficulty

Away draw = - 3 + Away difficulty

In [69]:
team_form_dict_temp = {'event': [], 'team': [], 'form': []}
data_checked_events_temp = events_df['id']
data_checked_events_temp = data_checked_events_temp[events_df['data_checked'] == True]
for team in teams_df['id']:
    for event in data_checked_events_temp:
        form_temp = fixtures_df[['event', 'team_h', 'team_h_score', 'team_h_difficulty', 'team_a', 'team_a_score', 'team_a_difficulty']]
        form_temp = form_temp[(form_temp['event'].isin([event, event - 1, event - 2, event - 3, event - 4])) & ((form_temp['team_h'] == team) | (form_temp['team_a'] == team))]
        form_count_temp = 0
        for fixture in form_temp.values:
            if fixture[1] == team: # if home team
                if fixture[2] > fixture[5]: # if home team win
                    form_count_temp += fixture[3]
                elif fixture[2] < fixture[5]: # if home team lose
                    form_count_temp -= fixture[6]
                else: # if draw
                    form_count_temp += (fixture[3] - 3)
            elif fixture[4] == team: # if away team
                if fixture[2] < fixture[5]: # if away team win
                    form_count_temp += fixture[6]
                elif fixture[2] > fixture[5]: # if away team lose
                    form_count_temp -= fixture[3]
                else: # if draw
                    form_count_temp += (fixture[6] - 3)
        team_form_dict_temp.get('event').append(event)
        team_form_dict_temp.get('team').append(team)
        team_form_dict_temp.get('form').append(form_count_temp)

team_form = pd.DataFrame(team_form_dict_temp).sort_values(by=['event', 'team'], ignore_index=True)
team_form['team'] = team_form.team.map(teams_df.set_index('id').name)
team_form

Unnamed: 0,event,team,form
0,1,Arsenal,-3.0
1,1,Aston Villa,-3.0
2,1,Brentford,3.0
3,1,Brighton,2.0
4,1,Burnley,-2.0
...,...,...,...
595,30,Southampton,-2.0
596,30,Spurs,9.0
597,30,Watford,0.0
598,30,West Ham,-1.0


In [70]:
team_form[team_form['event'] == 9].sort_values(by=['form'], ascending=False, ignore_index=True)

Unnamed: 0,event,team,form
0,9,Man City,11.0
1,9,Liverpool,9.0
2,9,Arsenal,7.0
3,9,Chelsea,5.0
4,9,Brentford,4.0
5,9,Wolves,2.0
6,9,West Ham,1.0
7,9,Leicester,0.0
8,9,Southampton,-1.0
9,9,Watford,-1.0


In [71]:
player_value_df_sorted = player_value_df[['id', 'value_season']].sort_values(by=['value_season'], ascending=False, ignore_index=True)
player_value_df_sorted

Unnamed: 0,id,value_season
0,510,9.8
1,153,9.8
2,186,9.8
3,302,9.7
4,291,9.6
...,...,...
709,653,0.0
710,312,0.0
711,182,0.0
712,628,0.0


In [72]:
top_15_inform_players = player_value_df_sorted['id'][:10]
top_15_inform_players

0    510
1    153
2    186
3    302
4    291
5    135
6      6
7    239
8    471
9    579
Name: id, dtype: int64

In [73]:
teams_df

Unnamed: 0,code,draw,form,id,loss,name,played,points,position,short_name,strength,unavailable,win,strength_overall_home,strength_overall_away,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away,pulse_id
0,3,0,,1,0,Arsenal,0,0,0,ARS,4,False,0,1210,1270,1150,1210,1190,1220,1
1,7,0,,2,0,Aston Villa,0,0,0,AVL,3,False,0,1130,1160,1140,1110,1090,1090,2
2,94,0,,3,0,Brentford,0,0,0,BRE,2,False,0,1060,1070,1120,1150,1080,1120,130
3,36,0,,4,0,Brighton,0,0,0,BHA,3,False,0,1130,1130,1160,1160,1100,1120,131
4,90,0,,5,0,Burnley,0,0,0,BUR,2,False,0,1060,1050,1080,1130,1060,1100,43
5,8,0,,6,0,Chelsea,0,0,0,CHE,5,False,0,1320,1350,1300,1300,1250,1290,4
6,31,0,,7,0,Crystal Palace,0,0,0,CRY,3,False,0,1090,1100,1110,1160,1080,1160,6
7,11,0,,8,0,Everton,0,0,0,EVE,3,False,0,1090,1090,1130,1090,1110,1110,7
8,13,0,,9,0,Leicester,0,0,0,LEI,3,False,0,1160,1190,1100,1080,1150,1160,26
9,2,0,,10,0,Leeds,0,0,0,LEE,3,False,0,1090,1100,1080,1070,1100,1100,9


In [74]:
fixtures_df

Unnamed: 0,code,event,finished,finished_provisional,id,kickoff_time,minutes,provisional_start_time,started,team_a,team_a_score,team_h,team_h_score,stats,team_h_difficulty,team_a_difficulty,pulse_id
0,2210441,,False,False,171,,0,False,,5,,2,,[],2,3,66512
1,2210591,,False,False,321,,0,False,,11,,2,,[],4,3,66662
2,2210534,,False,False,264,,0,False,,9,,6,,[],3,5,66605
3,2210592,,False,False,322,,0,False,,7,,8,,[],2,2,66663
4,2210593,,False,False,323,,0,False,,6,,10,,[],4,2,66664
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
375,2210646,38.0,False,False,376,2022-05-22T15:00:00Z,0,False,False,13,,7,,[],3,2,66717
376,2210647,38.0,False,False,377,2022-05-22T15:00:00Z,0,False,False,16,,9,,[],2,3,66718
377,2210648,38.0,False,False,378,2022-05-22T15:00:00Z,0,False,False,20,,11,,[],3,5,66719
378,2210649,38.0,False,False,379,2022-05-22T15:00:00Z,0,False,False,2,,12,,[],3,5,66720


In [75]:
teams_df["goals_for"] = 0
teams_df["goals_against"] = 0
teams_df["goal_difference"] = 0

In [76]:
started_fixtures = fixtures_df[['team_h', 'team_h_score', 'team_a', 'team_a_score']]
started_fixtures = started_fixtures[fixtures_df['minutes'] != 0]

for index, row in started_fixtures.iterrows():
    teams_df.at[row['team_h']-1, 'played'] += 1
    teams_df.at[row['team_a']-1, 'played'] += 1
    if row['team_h_score'] > row['team_a_score']:
        teams_df.at[row['team_h']-1, 'win'] += 1
        teams_df.at[row['team_a']-1, 'loss'] += 1
        teams_df.at[row['team_h']-1, 'points'] += 3

    elif row['team_h_score'] == row['team_a_score']:
        teams_df.at[row['team_h']-1, 'draw'] += 1
        teams_df.at[row['team_a']-1, 'draw'] += 1
        teams_df.at[row['team_h']-1, 'points'] += 1
        teams_df.at[row['team_a']-1, 'points'] += 1

    else:
        teams_df.at[row['team_a']-1, 'win'] += 1
        teams_df.at[row['team_h']-1, 'loss'] += 1
        teams_df.at[row['team_a']-1, 'points'] += 3
    
    teams_df.at[row['team_h']-1, 'goals_for'] += row['team_h_score']
    teams_df.at[row['team_a']-1, 'goals_for'] += row['team_a_score']

    teams_df.at[row['team_h']-1, 'goals_against'] += row['team_a_score']
    teams_df.at[row['team_a']-1, 'goals_against'] += row['team_h_score']

for index, row in teams_df.iterrows():
    teams_df.at[index, 'goal_difference'] = row['goals_for'] - row['goals_against']

teams_df

Unnamed: 0,code,draw,form,id,loss,name,played,points,position,short_name,...,strength_overall_home,strength_overall_away,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away,pulse_id,goals_for,goals_against,goal_difference
0,3,3,,1,8,Arsenal,28,54,0,ARS,...,1210,1270,1150,1210,1190,1220,1,44,31,13
1,7,3,,2,15,Aston Villa,29,36,0,AVL,...,1130,1160,1140,1110,1090,1090,2,41,40,1
2,94,6,,3,16,Brentford,30,30,0,BRE,...,1060,1070,1120,1150,1080,1120,130,33,47,-14
3,36,12,,4,10,Brighton,29,33,0,BHA,...,1130,1130,1160,1160,1100,1120,131,26,36,-10
4,90,12,,5,12,Burnley,27,21,0,BUR,...,1060,1050,1080,1130,1060,1100,43,22,38,-16
5,8,8,,6,3,Chelsea,28,59,0,CHE,...,1320,1350,1300,1300,1250,1290,4,57,19,38
6,31,13,,7,9,Crystal Palace,29,34,0,CRY,...,1090,1100,1110,1160,1080,1160,6,39,38,1
7,11,4,,8,16,Everton,27,25,0,EVE,...,1090,1090,1130,1090,1110,1110,7,29,47,-18
8,13,6,,9,11,Leicester,27,36,0,LEI,...,1160,1190,1100,1080,1150,1160,26,42,46,-4
9,2,8,,10,15,Leeds,30,29,0,LEE,...,1090,1100,1080,1070,1100,1100,9,34,67,-33


In [77]:
for index, row in teams_df.sort_values(by=['points', 'goal_difference', 'goals_for'], ascending=False, ignore_index=True).iterrows():
    teams_df.at[row['id']-1, 'position'] = index + 1

teams_df.sort_values(by=['points', 'goal_difference', 'goals_for'], ascending=False, ignore_index=True)

Unnamed: 0,code,draw,form,id,loss,name,played,points,position,short_name,...,strength_overall_home,strength_overall_away,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away,pulse_id,goals_for,goals_against,goal_difference
0,43,4,,12,3,Man City,29,70,1,MCI,...,1350,1370,1340,1360,1340,1360,11,68,18,50
1,14,6,,11,2,Liverpool,29,69,2,LIV,...,1330,1350,1300,1350,1340,1350,10,75,20,55
2,8,8,,6,3,Chelsea,28,59,3,CHE,...,1320,1350,1300,1300,1250,1290,4,57,19,38
3,3,3,,1,8,Arsenal,28,54,4,ARS,...,1210,1270,1150,1210,1190,1220,1,44,31,13
4,6,3,,17,10,Spurs,29,51,5,TOT,...,1210,1270,1190,1200,1210,1210,21,47,36,11
5,1,8,,13,7,Man Utd,29,50,6,MUN,...,1210,1270,1170,1190,1190,1200,12,48,40,8
6,21,6,,19,10,West Ham,30,48,7,WHU,...,1200,1200,1110,1150,1160,1180,25,49,39,10
7,39,4,,20,12,Wolves,30,46,8,WOL,...,1200,1220,1150,1160,1080,1100,38,31,26,5
8,7,3,,2,15,Aston Villa,29,36,9,AVL,...,1130,1160,1140,1110,1090,1090,2,41,40,1
9,13,6,,9,11,Leicester,27,36,10,LEI,...,1160,1190,1100,1080,1150,1160,26,42,46,-4


In [78]:
league_table_df = teams_df[['position', 'id', 'played', 'win', 'draw', 'loss', 'goals_for', 'goals_against', 'goal_difference', 'points']].sort_values(by=['points', 'goal_difference', 'goals_for'], ascending=False, ignore_index=True)
league_table_df['id'] = league_table_df.id.map(teams_df.set_index('id').name)
league_table_df = league_table_df.rename(columns={"id": "team"})

league_table_df

Unnamed: 0,position,team,played,win,draw,loss,goals_for,goals_against,goal_difference,points
0,1,Man City,29,22,4,3,68,18,50,70
1,2,Liverpool,29,21,6,2,75,20,55,69
2,3,Chelsea,28,17,8,3,57,19,38,59
3,4,Arsenal,28,17,3,8,44,31,13,54
4,5,Spurs,29,16,3,10,47,36,11,51
5,6,Man Utd,29,14,8,7,48,40,8,50
6,7,West Ham,30,14,6,10,49,39,10,48
7,8,Wolves,30,14,4,12,31,26,5,46
8,9,Aston Villa,29,11,3,15,41,40,1,36
9,10,Leicester,27,10,6,11,42,46,-4,36


In [79]:
upcoming_fixtures = fixtures_df[['team_h', 'kickoff_time', 'team_a']]
upcoming_fixtures = upcoming_fixtures[fixtures_df['minutes'] == 0]
upcoming_fixtures['team_h'] = upcoming_fixtures.team_h.map(teams_df.set_index('id').name)
upcoming_fixtures['team_a'] = upcoming_fixtures.team_a.map(teams_df.set_index('id').name)
upcoming_fixtures[['date','time']] = upcoming_fixtures['kickoff_time'].str.split('T',expand=True)
upcoming_fixtures['time'] = upcoming_fixtures['time'].str[:-4]
upcoming_fixtures = upcoming_fixtures[['date', 'team_h', 'time', 'team_a']]
upcoming_fixtures

Unnamed: 0,date,team_h,time,team_a
0,,Aston Villa,,Burnley
1,,Aston Villa,,Liverpool
2,,Chelsea,,Leicester
3,,Everton,,Crystal Palace
4,,Leeds,,Chelsea
...,...,...,...,...
375,2022-05-22,Crystal Palace,15:00,Man Utd
376,2022-05-22,Leicester,15:00,Southampton
377,2022-05-22,Liverpool,15:00,Wolves
378,2022-05-22,Man City,15:00,Aston Villa


In [80]:
elements_df

Unnamed: 0,chance_of_playing_next_round,chance_of_playing_this_round,code,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,dreamteam_count,element_type,ep_next,...,threat_rank,threat_rank_type,ict_index_rank,ict_index_rank_type,corners_and_indirect_freekicks_order,corners_and_indirect_freekicks_text,direct_freekicks_order,direct_freekicks_text,penalties_order,penalties_text
0,100.0,100.0,80201,0,0,-5,5,1,1,1.7,...,583,60,427,30,,,,,,
1,0.0,0.0,115918,0,0,0,0,0,1,0.0,...,478,19,533,54,,,,,,
2,0.0,0.0,47431,0,0,-2,2,0,3,0.0,...,710,294,710,294,,,,,,
3,0.0,0.0,54694,0,0,-4,4,0,4,0.0,...,43,17,116,24,,,,,,
4,100.0,100.0,58822,0,0,-3,3,0,2,3.3,...,322,106,305,101,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
709,0.0,0.0,428610,0,0,0,0,0,3,0.0,...,621,248,644,251,,,,,,
710,,,510362,0,0,0,0,0,2,0.5,...,654,238,448,162,,,,,,
711,0.0,0.0,225000,0,0,0,0,0,2,0.0,...,487,189,542,197,,,,,,
712,0.0,0.0,232351,0,0,0,0,0,3,0.0,...,609,242,633,245,,,,,,


In [81]:
elements_df.keys()

Index(['chance_of_playing_next_round', 'chance_of_playing_this_round', 'code',
       'cost_change_event', 'cost_change_event_fall', 'cost_change_start',
       'cost_change_start_fall', 'dreamteam_count', 'element_type', 'ep_next',
       'ep_this', 'event_points', 'first_name', 'form', 'id', 'in_dreamteam',
       'news', 'news_added', 'now_cost', 'photo', 'points_per_game',
       'second_name', 'selected_by_percent', 'special', 'squad_number',
       'status', 'team', 'team_code', 'total_points', 'transfers_in',
       'transfers_in_event', 'transfers_out', 'transfers_out_event',
       'value_form', 'value_season', 'web_name', 'minutes', 'goals_scored',
       'assists', 'clean_sheets', 'goals_conceded', 'own_goals',
       'penalties_saved', 'penalties_missed', 'yellow_cards', 'red_cards',
       'saves', 'bonus', 'bps', 'influence', 'creativity', 'threat',
       'ict_index', 'influence_rank', 'influence_rank_type', 'creativity_rank',
       'creativity_rank_type', 'threat_rank'

In [82]:
element_types_df

Unnamed: 0,id,plural_name,plural_name_short,singular_name,singular_name_short,squad_select,squad_min_play,squad_max_play,ui_shirt_specific,sub_positions_locked,element_count
0,1,Goalkeepers,GKP,Goalkeeper,GKP,2,1,1,True,[12],83
1,2,Defenders,DEF,Defender,DEF,5,3,5,False,[],242
2,3,Midfielders,MID,Midfielder,MID,5,2,5,False,[],296
3,4,Forwards,FWD,Forward,FWD,3,1,3,False,[],93


In [83]:
players_df = elements_df[['second_name', 'team', 'element_type', 'now_cost', 'selected_by_percent', 'form', 'total_points']]
players_df['team'] = players_df.team.map(teams_df.set_index('id').name)
players_df['element_type'] = players_df.element_type.map(element_types_df.set_index('id').singular_name)
players_df

Unnamed: 0,second_name,team,element_type,now_cost,selected_by_percent,form,total_points
0,Leno,Arsenal,Goalkeeper,45,0.9,1.2,10
1,Rúnarsson,Arsenal,Goalkeeper,40,0.5,0.0,0
2,Borges Da Silva,Arsenal,Midfielder,63,0.1,0.0,0
3,Aubameyang,Arsenal,Forward,96,1.3,0.0,44
4,Soares,Arsenal,Defender,42,0.3,2.8,24
...,...,...,...,...,...,...,...
709,Cavaco Jordão,Wolves,Midfielder,45,0.0,0.0,0
710,Gomes,Wolves,Defender,45,0.0,0.0,3
711,Sanderson,Wolves,Defender,40,0.0,0.0,0
712,Giles,Wolves,Midfielder,45,0.0,0.0,0


In [84]:
manager_basic_info.keys()

dict_keys(['id', 'joined_time', 'started_event', 'favourite_team', 'player_first_name', 'player_last_name', 'player_region_id', 'player_region_name', 'player_region_iso_code_short', 'player_region_iso_code_long', 'summary_overall_points', 'summary_overall_rank', 'summary_event_points', 'summary_event_rank', 'current_event', 'leagues', 'name', 'name_change_blocked', 'kit', 'last_deadline_bank', 'last_deadline_value', 'last_deadline_total_transfers'])

In [85]:
managerInfo = {}
managerInfo['player_name'] =  manager_basic_info.get('player_first_name') + ' ' + manager_basic_info.get('player_last_name')
managerInfo['joined_event'] = manager_basic_info.get('started_event')
managerInfo['overall_points'] = manager_basic_info.get('summary_overall_points')
managerInfo['overall_rank'] = manager_basic_info.get('summary_overall_rank')
managerInfo['team_value'] = manager_basic_info.get('last_deadline_value')
managerInfo['bank_balance'] = manager_basic_info.get('last_deadline_bank')

managerInfo

{'player_name': 'Reece Lance',
 'joined_event': 1,
 'overall_points': 1537,
 'overall_rank': 3070075,
 'team_value': 990,
 'bank_balance': 6}

In [86]:
manager_history['current']

[{'event': 1,
  'points': 69,
  'total_points': 69,
  'rank': 3436438,
  'rank_sort': 3437545,
  'overall_rank': 3436433,
  'bank': 0,
  'value': 1000,
  'event_transfers': 0,
  'event_transfers_cost': 0,
  'points_on_bench': 0},
 {'event': 2,
  'points': 60,
  'total_points': 101,
  'rank': 2985344,
  'rank_sort': 2985352,
  'overall_rank': 5050254,
  'bank': 3,
  'value': 1001,
  'event_transfers': 8,
  'event_transfers_cost': 28,
  'points_on_bench': 8},
 {'event': 3,
  'points': 67,
  'total_points': 168,
  'rank': 1117841,
  'rank_sort': 1118584,
  'overall_rank': 3914768,
  'bank': 3,
  'value': 1004,
  'event_transfers': 0,
  'event_transfers_cost': 0,
  'points_on_bench': 4},
 {'event': 4,
  'points': 62,
  'total_points': 222,
  'rank': 3008014,
  'rank_sort': 3008083,
  'overall_rank': 3953690,
  'bank': 2,
  'value': 1004,
  'event_transfers': 4,
  'event_transfers_cost': 8,
  'points_on_bench': 0},
 {'event': 5,
  'points': 78,
  'total_points': 288,
  'rank': 321618,
  'ra

In [87]:
manager_history['current'][0].keys()
history = {}
history['points'] = manager_history['current'][0].get('points')
history['total_points'] = manager_history['current'][0].get('total_points')
history['rank'] = manager_history['current'][0].get('rank')
history['bank_balance'] = manager_history['current'][0].get('bank')
history['team_value'] = manager_history['current'][0].get('value')
history['transfers'] = manager_history['current'][0].get('event_transfers')
history['transfers_cost'] = manager_history['current'][0].get('event_transfers_cost')
history['points_on_bench'] = manager_history['current'][0].get('points_on_bench')

history

{'points': 69,
 'total_points': 69,
 'rank': 3436438,
 'bank_balance': 0,
 'team_value': 1000,
 'transfers': 0,
 'transfers_cost': 0,
 'points_on_bench': 0}

In [88]:
data = []
for key in ['player_name', 'joined_event', 'overall_points', 'overall_rank', 'team_value', 'bank_balance']:
    data.append([key, manager_basic_info.get(key)])

manager_basic_info_df = pd.DataFrame(data, columns = ['keys', 'values'])
manager_basic_info_df

Unnamed: 0,keys,values
0,player_name,
1,joined_event,
2,overall_points,
3,overall_rank,
4,team_value,
5,bank_balance,


In [89]:
gameweek_live_json = fetchData('https://fantasy.premierleague.com/api/event/1/live/', None)

In [90]:
id = 59
gameweek_live_json.get('elements')[id-1].get('stats').get('total_points')
'id = ' + str(gameweek_live_json.get('elements')[id-1].get('id')) + ', ' + 'points = ' + str(gameweek_live_json.get('elements')[id-1].get('stats').get('total_points'))

'id = 59, points = 2'

In [91]:
test = fetchData('https://fantasy.premierleague.com/api/entry/758476/event/1/picks/', None)
test = test.get('picks')
test = pd.DataFrame(test)
test['element']

0     231
1     229
2      67
3     122
4     256
5     240
6      21
7     399
8      33
9     337
10     63
11    200
12      4
13     93
14    410
Name: element, dtype: int64

In [92]:
temp = []
for element in test['element']:
    temp.append(gameweek_live_json.get('elements')[element-1].get('stats').get('total_points'))
test['points'] = temp
test

Unnamed: 0,element,position,multiplier,is_captain,is_vice_captain,points
0,231,1,1,False,False,7
1,229,2,1,False,False,7
2,67,3,1,False,False,1
3,122,4,1,False,False,15
4,256,5,1,False,False,2
5,240,6,1,False,False,8
6,21,7,1,False,False,2
7,399,8,1,False,False,9
8,33,9,1,False,False,1
9,337,10,2,False,True,7


CHOOSING THE TEAM FOR THE FIRST WEEK:

Getting player stats from previous seasons:

In [93]:
"""
for element in elements_df['id']:
    noOfPastSeasons = len(player_history)
    if noOfPastSeasons > 0:
        player_history[noOfPastSeasons-1].get('total_points')
    else:
        None
"""

"\nfor element in elements_df['id']:\n    noOfPastSeasons = len(player_history)\n    if noOfPastSeasons > 0:\n        player_history[noOfPastSeasons-1].get('total_points')\n    else:\n        None\n"

In [94]:
playersHistoryDict = managejson.fetchStoredData('/Users/reecelance/Desktop/University/Year 3/MA838 - Capstone Project/Project/a-data-analytics-approach-to-fantasy-football-management/python/players_history.json')
playersHistoryDict.get('1')[len(playersHistoryDict.get('1'))-1]

TypeError: object of type 'NoneType' has no len()

In [None]:
past_player_codes = []
past_player_points = []
for element in playersHistoryDict:
    players_seasons = playersHistoryDict.get(element)
    if players_seasons != []:
        previous_season = players_seasons[len(players_seasons)-1]
        past_player_codes.append(previous_season.get('element_code'))
        past_player_points.append(previous_season.get('total_points'))

past_player_points_dict = {'past_player_codes':past_player_codes, 'past_player_points':past_player_points}
past_player_points_df = pd.DataFrame(past_player_points_dict)

In [None]:
elements_df['start_cost'] = elements_df['now_cost'] - elements_df['cost_change_start_fall']

In [None]:
past_player_value_df = elements_df[['code', 'id', 'start_cost', 'first_name', 'second_name', 'element_type']]
past_player_value_df['last_season_points'] = past_player_value_df.code.map(past_player_points_df.set_index('past_player_codes').past_player_points)
past_player_value_df['value'] = past_player_value_df['last_season_points'] / past_player_value_df['start_cost']
past_player_value_df = past_player_value_df.dropna()
past_player_value_df['value'] = past_player_value_df['value'].astype('float')
past_player_value_df['last_season_points'] = past_player_value_df['last_season_points'].astype('int32')
past_player_value_df.sort_values(by=['last_season_points'], ascending=False)

Choosing team for gameweek 1:

In [None]:
past_gk_value_df = past_player_value_df[past_player_value_df['element_type'] == 1]
past_gk_value_df = past_gk_value_df.sort_values(by=['value'], ascending=False).head(2)

In [None]:
past_def_value_df = past_player_value_df[past_player_value_df['element_type'] == 2]
past_def_value_df = past_def_value_df.sort_values(by=['value'], ascending=False).head(5)

In [None]:
past_mid_value_df = past_player_value_df[past_player_value_df['element_type'] == 3]
past_mid_value_df = past_mid_value_df.sort_values(by=['value'], ascending=False).head(5)

In [None]:
past_fwd_value_df = past_player_value_df[past_player_value_df['element_type'] == 4]
past_fwd_value_df = past_fwd_value_df.sort_values(by=['value'], ascending=False).head(3)

In [None]:
gameweek1_value_df = pd.concat([past_gk_value_df, past_def_value_df, past_mid_value_df, past_fwd_value_df])
gameweek1_value_df

Calculating cost of whole team and remaining balance:

In [None]:
past_player_value_total_cost = sum(gameweek1_value_df['start_cost'])
'Cost of team: £' + str(past_player_value_total_cost/10) + 'M | Remaining: £' + str(100-(past_player_value_total_cost/10)) + 'M'

Calculating points for whole team:

In [None]:
def getPointsForTeam(df, captain_id, week):
    newDf = df[['id']]
    player_data_list = getPlayerGameweekData(week)
    pointsForWeek = []
    for element in newDf['id']:
        try:
            points = player_data_list[element-1].get('stats').get('total_points')
            if int(element) == int(captain_id):
                pointsForWeek.append(points*2)
            else:
                pointsForWeek.append(points)
        except:
            pointsForWeek.append(0)

    newDf['points'] = pointsForWeek
    return sum(newDf['points'])

In [None]:
getPointsForTeam(gameweek1_value_df, gameweek1_value_df.sort_values(by=['value'], ascending=False).iloc[0]['id'], 1)

Changing Team:

In [None]:
past_top_points = past_player_value_df

past_gk_points = past_gk_value_df.sort_values(by=['last_season_points'], ascending=False)
past_def_points = past_def_value_df.sort_values(by=['last_season_points'], ascending=False)
past_mid_points = past_mid_value_df.sort_values(by=['last_season_points'], ascending=False)
past_fwd_points = past_fwd_value_df.sort_values(by=['last_season_points'], ascending=False)

past_top_points_positions = [past_gk_points, past_def_points, past_mid_points, past_fwd_points]

past_top_points = past_top_points.sort_values(by=['last_season_points'], ascending=False)

for index in range(1, len(past_top_points.index)):
    cost = 0
    for df in past_top_points_positions:
        cost += sum(df['start_cost'])
    temp_position = past_top_points.iloc[[index]]['element_type']
    lowest_in_pos = past_top_points_positions[int(temp_position)-1].iloc[[-1]]
    new_player = past_top_points.iloc[[index]]
    if new_player.iloc[0]['id'] not in past_top_points_positions[int(temp_position)-1]['id'].to_list():
        if (new_player.iloc[0]['last_season_points'] > lowest_in_pos.iloc[0]['last_season_points']) and (cost - lowest_in_pos.iloc[0]['start_cost'] + new_player.iloc[0]['start_cost']) < 1000:
            past_top_points_positions[int(temp_position)-1].iloc[[-1]] = new_player
            past_top_points_positions[int(temp_position)-1] = past_top_points_positions[int(temp_position)-1].sort_values(by=['last_season_points'], ascending=False)

past_top_points_positions[1]

In [None]:
gameweek1_value_df_temp = pd.concat(past_top_points_positions)
gameweek1_value_df_temp

In [None]:
total_points = 0
for week in [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]:
    total_points += getPointsForTeam(gameweek1_value_df_temp, gameweek1_value_df_temp.sort_values(by=['last_season_points'], ascending=False).iloc[0]['id'], week)
total_points

In [None]:
gameweek1_cost = sum(gameweek1_value_df_temp['start_cost'])
'Cost of team: £' + str(gameweek1_cost/10) + 'M | Remaining: £' + str(100-(gameweek1_cost/10)) + 'M'

In [None]:
elements_df[['id', 'web_name', 'minutes']].tail(30)