In [1]:
import requests
import json
import pandas as pd

# Loading and assessing data

In [2]:
# Making the API call and storing the response.
# The bootstrap-static endpoint returns the teams, players and position types in one payload.
url = 'https://fantasy.premierleague.com/api/bootstrap-static/'
# Without a timeout, requests waits indefinitely on a stalled connection.
r = requests.get(url, timeout=30)
print("Status code: ", r.status_code)
# Stop here on a 4xx/5xx reply instead of failing later while decoding the body.
r.raise_for_status()

Status code:  200


In [3]:
# Storing the API response (decoded from JSON) in a variable
response_dict = r.json()

In [4]:
# Exploring the top-level keys of the response
print(response_dict.keys())

dict_keys(['events', 'game_settings', 'phases', 'teams', 'total_players', 'elements', 'element_stats', 'element_types'])


Teams are stored in 'teams' and players are stored in 'elements'

In [5]:
# Assessing the content of a single player (the entry at index 1 of 'elements')
print(response_dict['elements'][1])

{'chance_of_playing_next_round': 100, 'chance_of_playing_this_round': 100, 'code': 98745, 'cost_change_event': 0, 'cost_change_event_fall': 0, 'cost_change_start': -3, 'cost_change_start_fall': 3, 'dreamteam_count': 0, 'element_type': 2, 'ep_next': '2.3', 'ep_this': '0.7', 'event_points': 0, 'first_name': 'Héctor', 'form': '0.2', 'id': 2, 'in_dreamteam': False, 'news': '', 'news_added': '2019-12-09T20:00:21.228098Z', 'now_cost': 52, 'photo': '98745.jpg', 'points_per_game': '2.9', 'second_name': 'Bellerín', 'selected_by_percent': '1.0', 'special': False, 'squad_number': None, 'status': 'a', 'team': 1, 'team_code': 3, 'total_points': 44, 'transfers_in': 206616, 'transfers_in_event': 0, 'transfers_out': 159819, 'transfers_out_event': 0, 'value_form': '0.0', 'value_season': '8.5', 'web_name': 'Bellerín', 'minutes': 1156, 'goals_scored': 1, 'assists': 0, 'clean_sheets': 4, 'goals_conceded': 18, 'own_goals': 0, 'penalties_saved': 0, 'penalties_missed': 0, 'yellow_cards': 2, 'red_cards': 0, '

In [6]:
# Assessing the content of a single team (the entry at index 1 of 'teams')
print(response_dict['teams'][1])

{'code': 7, 'draw': 0, 'form': None, 'id': 2, 'loss': 0, 'name': 'Aston Villa', 'played': 0, 'points': 0, 'position': 0, 'short_name': 'AVL', 'strength': 2, 'team_division': None, 'unavailable': False, 'win': 0, 'strength_overall_home': 1020, 'strength_overall_away': 1050, 'strength_attack_home': 970, 'strength_attack_away': 980, 'strength_defence_home': 1000, 'strength_defence_away': 1040, 'pulse_id': 2}


We need to transfer information about the teams and the player positions to the data for each player. In addition, we will remove unnecessary data from the player dataset and transform it into a format that is easier to work with.

Going forward we will:
1. Map the player's team (can be found in team-dictionary) and their position (in element_types) - which we'll later add to the dataset
2. Filter out only the necessary data for each player & Transform to pandas DataFrame
3. Enrich pandas DataFrame with data from 1.

# 1. Mapping player teams and positions

In [7]:
# Team lookups: one single-entry {code: name} mapping per team
teams = response_dict['teams']

team_list = [{club['code']: club['name']} for club in teams]

In [8]:
# Collapse the single-entry mappings into one {code: name} lookup
team_dict = {
    code: name
    for entry in team_list
    for code, name in entry.items()
}

print(team_dict)

{3: 'Arsenal', 7: 'Aston Villa', 91: 'Bournemouth', 36: 'Brighton', 90: 'Burnley', 8: 'Chelsea', 31: 'Crystal Palace', 11: 'Everton', 13: 'Leicester', 14: 'Liverpool', 43: 'Man City', 1: 'Man Utd', 4: 'Newcastle', 45: 'Norwich', 49: 'Sheffield Utd', 20: 'Southampton', 6: 'Spurs', 57: 'Watford', 21: 'West Ham', 39: 'Wolves'}


In [9]:
# Position lookups: one single-entry {id: short plural name} mapping per position
element_types = response_dict['element_types']

position_list = [
    {kind['id']: kind['plural_name_short']}
    for kind in element_types
]

In [10]:
# Collapse the single-entry mappings into one {id: short name} lookup
position_dict = {
    type_id: short_name
    for entry in position_list
    for type_id, short_name in entry.items()
}

print(position_dict)

{1: 'GKP', 2: 'DEF', 3: 'MID', 4: 'FWD'}


We now have one dictionary that maps team codes to team names and one dictionary that maps position ids to position names. We can use these to enrich the data for each player.

## 2. Filter out the data features we want & Convert to pandas DataFrame

In [11]:
# Columns kept from the raw player records, in the order they should appear
wanted_features = [
    # identity, team and position
    'first_name', 'second_name', 'team_code', 'element_type',
    # availability note, price and headline numbers
    'news', 'now_cost', 'total_points', 'minutes',
    # form and value figures
    'form', 'value_season', 'points_per_game', 'value_form',
    # match events
    'goals_scored', 'assists', 'dreamteam_count', 'clean_sheets',
    'goals_conceded', 'own_goals', 'penalties_saved', 'penalties_missed',
    'yellow_cards', 'red_cards', 'saves', 'bonus',
    # ICT figures and ownership
    'influence', 'creativity', 'threat', 'ict_index', 'selected_by_percent',
]

In [12]:
# The list of player records from the API response (one dict per player)
player_data = response_dict['elements']

In [13]:
# Build a DataFrame from the list of players, keeping only the wanted columns
players_df = pd.DataFrame(player_data)[wanted_features]
players_df.head()

Unnamed: 0,first_name,second_name,team_code,element_type,news,now_cost,total_points,minutes,form,value_season,...,penalties_missed,yellow_cards,red_cards,saves,bonus,influence,creativity,threat,ict_index,selected_by_percent
0,Shkodran,Mustafi,3,2,Hamstring injury - Expected back 31 Oct,51,43,1205,0.2,8.4,...,0,2,0,0,2,277.2,45.5,155.0,47.9,0.4
1,Héctor,Bellerín,3,2,,52,44,1156,0.2,8.5,...,0,2,0,0,4,187.8,76.9,103.0,37.0,1.0
2,Sead,Kolasinac,3,2,,52,55,1694,1.0,10.6,...,0,4,0,0,1,269.6,182.5,81.0,53.0,0.5
3,Ainsley,Maitland-Niles,3,2,,45,41,1382,0.5,9.1,...,0,4,1,0,3,301.8,182.0,58.0,53.6,2.2
4,Sokratis,Papastathopoulos,3,2,,48,57,1696,0.0,11.9,...,0,6,0,0,5,436.2,36.8,110.0,58.5,1.3


# 3. Enriching Dataframe & manipulating data

In [14]:
# Swap the team code for the team name and the position id for the position label.
# Each lookup is scoped to its own column.
label_lookups = {'team_code': team_dict, 'element_type': position_dict}
players_df = players_df.replace(label_lookups)

In [15]:
# Rename the relabelled columns so their names match their new content
readable_names = {'team_code': 'team', 'element_type': 'position'}
players_df = players_df.rename(columns=readable_names)
players_df.head()

Unnamed: 0,first_name,second_name,team,position,news,now_cost,total_points,minutes,form,value_season,...,penalties_missed,yellow_cards,red_cards,saves,bonus,influence,creativity,threat,ict_index,selected_by_percent
0,Shkodran,Mustafi,Arsenal,DEF,Hamstring injury - Expected back 31 Oct,51,43,1205,0.2,8.4,...,0,2,0,0,2,277.2,45.5,155.0,47.9,0.4
1,Héctor,Bellerín,Arsenal,DEF,,52,44,1156,0.2,8.5,...,0,2,0,0,4,187.8,76.9,103.0,37.0,1.0
2,Sead,Kolasinac,Arsenal,DEF,,52,55,1694,1.0,10.6,...,0,4,0,0,1,269.6,182.5,81.0,53.0,0.5
3,Ainsley,Maitland-Niles,Arsenal,DEF,,45,41,1382,0.5,9.1,...,0,4,1,0,3,301.8,182.0,58.0,53.6,2.2
4,Sokratis,Papastathopoulos,Arsenal,DEF,,48,57,1696,0.0,11.9,...,0,6,0,0,5,436.2,36.8,110.0,58.5,1.3


In [16]:
# Join first and last name (space-separated) into a single 'player_name' column
full_names = players_df['first_name'].str.cat(players_df['second_name'], sep=' ')
players_df['player_name'] = full_names
# The separate name columns are no longer needed
players_df = players_df.drop(columns=['first_name', 'second_name'])

From the news-column one can determine if a player is injured or unavailable. Players that are available and not injured have no text in this column. I will create a function that returns "True" if there is text, and "False" if there is no text. I will use this function to create an "unavailable"-column for the DataFrame.

In [17]:
# Function for creating cell-values
def unavailable(row):
    """Return True when the row's 'news' entry is anything other than the empty string."""
    return bool(row['news'] != '')

In [18]:
# Apply the function row by row to create the 'unavailable' column
players_df['unavailable'] = players_df.apply(unavailable, axis=1)
players_df.head()

Unnamed: 0,team,position,news,now_cost,total_points,minutes,form,value_season,points_per_game,value_form,...,red_cards,saves,bonus,influence,creativity,threat,ict_index,selected_by_percent,player_name,unavailable
0,Arsenal,DEF,Hamstring injury - Expected back 31 Oct,51,43,1205,0.2,8.4,2.9,0.0,...,0,0,2,277.2,45.5,155.0,47.9,0.4,Shkodran Mustafi,True
1,Arsenal,DEF,,52,44,1156,0.2,8.5,2.9,0.0,...,0,0,4,187.8,76.9,103.0,37.0,1.0,Héctor Bellerín,False
2,Arsenal,DEF,,52,55,1694,1.0,10.6,2.1,0.2,...,0,0,1,269.6,182.5,81.0,53.0,0.5,Sead Kolasinac,False
3,Arsenal,DEF,,45,41,1382,0.5,9.1,2.0,0.1,...,1,0,3,301.8,182.0,58.0,53.6,2.2,Ainsley Maitland-Niles,False
4,Arsenal,DEF,,48,57,1696,0.0,11.9,3.0,0.0,...,0,0,5,436.2,36.8,110.0,58.5,1.3,Sokratis Papastathopoulos,False


In [19]:
# Put the player's name first, followed by team, position and availability.
# 'news' is not listed, so it is dropped by this selection.
column_order = [
    'player_name', 'team', 'position', 'unavailable',
    'now_cost', 'total_points', 'minutes',
    'form', 'value_season', 'points_per_game', 'value_form',
    'goals_scored', 'assists', 'dreamteam_count', 'clean_sheets',
    'goals_conceded', 'own_goals', 'penalties_saved', 'penalties_missed',
    'yellow_cards', 'red_cards', 'saves', 'bonus',
    'influence', 'creativity', 'threat', 'ict_index', 'selected_by_percent',
]
players_df = players_df[column_order]

In [20]:
# Preview the reordered DataFrame
players_df.head()

Unnamed: 0,player_name,team,position,unavailable,now_cost,total_points,minutes,form,value_season,points_per_game,...,penalties_missed,yellow_cards,red_cards,saves,bonus,influence,creativity,threat,ict_index,selected_by_percent
0,Shkodran Mustafi,Arsenal,DEF,True,51,43,1205,0.2,8.4,2.9,...,0,2,0,0,2,277.2,45.5,155.0,47.9,0.4
1,Héctor Bellerín,Arsenal,DEF,False,52,44,1156,0.2,8.5,2.9,...,0,2,0,0,4,187.8,76.9,103.0,37.0,1.0
2,Sead Kolasinac,Arsenal,DEF,False,52,55,1694,1.0,10.6,2.1,...,0,4,0,0,1,269.6,182.5,81.0,53.0,0.5
3,Ainsley Maitland-Niles,Arsenal,DEF,False,45,41,1382,0.5,9.1,2.0,...,0,4,1,0,3,301.8,182.0,58.0,53.6,2.2
4,Sokratis Papastathopoulos,Arsenal,DEF,False,48,57,1696,0.0,11.9,3.0,...,0,6,0,0,5,436.2,36.8,110.0,58.5,1.3


# Creating DataFrames of "top players" and players with high ROI

Next I'll create two DataFrames:
- "Top players". I.e. players that get the most points.
- "Players with high ROI". I.e. players that get the most points based on their costs.

Both of these dataframes will be sorted in descending order, so that our automatic team selection can pick players from the top of each one.

In [21]:
# "Top performing players": the summary columns ranked by total points, highest first
summary_columns = ['player_name', 'team', 'position', 'total_points', 'now_cost', 'unavailable']
most_points = players_df[summary_columns].sort_values(by='total_points', ascending=False)
most_points.head()

Unnamed: 0,player_name,team,position,total_points,now_cost,unavailable
338,Kevin De Bruyne,Man City,MID,251,106,False
305,Mohamed Salah,Liverpool,MID,233,125,False
306,Sadio Mané,Liverpool,MID,221,121,False
270,Jamie Vardy,Leicester,FWD,210,97,False
296,Trent Alexander-Arnold,Liverpool,DEF,210,77,False


In [22]:
# Creating dataframe for ROI-players: total points earned per unit of cost, highest first
roi_players = (
    players_df[['player_name', 'team', 'position', 'total_points', 'now_cost', 'unavailable']]
    # assign() returns a new frame, so nothing is written onto a slice of players_df
    # (the original column assignment triggered pandas' SettingWithCopyWarning).
    # The division is vectorised instead of a row-by-row apply.
    .assign(roi=lambda frame: frame['total_points'] / frame['now_cost'])
    .sort_values(by='roi', ascending=False)
)
roi_players.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,player_name,team,position,total_points,now_cost,unavailable,roi
145,Nick Pope,Burnley,GKP,170,52,False,3.269231
467,John Lundstram,Sheffield Utd,DEF,144,46,False,3.130435
480,Dean Henderson,Sheffield Utd,GKP,160,52,False,3.076923
644,Rui Pedro dos Santos Patrício,Wolves,GKP,153,53,False,2.886792
112,Mathew Ryan,Brighton,GKP,135,47,False,2.87234


# Program for automatic team selection

Next we'll create a function for choosing players from these two dataframes:

In [26]:
def choose_team(top_players=None, value_players=None, budget=1000,
                top_performer_limit=2, max_per_team=3):
    """Greedily pick a squad in two passes and print a summary of it.

    Pass 1 takes up to ``top_performer_limit`` players from ``top_players``,
    in row order. Pass 2 then fills the remaining slots from
    ``value_players``, in row order. A player is only picked when he is not
    already in the squad, is not flagged unavailable, fits the remaining
    budget, his position still has an open slot, and fewer than
    ``max_per_team`` squad members come from his team.

    The two passes run strictly one after the other. Previously the second
    pass was nested inside the first, so an ineligible leading row in
    ``top_players`` filled the whole squad from ``value_players`` before any
    top performer had been chosen.

    Args:
        top_players: DataFrame with the columns ``player_name``, ``team``,
            ``position``, ``total_points``, ``now_cost`` and ``unavailable``,
            ordered by priority. Defaults to the notebook-level
            ``most_points``.
        value_players: DataFrame with the same columns, ordered by priority.
            Defaults to the notebook-level ``roi_players``.
        budget: Spending limit in the same unit as ``now_cost``; it is
            printed divided by 10 with an "M." suffix.
        top_performer_limit: Maximum number of picks taken in pass 1.
        max_per_team: Maximum number of squad members from the same team.

    Returns:
        None. The chosen players, the remaining budget, the total points and
        the open slots left per position are printed.
    """
    # A bare ``choose_team()`` keeps using the frames built earlier in the notebook.
    if top_players is None:
        top_players = most_points
    if value_players is None:
        value_players = roi_players

    roi_team = []
    total_points = 0
    # Open squad slots per position (15 in total). A position that is missing
    # from this table is treated as having no slots and is skipped, not an error.
    open_slots = {"GKP": 2, "DEF": 5, "MID": 5, "FWD": 3}
    # Counted as picks are made, so teams do not have to be listed up front.
    picks_per_team = {}

    # Each phase: (candidates in priority order, max picks or None, announce each pick?)
    phases = (
        (top_players, top_performer_limit, True),
        (value_players, None, False),
    )
    for candidates, pick_limit, announce in phases:
        picked = 0
        for player in candidates.itertuples(index=False):
            if pick_limit is not None and picked >= pick_limit:
                break

            eligible = (
                player.player_name not in roi_team
                and not player.unavailable
                and budget >= player.now_cost
                and open_slots.get(player.position, 0) > 0
                and picks_per_team.get(player.team, 0) < max_per_team
            )
            if not eligible:
                continue

            roi_team.append(player.player_name)
            budget -= player.now_cost  # Deducting cost from budget
            open_slots[player.position] -= 1  # One slot fewer for this position
            picks_per_team[player.team] = picks_per_team.get(player.team, 0) + 1
            total_points += player.total_points  # Adding to point score
            picked += 1
            if announce:
                print("Player choosen from 'top players' " + str(player.player_name))

    print("\nTeam chosen: " + str(roi_team))
    print("Remaining budget: " + str((budget/10)) + "M.")
    print("Total points from choosen team: " + str(total_points) + ".")
    print(open_slots)

In [27]:
# Running the function to choose a team (the result is printed)
choose_team()

Player choosen from 'top players' Kevin De Bruyne
Player choosen from 'top players' Mohamed Salah

Team chosen: ['Kevin De Bruyne', 'Mohamed Salah', 'Nick Pope', 'John Lundstram', 'Dean Henderson', 'George Baldock', 'Virgil van Dijk', 'Trent Alexander-Arnold', 'Lewis Dunk', 'Danny Ings', 'Jordan Ayew', 'Jack Grealish', 'Todd Cantwell', 'Raúl Jiménez', 'Adama Traoré']
Remaining budget: 1.2M.
Total points from choosen team: 2530.
{'GKP': 0, 'DEF': 0, 'MID': 0, 'FWD': 0}


From the above we can see that we have managed to use up most of our budget, that all positions have been filled and we have a total team score of 2530 points (which is pretty decent).