In [17]:
import requests, json
from pprint import pprint
from tqdm.notebook import tqdm
tqdm.pandas()

In [18]:
# Base API endpoint URL
base_url = 'https://fantasy.premierleague.com/api/'

# Fetch the master ("bootstrap-static") data.
# The timeout stops the cell from hanging indefinitely if the API is
# unreachable, and raise_for_status() surfaces an HTTP error response here
# instead of as a confusing JSON decode error or KeyError further down.
response = requests.get(base_url + 'bootstrap-static/', timeout=30)
response.raise_for_status()
r = response.json()

# Print only the top-level fields (depth=1 collapses nested values)
pprint(r, indent=2, depth=1, compact=True)

{ 'element_stats': [...],
  'element_types': [...],
  'elements': [...],
  'events': [...],
  'game_settings': {...},
  'phases': [...],
  'teams': [...],
  'total_players': 10613150}


In [19]:
# get player data from 'elements' field
players = r['elements']

# show data for the player at index 4 (the fifth entry in the list)
pprint(players[4])

{'assists': 1,
 'bonus': 6,
 'bps': 351,
 'chance_of_playing_next_round': 100,
 'chance_of_playing_this_round': 75,
 'clean_sheets': 6,
 'clean_sheets_per_90': 0.33,
 'code': 226597,
 'corners_and_indirect_freekicks_order': None,
 'corners_and_indirect_freekicks_text': '',
 'cost_change_event': 0,
 'cost_change_event_fall': 0,
 'cost_change_start': 0,
 'cost_change_start_fall': 0,
 'creativity': '84.8',
 'creativity_rank': 240,
 'creativity_rank_type': 69,
 'direct_freekicks_order': None,
 'direct_freekicks_text': '',
 'dreamteam_count': 2,
 'element_type': 2,
 'ep_next': '9.5',
 'ep_this': '7.5',
 'event_points': 2,
 'expected_assists': '0.39',
 'expected_assists_per_90': 0.02,
 'expected_goal_involvements': '2.50',
 'expected_goal_involvements_per_90': 0.14,
 'expected_goals': '2.11',
 'expected_goals_conceded': '15.47',
 'expected_goals_conceded_per_90': 0.85,
 'expected_goals_per_90': 0.12,
 'first_name': 'Gabriel',
 'form': '9.5',
 'form_rank': 3,
 'form_rank_type': 1,
 'goals_con

In [4]:
import pandas as pd
pd.set_option('display.max_columns', None)

In [5]:
# Flatten the raw 'elements' records into a players dataframe
players = pd.json_normalize(r['elements'])

# Preview the identifying columns for the first five players
players.loc[:, ['id', 'web_name', 'team', 'element_type']].head()

Unnamed: 0,id,web_name,team,element_type
0,1,Balogun,1,4
1,2,Cédric,1,2
2,3,M.Elneny,1,3
3,4,Fábio Vieira,1,3
4,5,Gabriel,1,2


In [6]:
# Flatten the 'teams' records into a teams dataframe
teams = pd.json_normalize(r['teams'])

# Preview the first five teams
teams.head(5)

Unnamed: 0,code,draw,form,id,loss,name,played,points,position,short_name,strength,team_division,unavailable,win,strength_overall_home,strength_overall_away,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away,pulse_id
0,3,0,,1,0,Arsenal,0,0,0,ARS,4,,False,0,1220,1260,1270,1280,1170,1240,1
1,7,0,,2,0,Aston Villa,0,0,0,AVL,3,,False,0,1130,1235,1120,1220,1130,1250,2
2,91,0,,3,0,Bournemouth,0,0,0,BOU,3,,False,0,1095,1100,1070,1120,1130,1110,127
3,94,0,,4,0,Brentford,0,0,0,BRE,3,,False,0,1120,1155,1120,1130,1120,1180,130
4,36,0,,5,0,Brighton,0,0,0,BHA,3,,False,0,1130,1185,1100,1120,1160,1250,131


In [7]:
# Flatten the 'element_types' records into a positions dataframe
positions = pd.json_normalize(r['element_types'])

# Preview the first five rows
positions.head(5)

Unnamed: 0,id,plural_name,plural_name_short,singular_name,singular_name_short,squad_select,squad_min_play,squad_max_play,ui_shirt_specific,sub_positions_locked,element_count
0,1,Goalkeepers,GKP,Goalkeeper,GKP,2,1,1,True,[12],91
1,2,Defenders,DEF,Defender,DEF,5,3,5,False,[],256
2,3,Midfielders,MID,Midfielder,MID,5,2,5,False,[],346
3,4,Forwards,FWD,Forward,FWD,3,1,3,False,[],102


In [8]:
# Attach each player's team row (players.team -> teams.id)
df = players.merge(teams, left_on='team', right_on='id')

# Preview the joined result
df.loc[:, ['first_name', 'second_name', 'name']].head()

Unnamed: 0,first_name,second_name,name
0,Folarin,Balogun,Arsenal
1,Cédric,Alves Soares,Arsenal
2,Mohamed,Elneny,Arsenal
3,Fábio,Ferreira Vieira,Arsenal
4,Gabriel,dos Santos Magalhães,Arsenal


In [9]:
# Attach position details (df.element_type -> positions.id), then give the
# team and position name columns self-explanatory labels
df = df.merge(
    positions, left_on='element_type', right_on='id'
).rename(
    columns={'name':'team_name', 'singular_name':'position_name'}
)

# Preview the result
df.loc[
    :, ['first_name', 'second_name', 'team_name', 'position_name']
].head()

Unnamed: 0,first_name,second_name,team_name,position_name
0,Folarin,Balogun,Arsenal,Forward
1,Gabriel,Fernando de Jesus,Arsenal,Forward
2,Eddie,Nketiah,Arsenal,Forward
3,Keinan,Davis,Aston Villa,Forward
4,Jhon,Durán,Aston Villa,Forward


The element-summary endpoint contains three fields at the top level:

    1. fixtures contains upcoming fixture information
    2. history contains previous gameweek player scores
    3. history_past provides summary of previous season totals

In [10]:
# get data from 'element-summary/{PID}/' endpoint for PID=5 (Gabriel)
# NOTE: this rebinds `r`, so from here on it no longer holds the
# bootstrap-static payload fetched earlier
r = requests.get(base_url + 'element-summary/5/').json()

# show top-level fields for player summary
pprint(r, depth=1)

# show data for first gameweek
pprint(r['history'][0])

{'fixtures': [...], 'history': [...], 'history_past': [...]}
{'assists': 0,
 'bonus': 0,
 'bps': 2,
 'clean_sheets': 0,
 'creativity': '0.0',
 'element': 5,
 'expected_assists': '0.00',
 'expected_goal_involvements': '0.00',
 'expected_goals': '0.00',
 'expected_goals_conceded': '0.02',
 'fixture': 2,
 'goals_conceded': 0,
 'goals_scored': 0,
 'ict_index': '0.0',
 'influence': '0.2',
 'kickoff_time': '2023-08-12T12:00:00Z',
 'minutes': 4,
 'opponent_team': 16,
 'own_goals': 0,
 'penalties_missed': 0,
 'penalties_saved': 0,
 'red_cards': 0,
 'round': 1,
 'saves': 0,
 'selected': 2743150,
 'starts': 0,
 'team_a_score': 1,
 'team_h_score': 2,
 'threat': '0.0',
 'total_points': 1,
 'transfers_balance': 0,
 'transfers_in': 0,
 'transfers_out': 0,
 'value': 50,
 'was_home': True,
 'yellow_cards': 0}


Function with a player_id argument which returns a dataframe of previous gameweek points

In [11]:
def get_gameweek_history(player_id, timeout=30):
    """Return a player's previous gameweek scores as a dataframe.

    Parameters
    ----------
    player_id : int or str
        Player ("element") id, interpolated into the
        ``element-summary/{player_id}/`` endpoint path.
    timeout : float, optional
        Seconds to wait for the API before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        One row per entry in the response's ``history`` list.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error (4xx/5xx) status.
    """
    # API call to element summary (provides fixture and score data)
    response = requests.get(
        f'{base_url}element-summary/{player_id}/', timeout=timeout
    )
    # Fail loudly on an error response instead of trying to parse its body
    response.raise_for_status()

    # Extract historical data
    return pd.json_normalize(response.json()['history'])


# Preview the first five gameweek rows for Gabriel (player id 5)
get_gameweek_history(5)[
    ['round', 'total_points', 'minutes', 'goals_scored', 'assists']
].head()

Unnamed: 0,round,total_points,minutes,goals_scored,assists
0,1,1,4,0,0
1,2,1,20,0,0
2,3,0,0,0,0
3,4,2,90,0,0
4,5,5,90,0,0


Now, for the season stats

In [12]:
def get_season_history(player_id, timeout=30):
    """Return a player's previous-season totals as a dataframe.

    Parameters
    ----------
    player_id : int or str
        Player ("element") id, interpolated into the
        ``element-summary/{player_id}/`` endpoint path.
    timeout : float, optional
        Seconds to wait for the API before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        One row per entry in the response's ``history_past`` list.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error (4xx/5xx) status.
    """
    # API call
    response = requests.get(
        f'{base_url}element-summary/{player_id}/', timeout=timeout
    )
    # Fail loudly on an error response instead of trying to parse its body
    response.raise_for_status()

    # Extract historical data
    return pd.json_normalize(response.json()['history_past'])


# Preview up to 15 past-season rows for Gabriel (player id 5)
get_season_history(5)[
    ['season_name', 'total_points', 'minutes', 'goals_scored', 'assists']
].head(15)

Unnamed: 0,season_name,total_points,minutes,goals_scored,assists
0,2020/21,78,1996,2,0
1,2021/22,146,3063,5,0
2,2022/23,146,3409,3,0


Fantasy Points table 

In [13]:
# select columns of interest from players df
players = players[
    ['id', 'first_name', 'second_name', 'web_name', 'team',
     'element_type']
]

# join team name
players = players.merge(
    teams[['id', 'name']],
    left_on='team',
    right_on='id',
    suffixes=['_player', None]
).drop(['team', 'id'], axis=1)

# join player positions
players = players.merge(
    positions[['id', 'singular_name_short']],
    left_on='element_type',
    right_on='id'
).drop(['element_type', 'id'], axis=1)

players.head()

Unnamed: 0,id_player,first_name,second_name,web_name,name,singular_name_short
0,1,Folarin,Balogun,Balogun,Arsenal,FWD
1,8,Gabriel,Fernando de Jesus,G.Jesus,Arsenal,FWD
2,13,Eddie,Nketiah,Nketiah,Arsenal,FWD
3,39,Keinan,Davis,Davis,Aston Villa,FWD
4,44,Jhon,Durán,Duran,Aston Villa,FWD


In [14]:
# get gameweek histories for each player
points = players['id_player'].progress_apply(get_gameweek_history)

# combine results into single dataframe
points = pd.concat(df for df in points)

# join web_name
points = players[['id_player', 'web_name']].merge(
    points,
    left_on='id_player',
    right_on='element'
)

  0%|          | 0/795 [00:00<?, ?it/s]

## Top points scorers, with combined goals and assists

In [15]:
from tabulate import tabulate

# Total each player's points, goals and assists over all gameweeks, rank by
# total points (highest first), index by player id, and add goals + assists
top_scorer = (
    points
    .groupby(['element', 'web_name'])
    .agg({'total_points':'sum', 'goals_scored':'sum', 'assists':'sum'})
    .reset_index()
    .sort_values('total_points', ascending=False)
    .set_index('element')
    .assign(
        combined_ga=lambda totals: totals['goals_scored'] + totals['assists']
    )
)

# Show the ten highest-ranked rows, using tabulate for nicer formatting
print(tabulate(top_scorer.head(10), headers='keys', tablefmt='pretty'))


+---------+----------+--------------+--------------+---------+-------------+
| element | web_name | total_points | goals_scored | assists | combined_ga |
+---------+----------+--------------+--------------+---------+-------------+
|   308   |  Salah   |     156      |      14      |    8    |     22      |
|   516   |   Son    |     136      |      12      |    5    |     17      |
|   60    | Watkins  |     128      |      10      |   11    |     21      |
|   19    |   Saka   |     120      |      7       |    8    |     15      |
|   526   |  Bowen   |     113      |      11      |    2    |     13      |
|   355   | Haaland  |     112      |      14      |    5    |     19      |
|   412   |  Gordon  |     105      |      7       |    7    |     14      |
|   362   |  Palmer  |     102      |      9       |    5    |     14      |
|   85    | Solanke  |     100      |      12      |    1    |     13      |
|   557   | Hee Chan |     100      |      10      |    3    |     13      |

## Exporting data from main endpoints for field analysis

In [38]:
import requests
import pandas as pd

# Function to fetch data from API and return a DataFrame
def fetch_data(api_url, timeout=30):
    """Fetch JSON from ``api_url`` and flatten it into a dataframe.

    Parameters
    ----------
    api_url : str
        Full URL of the endpoint to request.
    timeout : float, optional
        Seconds to wait for the API before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        The decoded JSON passed through ``pd.json_normalize``.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error (4xx/5xx) status.
    """
    response = requests.get(api_url, timeout=timeout)
    # Fail loudly on an error response instead of trying to parse its body
    response.raise_for_status()
    return pd.json_normalize(response.json())

# Fetch data from the first endpoint
url_bootstrap_static = 'https://fantasy.premierleague.com/api/bootstrap-static/'
data_bootstrap_static = fetch_data(url_bootstrap_static)

# Fetch data from the second endpoint for player id 4
# (change the id at the end of the URL to inspect another player)
url_element_summary = 'https://fantasy.premierleague.com/api/element-summary/4/'
data_element_summary = fetch_data(url_element_summary)


# Export each DataFrame to an Excel workbook (.xlsx), without the row index
data_bootstrap_static.to_excel('bootstrap_static_data.xlsx', index=False)
data_element_summary.to_excel('element_summary_data.xlsx', index=False)

print("Success")


Success


In [42]:
import requests

# Function to fetch data from API and print fields and values
def print_data(api_url, timeout=30):
    """Fetch a JSON object from ``api_url`` and print each top-level field.

    Parameters
    ----------
    api_url : str
        Full URL of the endpoint to request.
    timeout : float, optional
        Seconds to wait for the API before giving up (default 30).

    Returns
    -------
    None
        Output is written to stdout as one ``key: value`` line per field.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error (4xx/5xx) status.
    """
    response = requests.get(api_url, timeout=timeout)
    # Fail loudly on an error response instead of trying to parse its body
    response.raise_for_status()
    data = response.json()

    # Print fields and values
    for key, value in data.items():
        print(f"{key}: {value}")
# Print every top-level field of the element-summary payload for player id 4
# (change the id at the end of the URL to inspect another player)
url_element_summary = 'https://fantasy.premierleague.com/api/element-summary/4/'
print("\nElement Summary Data:")
print_data(url_element_summary)



Element Summary Data:
fixtures: [{'id': 221, 'code': 2367759, 'team_h': 1, 'team_h_score': None, 'team_a': 11, 'team_a_score': None, 'event': 23, 'finished': False, 'minutes': 0, 'provisional_start_time': False, 'kickoff_time': '2024-02-04T16:30:00Z', 'event_name': 'Gameweek 23', 'is_home': True, 'difficulty': 4}, {'id': 239, 'code': 2367776, 'team_h': 19, 'team_h_score': None, 'team_a': 1, 'team_a_score': None, 'event': 24, 'finished': False, 'minutes': 0, 'provisional_start_time': False, 'kickoff_time': '2024-02-11T14:00:00Z', 'event_name': 'Gameweek 24', 'is_home': False, 'difficulty': 3}, {'id': 242, 'code': 2367779, 'team_h': 6, 'team_h_score': None, 'team_a': 1, 'team_a_score': None, 'event': 25, 'finished': False, 'minutes': 0, 'provisional_start_time': False, 'kickoff_time': '2024-02-17T15:00:00Z', 'event_name': 'Gameweek 25', 'is_home': False, 'difficulty': 2}, {'id': 251, 'code': 2367789, 'team_h': 1, 'team_h_score': None, 'team_a': 15, 'team_a_score': None, 'event': 26, 'fi

In [22]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from pulp import LpMaximize, LpProblem, LpVariable, lpSum, LpInteger, LpStatus, LpBinary, LpConstraintVar, LpConstraint, LpAffineExpression
import matplotlib.pyplot as plt

# Predictor Model (Future Implementation)