# FPL Bot Using FPL API

The FPL API contains most of the data that would be useful for building an FPL bot in an extremely easy and quick to access API.

The plan is to aggregate expected points (using points * expected goals/assists/clean sheets) and choose the team with the highest expected points over the past 5 weeks.

Multipliers could b applied for home/away and fixture difficulty (all of which can be found within this API).

## Loading Data

In [162]:
import requests
import pandas as pd
from pprint import pprint

pd.set_option('display.max_columns', None)

url = 'https://fantasy.premierleague.com/api/bootstrap-static/'
response = requests.get(url).json()
pprint(response, indent=2, depth=1, compact=True)

{ 'element_stats': [...],
  'element_types': [...],
  'elements': [...],
  'events': [...],
  'game_settings': {...},
  'phases': [...],
  'teams': [...],
  'total_players': 10546903}


In [163]:
df_players = pd.json_normalize(response['elements'])
df_players.head()

Unnamed: 0,chance_of_playing_next_round,chance_of_playing_this_round,code,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,dreamteam_count,element_type,ep_next,ep_this,event_points,first_name,form,id,in_dreamteam,news,news_added,now_cost,photo,points_per_game,second_name,selected_by_percent,special,squad_number,status,team,team_code,total_points,transfers_in,transfers_in_event,transfers_out,transfers_out_event,value_form,value_season,web_name,region,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,influence_rank,influence_rank_type,creativity_rank,creativity_rank_type,threat_rank,threat_rank_type,ict_index_rank,ict_index_rank_type,corners_and_indirect_freekicks_order,corners_and_indirect_freekicks_text,direct_freekicks_order,direct_freekicks_text,penalties_order,penalties_text,expected_goals_per_90,saves_per_90,expected_assists_per_90,expected_goal_involvements_per_90,expected_goals_conceded_per_90,goals_conceded_per_90,now_cost_rank,now_cost_rank_type,form_rank,form_rank_type,points_per_game_rank,points_per_game_rank_type,selected_rank,selected_rank_type,starts_per_90,clean_sheets_per_90
0,0.0,0.0,438098,0,0,-1,1,0,3,0.0,0.0,0,Fábio,0.0,1,False,Has joined Portuguese side FC Porto on loan fo...,2024-08-29T11:06:25.241953Z,54,438098.jpg,0.0,Ferreira Vieira,0.0,False,,u,1,3,0,439,0,2641,27,0.0,0.0,Fábio Vieira,,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,650,296,649,296,647,295,652,297,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,137,81,646,294,652,297,619,274,0.0,0.0
1,100.0,100.0,205651,0,0,-2,2,0,4,1.8,2.3,1,Gabriel,0.8,2,False,,2024-08-25T11:00:05.312580Z,68,205651.jpg,0.6,Fernando de Jesus,0.9,False,,a,1,3,3,45602,3017,197889,3416,0.1,0.4,G.Jesus,30.0,78,0,0,0,2,0,0,0,2,0,0,0,18,14.0,32.3,33.0,7.9,1,0.49,0.06,0.55,1.29,347,28,216,24,185,34,281,29,,,,,,,0.57,0.0,0.07,0.64,1.49,2.31,33,14,300,33,415,45,203,38,1.15,0.0
2,,,226597,0,0,2,-2,1,2,7.2,7.7,2,Gabriel,6.2,3,True,,,62,226597.jpg,5.6,dos Santos Magalhães,27.2,False,,a,1,3,39,1862068,85181,479301,29566,1.0,6.3,Gabriel,30.0,630,2,0,3,6,0,0,0,1,0,0,4,131,194.8,36.6,165.0,39.6,7,1.47,0.15,1.62,7.45,19,4,199,51,31,1,39,4,,,,,,,0.21,0.0,0.02,0.23,1.06,0.86,68,3,15,3,21,2,12,4,1.0,0.43
3,75.0,,219847,1,-1,3,-3,1,4,4.1,6.0,8,Kai,4.5,4,False,Knee injury - 75% chance of playing,2024-10-07T17:00:06.956493Z,83,219847.jpg,5.7,Havertz,20.7,False,,d,1,3,40,1859768,77776,1202393,57435,0.5,4.8,Havertz,80.0,630,4,1,3,6,0,0,0,0,0,0,7,156,209.2,83.8,326.0,61.9,7,4.29,0.2,4.49,7.45,16,3,98,6,4,2,9,2,,,,,3.0,,0.61,0.0,0.03,0.64,1.06,0.86,10,4,43,11,18,6,16,3,1.0,0.43
4,0.0,0.0,463748,0,0,0,0,0,1,0.0,0.0,0,Karl,0.0,5,False,Loaned to Real Valladolid,2024-08-14T08:31:46.556082Z,40,463748.jpg,0.0,Hein,0.0,False,,u,1,3,0,0,0,3253,83,0.0,0.0,Hein,,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,465,53,449,33,406,27,469,53,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,630,65,421,48,472,53,538,66,0.0,0.0


In [164]:
df_teams = pd.json_normalize(response['teams'])
df_teams.head()

Unnamed: 0,code,draw,form,id,loss,name,played,points,position,short_name,strength,team_division,unavailable,win,strength_overall_home,strength_overall_away,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away,pulse_id
0,3,0,,1,0,Arsenal,0,0,0,ARS,5,,False,0,1350,1380,1370,1370,1330,1390,1
1,7,0,,2,0,Aston Villa,0,0,0,AVL,3,,False,0,1120,1245,1110,1140,1130,1350,2
2,91,0,,3,0,Bournemouth,0,0,0,BOU,3,,False,0,1100,1100,1075,1100,1130,1105,127
3,94,0,,4,0,Brentford,0,0,0,BRE,3,,False,0,1100,1100,1105,1095,1100,1110,130
4,36,0,,5,0,Brighton,0,0,0,BHA,3,,False,0,1100,1100,1100,1105,1100,1100,131


In [165]:
df_positions = pd.json_normalize(response['element_types'])
df_positions

Unnamed: 0,id,plural_name,plural_name_short,singular_name,singular_name_short,squad_select,squad_min_select,squad_max_select,squad_min_play,squad_max_play,ui_shirt_specific,sub_positions_locked,element_count
0,1,Goalkeepers,GKP,Goalkeeper,GKP,2,,,1,1,True,[12],70
1,2,Defenders,DEF,Defender,DEF,5,,,3,5,False,[],219
2,3,Midfielders,MID,Midfielder,MID,5,,,2,5,False,[],303
3,4,Forwards,FWD,Forward,FWD,3,,,1,3,False,[],74


## Selecting Useful Data

### Player Data

In [166]:
df_players.columns

Index(['chance_of_playing_next_round', 'chance_of_playing_this_round', 'code',
       'cost_change_event', 'cost_change_event_fall', 'cost_change_start',
       'cost_change_start_fall', 'dreamteam_count', 'element_type', 'ep_next',
       'ep_this', 'event_points', 'first_name', 'form', 'id', 'in_dreamteam',
       'news', 'news_added', 'now_cost', 'photo', 'points_per_game',
       'second_name', 'selected_by_percent', 'special', 'squad_number',
       'status', 'team', 'team_code', 'total_points', 'transfers_in',
       'transfers_in_event', 'transfers_out', 'transfers_out_event',
       'value_form', 'value_season', 'web_name', 'region', 'minutes',
       'goals_scored', 'assists', 'clean_sheets', 'goals_conceded',
       'own_goals', 'penalties_saved', 'penalties_missed', 'yellow_cards',
       'red_cards', 'saves', 'bonus', 'bps', 'influence', 'creativity',
       'threat', 'ict_index', 'starts', 'expected_goals', 'expected_assists',
       'expected_goal_involvements', 'expected

In [167]:
players_cols_to_keep = [
    'id',
    'element_type',
    'now_cost',
    'team',
    'web_name',
    'chance_of_playing_next_round'
]

df_players_red = df_players[players_cols_to_keep]

### Team Data

In [168]:
df_teams.columns

Index(['code', 'draw', 'form', 'id', 'loss', 'name', 'played', 'points',
       'position', 'short_name', 'strength', 'team_division', 'unavailable',
       'win', 'strength_overall_home', 'strength_overall_away',
       'strength_attack_home', 'strength_attack_away', 'strength_defence_home',
       'strength_defence_away', 'pulse_id'],
      dtype='object')

In [169]:
teams_cols_to_keep = [
    'id',
    'name',
    'strength',
]

df_teams_red = df_teams[teams_cols_to_keep]

### Position Data

In [170]:
df_positions.columns

Index(['id', 'plural_name', 'plural_name_short', 'singular_name',
       'singular_name_short', 'squad_select', 'squad_min_select',
       'squad_max_select', 'squad_min_play', 'squad_max_play',
       'ui_shirt_specific', 'sub_positions_locked', 'element_count'],
      dtype='object')

In [171]:
positions_cols_to_keep = [
    'id',
    'singular_name_short'
]

df_positions_red = df_positions[positions_cols_to_keep]

## Merge Data

In [172]:
df = df_players_red.merge(
    df_positions_red,
    how='inner',
    left_on='element_type',
    right_on='id',
    suffixes=['_player','_position']
).merge(
    df_teams_red,
    how='inner',
    left_on='team',
    right_on='id',
    suffixes=['','_team']
)

cols_to_drop = ['element_type','id_position','id']
df.drop(columns=cols_to_drop, inplace=True)

df.head()

Unnamed: 0,id_player,now_cost,team,web_name,chance_of_playing_next_round,singular_name_short,name,strength
0,1,54,1,Fábio Vieira,0.0,MID,Arsenal,5
1,2,68,1,G.Jesus,100.0,FWD,Arsenal,5
2,3,62,1,Gabriel,,DEF,Arsenal,5
3,4,83,1,Havertz,75.0,FWD,Arsenal,5
4,5,40,1,Hein,0.0,GKP,Arsenal,5


In [173]:
from tqdm.auto import tqdm
tqdm.pandas()

def get_gameweek_history(player_id):
    '''get all gameweek info for a given player_id'''
    
    # send GET request to
    url = f'https://fantasy.premierleague.com/api/element-summary/{player_id}/'
    response = requests.get(url).json()

    # extract 'history' data from response into dataframe
    df = pd.json_normalize(response['history'])
    return df

# get gameweek histories for each player
points = df['id_player'].progress_apply(get_gameweek_history)
print(type(points))

# combine results into single dataframe
points = pd.concat(df for df in points)

# join web_name
points = df[['id_player', 'web_name', 'name', 'strength','chance_of_playing_next_round']].merge(
    points,
    left_on='id_player',
    right_on='element'
).rename(columns={'name':'team_name', 'strength':'team_strength'})

points

100%|██████████| 666/666 [01:06<00:00,  9.97it/s]


<class 'pandas.core.series.Series'>


Unnamed: 0,id_player,web_name,team_name,team_strength,chance_of_playing_next_round,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,team_a_score,round,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,value,transfers_balance,selected,transfers_in,transfers_out
0,1,Fábio Vieira,Arsenal,5,0.0,1,2,20,0,True,2024-08-17T14:00:00Z,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,55,0,2923,0,0
1,1,Fábio Vieira,Arsenal,5,0.0,1,11,2,0,False,2024-08-24T16:30:00Z,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,55,-790,2321,84,874
2,1,Fábio Vieira,Arsenal,5,0.0,1,21,5,0,True,2024-08-31T11:30:00Z,1,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,54,-279,2397,355,634
3,1,Fábio Vieira,Arsenal,5,0.0,1,39,18,0,False,2024-09-15T13:00:00Z,0,1,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,54,-747,1650,0,747
4,1,Fábio Vieira,Arsenal,5,0.0,1,47,13,0,False,2024-09-22T15:30:00Z,2,2,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,54,-174,1494,0,174
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4536,655,Forbs,Wolves,3,,655,41,2,0,False,2024-09-21T14:00:00Z,3,1,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,55,79,856,192,113
4537,655,Forbs,Wolves,3,,655,60,12,0,True,2024-09-28T16:30:00Z,1,2,6,38,0,0,0,1,0,0,0,1,0,0,0,-1,0.0,0.8,9.0,0.9,0,0.11,0.01,0.12,0.98,55,-92,996,89,181
4538,655,Forbs,Wolves,3,,655,63,4,1,False,2024-10-05T14:00:00Z,5,3,7,67,0,0,0,4,0,0,0,1,0,0,0,14,14.2,16.7,4.0,3.5,1,0.00,0.07,0.07,2.69,55,153,1382,318,165
4539,664,Pond,Wolves,3,,664,60,12,0,True,2024-09-28T16:30:00Z,1,2,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,40,0,0,0,0


In [174]:
max_round = points['round'].max()

points = points[points['round'] > max_round-5]

## Selecting Useful Data

In [175]:
points.columns

Index(['id_player', 'web_name', 'team_name', 'team_strength',
       'chance_of_playing_next_round', 'element', 'fixture', 'opponent_team',
       'total_points', 'was_home', 'kickoff_time', 'team_h_score',
       'team_a_score', 'round', 'minutes', 'goals_scored', 'assists',
       'clean_sheets', 'goals_conceded', 'own_goals', 'penalties_saved',
       'penalties_missed', 'yellow_cards', 'red_cards', 'saves', 'bonus',
       'bps', 'influence', 'creativity', 'threat', 'ict_index', 'starts',
       'expected_goals', 'expected_assists', 'expected_goal_involvements',
       'expected_goals_conceded', 'value', 'transfers_balance', 'selected',
       'transfers_in', 'transfers_out'],
      dtype='object')

In [176]:
points_cols_to_keep = [
    'id_player', 
    'web_name', 
    'round', 
    'expected_goals', 
    'expected_assists',
    'expected_goals_conceded', 
    'value',
    'minutes',
    'team_name',
    'chance_of_playing_next_round'
]

points_red = points[points_cols_to_keep]

points_red = points_red.merge(df[['id_player','singular_name_short']],
                              how='inner',
                              left_on='id_player',
                              right_on='id_player'
                              )

points_red

Unnamed: 0,id_player,web_name,round,expected_goals,expected_assists,expected_goals_conceded,value,minutes,team_name,chance_of_playing_next_round,singular_name_short
0,1,Fábio Vieira,3,0.00,0.00,0.00,54,0,Arsenal,0.0,MID
1,1,Fábio Vieira,4,0.00,0.00,0.00,54,0,Arsenal,0.0,MID
2,1,Fábio Vieira,5,0.00,0.00,0.00,54,0,Arsenal,0.0,MID
3,1,Fábio Vieira,6,0.00,0.00,0.00,54,0,Arsenal,0.0,MID
4,1,Fábio Vieira,7,0.00,0.00,0.00,54,0,Arsenal,0.0,MID
...,...,...,...,...,...,...,...,...,...,...,...
3293,655,Forbs,5,0.00,0.00,0.00,55,0,Wolves,,MID
3294,655,Forbs,6,0.11,0.01,0.98,55,38,Wolves,,MID
3295,655,Forbs,7,0.00,0.07,2.69,55,67,Wolves,,MID
3296,664,Pond,6,0.00,0.00,0.00,40,0,Wolves,,DEF


In [177]:
import numpy as np

points_red['exp_cs_ind'] = np.where(points_red['expected_goals_conceded'].astype(float) < 1, 1, 0)
points_red['played'] = np.where(points_red['minutes'] > 0, 1, 0)

In [178]:
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd

# Step 1: Fetch the website
url = f'https://understat.com/league/EPL/2024'
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

# Step 2: Find the specific <script> tag containing the JSON data
scripts = soup.find_all('script')
json_data = None

# Step 3: Look for the script containing the teamsData
for script in scripts:
    if 'datesData' in script.text:
        # Extract the content of the script tag as text
        json_text = script.text
        # Step 4: Isolate the JSON data by splitting the string
        json_text = json_text.split("JSON.parse('")[1].split("')")[0]
        # Step 5: Decode the string by replacing escape characters
        json_text = json_text.encode('utf-8').decode('unicode_escape')
        # Step 6: Load the decoded text as a Python dictionary
        json_data = json.loads(json_text)
        break

data = []
for match in json_data:
    data.append({
        'home_team': match['h']['title'],
        'away_team': match['a']['title'],
        'home_goals': match['goals']['h'],
        'away_goals': match['goals']['a'],
        'xG_home': match['xG']['h'],
        'xG_away': match['xG']['a'],
        'datetime': match['datetime']
    })

df_dates = pd.DataFrame(data)
df_dates

Unnamed: 0,home_team,away_team,home_goals,away_goals,xG_home,xG_away,datetime
0,Manchester United,Fulham,1,0,2.04268,0.418711,2024-08-16 19:00:00
1,Ipswich,Liverpool,0,2,0.342601,3.92906,2024-08-17 11:30:00
2,Arsenal,Wolverhampton Wanderers,2,0,1.6283,0.575835,2024-08-17 14:00:00
3,Everton,Brighton,0,3,0.405325,1.79083,2024-08-17 14:00:00
4,Newcastle United,Southampton,1,0,0.433489,1.95483,2024-08-17 14:00:00
...,...,...,...,...,...,...,...
375,Southampton,Arsenal,,,,,2025-05-25 15:00:00
376,Wolverhampton Wanderers,Brentford,,,,,2025-05-25 15:00:00
377,Bournemouth,Leicester,,,,,2025-05-25 15:00:00
378,Ipswich,West Ham,,,,,2025-05-25 15:00:00


In [179]:
from datetime import datetime, timedelta
current_dt = datetime.now()
current_dt_plus_3wks = current_dt + timedelta(days=30)

df_upcoming = df_dates[(pd.to_datetime(df_dates['datetime']) >= current_dt) & (pd.to_datetime(df_dates['datetime']) < current_dt_plus_3wks)]
df_upcoming = df_upcoming.merge(df_teams[['name','strength']],
                                how='left',
                                left_on='home_team',
                                right_on='name').rename(columns={'strength':'home_strength'})

df_upcoming = df_upcoming.merge(df_teams[['name','strength']],
                                how='left',
                                left_on='away_team',
                                right_on='name').rename(columns={'strength':'away_strength'})

print(df_upcoming['home_team'].unique(), df_upcoming['away_team'].unique())

df_upcoming_home = df_upcoming.groupby(by=['home_team']).agg({
    'away_team':'nunique',
    'away_strength':'sum',
})

df_upcoming_away = df_upcoming.groupby(by=['away_team']).agg({
    'home_team':'nunique',
    'home_strength':'sum',
})

df_upcoming_combined = df_upcoming_home.merge(df_upcoming_away,
                                              how='outer',
                                              left_index=True,
                                              right_index=True
                                              )

df_upcoming_combined.fillna(0,inplace=True)

df_upcoming_combined['mean_strength'] = (df_upcoming_combined['home_strength'] + df_upcoming_combined['away_strength']) / (df_upcoming_combined['home_team'] + df_upcoming_combined['away_team'])
df_upcoming_combined['team_name'] = df_upcoming_combined.index
df_upcoming_combined = remap_team_names_df(df_upcoming_combined)
df_upcoming_combined['team_name'].unique()


['Tottenham' 'Fulham' 'Ipswich' 'Manchester United' 'Newcastle United'
 'Nottingham Forest' 'Southampton' 'Liverpool' 'Bournemouth'
 'Wolverhampton Wanderers' 'Leicester' 'Manchester City' 'Aston Villa'
 'Brentford' 'Brighton' 'Everton' 'Chelsea' 'Crystal Palace' 'West Ham'
 'Arsenal'] ['West Ham' 'Aston Villa' 'Everton' 'Brentford' 'Brighton'
 'Crystal Palace' 'Leicester' 'Chelsea' 'Arsenal' 'Manchester City'
 'Nottingham Forest' 'Southampton' 'Bournemouth' 'Ipswich'
 'Wolverhampton Wanderers' 'Fulham' 'Newcastle United' 'Tottenham'
 'Manchester United' 'Liverpool']


array(['Arsenal', 'Aston Villa', 'Bournemouth', 'Brentford', 'Brighton',
       'Chelsea', 'Crystal Palace', 'Everton', 'Fulham', 'Ipswich',
       'Leicester', 'Liverpool', 'Man City', 'Man Utd', 'Newcastle',
       "Nott'm Forest", 'Southampton', 'Spurs', 'West Ham', 'Wolves'],
      dtype=object)

In [180]:
df_upcoming_combined.index

Index(['Arsenal', 'Aston Villa', 'Bournemouth', 'Brentford', 'Brighton',
       'Chelsea', 'Crystal Palace', 'Everton', 'Fulham', 'Ipswich',
       'Leicester', 'Liverpool', 'Manchester City', 'Manchester United',
       'Newcastle United', 'Nottingham Forest', 'Southampton', 'Tottenham',
       'West Ham', 'Wolverhampton Wanderers'],
      dtype='object', name='home_team')

In [181]:
from datetime import datetime
current_dt = datetime.now()

df_dates = df_dates[pd.to_datetime(df_dates['datetime']) <= current_dt]
df_dates['xG_home'] = df_dates['xG_home'].astype(float)
df_dates['xG_away'] = df_dates['xG_away'].astype(float)


df_xgc_h = df_dates.groupby(by=['home_team']).agg({
    'away_team':'nunique',
    'xG_away':'mean',
})

df_xgc_a = df_dates.groupby(by=['away_team']).agg({
    'home_team':'nunique',
    'xG_home':'mean',
})

df_xgc = df_xgc_h.merge(df_xgc_a,
                        how='inner',
                        left_index=True,
                        right_index=True)

df_xgc['total_xgc'] = df_xgc['away_team'] * df_xgc['xG_away'] + df_xgc['home_team'] * df_xgc['xG_home']
df_xgc['team_xgc_per_game'] = df_xgc['total_xgc'] / (df_xgc['away_team'] + df_xgc['home_team'])
df_xgc

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_dates['xG_home'] = df_dates['xG_home'].astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_dates['xG_away'] = df_dates['xG_away'].astype(float)


Unnamed: 0_level_0,away_team,xG_away,home_team,xG_home,total_xgc,team_xgc_per_game
home_team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Arsenal,4,1.001089,3,1.621705,8.869469,1.267067
Aston Villa,4,0.808701,3,1.808507,8.660323,1.237189
Bournemouth,3,1.173888,4,1.787428,10.671377,1.524482
Brentford,4,1.092393,3,2.722123,12.535941,1.790849
Brighton,4,1.117226,3,2.160565,10.950598,1.564371
Chelsea,4,1.128437,3,1.571226,9.227425,1.318204
Crystal Palace,4,1.90249,3,1.822553,13.07762,1.868231
Everton,4,1.915442,3,2.369373,14.76989,2.109984
Fulham,3,0.879135,4,1.18273,7.368323,1.052618
Ipswich,3,2.08761,4,3.031823,18.39012,2.62716


In [182]:
df_xgc.index.unique()

Index(['Arsenal', 'Aston Villa', 'Bournemouth', 'Brentford', 'Brighton',
       'Chelsea', 'Crystal Palace', 'Everton', 'Fulham', 'Ipswich',
       'Leicester', 'Liverpool', 'Manchester City', 'Manchester United',
       'Newcastle United', 'Nottingham Forest', 'Southampton', 'Tottenham',
       'West Ham', 'Wolverhampton Wanderers'],
      dtype='object', name='home_team')

In [183]:
points_red['team_name'].unique()

array(['Arsenal', 'Aston Villa', 'Bournemouth', 'Brentford', 'Brighton',
       'Chelsea', 'Crystal Palace', 'Everton', 'Fulham', 'Ipswich',
       'Leicester', 'Liverpool', 'Man City', 'Man Utd', 'Newcastle',
       "Nott'm Forest", 'Southampton', 'Spurs', 'West Ham', 'Wolves'],
      dtype=object)

In [184]:
mydict = {'a':1, 'b':2}
type(mydict.keys)

builtin_function_or_method

In [185]:
def remap_team_names_row(row):
    teams_to_rename = {
        'Manchester City':'Man City',
        'Manchester United':'Man Utd',
        'Newcastle United':'Newcastle',
        'Nottingham Forest':"Nott'm Forest",
        'Tottenham':'Spurs',
        'Wolverhampton Wanderers':'Wolves',
    }

    if row['team_name'] in (teams_to_rename.keys()):
        row['team_name'] = teams_to_rename[row['team_name']]

    return row

def remap_team_names_df(df):
    df['team_name'] = df.index
    df = df.apply(remap_team_names_row, axis=1)
    return df

df_xgc = remap_team_names_df(df_xgc)
df_xgc['team_name'].unique()

array(['Arsenal', 'Aston Villa', 'Bournemouth', 'Brentford', 'Brighton',
       'Chelsea', 'Crystal Palace', 'Everton', 'Fulham', 'Ipswich',
       'Leicester', 'Liverpool', 'Man City', 'Man Utd', 'Newcastle',
       "Nott'm Forest", 'Southampton', 'Spurs', 'West Ham', 'Wolves'],
      dtype=object)

In [186]:
df_xgc

Unnamed: 0_level_0,away_team,xG_away,home_team,xG_home,total_xgc,team_xgc_per_game,team_name
home_team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Arsenal,4,1.001089,3,1.621705,8.869469,1.267067,Arsenal
Aston Villa,4,0.808701,3,1.808507,8.660323,1.237189,Aston Villa
Bournemouth,3,1.173888,4,1.787428,10.671377,1.524482,Bournemouth
Brentford,4,1.092393,3,2.722123,12.535941,1.790849,Brentford
Brighton,4,1.117226,3,2.160565,10.950598,1.564371,Brighton
Chelsea,4,1.128437,3,1.571226,9.227425,1.318204,Chelsea
Crystal Palace,4,1.90249,3,1.822553,13.07762,1.868231,Crystal Palace
Everton,4,1.915442,3,2.369373,14.76989,2.109984,Everton
Fulham,3,0.879135,4,1.18273,7.368323,1.052618,Fulham
Ipswich,3,2.08761,4,3.031823,18.39012,2.62716,Ipswich


In [187]:
points_red = points_red.merge(df_xgc[['team_name','team_xgc_per_game']],
                              how='inner',
                              left_on='team_name',
                              right_on='team_name',
                              suffixes=['','_dupe'])

points_red.head()

Unnamed: 0,id_player,web_name,round,expected_goals,expected_assists,expected_goals_conceded,value,minutes,team_name,chance_of_playing_next_round,singular_name_short,exp_cs_ind,played,team_xgc_per_game
0,1,Fábio Vieira,3,0.0,0.0,0.0,54,0,Arsenal,0.0,MID,1,0,1.267067
1,1,Fábio Vieira,4,0.0,0.0,0.0,54,0,Arsenal,0.0,MID,1,0,1.267067
2,1,Fábio Vieira,5,0.0,0.0,0.0,54,0,Arsenal,0.0,MID,1,0,1.267067
3,1,Fábio Vieira,6,0.0,0.0,0.0,54,0,Arsenal,0.0,MID,1,0,1.267067
4,1,Fábio Vieira,7,0.0,0.0,0.0,54,0,Arsenal,0.0,MID,1,0,1.267067


In [188]:
cols_to_group = [
    'id_player',
    'web_name',
    'singular_name_short',
    'team_name'
]

cols_to_drop = [
    'round',
]

#points_red.drop(columns=cols_to_drop, inplace=True)

for col in points_red:
    if col in cols_to_group:
        continue
    elif col in ['minutes']:
        continue
    else:
        points_red[col] = points_red[col].astype(float)

points_grp = points_red.groupby(by=cols_to_group,as_index=False).mean()

points_grp.head()

Unnamed: 0,id_player,web_name,singular_name_short,team_name,round,expected_goals,expected_assists,expected_goals_conceded,value,minutes,chance_of_playing_next_round,exp_cs_ind,played,team_xgc_per_game
0,1,Fábio Vieira,MID,Arsenal,5.0,0.0,0.0,0.0,54.0,0.0,0.0,1.0,0.0,1.267067
1,2,G.Jesus,FWD,Arsenal,5.0,0.098,0.012,0.228,68.0,14.6,100.0,1.0,0.8,1.267067
2,3,Gabriel,DEF,Arsenal,5.0,0.27,0.022,1.14,60.6,90.0,,0.6,1.0,1.267067
3,4,Havertz,FWD,Arsenal,5.0,0.726,0.026,1.14,81.4,90.0,75.0,0.6,1.0,1.267067
4,5,Hein,GKP,Arsenal,5.0,0.0,0.0,0.0,40.0,0.0,0.0,1.0,0.0,1.267067


In [189]:
import math

def calculate_exp_points(row):
    points_by_pos = {
        'GKP':{'goal':10, 'ass':3, 'cs':4, 'gc':-0.5},
        'DEF':{'goal':6, 'ass':3, 'cs':4, 'gc':-0.5},
        'MID':{'goal':5, 'ass':3, 'cs':1, 'gc':0},
        'FWD':{'goal':4, 'ass':3, 'cs':0, 'gc':0}
    }

    if row['minutes'] > 0:
        if row['minutes'] >= 60:
            expected_mins_points = 2
        else:
            expected_mins_points = 1
    else:
        expected_mins_points = 0

    expected_goal_points = row['expected_goals'] * points_by_pos[row['singular_name_short']]['goal']
    expected_ass_points = row['expected_assists'] * points_by_pos[row['singular_name_short']]['ass']
    expected_cs_points = math.exp(-row['team_xgc_per_game']) * points_by_pos[row['singular_name_short']]['cs']
    expected_gc_points_lost = row['expected_assists'] * points_by_pos[row['singular_name_short']]['gc']
    
    row['expected_points'] = expected_mins_points + expected_goal_points + expected_ass_points + expected_cs_points + expected_gc_points_lost
    return row

points_grp = points_grp.progress_apply(calculate_exp_points, axis=1)
points_grp.head()

100%|██████████| 666/666 [00:00<00:00, 1482.39it/s]


Unnamed: 0,id_player,web_name,singular_name_short,team_name,round,expected_goals,expected_assists,expected_goals_conceded,value,minutes,chance_of_playing_next_round,exp_cs_ind,played,team_xgc_per_game,expected_points
0,1,Fábio Vieira,MID,Arsenal,5.0,0.0,0.0,0.0,54.0,0.0,0.0,1.0,0.0,1.267067,0.281657
1,2,G.Jesus,FWD,Arsenal,5.0,0.098,0.012,0.228,68.0,14.6,100.0,1.0,0.8,1.267067,1.428
2,3,Gabriel,DEF,Arsenal,5.0,0.27,0.022,1.14,60.6,90.0,,0.6,1.0,1.267067,4.801626
3,4,Havertz,FWD,Arsenal,5.0,0.726,0.026,1.14,81.4,90.0,75.0,0.6,1.0,1.267067,4.982
4,5,Hein,GKP,Arsenal,5.0,0.0,0.0,0.0,40.0,0.0,0.0,1.0,0.0,1.267067,1.126626


In [190]:
points_grp.sort_values(by='expected_points', ascending=False).head()

Unnamed: 0,id_player,web_name,singular_name_short,team_name,round,expected_goals,expected_assists,expected_goals_conceded,value,minutes,chance_of_playing_next_round,exp_cs_ind,played,team_xgc_per_game,expected_points
181,182,Palmer,MID,Chelsea,5.0,0.666,0.29,0.95,106.2,84.6,100.0,0.6,1.0,1.318204,6.467616
327,328,M.Salah,MID,Liverpool,5.0,0.554,0.2,0.8,127.0,86.4,,0.6,1.0,0.81692,5.81179
98,99,Mbeumo,MID,Brentford,5.0,0.546,0.282,1.86,71.8,90.0,,0.2,1.0,1.790849,5.742819
350,351,Haaland,FWD,Man City,5.0,0.864,0.066,1.302,152.6,90.0,100.0,0.6,1.0,1.275054,5.654
179,180,N.Jackson,FWD,Chelsea,5.0,0.856,0.032,0.824,77.0,77.8,,0.8,1.0,1.318204,5.52


# Refining Process

Starting with `points` dataframe

In [197]:
points_cols_to_keep = [
    'id_player', 
    'expected_goals', 
    'expected_assists',
    'expected_goals_conceded', 
    'minutes',
    'value',
    'team_name',
    'chance_of_playing_next_round'
]

points_red = points[points_cols_to_keep]
points_temp = points_red.copy()

points_red = points_red[points_red['chance_of_playing_next_round'] >= 75]

points_red = points_red.merge(df[['id_player','singular_name_short']],
                              how='inner',
                              left_on='id_player',
                              right_on='id_player'
                              )


points_red = points_red.merge(df_xgc[['team_name','team_xgc_per_game']],
                              how='inner',
                              left_on='team_name',
                              right_on='team_name',
                              suffixes=['','_xgc'])

points_red = points_red.merge(df_upcoming_combined[['team_name','mean_strength']],
                              how='inner',
                              left_on='team_name',
                              right_on='team_name',
                              suffixes=['','_upcoming'])

cols_to_group = [
    'id_player',
    'singular_name_short',
]

cols_to_drop = [
    'value',
    'team_name'
]

points_red.drop(columns=cols_to_drop, inplace=True)

for col in points_red:
    if col in cols_to_group:
        continue
    elif col in ['minutes']:
        continue
    else:
        points_red[col] = points_red[col].astype(float)

points_grp = points_red.groupby(by=cols_to_group,as_index=False).mean()

import math

def calculate_exp_points(row):
    points_by_pos = {
        'GKP':{'goal':10, 'ass':3, 'cs':4, 'gc':-0.5},
        'DEF':{'goal':6, 'ass':3, 'cs':4, 'gc':-0.5},
        'MID':{'goal':5, 'ass':3, 'cs':1, 'gc':0},
        'FWD':{'goal':4, 'ass':3, 'cs':0, 'gc':0}
    }

    # if row['minutes'] > 0:
    #     if row['minutes'] >= 60:
    #         expected_mins_points = 2
    #     else:
    #         expected_mins_points = 1
    # else:
    #     row['expected_points'] = 0
    #     return row

    minutes_multiplier = row['minutes'] / 90


    expected_goal_points = row['expected_goals'] * points_by_pos[row['singular_name_short']]['goal']
    expected_ass_points = row['expected_assists'] * points_by_pos[row['singular_name_short']]['ass']
    expected_cs_points = math.exp(-row['team_xgc_per_game']) * points_by_pos[row['singular_name_short']]['cs']
    expected_gc_points_lost = row['expected_assists'] * points_by_pos[row['singular_name_short']]['gc']

    if row['mean_strength'] >= 3.5:
        fixture_multiplier = 0.9
    elif row['mean_strength'] <= 2.5:
        fixture_multiplier = 1.1
    else:
        fixture_multiplier = 1
    
    row['expected_points'] = fixture_multiplier * minutes_multiplier * (expected_goal_points + expected_ass_points + expected_cs_points + expected_gc_points_lost)
    return row

df_exp_points = points_grp.progress_apply(calculate_exp_points, axis=1)
df_exp_points.sort_values(by='expected_points',ascending=False).head()

100%|██████████| 124/124 [00:00<00:00, 1403.49it/s]


Unnamed: 0,id_player,singular_name_short,expected_goals,expected_assists,expected_goals_conceded,minutes,chance_of_playing_next_round,team_xgc_per_game,mean_strength,expected_points
39,182,MID,0.666,0.29,0.95,84.6,100.0,1.318204,2.25,4.619515
73,351,FWD,0.864,0.066,1.302,90.0,100.0,1.275054,2.0,4.0194
5,17,MID,0.32,0.536,0.81,80.0,100.0,1.267067,2.5,3.412109
1,4,FWD,0.726,0.026,1.14,90.0,75.0,1.267067,2.5,3.2802
14,58,FWD,0.538,0.066,0.626,81.2,100.0,1.237189,2.5,2.332244


In [198]:
df_exp_points = df_exp_points.merge(df_players[['id','now_cost','team','web_name']],
                                    how='left',
                                    left_on='id_player',
                                    right_on='id',).merge(
                                        df_teams[['id','name']],
                                        how='left',
                                        left_on='team',
                                        right_on='id'
                                    )

df_exp_points

Unnamed: 0,id_player,singular_name_short,expected_goals,expected_assists,expected_goals_conceded,minutes,chance_of_playing_next_round,team_xgc_per_game,mean_strength,expected_points,id_x,now_cost,team,web_name,id_y,name
0,2,FWD,0.098,0.012,0.228,14.6,100.0,1.267067,2.50,0.076374,2,68,1,G.Jesus,1,Arsenal
1,4,FWD,0.726,0.026,1.140,90.0,75.0,1.267067,2.50,3.280200,4,83,1,Havertz,1,Arsenal
2,6,DEF,0.052,0.030,0.858,69.8,75.0,1.267067,2.50,1.291291,6,55,1,J.Timber,1,Arsenal
3,15,GKP,0.000,0.002,1.140,90.0,100.0,1.267067,2.50,1.244789,15,56,1,Raya,1,Arsenal
4,16,MID,0.046,0.092,0.642,63.6,100.0,1.267067,2.50,0.612272,16,63,1,Rice,1,Arsenal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
119,595,MID,0.000,0.000,0.000,0.6,100.0,1.564371,1.00,0.001534,595,54,5,Gruda,5,Brighton
120,609,FWD,0.000,0.000,0.000,0.0,100.0,1.121329,2.75,0.000000,609,45,18,Lankshear,18,Spurs
121,610,MID,0.008,0.000,0.038,4.2,100.0,1.121329,2.75,0.017073,610,45,18,Moore,18,Spurs
122,628,DEF,0.006,0.042,1.024,27.6,100.0,1.564371,1.00,0.329871,628,45,5,F.Kadıoğlu,5,Brighton


# Optimising Team Choice

In [199]:
import pandas as pd
from pulp import LpProblem, LpVariable, LpMaximize, lpSum, LpStatus, PULP_CBC_CMD

def select_fpl_squad(df,
                     metric,
                     num_gks=2,
                     num_defs=5,
                     num_mids=5,
                     num_atts=3,
                     max_value=1000):
    """
    Selects the optimal 15-man squad based on the given constraints.
    
    Parameters:
    df (pd.DataFrame): DataFrame containing player data with columns 'Player', 'value', 'P', 'Team', 'Pos'.
    
    Returns:
    pd.DataFrame: DataFrame containing the selected squad.
    """
    # Reset index to ensure it ranges from 0 to N-1
    df = df.reset_index(drop=True)
    df.set_index('id_player',inplace=True)
    player_ids = df.index

    # Convert 'value' and 'P' columns to numeric
    df['now_cost'] = pd.to_numeric(df['now_cost'])
    df[metric] = pd.to_numeric(df[metric])

    # Define the LP problem
    prob = LpProblem("FPL_Squad_Selection", LpMaximize)

    # Define binary decision variables for each player
    x = LpVariable.dicts('x', player_ids, cat='Binary')

    # Objective function: maximize total points
    prob += lpSum(df.loc[i, metric] * x[i] for i in player_ids), "Total_Points"

    # Total value constraint
    prob += lpSum(df.loc[i, 'now_cost'] * x[i] for i in player_ids) <= max_value, "Total_value"

    # Position constraints
    positions = {'GKP': num_gks, 'DEF': num_defs, 'MID': num_mids, 'FWD': num_atts}
    for pos, count in positions.items():
        pos_ids = df[df['singular_name_short'] == pos].index.tolist()
        prob += lpSum(x[i] for i in pos_ids) == count, f"Total_{pos.upper()}"

    # Team constraints: no more than 3 players from each team
    for team in df['team'].unique():
        team_ids = df[df['team'] == team].index.tolist()
        prob += lpSum(x[i] for i in team_ids) <= 3, f"Team_{team}"

    # Add constraints to ensure each player is selected at most once
    for i in player_ids:
        prob += x[i] <= 1, f"Select_{i}_At_Most_Once"

    # Solve the problem
    prob.solve(PULP_CBC_CMD(msg=0))

    # Check if an optimal solution was found
    if LpStatus[prob.status] != 'Optimal':
        print("No optimal solution found.")
        return None

    # Get the selected players
    selected_ids = [i for i in player_ids if x[i].varValue == 1]
    selected_squad = df.loc[selected_ids].reset_index(drop=True)

    return selected_squad

In [200]:
metric = 'expected_points'
optimal_squad_no_subs = select_fpl_squad(df_exp_points,metric)

In [201]:
cols_to_keep = [
    'web_name',
    'name',
    'singular_name_short',
    'expected_points',
    'now_cost',
    'minutes',
    'expected_goals',
    'expected_assists',
    'expected_goals_conceded'
]

optimal_squad_no_subs['singular_name_short'] = pd.Categorical(optimal_squad_no_subs['singular_name_short'], ['GKP','DEF','MID','FWD'])
optimal_squad_no_subs[cols_to_keep].sort_values(by=['singular_name_short','name'])

Unnamed: 0,web_name,name,singular_name_short,expected_points,now_cost,minutes,expected_goals,expected_assists,expected_goals_conceded
1,Raya,Arsenal,GKP,1.244789,56,90.0,0.0,0.002,1.14
5,Verbruggen,Brighton,GKP,0.920566,45,90.0,0.0,0.0,1.898
3,Konsa,Aston Villa,DEF,1.303921,45,74.2,0.042,0.01,0.638
10,Aina,Nott'm Forest,DEF,1.44268,45,90.0,0.004,0.104,1.244
11,Pedro Porro,Spurs,DEF,2.068503,55,89.4,0.084,0.11,1.176
12,Udogie,Spurs,DEF,1.492547,49,81.0,0.05,0.022,1.052
13,Aït-Nouri,Wolves,DEF,1.724971,44,82.8,0.182,0.062,1.942
2,Saka,Arsenal,MID,3.412109,101,80.0,0.32,0.536,0.81
4,O.Dango,Bournemouth,MID,0.711364,50,34.8,0.226,0.164,0.356
6,Palmer,Chelsea,MID,4.619515,108,84.6,0.666,0.29,0.95


In [202]:
optimal_squad_3_subs = select_fpl_squad(df_exp_points,
                                        metric,
                                        num_gks=1,
                                        num_defs=4,
                                        num_mids=4,
                                        num_atts=3,
                                        max_value=877)

optimal_squad_3_subs['singular_name_short'] = pd.Categorical(optimal_squad_3_subs['singular_name_short'], ['GKP','DEF','MID','FWD'])
optimal_squad_3_subs[cols_to_keep].sort_values(by=['singular_name_short','name'])

Unnamed: 0,web_name,name,singular_name_short,expected_points,now_cost,minutes,expected_goals,expected_assists,expected_goals_conceded
1,Raya,Arsenal,GKP,1.244789,56,90.0,0.0,0.002,1.14
7,Aina,Nott'm Forest,DEF,1.44268,45,90.0,0.004,0.104,1.244
8,Pedro Porro,Spurs,DEF,2.068503,55,89.4,0.084,0.11,1.176
9,Udogie,Spurs,DEF,1.492547,49,81.0,0.05,0.022,1.052
10,Aït-Nouri,Wolves,DEF,1.724971,44,82.8,0.182,0.062,1.942
2,Saka,Arsenal,MID,3.412109,101,80.0,0.32,0.536,0.81
4,Palmer,Chelsea,MID,4.619515,108,84.6,0.666,0.29,0.95
6,Kovačić,Man City,MID,1.116465,55,76.8,0.11,0.12,1.294
11,Mario Jr.,Wolves,MID,1.338017,50,90.0,0.176,0.074,2.154
0,Havertz,Arsenal,FWD,3.2802,83,90.0,0.726,0.026,1.14


In [211]:
from pprint import pprint

print(tabulate(df, headers='keys', tablefmt='psql'))

+-----+-------------+------------+--------+------------------+--------------------------------+-----------------------+----------------+------------+
|     |   id_player |   now_cost |   team | web_name         |   chance_of_playing_next_round | singular_name_short   | name           |   strength |
|-----+-------------+------------+--------+------------------+--------------------------------+-----------------------+----------------+------------|
|   0 |           1 |         54 |      1 | Fábio Vieira     |                              0 | MID                   | Arsenal        |          5 |
|   1 |           2 |         68 |      1 | G.Jesus          |                            100 | FWD                   | Arsenal        |          5 |
|   2 |           3 |         62 |      1 | Gabriel          |                            nan | DEF                   | Arsenal        |          5 |
|   3 |           4 |         83 |      1 | Havertz          |                             75 | FWD 

In [205]:
pip install tabulate

Collecting tabulate
  Downloading tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)
Downloading tabulate-0.9.0-py3-none-any.whl (35 kB)
Installing collected packages: tabulate
Successfully installed tabulate-0.9.0
Note: you may need to restart the kernel to use updated packages.


In [219]:
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd
from datetime import datetime, timedelta
import math
from pulp import LpProblem, LpVariable, LpMaximize, lpSum, LpStatus, PULP_CBC_CMD
from tabulate import tabulate

def get_gameweek_history(player_id):
    url = f'https://fantasy.premierleague.com/api/element-summary/{player_id}/'
    response = requests.get(url).json()
    df_history = pd.json_normalize(response['history'])
    return df_history

def get_fpl_data():
    url = 'https://fantasy.premierleague.com/api/bootstrap-static/'
    response = requests.get(url).json()
    
    df_players = pd.json_normalize(response['elements'])
    df_teams = pd.json_normalize(response['teams'])
    df_positions = pd.json_normalize(response['element_types'])

    df_master = df_players.merge(
        df_positions,
        how='inner',
        left_on='element_type',
        right_on='id',
        suffixes=['_player','_position']
    ).merge(
        df_teams,
        how='inner',
        left_on='team',
        right_on='id',
        suffixes=['','_team']
    )

    df_points = df_master['id_player'].apply(get_gameweek_history)
    df_points = pd.concat(df for df in df_points)

    
    df_points = df_master[['id_player', 'web_name', 'name', 'strength']].merge(
        df_points,
        left_on='id_player',
        right_on='element'
    )
    return df_players, df_teams, df_points

def get_date_data():
    # Step 1: Fetch the website
    url = f'https://understat.com/league/EPL/2024'
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Step 2: Find the specific <script> tag containing the JSON data
    scripts = soup.find_all('script')
    json_data = None

    # Step 3: Look for the script containing the teamsData
    for script in scripts:
        if 'datesData' in script.text:
            # Extract the content of the script tag as text
            json_text = script.text
            # Step 4: Isolate the JSON data by splitting the string
            json_text = json_text.split("JSON.parse('")[1].split("')")[0]
            # Step 5: Decode the string by replacing escape characters
            json_text = json_text.encode('utf-8').decode('unicode_escape')
            # Step 6: Load the decoded text as a Python dictionary
            json_data = json.loads(json_text)
            break

    data = []
    for match in json_data:
        data.append({
            'home_team': match['h']['title'],
            'away_team': match['a']['title'],
            'home_goals': match['goals']['h'],
            'away_goals': match['goals']['a'],
            'xG_home': match['xG']['h'],
            'xG_away': match['xG']['a'],
            'datetime': match['datetime']
        })

    df_dates = pd.DataFrame(data)
    return df_dates

def _remap_team_names_row(row):
    teams_to_rename = {
        'Manchester City':'Man City',
        'Manchester United':'Man Utd',
        'Newcastle United':'Newcastle',
        'Nottingham Forest':"Nott'm Forest",
        'Tottenham':'Spurs',
        'Wolverhampton Wanderers':'Wolves',
    }

    if row['team_name'] in (teams_to_rename.keys()):
        row['team_name'] = teams_to_rename[row['team_name']]

    return row

def _remap_team_names_df(df):
    df['team_name'] = df.index
    df = df.apply(_remap_team_names_row, axis=1)
    return df

def calc_upcoming_fixture_difficulty(df_dates, df_teams):
    current_dt = datetime.now()
    current_dt_plus_3wks = current_dt + timedelta(days=30)

    df_upcoming = df_dates[(pd.to_datetime(df_dates['datetime']) >= current_dt) & (pd.to_datetime(df_dates['datetime']) < current_dt_plus_3wks)]
    
    df_upcoming = df_upcoming.merge(df_teams[['name','strength']],
                                    how='left',
                                    left_on='home_team',
                                    right_on='name').rename(columns={'strength':'home_strength'})

    df_upcoming = df_upcoming.merge(df_teams[['name','strength']],
                                    how='left',
                                    left_on='away_team',
                                    right_on='name').rename(columns={'strength':'away_strength'})

    df_upcoming_home = df_upcoming.groupby(by=['home_team']).agg({
        'away_team':'nunique',
        'away_strength':'sum',
    })

    df_upcoming_away = df_upcoming.groupby(by=['away_team']).agg({
        'home_team':'nunique',
        'home_strength':'sum',
    })

    df_upcoming_combined = df_upcoming_home.merge(df_upcoming_away,
                                         how='outer',
                                         left_index=True,
                                         right_index=True
                                         ).fillna(0,inplace=True)

    df_upcoming_combined['mean_strength'] = (df_upcoming_combined['home_strength'] + df_upcoming_combined['away_strength']) / (df_upcoming_combined['home_team'] + df_upcoming_combined['away_team'])
    df_upcoming_combined['team_name'] = df_upcoming_combined.index
    df_upcoming_combined = _remap_team_names_df(df_upcoming_combined)
    return df_upcoming_combined

def calc_exp_goals_conceded(df_dates):
    current_dt = datetime.now()

    df_dates = df_dates[pd.to_datetime(df_dates['datetime']) <= current_dt]
    df_dates['xG_home'] = df_dates['xG_home'].astype(float)
    df_dates['xG_away'] = df_dates['xG_away'].astype(float)

    df_xgc_h = df_dates.groupby(by=['home_team']).agg({
        'away_team':'nunique',
        'xG_away':'mean',
    })

    df_xgc_a = df_dates.groupby(by=['away_team']).agg({
        'home_team':'nunique',
        'xG_home':'mean',
    })

    df_xgc = df_xgc_h.merge(df_xgc_a,
                            how='inner',
                            left_index=True,
                            right_index=True)

    df_xgc['total_xgc'] = df_xgc['away_team'] * df_xgc['xG_away'] + df_xgc['home_team'] * df_xgc['xG_home']
    df_xgc['team_xgc_per_game'] = df_xgc['total_xgc'] / (df_xgc['away_team'] + df_xgc['home_team'])
    df_xgc = _remap_team_names_df(df_xgc)
    return df_xgc

def group_points_data(df_points, df_xgc, df_upcoming):
    points_cols_to_keep = [
        'id_player', 
        'expected_goals', 
        'expected_assists',
        'expected_goals_conceded', 
        'minutes',
        'value',
        'team_name',
        'chance_of_playing_next_round'
    ]

    df_points = df_points[points_cols_to_keep]
    df_points = df_points[df_points['chance_of_playing_next_round'] >= 75]

    df_points = df_points.merge(df_xgc[['team_name','team_xgc_per_game']],
                                how='inner',
                                left_on='team_name',
                                right_on='team_name',
                                suffixes=['','_xgc'])

    df_points = df_points.merge(df_upcoming[['team_name','mean_strength']],
                                how='inner',
                                left_on='team_name',
                                right_on='team_name',
                                suffixes=['','_upcoming'])

    cols_to_group = [
        'id_player',
        'singular_name_short',
    ]

    cols_to_drop = [
        'value',
        'team_name'
    ]

    df_points.drop(columns=cols_to_drop, inplace=True)

    for col in df_points:
        if col in cols_to_group:
            continue
        elif col in ['minutes']:
            continue
        else:
            df_points[col] = df_points[col].astype(float)

    df_points_grp = df_points.groupby(by=cols_to_group,as_index=False).mean()
    return df_points_grp

def calculate_exp_points(row):
    points_by_pos = {
        'GKP':{'goal':10, 'ass':3, 'cs':4, 'gc':-0.5},
        'DEF':{'goal':6, 'ass':3, 'cs':4, 'gc':-0.5},
        'MID':{'goal':5, 'ass':3, 'cs':1, 'gc':0},
        'FWD':{'goal':4, 'ass':3, 'cs':0, 'gc':0}
    }

    # if row['minutes'] > 0:
    #     if row['minutes'] >= 60:
    #         expected_mins_points = 2
    #     else:
    #         expected_mins_points = 1
    # else:
    #     row['expected_points'] = 0
    #     return row

    minutes_multiplier = row['minutes'] / 90


    expected_goal_points = row['expected_goals'] * points_by_pos[row['singular_name_short']]['goal']
    expected_ass_points = row['expected_assists'] * points_by_pos[row['singular_name_short']]['ass']
    expected_cs_points = math.exp(-row['team_xgc_per_game']) * points_by_pos[row['singular_name_short']]['cs']
    expected_gc_points_lost = row['expected_assists'] * points_by_pos[row['singular_name_short']]['gc']

    if row['mean_strength'] >= 3.5:
        fixture_multiplier = 0.9
    elif row['mean_strength'] <= 2.5:
        fixture_multiplier = 1.1
    else:
        fixture_multiplier = 1
    
    row['expected_points'] = fixture_multiplier * minutes_multiplier * (expected_goal_points + expected_ass_points + expected_cs_points + expected_gc_points_lost)
    return row

def apply_exp_goals_calcs(df_points):
    df_exp_points = df_points.apply(calculate_exp_points, axis=1)
    return df_exp_points

def add_additional_cols(df_exp_points, df_players, df_teams):
    df_exp_points = df_exp_points.merge(
                                        df_players[['id','now_cost','team','web_name']],
                                        how='left',
                                        left_on='id_player',
                                        right_on='id',
                                        ).merge(
                                        df_teams[['id','name']],
                                        how='left',
                                        left_on='team',
                                        right_on='id'
                                        )
    return df_exp_points

def select_fpl_squad(df,
                     metric,
                     num_gks=2,
                     num_defs=5,
                     num_mids=5,
                     num_atts=3,
                     max_value=1000):
    """
    Selects the optimal 15-man squad based on the given constraints.
    
    Parameters:
    df (pd.DataFrame): DataFrame containing player data with columns 'Player', 'value', 'P', 'Team', 'Pos'.
    
    Returns:
    pd.DataFrame: DataFrame containing the selected squad.
    """
    # Reset index to ensure it ranges from 0 to N-1
    df = df.reset_index(drop=True)
    df.set_index('id_player',inplace=True)
    player_ids = df.index

    # Convert 'value' and 'P' columns to numeric
    df['now_cost'] = pd.to_numeric(df['now_cost'])
    df[metric] = pd.to_numeric(df[metric])

    # Define the LP problem
    prob = LpProblem("FPL_Squad_Selection", LpMaximize)

    # Define binary decision variables for each player
    x = LpVariable.dicts('x', player_ids, cat='Binary')

    # Objective function: maximize total points
    prob += lpSum(df.loc[i, metric] * x[i] for i in player_ids), "Total_Points"

    # Total value constraint
    prob += lpSum(df.loc[i, 'now_cost'] * x[i] for i in player_ids) <= max_value, "Total_value"

    # Position constraints
    positions = {'GKP': num_gks, 'DEF': num_defs, 'MID': num_mids, 'FWD': num_atts}
    for pos, count in positions.items():
        pos_ids = df[df['singular_name_short'] == pos].index.tolist()
        prob += lpSum(x[i] for i in pos_ids) == count, f"Total_{pos.upper()}"

    # Team constraints: no more than 3 players from each team
    for team in df['team'].unique():
        team_ids = df[df['team'] == team].index.tolist()
        prob += lpSum(x[i] for i in team_ids) <= 3, f"Team_{team}"

    # Add constraints to ensure each player is selected at most once
    for i in player_ids:
        prob += x[i] <= 1, f"Select_{i}_At_Most_Once"

    # Solve the problem
    prob.solve(PULP_CBC_CMD(msg=0))

    # Check if an optimal solution was found
    if LpStatus[prob.status] != 'Optimal':
        print("No optimal solution found.")
        return None

    # Get the selected players
    selected_ids = [i for i in player_ids if x[i].varValue == 1]
    selected_squad = df.loc[selected_ids].reset_index(drop=True)

    return selected_squad

def save_selected_squad(squad):
    cols_to_keep = [
        'web_name',
        'name',
        'singular_name_short',
        'now_cost',
        'minutes',
        'expected_points',        
        'expected_goals',
        'expected_assists',
        'expected_goals_conceded'
    ]

    cols_to_rename = {
        'web_name':'player_name',
        'name':'team_name',
        'singular_name_short':'position',
        'now_cost':'value',
        'minutes':'avg_minutes',
    }

    squad['singular_name_short'] = pd.Categorical(squad['singular_name_short'], ['GKP','DEF','MID','FWD'])
    squad = squad[cols_to_keep].rename(columns=cols_to_rename)
    squad = squad.sort_values(by=['singular_name_short','name'])
    print(tabulate(squad, 
                   headers='keys', 
                   tablefmt='psql'
                   ))
    
    current_date = datetime.now().date()
    squad.to_csv(f'squad_{current_date}.csv')

def main():
    df_players, df_teams, df_points = get_fpl_data()
    df_dates = get_date_data()
    
    df_upcoming = calc_upcoming_fixture_difficulty(df_dates, df_teams)
    df_xgc = calc_exp_goals_conceded(df_dates)
    
    df_points_grp = group_points_data(df_points, df_xgc, df_upcoming)
    df_exp_points = apply_exp_goals_calcs(df_points_grp)
    df_exp_points = add_additional_cols(df_exp_points, df_players, df_teams)

    metric = 'expected_points'
    selected_squad = select_fpl_squad(df_exp_points, metric)
    save_selected_squad(selected_squad)

main()



TypeError: 'NoneType' object is not subscriptable

In [217]:
print(f"{datetime.now().date()}_date")

2024-10-11_date
