# Initial Data Search

## Setup

In [1]:
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.pyplot import subplots
import numpy as np

## FPL 2023/2024 Data

### Reading Data

In [2]:
gw_url = "https://raw.githubusercontent.com/vaastav/Fantasy-Premier-League/master/data/2023-24/gws/merged_gw.csv"

df_og = pd.read_csv(gw_url)
print(df_og.shape)
print(df_og.columns)

# Saving a original df
df_og["value"] = df_og["value"]/ 10
df = df_og.copy()

(29725, 41)
Index(['name', 'position', 'team', 'xP', 'assists', 'bonus', 'bps',
       'clean_sheets', 'creativity', 'element', 'expected_assists',
       'expected_goal_involvements', 'expected_goals',
       'expected_goals_conceded', 'fixture', 'goals_conceded', 'goals_scored',
       'ict_index', 'influence', 'kickoff_time', 'minutes', 'opponent_team',
       'own_goals', 'penalties_missed', 'penalties_saved', 'red_cards',
       'round', 'saves', 'selected', 'starts', 'team_a_score', 'team_h_score',
       'threat', 'total_points', 'transfers_balance', 'transfers_in',
       'transfers_out', 'value', 'was_home', 'yellow_cards', 'GW'],
      dtype='object')


In [3]:
club_url = "https://raw.githubusercontent.com/vaastav/Fantasy-Premier-League/master/data/master_team_list.csv"
df_club = pd.read_csv(club_url)

df = df.merge(df_club[df_club["season"] == "2023-24"], left_on= "opponent_team", right_on= "team", suffixes=["", "_y"])
df.head()

Unnamed: 0,name,position,team,xP,assists,bonus,bps,clean_sheets,creativity,element,...,transfers_balance,transfers_in,transfers_out,value,was_home,yellow_cards,GW,season,team_y,team_name
0,Femi Seriki,DEF,Sheffield Utd,0.5,0,0,0,0,0.0,653,...,0,0,0,4.0,True,0,1,2023-24,8,Crystal Palace
1,Jack Hinshelwood,MID,Brighton,1.5,0,0,0,0,0.0,621,...,0,0,0,4.5,True,0,1,2023-24,12,Luton
2,Jadon Sancho,MID,Man Utd,3.0,0,0,4,0,11.3,397,...,0,0,0,7.0,True,0,1,2023-24,20,Wolves
3,Rhys Norrington-Davies,DEF,Sheffield Utd,0.1,0,0,0,0,0.0,487,...,0,0,0,4.0,True,0,1,2023-24,8,Crystal Palace
4,Vitaly Janelt,MID,Brentford,2.1,0,0,6,0,11.5,105,...,0,0,0,5.5,True,0,1,2023-24,18,Spurs


In [4]:
df.drop(columns= ["season", "team_y", "opponent_team"], inplace= True)
df.rename(columns= {"team_name": "opponent_team"}, inplace= True)

df.head()

Unnamed: 0,name,position,team,xP,assists,bonus,bps,clean_sheets,creativity,element,...,threat,total_points,transfers_balance,transfers_in,transfers_out,value,was_home,yellow_cards,GW,opponent_team
0,Femi Seriki,DEF,Sheffield Utd,0.5,0,0,0,0,0.0,653,...,0.0,0,0,0,0,4.0,True,0,1,Crystal Palace
1,Jack Hinshelwood,MID,Brighton,1.5,0,0,0,0,0.0,621,...,0.0,0,0,0,0,4.5,True,0,1,Luton
2,Jadon Sancho,MID,Man Utd,3.0,0,0,4,0,11.3,397,...,8.0,1,0,0,0,7.0,True,0,1,Wolves
3,Rhys Norrington-Davies,DEF,Sheffield Utd,0.1,0,0,0,0,0.0,487,...,0.0,0,0,0,0,4.0,True,0,1,Crystal Palace
4,Vitaly Janelt,MID,Brentford,2.1,0,0,6,0,11.5,105,...,17.0,2,0,0,0,5.5,True,0,1,Spurs


### Data Exploration

In [5]:
df.columns

Index(['name', 'position', 'team', 'xP', 'assists', 'bonus', 'bps',
       'clean_sheets', 'creativity', 'element', 'expected_assists',
       'expected_goal_involvements', 'expected_goals',
       'expected_goals_conceded', 'fixture', 'goals_conceded', 'goals_scored',
       'ict_index', 'influence', 'kickoff_time', 'minutes', 'own_goals',
       'penalties_missed', 'penalties_saved', 'red_cards', 'round', 'saves',
       'selected', 'starts', 'team_a_score', 'team_h_score', 'threat',
       'total_points', 'transfers_balance', 'transfers_in', 'transfers_out',
       'value', 'was_home', 'yellow_cards', 'GW', 'opponent_team'],
      dtype='object')

In [6]:
cols_to_drop = ['transfers_balance', 'transfers_in',
       'transfers_out', 'value', 'kickoff_time', 'round', 'fixture', 'round', 'GW']

# cols_look = ['fixture', "kickoff_time", "opponent_team", "round", "selected", "starts", 'team_a_score', 'team_h_score',
#        'threat', 'total_points', "GW"]

# df[df["name"] == "Cole Palmer"][cols_look]

df.drop(columns= cols_to_drop, inplace= True)


In [7]:
cole = df["name"] == "Cole Palmer"
chels = df["team"] == "Chelsea"

df[cole & chels].head()

Unnamed: 0,name,position,team,xP,assists,bonus,bps,clean_sheets,creativity,element,...,saves,selected,starts,team_a_score,team_h_score,threat,total_points,was_home,yellow_cards,opponent_team
2505,Cole Palmer,MID,Chelsea,0.5,0,0,7,0,17.3,362,...,0,16665,0,1,0,4.0,1,True,0,Nott'm Forest
3218,Cole Palmer,MID,Chelsea,0.7,0,0,4,0,11.6,362,...,0,21699,0,0,0,19.0,1,False,0,Bournemouth
3936,Cole Palmer,MID,Chelsea,0.8,0,0,5,0,22.8,362,...,0,21434,0,1,0,7.0,1,True,0,Aston Villa
4723,Cole Palmer,MID,Chelsea,2.3,1,0,17,1,22.4,362,...,0,25621,1,2,0,9.0,5,False,1,Fulham
5465,Cole Palmer,MID,Chelsea,6.5,1,2,36,0,26.3,362,...,0,63575,1,4,1,28.0,12,False,0,Burnley


In [8]:
df.columns

Index(['name', 'position', 'team', 'xP', 'assists', 'bonus', 'bps',
       'clean_sheets', 'creativity', 'element', 'expected_assists',
       'expected_goal_involvements', 'expected_goals',
       'expected_goals_conceded', 'goals_conceded', 'goals_scored',
       'ict_index', 'influence', 'minutes', 'own_goals', 'penalties_missed',
       'penalties_saved', 'red_cards', 'saves', 'selected', 'starts',
       'team_a_score', 'team_h_score', 'threat', 'total_points', 'was_home',
       'yellow_cards', 'opponent_team'],
      dtype='object')

### Plotting

In [9]:
mid_fwd_df = df[df["position"].isin(["MID", "FWD"])]

att_returns_player_grouped_df = mid_fwd_df.groupby("name")[['expected_assists',
       'expected_goal_involvements', 'expected_goals']].sum()

att_returns_player_grouped_df.reset_index(inplace= True)
att_returns_player_grouped_df = att_returns_player_grouped_df.merge(df_og[df_og["GW"] == df_og["GW"].max()][["name", "value"]], how= "left", on= "name")

att_returns_player_grouped_df["egi_per_val"] = att_returns_player_grouped_df["expected_goal_involvements"] / att_returns_player_grouped_df["value"]
att_returns_player_grouped_df.head()

Unnamed: 0,name,expected_assists,expected_goal_involvements,expected_goals,value,egi_per_val
0,Aaron Connolly,0.0,0.0,0.0,4.5,0.0
1,Aaron Ramsey,0.48,0.82,0.35,5.0,0.164
2,Abdoulaye Doucouré,1.51,10.21,8.7,5.5,1.856364
3,Adam Lallana,1.6,2.43,0.83,4.9,0.495918
4,Adam Wharton,1.5,1.87,0.38,5.0,0.374


In [10]:
PRICE_MAX = 7.5

plot_df = att_returns_player_grouped_df[att_returns_player_grouped_df["value"] <= PRICE_MAX]

px.scatter(plot_df, x= "expected_goals", y= "expected_assists",
           hover_name= "name", color= "egi_per_val", color_continuous_scale=px.colors.sequential.Viridis)

## FPL 2024/2025 Data

### Reading Data

In [11]:
gw_url = "https://raw.githubusercontent.com/vaastav/Fantasy-Premier-League/master/data/2024-25/gws/merged_gw.csv"

df_og = pd.read_csv(gw_url)
print(df_og.shape)
print(df_og.columns)

# Saving a original df
df_og["value"] = df_og["value"]/ 10
df = df_og.copy()

(6552, 41)
Index(['name', 'position', 'team', 'xP', 'assists', 'bonus', 'bps',
       'clean_sheets', 'creativity', 'element', 'expected_assists',
       'expected_goal_involvements', 'expected_goals',
       'expected_goals_conceded', 'fixture', 'goals_conceded', 'goals_scored',
       'ict_index', 'influence', 'kickoff_time', 'minutes', 'opponent_team',
       'own_goals', 'penalties_missed', 'penalties_saved', 'red_cards',
       'round', 'saves', 'selected', 'starts', 'team_a_score', 'team_h_score',
       'threat', 'total_points', 'transfers_balance', 'transfers_in',
       'transfers_out', 'value', 'was_home', 'yellow_cards', 'GW'],
      dtype='object')


In [12]:
# club_url = "https://raw.githubusercontent.com/vaastav/Fantasy-Premier-League/master/data/2024-25/teams.csv"
# df_club = pd.read_csv(club_url)

# df = df.merge(df_club[df_club["season"] == "2023-24"], left_on= "opponent_team", right_on= "team", suffixes=["", "_y"])
# df.head()


In [13]:
# df.drop(columns= ["season", "team_y", "opponent_team"], inplace= True)
# df.rename(columns= {"team_name": "opponent_team"}, inplace= True)

# df.head()

### Data Exploration

In [14]:
df.columns

Index(['name', 'position', 'team', 'xP', 'assists', 'bonus', 'bps',
       'clean_sheets', 'creativity', 'element', 'expected_assists',
       'expected_goal_involvements', 'expected_goals',
       'expected_goals_conceded', 'fixture', 'goals_conceded', 'goals_scored',
       'ict_index', 'influence', 'kickoff_time', 'minutes', 'opponent_team',
       'own_goals', 'penalties_missed', 'penalties_saved', 'red_cards',
       'round', 'saves', 'selected', 'starts', 'team_a_score', 'team_h_score',
       'threat', 'total_points', 'transfers_balance', 'transfers_in',
       'transfers_out', 'value', 'was_home', 'yellow_cards', 'GW'],
      dtype='object')

In [15]:
cols_to_drop = ['transfers_balance', 'transfers_in',
       'transfers_out', 'value', 'kickoff_time', 'round', 'fixture', 'round', 'GW']

# cols_look = ['fixture', "kickoff_time", "opponent_team", "round", "selected", "starts", 'team_a_score', 'team_h_score',
#        'threat', 'total_points', "GW"]

# df[df["name"] == "Cole Palmer"][cols_look]

df.drop(columns= cols_to_drop, inplace= True)


In [16]:
cole = df["name"] == "Cole Palmer"
chels = df["team"] == "Chelsea"

df[cole & chels].head()

Unnamed: 0,name,position,team,xP,assists,bonus,bps,clean_sheets,creativity,element,...,red_cards,saves,selected,starts,team_a_score,team_h_score,threat,total_points,was_home,yellow_cards
13,Cole Palmer,MID,Chelsea,2.4,0,0,18,0,37.8,182,...,0,0,3210033,1,2,0,10.0,2,True,0
629,Cole Palmer,MID,Chelsea,9.5,3,2,61,0,43.8,182,...,0,0,3258422,1,6,2,21.0,17,False,1
1256,Cole Palmer,MID,Chelsea,8.3,1,2,29,0,62.2,182,...,0,0,4154208,1,1,1,14.0,6,True,1
1904,Cole Palmer,MID,Chelsea,7.0,0,0,15,1,13.3,182,...,0,0,4019324,1,1,0,0.0,3,False,0
2563,Cole Palmer,MID,Chelsea,8.5,0,0,27,1,16.4,182,...,0,0,4094666,1,3,0,21.0,8,False,0


### Plotting

In [17]:
mid_fwd_df = df[df["position"].isin(["MID", "FWD"])]

att_returns_player_grouped_df = mid_fwd_df.groupby("name")[[ "total_points", "xP", "goals_scored", "assists", 'expected_assists', 'expected_goals',
       'expected_goal_involvements', "minutes"]].sum()

att_returns_player_grouped_df.reset_index(inplace= True)
att_returns_player_grouped_df = att_returns_player_grouped_df.merge(df_og[df_og["GW"] == df_og["GW"].max()][["name", "value"]], how= "left", on= "name")

mins_calc = np.where(att_returns_player_grouped_df["minutes"] > 0, 90 / att_returns_player_grouped_df["minutes"], 0)

att_returns_player_grouped_df["goal_involvements"] = att_returns_player_grouped_df["goals_scored"] + att_returns_player_grouped_df["assists"]
att_returns_player_grouped_df["efficiency"] = att_returns_player_grouped_df["goal_involvements"] / att_returns_player_grouped_df["expected_goal_involvements"]
att_returns_player_grouped_df["egi_per_val"] = att_returns_player_grouped_df["expected_goal_involvements"] / att_returns_player_grouped_df["value"]
att_returns_player_grouped_df["egi_per_val"] = att_returns_player_grouped_df["expected_goal_involvements"] / att_returns_player_grouped_df["value"]
att_returns_player_grouped_df["xP_per_val"] = att_returns_player_grouped_df["xP"] / att_returns_player_grouped_df["value"]
att_returns_player_grouped_df["P_per_val"] = att_returns_player_grouped_df["total_points"] / att_returns_player_grouped_df["value"]

att_returns_player_grouped_df["egi_per_90"] = att_returns_player_grouped_df["expected_goal_involvements"] * mins_calc
att_returns_player_grouped_df["xP_per_90"] = att_returns_player_grouped_df["xP"] * mins_calc
att_returns_player_grouped_df["P_per_90"] = att_returns_player_grouped_df["total_points"] * mins_calc


att_returns_player_grouped_df[att_returns_player_grouped_df["name"].str.contains("Dibling")].head()


Unnamed: 0,name,total_points,xP,goals_scored,assists,expected_assists,expected_goals,expected_goal_involvements,minutes,value,goal_involvements,efficiency,egi_per_val,xP_per_val,P_per_val,egi_per_90,xP_per_90,P_per_90
360,Tyler Dibling,20,12.2,1,0,0.61,0.92,1.53,521,4.6,1,0.653595,0.332609,2.652174,4.347826,0.264299,2.107486,3.454894


In [18]:
att_returns_player_grouped_df[att_returns_player_grouped_df["name"].isin(["Mavididi"])]

Unnamed: 0,name,total_points,xP,goals_scored,assists,expected_assists,expected_goals,expected_goal_involvements,minutes,value,goal_involvements,efficiency,egi_per_val,xP_per_val,P_per_val,egi_per_90,xP_per_90,P_per_90


In [19]:
### INPUTS:

AM_PRICE_MAX = 11
AM_ATTR_LIST = ["value", "total_points", "egi_per_val", "xP_per_val", "P_per_val", "egi_per_90", "xP_per_90", "P_per_90", "efficiency"]
AM_IDX = 0

# Plotting Code

plot_df = att_returns_player_grouped_df[(att_returns_player_grouped_df["value"] <= AM_PRICE_MAX) & (att_returns_player_grouped_df["minutes"] > 90)]
MASK = plot_df[AM_ATTR_LIST[AM_IDX]] > 0

color = px.colors.sequential.Viridis
if AM_ATTR_LIST[AM_IDX] == "value":
    color = px.colors.sequential.Viridis_r

am_fig = px.scatter(plot_df[MASK], x= "expected_goals", y= "expected_assists",
           hover_name= "name", color= AM_ATTR_LIST[AM_IDX], color_continuous_scale= color, title= "Attacking Players Plot", width= 1200, height= 600)

am_fig.show()

In [20]:
def_df = df[df["position"] == "DEF"]

def_returns_player_grouped_df = def_df.groupby("name")[["expected_goal_involvements", "expected_goals_conceded", "goals_conceded",
                                                        "xP", "total_points"]].sum()

def_returns_player_grouped_df.reset_index(inplace= True)
def_returns_player_grouped_df = def_returns_player_grouped_df.merge(df_og[df_og["GW"] == df_og["GW"].max()][["name", "value"]], how= "left", on= "name")

def_returns_player_grouped_df["efficiency"] = def_returns_player_grouped_df["expected_goals_conceded"] / def_returns_player_grouped_df["goals_conceded"]
def_returns_player_grouped_df["egi_per_val"] = def_returns_player_grouped_df["expected_goal_involvements"] / def_returns_player_grouped_df["value"]
def_returns_player_grouped_df["xP_per_val"] = def_returns_player_grouped_df["xP"] / def_returns_player_grouped_df["value"]
def_returns_player_grouped_df["P_per_val"] = def_returns_player_grouped_df["total_points"] / def_returns_player_grouped_df["value"]



In [21]:
### INPUTS:

DEF_PRICE_MAX = 20
DEF_ATTR_LIST = ["value", "egi_per_val", "xP_per_val", "P_per_val", "efficiency", "expected_goal_involvements"]
DEF_IDX = -3

# Plotting Code
plot_df = def_returns_player_grouped_df[def_returns_player_grouped_df["value"] <= PRICE_MAX]
MASK = plot_df[DEF_ATTR_LIST[DEF_IDX]] > 4

color = px.colors.sequential.Viridis
if AM_ATTR_LIST[AM_IDX] == "value":
    color = px.colors.sequential.Viridis_r
    
def_fig = px.scatter(plot_df[MASK], x= "expected_goals_conceded", y= "goals_conceded",
           hover_name= "name", color= DEF_ATTR_LIST[DEF_IDX], color_continuous_scale= color, title= "Defensive Players Plot", width= 1200, height= 600)

def_fig.show()