In [2]:
#import modules
import pandas as pd
import numpy as np
import requests
from sklearn.linear_model import Lasso
pd.options.mode.chained_assignment = None  # default='warn'


<h5>1. Create weekly player stats dataframe


In [3]:
def player_data(df):
    #import data from FPL API endpoint
    data = requests.get('https://fantasy.premierleague.com/api/bootstrap-static/').json()

    
    #extract relevant columns and player IDs
    players_df = pd.DataFrame(data['elements']).dropna(axis=1)
    players_df = players_df[players_df.minutes > 0]
    players_attr_df = players_df[['id','web_name','element_type','team',]]
    player_ids = players_df['id']

    if df == 'totals':
        return players_df
    elif df == 'weekly':
        #create empty dataframe to store weekly player data
        weekly_df = pd.DataFrame()
        #loop through all player IDs, download their weekly data, and append the data ro the weekly_df
        for pid in player_ids:
            weekly_data = requests.get(f'https://fantasy.premierleague.com/api/element-summary/{pid}/').json()
            weekly_player_df = pd.DataFrame(weekly_data['history'])
            weekly_df = pd.concat([weekly_df,weekly_player_df],ignore_index=True)

        #add player details from players_df and reorganize columns
        weekly_df = weekly_df.merge(players_attr_df, how='left', left_on='element',right_on='id')
        weekly_df = weekly_df.reindex(columns=['element', 'web_name', 'element_type', 'team','fixture', 'opponent_team', 'total_points', 'was_home',
            'kickoff_time', 'team_h_score', 'team_a_score', 'round', 'minutes',
            'goals_scored', 'assists', 'clean_sheets', 'goals_conceded',
            'own_goals', 'penalties_saved', 'penalties_missed', 'yellow_cards',
            'red_cards', 'saves', 'bonus', 'bps', 'influence', 'creativity',
            'threat', 'ict_index', 'starts', 'expected_goals', 'expected_assists',
            'expected_goal_involvements', 'expected_goals_conceded', 'value',
            'transfers_balance', 'selected', 'transfers_in', 'transfers_out'])
        return weekly_df

In [4]:
weekly_df = player_data('weekly')

In [5]:
#create weekly dataframes for each position type
gkp_df = weekly_df[weekly_df.element_type == 1] 
def_df = weekly_df[weekly_df.element_type == 2] 
mid_df = weekly_df[weekly_df.element_type == 3] 
fwd_df = weekly_df[weekly_df.element_type == 4]

position_dfs = {1: {'df': gkp_df, 'position':'gkp','element_type': 1}, 2: {'df': def_df, 'position':'def','element_type': 2}, 3: { 'df': mid_df,'position':'mid','element_type': 3}, 4: {'df': fwd_df,'position':'fwd','element_type': 4}}

<h5>2. Run lasso regression for each position and store feature coefficients in dataframe

In [6]:
#Run lasso regression for each position and store feature coefficients in dataframe

feat_cols = ['minutes','influence', 'creativity','threat','expected_goals','expected_assists','expected_goal_involvements', 'expected_goals_conceded', 'value']
lasso = Lasso()
all_coefs = pd.DataFrame()

for i in position_dfs:
    X = position_dfs[i]['df'][feat_cols].values
    y = position_dfs[i]['df']['total_points'].values
    position = position_dfs[i]['position']
    element_type = position_dfs[i]['element_type']
    lasso_coef = lasso.fit(X,y).coef_
    this_df = pd.DataFrame({'feature':feat_cols,'lasso_coef': lasso_coef,'element_type': element_type, 'position':position})
    all_coefs = pd.concat([all_coefs,this_df], ignore_index=True).sort_values(by='element_type')

In [9]:
all_coefs.feature.unique()

array(['minutes', 'expected_goals_conceded', 'expected_goal_involvements',
       'expected_assists', 'value', 'threat', 'creativity', 'influence',
       'expected_goals'], dtype=object)

In [215]:
totals_df = player_data('totals')
totals_cols = ['id','web_name','element_type' ,'minutes','influence', 'creativity','threat','expected_goals','expected_assists','expected_goal_involvements', 'expected_goals_conceded', 'value_form']
totals_df = totals_df[totals_cols].rename(columns={'value_form':'value'})


In [209]:
totals_df.head()

Unnamed: 0,id,web_name,element_type,minutes,influence,creativity,threat,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,value,quality_score
0,3,Xhaka,3,2293,504.6,557.0,398.0,2.73,3.2,5.93,24.57,0.8,0
1,4,Elneny,3,111,4.6,5.4,0.0,0.0,0.04,0.04,1.29,0.0,0
2,5,Holding,2,107,23.8,1.4,7.0,0.03,0.01,0.04,0.88,0.1,0
3,6,Partey,3,1861,438.0,357.8,216.0,2.37,1.66,4.04,20.94,0.6,0
4,7,Ødegaard,3,2311,704.0,919.4,731.0,7.83,6.42,14.29,26.05,0.6,0


In [10]:
quality_scores = []

for i in totals_df.index:
    weighted_values = []
    element_type = totals_df.loc[i]['element_type']
    for c in feat_cols:
        coef = float(all_coefs[(all_coefs['feature'] == c) & (all_coefs['element_type'] == element_type)]['lasso_coef'].values)
        value = float(totals_df.loc[i][c])
        weighted_value = coef * value
        weighted_values.append(weighted_value)
    qs = sum(weighted_values)
    quality_scores.append({'id': totals_df.loc[i]['id'], 'name': totals_df.loc[i]['web_name'], 'element_type':element_type, 'total_points': totals_df.loc[i]['total_points'], 'quality_score' : qs})

NameError: name 'totals_df' is not defined

In [19]:
max(weekly_df['round'])+1

29