# Fantasy Premier League optimization

Import the packages used in the team selection process.

In [3]:
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import pulp

The data is extracted from fantasyfootballscout, where each player is given projected points for each of the first 6 gameweeks.

In [5]:
# Read the saved projections page and hand its markup to BeautifulSoup.
# NOTE(review): no parser is named here, so BeautifulSoup picks whichever one
# is installed — consider pinning it so parsing is reproducible.
with open("data.html", 'r') as html_file:
    soup = BeautifulSoup(html_file.read())

# Column labels, in the order the <td> cells of a row are paired with them.
keys = ['player', 'team', 'position', 'price', 'gw1', 'gw2', 'gw3', 'gw4', 'gw5', 'gw6', 'gw1_6']

# Build one dict per table row; rows that contain no <td> cells are skipped.
stats = []
for row in soup.select("tr"):
    cells = [cell.text for cell in row.select("td")]
    if cells:
        stats.append(dict(zip(keys, cells)))

df_stats = pd.DataFrame(stats)

# Cell contents are text; the price and every gameweek column become floats.
df_stats = df_stats.astype({column: float for column in keys[3:]})

# Remove surrounding whitespace from the team codes.
df_stats['team'] = df_stats['team'].str.strip()

# Subtotals of projected points: gw1-gw3, and gw1-gw6 (the latter replaces
# the value parsed from the page with the sum of the six gameweek columns).
df_stats['gw1_3'] = df_stats[[f'gw{week}' for week in range(1, 4)]].sum(axis=1)
df_stats['gw1_6'] = df_stats[[f'gw{week}' for week in range(1, 7)]].sum(axis=1)

# Drop the scratch names so only soup and df_stats remain for later cells.
del stats, keys

In [6]:
# Preview the first five rows of the parsed projections table.
df_stats.head(5)

Unnamed: 0,player,team,position,price,gw1,gw2,gw3,gw4,gw5,gw6,gw1_6,gw1_3
0,Salah,LIV,MID,12.5,7.5,7.3,5.4,6.3,7.7,6.9,41.1,20.2
1,Fernandes,MUN,MID,12.0,6.1,5.7,5.4,6.8,5.5,6.2,35.7,17.2
2,Mane,LIV,MID,12.0,6.3,6.1,4.6,5.3,6.4,5.8,34.5,17.0
3,De Bruyne,MCI,MID,12.0,4.9,7.0,5.4,5.1,6.0,4.3,32.7,17.3
4,Kane,TOT,FWD,12.5,4.4,5.1,6.6,5.9,4.6,4.9,31.5,16.1


In [7]:
def optimize(
    of, budget=100.0, defenders=5, midfielders=5, forwards=3, goalkeepers=2,
    team_restrictions=None, position_restrictions=None, player_selection=None, player_banned=None
) -> pd.DataFrame:
    """
    Select the squad with the highest projected points under squad limits.

    Builds a 0/1 integer program over the rows of the module-level
    ``df_stats`` frame, writes it to ``FPL.lp``, solves it with PuLP's
    default solver, prints the solver status and returns the chosen rows.

    of: Column of df_stats to maximize (e.g. 'gw1', 'gw1_3', 'gw1_6')
    budget: Upper bound on the summed price of the selected players
    defenders, midfielders, forwards, goalkeepers: Maximum count per
        position; their sum is the exact number of players selected
    team_restrictions: [('team', max_player_count)]; teams that are not
        listed are capped at 2 players
    position_restrictions: [('team', max_gk_count, max_def_count,
        max_mid_count, max_fwd_count)]; teams that are not listed are capped
        at 1 player per position
    player_selection: [('team', 'player_name')] forced into the squad
    player_banned: [('team', 'player_name')] kept out of the squad; an entry
        that is also in player_selection is treated as selected

    A restriction, selection or ban that names a team or (team, player) pair
    not present in df_stats cannot be applied; a UserWarning is emitted for
    it instead of ignoring it silently.

    Returns the rows of df_stats for the selected players.
    """
    import warnings

    # None stands for "no entries"; avoids sharing mutable default lists.
    team_restrictions = [] if team_restrictions is None else team_restrictions
    position_restrictions = [] if position_restrictions is None else position_restrictions
    player_selection = [] if player_selection is None else player_selection
    player_banned = [] if player_banned is None else player_banned

    s = df_stats
    # Total squad size is the sum of the per-position counts
    player_count = sum([defenders, midfielders, forwards, goalkeepers])

    # Per-team cap on the number of players, with the default for other teams
    default_team_cap = 2
    team_caps = {team: cap for team, cap in team_restrictions}

    # Per-team, per-position caps, with the default for other teams
    default_position_cap = 1
    position_caps = {
        team: {"gk": g, "def": d, "mid": m, "fwd": f}
        for team, g, d, m, f in position_restrictions
    }

    # Sets give constant-time membership tests in the player loop below
    forced = {tuple(entry) for entry in player_selection}
    banned = {tuple(entry) for entry in player_banned}

    model = pulp.LpProblem("FPL_team_optimization", pulp.LpMaximize)

    # Key every per-player attribute by the frame's index label so the lookups
    # stay correct even when df_stats does not carry a default 0..n-1 index.
    player_index = s.index.to_list()
    player_name = dict(zip(player_index, s.player.to_list()))
    player_prices = dict(zip(player_index, s.price.to_list()))
    player_expected_points = dict(zip(player_index, s[of].to_list()))
    player_position = dict(zip(player_index, s.position.to_list()))
    player_team = dict(zip(player_index, s.team.to_list()))
    teams = s.team.unique()

    # One 0/1 decision variable per player: 1 means "in the squad"
    players = pulp.LpVariable.dicts("FLP_players",
                                    player_index,
                                    lowBound=0,
                                    upBound=1,
                                    cat='Integer')

    def position_count(position, indices):
        """Return the number of selected players in `indices` at `position`."""
        return pulp.lpSum([int(player_position[i] == position) * players[i] for i in indices])

    position_keys = (('GK', 'gk'), ('DEF', 'def'), ('MID', 'mid'), ('FWD', 'fwd'))

    # Objective function, maximize expected points
    model += (
        pulp.lpSum([player_expected_points[i] * players[i] for i in player_index])
    )

    # Budget ceiling and exact squad size
    model += pulp.lpSum([player_prices[i] * players[i] for i in player_index]) <= budget
    model += pulp.lpSum([players[i] for i in player_index]) == player_count

    # Per-position ceilings; together with the exact squad size these pin the
    # lineup to the requested counts (default 2 GK, 5 DEF, 5 MID, 3 FWD).
    model += position_count('GK', player_index) <= goalkeepers
    model += position_count('DEF', player_index) <= defenders
    model += position_count('MID', player_index) <= midfielders
    model += position_count('FWD', player_index) <= forwards

    # Team restrictions
    for team in teams:
        caps = position_caps.get(team, {})

        # Index labels of all the players that belong to this team
        team_index = [i for i in player_index if player_team[i] == team]

        model += pulp.lpSum([players[i] for i in team_index]) <= team_caps.get(team, default_team_cap)

        for position, key in position_keys:
            model += position_count(position, team_index) <= caps.get(key, default_position_cap)

    # Player restrictions
    known_players = set()
    for i in player_index:
        identity = (player_team[i], player_name[i])
        known_players.add(identity)
        if identity in forced:
            model += players[i] == 1
        elif identity in banned:
            model += players[i] == 0

    # Report requests that matched nothing instead of dropping them silently
    unmatched_players = [
        tuple(entry) for entry in list(player_selection) + list(player_banned)
        if tuple(entry) not in known_players
    ]
    if unmatched_players:
        warnings.warn(
            f"player_selection/player_banned entries not found in df_stats: {unmatched_players}",
            stacklevel=2,
        )
    known_teams = set(player_team.values())
    unmatched_teams = [
        team for team in list(team_caps) + list(position_caps)
        if team not in known_teams
    ]
    if unmatched_teams:
        warnings.warn(
            f"team/position restrictions for teams not found in df_stats: {unmatched_teams}",
            stacklevel=2,
        )

    # Write the formulation to FPL.lp, then solve the problem
    model.writeLP("FPL.lp")
    model.solve()

    # Solvers report floats (or None when no value was assigned); threshold
    # at 0.5 rather than testing for exact equality with 1.
    chosen = [(players[i].varValue or 0) > 0.5 for i in player_index]
    team = s[np.array(chosen, dtype=bool)]

    print(pulp.LpStatus[model.status])
    return team

## Optimize for the first 3 gameweeks

In [8]:
# Run the optimizer with its default budget, squad shape and restrictions,
# maximizing the gw1_3 column.
team = optimize('gw1_3')
# Bare expression so the notebook renders the returned DataFrame.
team

Optimal


Unnamed: 0,player,team,position,price,gw1,gw2,gw3,gw4,gw5,gw6,gw1_6,gw1_3
0,Salah,LIV,MID,12.5,7.5,7.3,5.4,6.3,7.7,6.9,41.1,20.2
5,Alexander-Arnold,LIV,DEF,7.5,5.4,5.7,3.7,4.3,5.8,5.1,30.0,14.8
14,Antonio,WHM,FWD,7.5,4.4,4.2,4.8,4.1,3.9,4.1,25.5,13.4
15,Gundogan,MCI,MID,7.5,3.8,5.3,4.2,3.9,4.6,3.3,25.1,13.3
18,Watkins,AVL,FWD,7.5,4.5,4.7,4.7,3.3,4.2,3.5,24.9,13.9
21,Rodriguez,EVE,MID,7.0,4.2,3.7,3.7,4.4,3.7,4.8,24.5,11.6
25,Martinez,AVL,GK,5.5,4.2,4.4,4.3,3.3,4.0,3.4,23.6,12.9
29,Raphinha,LEE,MID,6.5,3.3,4.0,4.0,3.5,4.2,4.1,23.1,11.3
33,Rodrigo,LEE,FWD,6.5,3.3,4.0,4.0,3.5,4.1,4.1,23.0,11.3
35,Sanchez,BRI,GK,4.5,3.9,4.1,3.7,3.8,3.5,3.8,22.8,11.7


## Optimize for the first 3 gameweeks with restrictions

We want to limit the number of Manchester City players to one, because of the "Pep roulette".  
Allow Liverpool to have up to two midfielders.  
Pre-select Salah, Ronaldo and Alexander-Arnold.  


In [9]:
# Re-run for gw1_3 with MCI capped at one player, custom per-position caps
# for LIV, and three pre-selected players.
# NOTE(review): optimize() unpacks each position restriction as
# (team, gk, def, mid, fwd), so ('LIV', 1, 2, 1, 1) permits two LIV defenders
# and one LIV midfielder — confirm this is the intended ordering.
# NOTE(review): a selection only takes effect when its (team, player) pair
# matches a row of df_stats; the forwards displayed for this run do not
# include Ronaldo, so verify that ('MUN', 'Ronaldo') is present in the data.
team = optimize(
    'gw1_3', budget=100.0, 
    team_restrictions=[('MCI', 1)], 
    position_restrictions=[('LIV', 1, 2, 1, 1)],
    player_selection=[('LIV', 'Salah'), ('MUN', 'Ronaldo'),('LIV', 'Alexander-Arnold') ]
)

Optimal


In [10]:
# Display the selected squad ordered by the position column.
team.sort_values('position')

Unnamed: 0,player,team,position,price,gw1,gw2,gw3,gw4,gw5,gw6,gw1_6,gw1_3
5,Alexander-Arnold,LIV,DEF,7.5,5.4,5.7,3.7,4.3,5.8,5.1,30.0,14.8
40,Stones,MCI,DEF,5.5,3.3,4.7,3.8,3.4,4.3,2.8,22.3,11.8
55,James,CHE,DEF,5.5,4.9,3.6,2.7,4.3,3.4,2.8,21.7,11.2
85,Coufal,WHM,DEF,5.0,3.6,3.1,4.1,3.2,2.7,2.8,19.5,10.8
90,Veltman,BRI,DEF,4.5,3.3,3.7,2.9,3.1,2.6,3.2,18.8,9.9
14,Antonio,WHM,FWD,7.5,4.4,4.2,4.8,4.1,3.9,4.1,25.5,13.4
18,Watkins,AVL,FWD,7.5,4.5,4.7,4.7,3.3,4.2,3.5,24.9,13.9
33,Rodrigo,LEE,FWD,6.5,3.3,4.0,4.0,3.5,4.1,4.1,23.0,11.3
25,Martinez,AVL,GK,5.5,4.2,4.4,4.3,3.3,4.0,3.4,23.6,12.9
35,Sanchez,BRI,GK,4.5,3.9,4.1,3.7,3.8,3.5,3.8,22.8,11.7
