In [19]:
import numpy as np
import cvxpy as cp
import pandas as pd
from footbot.data import utils

In [22]:
# build the BigQuery client via the project helper, using the local service account file
client = utils.set_up_bigquery(secrets_path='../secrets/service_account.json')

# one row per element: the inner query ranks each element's rows by datetime
# (newest first) and the outer query keeps only the top-ranked row;
# points_per_minute falls back to 0 when the safe division yields NULL
current_sql = '''
SELECT
  element,
  element_type,
  value,
  team,
  total_points,
  minutes,
  COALESCE(SAFE_DIVIDE(total_points, minutes), 0) AS points_per_minute,
  safe_web_name
FROM (
  SELECT
    element,
    element_type,
    now_cost AS value,
    team,
    total_points,
    minutes,
    safe_web_name,
    ROW_NUMBER() OVER(PARTITION BY element ORDER BY datetime DESC) AS is_most_recent
  FROM
    `footbot-001.fpl.element_data_1920` )
WHERE
  is_most_recent = 1
'''

# run the query, load the result into a dataframe and preview the first rows
query_job = client.query(current_sql)
current_df = query_job.to_dataframe()
current_df.head()

Unnamed: 0,element,element_type,value,team,total_points,minutes,points_per_minute,safe_web_name
0,2,2,54,1,0,0,0.0,bellerin
1,3,2,54,1,2,109,0.018349,kolasinac
2,6,2,50,1,10,270,0.037037,monreal
3,9,2,45,1,0,0,0.0,jenkinson
4,10,2,45,1,0,0,0.0,holding


In [24]:
# one dict per dataframe row, keyed by column name
players = current_df.to_dict(orient='records')

In [69]:
# scratch check: build a 0/1 membership flag per player for a placeholder team
player_elements = np.array([player['element'] for player in players])
existing_team = [4, 5, 7]
[int(element in existing_team) for element in player_elements]

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,


In [100]:
def _one_hot_rows(labels, n_rows):
    '''
    build an (n_rows, len(labels)) matrix of 0/1 floats
    row i flags the entries of labels that are equal to i+1
    '''
    wanted = np.arange(1, n_rows + 1)[:, np.newaxis]
    return (np.asarray(labels)[np.newaxis, :] == wanted).astype(float)


def _selected_elements(variable, player_elements):
    '''
    return the entries of player_elements whose solved boolean variable rounds to 1
    '''
    return player_elements[np.rint(variable.value) == 1]


def select_team(
    players,
    total_budget=1000,
    optimise_key='predicted_total_points',
    captain_factor=2,
    bench_factor=0.1,
    existing_squad_elements=None,
    transfer_penalty=4,
    transfer_limit=15
):
    '''
    solve team selection as a boolean optimisation problem,
    either from scratch or starting from an existing squad

    players is an array of dicts, each with the keys
    'element', 'element_type' (1 to 4), 'value', 'team' (1 to 20) and optimise_key

    total_budget: maximum summed 'value' of the 15 selected players
    optimise_key: key of the per-player score to maximise
    captain_factor: total multiplier applied to the captain's score
    bench_factor: weight of bench players' scores in the objective
    existing_squad_elements: optional collection of the 15 elements currently owned;
        None or an empty collection means selecting from scratch
    transfer_penalty: objective cost of every existing squad player that is replaced
    transfer_limit: maximum number of existing squad players that may be replaced

    returns a tuple of numpy arrays of elements: (first team, captain, bench)

    raises ValueError if players is empty or if the existing squad does not
    match exactly 15 of the players
    raises RuntimeError if the solver does not find an optimal solution
    '''

    squad_size = 15
    n_positions = 4
    n_clubs = 20
    n_players = len(players)

    if n_players == 0:
        raise ValueError('players must contain at least one player')

    # munge player data
    player_elements = np.array([i['element'] for i in players])
    player_points = np.array([i[optimise_key] for i in players])
    # costs are kept as a single row so they can be stacked on the club rows
    player_costs = np.array([[i['value'] for i in players]])
    player_position = np.array([i['element_type'] for i in players])
    player_club = np.array([i['team'] for i in players])

    # explicit None / length test so that numpy arrays are accepted too
    # (their truth value is ambiguous)
    has_existing_squad = (
        existing_squad_elements is not None
        and len(existing_squad_elements) > 0
    )

    if has_existing_squad:
        existing_squad = np.isin(
            player_elements,
            list(existing_squad_elements)
        ).astype(int)

        matched = int(existing_squad.sum())
        if matched != squad_size:
            raise ValueError(
                'existing squad must match exactly 15 players, matched {}'.format(matched)
            )

    # weight matrix for player positions (row i flags element_type i+1)
    player_position_weights = _one_hot_rows(player_position, n_positions)

    # weight matrix for player clubs (row i flags team i+1)
    player_club_weights = _one_hot_rows(player_club, n_clubs)

    # overall weight matrix: cost row followed by one row per club
    player_weights = np.concatenate((
        player_costs,
        player_club_weights
    ), axis=0)

    # capacity vector: budget followed by at most 3 players per club
    squad_capacity = np.array([total_budget] + [3] * n_clubs)

    # variables for objective function
    first_team = cp.Variable(n_players, boolean=True)
    captain = cp.Variable(n_players, boolean=True)
    bench = cp.Variable(n_players, boolean=True)

    squad = first_team + bench
    ones = np.ones(n_players)

    # objective function (no existing squad)
    # the captain is also part of first_team, so only the extra
    # (captain_factor - 1) share is added to reach captain_factor in total
    objective = (
        player_points @ first_team
        + (captain_factor - 1) * player_points @ captain
        + bench_factor * player_points @ bench
    )

    # optimisation constraints (no existing squad)
    constraints = [
        # cost and club constraints
        player_weights @ squad <= squad_capacity,
        # position constraints
        player_position_weights @ squad == np.array([2, 5, 5, 3]),
        player_position_weights @ first_team >= np.array([1, 3, 3, 1]),
        player_position_weights @ first_team <= np.array([1, 5, 5, 3]),
        # player number constraints
        ones @ first_team == 11,
        ones @ captain == 1,
        ones @ bench == 4,
        # selected players not on both first team and bench
        squad <= ones,
        # first team contains captain
        first_team - captain >= np.zeros(n_players)
    ]

    # update objective function and constraints if existing squad
    if has_existing_squad:
        # number of existing squad players missing from the new squad
        transfers = squad_size - existing_squad @ squad
        objective = objective - transfer_penalty * transfers
        constraints.append(transfers <= transfer_limit)

    # optimisation problem
    squad_prob = cp.Problem(
        cp.Maximize(objective),
        constraints
    )

    # solve optimisation problem
    squad_prob.solve(
        #solver='GLPK_MI'
    )

    # variable values are None unless a solution was found, so fail loudly here
    if squad_prob.status not in ('optimal', 'optimal_inaccurate'):
        raise RuntimeError(
            'team selection could not be solved, status: {}'.format(squad_prob.status)
        )

    first_team_selection_elements = _selected_elements(first_team, player_elements)
    captain_selection_elements = _selected_elements(captain, player_elements)
    bench_selection_elements = _selected_elements(bench, player_elements)

    return first_team_selection_elements, captain_selection_elements, bench_selection_elements

In [101]:
# pick a squad from scratch, maximising total points so far
first_team_elements, captain_elements, bench_elements = select_team(
    players,
    optimise_key='total_points'
)
for selection in (first_team_elements, captain_elements, bench_elements):
    print(selection)

[191 123 141 210 219 278 287 411 460 215 304]
[210]
[ 21 128 297 319]


In [102]:
# elements of the 15 players in the squad used as the starting point for transfers
existing_squad_elements = [
    191, 212, 214, 141, 181,
    205, 278, 287, 48, 130,
    183, 215, 234, 271, 126,
]

In [107]:
# re-select starting from the existing squad, replacing at most 4 of its players
first_team_elements, captain_elements, bench_elements = select_team(
    players,
    optimise_key='total_points',
    existing_squad_elements=existing_squad_elements,
    transfer_limit=4
)
for selection in (first_team_elements, captain_elements, bench_elements):
    print(selection)

[191 214 123 141 278 287 411 460 183 215 304]
[278]
[ 48 126 234 271]


In [108]:
# rows of the selected first team, ordered by element_type
in_first_team = current_df['element'].isin(first_team_elements)
current_df.loc[in_first_team].sort_values(by='element_type')

Unnamed: 0,element,element_type,value,team,total_points,minutes,points_per_minute,safe_web_name
309,411,1,51,20,20,360,0.055556,patricio
212,123,2,56,7,24,360,0.066667,van aanholt
221,141,2,61,8,23,342,0.067251,digne
413,183,2,65,10,19,360,0.052778,van dijk
448,304,2,50,16,22,360,0.061111,vestergaard
66,191,3,125,10,36,348,0.103448,salah
78,214,3,123,11,37,360,0.102778,sterling
276,287,3,48,14,21,347,0.060519,cantwell
419,215,3,98,11,36,326,0.110429,de bruyne
273,278,4,70,14,37,352,0.105114,pukki


In [109]:
# row of the selected captain
is_captain = current_df['element'].isin(captain_elements)
current_df.loc[is_captain]

Unnamed: 0,element,element_type,value,team,total_points,minutes,points_per_minute,safe_web_name
273,278,4,70,14,37,352,0.105114,pukki


In [110]:
# rows of the selected bench, ordered by element_type
on_bench = current_df['element'].isin(bench_elements)
current_df.loc[on_bench].sort_values(by='element_type')

Unnamed: 0,element,element_type,value,team,total_points,minutes,points_per_minute,safe_web_name
364,48,1,40,4,0,0,0.0,button
393,126,2,45,7,18,360,0.05,ward
437,271,3,45,13,13,360,0.036111,hayden
425,234,4,45,12,4,47,0.085106,greenwood
