### **Imports**

In [9]:
import pandas as pd
import numpy as np
!pip install pulp
import pulp
from sklearn import linear_model
pd.options.mode.chained_assignment = None



### **Predictions**
Train a separate linear regression model for each position on every season except `2122`, then predict points for the held-out `2122` season.

In [10]:
# Location of the per-gameweek player data.
# NOTE(review): this is an absolute, machine-specific path; consider a path
# relative to the repository so the notebook runs on other machines.
DATA_PATH = 'C:/Users/Raunak/Desktop/ML Projects/FPL/Fantasy-Football-Team-Predictions/Data/Player_Data_1.csv'

# Season code that is held out for evaluation; every other season is used for training.
TEST_SEASON = 2122

data = pd.read_csv(DATA_PATH, index_col=0)
train_data = data[data.season != TEST_SEASON]
test_data = data[data.season == TEST_SEASON]

# Input columns shared by all four per-position models.
features = ['opp_diff','was_home','minutes_sum','bps_sum',
            'influence_sum','threat_sum','ict_sum','creat_sum',
            'yel_sum','red_sum','selected_by','tran_sum',
            'goals_sum','assists_sum','points_sum','value',
            'saves_sum','goals_con_sum','clean_sheets_sum']


def _split_position(position_code):
    """Return the (train, test) rows for one position code.

    The test slice is copied so that prediction columns can be added to it
    without writing into a view of ``test_data``.
    """
    train = train_data[train_data.position == position_code]
    test = test_data[test_data.position == position_code].copy()
    return train, test


def _fit_and_score(train, test):
    """Fit a no-intercept linear regression on ``train`` and score ``test`` in place.

    Adds a ``prediction`` column and an absolute ``prediction_error`` column
    to ``test`` and returns the fitted model.
    """
    # Keyword form of the original positional call (False, False, True, None):
    # only fit_intercept differs from the defaults. Positional arguments are
    # rejected by scikit-learn >= 1.0 and `normalize` no longer exists in >= 1.2.
    model = linear_model.LinearRegression(fit_intercept=False)
    model.fit(train[features], train.points)
    test['prediction'] = model.predict(test[features])
    test['prediction_error'] = (test.prediction - test.points).abs()
    return model


# Position codes used in the data: 1 = GK, 2 = DEF, 3 = MID, 4 = FWD.
gk_train, gk_test = _split_position(1)
def_train, def_test = _split_position(2)
mid_train, mid_test = _split_position(3)
fwd_train, fwd_test = _split_position(4)

gk_model = _fit_and_score(gk_train, gk_test)
def_model = _fit_and_score(def_train, def_test)
mid_model = _fit_and_score(mid_train, mid_test)
fwd_model = _fit_and_score(fwd_train, fwd_test)

all_predictions = pd.concat([gk_test, def_test, mid_test, fwd_test])

# Mean absolute error per position, then across all positions.
for label, scored in (('GK ', gk_test), ('DEF', def_test), ('MID', mid_test), ('FWD', fwd_test)):
    print(label + ' Mean Error: ' + str(round(scored.prediction_error.mean(),3)))
print()
print('Total Mean Error: ' + str(round((all_predictions.prediction_error.mean()),3)))

GK  Mean Error: 2.339
DEF Mean Error: 2.376
MID Mean Error: 1.778
FWD Mean Error: 2.372

Total Mean Error: 2.105


### **Select Team Functions**
Integer linear programming (via PuLP) is used to select the best legal squad for each gameweek.

In [36]:
def select(gw, data_in, print_output=False, budget=100, sub_factor=0.1):
    """Select a squad for one gameweek and compare predicted with real points.

    Args:
        gw: Gameweek number; only rows of ``data_in`` with ``GW == gw`` are considered.
        data_in: DataFrame of players with at least the columns ``GW``,
            ``prediction``, ``points`` and ``value``, plus whatever
            ``select_team`` and ``print_player`` read.
        print_output: When True, print the starting team, substitutes and a summary.
        budget: Squad budget passed to ``select_team`` (default 100, the value
            that was previously hard-coded).
        sub_factor: Weight given to substitutes' predicted points in the
            optimisation objective (default 0.1, previously hard-coded).

    Returns:
        Tuple ``(predicted_points_total, real_points_total, cost, error)`` where
        the totals cover the starting team with the captain counted twice,
        ``cost`` is the summed ``value`` of all selected players divided by 10,
        and ``error`` is the absolute difference between the two totals.
    """
    gw_players = data_in[data_in.GW == gw]
    first_team, captain, subs, _ = select_team(gw_players, budget, sub_factor)

    def is_selected(variable):
        # Solvers may report a binary as 0.9999999 or 1e-9 instead of an exact
        # 1 or 0, and as None if it was never assigned, so use a threshold
        # rather than comparing against 0.
        chosen = variable.value()
        return chosen is not None and chosen > 0.5

    real_points_total = 0
    predicted_points_total = 0
    total_cost = 0

    if print_output:
        print('Starting team')

    for i in range(gw_players.shape[0]):
        is_captain = is_selected(captain[i])
        if not (is_captain or is_selected(first_team[i])):
            continue

        player = gw_players.iloc[i]
        # The captain's points count double.
        multiplier = 2 if is_captain else 1
        if print_output:
            print(print_player(player) + (' (Captain)' if is_captain else ''))
        predicted_points_total += player.prediction * multiplier
        real_points_total += player.points * multiplier
        total_cost += player.value

    if print_output:
        print('\n' + 'Substitutes')

    for i in range(gw_players.shape[0]):
        if is_selected(subs[i]):
            player = gw_players.iloc[i]
            if print_output:
                print(print_player(player))
            # Substitutes add to the cost but not to the points totals.
            total_cost += player.value

    error = abs(real_points_total - predicted_points_total)

    if print_output:
        print('\n' + 'Predicted Points    ' + str(round(predicted_points_total,2)))
        print('Real Points         '        + str(real_points_total))
        print('Error               '        + str(round(error,2)))
        print('Cost                '        + '£' + str(round(total_cost/10.0,2)) + 'M\n')

    return predicted_points_total, real_points_total, total_cost/10.0, error

def print_player(player):
    """Format one player row as ' [id]  (Pred:x | Real:y) First Second'.

    ``player`` is any object exposing ``player_id``, ``prediction``,
    ``points``, ``first_name`` and ``second_name`` attributes. The prediction
    is rounded to one decimal place and the real points to two.
    """
    identifier = str(player.player_id)
    predicted = str(round(player.prediction, 1))
    actual = str(round(player.points, 2))
    full_name = player.first_name + ' ' + player.second_name
    return f' [{identifier}]  (Pred:{predicted} | Real:{actual}) {full_name}'

def select_range(start_gw, end_gw, data_in):
    """Run ``select`` over a range of gameweeks and print a per-week and overall summary.

    Args:
        start_gw: First gameweek to evaluate (inclusive).
        end_gw: Gameweek to stop at (exclusive, as in ``range``).
        data_in: Player DataFrame forwarded unchanged to ``select``.

    Returns:
        None. Results are printed only.
    """
    total_error = 0
    points = 0
    real_points = 0
    gameweeks = range(start_gw, end_gw)
    for gw in gameweeks:
        predicted_score, real_score, _, error = select(gw, data_in, False)
        print('GW' + str(gw) + '---------------------------')
        print('Predicted Score : ' + str(round(predicted_score,2)) + ' Points')
        # Format numerically instead of appending '.00', which printed
        # e.g. '62.0.00' whenever the real score was a float.
        print('Real Score      : ' + '{:.2f}'.format(float(real_score)) + ' Points')
        total_error += abs(error)
        points += predicted_score
        real_points += real_score

    # An empty range (start_gw >= end_gw) has no gameweeks to average over;
    # report 0.0 instead of dividing by zero.
    average_error = total_error / len(gameweeks) if len(gameweeks) else 0.0

    print('\nPredicted       ' + str(round(points,2)))
    print('Real            ' + str(round(real_points,2)))
    print('Total Error     ' + str(round(total_error,2)))
    print('Average Error   ' + str(round(average_error,2)))

def select_team(player_data, budget, sub_factor):
    """Pick a starting eleven, a captain and four substitutes by integer programming.

    The objective maximises the summed ``prediction`` of the starters, with the
    captain's prediction counted a second time and each substitute's
    prediction weighted by ``sub_factor``.

    Constraints:
        * total ``value / 10`` of starters and substitutes is at most ``budget``
        * position 1: exactly 1 starter, 2 in the squad
        * position 2: 3-5 starters, 5 in the squad
        * position 3: 3-5 starters, 5 in the squad
        * position 4: 1-3 starters, 3 in the squad
        * at most 3 squad members share a ``team_code``
        * exactly 11 starters and exactly 1 captain, who must be a starter
        * a player cannot be both a starter and a substitute

    Args:
        player_data: DataFrame with the columns ``prediction``, ``value``,
            ``position`` and ``team_code``; row order defines the variable order.
        budget: Maximum squad cost, in the same units as ``value / 10``.
        sub_factor: Weight applied to substitutes' predictions in the objective.

    Returns:
        Tuple ``(decisions, captain_decisions, sub_decisions, objective_value)``.
        The first three are lists of solved PuLP binary variables, one per row
        of ``player_data``.

    Raises:
        RuntimeError: If the solver does not report an optimal solution
            (for example when the constraints are infeasible for this data),
            since the variable values would then be meaningless.
    """
    num_players = len(player_data)
    indices = range(num_players)

    # Read each column once; a per-term `player_data.iloc[i]` lookup builds a
    # new Series every time and dominates the model-building cost.
    predictions = player_data['prediction'].tolist()
    costs = (player_data['value'] / 10.0).tolist()
    positions = player_data['position'].tolist()
    team_codes = player_data['team_code'].tolist()

    model = pulp.LpProblem("Constrained_value_maximisation", pulp.LpMaximize)

    # x: starting team, y: captain, z: substitutes (one binary per player)
    decisions = [pulp.LpVariable("x{}".format(i), cat='Binary') for i in indices]
    captain_decisions = [pulp.LpVariable("y{}".format(i), cat='Binary') for i in indices]
    sub_decisions = [pulp.LpVariable("z{}".format(i), cat='Binary') for i in indices]

    # Objective: a captain is also a starter, so their prediction counts twice.
    model += pulp.lpSum(
        (captain_decisions[i] + decisions[i] + sub_decisions[i] * sub_factor) * predictions[i]
        for i in indices), "Objective"

    # Cost constraint over the whole squad (starters + substitutes).
    model += pulp.lpSum((decisions[i] + sub_decisions[i]) * costs[i] for i in indices) <= budget

    def starters_in(position):
        # Number of starters playing in the given position.
        return pulp.lpSum(decisions[i] for i in indices if positions[i] == position)

    def squad_in(position):
        # Number of squad members (starters + substitutes) in the given position.
        return pulp.lpSum(decisions[i] + sub_decisions[i]
                          for i in indices if positions[i] == position)

    # (position code, min starters, max starters, squad size)
    formation_rules = (
        (1, 1, 1, 2),
        (2, 3, 5, 5),
        (3, 3, 5, 5),
        (4, 1, 3, 3),
    )
    for position, min_start, max_start, squad_size in formation_rules:
        if min_start == max_start:
            model += starters_in(position) == min_start
        else:
            model += starters_in(position) >= min_start
            model += starters_in(position) <= max_start
        model += squad_in(position) == squad_size

    # Only 3 players can be selected from a single club.
    for team_id in np.unique(team_codes):
        model += pulp.lpSum(decisions[i] + sub_decisions[i]
                            for i in indices if team_codes[i] == team_id) <= 3

    # 11 starting players must be selected, one of whom is captain.
    model += pulp.lpSum(decisions) == 11
    model += pulp.lpSum(captain_decisions) == 1

    for i in indices:
        # Captain has to be present in the starting team.
        model += (decisions[i] - captain_decisions[i]) >= 0
        # Substitutes cannot also be in the starting team.
        model += (decisions[i] + sub_decisions[i]) <= 1

    model.solve()

    if model.status != pulp.LpStatusOptimal:
        raise RuntimeError(
            'Team selection did not reach an optimal solution (status: '
            + pulp.LpStatus[model.status] + ')')

    return decisions, captain_decisions, sub_decisions, model.objective.value()

### **Generate Predictions**
gw = the gameweek to make predictions for

In [39]:
# Gameweek to select and display a squad for.
gw = 8
predicted_points, real_points, cost, _ = select(gw, all_predictions, print_output=True)

Starting team
 [30]  (Pred:4.7 | Real:1) Emiliano Martínez
 [56]  (Pred:4.0 | Real:5) Shane Duffy
 [38]  (Pred:4.0 | Real:0) Tyrone Mings
 [256]  (Pred:5.4 | Real:6) João Pedro Cavaco Cancelo
 [229]  (Pred:5.1 | Real:6) Virgil van Dijk
 [262]  (Pred:5.6 | Real:1) Rúben Santos Gato Alves Dias
 [251]  (Pred:6.7 | Real:11) Kevin De Bruyne
 [233]  (Pred:7.2 | Real:13) Mohamed Salah (Captain)
 [359]  (Pred:5.9 | Real:9) Heung-Min Son
 [307]  (Pred:4.6 | Real:2) Allan Saint-Maximin
 [63]  (Pred:4.3 | Real:2) Neal Maupay

Substitutes
 [242]  (Pred:3.1 | Real:6) Caoimhin Kelleher
 [309]  (Pred:2.8 | Real:0) Sean Longstaff
 [50]  (Pred:3.3 | Real:3) Douglas Luiz Soares de Paulo
 [577]  (Pred:1.1 | Real:1) Joe Gelhardt

Predicted Points    64.69
Real Points         69
Error               4.31
Cost                £100.0M



In [40]:
# The end gameweek is exclusive, so this evaluates gameweeks 4 to 7.
select_range(start_gw=4, end_gw=8, data_in=all_predictions)

GW4---------------------------
Predicted Score : 65.38 Points
Real Score      : 62.00 Points
GW5---------------------------
Predicted Score : 65.55 Points
Real Score      : 79.00 Points
GW6---------------------------
Predicted Score : 65.24 Points
Real Score      : 53.00 Points
GW7---------------------------
Predicted Score : 66.98 Points
Real Score      : 39.00 Points

Predicted       263.15
Real            233
Total Error     57.05
Average Error   14.26
