In [1]:
import json
import pandas as pd
import numpy as np
import pulp
import sys

sys.path.append('..')

In [2]:
from helper import get_database_session, init_mysql_engine
from models import Player

In [4]:
# Read the connection settings from the JSON config file one directory up.
with open('../config.json', 'r') as config_file:
    CONFIG = json.load(config_file)

# Create the database engine; the five settings are passed positionally in the
# order user, password, host, port, database.
engine = init_mysql_engine(
    CONFIG['MYSQL_USER'],
    CONFIG['MYSQL_PWD'],
    CONFIG['MYSQL_HOST'],
    CONFIG['MYSQL_PORT'],
    CONFIG['MYSQL_DB'],
)

In [5]:
# Load the whole `players` table into a DataFrame through `engine`.
df = pd.read_sql_table('players', con=engine)

In [6]:
# Every position code known to this notebook, listed row by row with the
# goalkeeper last (presumably front of the pitch to back -- confirm).
COLS_POS = (
    ['ls', 'st', 'rs']
    + ['lw', 'lf', 'cf', 'rf', 'rw']
    + ['lam', 'cam', 'ram']
    + ['lm', 'lcm', 'cm', 'rcm', 'rm']
    + ['lwb', 'ldm', 'cdm', 'rdm', 'rwb']
    + ['lb', 'lcb', 'cb', 'rcb', 'rb']
    + ['gk']
)

# Each formation is a list of 11 position codes, one per slot in the lineup.
FORMATION_4_4_2 = [
    'gk',                          # 1 slot
    'rb', 'rcb', 'lcb', 'lb',      # 4 slots
    'rm', 'rcm', 'lcm', 'lm',      # 4 slots
    'rs', 'ls',                    # 2 slots
]

FORMATION_5_3_2 = [
    'gk',                          # 1 slot
    'rcb', 'cb', 'lcb',            # 3 slots
    'rwb', 'lwb',                  # 2 slots
    'rcm', 'lcm',                  # 2 slots
    'cam',                         # 1 slot
    'rs', 'ls',                    # 2 slots
]

In [7]:
# Formation to optimise for, and the spending cap (200 million) that the summed
# `value` of the chosen players is compared against.
FORMATION = FORMATION_5_3_2
BUDGET = 200 * 10**6

# Candidate pool: only the columns the optimiser needs, restricted to players
# whose position is one of the formation's slots and whose value is positive.
# `@FORMATION` lets pandas look the list up by name instead of pasting its repr
# into the query string.
# `.copy()` makes TEMP an independent frame: later code adds `is_<pos>` columns
# to it, which on a filtered slice of `df` triggers SettingWithCopyWarning.
TEMP = (
    df[['id', 'position', 'value', 'overall']]
    .dropna(subset=['position'])
    .query('position in @FORMATION and value > 0')
    .copy()
)

In [8]:
def compute_best_lineup(df, formation, budget):
    """Pick the highest-rated lineup for a formation within a budget.

    Builds and solves a binary program with one 0/1 decision variable per
    candidate: maximise the summed ``overall`` of the selected players subject
    to

    * exactly ``len(formation)`` players being selected (11 for the formations
      defined in this notebook),
    * the summed ``value`` of the selection not exceeding ``budget``,
    * every position in ``formation`` being filled by exactly as many players
      as it is listed (once for each position in the notebook's formations).

    Args:
        df: DataFrame with the columns ``id``, ``position``, ``value`` and
            ``overall``, one row per candidate player. It is only read; no
            helper columns are added to it.
        formation: list of position codes, one entry per slot in the lineup.
        budget: upper bound for the summed ``value`` of the selected players.

    Returns:
        A tuple ``(prob, picked_player_ids)``. ``prob`` is the solved
        ``pulp.LpProblem``. ``picked_player_ids`` holds the ``id`` values of
        the selected players in the row order of ``df``; it is an empty list
        when the solver did not reach an optimal solution (inspect
        ``prob.status`` for the reason).
    """
    # Function-scope import so the block does not depend on a file-level change.
    from collections import Counter

    # problem definition
    prob = pulp.LpProblem('BestLineup', pulp.LpMaximize)

    # get unique identifiers
    ids = df['id'].tolist()

    # parameters, keyed by player id (tolist() yields plain Python scalars)
    overalls = dict(zip(ids, df['overall'].tolist()))
    values = dict(zip(ids, df['value'].tolist()))
    position_of = dict(zip(ids, df['position'].tolist()))

    # how many players each position needs; a position listed twice needs two
    slots_needed = Counter(formation)
    lineup_size = len(formation)

    # define the decision variable
    players = pulp.LpVariable.dicts("Player", ids, cat='Binary')

    # set objective
    prob += pulp.lpSum([overalls[i] * players[i] for i in ids]), "Total Rating of Lineup"

    # set constraints
    prob += pulp.lpSum([players[i] for i in ids]) == lineup_size, f"Pick_{lineup_size}_Players"
    prob += pulp.lpSum([values[i] * players[i] for i in ids]) <= budget, "Total_Value_Under_Budget"
    ## every required position must be filled by the required number of players
    for pos, count in slots_needed.items():
        candidates = [players[i] for i in ids if position_of[i] == pos]
        prob += pulp.lpSum(candidates) == count, f"Pick_{pos.upper()}"

    prob.solve()

    # Without an optimal solution the variable values are meaningless (or None),
    # so report "nothing picked" and let the caller inspect prob.status.
    if prob.status != pulp.LpStatusOptimal:
        return prob, []

    # Read the selection from the variables directly instead of parsing their
    # names; 0.5 is a tolerance for binaries the solver reports as 0.999...
    picked_player_ids = [i for i in ids if (players[i].varValue or 0) > 0.5]

    return prob, picked_player_ids

In [9]:
# Solve for the candidate pool, formation and budget configured above; keeps
# both the solved problem and the ids of the chosen players.
prob, ids = compute_best_lineup(TEMP, FORMATION, BUDGET)

In [10]:
# Show the descriptive columns of the chosen players, looked up in the full
# player table by id.
df.loc[
    df['id'].isin(ids),
    ['id', 'position', 'name', 'overall', 'nationality', 'age', 'club'],
]


Unnamed: 0,id,position,name,overall,nationality,age,club
6,1179,gk,G. Buffon,88,Italy,40,Paris Saint-Germain
61,41236,rs,Z. Ibrahimović,85,Sweden,36,LA Galaxy
63,45186,ls,Joaquín,81,Spain,36,Real Betis
325,138956,lcb,G. Chiellini,89,Italy,33,Juventus
917,164240,rcb,Thiago Silva,88,Brazil,33,Paris Saint-Germain
1275,171919,cb,Naldo,85,Brazil,35,FC Schalke 04
1613,177413,lcm,A. Witsel,84,Belgium,29,Borussia Dortmund
1829,179846,rcm,S. Khedira,85,Germany,31,Juventus
1853,180216,rwb,S. Coleman,80,Republic of Ireland,29,Everton
1967,181872,cam,A. Vidal,85,Chile,31,FC Barcelona


In [15]:
# Open a database session on the same engine (project helper; presumably a
# SQLAlchemy session -- confirm in `helper`).
session = get_database_session(engine)

# Select the Player records whose id is among the chosen ids. `rows` is
# iterated by the cells below.
rows = session.query(Player).filter(Player.id.in_(ids))

In [16]:
# Plain-dict summary of every chosen player, keyed by the player's position.
# If two rows share a position, the later one replaces the earlier one.
records = {
    player.position: {
        'id': player.id,
        'name': player.name,
        'position': player.position,
        'nationality': player.nationality,
        'flag': player.flag,
        'club': player.club,
        'age': player.age,
        'photo': player.photo,
        'value': player.value,
        'overall': player.overall,
    }
    for player in rows
}

In [22]:
# Total value and total overall rating of the chosen players.
# A cell only echoes its last bare expression, so the value total used to be
# computed and then thrown away; display() shows both numbers.
display(
    sum(r.value for r in rows),
    sum(r.overall for r in rows),
)

930

In [None]:
# Add one 0/1 indicator column per formation slot to TEMP: `is_<pos>` is 1 for
# players whose position equals that slot and 0 otherwise.
for pos in FORMATION:
    plays_here = TEMP['position'] == pos
    TEMP[f'is_{pos}'] = np.where(plays_here, 1, 0)

In [None]:
# Step-by-step, notebook-level build of the same model that
# compute_best_lineup() creates. Note that `prob` and `ids` are reassigned here.
prob = pulp.LpProblem('BestLineup', pulp.LpMaximize)

ids = TEMP['id'].tolist()

# Per-player lookups keyed by id.
overalls = dict(zip(ids, TEMP['overall'].tolist()))
costs = dict(zip(ids, TEMP['value'].tolist()))

# positions[pos][player_id] is the 0/1 indicator from the `is_<pos>` column.
positions = {
    pos: dict(zip(ids, TEMP[f'is_{pos}'].tolist()))
    for pos in FORMATION
}

# One binary decision variable per candidate.
player_vars = pulp.LpVariable.dicts("Player", ids, cat='Binary')

# Objective: summed overall rating of the selected players.
total_rating = pulp.lpSum([overalls[pid] * player_vars[pid] for pid in ids])
prob += total_rating, "Total Rating of Lineup"

In [None]:
# Constraint: exactly eleven players are selected.
selected_count = pulp.lpSum([player_vars[pid] for pid in ids])
prob += selected_count == 11, "11_Players"

In [None]:
# Constraint: the summed value of the selected players stays within BUDGET.
total_cost = pulp.lpSum([costs[pid] * player_vars[pid] for pid in ids])
prob += total_cost <= BUDGET, "MaxBudget"

In [None]:
# Constraint per formation slot: exactly one selected player has that position.
# The constraint is named after the upper-cased position code.
for pos in FORMATION:
    is_pos = positions[pos]
    filled = pulp.lpSum([is_pos[pid] * player_vars[pid] for pid in ids])
    prob += filled == 1, f"{pos.upper()}"

In [None]:
# Run the solver on the model built above; as the cell's last expression, the
# value returned by solve() is echoed as the cell output.
prob.solve()

In [None]:
# Collect the ids of the selected players: the id is the part of the variable
# name after the first underscore (variables were created with the prefix
# "Player"), and a variable counts as selected when its value is positive.
IDs = []
for var in prob.variables():
    if var.varValue > 0:
        IDs.append(int(var.name.split('_')[1]))

# Total value (with thousands separators) and total overall of the selection.
picked = TEMP[TEMP.id.isin(IDs)]
display(
    f"{picked.value.sum():,}",
    picked.overall.sum(),
)

# Descriptive columns of the selected players from the full player table.
df[['id', 'position', 'name', 'overall', 'nationality', 'age', 'club']].query(f"id in {IDs}")

In [None]:
# mkr = munkres.Munkres()

# MATRIX = TEMP[FORMATION_4_4_2].T.values
# INDICES = mkr.compute(munkres.make_cost_matrix(MATRIX, lambda x: 100-x))

# VALUES = [ MATRIX[row][col] for row, col in INDICES ]
# RESULT = TEMP.loc[[ col for row, col in INDICES], ['id', 'name', 'value']]
# RESULT['position'] = FORMATION_4_4_2
# RESULT['rating'] = VALUES

# df[['id', 'name', 'value', *FORMATION_4_4_2]] \
#     .merge(RESULT[['id', 'name', 'position', 'rating']], on=['id', 'name'])