In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pulp import LpProblem, LpMaximize, LpVariable, lpSum, LpStatus, PULP_CBC_CMD

In [2]:
pd.set_option('display.max_columns', None)

In [3]:
df = pd.read_csv("fifa21_raw_data.csv",low_memory=False)
df.head()

Unnamed: 0,photoUrl,LongName,playerUrl,Nationality,Positions,Name,Age,↓OVA,POT,Team & Contract,ID,Height,Weight,foot,BOV,BP,Growth,Joined,Loan Date End,Value,Wage,Release Clause,Attacking,Crossing,Finishing,Heading Accuracy,Short Passing,Volleys,Skill,Dribbling,Curve,FK Accuracy,Long Passing,Ball Control,Movement,Acceleration,Sprint Speed,Agility,Reactions,Balance,Power,Shot Power,Jumping,Stamina,Strength,Long Shots,Mentality,Aggression,Interceptions,Positioning,Vision,Penalties,Composure,Defending,Marking,Standing Tackle,Sliding Tackle,Goalkeeping,GK Diving,GK Handling,GK Kicking,GK Positioning,GK Reflexes,Total Stats,Base Stats,W/F,SM,A/W,D/W,IR,PAC,SHO,PAS,DRI,DEF,PHY,Hits
0,https://cdn.sofifa.com/players/158/023/21_60.png,Lionel Messi,http://sofifa.com/player/158023/lionel-messi/2...,Argentina,RW ST CF,L. Messi,33,93,93,\n\n\n\nFC Barcelona\n2004 ~ 2021\n\n,158023,"5'7""",159lbs,Left,93,RW,0,"Jul 1, 2004",,€67.5M,€560K,€138.4M,429,85,95,70,91,88,470,96,93,94,91,96,451,91,80,91,94,95,389,86,68,72,69,94,347,44,40,93,95,75,96,91,32,35,24,54,6,11,15,14,8,2231,466,4 ★,4★,Medium,Low,5 ★,85,92,91,95,38,65,\n372
1,https://cdn.sofifa.com/players/020/801/21_60.png,C. Ronaldo dos Santos Aveiro,http://sofifa.com/player/20801/c-ronaldo-dos-s...,Portugal,ST LW,Cristiano Ronaldo,35,92,92,\n\n\n\nJuventus\n2018 ~ 2022\n\n,20801,"6'2""",183lbs,Right,92,ST,0,"Jul 10, 2018",,€46M,€220K,€75.9M,437,84,95,90,82,86,414,88,81,76,77,92,431,87,91,87,95,71,444,94,95,84,78,93,353,63,29,95,82,84,95,84,28,32,24,58,7,11,15,14,11,2221,464,4 ★,5★,High,Low,5 ★,89,93,81,89,35,77,\n344
2,https://cdn.sofifa.com/players/200/389/21_60.png,Jan Oblak,http://sofifa.com/player/200389/jan-oblak/210005/,Slovenia,GK,J. Oblak,27,91,93,\n\n\n\nAtlético Madrid\n2014 ~ 2023\n\n,200389,"6'2""",192lbs,Right,91,GK,2,"Jul 16, 2014",,€75M,€125K,€159.4M,95,13,11,15,43,13,109,12,13,14,40,30,307,43,60,67,88,49,268,59,78,41,78,12,140,34,19,11,65,11,68,57,27,12,18,437,87,92,78,90,90,1413,489,3 ★,1★,Medium,Medium,3 ★,87,92,78,90,52,90,\n86
3,https://cdn.sofifa.com/players/192/985/21_60.png,Kevin De Bruyne,http://sofifa.com/player/192985/kevin-de-bruyn...,Belgium,CAM CM,K. De Bruyne,29,91,91,\n\n\n\nManchester City\n2015 ~ 2023\n\n,192985,"5'11""",154lbs,Right,91,CAM,0,"Aug 30, 2015",,€87M,€370K,€161M,407,94,82,55,94,82,441,88,85,83,93,92,398,77,76,78,91,76,408,91,63,89,74,91,408,76,66,88,94,84,91,186,68,65,53,56,15,13,5,10,13,2304,485,5 ★,4★,High,High,4 ★,76,86,93,88,64,78,\n163
4,https://cdn.sofifa.com/players/190/871/21_60.png,Neymar da Silva Santos Jr.,http://sofifa.com/player/190871/neymar-da-silv...,Brazil,LW CAM,Neymar Jr,28,91,91,\n\n\n\nParis Saint-Germain\n2017 ~ 2022\n\n,190871,"5'9""",150lbs,Right,91,LW,0,"Aug 3, 2017",,€90M,€270K,€166.5M,408,85,87,62,87,87,448,95,88,89,81,95,453,94,89,96,91,83,357,80,62,81,50,84,356,51,36,87,90,92,93,94,35,30,29,59,9,9,15,15,11,2175,451,5 ★,5★,High,Medium,5 ★,91,85,86,94,36,59,\n273


In [4]:
def clean_value_wage_clause(value):
    # Remove the '€' symbol
    value = value.replace('€', '')
    # Check if the value ends with 'M' and handle it
    if value.endswith('M'):
        value = value.replace('M', '')
        value = float(value) * 1e6  # Convert to numeric value, assuming 'M' means million
    elif value.endswith('K'):
        value = value.replace('K', '')
        value = float(value) * 1e3  # Convert to numeric value, assuming 'K' means thousand
    else:
        value = float(value)  # Convert to float if there's no 'M' or 'K'
    return value

In [5]:
df['Positions'] = df['Positions'].str.split().str[0]

# Dictionary for replacement
replacement_dict = {
    'RM': 'CM',
    'LM': 'CM',
    'RWB': 'RB',
    'LWB': 'LB',
    'CAM':'CM',
    'CF':'ST'
}

# Replace values in the DataFrame
df['Positions'] = df['Positions'].replace(replacement_dict)

columns = ['Value','Wage','Release Clause']
# Apply the function to each column in the list
for column in columns:
    df[column] = df[column].apply(clean_value_wage_clause)
    df[column] = df[column].astype(int)

In [6]:
df.rename(columns={"↓OVA": "Overall Rating"},inplace=True)

In [9]:
LP_data = df[['Name','Positions','Value','Age','Overall Rating','Height','Weight','foot','PAC','SHO','PAS','DRI','DEF','PHY']]

In [7]:
from pulp import LpProblem, LpMaximize, LpVariable, lpSum, LpStatus, PULP_CBC_CMD

def get_optimized_team(df, max_budget, user_constraints):
    # Initialize variables and problem
    players_ids = df.index.tolist()
    player_stats = ['PAC', 'SHO', 'PAS', 'DRI', 'DEF', 'PHY']
    stats = {stat: dict(zip(players_ids, df[stat])) for stat in player_stats}
    values = dict(zip(players_ids, df['Value']))
    
    players_vars = LpVariable.dicts("Player", players_ids, cat='Binary')
    prob = LpProblem("OptimizedTeam", LpMaximize)
    
    # Budget constraint
    prob += lpSum([values[i] * players_vars[i] for i in players_ids]) <= max_budget
    
    # User constraints for positions and stats
    for pos, pos_constraints in user_constraints.items():
        player_in_pos = [i for i in players_ids if df.loc[i, 'Positions'] == pos]
        count = pos_constraints.get('count', 0)
        
        # Apply position count constraint
        prob += lpSum([players_vars[i] for i in player_in_pos]) == count
        
        # Apply overall rating constraints if they exist
        if 'min_overall' in pos_constraints and 'max_overall' in pos_constraints:
            min_overall = pos_constraints['min_overall']
            max_overall = pos_constraints['max_overall']
            for i in player_in_pos:
                prob += (players_vars[i] * df.loc[i, 'Overall Rating'] >= min_overall * players_vars[i])
                prob += (players_vars[i] * df.loc[i, 'Overall Rating'] <= max_overall * players_vars[i])
        
        # Apply individual stats constraints if they exist
        for stat in player_stats:
            if f'min_{stat}' in pos_constraints and f'max_{stat}' in pos_constraints:
                min_stat = pos_constraints[f'min_{stat}']
                max_stat = pos_constraints[f'max_{stat}']
                for i in player_in_pos:
                    prob += (players_vars[i] * df.loc[i, stat] >= min_stat * players_vars[i])
                    prob += (players_vars[i] * df.loc[i, stat] <= max_stat * players_vars[i])
    
    # Solve the problem using CBC solver
    prob.solve(PULP_CBC_CMD(msg=1))
    
    # Get the results
    selected_players = [int(player_id) for player_id in players_ids if players_vars[player_id].varValue == 1]
    selected_players_data = df.loc[selected_players]

    return LpStatus[prob.status], selected_players_data

In [13]:
# As requested, I will call the function with all the max and min constraints for demonstration.
# However, I'll create a sample `user_constraints` dictionary with hypothetical values 
# since I do not have the actual user input.

user_constraints = {
    'ST': {
        'count': 2,
        'min_overall': 80,
        'max_overall': 90,
        'min_PAC': 70,
        'max_PAC': 90,
        'min_SHO': 75,
        'max_SHO': 95,
        'min_PAS': 65,
        'max_PAS': 85,
        'min_DRI': 70,
        'max_DRI': 90,
        'min_DEF': 30,
        'max_DEF': 50,
        'min_PHY': 70,
        'max_PHY': 90,
    },
    'RB': {
        'count': 1,
        'min_overall': 70,
        'max_overall': 75,
        'min_PAC': 70,
        'max_PAC': 90,
        'min_SHO': 50,
        'max_SHO': 65,
        'min_PAS': 60,
        'max_PAS': 80,
        'min_DRI': 65,
        'max_DRI': 80,
        'min_DEF': 70,
        'max_DEF': 90,
        'min_PHY': 65,
        'max_PHY': 85,
    }
}

# Assuming 'df' is your DataFrame containing the player data and the columns match those used in 'user_constraints'
max_budget = 200000000  # 200 million

# Now call the function with the DataFrame, budget, and constraints
status, selected_players_data = get_optimized_team(LP_data, max_budget, user_constraints)

status, selected_players_data

# Please replace 'df' with your actual DataFrame variable.
# The code will not be executed here as it's a demonstration of how you would call the function.
# You should run this code in your local Python environment where your data and the pulp library are available.

('Optimal',
              Name Positions     Value  Age  Overall Rating Height  Weight  \
 329     E. Zahavi        ST  14000000   32              81   6'0"  157lbs   
 485   Diego Costa        ST  13500000   31              80   6'2"  185lbs   
 3898    Y. Valery        RB   3500000   21              71  5'11"  187lbs   
 
        foot  PAC  SHO  PAS  DRI  DEF  PHY  
 329   Right   79   82   75   83   39   73  
 485   Right   73   79   65   74   48   86  
 3898  Right   71   52   60   69   70   71  )

In [14]:
LP_data[LP_data['Name'] == 'E. Zahavi']

Unnamed: 0,Name,Positions,Value,Age,Overall Rating,Height,Weight,foot,PAC,SHO,PAS,DRI,DEF,PHY
329,E. Zahavi,ST,14000000,32,81,"6'0""",157lbs,Right,79,82,75,83,39,73
