# Imports

In [1]:
import random
import pandas as pd
import numpy as np
from copy import deepcopy

# Load Data

In [2]:
# Read the player table and discard the leftover 'Unnamed: 0' index column
# in a single expression.
df = pd.read_csv("data/players.csv").drop(columns=['Unnamed: 0'])

# Problem Configuration

In [3]:
# Number of players each team must field per position
TEAM_STRUCTURE = {"GK": 1, "DEF": 2, "MID": 2, "FWD": 2}
# Players per team; derived from TEAM_STRUCTURE so the two can never drift apart
TEAM_SIZE = sum(TEAM_STRUCTURE.values())
# Number of teams that make up one league (one individual)
NUM_TEAMS = 5
# Maximum total salary allowed per team (compared against the 'Salary (€M)' values)
BUDGET_LIMIT = 750
# Number of leagues (individuals) in a population
POPULATION_SIZE = 10

In [4]:
# Map each position label to the list of its players (one record dict per player)
players_by_position = {
    position: df[df['Position'] == position].to_dict('records')
    for position in ("GK", "DEF", "MID", "FWD")
}

In [5]:
# prints just for visualization
# print("Total de jogadores:", len(df))
# print("Distribuição por posição:")
# print(df['Position'].value_counts())


In [6]:
# Dump the roster of every position group, preceded by a header with its size
for position in players_by_position:
    players = players_by_position[position]
    print(f"\n--- {position} ({len(players)} players) ---")
    for player in players:
        print(f"Name: {player['Name']}, Skill: {player['Skill']}, Salary: €{player['Salary (€M)']}M")



--- GK (5 players) ---
Name: Alex Carter, Skill: 85, Salary: €90M
Name: Jordan Smith, Skill: 88, Salary: €100M
Name: Ryan Mitchell, Skill: 83, Salary: €85M
Name: Chris Thompson, Skill: 80, Salary: €80M
Name: Blake Henderson, Skill: 87, Salary: €95M

--- DEF (10 players) ---
Name: Daniel Foster, Skill: 90, Salary: €110M
Name: Lucas Bennett, Skill: 85, Salary: €90M
Name: Owen Parker, Skill: 88, Salary: €100M
Name: Ethan Howard, Skill: 80, Salary: €70M
Name: Mason Reed, Skill: 82, Salary: €75M
Name: Logan Brooks, Skill: 86, Salary: €95M
Name: Caleb Fisher, Skill: 84, Salary: €85M
Name: Maxwell Flores, Skill: 81, Salary: €72M
Name: Jaxon Griffin, Skill: 79, Salary: €65M
Name: Brayden Hughes, Skill: 87, Salary: €100M

--- MID (10 players) ---
Name: Nathan Wright, Skill: 92, Salary: €120M
Name: Connor Hayes, Skill: 89, Salary: €105M
Name: Dylan Morgan, Skill: 91, Salary: €115M
Name: Hunter Cooper, Skill: 83, Salary: €85M
Name: Austin Torres, Skill: 82, Salary: €80M
Name: Gavin Richardson, S

In [7]:
# create a valid team
def create_valid_team(available_players, structure=None):
    """Randomly draw one team that satisfies a positional structure.

    Players are told apart by object identity (``id``), not by name.

    Args:
        available_players: dict mapping a position label to the list of
            player objects that may still be picked for that position.
        structure: optional dict mapping a position label to the number of
            players required for it. Defaults to the module-level
            ``TEAM_STRUCTURE``.

    Returns:
        A ``(team, used_ids)`` tuple: ``team`` is the list of selected
        players in structure order and ``used_ids`` the set of their ``id``
        values. When any position has too few candidates (or is missing from
        ``available_players``) the result is ``(None, set())``.
    """
    if structure is None:
        structure = TEAM_STRUCTURE

    team = []
    used_ids = set()
    for pos, count in structure.items():
        # A position absent from the pool is treated as "no candidates"
        candidates = [p for p in available_players.get(pos, ()) if id(p) not in used_ids]
        if len(candidates) < count:
            return None, set()  # not enough players for this position
        selected = random.sample(candidates, count)
        used_ids.update(id(player) for player in selected)
        team.extend(selected)
    return team, used_ids

In [8]:
# print a valid team just for testing

# team, used_ids = create_valid_team(players_by_position)
# if team:
#     for player in team:
#         print(f"{player['Position']}: {player['Name']} | Skill: {player['Skill']} | Salary: €{player['Salary (€M)']}M")
# else:
#     print("Failed to create a valid team.")

In [9]:
# create a league = one individual = one possible solution

def create_league():
    """Assemble NUM_TEAMS random teams from disjoint players.

    Works on a deep copy of ``players_by_position`` so the shared pool is
    never modified.

    Returns:
        A list of teams (each a list of player dicts), or ``None`` as soon as
        a team cannot be formed or its total salary exceeds ``BUDGET_LIMIT``.
    """
    pool = deepcopy(players_by_position)
    league = []
    taken = set()  # id() of every player already assigned to some team

    for _ in range(NUM_TEAMS):
        team, team_ids = create_valid_team(pool)
        if team is None:
            return None  # invalid team
        if sum(member['Salary (€M)'] for member in team) > BUDGET_LIMIT:
            return None  # invalid team

        league.append(team)
        taken |= team_ids
        # Shrink the pool so later teams cannot reuse assigned players
        pool = {
            pos: [p for p in candidates if id(p) not in taken]
            for pos, candidates in pool.items()
        }

    return league

In [10]:
# print a league just for testing

# league = create_league() # a league corresponds to one individual, the same as a possible solution
#  
# if league is None:
#     print("Failed to generate a valid league.")
# else:
#     for team_index, team in enumerate(league, start=1):
#         print(f"\nTeam {team_index}")
#         for player in team:
#             print(f"  {player['Position']} - {player['Name']} | Skill: {player['Skill']} | Salary: €{player['Salary (€M)']}M")


In [11]:
# check if a team meets the criteria
def is_valid_team(team):
    """Return True when ``team`` satisfies every team-level constraint.

    A team is valid when it has exactly ``TEAM_SIZE`` players, its per-position
    counts equal ``TEAM_STRUCTURE``, its total salary does not exceed
    ``BUDGET_LIMIT`` and no player name appears twice.

    Args:
        team: list of player dicts with 'Position', 'Salary (€M)' and 'Name'
            keys, or ``None``.

    Returns:
        bool: False for ``None``, for a wrong size, for a player in a position
        that ``TEAM_STRUCTURE`` does not know, or for any violated constraint.
    """
    if team is None or len(team) != TEAM_SIZE:
        return False

    # Start every required position at zero so a missing position is detected
    pos_counts = dict.fromkeys(TEAM_STRUCTURE, 0)
    total_salary = 0
    names = set()
    for p in team:
        position = p['Position']
        if position not in pos_counts:
            return False  # unknown position: invalid rather than a KeyError
        pos_counts[position] += 1
        total_salary += p['Salary (€M)']
        names.add(p['Name'])

    return (
        pos_counts == TEAM_STRUCTURE
        and total_salary <= BUDGET_LIMIT
        and len(names) == TEAM_SIZE
    )

# Fitness Function

In [12]:
# fitness function

def fitness(league):
    """Score a league by how evenly skill is spread across its teams.

    Args:
        league: list of teams (each a list of player dicts), or ``None``.

    Returns:
        The standard deviation of the teams' average 'Skill' (lower is
        better). ``float('inf')`` is returned when the league is ``None`` or
        empty, when any team fails ``is_valid_team``, or when a player name
        appears in more than one team.
    """
    # An empty league would make np.std return nan, which cannot be ordered
    if league is None or len(league) == 0:
        return float('inf')

    means = []
    used_names = set()

    for team in league:
        if not is_valid_team(team):
            return float('inf')

        for p in team:
            if p['Name'] in used_names:
                return float('inf')  # player already used in another team
            used_names.add(p['Name'])

        # average skill of the current team
        means.append(np.mean([p['Skill'] for p in team]))

    return np.std(means)  # the lower the standard deviation, the better

In [None]:
# generate initial population

def generate_initial_population(size=POPULATION_SIZE, max_attempts=1000):
    """Build a population of valid leagues by repeated random generation.

    Args:
        size: number of leagues (individuals) to produce.
        max_attempts: maximum number of consecutive failed ``create_league``
            calls tolerated while looking for the next individual; guards
            against looping forever when no valid league can be created.

    Returns:
        list: ``size`` leagues, each as returned by ``create_league``.

    Raises:
        RuntimeError: if ``max_attempts`` consecutive attempts fail to
            produce a valid league.
    """
    population = []  # population = list of individuals (leagues) = list of possible solutions
    failed_attempts = 0

    while len(population) < size:
        indiv = create_league()

        if indiv is not None:
            population.append(indiv)
            failed_attempts = 0  # the cap applies per individual, not globally
            continue

        failed_attempts += 1
        if failed_attempts >= max_attempts:
            raise RuntimeError(
                f"Could not create a valid league after {max_attempts} consecutive attempts "
                f"({len(population)} of {size} individuals generated)."
            )

    return population

In [14]:
# Build the starting population of leagues
pop = generate_initial_population()

# Report every league: its teams, per-team statistics, validity and fitness
for i, indiv in enumerate(pop):
    print(f"\n==============================")
    print(f"🏟️  League (Individual) {i+1}")
    print(f"==============================")

    team_means = []
    all_players = set()
    valid = True

    for j, team in enumerate(indiv):
        print(f"\n  🏆 Team {j+1}")

        # One line per player first, then derive the team aggregates
        for p in team:
            print(f"    - {p['Position']}: {p['Name']} | Skill: {p['Skill']} | Salary: €{p['Salary (€M)']}M")

        names = [member['Name'] for member in team]
        skill_list = [member['Skill'] for member in team]
        salary = sum(member['Salary (€M)'] for member in team)

        skill_mean = np.mean(skill_list)
        team_means.append(skill_mean)
        all_players.update(names)

        print(f"    📊 Avg Skill: {skill_mean:.2f} | 💰 Salary: €{salary}M")

        if not is_valid_team(team):
            print("    ⚠️ Invalid team structure or budget!")
            valid = False

    # Fewer distinct names than roster slots means a player was reused
    if len(all_players) < TEAM_SIZE * NUM_TEAMS:
        print("\n    ⚠️ Some players are used in multiple teams!")
        valid = False

    fitness_score = fitness(indiv)
    print(f"\n  ➤ Fitness (Std Dev of team avg skill): {fitness_score:.4f}")
    if valid:
        print("  ✅ Valid league!")
    else:
        print("  ❌ Invalid league!")



🏟️  League (Individual) 1

  🏆 Team 1
    - GK: Ryan Mitchell | Skill: 83 | Salary: €85M
    - DEF: Caleb Fisher | Skill: 84 | Salary: €85M
    - DEF: Mason Reed | Skill: 82 | Salary: €75M
    - MID: Spencer Ward | Skill: 84 | Salary: €85M
    - MID: Austin Torres | Skill: 82 | Salary: €80M
    - FWD: Landon Powell | Skill: 89 | Salary: €110M
    - FWD: Adrian Collins | Skill: 85 | Salary: €90M
    📊 Avg Skill: 84.14 | 💰 Salary: €610M

  🏆 Team 2
    - GK: Chris Thompson | Skill: 80 | Salary: €80M
    - DEF: Daniel Foster | Skill: 90 | Salary: €110M
    - DEF: Jaxon Griffin | Skill: 79 | Salary: €65M
    - MID: Ashton Phillips | Skill: 90 | Salary: €110M
    - MID: Connor Hayes | Skill: 89 | Salary: €105M
    - FWD: Colton Gray | Skill: 91 | Salary: €125M
    - FWD: Zachary Nelson | Skill: 86 | Salary: €92M
    📊 Avg Skill: 86.43 | 💰 Salary: €687M

  🏆 Team 3
    - GK: Jordan Smith | Skill: 88 | Salary: €100M
    - DEF: Owen Parker | Skill: 88 | Salary: €100M
    - DEF: Brayden Hughes

In [15]:
class LeagueIndividual:
    """One candidate solution: a league together with its fitness score.

    The fitness is computed once, at construction time, and instances
    compare by it so they can be sorted.
    """

    def __init__(self, league):
        """Store ``league`` (a list of teams) and evaluate its fitness."""
        self.league = league
        self.fitness = self.evaluate_fitness()

    def evaluate_fitness(self):
        """Return the module-level ``fitness`` score of the stored league."""
        score = fitness(self.league)
        return score

    def __lt__(self, other):
        """Order individuals by fitness."""
        mine, theirs = self.fitness, other.fitness
        return mine < theirs

    def __repr__(self):
        """Short representation showing the fitness with four decimals."""
        formatted = format(self.fitness, ".4f")
        return f"<LeagueIndividual fitness={formatted}>"


In [None]:
# create_league() returns None when a random draw is invalid, so wrap only
# leagues produced by generate_initial_population(), which retries until valid.
individual = LeagueIndividual(generate_initial_population(size=1)[0])
print(individual.fitness)

# Population size comes from the configuration cell instead of a literal
population = [LeagueIndividual(league) for league in generate_initial_population(size=POPULATION_SIZE)]


1.027777471348851
