# **Fantasy Sports League Optimization**

## **Project Description**
This project focuses on optimizing team assignments in a fantasy sports league while ensuring a balanced distribution of talent and adherence to salary constraints. Given a dataset of players with attributes such as skill rating, cost, and position, we aim to assign them to five teams while following strict constraints and achieving a balanced league.

## **Constraints**
- Each team must consist of:
  - 1 Goalkeeper (GK)  
  - 2 Defenders (DEF)  
  - 2 Midfielders (MID)  
  - 2 Forwards (FWD)  
- Each player is assigned to exactly one team.  
- No team may exceed a total budget of **750 million €**.  
- The standard deviation of the average skill rating of all teams should be minimized to ensure fairness.  

## **Objective**
To generate a valid league configuration that follows all constraints and ensures that teams have a similar overall skill level, measured by the standard deviation of their average skill ratings.

## **Group Members**

### **Group X**

| Name                  | Student Number  |
|-----------------------|----------------|
| Philippe Dutranoit   | 20240518        |
| Name 2               | Student Number 2 |
| Name 3               | Student Number 3 |
| Name 4               | Student Number 4 |

# Imports 

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
df = pd.read_csv('../Data/players(in).csv')

In [4]:
df.drop(columns=['Unnamed: 0'], inplace=True) # drop a column that as no information 
df.hea


KeyError: "['Unnamed: 0'] not found in axis"

# Defining The constraints

In [5]:
position = {"GK": 1, "DEF": 2, "MID": 2, "FWD": 2}
budget = 750 
total_players = 5 # as 35 rows devided by seven position = 5 teams
team_size = sum(position.values())

In [6]:
# create a dictionary to group players by position
position_groups = {pos: df[df["Position"] == pos].values.tolist() for pos in position}

position_groups

{'GK': [['Alex Carter', 'GK', 85, 90],
  ['Jordan Smith', 'GK', 88, 100],
  ['Ryan Mitchell', 'GK', 83, 85],
  ['Chris Thompson', 'GK', 80, 80],
  ['Blake Henderson', 'GK', 87, 95]],
 'DEF': [['Daniel Foster', 'DEF', 90, 110],
  ['Lucas Bennett', 'DEF', 85, 90],
  ['Owen Parker', 'DEF', 88, 100],
  ['Ethan Howard', 'DEF', 80, 70],
  ['Mason Reed', 'DEF', 82, 75],
  ['Logan Brooks', 'DEF', 86, 95],
  ['Caleb Fisher', 'DEF', 84, 85],
  ['Maxwell Flores', 'DEF', 81, 72],
  ['Jaxon Griffin', 'DEF', 79, 65],
  ['Brayden Hughes', 'DEF', 87, 100]],
 'MID': [['Nathan Wright', 'MID', 92, 120],
  ['Connor Hayes', 'MID', 89, 105],
  ['Dylan Morgan', 'MID', 91, 115],
  ['Hunter Cooper', 'MID', 83, 85],
  ['Austin Torres', 'MID', 82, 80],
  ['Gavin Richardson', 'MID', 87, 95],
  ['Spencer Ward', 'MID', 84, 85],
  ['Bentley Rivera', 'MID', 88, 100],
  ['Dominic Bell', 'MID', 86, 95],
  ['Ashton Phillips', 'MID', 90, 110]],
 'FWD': [['Sebastian Perry', 'FWD', 95, 150],
  ['Xavier Bryant', 'FWD', 90, 

# chatgpt example with only skills into account

In [9]:
import random
import numpy as np
import pandas as pd

# Constants
TEAM_COUNT = 5
TEAM_STRUCTURE = {
    'GK': 1,
    'DEF': 2,
    'MID': 2,
    'FWD': 2
}
POPULATION_SIZE = 100
GENERATIONS = 300
MUTATION_RATE = 0.1
TOURNAMENT_SIZE = 5

# Utility: Generate initial population
def generate_initial_population(position_groups):
    population = []
    for _ in range(POPULATION_SIZE):
        players = []
        for pos, required_per_team in TEAM_STRUCTURE.items():
            total_needed = required_per_team * TEAM_COUNT
            selected = random.sample(position_groups[pos], total_needed)
            players.extend([[p[0], p[1], p[2], p[1]] for p in selected]) # Just to be sure position is there

        random.shuffle(players)
        df = pd.DataFrame(players, columns=['Player', 'Position', 'Skill', 'Pos'])
        df['Team'] = np.repeat(range(TEAM_COUNT), sum(TEAM_STRUCTURE.values()))
        population.append(df)
    return population

# Fitness = standard deviation of average skill per team
def evaluate_fitness(individual):
    team_avgs = individual.groupby('Team')['Skill'].mean()
    return np.std(team_avgs)

# Tournament selection
def tournament_selection(population):
    selected = random.sample(population, TOURNAMENT_SIZE)
    return min(selected, key=evaluate_fitness)

# Crossover: single-point
def crossover(parent1, parent2):
    split = random.randint(1, len(parent1) - 2)
    child = pd.concat([parent1.iloc[:split], parent2.iloc[split:]]).copy()
    child = child.sample(frac=1).reset_index(drop=True)
    child['Team'] = np.repeat(range(TEAM_COUNT), sum(TEAM_STRUCTURE.values()))
    return child

# Mutation: swap same-position players
def mutate(individual):
    if random.random() < MUTATION_RATE:
        idx1, idx2 = random.sample(range(len(individual)), 2)
        if individual.loc[idx1, 'Position'] == individual.loc[idx2, 'Position']:
            individual.loc[idx1, 'Team'], individual.loc[idx2, 'Team'] = (
                individual.loc[idx2, 'Team'],
                individual.loc[idx1, 'Team']
            )
    return individual

# Main GA function
def genetic_algorithm(position_groups):
    population = generate_initial_population(position_groups)
    best_fitness = float('inf')
    best_solution = None

    for generation in range(GENERATIONS):
        new_population = []
        for _ in range(POPULATION_SIZE):
            parent1 = tournament_selection(population)
            parent2 = tournament_selection(population)
            child = crossover(parent1, parent2)
            child = mutate(child)
            new_population.append(child)

        population = new_population
        best_candidate = min(population, key=evaluate_fitness)
        fitness = evaluate_fitness(best_candidate)
        if fitness < best_fitness:
            best_fitness = fitness
            best_solution = best_candidate

        if generation % 20 == 0:
            print(f"Generation {generation}: std = {fitness:.4f}")

    return best_solution

# Run GA
best_teams = genetic_algorithm(position_groups)
print(best_teams.sort_values(by='Team'))


Generation 0: std = 0.5952
Generation 20: std = 0.1565
Generation 40: std = 0.0700
Generation 60: std = 0.0000
Generation 80: std = 0.0000
Generation 100: std = 0.0000
Generation 120: std = 0.0000
Generation 140: std = 0.0000
Generation 160: std = 0.0000
Generation 180: std = 0.0000
Generation 200: std = 0.0000
Generation 220: std = 0.0000
Generation 240: std = 0.0000
Generation 260: std = 0.0000
Generation 280: std = 0.0000
              Player Position  Skill  Pos  Team
0       Logan Brooks      DEF     86  DEF     0
1     Brayden Hughes      DEF     87  DEF     0
2     Brayden Hughes      DEF     87  DEF     0
3       Chase Murphy      FWD     86  FWD     0
4     Brayden Hughes      DEF     87  DEF     0
5     Zachary Nelson      FWD     86  FWD     0
6       Logan Brooks      DEF     86  DEF     0
13    Brayden Hughes      DEF     87  DEF     1
12    Zachary Nelson      FWD     86  FWD     1
11      Logan Brooks      DEF     86  DEF     1
8     Brayden Hughes      DEF     87  DEF  