# Sports League Optimization

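This notebook runs and compares optimization algorithms (Hill Climbing, Simulated Annealing, and a Genetic Algorithm, including a hybrid variant with local search) for the Sports League problem: assigning players to teams so that the teams are balanced while respecting position and budget constraints. The solution classes and the algorithms themselves are imported from the `solution` and `evolution` modules.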

In [None]:
import random
import time

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from solution import LeagueSolution, LeagueHillClimbingSolution, LeagueSASolution
from evolution import (
    hill_climbing, 
    simulated_annealing, 
    genetic_algorithm,
    # Mutation operators
    mutate_swap_constrained,
    mutate_targeted_player_exchange,
    mutate_shuffle_within_team_constrained,
    # Crossover operators
    crossover_one_point_prefer_valid,
    crossover_uniform_prefer_valid,
    # Selection operators
    selection_ranking,
    selection_tournament_variable_k,
    selection_boltzmann
)

# Seed the global random number generators so that Restart & Run All
# reproduces the same optimization runs.
# NOTE(review): this only has an effect if solution.py / evolution.py draw
# from the global `random` / `np.random` generators — confirm.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Load player data from the semicolon-separated CSV file.
# players_df keeps the tabular form; players_data is the same content as a
# list with one dict per row, which is what the algorithms below receive.
players_df = pd.read_csv("players.csv", sep=";")
players_data = players_df.to_dict(orient="records")

In [None]:
# Preview the player data. The DataFrame renders as a table, and head()
# keeps the output short instead of dumping every record dict as plain text.
players_df.head(10)

## 1. Problem Definition

In the Sports League problem, we need to:
- Assign 35 players to 5 teams (7 players per team)
- Each team must have exactly 1 GK, 2 DEF, 2 MID, and 2 FWD
- Each team's total salary must not exceed 750M €
- The goal is to create balanced teams, i.e. to minimize the standard deviation of the teams' average skill

## 2. Hill Climbing Algorithm

In [None]:
# Build the starting point for Hill Climbing from the player records
hc_solution = LeagueHillClimbingSolution(players=players_data)

# Report validity first, then fitness, of the starting point.
# Each check is called right before its line is printed.
for label, probe in (
    ("Initial solution is valid", hc_solution.is_valid),
    ("Initial fitness", hc_solution.fitness),
):
    print(f"{label}: {probe()}")

In [None]:
# Run Hill Climbing from the initial solution.
# The result is unpacked into the best solution, its fitness, and a history
# sequence that is plotted in the following cell.
# Elapsed time is measured with time.perf_counter(), a monotonic
# high-resolution clock intended for timing intervals; time.time() follows
# the wall clock and can jump if the system time is adjusted.
start_time = time.perf_counter()
best_hc_solution, best_hc_fitness, hc_history = hill_climbing(
    hc_solution,
    max_iterations=500,
    max_no_improvement=100,
    verbose=True
)
hc_time = time.perf_counter() - start_time

print(f"\nHill Climbing completed in {hc_time:.2f} seconds")
print(f"Best fitness: {best_hc_fitness}")

In [None]:
# Convergence curve for Hill Climbing, drawn through the explicit Axes API
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(hc_history)
ax.set_title('Hill Climbing Convergence')
ax.set_xlabel('Iterations')
ax.set_ylabel('Fitness (lower is better)')
ax.grid(True)
plt.show()

## 3. Simulated Annealing Algorithm

In [None]:
# Build the starting point for Simulated Annealing from the player records
sa_solution = LeagueSASolution(players=players_data)

# Report validity first, then fitness, of the starting point.
# Each check is called right before its line is printed.
for label, probe in (
    ("Initial solution is valid", sa_solution.is_valid),
    ("Initial fitness", sa_solution.fitness),
):
    print(f"{label}: {probe()}")

In [None]:
# Run Simulated Annealing from the initial solution.
# The result is unpacked into the best solution, its fitness, and a history
# sequence that is plotted in the following cell.
# Elapsed time is measured with time.perf_counter(), a monotonic
# high-resolution clock intended for timing intervals; time.time() follows
# the wall clock and can jump if the system time is adjusted.
start_time = time.perf_counter()
best_sa_solution, best_sa_fitness, sa_history = simulated_annealing(
    sa_solution,
    initial_temperature=200.0,
    cooling_rate=0.95,
    min_temperature=1e-5,
    iterations_per_temp=20,
    verbose=True
)
sa_time = time.perf_counter() - start_time

print(f"\nSimulated Annealing completed in {sa_time:.2f} seconds")
print(f"Best fitness: {best_sa_fitness}")

In [None]:
# Convergence curve for Simulated Annealing, drawn through the explicit Axes API
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(sa_history)
ax.set_title('Simulated Annealing Convergence')
ax.set_xlabel('Iterations')
ax.set_ylabel('Fitness (lower is better)')
ax.grid(True)
plt.show()

## 4. Genetic Algorithm

In [None]:
# Run the standard Genetic Algorithm directly on the player records
# (no pre-built initial solution is passed in, unlike the cells above).
# The result is unpacked into the best solution, its fitness, and a history
# sequence that is plotted in the following cell.
# Elapsed time is measured with time.perf_counter(), a monotonic
# high-resolution clock intended for timing intervals; time.time() follows
# the wall clock and can jump if the system time is adjusted.
start_time = time.perf_counter()
best_ga_solution, best_ga_fitness, ga_history = genetic_algorithm(
    players_data,
    population_size=100,
    max_generations=50,
    selection_operator=selection_tournament_variable_k,
    selection_params={"k": 3},
    crossover_operator=crossover_one_point_prefer_valid,
    crossover_rate=0.8,
    mutation_operator=mutate_targeted_player_exchange,
    mutation_rate=0.1,
    elitism=True,
    elitism_size=2,
    verbose=True
)
ga_time = time.perf_counter() - start_time

print(f"\nGenetic Algorithm completed in {ga_time:.2f} seconds")
print(f"Best fitness: {best_ga_fitness}")

In [None]:
# Convergence curve for the Genetic Algorithm, drawn through the explicit Axes API
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(ga_history)
ax.set_title('Genetic Algorithm Convergence')
ax.set_xlabel('Generations')
ax.set_ylabel('Fitness (lower is better)')
ax.grid(True)
plt.show()

In [None]:
# Run the hybrid Genetic Algorithm: the same genetic_algorithm entry point,
# with a local_search configuration that requests Hill Climbing.
# The result is unpacked into the best solution, its fitness, and a history
# sequence that is plotted in the following cell.
# Elapsed time is measured with time.perf_counter(), a monotonic
# high-resolution clock intended for timing intervals; time.time() follows
# the wall clock and can jump if the system time is adjusted.
start_time = time.perf_counter()
best_hybrid_solution, best_hybrid_fitness, hybrid_history = genetic_algorithm(
    players_data,
    population_size=75,
    max_generations=40,
    selection_operator=selection_tournament_variable_k,
    selection_params={"k": 3},
    crossover_operator=crossover_uniform_prefer_valid,
    crossover_rate=0.85,
    mutation_operator=mutate_targeted_player_exchange,
    mutation_rate=0.15,
    elitism=True,
    elitism_size=1,
    local_search={
        "algorithm": "hill_climbing",
        "frequency": 5,  # Apply HC every 5 generations
        "iterations": 50  # HC iterations per application
    },
    verbose=True
)
hybrid_time = time.perf_counter() - start_time

print(f"\nHybrid Genetic Algorithm completed in {hybrid_time:.2f} seconds")
print(f"Best fitness: {best_hybrid_fitness}")

In [None]:
# Convergence curve for the hybrid Genetic Algorithm, drawn through the explicit Axes API
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(hybrid_history)
ax.set_title('Hybrid Genetic Algorithm Convergence')
ax.set_xlabel('Generations')
ax.set_ylabel('Fitness (lower is better)')
ax.grid(True)
plt.show()

## 5. Algorithm Comparison

In [None]:
# One (name, best fitness, elapsed seconds) triple per algorithm run
algorithm_runs = [
    ('Hill Climbing', best_hc_fitness, hc_time),
    ('Simulated Annealing', best_sa_fitness, sa_time),
    ('Genetic Algorithm', best_ga_fitness, ga_time),
    ('Hybrid GA', best_hybrid_fitness, hybrid_time),
]

# Transpose the triples into the column-oriented dict that the plotting
# cell below also reads
run_names, run_fitnesses, run_times = zip(*algorithm_runs)
results = {
    'Algorithm': list(run_names),
    'Best Fitness': list(run_fitnesses),
    'Execution Time (s)': list(run_times),
}

results_df = pd.DataFrame(results)
results_df

In [None]:
# Side-by-side bar charts: best fitness on the left, execution time on the right
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
panels = [
    ('Best Fitness', 'Best Fitness Comparison', 'Fitness (lower is better)'),
    ('Execution Time (s)', 'Execution Time Comparison', 'Time (seconds)'),
]

for ax, (column, title, ylabel) in zip(axes, panels):
    ax.bar(results['Algorithm'], results[column])
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    # Tilt the algorithm names so neighbouring labels do not overlap
    plt.setp(ax.get_xticklabels(), rotation=45)

fig.tight_layout()
plt.show()

## 6. Best Solution Analysis

In [None]:
# Gather every algorithm's outcome as (solution, fitness, label) triples
all_solutions = list(zip(
    (best_hc_solution, best_sa_solution, best_ga_solution, best_hybrid_solution),
    (best_hc_fitness, best_sa_fitness, best_ga_fitness, best_hybrid_fitness),
    ("Hill Climbing", "Simulated Annealing", "Genetic Algorithm", "Hybrid GA"),
))

# Pick the triple with the smallest fitness; on a tie min() keeps the
# earliest entry in the list
best_overall = min(all_solutions, key=lambda candidate: candidate[1])
best_solution, best_fitness, best_algorithm = best_overall

print(f"Best overall solution found by {best_algorithm} with fitness {best_fitness}")

In [None]:
# Per-team statistics of the winning solution
team_stats = best_solution.get_team_stats()

# Flatten each team's stats into one table row: label, skill, salary,
# then the value stored under each position key
team_rows = []
for stats in team_stats:
    row = {
        'Team': f"Team {stats['team_id']}",
        'Average Skill': stats['avg_skill'],
        'Total Salary': stats['total_salary'],
    }
    for position in ('GK', 'DEF', 'MID', 'FWD'):
        row[position] = stats['positions'][position]
    team_rows.append(row)

teams_df = pd.DataFrame(team_rows)

teams_df

In [None]:
# Bar chart of average skill per team, with the mean across teams as a
# dashed reference line
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(teams_df['Team'], teams_df['Average Skill'])
ax.set_title('Average Skill by Team')
ax.set_ylabel('Average Skill')
mean_skill = teams_df['Average Skill'].mean()
ax.axhline(y=mean_skill, color='r', linestyle='--', label='Mean')
ax.legend()
plt.show()

In [None]:
# Bar chart of total salary per team against the 750 budget line
fig, ax = plt.subplots(figsize=(10, 6))
bars = ax.bar(teams_df['Team'], teams_df['Total Salary'])
ax.set_title('Total Salary by Team')
ax.set_ylabel('Total Salary (M €)')
ax.axhline(y=750, color='r', linestyle='--', label='Budget Limit')
ax.legend()

# Write each bar's value just above it, centred horizontally on the bar
for bar in bars:
    bar_top = bar.get_height()
    bar_center = bar.get_x() + bar.get_width() / 2.
    ax.text(bar_center, bar_top + 5,
            f'{bar_top:.1f}',
            ha='center', va='bottom')

plt.show()

In [None]:
# Display detailed team compositions: a short text summary per team,
# followed by that team's players as a table.
for stats in team_stats:
    # Label each team with its own team_id (not the loop position) so the
    # headings match the "Team ..." labels used in the summary table.
    print(f"\nTeam {stats['team_id']}:")
    print(f"Average Skill: {stats['avg_skill']:.2f}")
    print(f"Total Salary: {stats['total_salary']} M €")
    print("Players:")
    
    # Create a DataFrame for this team's players, grouped by position
    # (sorting relies on the player records having a 'Position' column)
    team_df = pd.DataFrame(stats['players'])
    team_df = team_df.sort_values(by='Position')
    display(team_df)