<h2> Task 10. Implement Genetic Algorithm (GA) Optimization </h2>
The goal of this task is to implement a Genetic Algorithm to solve an optimization problem. GA can optimize continuous (numeric) and discrete (categorical) problems. For example

*   Rastrigin, an example of continous objective function you have implemented above. But use at least 10 dimnesions. Check code bellow.
*   Knapsack Problem, the Knapsack Problem is a classic optimization problem where the goal is to maximize the total value of items that can be placed into a knapsack without exceeding its weight capacity

<p>Let's start with implementation</p>

1.   Implement a selection mechanisms: tournament selection, roulette wheel selection. Their purpose is to choose individuals based on their fitness values
2.   Implement crossover techniques to combine the genetic information of two parent individuals to create offspring. Implement, single-point crossover, 2-point crossover and uniform crossover.
3.   Implement mutation: Introduce random mutations in offspring to maintain genetic diversity. Implement gaussian mutation for numeric problems and implement Flip-bit mutation for discrete problems.
4.   Create a main loop which performs the optimization
5.   Create an inner loop which creates new population based on current population
6.   Create encoding for Knapsack problem. For example, knapsack containing only first item (out of 4) will be encoded as [1,0,0,0]. Knapsack containing second and fourth item will be encoded as [0,1,0,1], etc...
7. Finish the fitness function for Knapsack problem
8. Run on both problems and get results


In [None]:
import numpy as np

def rastrigin_high_dimen(x):
    A = 10
    n = len(x)
    if n < 10:
        return -1
    else:
        return A * n + sum(x_i ** 2 - A * np.cos(2 * np.pi * x_i) for x_i in x)

# 1. Selection mechanisms
def tournament_selection(population, fitness_scores, k=3):
    selected = []
    pop_size = len(population)
    
    for _ in range(pop_size):
        tournament_size = min(k, pop_size)
        replace_needed = tournament_size >= pop_size
        
        tournament = np.random.choice(pop_size, tournament_size, replace=replace_needed)
        winner = tournament[np.argmax([fitness_scores[i] for i in tournament])]
        selected.append(population[winner].copy())
    return selected

def roulette_wheel_selection(population, fitness_scores):
    fitness_scores = np.array(fitness_scores)
    fitness_scores = fitness_scores - np.min(fitness_scores) + 1
    probabilities = fitness_scores / np.sum(fitness_scores)
    indices = np.random.choice(len(population), len(population), p=probabilities)
    return [population[i].copy() for i in indices]

# 2. Crossover techniques  
def single_point_crossover(parent1, parent2):
    point = np.random.randint(1, len(parent1))
    child1 = np.concatenate([parent1[:point], parent2[point:]])
    child2 = np.concatenate([parent2[:point], parent1[point:]])
    return child1, child2

def two_point_crossover(parent1, parent2):
    points = sorted(np.random.choice(range(1, len(parent1)), 2, replace=False))
    child1 = np.concatenate([parent1[:points[0]], parent2[points[0]:points[1]], parent1[points[1]:]])
    child2 = np.concatenate([parent2[:points[0]], parent1[points[0]:points[1]], parent2[points[1]:]])
    return child1, child2

def uniform_crossover(parent1, parent2):
    mask = np.random.random(len(parent1)) < 0.5
    child1 = np.where(mask, parent1, parent2)
    child2 = np.where(mask, parent2, parent1)
    return child1, child2

# 3. Mutation
def gaussian_mutation(individual, rate=0.1):
    for i in range(len(individual)):
        if np.random.random() < rate:
            individual[i] += np.random.normal(0, 0.5)
    return individual

def flip_bit_mutation(individual, rate=0.1):
    for i in range(len(individual)):
        if np.random.random() < rate:
            individual[i] = 1 - individual[i]
    return individual

# 4. Main loop - GA optimization
population = [np.random.uniform(-5, 5, 12) for _ in range(50)]

for generation in range(100):
    fitness_scores = [-rastrigin_high_dimen(ind) for ind in population]

    selected = tournament_selection(population, fitness_scores)

    new_population = []
    for i in range(0, len(selected), 2):
        parent1, parent2 = selected[i], selected[i + 1] if i + 1 < len(selected) else selected[0]
        child1, child2 = single_point_crossover(parent1, parent2)
        new_population.extend([gaussian_mutation(child1), gaussian_mutation(child2)])

    population = new_population[:50]

best_rastrigin = population[np.argmax([-rastrigin_high_dimen(ind) for ind in population])]
print(f"Rastrigin result: {-rastrigin_high_dimen(best_rastrigin)}")

# Test starter kod
x = np.array([1.2, 2.3, 20, 3, 3.2, 5.4, 42.1, 2, 1, 2, 4, 5])
value = rastrigin_high_dimen(x)
print(f'Rastrigin function value at {x}: {value}')

Rastrigin result: -16.684972307771943
Rastrigin function value at [ 1.2  2.3 20.   3.   3.2  5.4 42.1  2.   1.   2.   4.   5. ]: 2324.4498300562504


In [30]:
knapsack_items = {
    'item1': {'value': 60, 'weight': 10},
    'item2': {'value': 100, 'weight': 20},
    'item3': {'value': 120, 'weight': 30},
    'item4': {'value': 80, 'weight': 15},
    'item5': {'value': 40, 'weight': 5},
    'item6': {'value': 70, 'weight': 25},
    'item7': {'value': 90, 'weight': 35},
    'item8': {'value': 150, 'weight': 40},
    'item9': {'value': 200, 'weight': 50},
    'item10': {'value': 30, 'weight': 10}
}

capacity = 100

def fitness(individual):
    """Calculate the fitness of an individual."""
    items_list = list(knapsack_items.values())
    total_value = sum(individual[i] * items_list[i]['value'] for i in range(len(individual)))
    total_weight = sum(individual[i] * items_list[i]['weight'] for i in range(len(individual)))

    if total_weight > capacity:
        return 0
    return total_value

# 6. Main loop for Knapsack
population = [np.random.randint(0, 2, 10) for _ in range(50)]

for generation in range(100):
    fitness_scores = [fitness(ind) for ind in population]

    selected = roulette_wheel_selection(population, fitness_scores)

    new_population = []
    for i in range(0, len(selected), 2):
        parent1, parent2 = selected[i], selected[i + 1] if i + 1 < len(selected) else selected[0]
        child1, child2 = uniform_crossover(parent1, parent2)
        new_population.extend([flip_bit_mutation(child1), flip_bit_mutation(child2)])

    population = new_population[:50]

best_knapsack = population[np.argmax([fitness(ind) for ind in population])]
print(f"Knapsack result: {fitness(best_knapsack)}")
print(f"Items selected: {best_knapsack}")

Knapsack result: 460
Items selected: [1 1 0 1 1 0 0 1 0 1]


<h2> Task 11. Experiment with Genetic Algorithm (GA) Optimization Hyperparameters </h2>
Identify key hyperparameters to experiment with, such as:

*   Population Size: The number of individuals in the population
*   Crossover type: Single point vs 2-point vs uniform
*   Mutation Rate: The probability of mutating a chromosome.
*   Selection Method: The method used for selecting parents from the populatio
<p> 1.) Run the Genetic Algorithm with different combinations of population size, crossover type, mutation rate, and selection methods.
For each combination, record metrics such as the best fitness value found and the number of generations required to converge to a solution.
</p>
<p> 2.) Report hyperparameters which will make GA never converge. Why?</p>

In [31]:
values = np.array([item['value'] for item in knapsack_items.values()])
weights = np.array([item['weight'] for item in knapsack_items.values()])
selection_tournament = tournament_selection
selection_roulette = roulette_wheel_selection
crossover_single = single_point_crossover
crossover_two = two_point_crossover
crossover_uniform = uniform_crossover

def fitness_knapsack(individual):
    total_value = np.sum(individual * values)
    total_weight = np.sum(individual * weights)
    if total_weight > capacity:
        return 0
    return total_value

def mutate_gaussian(individual, rate=0.1, sigma=0.3, bounds=(-5.12, 5.12)):
    mutated = individual.copy()
    for i in range(len(mutated)):
        if np.random.random() < rate:
            mutated[i] += np.random.normal(0, sigma)
            mutated[i] = np.clip(mutated[i], bounds[0], bounds[1])
    return mutated

def mutate_flip_bits(individual, rate=0.1):
    mutated = individual.copy()
    for i in range(len(mutated)):
        if np.random.random() < rate:
            mutated[i] = 1 - mutated[i]
    return mutated

_selection_map = {
    "tournament": selection_tournament,   
    "roulette":   selection_roulette      
}
_crossover_map = {
    "single":   crossover_single,
    "two":      crossover_two,
    "uniform":  crossover_uniform
}

def run_ga_trial(pop_size, crossover_type, mutation_rate, selection_method,
                 problem="knapsack", gens=200, dim=12, bounds=(-5.12, 5.12),
                 tol=1e-2, window=15, seed=None):
    
    if seed is not None:
        np.random.seed(seed)

    # Initialize population
    if problem == "knapsack":
        n = len(values)
        population = [np.random.randint(0, 2, size=n, dtype=int) for _ in range(pop_size)]
        mutate = lambda ind: mutate_flip_bits(ind, rate=mutation_rate)
    else:
        population = [np.random.uniform(bounds[0], bounds[1], size=dim) for _ in range(pop_size)]
        mutate = lambda ind: mutate_gaussian(ind, rate=mutation_rate, sigma=0.3, bounds=bounds)

    cx = _crossover_map[crossover_type]
    best_history = []
    conv_gen = gens

    for g in range(gens):
        if problem == "knapsack":
            scores = np.array([fitness_knapsack(ind) for ind in population], dtype=float)
            parents = _selection_map[selection_method](population, scores, k=3) if selection_method == "tournament" else _selection_map[selection_method](population, scores)
            best_now = float(scores.max())
        else:
            objs = np.array([rastrigin_high_dimen(ind) for ind in population], dtype=float)
            fits = 1.0 / (1.0 + objs)
            parents = _selection_map[selection_method](population, fits, k=3) if selection_method == "tournament" else _selection_map[selection_method](population, fits)
            best_now = float(objs.min())
        
        best_history.append(best_now)

        # Check convergence
        if g >= window:
            window_vals = best_history[-window:]
            if (max(window_vals) - min(window_vals)) <= tol and conv_gen == gens:
                conv_gen = g

        # Create offspring
        next_pop = []
        for i in range(0, len(parents), 2):
            p1 = parents[i]
            p2 = parents[(i+1) % len(parents)]
            c1, c2 = cx(p1, p2)
            c1 = mutate(c1)
            c2 = mutate(c2)
            next_pop.extend([c1, c2])

        population = next_pop[:pop_size]

    if problem == "knapsack":
        final = float(max(fitness_knapsack(ind) for ind in population))
    else:
        final = float(min(rastrigin_high_dimen(ind) for ind in population))

    return final, conv_gen

# Experiment configurations
experiments = [
    {"pop_size": 30,  "crossover": "single",   "mut_rate": 0.10, "selection": "tournament"},
    {"pop_size": 100, "crossover": "uniform",  "mut_rate": 0.05, "selection": "roulette"},
    {"pop_size": 50,  "crossover": "two",      "mut_rate": 0.20, "selection": "tournament"},
    {"pop_size": 20,  "crossover": "single",   "mut_rate": 0.01, "selection": "roulette"},
    {"pop_size": 15,  "crossover": "uniform",  "mut_rate": 0.40, "selection": "tournament"},
]

# Knapsack experiments
print("\nTASK 11: HYPERPARAMETER EXPERIMENTS")
print("="*50)
print("KNAPSACK EXPERIMENTS:")
print("PopSize | Crossover  | MutRate | Selection   |  BestFitness | Converged@Gen")
print("-"*78)
for exp in experiments:
    runs = []
    for r in range(5):
        best, conv = run_ga_trial(
            pop_size=exp["pop_size"],
            crossover_type=exp["crossover"],
            mutation_rate=exp["mut_rate"],
            selection_method=exp["selection"],
            problem="knapsack",
            gens=200,
            seed=r
        )
        runs.append((best, conv))
    avg_best = float(np.mean([x[0] for x in runs]))
    avg_conv = float(np.mean([x[1] for x in runs]))
    print(f"{exp['pop_size']:6} | {exp['crossover']:10} | {exp['mut_rate']:7.2f} | {exp['selection']:10} |"
          f" {avg_best:12.2f} | {avg_conv:13.1f}")

# Rastrigin experiments
print("\nRASTRIGIN EXPERIMENTS:")
print("PopSize | Crossover  | MutRate | Selection   |   BestValue  | Converged@Gen")
print("-"*78)
for exp in experiments:
    runs = []
    for r in range(5):
        best, conv = run_ga_trial(
            pop_size=exp["pop_size"],
            crossover_type=exp["crossover"],
            mutation_rate=exp["mut_rate"],
            selection_method=exp["selection"],
            problem="rastrigin",
            gens=200,
            dim=12,
            seed=r
        )
        runs.append((best, conv))
    avg_best = float(np.mean([x[0] for x in runs]))
    avg_conv = float(np.mean([x[1] for x in runs]))
    print(f"{exp['pop_size']:6} | {exp['crossover']:10} | {exp['mut_rate']:7.2f} | {exp['selection']:10} |"
          f" {avg_best:12.4f} | {avg_conv:13.1f}")

print("\nNON-CONVERGENCE ANALYSIS:")
print("-" * 50)

problematic_configs = [
    {"pop_size": 5, "mut_rate": 0.0, "problem": "knapsack"},      # No diversity
    {"pop_size": 100, "mut_rate": 0.8, "problem": "knapsack"},   # Too much mutation
    {"pop_size": 2, "mut_rate": 0.1, "problem": "rastrigin"},    # Too small population
    {"pop_size": 50, "mut_rate": 0.0, "problem": "rastrigin"}    # No exploration
]

for config in problematic_configs:
    best, conv = run_ga_trial(
        pop_size=config["pop_size"], 
        crossover_type="uniform", 
        mutation_rate=config["mut_rate"],
        selection_method="tournament",
        problem=config["problem"],
        gens=100,
        seed=42
    )
    print(f"Pop={config['pop_size']}, Mut={config['mut_rate']}, {config['problem']}: {best:.2f}")


TASK 11: HYPERPARAMETER EXPERIMENTS
KNAPSACK EXPERIMENTS:
PopSize | Crossover  | MutRate | Selection   |  BestFitness | Converged@Gen
------------------------------------------------------------------------------
    30 | single     |    0.10 | tournament |       446.00 |          94.4
   100 | uniform    |    0.05 | roulette   |       460.00 |         165.4
    50 | two        |    0.20 | tournament |       436.00 |         200.0
    20 | single     |    0.01 | roulette   |       430.00 |          24.0
    15 | uniform    |    0.40 | tournament |       396.00 |         200.0

RASTRIGIN EXPERIMENTS:
PopSize | Crossover  | MutRate | Selection   |   BestValue  | Converged@Gen
------------------------------------------------------------------------------
    30 | single     |    0.10 | tournament |      24.2073 |         200.0
   100 | uniform    |    0.05 | roulette   |     144.9285 |         200.0
    50 | two        |    0.20 | tournament |      24.1595 |         200.0
    20 | single

## Hyperparameters that Prevent GA Convergence

**Non-convergent configurations:**
- **Population ≤ 5:** Insufficient diversity
- **Mutation = 0:** No exploration, trapped in local optima  
- **Mutation ≥ 0.8:** Destroys good solutions too quickly
- **Population ≤ 2:** No meaningful crossover possible

**Why they fail:** These parameters eliminate the exploration-exploitation balance needed for evolutionary search. Small populations lack diversity, zero mutation prevents escape from local optima, and excessive mutation destroys beneficial patterns faster than they form.

**Working ranges:** Population 30-100, Mutation 0.01-0.20

<h2> Task 12. Use pyGAD library </h2>
The pyGAD library is a powerful tool for implementing Genetic Algorithms in Python. Check their documentation: https://pygad.readthedocs.io/en/latest/

1.   pip install pygad
2.   Solve Knapsack problem defined above
3.   Solve rastrigin_high_dimen problem defined above
4.   Explore pyGAD hyperparameters


In [32]:
!pip install pygad



In [33]:
n_items = len(knapsack_items)

import pygad

def fitness_knapsack_pygad(ga_instance, solution, solution_idx):
    sol = np.array(solution, dtype=int)
    total_value = float(sol @ values)
    total_weight = float(sol @ weights)
    overflow = max(0.0, total_weight - capacity)
    penalty = 1000.0 * overflow
    return total_value - penalty

def fitness_rastrigin_pygad(ga_instance, solution, solution_idx):
    x = np.array(solution, dtype=float)
    return -rastrigin_high_dimen(x)

# Knapsack GA
ga_knap = pygad.GA(
    num_generations=100,
    sol_per_pop=50,
    num_parents_mating=20,
    fitness_func=fitness_knapsack_pygad,
    num_genes=n_items,
    gene_type=int,
    gene_space=[0, 1],
    parent_selection_type="tournament",
    K_tournament=3,
    crossover_type="two_points",
    mutation_probability=0.05,
    keep_parents=2,
    suppress_warnings=True
)

ga_knap.run()
sol_knap, fit_knap, _ = ga_knap.best_solution()
print(f"Knapsack Best Fitness: {fit_knap}")

# Rastrigin GA  
ga_rastr = pygad.GA(
    num_generations=150,
    sol_per_pop=80,
    num_parents_mating=30,
    fitness_func=fitness_rastrigin_pygad,
    num_genes=12,
    gene_type=float,
    gene_space={'low': -5.12, 'high': 5.12},
    parent_selection_type="tournament",
    K_tournament=3,
    crossover_type="uniform",
    mutation_probability=0.10,
    keep_parents=4,
    suppress_warnings=True
)

ga_rastr.run()
sol_rastr, fit_rastr, _ = ga_rastr.best_solution()
print(f"Rastrigin Best Fitness: {fit_rastr} (Rastrigin value: {-fit_rastr})")

print("\nHyperparameter exploration:")

for pop_size in [20, 50, 100]:
    ga = pygad.GA(
        num_generations=50,
        sol_per_pop=pop_size,
        num_parents_mating=10,
        fitness_func=fitness_knapsack_pygad,
        num_genes=n_items,
        gene_type=int,
        gene_space=[0, 1],
        parent_selection_type="tournament",
        suppress_warnings=True
    )
    ga.run()
    _, fitness, _ = ga.best_solution()
    print(f"Population {pop_size}: {fitness}")

for mut_rate in [0.05, 0.15, 0.25]:
    ga = pygad.GA(
        num_generations=50,
        sol_per_pop=50,
        num_parents_mating=20,
        fitness_func=fitness_rastrigin_pygad,
        num_genes=12,
        gene_type=float,
        gene_space={'low': -5.12, 'high': 5.12},
        mutation_probability=mut_rate,
        suppress_warnings=True
    )
    ga.run()
    _, fitness, _ = ga.best_solution()
    print(f"Mutation {mut_rate}: {fitness}")

Knapsack Best Fitness: 480.0
Rastrigin Best Fitness: -6.5157585838990855 (Rastrigin value: 6.5157585838990855)

Hyperparameter exploration:
Population 20: 480.0
Population 50: 480.0
Population 100: 430.0
Mutation 0.05: -10.227978936105146
Mutation 0.15: -18.43815657012391
Mutation 0.25: -34.42778412726588
