# Genetic Algorithm (Knapsack Problem)

This project implements a genetic algorithm to solve the knapsack problem. The objective is to select a subset of items to maximize their total utility while satisfying multiple cost constraints.

In [3]:
import numpy as np
import pandas as pd

In [4]:
class GeneticAlgorithmMDKP:

    """
    Genetic Algorithm Class for the Multiple-Choice Knapsack Problem (MDKP).

    Parameters:
    - num_objects (int): The number of objects available.
    - utilities (ndarray): An array of shape (num_objects,) containing the utility values for each object.
    - costs (ndarray): An array of shape (num_objects, num_budgets) containing the cost values for each object and budget.
    - budgets (ndarray): An array of shape (num_budgets,) containing the budget constraints.
    - pop_size (int): The size of the population (default: 100).
    - generations (int): The number of generations (default: 1000).
    - mutation_rate (float): The probability of mutation (default: 0.01).
    - crossover_rate (float): The probability of crossover (default: 0.7).
    - init_method (str): The initialization method for the population. Can be 'random', 'greedy', or 'mixed' (default: 'random').
    - crossover_method (str): The crossover method. Can be 'one_point', 'two_point', or'uniform' (default: 'one_point').

    Methods:
    - initialize_population(): Initializes the population based on the chosen initialization method.
    - random_initialization(): Randomly initializes the population.
    - greedy_initialization(): Initializes the population using a greedy approach.
    - mixed_initialization(): Initializes the population using a combination of random and greedy approaches.
    - is_feasible(individual): Checks if an individual is feasible (i.e., satisfies the budget constraints).
    - fitness(individual): Calculates the fitness value of an individual.
    - selection(): Performs selection of individuals based on their fitness values.
    - one_point_crossover(parent1, parent2): Performs crossover between two parents to generate two children using one point.
    - two_point_crossover(parent1, parent2): Performs crossover between two parents to generate two children using two points.
    - uniform_crossover(parent1, parent2): Performs uniform crossover between two parents to generate two children.
    - mutate(individual): Performs mutation on an individual.
    - repair(individual): Repairs an individual to satisfy the budget constraints.
    - run(): Runs the genetic algorithm and returns the best solution and its fitness value.
    """

    def __init__(self, num_objects, utilities, costs, budgets, pop_size=100, generations=1000, mutation_rate=0.01, crossover_rate=0.7, init_method='random', crossover_method='one_point'):
        self.num_objects = num_objects
        self.utilities = utilities
        self.costs = costs
        self.budgets = budgets
        self.pop_size = pop_size
        self.generations = generations
        self.mutation_rate = mutation_rate
        self.crossover_rate = crossover_rate
        self.init_method = init_method
        self.crossover_method = crossover_method
        self.population = self.initialize_population()
        self.crossover_methods = {
            'one_point': self.one_point_crossover,
            'two_point': self.two_point_crossover,
            'uniform': self.uniform_crossover,
        }

    def initialize_population(self):
        if self.init_method == 'random':
            return self.random_initialization()
        elif self.init_method == 'greedy':
            return self.greedy_initialization()
        elif self.init_method == 'mixed':
            return self.mixed_initialization()
        else:
            raise ValueError("Invalid initialization method.")

    def random_initialization(self):
        population = []
        while len(population) < self.pop_size:
            individual = np.random.randint(2, size=self.num_objects)
            if self.is_feasible(individual):
                population.append(individual)
        return np.array(population)

    def greedy_initialization(self):
        population = []
        utility_cost_ratio = self.utilities / np.sum(self.costs, axis=1)
        sorted_indices = np.argsort(utility_cost_ratio)[::-1]

        for _ in range(self.pop_size):
            individual = np.zeros(self.num_objects, dtype=int)
            total_costs = np.zeros(len(self.budgets))

            for idx in sorted_indices:
                if np.all(total_costs + self.costs[idx] <= self.budgets):
                    individual[idx] = 1
                    total_costs += self.costs[idx]

            if self.is_feasible(individual):
                population.append(individual)

        return np.array(population)

    def mixed_initialization(self):
        population = []
        half_pop_size = self.pop_size // 2

        # Random initialization
        while len(population) < half_pop_size:
            individual = np.random.randint(2, size=self.num_objects)
            if self.is_feasible(individual):
                population.append(individual)

        # Greedy initialization
        utility_cost_ratio = self.utilities / np.sum(self.costs, axis=1)
        sorted_indices = np.argsort(utility_cost_ratio)[::-1]

        while len(population) < self.pop_size:
            individual = np.zeros(self.num_objects, dtype=int)
            total_costs = np.zeros(len(self.budgets))

            for idx in sorted_indices:
                if np.all(total_costs + self.costs[idx] <= self.budgets):
                    individual[idx] = 1
                    total_costs += self.costs[idx]

            if self.is_feasible(individual):
                population.append(individual)

        return np.array(population)

    def is_feasible(self, individual):
        for i in range(len(self.budgets)):
            if np.sum(individual * self.costs[:, i]) > self.budgets[i]:
                return False
        return True

    def fitness(self, individual):
        return np.sum(individual * self.utilities)

    def selection(self):
        fitnesses = np.array([self.fitness(ind) for ind in self.population])
        probabilities = fitnesses / fitnesses.sum()
        selected_indices = np.random.choice(len(self.population), size=len(self.population), p=probabilities)
        return self.population[selected_indices]

    def one_point_crossover(self, parent1, parent2):
        if np.random.rand() < self.crossover_rate:
            point = np.random.randint(1, self.num_objects-1)
            child1 = np.concatenate((parent1[:point], parent2[point:]))
            child2 = np.concatenate((parent2[:point], parent1[point:]))
            return child1, child2
        else:
            return parent1, parent2

    def two_point_crossover(self, parent1, parent2):
        if np.random.rand() < self.crossover_rate:
            point1 = np.random.randint(1, self.num_objects-1)
            point2 = np.random.randint(point1, self.num_objects)
            child1 = np.concatenate((parent1[:point1], parent2[point1:point2], parent1[point2:]))
            child2 = np.concatenate((parent2[:point1], parent1[point1:point2], parent2[point2:]))
            return child1, child2
        else:
            return parent1, parent2

    def uniform_crossover(self, parent1, parent2):
        child1 = np.copy(parent1)
        child2 = np.copy(parent2)
        for i in range(self.num_objects):
            if np.random.rand() < 0.5: ## equal probability of being inherited by the offspring
                child1[i], child2[i] = child2[i], child1[i]
        return child1, child2

    def crossover(self, parent1, parent2):
        if np.random.rand() < self.crossover_rate:
            return self.crossover_methods[self.crossover_method](parent1, parent2)
        else:
            return parent1, parent2
    
    def mutate(self, individual):
        for i in range(len(individual)):
            if np.random.rand() < self.mutation_rate:
                individual[i] = 1 - individual[i]
        return individual

    def repair(self, individual):
        total_costs = np.sum(individual[:, np.newaxis] * self.costs, axis=0)
        if np.all(total_costs <= self.budgets):
            return individual
        
        sorted_indices = np.argsort(self.utilities / np.sum(self.costs, axis=1))[::-1]
        for idx in sorted_indices:
            if individual[idx] == 1 and any(total_costs > self.budgets):
                individual[idx] = 0
                total_costs -= self.costs[idx]
        
        for idx in sorted_indices:
            if individual[idx] == 0 and all(total_costs + self.costs[idx] <= self.budgets):
                individual[idx] = 1
                total_costs += self.costs[idx]

        return individual

    def run(self):
        best_solution = None
        best_fitness = 0
        
        for generation in range(self.generations):
            new_population = []
            selected_population = self.selection()
            
            for i in range(0, len(self.population), 2):
                parent1, parent2 = selected_population[i], selected_population[i+1]
                child1, child2 = self.crossover(parent1, parent2)
                new_population.append(self.mutate(self.repair(child1)))
                new_population.append(self.mutate(self.repair(child2)))
            
            self.population = np.array(new_population)
            current_best = max(self.population, key=self.fitness)
            current_fitness = self.fitness(current_best)
            
            if current_fitness > best_fitness:
                best_solution, best_fitness = current_best, current_fitness
            
            # print(f"Generation {generation+1}: Best Fitness = {best_fitness}")
        
        return best_solution, best_fitness

## Tests

The following ceode cell runs the genetic algorithm ten times with different random initialisations and stores the results in a dataframe

There are 3 possible initialisation techniques:

1. **'random'** 
Randomly generates a starting population of feasible individuals for the genetic algorithm to work with.

2. **'greedy'**
Initialize some individuals based on a greedy approach, where items with the highest utility-to-cost ratio are added until no more items can be added without violating the constraints.

3. **'mixed'** 
Combine random and greedy initialization to create a more diverse initial population.

There are 3 possible Crossover methods:

1. **'one_point'** 
one point is selected randomly on the parents, and the genes are swapped relatively to that poibt.

2. **'two_point'**
Two points are selected on the parents, and the genes between these points are swapped.

3. **'uniform'** 
Each gene from the parents has an equal probability of being inherited by the offspring.

### 1. Random 
#### a. one point crossover

In [9]:
num_runs = 10

def run_genetic_algorithm():
    num_objects = 10
    utilities = np.random.randint(1, 100, size=num_objects)
    costs = np.random.randint(1, 50, size=(num_objects, 3))
    budgets = np.random.randint(50, 150, size=3)
    ga = GeneticAlgorithmMDKP(num_objects, utilities, costs, budgets, init_method='random', crossover_method='one_point'
)
    best_solution, best_fitness = ga.run()
    return best_solution, best_fitness

results = []

for i in range(num_runs):
    best_solution, best_fitness = run_genetic_algorithm()
    
    results.append({'Best Solution': best_solution,
                    'Best Fitness': best_fitness})

random_results_df = pd.DataFrame(results)
random_results_df.head(10)

Unnamed: 0,Best Solution,Best Fitness
0,"[0, 1, 0, 0, 1, 1, 1, 1, 1, 0]",468
1,"[1, 1, 0, 1, 0, 1, 1, 0, 1, 1]",531
2,"[0, 1, 1, 1, 0, 1, 0, 1, 1, 0]",287
3,"[0, 1, 1, 0, 1, 0, 0, 1, 1, 0]",283
4,"[1, 1, 1, 1, 0, 1, 1, 1, 1, 0]",489
5,"[1, 0, 1, 1, 1, 1, 0, 0, 0, 1]",339
6,"[1, 1, 1, 0, 1, 1, 1, 1, 0, 0]",530
7,"[0, 1, 0, 1, 1, 0, 1, 1, 1, 1]",519
8,"[1, 0, 1, 1, 0, 0, 1, 0, 1, 0]",262
9,"[1, 0, 0, 1, 0, 1, 1, 0, 1, 0]",364


#### b. two point crossover

In [15]:
num_runs = 10

def run_genetic_algorithm():
    num_objects = 10
    utilities = np.random.randint(1, 100, size=num_objects)
    costs = np.random.randint(1, 50, size=(num_objects, 3))
    budgets = np.random.randint(50, 150, size=3)
    ga = GeneticAlgorithmMDKP(num_objects, utilities, costs, budgets, init_method='random', crossover_method='two_point'
)
    best_solution, best_fitness = ga.run()
    return best_solution, best_fitness

results = []

for i in range(num_runs):
    best_solution, best_fitness = run_genetic_algorithm()
    
    results.append({'Best Solution': best_solution,
                    'Best Fitness': best_fitness})

random_results_df = pd.DataFrame(results)
random_results_df.head(10)

Unnamed: 0,Best Solution,Best Fitness
0,"[1, 1, 0, 0, 1, 1, 0, 1, 0, 1]",412
1,"[0, 1, 1, 0, 0, 1, 1, 1, 1, 0]",386
2,"[1, 0, 0, 1, 1, 1, 0, 1, 1, 0]",406
3,"[0, 1, 0, 0, 1, 1, 1, 1, 1, 0]",384
4,"[1, 1, 0, 0, 0, 1, 1, 1, 1, 1]",524
5,"[1, 1, 0, 0, 1, 1, 0, 1, 1, 1]",386
6,"[1, 1, 0, 0, 0, 1, 0, 1, 1, 0]",319
7,"[0, 1, 0, 1, 0, 0, 1, 1, 0, 0]",274
8,"[0, 0, 1, 1, 1, 1, 1, 0, 1, 1]",500
9,"[0, 1, 0, 1, 1, 1, 0, 1, 1, 0]",399


#### c. Uniform crossover

In [16]:
num_runs = 10

def run_genetic_algorithm():
    num_objects = 10
    utilities = np.random.randint(1, 100, size=num_objects)
    costs = np.random.randint(1, 50, size=(num_objects, 3))
    budgets = np.random.randint(50, 150, size=3)
    ga = GeneticAlgorithmMDKP(num_objects, utilities, costs, budgets, init_method='random', crossover_method='uniform'
)
    best_solution, best_fitness = ga.run()
    return best_solution, best_fitness

results = []

for i in range(num_runs):
    best_solution, best_fitness = run_genetic_algorithm()
    
    results.append({'Best Solution': best_solution,
                    'Best Fitness': best_fitness})

random_results_df = pd.DataFrame(results)
random_results_df.head(10)

Unnamed: 0,Best Solution,Best Fitness
0,"[1, 0, 1, 0, 1, 1, 1, 0, 0, 0]",329
1,"[1, 0, 1, 1, 0, 1, 0, 1, 0, 0]",378
2,"[1, 1, 0, 0, 1, 0, 1, 0, 1, 1]",375
3,"[1, 1, 0, 0, 0, 0, 1, 1, 1, 1]",282
4,"[1, 0, 1, 1, 1, 0, 0, 1, 1, 0]",299
5,"[1, 1, 0, 0, 1, 0, 1, 1, 1, 1]",384
6,"[0, 0, 1, 1, 1, 0, 1, 0, 1, 1]",326
7,"[1, 0, 1, 1, 0, 0, 1, 1, 1, 0]",380
8,"[1, 0, 1, 0, 1, 0, 1, 1, 0, 1]",372
9,"[1, 0, 1, 0, 0, 1, 1, 1, 1, 1]",439


### 2. Greedy
#### a. one point crossover

In [17]:
num_runs = 10

def run_genetic_algorithm():
    num_objects = 10
    utilities = np.random.randint(1, 100, size=num_objects)
    costs = np.random.randint(1, 50, size=(num_objects, 3))
    budgets = np.random.randint(50, 150, size=3)
    ga = GeneticAlgorithmMDKP(num_objects, utilities, costs, budgets, init_method='greedy', crossover_method='one_point'
)
    best_solution, best_fitness = ga.run()
    return best_solution, best_fitness

results = []

for i in range(num_runs):
    best_solution, best_fitness = run_genetic_algorithm()
    
    results.append({'Best Solution': best_solution,
                    'Best Fitness': best_fitness})

greedy_results_df = pd.DataFrame(results)
greedy_results_df.head(10)

Unnamed: 0,Best Solution,Best Fitness
0,"[1, 1, 0, 1, 1, 1, 1, 1, 0, 0]",576
1,"[1, 0, 1, 1, 0, 0, 0, 1, 1, 1]",467
2,"[0, 1, 0, 1, 1, 0, 0, 1, 1, 1]",456
3,"[0, 1, 1, 1, 0, 1, 0, 1, 1, 0]",283
4,"[1, 1, 0, 0, 0, 1, 0, 1, 1, 1]",329
5,"[1, 0, 0, 0, 1, 0, 1, 1, 0, 1]",411
6,"[0, 1, 1, 0, 0, 0, 1, 1, 0, 1]",388
7,"[0, 1, 1, 1, 1, 0, 0, 1, 0, 0]",366
8,"[0, 0, 1, 0, 1, 1, 1, 1, 0, 1]",324
9,"[0, 0, 1, 0, 1, 1, 0, 0, 1, 0]",303


#### b. two point crossover

In [18]:
num_runs = 10

def run_genetic_algorithm():
    num_objects = 10
    utilities = np.random.randint(1, 100, size=num_objects)
    costs = np.random.randint(1, 50, size=(num_objects, 3))
    budgets = np.random.randint(50, 150, size=3)
    ga = GeneticAlgorithmMDKP(num_objects, utilities, costs, budgets, init_method='greedy', crossover_method='two_point'
)
    best_solution, best_fitness = ga.run()
    return best_solution, best_fitness

results = []

for i in range(num_runs):
    best_solution, best_fitness = run_genetic_algorithm()
    
    results.append({'Best Solution': best_solution,
                    'Best Fitness': best_fitness})

greedy_results_df = pd.DataFrame(results)
greedy_results_df.head(10)

Unnamed: 0,Best Solution,Best Fitness
0,"[0, 1, 1, 0, 0, 1, 0, 1, 1, 0]",365
1,"[1, 1, 0, 1, 0, 1, 0, 1, 1, 1]",470
2,"[1, 0, 0, 1, 1, 0, 0, 1, 1, 1]",321
3,"[1, 1, 1, 0, 1, 0, 1, 0, 1, 0]",318
4,"[1, 1, 1, 1, 0, 1, 0, 0, 0, 1]",450
5,"[0, 1, 1, 0, 0, 0, 1, 0, 1, 1]",378
6,"[0, 1, 0, 1, 1, 0, 1, 1, 1, 1]",431
7,"[0, 1, 1, 1, 0, 1, 1, 1, 1, 1]",358
8,"[1, 1, 1, 1, 0, 1, 1, 0, 0, 0]",363
9,"[0, 1, 0, 1, 0, 0, 1, 0, 1, 1]",295


#### c. Uniform crossover

In [19]:
num_runs = 10

def run_genetic_algorithm():
    num_objects = 10
    utilities = np.random.randint(1, 100, size=num_objects)
    costs = np.random.randint(1, 50, size=(num_objects, 3))
    budgets = np.random.randint(50, 150, size=3)
    ga = GeneticAlgorithmMDKP(num_objects, utilities, costs, budgets, init_method='greedy', crossover_method='uniform'
)
    best_solution, best_fitness = ga.run()
    return best_solution, best_fitness

results = []

for i in range(num_runs):
    best_solution, best_fitness = run_genetic_algorithm()
    
    results.append({'Best Solution': best_solution,
                    'Best Fitness': best_fitness})

greedy_results_df_results_df = pd.DataFrame(results)
greedy_results_df.head(10)

Unnamed: 0,Best Solution,Best Fitness
0,"[0, 1, 1, 0, 0, 1, 0, 1, 1, 0]",365
1,"[1, 1, 0, 1, 0, 1, 0, 1, 1, 1]",470
2,"[1, 0, 0, 1, 1, 0, 0, 1, 1, 1]",321
3,"[1, 1, 1, 0, 1, 0, 1, 0, 1, 0]",318
4,"[1, 1, 1, 1, 0, 1, 0, 0, 0, 1]",450
5,"[0, 1, 1, 0, 0, 0, 1, 0, 1, 1]",378
6,"[0, 1, 0, 1, 1, 0, 1, 1, 1, 1]",431
7,"[0, 1, 1, 1, 0, 1, 1, 1, 1, 1]",358
8,"[1, 1, 1, 1, 0, 1, 1, 0, 0, 0]",363
9,"[0, 1, 0, 1, 0, 0, 1, 0, 1, 1]",295


### 3. Mixed 
#### a. one point crossover

In [20]:
num_runs = 10

def run_genetic_algorithm():
    num_objects = 10
    utilities = np.random.randint(1, 100, size=num_objects)
    costs = np.random.randint(1, 50, size=(num_objects, 3))
    budgets = np.random.randint(50, 150, size=3)
    ga = GeneticAlgorithmMDKP(num_objects, utilities, costs, budgets, init_method='mixed', crossover_method='one_point'
)
    best_solution, best_fitness = ga.run()
    return best_solution, best_fitness

results = []

for i in range(num_runs):
    best_solution, best_fitness = run_genetic_algorithm()
    
    results.append({'Best Solution': best_solution,
                    'Best Fitness': best_fitness})

mixed_results_df = pd.DataFrame(results)
mixed_results_df.head(10)

Unnamed: 0,Best Solution,Best Fitness
0,"[0, 0, 0, 1, 1, 1, 1, 0, 1, 1]",342
1,"[1, 1, 1, 0, 1, 1, 0, 0, 1, 1]",473
2,"[0, 1, 1, 1, 1, 0, 1, 0, 1, 1]",543
3,"[1, 1, 1, 1, 1, 0, 0, 0, 1, 0]",307
4,"[1, 1, 0, 1, 1, 1, 1, 0, 1, 0]",420
5,"[0, 1, 1, 1, 0, 1, 1, 1, 0, 1]",418
6,"[0, 1, 0, 1, 1, 1, 1, 0, 1, 1]",461
7,"[1, 1, 1, 1, 1, 0, 0, 0, 1, 0]",479
8,"[1, 0, 1, 1, 0, 1, 0, 0, 1, 1]",330
9,"[0, 0, 0, 1, 1, 1, 1, 1, 1, 1]",451


#### b. two point crossover

In [21]:
num_runs = 10

def run_genetic_algorithm():
    num_objects = 10
    utilities = np.random.randint(1, 100, size=num_objects)
    costs = np.random.randint(1, 50, size=(num_objects, 3))
    budgets = np.random.randint(50, 150, size=3)
    ga = GeneticAlgorithmMDKP(num_objects, utilities, costs, budgets, init_method='mixed', crossover_method='two_point'
)
    best_solution, best_fitness = ga.run()
    return best_solution, best_fitness

results = []

for i in range(num_runs):
    best_solution, best_fitness = run_genetic_algorithm()
    
    results.append({'Best Solution': best_solution,
                    'Best Fitness': best_fitness})

mixed_results_df = pd.DataFrame(results)
mixed_results_df.head(10)

Unnamed: 0,Best Solution,Best Fitness
0,"[1, 0, 1, 1, 0, 1, 1, 0, 1, 0]",395
1,"[1, 1, 1, 0, 1, 1, 1, 0, 0, 1]",348
2,"[0, 1, 1, 0, 0, 0, 1, 1, 1, 1]",333
3,"[1, 1, 1, 1, 0, 0, 0, 1, 1, 0]",442
4,"[1, 1, 0, 0, 1, 0, 0, 1, 1, 0]",320
5,"[0, 1, 1, 1, 0, 1, 0, 1, 0, 1]",223
6,"[1, 0, 1, 0, 0, 1, 1, 1, 1, 1]",353
7,"[0, 0, 0, 1, 1, 1, 0, 1, 1, 1]",391
8,"[0, 1, 1, 1, 1, 1, 0, 0, 0, 1]",329
9,"[0, 0, 1, 1, 1, 1, 0, 1, 1, 0]",458


#### c. Uniform crossover

In [22]:
num_runs = 10

def run_genetic_algorithm():
    num_objects = 10
    utilities = np.random.randint(1, 100, size=num_objects)
    costs = np.random.randint(1, 50, size=(num_objects, 3))
    budgets = np.random.randint(50, 150, size=3)
    ga = GeneticAlgorithmMDKP(num_objects, utilities, costs, budgets, init_method='mixed', crossover_method='uniform'
)
    best_solution, best_fitness = ga.run()
    return best_solution, best_fitness

results = []

for i in range(num_runs):
    best_solution, best_fitness = run_genetic_algorithm()
    
    results.append({'Best Solution': best_solution,
                    'Best Fitness': best_fitness})

mixed_results_df = pd.DataFrame(results)
mixed_results_df.head(10)

Unnamed: 0,Best Solution,Best Fitness
0,"[1, 1, 1, 0, 1, 0, 1, 1, 0, 1]",414
1,"[1, 0, 1, 1, 1, 1, 1, 1, 1, 1]",457
2,"[1, 1, 0, 1, 1, 1, 1, 1, 1, 0]",396
3,"[1, 0, 0, 0, 1, 1, 0, 1, 1, 1]",418
4,"[1, 0, 0, 1, 1, 1, 0, 1, 0, 1]",434
5,"[1, 0, 0, 1, 1, 1, 1, 1, 0, 0]",442
6,"[1, 1, 1, 1, 0, 0, 1, 1, 0, 1]",344
7,"[1, 1, 1, 0, 1, 0, 0, 1, 0, 1]",384
8,"[0, 0, 1, 0, 1, 1, 1, 1, 1, 0]",449
9,"[1, 0, 1, 0, 1, 1, 1, 1, 0, 0]",325


## Reference

```
@article{chu1998genetic,
  title={A genetic algorithm for the multidimensional knapsack problem},
  author={Chu, Paul C and Beasley, John E},
  journal={Journal of heuristics},
  volume={4},
  pages={63--86},
  year={1998},
  publisher={Springer}
}
```