In [None]:
# Julia Yaklin

import numpy as np
import math
import matplotlib.pyplot as plt

# Read the knapsack problem instance from the npz archive
my_data = np.load('A.npz')

bag_capacity, n_items, item_values, item_weights = (
    my_data[key]
    for key in ('capacity', 'n_items', 'item_values', 'item_weights')
)

# Seeded generator so that every run draws the same random numbers
SEED = 124
rng = np.random.default_rng(SEED)

# Representation: each member of the population is one row of item indices
# (the items placed in that bag). Rows are drawn without replacement, so an
# item index never appears twice in the same initial bag.
POP_SIZE = 12
population = np.empty((POP_SIZE, bag_capacity), dtype=int)
for i in range(POP_SIZE):
    population[i, :] = rng.choice(n_items, size=bag_capacity, replace=False)

def fitness(population):
    ''' total item value of every member bag, one float per row of population.
        overweight bags are not penalized: only item_values is consulted. '''
    # each row holds item indices, so it can index item_values directly
    value_per_bag = (np.sum(item_values[bag]) for bag in population)
    return np.fromiter(value_per_bag, dtype=float, count=population.shape[0])

def weight(population):
    ''' returns total weight of each member bag, one float per row of population.
        the result is sized from the array that is passed in (not from POP_SIZE),
        so it also works for arrays with a different number of rows,
        e.g. parents, offspring, or parents and offspring combined. '''
    totals = np.zeros(population.shape[0])
    for i, member in enumerate(population):
        # each row holds item indices, so it can index item_weights directly
        totals[i] = np.sum(item_weights[member])
    return totals

N_MATING = 5

def select_deterministic(population, pop_fitness):
    ''' pick the N_MATING members with the highest pop_fitness as parents.
        returns (parents, fitness of parents), ordered from most to least fit. '''
    # argsort is ascending, so reverse it to walk from best to worst
    best_first = np.argsort(pop_fitness)[::-1]
    parents = population[best_first[:N_MATING]]
    return (parents, fitness(parents))

N_OFFSPRING = math.comb(N_MATING, 2) // 2  # half the number of unique pairs of parents
CROSSOVER_INDEX = bag_capacity // 2 # fixed single-point crossover (always split at the same index, the midpoint)

def recombination(parents):
    ''' return (offspring, fitness of offspring).
        each of the N_OFFSPRING children takes the entries before CROSSOVER_INDEX
        from one parent and the entries from CROSSOVER_INDEX onward from another.
        the two parents are drawn at random for every child, so the same pair
        can be picked more than once.
        duplicates will be replaced by random indices later, so we allow them for now. '''
    # np.empty takes `shape` (it has no `size` keyword). The dtype is integer
    # because the rows are item indices that fitness() uses to index item_values.
    offspring = np.empty(shape=(N_OFFSPRING, bag_capacity), dtype=int)
    for i in range(N_OFFSPRING):
        # replace=False: parents never mate with themselves, so more solutions tested in same time
        first, second = rng.choice(N_MATING, size=2, replace=False)
        head = parents[first, :CROSSOVER_INDEX]
        tail = parents[second, CROSSOVER_INDEX:]

        offspring[i] = np.concatenate([head, tail])
    return (offspring, fitness(offspring))

PM = 0.1  # mutation rate parameter
def mutation(offspring):
    ''' return (mutated offspring, fitness of mutated offspring).
        every entry is independently replaced, with probability PM, by an item
        index drawn uniformly from range(n_items). the input array is not modified.
        works for any number of offspring: the random draws are shaped like the input.
        duplicates will be replaced by random indices later, so we allow them for now. '''
    mutated_offspring = offspring.copy()

    # one uniform draw per entry decides whether that entry mutates
    mask = rng.uniform(size=offspring.shape) < PM
    # candidate replacement for every entry; only the masked ones are used
    drawn_indices = rng.integers(n_items, size=offspring.shape)
    mutated_offspring[mask] = drawn_indices[mask]

    return (mutated_offspring, fitness(mutated_offspring))

def replace_duplicates(offspring):
    ''' return a copy of offspring in which every row holds bag_capacity distinct item indices.
        duplicates might have been introduced during either recombination or mutation.
        if duplicates were not removed, solution would be a single item with duplicates filling the bag.
        rows without duplicates are copied unchanged. rows with duplicates become
        their sorted unique indices followed by randomly drawn indices that were
        not yet in the row. the input array is not modified. '''
    # integer dtype: the rows are item indices and must stay usable as array indices.
    # the row count comes from the input rather than from N_OFFSPRING.
    modified_offspring = np.zeros(shape=(offspring.shape[0], bag_capacity), dtype=int)

    for i, member in enumerate(offspring):
        unique_indices = np.unique(member)
        if len(unique_indices) != bag_capacity:
            # draw the missing entries only from items not already in this bag
            new_indices_pool = np.setdiff1d(np.arange(n_items), unique_indices)
            new_indices = rng.choice(new_indices_pool, size=(bag_capacity - len(unique_indices)), replace=False)
            member = np.concatenate((unique_indices, new_indices))
        modified_offspring[i] = member

    return modified_offspring

def hard_constraint(total_population):
    ''' drop every member (row) that has an entry with absolute value of 4 or more.
        total_population includes all vectors (parents + mutated offsprings).
        returns a new array with the surviving rows; the input is left untouched.
        NOTE(review): the bound 4 is hard-coded and is not derived from bag_capacity
        or item_weights - confirm it is the intended constraint for this problem. '''
    largest_magnitude = np.abs(total_population).max(axis=1)
    within_bounds = largest_magnitude < 4
    # boolean indexing already yields a fresh array, so no explicit copy is needed
    return total_population[within_bounds]


    # for i, member in enumerate(offspring):
    #     unique, counts = np.unique(member, return_counts=True)
    #     duplicates = unique[counts > 1]
    #     for dup in duplicates:
    #         dup_indices = np.where(member == dup)[0]
    #         for idx in dup_indices[1:]:  # keep the first occurrence
    #             new_index = rng.choice([x for x in range(n_items) if x not in member])
    #             member[idx] = new_index
    #     offspring[i] = member
    # return offspring

# Smoke test for replace_duplicates: draw random rows (which may repeat an
# item index within a row) and print them before and after the repair.
test = rng.integers(n_items, size=(N_OFFSPRING, bag_capacity))
test_removed = replace_duplicates(test)
print(test)
print(test_removed)

# fitness(population)
# print(population)
# print(fitness(population))
# print(select_deterministic(population, fitness(population)))

(array([[10, 46, 45, 14,  4, 22, 33, 18,  9, 49, 27,  2, 28, 39, 16,  7,
        13, 20, 47, 32],
       [39, 25, 45, 21,  4, 37, 30, 42, 22, 34,  7, 27, 26, 28, 43, 17,
        32, 15,  3, 41],
       [11, 15,  0, 19, 17, 41, 30, 47, 44, 22, 34,  7, 27,  2, 38, 42,
         3, 23,  4,  1],
       [ 8, 17, 42, 26,  6, 39, 43, 15,  1, 10, 16, 34, 48, 12, 31, 22,
        21, 29, 33, 20],
       [19, 15, 18, 46, 31, 40,  9, 39, 25, 36, 20, 28, 11, 16,  5,  8,
        45, 47, 27, 49]]), array([112., 112., 107., 107., 103.]))
