Copyright **`(c)`** 2023 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  

# LAB9

Write a local-search algorithm (eg. an EA) able to solve the *Problem* instances 1, 2, 5, and 10 on a 1000-loci genomes, using a minimum number of fitness calls. That's all.

### Deadlines:

* Submission: Sunday, December 3 ([CET](https://www.timeanddate.com/time/zones/cet))
* Reviews: Sunday, December 10 ([CET](https://www.timeanddate.com/time/zones/cet))

Notes:

* Reviews will be assigned  on Monday, December 4
* You need to commit in order to be selected as a reviewer (ie. better to commit an empty work than not to commit)

In [36]:
import random
from random import choices
from copy import copy
import numpy as np
import lab9_lib
from tqdm.notebook import tqdm

# Local Search:
Local search is a heuristic method for solving computationally hard optimization problems. Local search can be used on problems that can be formulated as finding a solution maximizing a criterion among a number of candidate solutions. Local search algorithms move from solution to solution in the space of candidate solutions (the search space) by applying local changes, until a solution deemed optimal is found or a time bound is elapsed.

In [37]:
l = 1000
problems = [1, 2, 5, 10]
half_pop_size = 5
µ = 2 * half_pop_size

## Implementation:
Idea: with the fitness function I can see which pieces of string are important and which are not, and therefore I can preserve the important pieces and throw away the useless ones.

IMPORTANT: PROMOTE DIVERSITY (I can do it in the selection, crossover and mutation)
* distance metric: 
  - how far the individual is from a subset of the population to the whole population
  - from a single individual
* property of the population

3 levels of diversity:
* phenotype
* genotype
* fitness

In [38]:
# invece che fare find distribution potrei fare direttamente una funzione che mi prende inf e sup di ogni serie consecutiva di 1
# oppure fare direttamente l'and bit a bit e contare il numero di 1
# L'and conta solo quali 1 sono in comune, ma non considera la similarità con gli 0
# quindi per vedere se due vettori sono simili devo fare uno xor, perché se i due elementi sono diversi, allora il risultato è 1, altrimenti 0
# poi conto gli 1 che indicano che due elem sono diversi

# Evaluate diversity (how many different elements we have in the genome)
def compute_diversity(e1, e2):
    xor_res = e1 ^ e2
    return np.sum(xor_res) / l


In [39]:
def init_population():
    return np.array([(np.array(choices([0, 1], k=l)), 0.0) for _ in range(µ)], dtype=object)

def evaluate_population(population, fitness):
    return np.array([(individual[0], fitness(individual[0])) for individual in population], dtype=object)

def select_with_replacement(population):
    # select a random individual from the population
    # find the individual with 
    # the highest fitness, 
    # the vector with the most distinct distribution of ones compared to the others
    div_matr = np.zeros((µ, µ))
    for i1, p1 in enumerate(population):
        for i2, p2 in enumerate(population):
            if i1 != i2:
                #the matrix is not symmetric since is added only for p2[1]
                div_matr[i1][i2] = (compute_diversity(p1[0], p2[0]) + p2[1]) / 2

    # find index of the individuals with highest fitness and highest diversity
    i1, i2 = np.unravel_index(np.argmax(div_matr), div_matr.shape)

    return population[i1], population[i2]

def crossover_cyclic_shift(parent1, parent2):
    # a two (rand) point crossover for now
    # swapping of two substrings of the same len but in random position
    # I want try to implement a circular translated swapping
    v = parent1[0]
    w = parent2[0]
    c = random.randint(0, l)
    d = random.randint(0, l)
    s = random.randint(0, l)
    if c < d:
        for i in range(c, d):
            v[i%l], w[(i+s)%l] = w[(i+s)%l], v[i%l]
    else:
        for i in range(c, d+l):
            v[i%l], w[(i+s)%l] = w[(i+s)%l], v[i%l]
    return (v, 0.0), (w, 0.0)

def std_crossover(parent1, parent2):
    # a two (rand) point crossover for now
    # swapping of two substrings of the same len but in random position
    # I want try to implement a circular translated swapping
    v = parent1[0]
    w = parent2[0]
    c = random.randint(0, l)
    d = random.randint(0, l)
    if c > d:
        c, d = d, c
    if c!=d:
        v[c:d], w[c:d] = w[c:d], v[c:d]
    return (v, 0.0), (w, 0.0)

def mutate(individual, mutation_prob=0.5):
    # bit flip mutation for now
    v = individual[0]
    for i in range(l):
        if mutation_prob >= random.random():
            v[i] = 1 - v[i]
    return individual

In [40]:
def genetic_algorithm(fitness, crossover):
    Best = None
    # 1. Initialize population
    population = init_population()
    population = evaluate_population(population, fitness)
    # 2. Repeat
    found = -1
    x = -1
    for i in tqdm(range(100)):
        for p in population:
            if Best is None or p[1] > Best[1]:
                Best = p
                found = fitness._calls
                x = i
        
        if Best is not None and Best[1]==1:
            break
        
        q = list()
        for _ in range(µ//2):
            # 2.1 Select parents
            parent_a, parent_b = select_with_replacement(population)
            # 2.2 Crossover
            child_a, child_b = crossover(copy(parent_a), copy(parent_b))
            
            # 2.3 Mutate
            mutated_a = mutate(child_a)
            mutated_b = mutate(child_b)
            q.append(mutated_a)
            q.append(mutated_b)
            
        population = evaluate_population(q, fitness)
    
    # 4. Return best individual
    return Best, found, x

In [41]:
for _ in range(3):
    my_list = list()
    found = -1
    fitness = None
    for prob in problems:
        fitness = lab9_lib.make_problem(prob)
        b, found, i = genetic_algorithm(fitness, crossover_cyclic_shift)
        my_list.append((prob, b[1], fitness.calls))
    for m in my_list:
        print(f"Problem\t{m[0]}:\t{m[1]:.2%},\tCalls:\t{m[2]},\tBest found at \t{found} fitness calls,\tat iteration {i}")
    print("-------------------------------------")

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

Problem	1:	54.20%,	Calls:	1010,	Best found at 	30 fitness calls,	at iteration 2
Problem	2:	52.20%,	Calls:	1010,	Best found at 	30 fitness calls,	at iteration 2
Problem	5:	30.70%,	Calls:	1010,	Best found at 	30 fitness calls,	at iteration 2
Problem	10:	21.02%,	Calls:	1010,	Best found at 	30 fitness calls,	at iteration 2
-------------------------------------


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

Problem	1:	54.70%,	Calls:	1010,	Best found at 	110 fitness calls,	at iteration 10
Problem	2:	51.00%,	Calls:	1010,	Best found at 	110 fitness calls,	at iteration 10
Problem	5:	31.03%,	Calls:	1010,	Best found at 	110 fitness calls,	at iteration 10
Problem	10:	16.50%,	Calls:	1010,	Best found at 	110 fitness calls,	at iteration 10
-------------------------------------


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

Problem	1:	54.60%,	Calls:	1010,	Best found at 	650 fitness calls,	at iteration 64
Problem	2:	52.20%,	Calls:	1010,	Best found at 	650 fitness calls,	at iteration 64
Problem	5:	30.12%,	Calls:	1010,	Best found at 	650 fitness calls,	at iteration 64
Problem	10:	17.13%,	Calls:	1010,	Best found at 	650 fitness calls,	at iteration 64
-------------------------------------


In [42]:
for _ in range(3):
    my_list = list()
    found = -1
    fitness = None
    for prob in problems:
        fitness = lab9_lib.make_problem(prob)
        b, found, i = genetic_algorithm(fitness, std_crossover)
        my_list.append((prob, b[1], fitness.calls))
    for m in my_list:
        print(f"Problem\t{m[0]}:\t{m[1]:.2%},\tCalls:\t{m[2]},\tBest found at \t{found} fitness calls,\tat iteration {i}")
    print("-------------------------------------")

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

Problem	1:	54.20%,	Calls:	1010,	Best found at 	850 fitness calls,	at iteration 84
Problem	2:	52.00%,	Calls:	1010,	Best found at 	850 fitness calls,	at iteration 84
Problem	5:	31.84%,	Calls:	1010,	Best found at 	850 fitness calls,	at iteration 84
Problem	10:	16.21%,	Calls:	1010,	Best found at 	850 fitness calls,	at iteration 84
-------------------------------------


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

Problem	1:	55.30%,	Calls:	1010,	Best found at 	450 fitness calls,	at iteration 44
Problem	2:	52.40%,	Calls:	1010,	Best found at 	450 fitness calls,	at iteration 44
Problem	5:	28.64%,	Calls:	1010,	Best found at 	450 fitness calls,	at iteration 44
Problem	10:	15.91%,	Calls:	1010,	Best found at 	450 fitness calls,	at iteration 44
-------------------------------------


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

Problem	1:	54.00%,	Calls:	1010,	Best found at 	590 fitness calls,	at iteration 58
Problem	2:	51.60%,	Calls:	1010,	Best found at 	590 fitness calls,	at iteration 58
Problem	5:	21.02%,	Calls:	1010,	Best found at 	590 fitness calls,	at iteration 58
Problem	10:	15.91%,	Calls:	1010,	Best found at 	590 fitness calls,	at iteration 58
-------------------------------------


In [43]:
for i, prob in enumerate(problems):
    print(f"Problem {prob}:")

    fitness = lab9_lib.make_problem(prob)
    for n in range(10):
        ind = choices([0, 1], k=1000)
        print(f"{''.join(str(g) for g in ind[0:5])}... | {fitness(ind):.2%}")

    print(fitness.calls)

    print("-------------------------------------")


Problem 1:
01000... | 52.80%
11111... | 50.40%
01101... | 50.40%
10001... | 49.10%
10110... | 51.40%
01010... | 50.40%
00001... | 49.10%
10101... | 50.20%
10110... | 50.40%
11010... | 51.70%
10
-------------------------------------
Problem 2:
00001... | 23.82%
10001... | 22.99%
00110... | 23.59%
01000... | 24.10%
00101... | 22.33%
01111... | 24.24%
11100... | 24.45%
11001... | 22.30%
10100... | 22.66%
11011... | 22.61%
10
-------------------------------------
Problem 5:
11001... | 10.56%
00111... | 9.74%
11001... | 9.38%
00011... | 9.55%
01000... | 19.70%
11111... | 9.76%
11010... | 9.56%
00111... | 10.73%
10011... | 9.66%
01001... | 9.62%
10
-------------------------------------
Problem 10:
00110... | 5.08%
10100... | 5.19%
11110... | 10.22%
10111... | 10.40%
00111... | 5.54%
00111... | 5.10%
00011... | 4.82%
00100... | 5.12%
01110... | 5.49%
10100... | 5.48%
10
-------------------------------------


In [44]:
fitness = lab9_lib.make_problem(1)
for n in range(10):
    ind = choices([0, 1], k=50)
    print(f"{''.join(str(g) for g in ind)}: {fitness(ind):.2%}")

print(fitness.calls)

10001001111111110110111000111000011010011100100111: 58.00%
10100101011111000100011111010011011000011010111110: 56.00%
00000101111101110011011011101101010000011111011010: 56.00%
00100000101110110011100000111001100110000001000100: 38.00%
10110010011100101001100011110101111110110011100110: 58.00%
11010001111110011110010001010111110010011110101110: 60.00%
00001111101101001101111101100110111011010001011000: 56.00%
10010100001111011011111011111101011011111000010101: 62.00%
10101100000011110011100000000101111111001101001011: 50.00%
11001001000101001010100110001100001100110010110101: 44.00%
10
