Copyright **`(c)`** 2023 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  

# LAB9

Write a local-search algorithm (e.g., an EA) able to solve the *Problem* instances 1, 2, 5, and 10 on 1000-loci genomes, using a minimum number of fitness calls. That's all.

### Deadlines:

* Submission: Sunday, December 3 ([CET](https://www.timeanddate.com/time/zones/cet))
* Reviews: Sunday, December 10 ([CET](https://www.timeanddate.com/time/zones/cet))

Notes:

* Reviews will be assigned on Monday, December 4
* You need to commit in order to be selected as a reviewer (i.e., better to commit an empty work than not to commit)

In [310]:
import random
from random import choices
from copy import copy
import lab9_lib

# Local Search:
Local search is a heuristic method for solving computationally hard optimization problems. Local search can be used on problems that can be formulated as finding a solution maximizing a criterion among a number of candidate solutions. Local search algorithms move from solution to solution in the space of candidate solutions (the search space) by applying local changes, until a solution deemed optimal is found or a time bound is elapsed.

In [311]:
# Genome length: number of loci (bits) in every individual.
l = 1000
# Problem instances passed to lab9_lib.make_problem by the experiment cell.
problems = [1, 2, 5, 10]
# Number of parent pairs bred per generation (the GA loops µ // 2 times).
half_pop_size = 5
# Population size; defined as twice half_pop_size, so it is always even and
# each generation can be filled with whole pairs of children.
µ = 2 * half_pop_size

## Implementation:
Idea: with the fitness function I can see which pieces of string are important and which are not, and therefore I can preserve the important pieces and throw away the useless ones.

IMPORTANT: PROMOTE DIVERSITY (I can do it in the selection, crossover and mutation)
* distance metric: 
  - how far the individual is from a group of others (anything from a subset of the population to the whole population)
  - from a single individual
* property of the population

3 levels of diversity:
* phenotype
* genotype
* fitness

In [317]:
# Idea: instead of find_distribution, a function could directly return the
# lower and upper bound of every consecutive series of 1s.
def find_distribution(individual):
    """Summarise every maximal run of consecutive 1s in *individual*.

    Args:
        individual: iterable of genes; a gene counts as "set" when it
            compares equal to 1.

    Returns:
        A list with one ``(run_length, run_centre)`` tuple per run, in order
        of appearance, where ``run_centre`` is the index of the first gene
        of the run plus ``run_length // 2``.
    """
    runs = []
    run_start = None
    run_length = 0

    for position, gene in enumerate(individual):
        if gene == 1:
            # Either open a new run or extend the current one.
            if run_start is None:
                run_start = position
            run_length += 1
            continue

        # A non-1 gene closes the run that was open, if any.
        if run_length > 0:
            runs.append((run_length, run_start + run_length // 2))
            run_start = None
            run_length = 0

    # The genome may end while a run is still open.
    if run_length > 0:
        runs.append((run_length, run_start + run_length // 2))

    return runs

def calculate_overlapping(m1, v1, m2, v2):
    """Return the length of the overlap between two intervals.

    Each interval is described by its centre ``m`` and total width ``v``,
    i.e. it spans ``[m - v / 2, m + v / 2]``.

    Returns:
        The positive overlap length, or ``0`` when the intervals are
        disjoint or merely touch.
    """
    half_1 = v1 / 2
    half_2 = v2 / 2

    lowest_upper_bound = min(m1 + half_1, m2 + half_2)
    highest_lower_bound = max(m1 - half_1, m2 - half_2)

    # A non-positive difference means there is no shared span.
    return max(0, lowest_upper_bound - highest_lower_bound)

# Evaluate diversity
def evaluate_diversity(e1, e2):
    """Sum the pairwise interval overlaps between two run descriptions.

    Args:
        e1, e2: iterables of ``(centre, width)`` pairs, in the argument
            order expected by ``calculate_overlapping``. ``e2`` is traversed
            once per element of ``e1``, so it must be re-iterable.

    Returns:
        The total overlap over every pair drawn from ``e1`` x ``e2``
        (``0`` when either side is empty). Note that a larger value means
        the two descriptions share more positions.
    """
    return sum(
        calculate_overlapping(centre_a, width_a, centre_b, width_b)
        for centre_a, width_a in e1
        for centre_b, width_b in e2
    )
    
                

In [312]:
def init_population():
    """Build the initial population of random bit-string individuals.

    Returns:
        A list of ``µ`` tuples ``(genome, 0.0)`` where ``genome`` is a list
        of ``l`` genes drawn uniformly from ``{0, 1}`` and ``0.0`` is a
        placeholder for the not-yet-computed fitness.
    """
    population = []
    for _ in range(µ):
        genome = choices([0, 1], k=l)
        population.append((genome, 0.0))
    return population

def evaluate_population(population, fitness):
    """Score every individual of *population* with *fitness*.

    Args:
        population: iterable of individuals whose first element is the genome.
        fitness: callable taking a genome and returning its score.

    Returns:
        A new list of ``(genome, score)`` tuples, in the same order. The
        genome objects are reused, not copied; any previously stored score
        is discarded.
    """
    scored = []
    for individual in population:
        genome = individual[0]
        scored.append((genome, fitness(genome)))
    return scored

def _overlap_matrix(population):
    """Return the pairwise run-overlap matrix of *population*.

    Entry ``[i][j]`` is ``evaluate_diversity`` applied to the run
    descriptions of individuals ``i`` and ``j`` (``0`` on the diagonal).
    A larger entry means the two genomes have more 1s in the same places.

    Not used by the selection yet: the cost grows with the square of the
    population size times the product of the number of runs of each pair,
    which is far too much to pay on every single parent selection.
    """
    # find_distribution yields (length, centre) pairs while
    # evaluate_diversity unpacks (centre, length) pairs, so swap them here.
    descriptions = [
        [(centre, length) for length, centre in find_distribution(individual[0])]
        for individual in population
    ]
    size = len(descriptions)
    matrix = [[0] * size for _ in range(size)]
    for i, runs_i in enumerate(descriptions):
        for j, runs_j in enumerate(descriptions):
            if i != j:
                # Pass the whole run list, not just its first element.
                matrix[i][j] = evaluate_diversity(runs_i, runs_j)
    return matrix


def select_with_replacement(population):
    """Pick one parent uniformly at random from *population*.

    The same individual may be returned by successive calls.

    Args:
        population: non-empty sequence of ``(genome, fitness)`` individuals.

    Returns:
        One element of *population* (the very same object, not a copy).

    Raises:
        IndexError: if *population* is empty.
    """
    # TODO: bias the choice towards individuals with the highest fitness and
    # the most distinct distribution of ones compared to the others, using
    # _overlap_matrix(population) computed once per generation rather than
    # once per selection.
    return random.choice(population)

def crossover(parent1, parent2):
    """Two-point crossover producing two children from two parents.

    Two cut points are drawn at random and the genes between them are
    exchanged between copies of the parents' genomes.

    Args:
        parent1, parent2: ``(genome, fitness)`` individuals. Neither the
            tuples nor their genome lists are modified.

    Returns:
        A pair of children ``(genome, 0.0)``, each owning a fresh genome
        list; ``0.0`` is a placeholder for the not-yet-computed fitness.
    """
    # TODO: try swapping two substrings of the same length taken at random
    # positions instead of at the same position.

    # Copy the genomes: swapping slices in place would silently rewrite the
    # parents (and any stored best individual), and would make both children
    # share one list whenever the same individual is selected twice.
    v = list(parent1[0])
    w = list(parent2[0])

    # Cut points are bounded by the genomes themselves, not by a global.
    n = min(len(v), len(w))
    c = random.randint(0, n)
    d = random.randint(0, n)
    if c > d:
        c, d = d, c
    if c != d:
        v[c:d], w[c:d] = w[c:d], v[c:d]
    return (v, 0.0), (w, 0.0)

def mutate(individual, p=0.5):
    """Bit-flip mutation applied in place to the genome of *individual*.

    Every gene is flipped (``g -> 1 - g``) independently with probability
    *p*.

    Args:
        individual: ``(genome, fitness)`` tuple; the genome list is modified
            in place and the stored fitness is left untouched (it is stale
            after a mutation and must be re-evaluated by the caller).
        p: per-gene flip probability. ``0`` never flips, ``1`` always flips.
            The default keeps the original rate; note that at ``0.5`` the
            result is statistically independent of the input genome, so a
            much smaller value (e.g. ``1 / len(genome)``) is usually what a
            search needs.

    Returns:
        The same *individual* object that was passed in.
    """
    genome = individual[0]
    # Iterate over the genome itself so any genome length is supported.
    for i, gene in enumerate(genome):
        if random.random() < p:
            genome[i] = 1 - gene
    return individual


In [None]:
def _fittest(current, candidates):
    """Return the best of *current* and *candidates* by fitness (index 1).

    *current* may be ``None``; ties keep the earlier individual.
    """
    for candidate in candidates:
        if current is None or candidate[1] > current[1]:
            current = candidate
    return current


def genetic_algorithm(fitness, generations=100):
    """Run a generational GA and return the best individual ever evaluated.

    Each generation is built entirely from mutated children of randomly
    selected parents; the previous generation is discarded.

    Args:
        fitness: callable mapping a genome to its score; a score of ``1`` is
            treated as optimal and stops the search.
        generations: maximum number of offspring generations to breed.

    Returns:
        The ``(genome, fitness)`` tuple with the highest fitness seen, or
        ``None`` if the population is empty.
    """
    # 1. Initialize population
    population = evaluate_population(init_population(), fitness)
    best = _fittest(None, population)

    # 2. Repeat
    for _ in range(generations):
        if best is not None and best[1] == 1:
            break

        offspring = []
        for _ in range(µ // 2):
            # 2.1 Select parents
            parent_a = select_with_replacement(population)
            parent_b = select_with_replacement(population)

            # 2.2 Crossover
            # Hand over fresh genome lists: copying the tuple alone would
            # still share the list, so in-place crossover/mutation would
            # rewrite the parents and the stored best individual.
            child_a, child_b = crossover(
                (list(parent_a[0]), parent_a[1]),
                (list(parent_b[0]), parent_b[1]),
            )

            # 2.3 Mutate
            offspring.append(mutate(child_a))
            offspring.append(mutate(child_b))

        # 3. Evaluate the new generation and update the best right away, so
        # that the last generation's fitness calls are not wasted.
        population = evaluate_population(offspring, fitness)
        best = _fittest(best, population)

    # 4. Return best individual
    return best

In [None]:
# Repeat the whole benchmark 10 times to see the run-to-run variability.
for _ in range(10):
    # One (problem, best fitness, fitness calls) record per problem instance.
    my_list = list()

    for prob in problems:
        # A fresh fitness object per problem, so its call counter covers
        # only this run of the algorithm.
        fitness = lab9_lib.make_problem(prob)
        b = genetic_algorithm(fitness)
        # b[1] is the fitness of the best individual returned by the GA.
        my_list.append((prob, b[1], fitness.calls))

    # Report best fitness (as a percentage) and call count for each problem.
    for m in my_list:
        print(f"Problem\t{m[0]}:\t{m[1]:.2%},\tCalls:\t{m[2]}")
    print("-------------------------------------")

Problem	1:	54.40%,	Calls:	1010
Problem	2:	50.80%,	Calls:	1010
Problem	5:	30.97%,	Calls:	1010
Problem	10:	15.90%,	Calls:	1010
-------------------------------------
Problem	1:	56.30%,	Calls:	1010
Problem	2:	48.80%,	Calls:	1010
Problem	5:	20.86%,	Calls:	1010
Problem	10:	20.24%,	Calls:	1010
-------------------------------------
Problem	1:	54.80%,	Calls:	1010
Problem	2:	51.00%,	Calls:	1010
Problem	5:	30.97%,	Calls:	1010
Problem	10:	15.63%,	Calls:	1010
-------------------------------------
Problem	1:	54.80%,	Calls:	1010
Problem	2:	50.80%,	Calls:	1010
Problem	5:	21.10%,	Calls:	1010
Problem	10:	20.62%,	Calls:	1010
-------------------------------------
Problem	1:	53.90%,	Calls:	1010
Problem	2:	52.20%,	Calls:	1010
Problem	5:	29.49%,	Calls:	1010
Problem	10:	16.49%,	Calls:	1010
-------------------------------------
Problem	1:	53.70%,	Calls:	1010
Problem	2:	50.40%,	Calls:	1010
Problem	5:	21.28%,	Calls:	1010
Problem	10:	11.38%,	Calls:	1010
-------------------------------------
Problem	1:	54.30%,	Cal

In [None]:
# Sanity check: score 10 random 1000-gene individuals on problem instance 1.
fitness = lab9_lib.make_problem(1)
for n in range(10):
    ind = choices([0, 1], k=1000)
    # Show only the first 5 genes next to the fitness as a percentage.
    print(f"{''.join(str(g) for g in ind[0:5])}...: {fitness(ind):.2%}")

# Presumably the number of fitness evaluations performed so far.
print(fitness.calls)

10001...: 47.80%
00001...: 48.00%
10010...: 50.00%
01101...: 50.40%
01010...: 49.00%
01101...: 49.70%
10011...: 50.60%
11111...: 50.80%
10100...: 49.60%
00001...: 49.40%
10


In [None]:
# Same check with short 50-gene individuals, printing the full genome.
fitness = lab9_lib.make_problem(1)
for n in range(10):
    ind = choices([0, 1], k=50)
    print(f"{''.join(str(g) for g in ind)}: {fitness(ind):.2%}")

# Presumably the number of fitness evaluations performed so far.
print(fitness.calls)

10010001000001100100010010101101010101001110011000: 40.00%
10100101010000001111110110110010001001000111111010: 50.00%
00010011011110010111010000000010100010110100000001: 38.00%
10000000011101001000000111000110001110010111110001: 42.00%
11001101010110001111000100111001011111000000100110: 50.00%
00010101011110110110111111100000101001110010000111: 54.00%
01001001000001001101100001011100000010001001101011: 38.00%
01000101001011000101111000010101110111110101001101: 52.00%
00100101011001111110010111001010101110010110110011: 56.00%
00010001001000011110110111001000101010011100101000: 42.00%
10
