In [140]:
import logging
from itertools import combinations
import pandas as pd
import numpy as np
from geopy.distance import geodesic
import random


# Configure the root logger at DEBUG level so that the logging.debug(...) step
# messages produced further down in this notebook are shown, not only INFO.
logging.basicConfig(level=logging.DEBUG)

In [185]:
# Read the city table; the CSV has no header row, so the column names are supplied here.
CITIES = pd.read_csv('cities/italy.csv', header=None, names=['name', 'lat', 'lon'])

# Pairwise geodesic distances in km. combinations() yields every unordered pair of
# rows exactly once, so each distance is written to both mirrored cells; the
# diagonal keeps its initial value of 0.
DIST_MATRIX = np.zeros((len(CITIES),) * 2)
for c1, c2 in combinations(CITIES.itertuples(), 2):
    km = geodesic((c1.lat, c1.lon), (c2.lat, c2.lon)).km
    DIST_MATRIX[c1.Index, c2.Index] = km
    DIST_MATRIX[c2.Index, c1.Index] = km
CITIES.head()

len(CITIES)

46

## Lab2 - TSP

https://www.wolframcloud.com/obj/giovanni.squillero/Published/Lab2-tsp.nb

In [186]:
# DIST_MATRIX (built in the cell above) already holds the geodesic distance in km
# for every pair of cities, mirrored across the diagonal because combinations()
# only returns each unordered pair once. Recomputing all geodesics here would
# produce exactly the same values, so take an independent copy instead.
dist_matrix = DIST_MATRIX.copy()

In [187]:
def tsp_cost(tsp):
    """Return the total length of a closed tour, using DIST_MATRIX.

    Parameters:
    tsp: sequence of city indices that starts and ends with the same city and
         contains every index in range(len(CITIES)).

    Returns:
    The sum of DIST_MATRIX entries over consecutive pairs of the tour.

    Raises:
    AssertionError: if the tour is not closed or does not cover exactly the
                    indices of CITIES.
    """
    # A valid tour is a cycle ...
    assert tsp[0] == tsp[-1]
    # ... that touches every city and nothing else.
    assert set(tsp) == set(range(len(CITIES)))

    tot_cost = 0
    for step in range(1, len(tsp)):
        tot_cost += DIST_MATRIX[tsp[step - 1], tsp[step]]
    return tot_cost

## First Greedy Algorithm

Greedy means that at each step we take the locally best option, without looking ahead at what it costs us later.
*From the current city, always move to the nearest city that has not been visited yet.*

In [188]:
def _log_step(origin, destination):
    """Log a single hop of the tour (city names and distance in km) at DEBUG level."""
    logging.debug(
        f"step: {CITIES.at[origin,'name']} -> {CITIES.at[destination,'name']} ({DIST_MATRIX[origin,destination]:.2f}km)"
    )


# Nearest-neighbour construction: start from city 0 and keep hopping to the
# closest city that can still be reached.
city = 0
visited = np.full(len(CITIES), False)
visited[city] = True
dist = DIST_MATRIX.copy()  # working copy; DIST_MATRIX itself is left untouched
tsp = [int(city)]
while not visited.all():
    # Setting the whole column to infinity forbids any later move *into* the
    # current city (including the zero-distance move onto itself).
    dist[:, city] = np.inf
    closest = np.argmin(dist[city])
    _log_step(city, closest)
    visited[closest] = True
    city = closest
    tsp.append(int(city))

# Close the cycle by returning to the starting city.
_log_step(tsp[-1], tsp[0])
tsp.append(tsp[0])


logging.info(f"result: Found a path of {len(tsp)-1} steps, total length {tsp_cost(tsp):.2f}km")

DEBUG:root:step: Ancona -> Rimini (90.60km)
DEBUG:root:step: Rimini -> Forlì (46.72km)
DEBUG:root:step: Forlì -> Ravenna (26.46km)
DEBUG:root:step: Ravenna -> Ferrara (66.67km)
DEBUG:root:step: Ferrara -> Bologna (43.43km)
DEBUG:root:step: Bologna -> Modena (37.29km)
DEBUG:root:step: Modena -> Reggio nell'Emilia (23.94km)
DEBUG:root:step: Reggio nell'Emilia -> Parma (26.94km)
DEBUG:root:step: Parma -> Piacenza (57.65km)
DEBUG:root:step: Piacenza -> Milan (60.65km)
DEBUG:root:step: Milan -> Monza (14.51km)
DEBUG:root:step: Monza -> Bergamo (33.92km)
DEBUG:root:step: Bergamo -> Brescia (46.02km)
DEBUG:root:step: Brescia -> Verona (61.42km)
DEBUG:root:step: Verona -> Vicenza (44.70km)
DEBUG:root:step: Vicenza -> Padua (30.13km)
DEBUG:root:step: Padua -> Venice (36.07km)
DEBUG:root:step: Venice -> Trieste (115.09km)
DEBUG:root:step: Trieste -> Bolzano (209.68km)
DEBUG:root:step: Bolzano -> Trento (49.94km)
DEBUG:root:step: Trento -> Novara (206.69km)
DEBUG:root:step: Novara -> Turin (84.46

## Genetic Algorithm
- steady state: offspring compete with their parents
- parent selection: we try different methods (roulette wheel, tournament, rank)
- mutation: we try different methods (scramble, inversion)
- crossover: we try different methods (edge recombination, inver-over)

Fitness is defined as the reciprocal of the tour length, i.e. one over the sum of the distances between consecutive cities plus the distance from the last city back to the first.

In [146]:
def fitness(path):
    """Return the fitness of a tour: the reciprocal of its closed length.

    Parameters:
    path: sequence of city indices (each city once, start city not repeated).

    Returns:
    1 / (sum of DIST_MATRIX entries over consecutive cities, plus the entry
    for the hop from the last city back to the first). Longer tours therefore
    get a lower fitness.
    """
    tour_length = 0
    for here, there in zip(path, path[1:]):
        tour_length += DIST_MATRIX[here, there]
    # Closing edge: back from the last city to the first one.
    tour_length += DIST_MATRIX[path[-1], path[0]]
    return 1 / tour_length

### Parent Selection

In [147]:
def parent_selection(population, fitnesses, method, num_parents=40):
    """Pick the parents that will be used for crossover.

    Parameters:
    population: list of individuals (tours).
    fitnesses: fitness of each individual, aligned with ``population``;
               higher means better.
    method: 0 = roulette wheel, 1 = tournament, 2 = rank.
    num_parents: how many parents to return.

    Returns:
    A list of individuals taken from ``population`` (the same objects, not
    copies). For an unknown ``method`` an error is logged and the list is
    empty. Rank selection returns at most ``len(population)`` parents.
    """
    parents = []
    if method == 0:  # Roulette wheel
        # Each individual is drawn (with replacement) with probability
        # proportional to its share of the total fitness.
        total_fitness = sum(fitnesses)
        relative_fitness = [f / total_fitness for f in fitnesses]
        mating_pool = np.random.choice(range(len(population)), num_parents, p=relative_fitness)
        parents = [population[i] for i in mating_pool]

    elif method == 1:  # Tournament selection
        # A tournament draws 15% of the population; never fewer than one
        # contestant, otherwise argmax below would fail on small populations.
        tournament_size = max(1, int(0.15 * len(population)))
        for _ in range(num_parents):
            tournament_indices = np.random.choice(range(len(population)), tournament_size, replace=False)
            tournament_fitnesses = [fitnesses[i] for i in tournament_indices]

            # The fittest contestant wins the tournament.
            best_index = tournament_indices[np.argmax(tournament_fitnesses)]
            parents.append(population[best_index])

    elif method == 2:  # Rank selection
        # argsort is ascending, so reverse it to rank from fittest to least fit
        # and deterministically keep the top ``num_parents`` individuals.
        ranked = np.argsort(fitnesses)[::-1]
        parents = [population[i] for i in ranked[:num_parents]]

    else:
        logging.error("Invalid parent selection method")

    return parents

### Crossover

In [148]:
# edge recombination operator crossover

def edge_recombination(parent1, parent2):
    """Edge recombination crossover: build a child tour from the parents' edges.

    Parameters:
    parent1, parent2: tours over the same set of cities (neither is modified).

    Returns:
    A new list with the same length as ``parent1`` that starts with
    ``parent1[0]``. At every step the child moves to the neighbour (in either
    parent) of the current city that itself has the fewest remaining
    neighbours; when the current city has no neighbours left, a random city
    that is not yet in the child is chosen.
    """
    tour_length = len(parent1)

    # Neighbour sets, treating each parent as a cycle (first and last are adjacent).
    edges = {node: set() for node in parent1}
    for tour in (parent1, parent2):
        last = len(tour) - 1
        for pos, node in enumerate(tour):
            before = tour[pos - 1]  # pos 0 wraps to the last element
            after = tour[0] if pos == last else tour[pos + 1]
            edges[node].update([before, after])

    child = [parent1[0]]
    while len(child) < tour_length:
        here = child[-1]

        # The city just placed can no longer be anybody's next stop.
        for linked in edges.values():
            linked.discard(here)

        candidates = edges[here]
        if candidates:
            step = min(candidates, key=lambda node: len(edges[node]))
        else:
            # Dead end: fall back to any city that has not been used yet.
            step = random.choice([node for node in parent1 if node not in child])

        child.append(step)

    return child

# Inver over operator crossover

def inver_over(parent1, parent2, p=0.3):
    """Inversion-based crossover that works on a copy of ``parent1``.

    Parameters:
    parent1: tour that is copied and then rearranged (not modified itself).
    parent2: tour used to look up the successor of the current city.
    p: probability of taking the next city from ``parent2`` (the city that
       follows the current one there, wrapping around at the end); otherwise
       a random not-yet-visited city is taken.

    Returns:
    A new list containing the cities of ``parent1`` after repeatedly reversing
    the stretch that runs from the current city to the next city (wrapping
    past the end of the list when the next city sits before the current one),
    until every city has been visited.
    """
    child = parent1[:]
    size = len(child)

    # Random starting point; it counts as visited straight away.
    here = random.choice(child)
    seen = {here}

    while len(seen) < size:
        if random.random() < p:
            # Follow parent2: take the city that comes right after `here` there.
            where = parent2.index(here)
            target = parent2[0] if where == size - 1 else parent2[where + 1]
        else:
            target = random.choice([node for node in child if node not in seen])

        lo = child.index(here)
        hi = child.index(target)

        if lo < hi:
            # Plain in-place reversal of child[lo..hi].
            child[lo:hi + 1] = child[lo:hi + 1][::-1]
        else:
            # The stretch runs off the end of the list and continues from the
            # start: reverse it as one piece, then write the two parts back.
            wrapped = (child[lo:] + child[:hi + 1])[::-1]
            tail_len = size - lo
            child[lo:] = wrapped[:tail_len]
            child[:hi + 1] = wrapped[tail_len:]

        seen.add(target)
        here = target

    return child

### Mutation

In [149]:
def scramble_mutation(path):
    """Return a copy of ``path`` with one random contiguous stretch shuffled.

    Parameters:
    path: list of cities with at least two elements (it is not modified).

    Returns:
    A new list in which the elements between two randomly drawn positions
    (both inclusive) appear in random order; everything else keeps its place.
    """
    lo, hi = sorted(random.sample(range(len(path)), 2))

    # Slicing copies, so shuffling the window leaves the caller's list intact.
    head, window, tail = path[:lo], path[lo:hi + 1], path[hi + 1:]
    random.shuffle(window)

    return head + window + tail


def inversion_mutation(path):
    """Return a copy of ``path`` with one random contiguous stretch reversed.

    Parameters:
    path: list of cities with at least two elements (it is not modified).

    Returns:
    A new list in which the elements between two randomly drawn positions
    (both inclusive) appear in reverse order; everything else keeps its place.
    """
    lo, hi = sorted(random.sample(range(len(path)), 2))

    mutated = path[:lo]
    mutated.extend(reversed(path[lo:hi + 1]))
    mutated.extend(path[hi + 1:])
    return mutated


def adaptive_mutation(path, generation, max_generations, mutation_fn, initial_rate=0.8):
    """Apply ``mutation_fn`` with a probability that decays linearly over the run.

    Parameters:
    path: individual to (possibly) mutate.
    generation: index of the current generation.
    max_generations: total number of generations of the run (must not be 0).
    mutation_fn: callable taking a path and returning the mutated path.
    initial_rate: mutation probability at generation 0; it falls linearly to 0
                  when ``generation`` reaches ``max_generations``.

    Returns:
    ``mutation_fn(path)`` when the mutation fires, otherwise ``path`` itself
    (the same object, not a copy).
    """
    # Explore more at the start of the run, exploit more towards the end.
    mutation_rate = initial_rate * (1 - generation / max_generations)

    if random.random() < mutation_rate:
        return mutation_fn(path)
    return path

### Genetic Algorithm Function

In [150]:
def print_path_details(path, cities_df, dist_matrix):
    """
    Log every hop of a closed TSP tour and return its total length.

    Each hop (including the one from the last city back to the first) is
    logged at DEBUG level with both city names and the distance; a summary
    line is logged at INFO level.

    Parameters:
    path: sequence of city indices in visiting order (start city not repeated)
    cities_df: DataFrame with a 'name' column, addressed by position
    dist_matrix: matrix indexed as dist_matrix[from_index, to_index]

    Returns:
    (total_distance, steps): the summed distance and the number of hops
    """
    order = [int(node) for node in path]

    # Pair every city with its successor; the last city is paired with the first.
    successors = order[1:] + order[:1]

    total_distance = 0
    for origin, destination in zip(order, successors):
        current_city = cities_df.iloc[origin]['name']
        next_city = cities_df.iloc[destination]['name']
        distance = dist_matrix[origin, destination]

        logging.debug(f"step: {current_city} -> {next_city} ({distance:.2f}km)")
        total_distance += distance

    steps = len(order)
    logging.info(f"result: Found a path of {steps} steps, total length {total_distance:.2f}km")
    return total_distance, steps


In [151]:
def genetic_algorithm(population_size, num_generations, crossover_fn, mutation_fn, parent_selection_fn, p_selection_method):
    """Run a steady-state genetic algorithm for the TSP over the cities in CITIES.

    Each generation: a mating pool is drawn with ``parent_selection_fn``,
    consecutive parents are crossed in both orders with ``crossover_fn``, the
    children go through ``adaptive_mutation`` and every child then replaces the
    currently least fit individual if it is strictly fitter. Every 100
    generations, if fewer than 70% of the individuals are distinct, the worst
    10% of the population are replaced by random tours.

    Parameters:
    population_size: number of individuals kept in the population.
    num_generations: number of generations to run.
    crossover_fn: callable (parent_a, parent_b) -> child tour.
    mutation_fn: callable (tour) -> mutated tour, applied via adaptive_mutation.
    parent_selection_fn: callable (population, fitness_values, method) -> parents.
    p_selection_method: method code forwarded to ``parent_selection_fn``.

    Returns:
    (best_path, best_fitness): the fittest tour seen (initial population
    included) and its fitness as computed by ``fitness``.
    """
    # Set replacement rate - 50% of population
    replacement_rate = 0.5
    num_replacements = int(population_size * replacement_rate)

    diversity_threshold = 0.7  # minimum share of distinct individuals

    # Initialize the population with random TSP paths and evaluate it
    population = [list(np.random.permutation(len(CITIES))) for _ in range(population_size)]
    fitness_values = [fitness(path) for path in population]

    # Start the incumbent from the best initial individual, so that a result
    # exists even if no child is ever accepted and a mediocre first child is
    # never reported as "best" over a fitter initial tour.
    best_index = int(np.argmax(fitness_values))
    best_path = population[best_index]
    best_fitness = fitness_values[best_index]

    for generation in range(num_generations):
        # Select a few parents for crossover
        parents = parent_selection_fn(population, fitness_values, p_selection_method)

        # Generate offspring by crossover and mutation. Stopping at
        # len(parents) - 1 leaves the last parent of an odd-sized pool unpaired
        # instead of indexing past the end of the list.
        next_generation = []
        for i in range(0, len(parents) - 1, 2):
            child1 = crossover_fn(parents[i], parents[i + 1])
            child2 = crossover_fn(parents[i + 1], parents[i])
            next_generation.extend([
                adaptive_mutation(child1, generation, num_generations, mutation_fn),
                adaptive_mutation(child2, generation, num_generations, mutation_fn),
            ])

        # Calculate fitness for the offspring
        offspring_fitness = [fitness(child) for child in next_generation]

        # Replace the least fit individuals with new offspring
        for child, child_fitness in zip(next_generation[:num_replacements], offspring_fitness[:num_replacements]):
            # Find the index of the least fit individual in the population
            least_fit_index = np.argmin(fitness_values)
            if child_fitness > fitness_values[least_fit_index]:
                # Replace least fit individual with the new child
                population[least_fit_index] = child
                fitness_values[least_fit_index] = child_fitness

                # Update best path and fitness if needed
                if child_fitness > best_fitness:
                    best_path = child
                    best_fitness = child_fitness
                    if generation % 10 == 0:
                        logging.info(f"Generation {generation}: Best Fitness = {1/best_fitness:.4f}")

        # Inject new random solutions if diversity is low
        if generation % 100 == 0:  # Check periodically
            unique_paths = len(set(tuple(p) for p in population))
            if unique_paths / len(population) < diversity_threshold:
                num_new = int(0.1 * population_size)  # Inject 10% new solutions
                # Fix the slots to overwrite up front. Re-running argmin after
                # each insertion would usually return the random tour that was
                # just inserted, so only a single slot would ever be refreshed.
                for idx in np.argsort(fitness_values)[:num_new]:
                    new_solution = list(np.random.permutation(len(CITIES)))
                    population[idx] = new_solution
                    fitness_values[idx] = fitness(new_solution)

    # Print final path details
    logging.info("Final path details:")

    # Print the actual path of city names for easier comparison
    city_path = [CITIES.iloc[i]['name'] for i in best_path]
    logging.info(f"Optimized TSP Path: {city_path}")
    logging.info(f"Optimized Total Cost: {1/best_fitness:.2f} km")

    return best_path, best_fitness


### Iterations

In [154]:
# Configuration of this genetic-algorithm run
population_size = 200
num_generations = 50000
crossover_fn = edge_recombination
mutation_fn = inversion_mutation
parent_selection_fn = parent_selection
p_selection_method = 1  # 0: Roulette Wheel, 1: Tournament Selection, 2: Rank Selection

# Launch the run with the settings above; arguments are passed by name so the
# call does not depend on the parameter order.
best_path, best_fitness = genetic_algorithm(
    population_size=population_size,
    num_generations=num_generations,
    crossover_fn=crossover_fn,
    mutation_fn=mutation_fn,
    parent_selection_fn=parent_selection_fn,
    p_selection_method=p_selection_method,
)



INFO:root:Generation 0: Best Fitness = 14935.3940
INFO:root:Generation 0: Best Fitness = 14845.4163
INFO:root:Generation 0: Best Fitness = 14735.2464
INFO:root:Generation 0: Best Fitness = 14401.8373
INFO:root:Generation 0: Best Fitness = 14330.5482


INFO:root:Generation 10: Best Fitness = 9064.2520
INFO:root:Generation 10: Best Fitness = 9016.2389
INFO:root:Generation 10: Best Fitness = 8897.8431
INFO:root:Generation 10: Best Fitness = 8752.5880
INFO:root:Generation 20: Best Fitness = 7904.5751
INFO:root:Generation 60: Best Fitness = 5416.9849
INFO:root:Generation 70: Best Fitness = 5287.5757
INFO:root:Generation 100: Best Fitness = 4872.4950
INFO:root:Generation 130: Best Fitness = 4737.1548
INFO:root:Generation 130: Best Fitness = 4668.7207
INFO:root:Generation 220: Best Fitness = 4267.8434
INFO:root:Generation 220: Best Fitness = 4266.7119
INFO:root:Final path details:
DEBUG:root:step: Trieste -> Venice (115.09km)
DEBUG:root:step: Venice -> Padua (36.07km)
DEBUG:root:step: Padua -> Vicenza (30.13km)
DEBUG:root:step: Vicenza -> Ferrara (79.10km)
DEBUG:root:step: Ferrara -> Ravenna (66.67km)
DEBUG:root:step: Ravenna -> Forlì (26.46km)
DEBUG:root:step: Forlì -> Rimini (46.72km)
DEBUG:root:step: Rimini -> Ancona (90.60km)
DEBUG:roo