### Exercise 5.4

In [23]:
import numpy as np
import random
# TSP Data for a smaller set of points.
# This dataset represents a collection of coordinates (x, y) on a plane,
#  simulating locations of cities. The objective
# in a TSP is to find the shortest possible route that visits each point exactly once
# and returns to the origin point.
tsp_data = [
    (0.2554, 18.2366), (0.4339, 15.2476), (0.7377, 8.3137), (1.1354, 16.5638), (1.5820, 17.3030),
    (2.0913, 9.2924), (2.2631, 17.3392), (2.6373, 2.6425), (3.0040, 19.5712), (3.6684, 14.8018),
    (3.8630, 13.7008), (4.2065, 9.8224), (4.8353, 2.0944), (4.9785, 3.1596), (5.3754, 17.6381),
    (5.9425, 6.0360), (6.1451, 3.8132), (6.7782, 11.0125), (6.9223, 7.7819), (7.5691, 0.9378),
    (7.8190, 13.1697), (8.3332, 5.9161), (8.5872, 7.8303), (9.1224, 14.5889), (9.4076, 9.7166),
    (9.7208, 8.1154), (10.1662, 19.1705), (10.7387, 2.0090), (10.9354, 5.1813), (11.3707, 7.2406),
    (11.7418, 13.6874), (12.0526, 4.7186), (12.6385, 12.1000), (13.0950, 13.6956), (13.3533, 17.3524),
    (13.8794, 3.9479), (14.2674, 15.8651), (14.5520, 17.2489), (14.9737, 13.2245), (15.2841, 1.4455),
    (15.5761, 12.1270), (16.1313, 14.2029), (16.4388, 16.0084), (16.7821, 9.4334), (17.3928, 12.9692),
    (17.5139, 6.4828), (17.9487, 7.5563), (18.3958, 19.5112), (18.9696, 19.3565), (19.0928, 16.5453)
]

# TSP CoMoT Data for a larger set of points.
# Similar to the `tsp_data`, this dataset represents another set of coordinates
# on a plane.
tsp_comot_data = [
    (288, 149), (288, 129), (270, 133), (256, 141), (256, 157),
    (246, 157), (236, 169), (228, 169), (228, 161), (220, 169),
    (212, 169), (204, 169), (196, 169), (188, 169), (196, 161),
    (188, 145), (172, 145), (164, 145), (156, 145), (148, 145),
    (140, 145), (148, 169), (164, 169), (172, 169), (156, 169),
    (140, 169), (132, 169), (124, 169), (116, 161), (104, 153),
    (260, 77), (276, 77), (276, 69), (276, 61), (276, 53),
    (284, 53), (284, 61), (284, 69), (284, 77), (284, 85),
    (284, 93), (284, 101), (288, 109), (280, 109), (276, 101),
    (276, 93), (276, 85), (268, 97), (260, 109), (252, 101),
    (260, 93), (260, 85), (236, 85), (228, 85), (228, 93),
    (236, 93), (236, 101), (228, 101), (228, 109), (228, 117),
    (228, 125), (220, 125), (212, 117), (204, 109), (196, 101),
    (188, 93), (180, 93), (180, 101), (180, 109), (180, 117),
    (180, 125), (196, 145), (204, 145), (212, 145), (220, 145),
    (228, 145), (236, 145), (246, 141), (252, 125), (260, 129),
    (280, 133)
]




In [24]:
#simple EA for TSP
import numpy as np
import random

def distance(city1, city2):
    """
    Calculate the Euclidean distance between two cities.

    Args:
        city1 (tuple): Coordinates (x, y) of the first city.
        city2 (tuple): Coordinates (x, y) of the second city.

    Returns:
        float: The Euclidean distance between city1 and city2.
    """
    return np.sqrt((city1[0] - city2[0]) ** 2 + (city1[1] - city2[1]) ** 2)

def create_distance_matrix(tsp_data):
    """
    Create a distance matrix for the given TSP data.

    Args:
        tsp_data (list of tuples): The TSP data as a list of coordinates (x, y).

    Returns:
        numpy.ndarray: A square matrix where element (i, j) represents the distance
                       between city i and city j.
    """
    num_cities = len(tsp_data)
    distance_matrix = np.zeros((num_cities, num_cities))
    for i in range(num_cities):
        for j in range(num_cities):
            distance_matrix[i, j] = distance(tsp_data[i], tsp_data[j])
    return distance_matrix

def initialize_population(pop_size, num_cities):
    """
    Initialize a population for the genetic algorithm.

    Args:
        pop_size (int): The size of the population.
        num_cities (int): The number of cities in the TSP data.

    Returns:
        list of lists: A list containing `pop_size` individuals, where each individual
                       is a list representing a permutation of the city indices.
    """
    population = [random.sample(range(num_cities), num_cities) for _ in range(pop_size)]
    return population

def calculate_fitness(tour, distance_matrix):
    """
    Calculate the fitness of a tour.

    Args:
        tour (list): A list representing a tour (permutation of city indices).
        distance_matrix (numpy.ndarray): The distance matrix of the cities.

    Returns:
        float: The total distance of the tour.
    """
    return sum(distance_matrix[tour[i]][tour[(i + 1) % len(tour)]] for i in range(len(tour)))

def tournament_selection(population, distance_matrix, tournament_size=3):
    """
    Select an individual from the population using tournament selection.

    Args:
        population (list of lists): The current population.
        distance_matrix (numpy.ndarray): The distance matrix of the cities.
        tournament_size (int): The number of individuals participating in each tournament.

    Returns:
        list: The winning tour (individual) of the tournament.
    """
    best = None
    for _ in range(tournament_size):
        ind = random.choice(population)
        if best is None or calculate_fitness(ind, distance_matrix) < calculate_fitness(best, distance_matrix):
            best = ind
    return best

def ordered_crossover(parent1, parent2):
    """
    Perform ordered crossover on two parents to produce an offspring.

    Args:
        parent1 (list): The first parent (a tour).
        parent2 (list): The second parent (a tour).

    Returns:
        list: The offspring produced by ordered crossover.
    """
    size = len(parent1)
    start, stop = sorted(random.sample(range(size), 2))
    offspring = [None]*size
    offspring[start:stop] = parent1[start:stop]
    p2_index = stop
    for i in range(stop, stop + size):
        if parent2[i % size] not in offspring:
            offspring[p2_index % size] = parent2[i % size]
            p2_index += 1
    return offspring

def mutate(tour, mutation_rate):
    """
    Mutate a tour by swapping two cities with a given mutation rate.

    Args:
        tour (list): The tour to mutate.
        mutation_rate (float): The probability of each city being swapped.

    Returns:
        list: The mutated tour.
    """
    for i in range(len(tour)):
        if random.random() < mutation_rate:
            j = random.randint(0, len(tour) - 1)
            tour[i], tour[j] = tour[j], tour[i]
    return tour

def evolutionary_algorithm(tsp_data, pop_size, num_generations, mutation_rate):
    """
    Run the evolutionary algorithm to solve the TSP.

    Args:
        tsp_data (list of tuples): The TSP data as a list of coordinates (x, y).
        pop_size (int): The size of the population.
        num_generations (int): The number of generations to evolve.
        mutation_rate (float): The mutation rate.

    Returns:
        tuple: The best tour found and its fitness.
    """
    num_cities = len(tsp_data)
    distance_matrix = create_distance_matrix(tsp_data)
    population = initialize_population(pop_size, num_cities)

    best_tour = None
    best_fitness = float('inf')

    for generation in range(num_generations):
        new_population = []
        for _ in range(pop_size):
            parent1 = tournament_selection(population, distance_matrix)
            parent2 = tournament_selection(population, distance_matrix)
            offspring = ordered_crossover(parent1, parent2)
            offspring = mutate(offspring, mutation_rate)
            new_population.append(offspring)

        population = new_population

        current_best_tour = min(population, key=lambda tour: calculate_fitness(tour, distance_matrix))
        current_best_fitness = calculate_fitness(current_best_tour, distance_matrix)

        if current_best_fitness < best_fitness:
            best_fitness = current_best_fitness
            best_tour = current_best_tour

    return best_tour, best_fitness

# Parameters for the evolutionary algorithm
pop_size = 50
num_generations = 1
mutation_rate = 0.01

# Run the evolutionary algorithm for the first dataset
best_tour, best_fitness = evolutionary_algorithm(tsp_data, pop_size, num_generations, mutation_rate)
print(f"Best tour: {best_tour}")
print(f"Best fitness (Total Distance): {best_fitness}")

# Run the evolutionary algorithm for the second dataset
best_tour_comot, best_fitness_comot = evolutionary_algorithm(tsp_comot_data, pop_size, num_generations, mutation_rate)
print(f"Best tour comot: {best_tour_comot}")
print(f"Best fitness (Total Distance) comot: {best_fitness_comot}")



Best tour: [45, 35, 37, 19, 25, 21, 24, 12, 23, 32, 30, 46, 4, 43, 22, 9, 6, 3, 33, 39, 28, 14, 10, 5, 1, 8, 2, 18, 0, 34, 17, 20, 13, 29, 42, 47, 48, 44, 11, 40, 49, 36, 7, 41, 16, 15, 38, 26, 27, 31]
Best fitness (Total Distance): 434.96152947239267
Best tour comot: [43, 16, 34, 42, 54, 79, 71, 52, 8, 69, 64, 37, 26, 11, 14, 24, 15, 30, 0, 44, 50, 1, 7, 60, 23, 72, 20, 21, 17, 12, 78, 68, 51, 35, 58, 57, 56, 10, 2, 63, 25, 32, 39, 31, 33, 74, 13, 29, 28, 40, 36, 67, 49, 5, 75, 27, 66, 22, 76, 18, 80, 48, 45, 77, 65, 62, 9, 19, 6, 38, 53, 70, 55, 46, 61, 3, 47, 73, 59, 4, 41]
Best fitness (Total Distance) comot: 5135.44827880235


In [26]:
import numpy as np
import random

def two_opt_swap(route, i, k):
    """
    Perform a 2-opt swap by reversing the route segment between two indices.

    Args:
        route (list): The current tour.
        i (int): The starting index of the segment to reverse.
        k (int): The ending index of the segment to reverse.

    Returns:
        list: A new tour with the segment between indices i and k reversed.
    """
    new_route = route[:i]
    new_route.extend(reversed(route[i:k + 1]))
    new_route.extend(route[k + 1:])
    return new_route

def two_opt(route, distance_matrix, max_iterations=50):
    """
    Apply the 2-opt algorithm to improve a tour.

    Args:
        route (list): The initial tour to optimize.
        distance_matrix (numpy.ndarray): The distance matrix of the cities.
        max_iterations (int): The maximum number of iterations to perform.

    Returns:
        list: An optimized tour obtained through 2-opt swaps.
    """
    improvement = True
    best_route = route
    best_distance = calculate_fitness(route, distance_matrix)
    iterations = 0

    while improvement and iterations < max_iterations:
        improvement = False
        for i in range(1, len(route) - 2):
            for k in range(i + 1, len(route)):
                if k - i == 1: continue  # Skip adjacent edges
                new_route = two_opt_swap(best_route, i, k)
                new_distance = calculate_fitness(new_route, distance_matrix)
                if new_distance < best_distance:
                    best_distance = new_distance
                    best_route = new_route
                    improvement = True
        iterations += 1
    return best_route

def memetic_algorithm(tsp_data, pop_size, num_generations, mutation_rate):
    """
    Run a memetic algorithm (MA) combining genetic algorithm with local search (2-opt) to solve the TSP.

    Args:
        tsp_data (list of tuples): The TSP data as a list of coordinates (x, y).
        pop_size (int): The size of the population.
        num_generations (int): The number of generations to evolve.
        mutation_rate (float): The mutation rate.

    Returns:
        tuple: The best tour found and its fitness.
    """
    num_cities = len(tsp_data)
    distance_matrix = create_distance_matrix(tsp_data)
    population = initialize_population(pop_size, num_cities)

    best_tour = None
    best_fitness = float('inf')

    for generation in range(num_generations):
        new_population = []
        for tour in population:
            # Apply local search (2-opt) to each individual before reproduction
            tour = two_opt(tour, distance_matrix)
            new_population.append(tour)

        for _ in range(len(new_population) // 2):
            parent1 = tournament_selection(new_population, distance_matrix)
            parent2 = tournament_selection(new_population, distance_matrix)
            offspring = ordered_crossover(parent1, parent2)
            offspring = mutate(offspring, mutation_rate)
            offspring = two_opt(offspring, distance_matrix)  # Apply local search to offspring
            new_population.append(offspring)

        # Select the next generation based on fitness
        population = sorted(new_population, key=lambda tour: calculate_fitness(tour, distance_matrix))[:pop_size]

        current_best_tour = population[0]
        current_best_fitness = calculate_fitness(current_best_tour, distance_matrix)

        if current_best_fitness < best_fitness:
            best_fitness = current_best_fitness
            best_tour = current_best_tour

    return best_tour, best_fitness

# Running the Memetic Algorithm
# first dataset
best_tour, best_fitness = memetic_algorithm(tsp_data, pop_size=50, num_generations=1, mutation_rate=0.01)
print(f"Best tour: {best_tour}")
print(f"Best fitness (Total Distance): {best_fitness}")

# second dataset
best_tour_comot, best_fitness_comot = memetic_algorithm(tsp_data, pop_size=50, num_generations=1, mutation_rate=0.01)
print(f"Best tour comot: {best_tour_comot}")
print(f"Best fitness (Total Distance) comot: {best_fitness_comot}")


Best tour: [40, 44, 41, 42, 49, 48, 47, 36, 37, 34, 26, 14, 8, 6, 4, 0, 3, 1, 9, 10, 11, 5, 2, 7, 13, 12, 19, 27, 31, 35, 39, 45, 46, 43, 29, 28, 21, 16, 15, 18, 22, 25, 24, 17, 20, 23, 30, 32, 33, 38]
Best fitness (Total Distance): 123.34808804092654
Best tour comot: [40, 43, 46, 45, 39, 35, 31, 28, 29, 25, 21, 27, 19, 7, 12, 13, 16, 15, 18, 22, 24, 32, 33, 30, 23, 20, 17, 11, 5, 2, 10, 9, 1, 3, 0, 4, 6, 8, 14, 26, 34, 37, 47, 48, 49, 42, 36, 38, 41, 44]
Best fitness (Total Distance) comot: 123.43211914402806


In [None]:
import numpy as np
import random

def compare_algorithms(tsp_data, runs=10):
    """
    Compare the performance of the evolutionary algorithm (EA) and the memetic algorithm (MA)
    on the given TSP data over a specified number of runs.

    This function runs both algorithms multiple times on the same dataset, then calculates
    and prints the average best fitness achieved by each algorithm, along with the percentage
    improvement of MA over EA.

    Args:
        tsp_data (list of tuples): The TSP data as a list of coordinates (x, y).
        runs (int): The number of runs to perform for each algorithm.

    Returns:
        None: This function prints the average best fitness for EA and MA, and the improvement
              percentage of MA over EA.
    """
    ea_results = []
    ma_results = []

    for _ in range(runs):
        # Run the evolutionary algorithm and memetic algorithm separately and store their fitness results
        _, ea_fitness = evolutionary_algorithm(tsp_data, pop_size=50, num_generations=1500, mutation_rate=0.01)
        ea_results.append(ea_fitness)

        _, ma_fitness = memetic_algorithm(tsp_data, pop_size=50, num_generations=1500, mutation_rate=0.01)
        ma_results.append(ma_fitness)

    # Calculate and print average best fitness for EA and MA
    ea_average = np.mean(ea_results)
    ma_average = np.mean(ma_results)

    print(f"Evolutionary Algorithm - Average Best Fitness: {ea_average}")
    print(f"Memetic Algorithm - Average Best Fitness: {ma_average}")

    # Calculate and print the percentage improvement of MA over EA
    improvement = ((ea_average - ma_average) / ea_average) * 100
    print(f"Improvement of MA over EA: {improvement:.2f}%")

# Assuming the rest of the necessary functions (evolutionary_algorithm, memetic_algorithm, etc.)
# are defined elsewhere in the code.

# Run the comparison for the first TSP dataset
print('results for tsp_data 1')
compare_algorithms(tsp_data)

# Run the comparison for the second TSP dataset (Comot link)
print('results for tsp_data from comot')
compare_algorithms(tsp_comot_data)


results for tsp_data 1


#### Question 2
Comparing a simple Evolutionary Algorithm (EA) with the same number of generations to an EA enhanced with local search to determine if the addition of local search improves performance might not always constitute a fair baseline. First, (1) local search adds computational overhead. An EA with local search takes 10 times more than the simple EA, therefore it has higher computational cost per generation than a simple EA due to the additional operations required for local search. Therefore, comparing them with the same number of generations doesn't account for the increased computational effort. Furthermore, (2) local search can significantly alter the evolutionary dynamics by intensifying the search around promising solutions, potentially leading to faster convergence or better exploitation of the search space. Simply comparing the number of generations overlooks how these dynamics change the path and efficiency of the search process. Also, (3) if the performance is measured solely by the quality of the final solution without considering computational resources, then the comparison might ignore important practical considerations like time or energy efficiency.

To conduct a fair comparison, i would suggest to:
1. Instead of fixing the number of generations, equalizing the total computational budget between the two setups might be better. This could be based on the total number of fitness evaluations, total runtime, or energy consumption. This way, both algorithms expend the same amount of resources, providing a more apples-to-apples comparison.
2. Besides the quality of the final solution,  metrics such as convergence speed, computational time, and robustness across multiple runs should be added. This multi-faceted evaluation provides a more comprehensive view of the impact of local search.
3. Exploring the performance across a range of parameter settings for both the simple EA and the EA with local search is a must. This includes mutation rates, population sizes, and local search intensity. Such an exploration can reveal under what conditions the local search provides the most benefit.
4. Use statistical tests to determine if observed differences in performance are significant. This involves running each algorithm configuration multiple times on multiple problem instances and applying statistical tests (e.g., t-tests, ANOVA) to the results to assess the significance of the differences.
5. Analyze the diversity of the populations and the convergence behavior of both algorithms. This can help understand if local search leads to premature convergence or if it helps in maintaining a healthy diversity for exploring the search space effectively.

