## 🔹 GASA - Genetic Algorithm with Simulated Annealing


In [None]:
import math
import random
import time
import tracemalloc
from typing import List, Tuple

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# One shared seed for both random number sources used in this notebook
# (NumPy's global generator and the standard-library `random` module),
# so that repeated runs draw the same random numbers.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)


class GASA_mTSP:
    """Hybrid Genetic Algorithm + Simulated Annealing solver for the multiple TSP.

    A chromosome is a permutation of the non-depot cities ``1 .. n_cities-1``.
    It is decoded into ``m`` routes by dealing the cities out round-robin, and
    every route implicitly starts and ends at the depot (city ``0``).  The
    objective that is minimised is the length of the longest route.

    Naming note: three method names are kept for backward compatibility even
    though they do not describe what the methods implement:

    * ``tournament_selection``        -> roulette-wheel selection
    * ``partially_mapped_crossover``  -> order crossover (OX)
    * ``index_mutation``              -> scramble mutation

    All randomness comes from the global ``random`` module, so seed it before
    calling :meth:`run` if reproducible results are needed.
    """

    def __init__(self, distance_matrix: np.ndarray, m: int = 3,
                 pop_size: int = 100, generations: int = 300,
                 pc: float = 0.8, pm: float = 0.1,
                 sa_T0: float = 100, sa_alpha: float = 0.95, sa_Tmin: float = 1e-3):
        """Store the problem instance and the GA / SA hyper-parameters.

        Args:
            distance_matrix: Square matrix indexed as ``distance_matrix[a][b]``.
            m: Number of routes (salesmen).
            pop_size: Number of chromosomes per generation.
            generations: Number of GA generations to run.
            pc: Crossover probability.
            pm: Mutation probability.
            sa_T0: Initial SA temperature.
            sa_alpha: Geometric cooling factor applied after every SA step.
            sa_Tmin: Temperature at which SA stops.

        Raises:
            ValueError: If ``m`` or ``pop_size`` is smaller than 1, or if the
                cooling schedule could never terminate
                (``sa_alpha`` outside ``(0, 1)`` or ``sa_Tmin <= 0``).
        """
        if m < 1:
            raise ValueError("m must be at least 1")
        if pop_size < 1:
            raise ValueError("pop_size must be at least 1")
        if not 0 < sa_alpha < 1:
            raise ValueError("sa_alpha must lie strictly between 0 and 1")
        if sa_Tmin <= 0:
            raise ValueError("sa_Tmin must be positive")

        self.distance_matrix = distance_matrix
        self.n_cities = len(distance_matrix)
        self.m = m
        self.pop_size = pop_size
        self.generations = generations
        self.pc = pc
        self.pm = pm
        self.depot = 0

        # SA parameters
        self.sa_T0 = sa_T0
        self.sa_alpha = sa_alpha
        self.sa_Tmin = sa_Tmin

    # ---------------- Step 1: Initialization ----------------
    def initialize_population(self) -> List[List[int]]:
        """Return ``pop_size`` random permutations of the non-depot cities."""
        cities = list(range(1, self.n_cities))
        return [random.sample(cities, len(cities)) for _ in range(self.pop_size)]

    # ---------------- Decode chromosome into m routes ----------------
    def decode_solution(self, chromosome: List[int]) -> List[List[int]]:
        """Deal the cities of ``chromosome`` round-robin into ``m`` routes.

        The returned routes do not contain the depot.
        """
        routes = [[] for _ in range(self.m)]
        for i, city in enumerate(chromosome):
            routes[i % self.m].append(city)
        return routes

    def _route_length(self, route: List[int]) -> float:
        """Return the length of the closed tour depot -> ``route`` -> depot."""
        tour = [self.depot] + list(route) + [self.depot]
        return sum(self.distance_matrix[a][b] for a, b in zip(tour, tour[1:]))

    # ---------------- Step 2: Fitness ----------------
    def calculate_fitness(self, chromosome: List[int]) -> float:
        """Return the length of the longest decoded route (lower is better)."""
        lengths = [self._route_length(r) for r in self.decode_solution(chromosome)]
        return max(lengths) if lengths else float('inf')

    # ---------------- Step 3A: Selection (Roulette Wheel) ----------------
    def tournament_selection(self, population: List[List[int]], fitness: List[float]) -> List[int]:
        """Pick one individual by roulette wheel and return a copy of it.

        Each individual's slice of the wheel is ``1 / (fitness + 1e-6)``, so
        shorter longest-routes are more likely to be selected.
        """
        weights = [1.0 / (f + 1e-6) for f in fitness]
        pick = random.uniform(0, sum(weights))
        cumulative = 0
        for ind, weight in zip(population, weights):
            cumulative += weight
            if cumulative >= pick:
                return ind.copy()
        # Floating-point rounding can leave `pick` marginally above the total.
        return population[-1].copy()

    # ---------------- Step 3B: Order-based Crossover (OX) ----------------
    def partially_mapped_crossover(self, p1: List[int], p2: List[int]) -> Tuple[List[int], List[int]]:
        """Apply order crossover (OX) to two parents with probability ``pc``.

        Returns two children; when crossover is skipped (or the parents are
        too short to cut) the children are plain copies of the parents.
        """
        size = len(p1)
        # Two distinct cut points are required, so shorter parents are copied.
        if size < 2 or random.random() > self.pc:
            return p1.copy(), p2.copy()
        start, end = sorted(random.sample(range(size), 2))

        def ox(donor, filler):
            child = [None] * size
            child[start:end] = donor[start:end]
            # Set membership keeps the fill loop linear instead of quadratic.
            used = set(donor[start:end])
            pos = end
            # Fill the remaining slots in the filler's order, starting right
            # after the copied segment and wrapping around.
            for city in filler[end:] + filler[:end]:
                if city in used:
                    continue
                if pos >= size:
                    pos = 0
                child[pos] = city
                used.add(city)
                pos += 1
            return child

        return ox(p1, p2), ox(p2, p1)

    # ---------------- Step 3C: Scramble Mutation ----------------
    def index_mutation(self, chromosome: List[int]) -> List[int]:
        """With probability ``pm`` shuffle a random slice of ``chromosome``.

        The chromosome is modified in place and also returned.
        """
        if len(chromosome) >= 2 and random.random() < self.pm:
            start, end = sorted(random.sample(range(len(chromosome)), 2))
            subset = chromosome[start:end]
            random.shuffle(subset)
            chromosome[start:end] = subset
        return chromosome

    # ---------------- Step 4: Simulated Annealing (SA) ----------------
    def simulated_annealing(self, chromosome: List[int]) -> List[int]:
        """Refine ``chromosome`` with swap-based simulated annealing.

        Returns a new list holding the best solution visited during the walk,
        so the result is never worse than the input.  The input list is not
        modified.
        """
        current = chromosome.copy()
        if len(current) < 2:
            # Nothing to swap.
            return current

        current_fit = self.calculate_fitness(current)
        best, best_fit = current, current_fit
        positions = range(len(current))
        T = self.sa_T0

        while T > self.sa_Tmin:
            # Neighbor by swapping two cities
            neighbor = current.copy()
            i, j = random.sample(positions, 2)
            neighbor[i], neighbor[j] = neighbor[j], neighbor[i]
            neighbor_fit = self.calculate_fitness(neighbor)

            # Accept if better, otherwise with probability exp(-delta / T).
            # The exponent is never positive here, so exp() cannot overflow.
            if neighbor_fit < current_fit or random.random() < math.exp(-(neighbor_fit - current_fit) / T):
                current, current_fit = neighbor, neighbor_fit
                if current_fit < best_fit:
                    best, best_fit = current, current_fit

            T *= self.sa_alpha

        return best

    # ---------------- Step 5: Main Loop ----------------
    def run(self, verbose: bool = True):
        """Run the hybrid GA + SA search.

        Args:
            verbose: Print the best fitness every 20 generations.

        Returns:
            A tuple ``(routes, total_distance, best_fitness, balance_metric,
            fitness_history)`` where

            * ``routes`` are the best routes found, without the depot;
            * ``total_distance`` is the summed length of all routes, each
              including the legs from and to the depot;
            * ``best_fitness`` is the length of the longest route;
            * ``balance_metric`` is longest minus shortest route length;
            * ``fitness_history`` has one entry per generation: the best
              fitness known when that generation was scored.  The final
              population is scored once more after the loop, so
              ``best_fitness`` can be lower than ``fitness_history[-1]``.
        """
        population = self.initialize_population()
        best_solution = None
        best_fitness = float('inf')
        fitness_history = []

        for gen in range(self.generations):
            fitness_scores = [self.calculate_fitness(ind) for ind in population]
            order = np.argsort(fitness_scores)
            population = [population[i] for i in order]
            fitness_scores = [fitness_scores[i] for i in order]

            if fitness_scores[0] < best_fitness:
                best_fitness = fitness_scores[0]
                best_solution = population[0].copy()

            # GA: Selection + Crossover + Mutation (the elite survives as-is)
            new_population = [population[0].copy()]
            while len(new_population) < self.pop_size:
                p1 = self.tournament_selection(population, fitness_scores)
                p2 = self.tournament_selection(population, fitness_scores)
                c1, c2 = self.partially_mapped_crossover(p1, p2)
                new_population.append(self.index_mutation(c1))
                new_population.append(self.index_mutation(c2))

            # Children are added in pairs, so trim a possible overshoot.
            population = new_population[:self.pop_size]

            # SA on the elite (slot 0); fitness_scores[0] is its fitness.
            sa_solution = self.simulated_annealing(population[0])
            if self.calculate_fitness(sa_solution) < fitness_scores[0]:
                population[0] = sa_solution

            fitness_history.append(best_fitness)

            if verbose and gen % 20 == 0:
                print(f"Gen {gen}: Best max route length = {best_fitness:.2f}")

        # The offspring and the SA refinement of the last generation have not
        # been scored yet; this also covers generations == 0.
        for ind in population:
            fit = self.calculate_fitness(ind)
            if fit < best_fitness:
                best_fitness = fit
                best_solution = ind.copy()

        routes = self.decode_solution(best_solution)
        route_distances = [self._route_length(route) for route in routes]
        total_distance = sum(route_distances)
        balance_metric = max(route_distances) - min(route_distances) if route_distances else 0

        return routes, total_distance, best_fitness, balance_metric, fitness_history

NameError: name 'List' is not defined

In [None]:
# Load the real distance matrix; if the CSV is missing or cannot be parsed,
# fall back to a reproducible random symmetric matrix so the notebook still runs.
try:
    distance_df = pd.read_csv('../data/HN_distance_matrix.csv', index_col=0)
except (OSError, ValueError) as exc:
    # OSError covers a missing or unreadable file. ValueError covers pandas'
    # parser errors and decoding errors. Anything else (e.g. an interrupt)
    # is deliberately not swallowed.
    print(f"Could not load distance matrix: {exc!r}")
    # Fallback to example matrix
    n_cities = 127
    rng = np.random.default_rng(42)
    distance_matrix = rng.uniform(10, 100, size=(n_cities, n_cities))
    # Symmetrise and zero the diagonal so d[a][b] == d[b][a] and d[a][a] == 0.
    distance_matrix = (distance_matrix + distance_matrix.T) / 2
    np.fill_diagonal(distance_matrix, 0)
    print("Using example distance matrix")
else:
    distance_matrix = distance_df.to_numpy()
    print("Loaded real distance matrix")

print("S·ªë th√†nh ph·ªë:", distance_matrix.shape[0])
print("V√≠ d·ª• kho·∫£ng c√°ch [0][1]:", distance_matrix[0][1])

Loaded real distance matrix
S·ªë th√†nh ph·ªë: 127
V√≠ d·ª• kho·∫£ng c√°ch [0][1]: 21.48


In [None]:
def detect_convergence(fitness_list, window=5, threshold=1e-3):
    """Return the index at which the fitness curve first stops changing.

    Scans from the start for the first position ``i`` whose value differs
    from the value ``window`` steps later by less than ``threshold`` and
    returns ``i + window``.  When no such position exists (including when
    the list is shorter than ``window``), the list length is returned.
    """
    total = len(fitness_list)
    # An empty range (list shorter than or equal to the window) yields no
    # candidates, so the default `total` is returned.
    plateau_ends = (
        idx + window
        for idx in range(total - window)
        if abs(fitness_list[idx] - fitness_list[idx + window]) < threshold
    )
    return next(plateau_ends, total)

In [None]:

# Summary file: one row of metrics per number of salesmen (m)
summary_file = "results_summary_gasa.csv"
summary_cols = [
    "m", "total_distance", "fitness", "balance_metric",
    "exec_time", "memory_current_MB", "memory_peak_MB",
    "converged_gen", "convergence_speed"
]

# Create an empty (header-only) file. Run this only once per experiment:
# it overwrites any previously saved summary.
pd.DataFrame(columns=summary_cols).to_csv(summary_file, index=False)

for m in range(1, 4):
    print(f"\n=== S·ªë ng∆∞·ªùi (m) = {m} ===")

    solver = GASA_mTSP(distance_matrix, m=m)

    tracemalloc.start()
    # perf_counter is monotonic, unlike wall-clock time.time().
    start_time = time.perf_counter()
    try:
        # run() returns (routes, total_distance, best_fitness, balance_metric, fitness_history)
        raw_routes, total_distance, fitness, balance_metric, fitness_per_generation = solver.run()
        exec_time = time.perf_counter() - start_time
        current, peak = tracemalloc.get_traced_memory()
    finally:
        # Always stop tracing, even if the solver fails.
        tracemalloc.stop()

    # Length of each route. run() returns routes without the depot, so add it
    # at both ends to keep these lengths consistent with the reported fitness.
    routes = [[solver.depot] + list(r) + [solver.depot] for r in raw_routes]
    route_distances = [
        sum(distance_matrix[a][b] for a, b in zip(r, r[1:]))
        for r in routes
    ]

    # Convergence analysis
    converged_gen = detect_convergence(fitness_per_generation)
    convergence_speed = (fitness_per_generation[0] - fitness_per_generation[-1]) / converged_gen if converged_gen else 0

    # Print results
    print(f"T·ªïng qu√£ng ƒë∆∞·ªùng: {total_distance:.2f}")
    print(f"Tuy·∫øn d√†i nh·∫•t (fitness): {fitness:.2f}")
    print(f"Ch√™nh l·ªách gi·ªØa c√°c tuy·∫øn (Balance metric): {balance_metric:.2f}")
    print(f"Th·ªùi gian th·ª±c thi: {exec_time:.4f} gi√¢y")
    print(f"S·ªë v√≤ng l·∫∑p ƒë·ªÉ h·ªôi t·ª•: {converged_gen}")
    print(f"T·ªëc ƒë·ªô h·ªôi t·ª•: {convergence_speed:.4f} ƒë∆°n v·ªã/gen")
    print(f"Memory hi·ªán t·∫°i: {current / 10**6:.2f} MB; Peak: {peak / 10**6:.2f} MB")

    for i, r in enumerate(routes):
        print(f" - Tuy·∫øn {i+1} ({route_distances[i]:.2f}): {r}")

    # === Append this run to the summary file ===
    row = {
        "m": m,
        "total_distance": total_distance,
        "fitness": fitness,
        "balance_metric": balance_metric,
        "exec_time": exec_time,
        "memory_current_MB": current / 10**6,
        "memory_peak_MB": peak / 10**6,
        "converged_gen": converged_gen,
        "convergence_speed": convergence_speed
    }
    # Pin the column order to the header written above.
    pd.DataFrame([row], columns=summary_cols).to_csv(summary_file, mode="a", index=False, header=False)

    # === Save the per-generation fitness curve to its own file ===
    fitness_file = f"gasa_fitness_m{m}.csv"
    pd.DataFrame({"generation": range(len(fitness_per_generation)),
                  "fitness": fitness_per_generation}).to_csv(fitness_file, index=False)

    # Plot the fitness curve (optional)
    plt.figure(figsize=(8, 4))
    plt.plot(fitness_per_generation, marker='o', linestyle='-', color='blue')
    plt.title(f"Fitness qua c√°c th·∫ø h·ªá (m = {m})")
    plt.xlabel("Th·∫ø h·ªá")
    plt.ylabel("Fitness (Tuy·∫øn d√†i nh·∫•t)")
    plt.grid(True)
    plt.tight_layout()
    plt.show()



=== S·ªë ng∆∞·ªùi (m) = 1 ===


NameError: name 'time' is not defined