In [1]:
import numpy as np
import random
import logging
import pandas as pd
from geopy.distance import geodesic
from joblib import Parallel, delayed

# Set up logging
logging.basicConfig(level=logging.INFO)

Load data and initialize the population

In [2]:
# Load city data and calculate the distance matrix
def load_cities_and_distance_matrix(file_path):
    cities = pd.read_csv(file_path, header=None, names=['name', 'lat', 'lon'])
    num_cities = len(cities)
    
    # Initialize an empty distance matrix
    dist_matrix = np.zeros((num_cities, num_cities))
    
    # Calculate distances using geopy
    for i in range(num_cities):
        for j in range(i + 1, num_cities):
            dist = geodesic((cities.iloc[i].lat, cities.iloc[i].lon), (cities.iloc[j].lat, cities.iloc[j].lon)).km
            dist_matrix[i, j] = dist_matrix[j, i] = dist
    
    return cities, dist_matrix

# Calculate the total cost of the TSP path
def tsp_cost(tsp, dist_matrix):
    cost = 0
    for i in range(len(tsp) - 1):
        cost += dist_matrix[tsp[i], tsp[i + 1]]
    return cost


# Generate an initial population of random paths
def generate_initial_population(size, num_cities):
    population = []
    for _ in range(size):
        tour = np.random.permutation(range(1, num_cities)).tolist()
        population.append([0] + tour + [0])  # Ensure start and end at city 0
    return population

#First and faster Greedy algorithm approach(Nearest Neigbour)

In [3]:
# Nearest Neighbor (Greedy) Algorithm
def nearest_neighbor(cities, dist_matrix):
    num_cities = len(cities)
    visited = np.full(num_cities, False)
    city = 0  # Start at the first city
    visited[city] = True
    tsp = [city]

    while len(tsp) < num_cities:
        next_city = np.argmin([dist_matrix[city, i] if not visited[i] else float('inf') for i in range(num_cities)])
        visited[next_city] = True
        tsp.append(next_city)
        city = next_city

    tsp.append(tsp[0])  # Return to the start
    return tsp
#main function

#Slower but more accurate: genetic approach with mutation

In [4]:
# Simplified Swap Mutation
def swap_mutation(path):
    new_path = path[1:-1]  # Don't mutate start/end cities
    i, j = random.sample(range(len(new_path)), 2)
    new_path[i], new_path[j] = new_path[j], new_path[i]
    return [0] + new_path + [0]  # Ensure path starts and ends at city 0

# Genetic Algorithm
def genetic_algorithm(cities, dist_matrix, population_size=50, generations=200, elitism_size=2, early_stopping=10):
    # Initialize population with random paths
    population = generate_initial_population(population_size, len(cities))
    best_cost = float('inf')
    no_improvement = 0

    for generation in range(generations):
        # Evaluate the fitness of the population
        costs = [tsp_cost(individual, dist_matrix) for individual in population]
        population = [x for _, x in sorted(zip(costs, population), key=lambda x: x[0])]
        
        # Elitism: Keep the best individuals
        population = population[:population_size - elitism_size]

        # Select parents and generate children
        children = []
        for _ in range(population_size // 2):
            parents = random.sample(population, 2)
            # Simple crossover (just swap segments)
            cut1, cut2 = sorted(random.sample(range(1, len(parents[0]) - 1), 2))
            child1 = parents[0][:cut1] + parents[1][cut1:cut2] + parents[0][cut2:]
            child2 = parents[1][:cut1] + parents[0][cut1:cut2] + parents[1][cut2:]

            if random.random() < 0.5:
                child1 = swap_mutation(child1)
            if random.random() < 0.5:
                child2 = swap_mutation(child2)
            
            children.extend([child1, child2])
        
        # Add the children to the population
        population.extend(children)
        
        # Keep the best elitism individuals
        population.extend(sorted(population, key=lambda x: tsp_cost(x, dist_matrix))[:elitism_size])

        # Early stopping if no improvement
        current_best_cost = tsp_cost(population[0], dist_matrix)
        if current_best_cost < best_cost:
            best_cost = current_best_cost
            no_improvement = 0
        else:
            no_improvement += 1
        
        if no_improvement >= early_stopping:
            logging.info(f"Early stopping at generation {generation}")
            break

        # Logging progress
        if generation % 20 == 0:
            logging.info(f"Generation {generation}: Best Cost = {best_cost:.2f} km")

    return population[0]

Solving the tsp

In [5]:
def solve_tsp(file_path):
    # Load cities and distance matrix
    cities, dist_matrix = load_cities_and_distance_matrix(file_path)
    
    # Run Greedy Algorithm
    greedy_path = nearest_neighbor(cities, dist_matrix)
    greedy_cost = tsp_cost(greedy_path, dist_matrix)
    
    logging.info(f" Cost: {greedy_cost:.2f} km")
    
    # Run Steady-State Evolutionary Algorithm
    genetic_path = genetic_algorithm(cities, dist_matrix)
    genetic_cost = tsp_cost(genetic_path, dist_matrix)
    
    logging.info(f"Cost: {genetic_cost:.2f} km")


# Example run (use your own file path)
if __name__ == "__main__":
    solve_tsp("/Users/nikolastankovic/Downloads/vanuatu.csv")
    #solve_tsp("/Users/nikolastankovic/Downloads/italy.csv")
    #solve_tsp("/Users/nikolastankovic/Downloads/russia.csv")
    #solve_tsp("/Users/nikolastankovic/Downloads/us.csv")
    #solve_tsp("/Users/nikolastankovic/Downloads/china.csv")

INFO:root: Cost: 1475.53 km
INFO:root:Generation 0: Best Cost = 1494.39 km
INFO:root:Generation 20: Best Cost = 859.46 km
INFO:root:Early stopping at generation 22
INFO:root:Cost: 859.46 km
