# Hello World

In [2]:
import tsplib95
from pathlib import Path
import itertools
import numpy as np
from metric_split import balanced_metric_split
import threading
from concurrent.futures import ThreadPoolExecutor

# Directory that is scanned for ".tsp" instance files (a relative path).
TSP_LIB_PATH = Path("../../ALL_tsp")
# Only problems whose dimension is at most this value get a distance matrix.
DIMENSION_THRESHOLD = 150

In [3]:
all_problems: list[tsplib95.models.StandardProblem] = []

# Visit the directory entries in sorted order so the listing is reproducible.
for file in sorted(TSP_LIB_PATH.iterdir()):
    # Only regular files carrying the ".tsp" extension are loaded.
    if file.suffix == ".tsp" and file.is_file():
        problem = tsplib95.load(str(file.absolute()))
        # Problems whose weights are given explicitly are left out.
        # TODO: include ATT and GEO and maybe EUC_3D
        # NOTE(review): only EXPLICIT is filtered here, so the "euclidean"
        # wording of the summary below is broader than it sounds - confirm.
        if problem.edge_weight_type != "EXPLICIT":
            all_problems.append(problem)
            print(f"Added {problem.name}")

print("Found", len(all_problems), "euclidean TSPs")

Added Tnm100.tsp
Added Tnm103.tsp
Added Tnm106.tsp
Added Tnm109.tsp
Added Tnm112.tsp
Added Tnm115.tsp
Added Tnm118.tsp
Added Tnm121.tsp
Added Tnm124.tsp
Added Tnm127.tsp
Added Tnm130.tsp
Added Tnm133.tsp
Added Tnm136.tsp
Added Tnm139.tsp
Added Tnm142.tsp
Added Tnm145.tsp
Added Tnm148.tsp
Added Tnm151.tsp
Added Tnm154.tsp
Added Tnm157.tsp
Added Tnm160.tsp
Added Tnm163.tsp
Added Tnm166.tsp
Added Tnm169.tsp
Added Tnm172.tsp
Added Tnm175.tsp
Added Tnm178.tsp
Added Tnm181.tsp
Added Tnm184.tsp
Added Tnm187.tsp
Added Tnm190.tsp
Added Tnm193.tsp
Added Tnm196.tsp
Added Tnm199.tsp
Added Tnm52.tsp
Added Tnm55.tsp
Added Tnm58.tsp
Added Tnm61.tsp
Added Tnm64.tsp
Added Tnm67.tsp
Added Tnm70.tsp
Added Tnm73.tsp
Added Tnm76.tsp
Added Tnm79.tsp
Added Tnm82.tsp
Added Tnm85.tsp
Added Tnm88.tsp
Added Tnm91.tsp
Added Tnm94.tsp
Added Tnm97.tsp
Added a280
Added ali535
Added ara238025
Added att48
Added att532
Added bby34656
Added bbz25234
Added bch2762
Added bck2217
Added bcl380
Added beg3293
Added berlin52
A

In [19]:
# TEST
def geo_based_dist_matrix(nodes_array, radius: float = 6378.388) -> np.ndarray:
    """Return the pairwise TSPLIB ``GEO`` distance matrix for ``nodes_array``.

    Args:
        nodes_array: Array-like of shape (n, 2). Column 0 is the latitude and
            column 1 the longitude, both in TSPLIB's ``DDD.MM`` notation
            (integer part = degrees, fractional part = minutes / 100).
        radius: Radius of the idealised sphere; the default is the constant
            used by the TSPLIB ``GEO`` definition.

    Returns:
        An (n, n) float array of great-circle distances in the unit of
        ``radius``. The values are left unrounded; TSPLIB's final
        ``int(d + 1.0)`` step is not applied here.
    """
    coords = np.asarray(nodes_array, dtype=float)

    # TSPLIB GEO coordinates are degrees.minutes, not decimal degrees:
    # split off the whole degrees and rescale the minutes (x / 0.6 == 5x / 3).
    whole_degrees = np.trunc(coords)
    minutes = coords - whole_degrees
    radians = np.deg2rad(whole_degrees + 5.0 * minutes / 3.0)
    lat = radians[:, 0]
    lon = radians[:, 1]

    # Broadcast column vs. row vectors to obtain every (i, j) combination.
    lat_i = lat[:, None]
    lat_j = lat[None, :]
    lon_i = lon[:, None]
    lon_j = lon[None, :]

    q1 = np.cos(lon_i - lon_j)
    q2 = np.cos(lat_i - lat_j)
    q3 = np.cos(lat_i + lat_j)

    cos_angle = 0.5 * ((1.0 + q1) * q2 - (1.0 - q1) * q3)
    # Floating-point rounding can push the value marginally outside [-1, 1]
    # (near-coincident or antipodal points), which would make arccos yield NaN.
    return radius * np.arccos(np.clip(cos_angle, -1.0, 1.0))

def convert_to_dist_matrix(problem: tsplib95.models.StandardProblem) -> np.ndarray:
    """Convert a tsplib95 StandardProblem to a dense pairwise distance matrix.

    The node coordinates are taken from ``problem.node_coords`` in its
    iteration order, so row/column ``i`` belongs to the i-th listed node.

    Supported ``edge_weight_type`` values:
        * ``EUC_2D`` / ``CEIL_2D``: Euclidean norm of the coordinate difference
          (``CEIL_2D`` is additionally rounded up).
        * ``ATT``: Euclidean norm divided by ``sqrt(10)``.
        * any type containing ``GEO``: delegated to ``geo_based_dist_matrix``.

    Apart from the ``CEIL`` case the distances are returned as unrounded
    floats; no integer rounding is applied.

    Raises:
        ValueError: If the edge weight type is none of the above.
    """
    weight_type = problem.edge_weight_type
    nodes_array = np.array(list(problem.node_coords.values()))

    if weight_type in ("EUC_2D", "CEIL_2D", "ATT"):
        # (n, 1, d) - (1, n, d) broadcasts to all pairwise coordinate deltas.
        deltas = nodes_array[:, np.newaxis] - nodes_array[np.newaxis, :]
        dist_matrix = np.linalg.norm(deltas, axis=2)
        if weight_type == "ATT":
            dist_matrix /= np.sqrt(10)
    elif "GEO" in weight_type:
        dist_matrix = geo_based_dist_matrix(nodes_array)
    else:
        raise ValueError(f"Unsupported edge weight type: {problem.edge_weight_type}")

    if "CEIL" in weight_type:
        dist_matrix = np.ceil(dist_matrix)
    return dist_matrix


# (name, distance matrix) pairs for every problem small enough to process.
# Built as a list, not a generator: a generator would be exhausted after the
# first pass, so re-running a cell that consumes it would silently see nothing.
all_dist_matrices = [
    (problem.name, convert_to_dist_matrix(problem))
    for problem in all_problems
    if int(problem.dimension) <= DIMENSION_THRESHOLD
]

In [20]:
# One entry per processed problem, as returned by _compute_split: None when
# the split yields a single-element forest, otherwise a tuple holding the
# length of each element of the forest (so the tuple length varies).
forest_amounts: list[tuple[int, ...] | None] = []

def _compute_split(args: tuple[str, np.ndarray]) -> tuple[int, ...] | None:
    """Run ``balanced_metric_split`` on one problem and summarise the result.

    Args:
        args: ``(name, dist_matrix)`` pair; packed into one tuple so the
            function can be used directly with ``executor.map``.

    Returns:
        ``None`` if the returned forest has exactly one element, otherwise a
        tuple with the length of each element of the forest.
    """
    name, dist_matrix = args
    # Emit the progress line as one pre-formatted string. A multi-argument
    # print() writes its pieces separately, which let lines from concurrent
    # workers interleave in the output.
    print(f"{name} {dist_matrix.shape[0]}\n", end="")
    forest = balanced_metric_split(dist_matrix, s=1.0, k=13)
    if len(forest) == 1:
        return None
    return tuple(len(cluster) for cluster in forest)
    
# Fan the split jobs out over a thread pool. executor.map hands results back
# in the order the inputs were supplied, so entries line up with the inputs.
with ThreadPoolExecutor() as executor:
    forest_amounts.extend(executor.map(_compute_split, all_dist_matrices))

Tnm100.tsp 100
Tnm103.tsp 103
Tnm106.tsp 106
Tnm109.tsp 109
Tnm112.tsp 112
Tnm115.tsp 115
Tnm118.tsp 118
Tnm121.tsp 121
Tnm124.tsp 124
Tnm127.tsp 127
Tnm130.tsp 130
Tnm133.tsp 133
Tnm136.tsp 136
Tnm139.tsp 139
Tnm142.tsp 142
Tnm145.tsp 145
Tnm148.tsp 148
Tnm52.tsp 52
Tnm55.tsp 55
Tnm58.tsp 58
Tnm61.tsp 61
Tnm64.tspTnm67.tsp 67
 64
Tnm70.tsp 70
Tnm73.tsp 73
Tnm76.tsp 76
Tnm79.tsp 79
Tnm82.tsp 82
Tnm85.tsp 85
Tnm88.tsp 88
Tnm91.tsp 91
Tnm94.tsp 94
Tnm97.tsp 97
att48 48
berlin52 52
bier127 127
burma14 14
ch130 130
ch150dj38 38
 150
eil101 101
eil51 51
eil76gr137 137
 76
gr96 96
kroA100 100
kroA150 150
kroB100 100
kroB150 150
kroC100 100
kroD100 100
kroE100 100
lin105 105
pr107 107
pr124 124
pr136 136
pr144 144
pr76 76
rat99 99
rd100 100
st70 70
ulysses16.tsp 16
ulysses22.tsp 22
wi29 29
xqf131 131


In [21]:
# Show the collected split summaries (None or a tuple of lengths per problem).
forest_amounts

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 (1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1),
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 (54, 53),
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 (2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1),
 (2, 2, 2, 1, 2, 1, 4, 1, 3, 1, 1, 1, 1),
 None,
 None]