# Hello World
So far, pr107 is the only problem among the instances with dimension <= 400.

In [None]:

import os
import tsplib95
from pathlib import Path
import itertools
import numpy as np
from metric_split import balanced_metric_split
import threading
from concurrent.futures import ThreadPoolExecutor

from utils import is_metric

# Directory that is scanned for TSPLIB ``.tsp`` instance files.
TSP_LIB_PATH = Path("../../ALL_tsp")
# Size limit on the instance dimension: larger EXPLICIT instances are skipped
# while loading, and only instances up to this size are converted to distance
# matrices.
DIMENSION_THRESHOLD = 750
# NOTE(review): not referenced in the cells visible here -- presumably meant to
# cap the size of instances that get the metric check; confirm or remove.
METRIC_THRESHOLD = min(1000, DIMENSION_THRESHOLD)
# When True, EXPLICIT instances whose weight matrix fails is_metric are skipped.
METRIC_CHECK = True

In [22]:
# Every TSPLIB instance that survives the filters below, in sorted file order.
all_problems: list[tsplib95.models.StandardProblem] = []

for file in sorted(TSP_LIB_PATH.iterdir()):
    # Only regular files carrying the ".tsp" suffix are treated as instances.
    if not (file.suffix == ".tsp" and file.is_file()):
        continue

    problem = tsplib95.load(str(file.absolute()))

    if problem.edge_weight_type == "EXPLICIT":
        # Explicit-matrix instances are kept only when they are small enough
        # and, if METRIC_CHECK is enabled, their weight matrix passes is_metric.
        if problem.dimension > DIMENSION_THRESHOLD or (
            METRIC_CHECK
            and not is_metric(problem._create_explicit_matrix().to_numpy(), tol=1e-5)
        ):
            continue
        print(f"Adding {problem.name} since it's metric")

    # Instances of any other edge weight type are accepted unfiltered here.
    # NOTE(review): the summary below says "euclidean" although EXPLICIT
    # instances are counted too -- confirm the intended wording.
    all_problems.append(problem)
    print(f"Added {problem.name}")

print("Found", len(all_problems), "euclidean TSPs")

Adding 100001 since it's metric
Added 100001
Adding 10007 since it's metric
Added 10007
Adding 10008 since it's metric
Added 10008
Adding 10010 since it's metric
Added 10010
Adding 11675 since it's metric
Added 11675
Adding 12290 since it's metric
Added 12290
Adding 14850 since it's metric
Added 14850
Adding 15002 since it's metric
Added 15002
Adding 15005 since it's metric
Added 15005
Adding 15007 since it's metric
Added 15007
Adding 16038 since it's metric
Added 16038
Adding 20004 since it's metric
Added 20004
Adding 20007 since it's metric
Added 20007
Adding 20009 since it's metric
Added 20009
Adding 20181 since it's metric
Added 20181
Adding 25001 since it's metric
Added 25001
Adding 25004 since it's metric
Added 25004
Adding 25006 since it's metric
Added 25006
Adding 30001 since it's metric
Added 30001
Adding 30003 since it's metric
Added 30003
Adding 30005 since it's metric
Added 30005
Adding 33001 since it's metric
Added 33001
Adding 35002 since it's metric
Added 35002
Adding 35

In [23]:
def geo_based_dist_matrix(nodes_array, radius: float = 6378.388) -> np.ndarray:
    """Return the pairwise great-circle distance matrix of ``nodes_array``.

    Column 0 of ``nodes_array`` is read as latitude and column 1 as longitude,
    both in degrees; any array-like of rows is accepted. The result is an
    ``(n, n)`` float array of distances on a sphere of the given ``radius``
    (same unit as ``radius``). An empty input yields a ``(0, 0)`` array.

    NOTE(review): the values are converted with ``np.deg2rad`` as plain decimal
    degrees and the distances are not rounded. The TSPLIB GEO definition uses
    DDD.MM coordinates and integer distances -- confirm that the continuous
    variant is what is wanted here.
    """
    coords = np.asarray(nodes_array, dtype=float)
    if coords.size == 0:
        return np.zeros((0, 0))

    lat = np.deg2rad(coords[:, 0])
    lon = np.deg2rad(coords[:, 1])

    # Broadcast column vectors against row vectors to get all pairs at once.
    q1 = np.cos(lon[:, None] - lon[None, :])
    q2 = np.cos(lat[:, None] - lat[None, :])
    q3 = np.cos(lat[:, None] + lat[None, :])

    cos_angle = 0.5 * ((1.0 + q1) * q2 - (1.0 - q1) * q3)
    # Floating-point rounding can push the value marginally outside [-1, 1],
    # where arccos returns NaN; clamp it back into the valid domain.
    np.clip(cos_angle, -1.0, 1.0, out=cos_angle)
    return radius * np.arccos(cos_angle)

def convert_to_dist_matrix(problem: tsplib95.models.StandardProblem) -> np.ndarray:
    """Convert a tsplib95 StandardProblem to a dense distance matrix.

    Handling per ``problem.edge_weight_type``:

    * ``EUC_2D`` / ``CEIL_2D`` / ``ATT``: Euclidean norm of the pairwise
      coordinate differences; ``ATT`` is additionally divided by ``sqrt(10)``.
    * types containing ``GEO``: delegated to ``geo_based_dist_matrix``.
    * types containing ``EXPLICIT``: the problem's own explicit matrix.

    Types containing ``CEIL`` are rounded up with ``np.ceil``; no other
    rounding is applied.

    Raises:
        ValueError: if the edge weight type is none of the above.
    """
    weight_type = problem.edge_weight_type

    if weight_type in ("EUC_2D", "CEIL_2D", "ATT"):
        nodes_array = np.array(list(problem.node_coords.values()))
        # (n, 1, d) - (1, n, d) broadcasts to every pairwise difference vector.
        dist_matrix = np.linalg.norm(
            nodes_array[:, np.newaxis] - nodes_array[np.newaxis, :], axis=2
        )
        if weight_type == "ATT":
            dist_matrix /= np.sqrt(10)
    elif "GEO" in weight_type:
        nodes_array = np.array(list(problem.node_coords.values()))
        dist_matrix = geo_based_dist_matrix(nodes_array)
    elif "EXPLICIT" in weight_type:
        # The coordinate array is not needed here, so it is never built.
        dist_matrix = problem._create_explicit_matrix().to_numpy()
    else:
        raise ValueError(f"Unsupported edge weight type: {problem.edge_weight_type}")

    if "CEIL" in weight_type:
        dist_matrix = np.ceil(dist_matrix)
    return dist_matrix


# Lazily yields (problem name, distance matrix) pairs for every loaded problem
# whose dimension does not exceed DIMENSION_THRESHOLD. This is a one-shot
# generator: once it has been consumed it yields nothing until this cell is
# run again.
all_dist_matrices = (
    (tsp.name, convert_to_dist_matrix(tsp))
    for tsp in all_problems
    if not int(tsp.dimension) > DIMENSION_THRESHOLD
)

In [24]:
# Maps a problem name to the sizes of the clusters returned by
# balanced_metric_split, or to None when the split produced a single cluster.
# The number of clusters is not fixed, hence the variable-length tuple.
forest_amounts: dict[str, tuple[int, ...] | None] = {}

def _compute_split(args: tuple[str, np.ndarray]) -> tuple[str, tuple[int, ...] | None]:
    """Run ``balanced_metric_split`` on one named distance matrix.

    Args:
        args: ``(name, dist_matrix)`` pair, packed into one tuple so the
            function can be passed directly to ``executor.map``.

    Returns:
        ``(name, sizes)`` where ``sizes`` is ``None`` if the split returned a
        single cluster, otherwise a tuple with the length of each cluster.
    """
    name, dist_matrix = args
    # Emit the progress line as one pre-formatted string. print() with several
    # arguments writes them piece by piece, which lets lines from concurrently
    # running workers interleave in the output.
    print(f"{name} {dist_matrix.shape[0]}\n", end="")
    forest = balanced_metric_split(dist_matrix, s=1.0, k=7, tol=1e-7)
    if len(forest) == 1:
        return name, None
    return name, tuple(len(cluster) for cluster in forest)
    
# Fan the splits out over a thread pool. executor.map yields results in input
# order, so forest_amounts is filled in the same order as all_dist_matrices.
# For a sequential run (e.g. when debugging), call _compute_split on each
# (name, dist_matrix) pair in a plain for-loop instead.
with ThreadPoolExecutor(max_workers=os.process_cpu_count()) as executor:
    forest_amounts.update(executor.map(_compute_split, all_dist_matrices))

100001 10
10007 10
10008 10
10010 10
11675 11
12290 12


1485015002 15
 14
15005 15
15007 15
16038 16
20004 20
20007 20
20009 20
20181 20
25001 25
25004 25
25006 25
30001 30
30003 30
30005 30
33001 33
35002 35
35003 35
35009 35
40003 40
40004 40
40008 40
Tnm100.tsp 100
Tnm103.tsp 103
Tnm106.tsp 106
Tnm109.tsp 109
Tnm112.tsp 112
Tnm115.tsp 115
Tnm118.tsp 118
Tnm121.tsp 121
Tnm124.tsp 124
Tnm127.tsp 127
Tnm130.tsp 130
Tnm133.tsp 133
Tnm136.tsp 136
Tnm139.tsp 139
Tnm142.tsp 142
Tnm145.tsp 145
Tnm148.tsp 148
Tnm151.tsp 151
Tnm154.tsp 154
Tnm157.tsp 157
Tnm160.tsp 160
Tnm163.tsp 163
Tnm166.tsp 166
Tnm169.tsp 169
Tnm172.tsp 172
Tnm175.tsp 175
Tnm178.tsp 178
Tnm181.tsp 181
Tnm184.tsp 184
Tnm187.tsp 187
Tnm190.tsp 190
Tnm193.tsp 193
Tnm196.tsp 196
Tnm199.tsp 199
Tnm52.tsp 52
Tnm55.tspTnm58.tsp 58
 55
Tnm61.tsp 61
Tnm64.tsp 64
Tnm67.tsp 67
Tnm70.tsp 70
Tnm73.tsp 73
Tnm76.tsp 76
Tnm79.tsp 79
Tnm82.tsp 82
Tnm85.tsp 85
Tnm88.tsp 88
Tnm91.tsp 91
Tnm94.tsp 94
Tnm97.tsp 97
a280 280
ali535 535
att48 48
att48_hard 48
att532 532
bayg29 29
bayg29_hard 29
bays2

In [28]:
# Show only the entries that are not None, i.e. the problems whose split did
# not collapse to a single cluster.
{problem_name: sizes for problem_name, sizes in forest_amounts.items() if sizes is not None}

{'pr107': (54, 53), 'swiss42_hard': (38, 1, 1, 1, 1)}