# Comparison Setup, loads TSPs and creates pandas dataframe

Ensure that TSP is setup correctly before running (ie no commented important code and correct pointer of distance matrix)

## Imports

In [1]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import tsplib95
import numpy as np
from multiprocess import Pool
import signal
import tqdm

import perspective

from wsp import tsp, ds, util # TODO: refine imports

TREE_TYPE = ds.PKPRQuadTree
BF_THRESHOLD = 19
S_FACTOR = 2.0 # NOTE: if this value is changed then the kernel ought to be restarted

## Loading Euclidean TSPs

In [2]:
all_problems : list[tsplib95.models.StandardProblem] = []

for file in sorted(os.listdir("ALL_tsp")): # Loop through every tsp
    if not file.endswith(".tsp"):
        continue
    problem = tsplib95.load(f"ALL_tsp/{file}")
    if problem.edge_weight_type != "EUC_2D": # Skip non-Euclidean TSPs
        continue # TODO: include ATT and GEO and maybe EUC_3D
    
    all_problems.append(problem)
    print(f"Added {problem.name}")

print("Found", len(all_problems), "euclidean TSPs")

Added Tnm100.tsp
Added Tnm103.tsp
Added Tnm106.tsp
Added Tnm109.tsp
Added Tnm112.tsp
Added Tnm115.tsp
Added Tnm118.tsp
Added Tnm121.tsp
Added Tnm124.tsp
Added Tnm127.tsp
Added Tnm130.tsp
Added Tnm133.tsp
Added Tnm136.tsp
Added Tnm139.tsp
Added Tnm142.tsp
Added Tnm145.tsp
Added Tnm148.tsp
Added Tnm151.tsp
Added Tnm154.tsp
Added Tnm157.tsp
Added Tnm160.tsp
Added Tnm163.tsp
Added Tnm166.tsp
Added Tnm169.tsp
Added Tnm172.tsp
Added Tnm175.tsp
Added Tnm178.tsp
Added Tnm181.tsp
Added Tnm184.tsp
Added Tnm187.tsp
Added Tnm190.tsp
Added Tnm193.tsp
Added Tnm196.tsp
Added Tnm199.tsp
Added Tnm52.tsp
Added Tnm55.tsp
Added Tnm58.tsp
Added Tnm61.tsp
Added Tnm64.tsp
Added Tnm67.tsp
Added Tnm70.tsp
Added Tnm73.tsp
Added Tnm76.tsp
Added Tnm79.tsp
Added Tnm82.tsp
Added Tnm85.tsp
Added Tnm88.tsp
Added Tnm91.tsp
Added Tnm94.tsp
Added Tnm97.tsp
Added a280
Added ara238025
Added bby34656
Added bbz25234
Added bch2762
Added bck2217
Added bcl380
Added beg3293
Added berlin52
Added bgb4355
Added bgd4396
Added bgf44

## Setting up data science & Save Default Values into dataframe

In [3]:
# Create a data frame with columns for the name opt solution, nearest neighbor solution, and untouched solution

if os.path.exists("results.pkl"):
    df = pd.read_pickle("results.pkl")
else:
    df = pd.DataFrame(columns=['name', 'num_points', f'wsp_count_{S_FACTOR}', 'opt_len', 'nn_len', 'ls_len', 'ls_q_len', 'untouched_len', 'nwsp5_len'])
    df.name = pd.Series(sorted([problem.name for problem in all_problems]))
    df.set_index("name", inplace = True)

print("null percents", df.isnull().sum() * 100 / len(df), sep="\n")
df

null percents
num_points        0.000000
opt_len          29.844961
nn_len            0.775194
untouched_len     0.000000
nwsp5_2_len      74.806202
nwsp5_len        93.410853
wsp_count_2.0     0.000000
ls_len           70.930233
ls_q_len         70.930233
dtype: float64


Unnamed: 0_level_0,num_points,opt_len,nn_len,untouched_len,nwsp5_2_len,nwsp5_len,wsp_count_2.0,ls_len,ls_q_len
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
a280,280,2.586770e+03,3.161004e+03,4.847061e+03,,,2571,2769.255661,2956.917525
ara238025,238025,,7.591498e+05,1.176390e+06,,,3032365,,
bby34656,34656,,1.253833e+05,1.966541e+05,,,412676,,
bbz25234,25234,,8.772034e+04,1.414553e+05,,,299703,,
bch2762,2762,,1.073362e+04,1.624823e+04,,,28611,,
...,...,...,...,...,...,...,...,...,...
Tnm85,85,1.117368e+06,1.257389e+06,1.143585e+07,,,463,,
Tnm88,88,1.172726e+06,1.320711e+06,1.243584e+07,,,475,,
Tnm91,91,1.228712e+06,1.338013e+06,1.347724e+07,,,461,,
Tnm94,94,1.285408e+06,1.396057e+06,1.456005e+07,,,507,,


## Running Necessary Math On Each Problem

In [6]:
problems = list(filter(lambda problem: problem.dimension < 250, all_problems)) # Filter out problems with too many points

# fig, ax = plt.subplots(1, 2, figsize=(12,6))
ax = np.array([None, None])
i = 0
def setup(problem : tsplib95.models.StandardProblem):
    problem.name = problem.name.removesuffix(".tsp")
    # print(problem.name) # i
    # ax[0].clear()
    # ax[1].clear()
    updates = {}

    points = [ds.Point(*problem.node_coords[i]) for i in problem.get_nodes()]
    
    # if len(points) > 150000: return None # saves time by skipping before creating the tree

    ts_problem : tsp.TravellingSalesmanProblem[TREE_TYPE] = None
    def lazy():
        """Loading in some of the problems is expensive so we should only do so if we are actually doing work"""
        nonlocal ts_problem
        ts_problem = tsp.TravellingSalesmanProblem[TREE_TYPE](TREE_TYPE, points, ax, s=S_FACTOR) if ts_problem is None else ts_problem

    # MARK: load the dp solution
    def try_load_dp_path(): # REVIEW: hacky
        num_path = None
        if os.path.exists(f"ALL_tsp/{problem.name}.opt.tour"): # If there is an optimal tour file
            try:
                opt_problem = tsplib95.load(f"ALL_tsp/{problem.name}.opt.tour")
            except:
                raise ValueError(f"Failed to load {problem.name} tour")
            assert(len(opt_problem.tours) == 1)
            if len(opt_problem.tours[0]) != problem.dimension:
                print(f"Invalid tour {problem.name}")
            assert(len(opt_problem.tours[0]) == problem.dimension, f"Invalid tour {problem.name}")
            num_path = opt_problem.tours[0]
        if os.path.exists(f"ALL_tsp/{problem.name}.tour"): # If there is an optimal tour file
            try:
                non_opt_problem = tsplib95.load(f"ALL_tsp/{problem.name}.tour")
            except:
                raise ValueError(f"Failed to load {problem.name} tour")
            assert(len(non_opt_problem.tours) == 1)
            assert(len(non_opt_problem.tours[0]) == problem.dimension)
            num_path = non_opt_problem.tours[0]
        elif len(problem.tours) > 0:
            assert len(problem.tours) == 1
            num_path = problem.tours[0] # NOTE: I do not know of any euclidean which has this property
            raise ValueError("A shooting star")
        if num_path is not None:
            try:
                path = [points[i-1] for i in num_path] + [points[num_path[0] - 1]] # NOTE: ts_problem.points are reordered use the ordering from the files
            except IndexError:
                print(f"Invalid tour indexes {problem.name}")
                raise ValueError("Bad touring")
            lazy()
            ts_problem.dp_path = (path, ts_problem.calc_dist(path), None)
            # ts_problem.draw_tour(ts_problem.dp_path[0], '#FFC0CB')
        elif len(points) <= BF_THRESHOLD:
            print("Brute forcing", problem.name)
            lazy()
            ts_problem.dp_alt_path = ts_problem.dp_path # Load the property
        else:
            return False
        return True
    
    new_problem = problem.name not in df.index
    if new_problem:
        print("New problem " + problem.name)

    # MARK: Populate row values
    if new_problem or pd.isna(df.loc[problem.name, "num_points"]):
        lazy()
        assert len(ts_problem.points) == len(points) and len(points) > 2, "Invalid points"
        updates["num_points"] = len(ts_problem.points)
    if new_problem or pd.isna(df.loc[problem.name, "opt_len"]):
        if try_load_dp_path():
            assert ts_problem.check_tour(ts_problem.dp_path[0]), "Invalid tour"
            updates["opt_len"] = ts_problem.dp_path[1]
    if new_problem or pd.isna(df.loc[problem.name, "nn_len"]):
        lazy()
        assert ts_problem.check_tour(ts_problem.nnn_path[0]), "Invalid tour"
        updates["nn_len"] = ts_problem.nnn_path[1]
    if new_problem or pd.isna(df.loc[problem.name, "untouched_len"]):
        lazy()
        assert ts_problem.check_tour(ts_problem.untouched_path[0]), "Invalid tour"
        updates["untouched_len"] = ts_problem.untouched_path[1]
    # if new_problem or pd.isna(df.loc[problem.name, "nwsp5_len"]) and "dp_path" in ts_problem.__dict__:
    #     lazy()
    #     assert ts_problem.check_tour(ts_problem.nwsp_path(5)[0]), "Invalid path"
    #     df.loc[problem.name, "nwsp5_len"] = ts_problem.nwsp_path(5)[1]
    # if new_problem or pd.isna(df.loc[problem.name, "ls_len"]):
    #     lazy()x
    #     assert ts_problem.check_tour(ts_problem.local_search_path[0]), "Invalid tour"
    #     updates["ls_len"] = ts_problem.local_search_path[1]
    #     # save the tour to a file
    #     num_tour = ts_problem.point_tour_to_ids(ts_problem.local_search_path[0], offset_add=1)[:-1]
    #     tsplib95.models.StandardProblem(
    #         name=problem.name, 
    #         comment=f"Local search tour for {problem.name} ({ts_problem.quick_local_search_path[1]})",
    #         type="TOUR", 
    #         dimension=problem.dimension, 
    #         tours=[num_tour]
    #     ).save(f"SUPP_tsp/{problem.name}.ls.tour")

    # if new_problem or pd.isna(df.loc[problem.name, "ls_q_len"]):
    #     lazy()
    #     assert ts_problem.check_tour(ts_problem.quick_local_search_path[0]), "Invalid tour"
    #     updates["ls_q_len"] = ts_problem.quick_local_search_path[1]
    #     #save the tour to a file
    #     num_tour = ts_problem.point_tour_to_ids(ts_problem.quick_local_search_path[0], offset_add=1)[:-1]
    #     tsplib95.models.StandardProblem(
    #         name=problem.name, 
    #         comment=f"Quick (5min) local search tour for {problem.name} ({ts_problem.quick_local_search_path[1]})",
    #         type="TOUR", 
    #         dimension=problem.dimension, 
    #         tours=[num_tour]
    #     ).save(f"SUPP_tsp/{problem.name}.lsq.tour")
            
    if new_problem or pd.isna(df.loc[problem.name, f"wsp_count_{S_FACTOR}"]):
        lazy()
        assert len(ts_problem.wspd) > 0, "Invalid wspd"
        updates[f"wsp_count_{S_FACTOR}"] = len(ts_problem.wspd)
    
    # i += 1
    # if updates != {}:
    #     print(problem.name)
    
    return {problem.name: updates}

original_sigint_handler = signal.signal(signal.SIGINT, signal.SIG_IGN)
pool = Pool(6) # NUMBER OF PROCESSES
signal.signal(signal.SIGINT, original_sigint_handler)
update_collection = []
u = 0
pf = {"updated": 0}
try:
    with tqdm.tqdm(total=len(problems), unit="problem", postfix=pf) as pbar:
        for result in pool.imap_unordered(setup, problems):
            update_collection.append(result)
            pbar.update(1)
            prob_name = next(iter(result))
            if result[prob_name] != {}:
                u += 1
                pbar.set_postfix(pf := {**pf, **{"last": prob_name, "last_updated": prob_name, "updated": u}})
            else:
                pbar.set_postfix(pf := {**pf, **{"last": prob_name}})
except KeyboardInterrupt:
    print("Caught KeyboardInterrupt, terminating workers")
    pool.terminate()
else:
    pool.close()
pool.join()

print([x for x in update_collection if x is not None and x != {}])

for update in update_collection:
    if update is None: continue
    for key, value in update.items():
        df.loc[key, value.keys()] = value.values()
     
print("done") # TODO: use progress instead of prints

  assert(len(opt_problem.tours[0]) == problem.dimension, f"Invalid tour {problem.name}")
100%|██████████| 87/87 [00:00<00:00, 789.65problem/s, updated=21, last=xqg237, last_updated=Tnm199]  

[{'Tnm100': {}}, {'Tnm103': {}}, {'Tnm106': {}}, {'Tnm109': {}}, {'Tnm118': {}}, {'Tnm112': {}}, {'Tnm121': {}}, {'Tnm115': {}}, {'Tnm124': {}}, {'Tnm127': {}}, {'Tnm130': {}}, {'Tnm133': {}}, {'Tnm136': {}}, {'Tnm139': {'opt_len': 2060620.872593101}}, {'Tnm142': {'opt_len': 2118748.3683304107}}, {'Tnm145': {'opt_len': 2177155.262259658}}, {'Tnm148': {'opt_len': 2235651.987522812}}, {'Tnm151': {'opt_len': 2293242.80656263}}, {'Tnm154': {'opt_len': 2350321.803848672}}, {'Tnm157': {'opt_len': 2407134.9902226212}}, {'Tnm160': {'opt_len': 2463834.0929175094}}, {'Tnm163': {'opt_len': 2485445.0015514353}}, {'Tnm166': {'opt_len': 2543442.945751582}}, {'Tnm169': {'opt_len': 2600519.2156886505}}, {'Tnm172': {'opt_len': 2657345.57638297}}, {'Tnm175': {'opt_len': 2714512.6655559083}}, {'Tnm178': {'opt_len': 2771934.1172950836}}, {'Tnm181': {'opt_len': 2829677.1458229613}}, {'Tnm184': {'opt_len': 2887653.144036206}}, {'Tnm187': {'opt_len': 2945915.29215473}}, {'Tnm52': {}}, {'Tnm55': {}}, {'Tnm190




In [9]:
# df.iloc[-10:, :]
# df = df.replace({pd.NA: np.nan})
# df["wsp_count_2.0"] = df["wsp_count_2.0"].astype(pd.Int64Dtype())
print((~df.isna()).sum())
dfa = df[~df['ls_len'].isna()]
print((dfa['ls_len'] <= dfa['ls_q_len']).sum())
# with pd.option_context('display.max_rows', None, 'display.max_columns', None):
#     display(dfa)

# perspective.Table(df)
df.loc['Tnm199']

num_points       258
opt_len          202
nn_len           256
untouched_len    258
nwsp5_2_len       65
nwsp5_len         17
wsp_count_2.0    258
ls_len            75
ls_q_len          75
dtype: int64
75


num_points                 199.0
opt_len            3139756.32129
nn_len            3342749.890861
untouched_len    71960899.177624
nwsp5_2_len                 <NA>
nwsp5_len                   <NA>
wsp_count_2.0             1092.0
ls_len                      <NA>
ls_q_len                    <NA>
Name: Tnm199, dtype: Float64

In [10]:
df.to_pickle('results.pkl')