# Comparison Setup: load the TSP instances and build the results DataFrame

## Imports

In [22]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import tsplib95
import numpy as np
from multiprocess import Pool
import signal
import tqdm

from wsp import tsp, ds, util # TODO: refine imports

# Tree class handed to tsp.TravellingSalesmanProblem when a problem is set up
TREE_TYPE = ds.PKPRQuadTree
# Problems without a tour file are only solved via the "Brute forcing" branch when they have at most this many points
BF_THRESHOLD = 19
# Passed as `s=` to tsp.TravellingSalesmanProblem and embedded in the `wsp_count_<S_FACTOR>` column name
# (presumably the WSPD separation factor -- TODO confirm)
S_FACTOR = 2.0 # NOTE: if this value is changed then the kernel ought to be restarted

## Loading Euclidean TSPs

In [23]:
all_problems : list[tsplib95.models.StandardProblem] = []

# Walk the ALL_tsp directory in name order and keep every 2-D Euclidean instance
for file in sorted(os.listdir("ALL_tsp")):
    if file.endswith(".tsp"): # Anything else in the directory (e.g. tour files) is ignored
        problem = tsplib95.load(f"ALL_tsp/{file}")
        # TODO: include ATT and GEO and maybe EUC_3D
        if problem.edge_weight_type == "EUC_2D":
            all_problems.append(problem)
            print(f"Added {problem.name}")

print("Found", len(all_problems), "euclidean TSPs")

Added a280
Added ara238025
Added bby34656
Added bbz25234
Added bch2762
Added bck2217
Added bcl380
Added beg3293
Added berlin52
Added bgb4355
Added bgd4396
Added bgf4475
Added bier127
Added bm33708
Added bna56769
Added bnd7168
Added boa28924
Added brd14051
Added bva2144
Added ca4663
Added ch130
Added ch150
Added ch71009
Added d1291
Added d15112
Added d1655
Added d18512
Added d198
Added d2103
Added d493
Added d657
Added dan59296
Added dbj2924
Added dca1389
Added dcb2086
Added dcc1911
Added dea2382
Added dga9698
Added dhb3386
Added dj38
Added dja1436
Added djb2036
Added djc1785
Added dka1376
Added dkc3938
Added dkd1973
Added dke3097
Added dkf3954
Added dkg813
Added dlb3694
Added eg7146
Added ei8246
Added eil101
Added eil51
Added eil76
Added fdp3256
Added fea5557
Added fht47608
Added fi10639
Added fjr3672
Added fjs3649
Added fl1400
Added fl1577
Added fl3795
Added fl417
Added fma21553
Added fna52057
Added fnb1615
Added fnc19402
Added fnl4461
Added fqm5087
Added fra1488
Added frh19289
Added 

## Setting up the results DataFrame (load cached values or create defaults)

In [27]:
# Results table with one row per problem name: reuse the cached pickle when it exists,
# otherwise start from an empty frame that has a column for every tracked quantity.
# NOTE: unpickling can execute arbitrary code -- only load a results.pkl that you produced yourself.

if not os.path.exists("results.pkl"):
    df = pd.DataFrame(columns=['name', 'num_points', f'wsp_count_{S_FACTOR}', 'opt_len', 'nn_len', 'ls_len', 'ls_q_len', 'untouched_len', 'nwsp5_len'])
    df["name"] = pd.Series(sorted(problem.name for problem in all_problems))
    df = df.set_index("name")
else:
    df = pd.read_pickle("results.pkl")
df

Unnamed: 0_level_0,num_points,opt_len,nn_len,untouched_len,nwsp5_2_len,nwsp5_len,wsp_count_2.0,ls_len,ls_q_len
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
a280,280,2586.769648,3161.003928,4.847061e+03,,,2571,2769.255661,2956.917525
ara238025,238025,,759149.842377,1.176390e+06,,,3032365,,
bby34656,34656,,125383.275407,1.966541e+05,,,412676,,
bbz25234,25234,,87720.344055,1.414553e+05,,,299703,,
bch2762,2762,,10733.617810,1.624823e+04,,,28611,,
...,...,...,...,...,...,...,...,...,...
xua3937,3937,,14177.249409,2.054156e+04,,,42157,,
xva2993,2993,,10942.207468,1.608712e+04,,,32046,,
xvb13584,13584,37774.934821,46586.545388,7.446251e+04,,,160624,,
ym7663,7663,237978.589063,301008.545258,4.588740e+05,,,120650,,


## Running Necessary Math On Each Problem

In [29]:
# Only problems with fewer than 2000 points are processed; larger ones are filtered out
problems = [candidate for candidate in all_problems if candidate.dimension < 2000]

# fig, ax = plt.subplots(1, 2, figsize=(12,6))
ax = np.array([None, None]) # stand-in for the (commented out) matplotlib axes; handed to the TSP object in setup
i = 0
def setup(problem : tsplib95.models.StandardProblem):
    """Compute every result column that the global `df` does not yet hold for `problem`.

    The function only reads `df`; new values are returned instead of being
    written, so the caller decides how to merge them into the table.

    Args:
        problem: a loaded TSPLIB problem whose `node_coords` supply the points.

    Returns:
        `{problem.name: updates}` where `updates` maps column names to the
        values computed during this call (empty when nothing was missing).

    Raises:
        AssertionError: if a computed tour fails `check_tour`, the point count
            or WSPD is invalid, or a tour file does not hold exactly one tour.
        ValueError: if no `.tour` file exists and the problem file itself embeds a tour.

    Side effects:
        Writes `SUPP_tsp/<name>.lsq.tour` (creating the directory if needed)
        whenever `ls_q_len` is computed.
    """
    # print(problem.name) # i
    # ax[0].clear()
    # ax[1].clear()
    updates = {}

    points = [ds.Point(*problem.node_coords[i]) for i in problem.get_nodes()]
    
    # if len(points) > 150000: return None # saves time by skipping before creating the tree

    ts_problem : tsp.TravellingSalesmanProblem[TREE_TYPE] = None
    def lazy():
        """Build `ts_problem` on first use; later calls leave it untouched."""
        nonlocal ts_problem
        if ts_problem is None:
            ts_problem = tsp.TravellingSalesmanProblem[TREE_TYPE](TREE_TYPE, points, ax, s=S_FACTOR)

    def is_missing(column):
        """Return True when `df` stores no value for this problem in `column`.

        A problem that is absent from the index, or a column that is absent from
        the frame, counts as missing instead of raising KeyError.
        """
        if problem.name not in df.index or column not in df.columns:
            return True
        return pd.isna(df.loc[problem.name, column])

    # MARK: load the dp solution
    def try_load_dp_path(): # REVIEW: hacky
        """Populate `ts_problem.dp_path` from a tour file or by brute force; return whether that succeeded."""
        num_path = None
        if os.path.exists(f"ALL_tsp/{problem.name}.opt.tour"): # If there is an optimal tour file
            opt_problem = tsplib95.load(f"ALL_tsp/{problem.name}.opt.tour")
            assert(len(opt_problem.tours) == 1)
            num_path = opt_problem.tours[0]
        # NOTE(review): this is a separate `if`, so a plain .tour file overrides the .opt.tour loaded above,
        # and the `elif` below is still evaluated when only the .opt.tour exists -- confirm this is intended
        if os.path.exists(f"ALL_tsp/{problem.name}.tour"): # If there is a plain tour file
            opt_problem = tsplib95.load(f"ALL_tsp/{problem.name}.tour")
            assert(len(opt_problem.tours) == 1)
            num_path = opt_problem.tours[0]
        elif len(problem.tours) > 0:
            assert len(problem.tours) == 1
            num_path = problem.tours[0] # NOTE: I do not know of any euclidean which has this property
            raise ValueError("A shooting star")
        if num_path is not None:
            # Tour files hold node numbers; `i-1` assumes they are 1-based positions into `points` -- TODO confirm
            path = [points[i-1] for i in num_path] + [points[num_path[0] - 1]] # NOTE: ts_problem.points are reordered use the ordering from the files
            lazy()
            ts_problem.dp_path = (path, util.calc_dist(path), None)
            # ts_problem.draw_tour(ts_problem.dp_path[0], '#FFC0CB')
        elif len(points) <= BF_THRESHOLD:
            print("Brute forcing", problem.name)
            lazy()
            ts_problem.dp_alt_path = ts_problem.dp_path # Load the property
        else:
            return False
        return True

    # MARK: Populate row values
    if is_missing("num_points"):
        lazy()
        assert len(ts_problem.points) == len(points) and len(points) > 2, "Invalid points"
        updates["num_points"] = len(ts_problem.points)
    if is_missing("opt_len"):
        if try_load_dp_path():
            assert ts_problem.check_tour(ts_problem.dp_path[0]), "Invalid tour"
            updates["opt_len"] = ts_problem.dp_path[1]
    if is_missing("nn_len"):
        lazy()
        assert ts_problem.check_tour(ts_problem.nnn_path[0]), "Invalid tour"
        updates["nn_len"] = ts_problem.nnn_path[1]
    if is_missing("untouched_len"):
        lazy()
        assert ts_problem.check_tour(ts_problem.untouched_path[0]), "Invalid tour"
        updates["untouched_len"] = ts_problem.untouched_path[1]
    # if pd.isna(df.loc[problem.name, "nwsp5_len"]) and "dp_path" in ts_problem.__dict__:
    #     lazy()
    #     assert ts_problem.check_tour(ts_problem.nwsp_path(5)[0]), "Invalid path"
    #     df.loc[problem.name, "nwsp5_len"] = ts_problem.nwsp_path(5)[1]
    # if pd.isna(df.loc[problem.name, "ls_len"]):
    #     lazy()
    #     assert ts_problem.check_tour(ts_problem.local_search_path[0]), "Invalid tour"
    #     updates["ls_len"] = ts_problem.local_search_path[1]
        
    #     num_tour = ts_problem.point_tour_to_ids(ts_problem.local_search_path[0], offset_add=1)[:-1]
    #     tsplib95.models.StandardProblem(
    #         name=problem.name, 
    #         comment=f"Local search tour for {problem.name} ({ts_problem.quick_local_search_path[1]})",
    #         type="TOUR", 
    #         dimension=problem.dimension, 
    #         tours=[num_tour]
    #     ).save(f"SUPP_tsp/{problem.name}.ls.tour")

    if is_missing("ls_q_len"):
        lazy()
        assert ts_problem.check_tour(ts_problem.quick_local_search_path[0]), "Invalid tour"
        updates["ls_q_len"] = ts_problem.quick_local_search_path[1]
        # Save the tour to a file; `[:-1]` drops the last id of the converted tour
        # (presumably the repeated start point -- TODO confirm)
        num_tour = ts_problem.point_tour_to_ids(ts_problem.quick_local_search_path[0], offset_add=1)[:-1]
        # Make sure the output directory exists so the search result is not lost to a FileNotFoundError
        os.makedirs("SUPP_tsp", exist_ok=True)
        tsplib95.models.StandardProblem(
            name=problem.name, 
            comment=f"Quick (5min) local search tour for {problem.name} ({ts_problem.quick_local_search_path[1]})",
            type="TOUR", 
            dimension=problem.dimension, 
            tours=[num_tour]
        ).save(f"SUPP_tsp/{problem.name}.lsq.tour")
            
    if is_missing(f"wsp_count_{S_FACTOR}"):
        lazy()
        assert len(ts_problem.wspd) > 0, "Invalid wspd"
        updates[f"wsp_count_{S_FACTOR}"] = len(ts_problem.wspd)
    
    # i += 1
    # if updates != {}:
    #     print(problem.name)
    
    return {problem.name: updates}

# SIGINT is ignored while the pool is created and restored right afterwards, so Ctrl-C / a kernel
# interrupt is handled by this process (presumably the workers inherit the ignore handler -- TODO confirm).
original_sigint_handler = signal.signal(signal.SIGINT, signal.SIG_IGN)
try:
    pool = Pool(8)
finally:
    # Restore the handler even if the pool could not be created
    signal.signal(signal.SIGINT, original_sigint_handler)

update_collection = []
u = 0 # number of problems that produced at least one new value
pf = {"updated": 0}
try:
    with tqdm.tqdm(total=len(problems), unit="problem", postfix=pf) as pbar:
        for result in pool.imap_unordered(setup, problems):
            update_collection.append(result)
            pbar.update(1)
            if result is None: # tolerated here just like in the merge loop below
                continue
            prob_name = next(iter(result)) # each result is a single-entry {name: updates} dict
            pf["last"] = prob_name
            if result[prob_name]:
                u += 1
                pf["last_updated"] = prob_name
                pf["updated"] = u
            pbar.set_postfix(pf)
except KeyboardInterrupt:
    print("Caught KeyboardInterrupt, terminating workers")
    pool.terminate()
except BaseException:
    # A worker error (e.g. a failed assertion) must not leave the worker processes running
    pool.terminate()
    raise
else:
    pool.close()
finally:
    pool.join()

print(update_collection)

# Merge the values collected from the workers into the results table
for update in update_collection:
    if update is None: continue
    for key, value in update.items():
        for column, new_value in value.items():
            df.loc[key, column] = new_value
     
print("done") # TODO: use progress instead of prints

 17%|█▋        | 16/96 [11:58<59:52, 44.91s/problem, updated=2, last=d1291, last_updated=d1291]      

Caught KeyboardInterrupt, terminating workers
[{'a280': {}}, {'bcl380': {}}, {'berlin52': {}}, {'bier127': {}}, {'ch130': {}}, {'ch150': {}}, {'d198': {}}, {'d493': {}}, {'d657': {}}, {'dj38': {}}, {'dca1389': {'ls_len': 5645.432501955452, 'ls_q_len': 5924.37473972435}}, {'dkg813': {}}, {'eil101': {}}, {'eil51': {}}, {'eil76': {}}, {'d1291': {'ls_len': 55660.70758243624, 'ls_q_len': 57847.595311815676}}]
done





In [30]:
# df.iloc[-10:, :]
# df = df.replace({pd.NA: np.nan})
# df["wsp_count_2.0"] = df["wsp_count_2.0"].astype(pd.Int64Dtype())

# How many rows have a value in each column
print(df.notna().sum())

# Rows for which a local search length is recorded
dfa = df.loc[df['ls_len'].notna()]

# Number of those rows where ls_len is no larger than ls_q_len
print(dfa['ls_len'].le(dfa['ls_q_len']).sum())
# with pd.option_context('display.max_rows', None, 'display.max_columns', None):
#     display(dfa)
dfa

num_points       199
opt_len           86
nn_len           198
untouched_len    198
nwsp5_2_len       65
nwsp5_len         17
wsp_count_2.0    198
ls_len            70
ls_q_len          70
dtype: int64
70


Unnamed: 0_level_0,num_points,opt_len,nn_len,untouched_len,nwsp5_2_len,nwsp5_len,wsp_count_2.0,ls_len,ls_q_len
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
a280,280,2586.769648,3161.003928,4847.061413,,,2571,2769.255661,2956.917525
bcl380,380,1653.191022,2011.585726,2806.570037,,,3207,1815.691437,1960.842922
berlin52,52,7544.365902,9194.130643,11689.756586,22846.619539,11728.735582,376,8145.075963,8492.201855
bier127,127,,139602.236419,184430.885477,314224.537324,,1129,123856.138025,124909.992262
ch130,130,6110.860950,7378.680775,10586.047059,20993.789501,17099.212197,1097,6406.879644,7055.184224
...,...,...,...,...,...,...,...,...,...
wi29,29,27601.173774,36388.059232,38076.849514,,,97,28735.599716,29035.512080
xqf131,131,567.202932,700.981976,1122.194500,,,1307,607.628878,646.447972
xqg237,237,1031.063508,1273.325466,2124.696785,,,2436,1144.278000,1171.833942
xql662,662,2555.557248,3285.548574,5759.792739,,,8236,2760.724402,2956.148712


In [32]:
# Persist the results table to results.pkl, the file the setup cell reloads when it exists
df.to_pickle('results.pkl')