# Comparison Setup, loads TSPs and creates pandas dataframe

## Imports

In [53]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import tsplib95
import numpy as np

from wsp import tsp, ds, util # TODO: refine imports

TREE_TYPE = ds.PKPRQuadTree
BF_THRESHOLD = 14
S_FACTOR = 2.0 # NOTE: if this value is changed then the kernel ought to be restarted

## Loading Euclidean TSPs

In [54]:
problems : list[tsplib95.models.StandardProblem] = []

for file in sorted(os.listdir("ALL_tsp")): # Loop through every tsp
    if not file.endswith(".tsp"):
        continue
    problem = tsplib95.load(f"ALL_tsp/{file}")
    if problem.edge_weight_type != "EUC_2D": # Skip non-Euclidean TSPs
        continue # TODO: include ATT and GEO and maybe EUC_3D
    
    problems.append(problem)
    print(f"Added {problem.name}")

print("Found", len(problems), "euclidean TSPs")

Added a280
Added ara238025
Added bby34656
Added bbz25234
Added bch2762
Added bck2217
Added bcl380
Added beg3293
Added berlin52
Added bgb4355
Added bgd4396
Added bgf4475
Added bier127
Added bm33708
Added bna56769
Added bnd7168
Added boa28924
Added brd14051
Added bva2144
Added ca4663
Added ch130
Added ch150
Added ch71009
Added d1291
Added d15112
Added d1655
Added d18512
Added d198
Added d2103
Added d493
Added d657
Added dan59296
Added dbj2924
Added dca1389
Added dcb2086
Added dcc1911
Added dea2382
Added dga9698
Added dhb3386
Added dj38
Added dja1436
Added djb2036
Added djc1785
Added dka1376
Added dkc3938
Added dkd1973
Added dke3097
Added dkf3954
Added dkg813
Added dlb3694
Added eg7146
Added ei8246
Added eil101
Added eil51
Added eil76
Added fdp3256
Added fea5557
Added fht47608
Added fi10639
Added fjr3672
Added fjs3649
Added fl1400
Added fl1577
Added fl3795
Added fl417
Added fma21553
Added fna52057
Added fnb1615
Added fnc19402
Added fnl4461
Added fqm5087
Added fra1488
Added frh19289
Added 

## Setting up data science & Save Default Values into dataframe

In [55]:
# Create a data frame with columns for the name opt solution, nearest neighbor solution, and untouched solution

if os.path.exists("results.pkl"):
    df = pd.read_pickle("results.pkl")
else:
    df = pd.DataFrame(columns=['name', 'num_points', f'wsp_count_{S_FACTOR}', 'opt_len', 'nn_len', 'untouched_len', 'nwsp5_len', 'nwsp7_len', 'nwsp9_len'])
    df.name = pd.Series(sorted([problem.name for problem in problems]))
    df.set_index("name", inplace = True)
df

Unnamed: 0_level_0,num_points,opt_len,nn_len,untouched_len,nwsp5_2_len,nwsp7_len,nwsp9_len,nwsp5_len,wsp_count_2.0
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
a280,280,2586.769648,3161.003928,4.847061e+03,,,,,2571
ara238025,238025,,759149.842377,1.176390e+06,,,,,3032365
bby34656,34656,,125383.275407,1.966541e+05,,,,,412676
bbz25234,25234,,87720.344055,1.414553e+05,,,,,299703
bch2762,2762,,10733.617810,1.624823e+04,,,,,28611
...,...,...,...,...,...,...,...,...,...
xua3937,3937,,14177.249409,2.054156e+04,,,,,42157
xva2993,2993,,10942.207468,1.608712e+04,,,,,32046
xvb13584,13584,,46586.545388,7.446251e+04,,,,,160624
ym7663,7663,237978.589063,301008.545258,4.588740e+05,,,,,120650


## Running Necessary Math On Each Problem

In [57]:
# fig, ax = plt.subplots(1, 2, figsize=(12,6))
ax = np.array([None, None])
i = 0
for problem in problems:
    print(i, problem.name)
    # ax[0].clear()
    # ax[1].clear()

    points = [ds.Point(*problem.node_coords[i]) for i in problem.get_nodes()]
    
    if len(points) > 150000: continue # saves time by skipping before creating the tree

    ts_problem : tsp.TravellingSalesmanProblem[TREE_TYPE] = None
    def lazy():
        global ts_problem
        ts_problem = tsp.TravellingSalesmanProblem[TREE_TYPE](TREE_TYPE, points, ax, s=S_FACTOR) if ts_problem is None else ts_problem
    
    # MARK: load the dp solution
    def try_load_dp_path(): # REVIEW: hacky
        num_path = None
        if os.path.exists(f"ALL_tsp/{problem.name}.opt.tour"): # If there is an optimal tour file
            opt_problem = tsplib95.load(f"ALL_tsp/{problem.name}.opt.tour")
            assert(len(opt_problem.tours) == 1)
            num_path = opt_problem.tours[0]
        if os.path.exists(f"ALL_tsp/{problem.name}.tour"): # If there is an optimal tour file
            opt_problem = tsplib95.load(f"ALL_tsp/{problem.name}.tour")
            assert(len(opt_problem.tours) == 1)
            num_path = opt_problem.tours[0]
        elif len(problem.tours) > 0:
            assert len(problem.tours) == 1
            num_path = problem.tours[0] # NOTE: I do not know of any euclidean which has this property
            raise ValueError("A shooting star")
        if num_path is not None:
            path = [points[i-1] for i in num_path] + [points[num_path[0] - 1]] # NOTE: ts_problem.points are reordered use the ordering from the files
            lazy()
            ts_problem.dp_path = (path, util.calc_dist(path), None)
            # ts_problem.draw_tour(ts_problem.dp_path[0], '#FFC0CB')
        elif len(points) <= BF_THRESHOLD:
            print("Brute forcing", problem.name)
            lazy()
            _ = ts_problem.dp_path # Load the property
        else:
            return False
        return True

    # MARK: Populate row values
    if problem.name not in df.index or pd.isna(df.loc[problem.name, "num_points"]):
        lazy()
        assert len(ts_problem.points) == len(points) and len(points) > 2, "Invalid points"
        df.loc[problem.name, "num_points"] = len(ts_problem.points)
    if pd.isna(df.loc[problem.name, "opt_len"]):
        if try_load_dp_path():
            assert ts_problem.check_tour(ts_problem.dp_path[0]), "Invalid tour"
            df.loc[problem.name, "opt_len"] = ts_problem.dp_path[1]
    if pd.isna(df.loc[problem.name, "nn_len"]):
        lazy()
        assert ts_problem.check_tour(ts_problem.nnn_path[0]), "Invalid tour"
        df.loc[problem.name, "nn_len"] = ts_problem.nnn_path[1]
    if pd.isna(df.loc[problem.name, "untouched_len"]):
        lazy()
        assert ts_problem.check_tour(ts_problem.untouched_path[0]), "Invalid tour"
        df.loc[problem.name, "untouched_len"] = ts_problem.untouched_path[1]
    # if pd.isna(df.loc[problem.name, "nwsp5_len"]) and "dp_path" in ts_problem.__dict__:
    #     lazy()
    #     assert ts_problem.check_tour(ts_problem.nwsp_path(5)[0]), "Invalid path"
    #     df.loc[problem.name, "nwsp5_len"] = ts_problem.nwsp_path(5)[1]
    if pd.isna(df.loc[problem.name, f"wsp_count_{S_FACTOR}"]):
        lazy()
        assert len(ts_problem.wspd) > 0, "Invalid wspd"
        df.loc[problem.name, f"wsp_count_{S_FACTOR}"] = len(ts_problem.wspd)
    
    i += 1
print("done") # TODO: use progress instead of prints

0 a280
1 ara238025
1 bby34656
2 bbz25234
3 bch2762
4 bck2217
5 bcl380
6 beg3293
7 berlin52
8 bgb4355
9 bgd4396
10 bgf4475
11 bier127
12 bm33708
13 bna56769
14 bnd7168
15 boa28924
16 brd14051
17 bva2144
18 ca4663
19 ch130
20 ch150
21 ch71009
22 d1291
23 d15112
24 d1655
25 d18512
26 d198
27 d2103
28 d493
29 d657
30 dan59296
31 dbj2924
32 dca1389
33 dcb2086
34 dcc1911
35 dea2382
36 dga9698
37 dhb3386
38 dj38
39 dja1436
40 djb2036
41 djc1785
42 dka1376
43 dkc3938
44 dkd1973
45 dke3097
46 dkf3954
47 dkg813
48 dlb3694
49 eg7146
50 ei8246
51 eil101
52 eil51
53 eil76
54 fdp3256
55 fea5557
56 fht47608
57 fi10639
58 fjr3672
59 fjs3649
60 fl1400
61 fl1577
62 fl3795
63 fl417
64 fma21553
65 fna52057
66 fnb1615
67 fnc19402
68 fnl4461
69 fqm5087
70 fra1488
71 frh19289
72 frv4410
73 fry33203
74 fyg28534
75 gil262
76 gr9882
77 ics39603
78 icw1483
79 icx28698
80 ida8197
81 ido21215
82 ird29514
83 irw2802
84 irx28268
85 it16862
86 ja9847
87 kroA100
88 kroA150
89 kroA200
90 kroB100
91 kroB150
92 kroB200
9

In [58]:
# df.iloc[-10:, :]
# df = df.replace({pd.NA: np.nan})
# df["wsp_count_2.0"] = df["wsp_count_2.0"].astype(pd.Int64Dtype())
df

Unnamed: 0_level_0,num_points,opt_len,nn_len,untouched_len,nwsp5_2_len,nwsp7_len,nwsp9_len,nwsp5_len,wsp_count_2.0
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
a280,280,2586.769648,3161.003928,4.847061e+03,,,,,2571
ara238025,238025,,759149.842377,1.176390e+06,,,,,3032365
bby34656,34656,,125383.275407,1.966541e+05,,,,,412676
bbz25234,25234,,87720.344055,1.414553e+05,,,,,299703
bch2762,2762,,10733.617810,1.624823e+04,,,,,28611
...,...,...,...,...,...,...,...,...,...
xua3937,3937,,14177.249409,2.054156e+04,,,,,42157
xva2993,2993,,10942.207468,1.608712e+04,,,,,32046
xvb13584,13584,37774.934821,46586.545388,7.446251e+04,,,,,160624
ym7663,7663,237978.589063,301008.545258,4.588740e+05,,,,,120650


In [59]:
df.to_pickle('results.pkl')