# Comparison Setup: loads the TSP instances and creates the pandas results DataFrame

## Imports

In [68]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import tsplib95
import numpy as np

from wsp import tsp, ds, util # TODO: refine imports

# Tree class handed to tsp.TravellingSalesmanProblem whenever a problem instance is built below.
TREE_TYPE = ds.PKPRQuadTree
# Problems with no tour file and at most this many points get their reference tour
# from the solver's own `dp_path` (the "Brute forcing" branch) instead of being skipped.
BF_THRESHOLD = 14
# Passed as `s=` to the solver and embedded in the `wsp_count_{S_FACTOR}` column name
# (presumably the WSPD separation factor -- TODO confirm against the wsp package).
S_FACTOR = 2.0 # NOTE: if this value is changed then the kernel ought to be restarted

## Loading Euclidean TSPs

In [69]:
# Collect every 2D-Euclidean TSPLIB instance found in ALL_tsp, in sorted filename order.
problems : list[tsplib95.models.StandardProblem] = []

for file in sorted(os.listdir("ALL_tsp")):
    if file.endswith(".tsp"):
        problem = tsplib95.load(f"ALL_tsp/{file}")
        # Keep only EUC_2D instances (TODO: include ATT and GEO and maybe EUC_3D)
        # and drop the hard-coded special cases (TODO: remove).
        if problem.edge_weight_type == "EUC_2D" and problem.name not in ["a280", "rd100"]:
            problems.append(problem)
            print(f"Added {problem.name}")

print("Found", len(problems), "euclidean TSPs")

Added ara238025
Added bby34656
Added bbz25234
Added bch2762
Added bck2217
Added bcl380
Added beg3293
Added berlin52
Added bgb4355
Added bgd4396
Added bgf4475
Added bier127
Added bm33708
Added bna56769
Added bnd7168
Added boa28924
Added brd14051
Added bva2144
Added ca4663
Added ch130
Added ch150
Added ch71009
Added d1291
Added d15112
Added d1655
Added d18512
Added d198
Added d2103
Added d493
Added d657
Added dan59296
Added dbj2924
Added dca1389
Added dcb2086
Added dcc1911
Added dea2382
Added dga9698
Added dhb3386
Added dj38
Added dja1436
Added djb2036
Added djc1785
Added dka1376
Added dkc3938
Added dkd1973
Added dke3097
Added dkf3954
Added dkg813
Added dlb3694
Added eg7146
Added ei8246
Added eil101
Added eil51
Added eil76
Added fdp3256
Added fea5557
Added fht47608
Added fi10639
Added fjr3672
Added fjs3649
Added fl1400
Added fl1577
Added fl3795
Added fl417
Added fma21553
Added fna52057
Added fnb1615
Added fnc19402
Added fnl4461
Added fqm5087
Added fra1488
Added frh19289
Added frv4410
Add

## Setting up the results DataFrame

In [70]:
# Build (or reload) the results table: one row per problem name, with columns for the
# point count, the WSP count for the current S_FACTOR, and the length of each tour
# variant (optimal, nearest neighbour, untouched, nwsp5/7/9).
EXPECTED_COLUMNS = ['num_points', f'wsp_count_{S_FACTOR}', 'opt_len', 'nn_len', 'untouched_len', 'nwsp5_len', 'nwsp7_len', 'nwsp9_len']

if os.path.exists("results.pkl"):
    # NOTE: results.pkl is this notebook's own cache; never point this at an untrusted pickle.
    df = pd.read_pickle("results.pkl")
    # A cache written with a different S_FACTOR (or an older column set) lacks some
    # columns; add them as all-NaN so later `df.loc[name, column]` lookups do not
    # raise KeyError and the values simply get computed.
    for column in EXPECTED_COLUMNS:
        if column not in df.columns:
            df[column] = np.nan
else:
    df = pd.DataFrame(columns=['name', *EXPECTED_COLUMNS])
    # Assigning to the empty frame creates one row per problem; all other columns become NaN.
    df["name"] = pd.Series(sorted([problem.name for problem in problems]))
    df = df.set_index("name")
df

Unnamed: 0_level_0,num_points,opt_len,nn_len,untouched_len,nwsp5_2_len,nwsp7_len,nwsp9_len,nwsp5_len,wsp_count_2.0
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
berlin52,52,7544.365902,9194.130643,10818.833107,22846.619539,,,11728.735582,376
bier127,127,,139602.236419,173055.130034,314224.537324,,,,1129
brd14051,14051,,575718.536949,837139.548800,,,,,
ch130,130,6110.860950,7378.680775,10043.991903,20993.789501,,,17099.212197,1097
ch150,150,6532.280933,7982.489176,10882.863400,29154.866635,,,29705.180285,1402
...,...,...,...,...,...,...,...,...,...
xqc2175,2175,6953.194492,8863.970176,13835.784078,,,,,24447
xqd4966,4966,,20226.325007,81824.274755,,,,,287471
xqe3891,3891,,14964.310287,22318.421062,,,,,42097
xua3937,3937,,14177.249409,20347.097786,,,,,42157


## Save Default Values into dataframe

In [85]:
# Fill in every baseline column of `df` that is still missing for each loaded problem.
# Each value is only computed when its cell is NaN, so values already present
# (e.g. reloaded from results.pkl) are kept and the cell can be re-run after an interrupt.
# fig, ax = plt.subplots(1, 2, figsize=(12,6))
ax = np.array([None, None])  # placeholder axes passed to the solver while plotting is disabled
i = 0  # counts problems that were processed to the end; skipped problems do not advance it
for problem in problems:
    print(i, problem.name)
    # ax[0].clear()
    # ax[1].clear()

    # Points in the order the problem file lists its nodes.
    points = [ds.Point(*problem.node_coords[node]) for node in problem.get_nodes()]

    if len(points) > 18000: continue # saves time by skipping before creating the tree

    ts_problem : tsp.TravellingSalesmanProblem[TREE_TYPE] = None
    def lazy():
        """Build `ts_problem` for the current `points` on first use; later calls are no-ops."""
        # This cell runs at module level, so `ts_problem` above is a global.
        global ts_problem
        if ts_problem is None:
            ts_problem = tsp.TravellingSalesmanProblem[TREE_TYPE](TREE_TYPE, points, ax, s=S_FACTOR)

    # MARK: load the dp solution
    def try_load_dp_path(): # REVIEW: hacky
        """Try to give `ts_problem.dp_path` a reference tour for the current problem.

        Sources are tried in order and the first match wins:
        `ALL_tsp/<name>.opt.tour`, then `ALL_tsp/<name>.tour`, then a tour embedded in
        the problem file itself. If none exists and the problem has at most
        BF_THRESHOLD points, the solver's own `dp_path` is used instead.

        Returns:
            True if `ts_problem.dp_path` is available afterwards, False otherwise.

        Raises:
            ValueError: if the problem file embeds a tour (deliberate tripwire, since
                no Euclidean instance is known to do so).
        """
        num_path = None
        # A single if/elif chain: previously the second test was a separate `if`, so a
        # `.tour` file overrode an already loaded `.opt.tour` and the embedded-tour
        # branch could raise even though a tour file had been read.
        if os.path.exists(f"ALL_tsp/{problem.name}.opt.tour"): # If there is an optimal tour file
            opt_problem = tsplib95.load(f"ALL_tsp/{problem.name}.opt.tour")
            assert(len(opt_problem.tours) == 1)
            num_path = opt_problem.tours[0]
        elif os.path.exists(f"ALL_tsp/{problem.name}.tour"): # Otherwise fall back to a plain tour file
            opt_problem = tsplib95.load(f"ALL_tsp/{problem.name}.tour")
            assert(len(opt_problem.tours) == 1)
            num_path = opt_problem.tours[0]
        elif len(problem.tours) > 0:
            assert len(problem.tours) == 1
            num_path = problem.tours[0] # NOTE: I do not know of any euclidean which has this property
            raise ValueError("A shooting star")
        if num_path is not None:
            # Tour files use 1-based node ids; close the tour by returning to its first point.
            path = [points[node - 1] for node in num_path] + [points[num_path[0] - 1]] # NOTE: ts_problem.points are reordered use the ordering from the files
            lazy()
            ts_problem.dp_path = (path, util.calc_dist(path), None)
            # ts_problem.draw_tour(ts_problem.dp_path[0], '#FFC0CB')
        elif len(points) <= BF_THRESHOLD:
            print("Brute forcing", problem.name)
            lazy()
            _ = ts_problem.dp_path # Load the property
        else:
            return False
        return True

    # MARK: Populate row values
    if problem.name not in df.index or pd.isna(df.loc[problem.name, "num_points"]):
        lazy()
        assert len(ts_problem.points) == len(points) and len(points) > 2, "Invalid points"
        # Assigning through .loc also creates the row when the name is not in the index yet.
        df.loc[problem.name, "num_points"] = len(ts_problem.points)
    if pd.isna(df.loc[problem.name, "opt_len"]):
        if try_load_dp_path():
            assert ts_problem.check_tour(ts_problem.dp_path[0]), "Invalid tour"
            df.loc[problem.name, "opt_len"] = ts_problem.dp_path[1]
    if pd.isna(df.loc[problem.name, "nn_len"]):
        lazy()
        assert ts_problem.check_tour(ts_problem.nnn_path[0]), "Invalid tour"
        df.loc[problem.name, "nn_len"] = ts_problem.nnn_path[1]
    if pd.isna(df.loc[problem.name, "untouched_len"]):
        lazy()
        assert ts_problem.check_tour(ts_problem.untouched_path[0]), "Invalid tour"
        df.loc[problem.name, "untouched_len"] = ts_problem.untouched_path[1]
    # if pd.isna(df.loc[problem.name, "nwsp5_len"]) and "dp_path" in ts_problem.__dict__:
    #     lazy()
    #     assert ts_problem.check_tour(ts_problem.nwsp_path(5)[0]), "Invalid path"
    #     df.loc[problem.name, "nwsp5_len"] = ts_problem.nwsp_path(5)[1]
    if pd.isna(df.loc[problem.name, f"wsp_count_{S_FACTOR}"]):
        lazy()
        assert len(ts_problem.wspd) > 0, "Invalid wspd"
        df.loc[problem.name, f"wsp_count_{S_FACTOR}"] = len(ts_problem.wspd)

    i += 1
print("done") # TODO: use progress instead of prints

0 ara238025
0 bby34656
0 bbz25234
0 bch2762
1 bck2217
2 bcl380
3 beg3293
4 berlin52
5 bgb4355
6 bgd4396
7 bgf4475
8 bier127
9 bm33708
9 bna56769
9 bnd7168
10 boa28924
10 brd14051
11 bva2144
12 ca4663
13 ch130
14 ch150
15 ch71009
15 d1291
16 d15112
17 d1655
18 d18512
18 d198
19 d2103
20 d493
21 d657
22 dan59296
22 dbj2924
23 dca1389
24 dcb2086
25 dcc1911
26 dea2382
27 dga9698
28 dhb3386
29 dj38
30 dja1436
31 djb2036
32 djc1785
33 dka1376
34 dkc3938
35 dkd1973
36 dke3097
37 dkf3954
38 dkg813
39 dlb3694
40 eg7146
41 ei8246
42 eil101
43 eil51
44 eil76
45 fdp3256
46 fea5557
47 fht47608
47 fi10639
48 fjr3672
49 fjs3649
50 fl1400
51 fl1577
52 fl3795
53 fl417
54 fma21553
54 fna52057
54 fnb1615
55 fnc19402
55 fnl4461
56 fqm5087
57 fra1488
58 frh19289
58 frv4410
59 fry33203
59 fyg28534
59 gil262
60 gr9882
61 ics39603
61 icw1483
62 icx28698
62 ida8197
63 ido21215
63 ird29514
63 irw2802
64 irx28268
64 it16862
65 ja9847
66 kroA100
67 kroA150
68 kroA200
69 kroB100
70 kroB150
71 kroB200
72 kroC100
73

KeyboardInterrupt: 

In [90]:
# Show the populated results table (the bare last expression gets the rich display).
# The commented-out lines are optional, manually-run clean-ups kept for reference:
#   - look at only the last ten rows
#   - replace pd.NA entries with np.nan
#   - cast the "wsp_count_2.0" column to pandas' nullable Int64 dtype
# df.iloc[-10:, :]
# df = df.replace({pd.NA: np.nan})
# df["wsp_count_2.0"] = df["wsp_count_2.0"].astype(pd.Int64Dtype())
df

Unnamed: 0_level_0,num_points,opt_len,nn_len,untouched_len,nwsp5_2_len,nwsp7_len,nwsp9_len,nwsp5_len,wsp_count_2.0
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
bch2762,2762.0,,10733.617810,15799.341802,,,,,28611
bck2217,2217.0,6896.495845,8567.593709,15001.424632,,,,,30863
bcl380,380.0,1653.191022,2011.585726,2669.051912,,,,,3207
beg3293,3293.0,,12510.410359,17932.447021,,,,,38571
berlin52,52.0,7544.365902,9194.130643,10818.833107,22846.619539,,,11728.735582,376
...,...,...,...,...,...,...,...,...,...
xua3937,3937.0,,14177.249409,20347.097786,,,,,42157
xva2993,2993.0,,10942.207468,16101.712182,,,,,32046
xvb13584,13584.0,,46586.545388,73054.521034,,,,,160624
ym7663,7663.0,237978.589063,301008.545258,448968.393477,,,,,120650


In [91]:
# Persist the results; the setup cell reloads this file instead of starting from an empty frame.
df.to_pickle('results.pkl')