# Comparison Setup: loads the TSP instances and creates the pandas DataFrame

## Imports

In [3]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import tsplib95
import numpy as np

from wsp import tsp, ds, util # TODO: refine imports

# Tree class handed to tsp.TravellingSalesmanProblem (both as the type parameter and first argument).
TREE_TYPE = ds.PKPRQuadTree
# Problems without a tour file and with at most this many points get their dp_path computed
# directly (the notebook prints "Brute forcing" for them).
BF_THRESHOLD = 14
# Passed as `s=` to tsp.TravellingSalesmanProblem and embedded in the `wsp_count_<S_FACTOR>` column name.
S_FACTOR = 2.0 # NOTE: if this value is changed then the kernel ought to be restarted

## Loading Euclidean TSPs

In [4]:
# Gather every usable TSPLIB instance from ALL_tsp, visiting the files in sorted name order.
problems: list[tsplib95.models.StandardProblem] = []

for file in sorted(os.listdir("ALL_tsp")):
    if file.endswith(".tsp"):  # ignore tour files and anything else in the directory
        problem = tsplib95.load(f"ALL_tsp/{file}")
        # Keep only EUC_2D instances (TODO: include ATT and GEO and maybe EUC_3D),
        # minus the special skip cases (TODO: remove).
        if problem.edge_weight_type == "EUC_2D" and problem.name not in ("a280", "rd100"):
            problems.append(problem)
            print(f"Added {problem.name}")

print("Found", len(problems), "euclidean TSPs")

Added berlin52
Added bier127
Added bm33708
Added brd14051
Added ca4663
Added ch130
Added ch150
Added ch71009
Added d1291
Added d15112
Added d1655
Added d18512
Added d198
Added d2103
Added d493
Added d657
Added dj38
Added eg7146
Added ei8246
Added eil101
Added eil51
Added eil76
Added fi10639
Added fl1400
Added fl1577
Added fl3795
Added fl417
Added fnl4461
Added gil262
Added gr9882
Added it16862
Added ja9847
Added kroA100
Added kroA150
Added kroA200
Added kroB100
Added kroB150
Added kroB200
Added kroC100
Added kroD100
Added kroE100
Added kz9976
Added lin105
Added lin318
Added mo14185
Added mu1979
Added nrw1379
Added p654
Added pcb1173
Added pcb3038
Added pcb442
Added pr1002
Added pr107
Added pr124
Added pr136
Added pr144
Added pr152
Added pr226
Added pr2392
Added pr264
Added pr299
Added pr439
Added pr76
Added qa194
Added rat195
Added rat575
Added rat783
Added rat99
Added rd400
Added rl11849
Added rl1304
Added rl1323
Added rl1889
Added rl5915
Added rl5934
Added st70
Added sw24978
Added ts

## Setting up the results DataFrame

In [5]:
# Results table: one row per problem, indexed by problem name, with columns for the point count,
# the WSPD size for the current S_FACTOR, and the optimal, nearest-neighbour, untouched and
# nwsp5/7/9 tour lengths.
result_columns = ['num_points', f'wsp_count_{S_FACTOR}', 'opt_len', 'nn_len', 'untouched_len', 'nwsp5_len', 'nwsp7_len', 'nwsp9_len']

if os.path.exists("results.pkl"):
    # NOTE: unpickling can execute arbitrary code; only load a results.pkl written by this notebook.
    df = pd.read_pickle("results.pkl")
    # A cache written under a different S_FACTOR (or an older column layout) lacks some of the
    # expected columns. Add them as all-NaN so later cells can read and fill them instead of
    # failing with a KeyError. Existing columns and their order are left untouched.
    missing_columns = [column for column in result_columns if column not in df.columns]
    if missing_columns:
        df = df.reindex(columns=[*df.columns, *missing_columns])
else:
    # Fresh table: every measurement starts out missing; rows are the sorted problem names.
    df = pd.DataFrame(columns=['name', *result_columns])
    df["name"] = pd.Series(sorted(problem.name for problem in problems))
    df = df.set_index("name")
df

Unnamed: 0_level_0,num_points,opt_len,nn_len,untouched_len,nwsp5_2_len,nwsp7_len,nwsp9_len,nwsp5_len,wsp_count_2.0
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
berlin52,52,7544.365902,9194.130643,10818.833107,22846.619539,,,11728.735582,376
bier127,127,,139602.236419,173055.130034,314224.537324,,,,1129
brd14051,14051,,575718.536949,837139.5488,,,,,
ch130,130,6110.86095,7378.680775,10043.991903,20993.789501,,,17099.212197,1097
ch150,150,6532.280933,7982.489176,10882.8634,29154.866635,,,29705.180285,1402
...,...,...,...,...,...,...,...,...,...
uy734,734,,100013.231237,,,,,,8499
wi29,29,,36388.059232,,,,,,97
ym7663,7663,237978.589063,301008.545258,,,,,,120650
zi929,929,,120899.283457,,,,,,10561


## Save default values into the DataFrame

In [6]:
# Populate `df` for every loaded problem: number of points, the reference ("opt") tour length when
# a tour is available or the instance is small enough to brute force, the nearest-neighbour and
# untouched tour lengths, and the WSPD size for the current S_FACTOR.
# fig, ax = plt.subplots(1, 2, figsize=(12,6))
ax = np.array([None, None]) # stand-in for the two axes while plotting is commented out
wsp_column = f"wsp_count_{S_FACTOR}"
i = 0 # counts processed problems only; skipped problems do not advance it
for problem in problems:
    print(i, problem.name)
    # ax[0].clear()
    # ax[1].clear()

    points = [ds.Point(*problem.node_coords[node]) for node in problem.get_nodes()]

    if len(points) > 10000: continue # saves time by skipping before creating the tree

    ts_problem = tsp.TravellingSalesmanProblem[TREE_TYPE](TREE_TYPE, points, ax, s=S_FACTOR) # TODO: ought to lazy load this

    # MARK: load the dp solution
    # Exactly one tour source is used per problem. The sources form a single mutually exclusive
    # chain so that a tour loaded from a file can never fall through to the embedded-tour tripwire.
    # When both files exist the plain .tour file wins, which matches the previous behaviour.
    num_path = None
    tour_file = next(
        (
            candidate
            for candidate in (f"ALL_tsp/{problem.name}.tour", f"ALL_tsp/{problem.name}.opt.tour")
            if os.path.exists(candidate)
        ),
        None,
    )
    if tour_file is not None:
        opt_problem = tsplib95.load(tour_file)
        assert len(opt_problem.tours) == 1, f"Expected exactly one tour in {tour_file}"
        num_path = opt_problem.tours[0]
    elif len(problem.tours) > 0:
        assert len(problem.tours) == 1
        num_path = problem.tours[0] # NOTE: I do not know of any euclidean which has this property
        raise ValueError("A shooting star") # deliberate tripwire: stop and look if this ever happens
    if num_path is not None:
        # Tour entries are treated as 1-based indices into `points`; the first node is appended
        # again to close the cycle.
        # NOTE: ts_problem.points are reordered use the ordering from the files
        path = [points[node - 1] for node in num_path] + [points[num_path[0] - 1]]
        ts_problem.dp_path = (path, util.calc_dist(path), None)
        # ts_problem.draw_tour(ts_problem.dp_path[0], '#FFC0CB')
    elif len(ts_problem.points) <= BF_THRESHOLD:
        print("Brute forcing", problem.name)
        _ = ts_problem.dp_path # Load the property

    # MARK: Populate row values
    # This first assignment also creates the row when the problem is not yet in the table.
    df.loc[problem.name, "num_points"] = len(ts_problem.points)

    # opt_len is refreshed whenever a dp_path was set or computed above; the remaining columns
    # are only filled when still missing, so cached values are not recomputed.
    if "dp_path" in ts_problem.__dict__:
        assert ts_problem.check_tour(ts_problem.dp_path[0]), "Invalid path"
        df.loc[problem.name, "opt_len"] = ts_problem.dp_path[1]
    if pd.isna(df.loc[problem.name, "nn_len"]):
        assert ts_problem.check_tour(ts_problem.nnn_path[0]), "Invalid path"
        df.loc[problem.name, "nn_len"] = ts_problem.nnn_path[1]
    if pd.isna(df.loc[problem.name, "untouched_len"]):
        assert ts_problem.check_tour(ts_problem.untouched_path[0]), "Invalid path"
        df.loc[problem.name, "untouched_len"] = ts_problem.untouched_path[1]
    # if pd.isna(df.loc[problem.name, "nwsp5_len"]) and "dp_path" in ts_problem.__dict__:
    #     assert ts_problem.check_tour(ts_problem.nwsp_path(5)[0]), "Invalid path"
    #     df.loc[problem.name, "nwsp5_len"] = ts_problem.nwsp_path(5)[1]
    # A table cached under a different S_FACTOR has no column for the current one yet; treat that
    # as "missing" instead of raising KeyError (the assignment below creates the column).
    if wsp_column not in df.columns or pd.isna(df.loc[problem.name, wsp_column]):
        df.loc[problem.name, wsp_column] = len(ts_problem.wspd)

    i += 1
print("done") # TODO: use progress instead of prints

0 berlin52
1 bier127
2 bm33708
2 brd14051
2 ca4663
3 ch130
4 ch150
5 ch71009
5 d1291
6 d15112
6 d1655
7 d18512
7 d198
8 d2103
9 d493
10 d657
11 dj38
12 eg7146
13 ei8246
14 eil101
15 eil51
16 eil76
17 fi10639
17 fl1400
18 fl1577
19 fl3795
20 fl417
21 fnl4461
22 gil262
23 gr9882
24 it16862
24 ja9847
25 kroA100
26 kroA150
27 kroA200
28 kroB100
29 kroB150
30 kroB200
31 kroC100
32 kroD100
33 kroE100
34 kz9976
35 lin105
36 lin318
37 mo14185
37 mu1979
38 nrw1379
39 p654
40 pcb1173
41 pcb3038
42 pcb442
43 pr1002
44 pr107
45 pr124
46 pr136
47 pr144
48 pr152
49 pr226
50 pr2392
51 pr264
52 pr299
53 pr439
54 pr76
55 qa194
56 rat195
57 rat575
58 rat783
59 rat99
60 rd400
61 rl11849
61 rl1304
62 rl1323
63 rl1889
64 rl5915
65 rl5934
66 st70
67 sw24978
67 ts225
68 tsp225
69 tz6117
70 u1060
71 u1432
72 u159
73 u1817
74 u2152
75 u2319
76 u574
77 u724
78 usa13509
78 uy734
79 vm1084
80 vm1748
81 vm22775
81 wi29
82 ym7663
83 zi929
done


In [12]:
# Uncomment to blank out the nwsp5_len column:
# df["nwsp5_len"] = np.nan
# Show the last ten rows of the results table with every column.
df.tail(10)

Unnamed: 0_level_0,num_points,opt_len,nn_len,untouched_len,nwsp5_2_len,nwsp7_len,nwsp9_len,nwsp5_len,wsp_count_2.0
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
ja9847,9847,491320.958357,630231.097594,923091.067615,,,,,117457
kz9976,9976,1061875.390091,1355055.476566,2203658.917334,,,,,201364
mu1979,1979,,116687.080512,178649.561598,,,,,18169
qa194,194,,12368.119325,16550.488281,,,,,1815
tz6117,6117,394278.746553,498714.198936,749330.436762,,,,,73926
uy734,734,,100013.231237,141047.281549,,,,,8499
wi29,29,,36388.059232,33223.237172,,,,,97
ym7663,7663,237978.589063,301008.545258,448968.393477,,,,,120650
zi929,929,,120899.283457,167130.430206,,,,,10561
ca4663,4663,1290437.588617,1692835.419853,2573851.489875,,,,,68643


In [8]:
# Persist the results table; the setup cell reloads results.pkl when it exists, so later runs
# start from these values.
df.to_pickle('results.pkl')