# Comparison Setup, loads TSPs and creates pandas dataframe

## Imports

In [1]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import tsplib95
import numpy as np

from wsp import tsp, ds, util # TODO: refine imports

TREE_TYPE = ds.PKPRQuadTree
BF_THRESHOLD = 14
S_FACTOR = 2.0 # NOTE: if this value is changed then the kernel ought to be restarted

## Loading Euclidean TSPs

In [2]:
problems : list[tsplib95.models.StandardProblem] = []

for file in sorted(os.listdir("ALL_tsp")): # Loop through every tsp
    if not file.endswith(".tsp"):
        continue
    problem = tsplib95.load(f"ALL_tsp/{file}")
    if problem.edge_weight_type != "EUC_2D": # Skip non-Euclidean TSPs
        continue # TODO: include ATT and GEO and maybe EUC_3D
    
    if problem.name in ["a280", "rd100"]: # Special skip cases, TODO: remove
        continue
    
    problems.append(problem)
    print(f"Added {problem.name}")

print("Found", len(problems), "euclidean TSPs")

Added berlin52
Added bier127
Added brd14051
Added ch130
Added ch150
Added d1291
Added d15112
Added d1655
Added d18512
Added d198
Added d2103
Added d493
Added d657
Added eil101
Added eil51
Added eil76
Added fl1400
Added fl1577
Added fl3795
Added fl417
Added fnl4461
Added gil262
Added kroA100
Added kroA150
Added kroA200
Added kroB100
Added kroB150
Added kroB200
Added kroC100
Added kroD100
Added kroE100
Added lin105
Added lin318
Added nrw1379
Added p654
Added pcb1173
Added pcb3038
Added pcb442
Added pr1002
Added pr107
Added pr124
Added pr136
Added pr144
Added pr152
Added pr226
Added pr2392
Added pr264
Added pr299
Added pr439
Added pr76
Added rat195
Added rat575
Added rat783
Added rat99
Added rd400
Added rl11849
Added rl1304
Added rl1323
Added rl1889
Added rl5915
Added rl5934
Added st70
Added ts225
Added tsp225
Added u1060
Added u1432
Added u159
Added u1817
Added u2152
Added u2319
Added u574
Added u724
Added usa13509
Added vm1084
Added vm1748
Found 75 euclidean TSPs


## Setting up data science

In [3]:
# Create a data frame with columns for the name opt solution, nearest neighbor solution, and untouched solution

if os.path.exists("results.pkl"):
    df = pd.read_pickle("results.pkl")
else:
    df = pd.DataFrame(columns=['name', 'num_points', f'wsp_count_{S_FACTOR}', 'opt_len', 'nn_len', 'untouched_len', 'nwsp5_len', 'nwsp7_len', 'nwsp9_len'])
    df.name = pd.Series(sorted([problem.name for problem in problems]))
    df.set_index("name", inplace = True)
df

Unnamed: 0_level_0,num_points,opt_len,nn_len,untouched_len,nwsp5_2_len,nwsp7_len,nwsp9_len,nwsp5_len,wsp_count_2.0
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
berlin52,52,28918.21655,9194.130643,10818.833107,22846.619539,,,11728.735582,
bier127,127,,139602.236419,173055.130034,314224.537324,,,,
brd14051,14051,,575718.536949,837139.5488,,,,,
ch130,130,44083.168052,7378.680775,10043.991903,20993.789501,,,17099.212197,
ch150,150,54024.084495,7982.489176,10882.8634,29154.866635,,,29705.180285,
...,...,...,...,...,...,...,...,...,...
u574,574,,48675.570373,73100.255604,335467.490191,,,,
u724,724,,54024.446688,83230.419554,321516.846393,,,,
usa13509,13509,,25178315.446225,39019971.446296,,,,,
vm1084,1084,,301229.210482,506252.713343,3073543.181063,,,,


## Save Default Values into dataframe

In [14]:
# fig, ax = plt.subplots(1, 2, figsize=(12,6))
ax = np.array([None, None])
i = 0
for problem in problems:
    print(i, problem.name)
    # ax[0].clear()
    # ax[1].clear()

    points = [ds.Point(*problem.node_coords[i]) for i in problem.get_nodes()]
    
    if len(points) > 2500: continue # saves time by skipping before creating the tree

    ts_problem = tsp.TravellingSalesmanProblem[TREE_TYPE](TREE_TYPE, points, ax, s=S_FACTOR)
    
    # MARK: load the dp solution
    num_path = None
    if os.path.exists(f"ALL_tsp/{problem.name}.opt.tour"): # If there is an optimal tour file
        opt_problem = tsplib95.load(f"ALL_tsp/{problem.name}.opt.tour")
        assert(len(opt_problem.tours) == 1)
        num_path = opt_problem.tours[0]
    elif len(problem.tours) > 0:
        assert len(problem.tours) == 1
        num_path = problem.tours[0] # NOTE: I do not know of any euclidean which has this property
        raise ValueError("A shooting star")
    if num_path is not None:
        path = [points[i-1] for i in num_path] + [points[num_path[0] - 1]] # NOTE: ts_problem.points are reordered use the ordering from the files
        ts_problem.dp_path = (path, util.calc_dist(path), None)
        # ts_problem.draw_tour(ts_problem.dp_path[0], '#FFC0CB')
    elif len(ts_problem.points) <= BF_THRESHOLD:
        print("Brute forcing", problem.name)
        _ = ts_problem.dp_path # Load the property

    # MARK: Populate row values
    df.loc[problem.name, "num_points"] = len(ts_problem.points)
    
    if "dp_path" in ts_problem.__dict__:
        assert ts_problem.check_tour(ts_problem.dp_path[0]), "Invalid path"
        df.loc[problem.name, "opt_len"] = ts_problem.dp_path[1]
    # if pd.isna(df.loc[problem.name, "nn_len"]):
    #     assert ts_problem.check_tour(ts_problem.nnn_path[0]), "Invalid path"
    #     df.loc[problem.name, "nn_len"] = ts_problem.nnn_path[1]
    # if pd.isna(df.loc[problem.name, "untouched_len"]):
    #     assert ts_problem.check_tour(ts_problem.untouched_path[0]), "Invalid path"
    #     df.loc[problem.name, "untouched_len"] = ts_problem.untouched_path[1]
    # if pd.isna(df.loc[problem.name, "nwsp5_len"]) and "dp_path" in ts_problem.__dict__:
    #     assert ts_problem.check_tour(ts_problem.nwsp_path(5)[0]), "Invalid path"
    #     df.loc[problem.name, "nwsp5_len"] = ts_problem.nwsp_path(5)[1]
    if pd.isna(df.loc[problem.name, f"wsp_count_{S_FACTOR}"]):
        df.loc[problem.name, f"wsp_count_{S_FACTOR}"] = len(ts_problem.wspd)
    
    i += 1
print("done") # TODO: use progress instead of prints

0 berlin52
1 bier127
2 brd14051
2 ch130
3 ch150
4 d1291
5 d15112
5 d1655
6 d18512
6 d198
7 d2103
8 d493
9 d657
10 eil101
11 eil51
12 eil76
13 fl1400
14 fl1577
15 fl3795
15 fl417
16 fnl4461
16 gil262
17 kroA100
18 kroA150
19 kroA200
20 kroB100
21 kroB150
22 kroB200
23 kroC100
24 kroD100
25 kroE100
26 lin105
27 lin318
28 nrw1379
29 p654
30 pcb1173
31 pcb3038
31 pcb442
32 pr1002
33 pr107
34 pr124
35 pr136
36 pr144
37 pr152
38 pr226
39 pr2392
40 pr264
41 pr299
42 pr439
43 pr76
44 rat195
45 rat575
46 rat783
47 rat99
48 rd400
49 rl11849
49 rl1304
50 rl1323
51 rl1889
52 rl5915
52 rl5934
52 st70
53 ts225
54 tsp225
55 u1060
56 u1432
57 u159
58 u1817
59 u2152
60 u2319
61 u574
62 u724
63 usa13509
63 vm1084
64 vm1748
done


In [15]:
# df["nwsp5_len"] = np.nan
df

Unnamed: 0_level_0,num_points,opt_len,nn_len,untouched_len,nwsp5_2_len,nwsp7_len,nwsp9_len,nwsp5_len,wsp_count_2.0
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
berlin52,52,7544.365902,9194.130643,10818.833107,22846.619539,,,11728.735582,376
bier127,127,,139602.236419,173055.130034,314224.537324,,,,1129
brd14051,14051,,575718.536949,837139.5488,,,,,
ch130,130,6110.86095,7378.680775,10043.991903,20993.789501,,,17099.212197,1097
ch150,150,6532.280933,7982.489176,10882.8634,29154.866635,,,29705.180285,1402
...,...,...,...,...,...,...,...,...,...
u574,574,,48675.570373,73100.255604,335467.490191,,,,6802
u724,724,,54024.446688,83230.419554,321516.846393,,,,10145
usa13509,13509,,25178315.446225,39019971.446296,,,,,
vm1084,1084,,301229.210482,506252.713343,3073543.181063,,,,11097


In [16]:
df.to_pickle('results.pkl')