# Comparison Setup, loads TSPs and creates pandas dataframe

## Imports

In [5]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import tsplib95
import numpy as np

from wsp import tsp, ds, util # TODO: refine imports

TREE_TYPE = ds.PKPRQuadTree
BF_THRESHOLD = 14

## Loading Euclidean TSPs

In [2]:
problems : list[tsplib95.models.StandardProblem] = []

for file in os.listdir("ALL_tsp"): # Loop through every tsp
    if not file.endswith(".tsp"):
        continue
    problem = tsplib95.load(f"ALL_tsp/{file}")
    if problem.edge_weight_type != "EUC_2D": # Skip non-Euclidean TSPs
        continue # TODO: include ATT and GEO and maybe EUC_3D
    
    if problem.name in ["a280", "rd100"]: # Special skip cases, TODO: remove
        continue
    
    problems.append(problem)
    print(f"Added {problem.name}")

print("Found", len(problems), "euclidean TSPs")

Added pr439
Added rl5934
Added pcb442
Added u2319
Added gil262
Added pcb3038
Added lin105
Added fl417
Added tsp225
Added fl1400
Added nrw1379
Added d2103
Added kroA150
Added pcb1173
Added d198
Added fl1577
Added ch130
Added kroB100
Added u1060
Added berlin52
Added eil51
Added rl1304
Added u2152
Added u724
Added kroD100
Added pr299
Added rd400
Added vm1084
Added rat575
Added d1655
Added ch150
Added d15112
Added pr107
Added kroB200
Added brd14051
Added d1291
Added pr264
Added pr76
Added d493
Added pr136
Added rat195
Added rl11849
Added kroA100
Added kroB150
Added bier127
Added kroC100
Added usa13509
Added eil76
Added pr124
Added rl1323
Added p654
Added rl1889
Added d657
Added eil101
Added fnl4461
Added pr2392
Added rat783
Added ts225
Added u1432
Added u1817
Added lin318
Added d18512
Added rl5915
Added st70
Added rat99
Added fl3795
Added u159
Added kroA200
Added u574
Added pr1002
Added pr152
Added pr226
Added vm1748
Added pr144
Added kroE100
Found 75 euclidean TSPs


## Setting up data science

In [3]:
# Create a data frame with columns for the name opt solution, nearest neighbor solution, and untouched solution

if os.path.exists("results.pkl"):
    df = pd.read_pickle("results.pkl")
else:
    df = pd.DataFrame(columns=['name', 'num_points', 'opt_len', 'nn_len', 'untouched_len', 'nwsp5_len', 'nwsp7_len', 'nwsp9_len'])
    df.name = pd.Series(sorted([problem.name for problem in problems]))
    df.set_index("name", inplace = True)
df

Unnamed: 0_level_0,num_points,opt_len,nn_len,untouched_len,nwsp5_len,nwsp7_len,nwsp9_len
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
berlin52,52,28353.570643,9194.130643,10818.833107,,,
bier127,127,,139602.236419,173055.130034,,,
brd14051,14051,,575718.536949,837139.5488,,,
ch130,130,43509.343306,7378.680775,10043.991903,,,
ch150,150,53488.012505,7982.489176,10882.8634,,,
...,...,...,...,...,...,...,...
u574,574,,48675.570373,73100.255604,,,
u724,724,,54024.446688,83230.419554,,,
usa13509,13509,,25178315.446225,39019971.446296,,,
vm1084,1084,,301229.210482,506252.713343,,,


## Save Default Values into dataframe

In [13]:

# fig, ax = plt.subplots(1, 2, figsize=(12,6))
ax = np.array([None, None])
i = 0
for problem in problems:
    print(i, problem.name)
    # ax[0].clear()
    # ax[1].clear()

    points = [ds.Point(*problem.node_coords[i]) for i in problem.get_nodes()]

    ts_problem = tsp.TravellingSalesmanProblem[TREE_TYPE](TREE_TYPE, points, ax, s=1.0)
    
    # MARK: load the dp solution
    num_path = None
    if os.path.exists(f"ALL_tsp/{problem.name}.opt.tour"): # If there is an optimal tour file
        num_path = tsplib95.load(f"ALL_tsp/{problem.name}.opt.tour").tours[0]
    elif len(problem.tours) > 0:
        num_path = problem.tours[0] # NOTE: I do not know of any euclidean which has this property
        raise ValueError("A shooting star")
    if num_path is not None:
        path = [ts_problem.points[i-1] for i in num_path] + [ts_problem.points[num_path[0] - 1]]
        ts_problem.dp_path = (path, util.calc_dist(path), None)
        # ts_problem.draw_path(ts_problem.dp_path[0], '#FFC0CB')
    elif len(ts_problem.points) <= BF_THRESHOLD:
        print("Brute forcing", problem.name)
        ts_problem.dp_path # Load the property

    # MARK: Populate row values
    df.loc[problem.name, "num_points"] = len(ts_problem.points)
    
    if "dp_path" in ts_problem.__dict__:
        assert ts_problem.check_tour(ts_problem.dp_path[0]), "Invalid path"
        df.loc[problem.name, "opt_len"] = ts_problem.dp_path[1]
    if pd.isna(df.loc[problem.name, "nn_len"]):
        assert ts_problem.check_tour(ts_problem.nnn_path[0]), "Invalid path"
        df.loc[problem.name, "nn_len"] = ts_problem.nnn_path[1]
    if pd.isna(df.loc[problem.name, "untouched_len"]):
        assert ts_problem.check_tour(ts_problem.untouched_path[0]), "Invalid path"
        df.loc[problem.name, "untouched_len"] = ts_problem.untouched_path[1]
    if pd.isna(df.loc[problem.name, "nwsp5_len"]) and len(ts_problem.points) <= 2500:
        assert ts_problem.check_tour(ts_problem.nwsp_path(5)[0]), "Invalid path"
        df.loc[problem.name, "nwsp5_len"] = ts_problem.nwsp_path(5)[1]
    # if pd.isna(df.loc[problem.name, "nwsp7_len"]):
    #     assert ts_problem.check_tour(ts_problem.nwsp_path(7)[0]), "Invalid path"
    #     df.loc[problem.name, "nwsp7_len"] = ts_problem.nwsp_path(7)[1]
    # if df.loc[problem.name, "nwsp9_len"]:
    #     df.loc[problem.name, "nwsp9_len"] = ts_problem.nwsp_path(9)[1]
    
    i += 1
print("done") # TODO: use progress instead of prints

0 pr439
1 rl5934
2 pcb442
3 u2319
4 gil262
5 pcb3038
6 lin105
7 fl417
8 tsp225
9 fl1400
10 nrw1379
11 d2103
12 kroA150
13 pcb1173
14 d198
15 fl1577
16 ch130
17 kroB100
18 u1060
19 berlin52
20 eil51
21 rl1304
22 u2152
23 u724
24 kroD100
25 pr299
26 rd400
27 vm1084
28 rat575
29 d1655
30 ch150
31 d15112
32 pr107
33 kroB200
34 brd14051
35 d1291
36 pr264
37 pr76
38 d493
39 pr136
40 rat195
41 rl11849
42 kroA100
43 kroB150
44 bier127
45 kroC100
46 usa13509
47 eil76
48 pr124
49 rl1323
50 p654
51 rl1889
52 d657
53 eil101
54 fnl4461
55 pr2392
56 rat783
57 ts225
58 u1432
59 u1817
60 lin318
61 d18512
62 rl5915
63 st70
64 rat99
65 fl3795
66 u159
67 kroA200
68 u574
69 pr1002
70 pr152
71 pr226
72 vm1748
73 pr144
74 kroE100
done


In [14]:
df #[df['opt_len'] > 0]

Unnamed: 0_level_0,num_points,opt_len,nn_len,untouched_len,nwsp5_len,nwsp7_len,nwsp9_len
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
berlin52,52,28918.21655,9194.130643,10818.833107,15754.600919,,
bier127,127,,139602.236419,173055.130034,240701.831225,,
brd14051,14051,,575718.536949,837139.5488,,,
ch130,130,44083.168052,7378.680775,10043.991903,18073.575419,,
ch150,150,54024.084495,7982.489176,10882.8634,21066.948606,,
...,...,...,...,...,...,...,...
u574,574,,48675.570373,73100.255604,239647.778422,,
u724,724,,54024.446688,83230.419554,139368.707691,,
usa13509,13509,,25178315.446225,39019971.446296,,,
vm1084,1084,,301229.210482,506252.713343,2150603.324728,,


In [15]:
df.to_pickle('results.pkl')