# Comparison Setup, loads TSPs and creates pandas dataframe

## Imports

In [1]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import tsplib95

from wsp import tsp, ds, util # TODO: refine imports

TREE_TYPE = ds.PKPRQuadTree
BF_THRESHOLD = 14

## Loading Euclidean TSPs

In [2]:
problems : list[tsplib95.models.StandardProblem] = []

for file in os.listdir("ALL_tsp"): # Loop through every tsp
    if not file.endswith(".tsp"):
        continue
    problem = tsplib95.load(f"ALL_tsp/{file}")
    if problem.edge_weight_type != "EUC_2D": # Skip non-Euclidean TSPs
        continue # TODO: include ATT and GEO and maybe EUC_3D
    
    if problem.name in ["a280", "rd100"]: # Special skip cases, TODO: remove
        continue
    
    problems.append(problem)
    print(f"Added {problem.name}")

print("Found", len(problems), "euclidean TSPs")

Added pr439
Added rl5934
Added pcb442
Added u2319
Added gil262
Added pcb3038
Added lin105
Added fl417
Added tsp225
Added fl1400
Added nrw1379
Added d2103
Added kroA150
Added pcb1173
Added d198
Added fl1577
Added ch130
Added kroB100
Added u1060
Added berlin52
Added eil51
Added rl1304
Added u2152
Added u724
Added kroD100
Added lin318
Added pr299
Added rd400
Added vm1084
Added rat575
Added d1655
Added ch150
Added d15112
Added pr107
Added kroB200
Added brd14051
Added d1291
Added pr264
Added pr76
Added d493
Added pr136
Added rat195
Added rl11849
Added kroA100
Added kroB150
Added bier127
Added kroC100
Added usa13509
Added eil76
Added pr124
Added rl1323
Added p654
Added rl1889
Added d657
Added eil101
Added fnl4461
Added pr2392
Added rat783
Added ts225
Added u1432
Added u1817
Added lin318
Added d18512
Added rl5915
Added st70
Added rat99
Added fl3795
Added u159
Added kroA200
Added u574
Added pr1002
Added pr152
Added pr226
Added vm1748
Added pr144
Added kroE100
Found 76 euclidean TSPs


## Setting up data science

In [3]:
# Create a data frame with columns for the name opt solution, nearest neighbor solution, and untouched solution

if os.path.exists("results.csv"):
    df = pd.read_csv("results.csv")
else:
    df = pd.DataFrame(columns=['name', 'num_points', 'opt_len', 'nn_len', 'untouched_len'])
    df.name = pd.Series(sorted([problem.name for problem in problems]))
    df.set_index("name", inplace = True)
df

Unnamed: 0_level_0,num_points,opt_len,nn_len,untouched_len
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
berlin52,,,,
bier127,,,,
brd14051,,,,
ch130,,,,
ch150,,,,
...,...,...,...,...
u574,,,,
u724,,,,
usa13509,,,,
vm1084,,,,


## Save Default Values into dataframe

In [4]:
# fig, ax = plt.subplots(1, 2, figsize=(12,6))
ax = None
i = 0
for problem in problems:
    # ax[0].clear()
    # ax[1].clear()

    points = [ds.Point(*problem.node_coords[i]) for i in problem.get_nodes()]

    ts_problem = tsp.TravellingSalesmanProblem[TREE_TYPE](TREE_TYPE, points, ax, s=1.0)
    
    # MARK: load the dp solution
    num_path = None
    if os.path.exists(f"ALL_tsp/{problem.name}.opt.tour"): # If there is an optimal tour file
        num_path = tsplib95.load(f"ALL_tsp/{problem.name}.opt.tour").tours[0]
    elif len(problem.tours) > 0:
        num_path = problem.tours[0] # NOTE: I do not know of any euclidean which has this property
        raise ValueError("A shooting star")
    if num_path is not None:
        path = [ts_problem.points[i-1] for i in num_path]
        ts_problem.dp_path = (path + [path[0]], util.calc_dist(path), None)
        # ts_problem.draw_path(ts_problem.dp_path[0], '#FFC0CB')
    elif len(ts_problem.points) <= BF_THRESHOLD:
        print("Brute forcing", problem.name)
        ts_problem.dp_path # Load the property
        
    # MARK: Testing
    assert ts_problem.check_path(ts_problem.nnn_path[0]), "Invalid path"
    assert ts_problem.check_path(ts_problem.untouched_path[0]), "Invalid path"

    # MARK: Populate row values
    df.loc[problem.name, "num_points"] = len(ts_problem.points)
    
    if "dp_path" in ts_problem.__dict__:
        assert ts_problem.check_path(ts_problem.dp_path[0]), "Invalid path"
        df.loc[problem.name, "opt_len"] = ts_problem.dp_path[1]
    df.loc[problem.name, "nn_len"] = ts_problem.nnn_path[1]
    df.loc[problem.name, "untouched_len"] = ts_problem.untouched_path[1]
    
    i += 1
    print(i)
print("done") # TODO: use progress instead of prints

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
done


In [5]:
df #[df['opt_len'] > 0]

Unnamed: 0_level_0,num_points,opt_len,nn_len,untouched_len
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
berlin52,52,7498.26818,8980.918279,20985.156714
bier127,127,,135751.778048,387386.275887
brd14051,14051,,577037.162832,23581476.344809
ch130,130,6045.299871,7575.286292,47719.135732
ch150,150,6511.241138,8194.614332,52433.396122
...,...,...,...,...
u574,574,,46881.866537,40215.237808
u724,724,,55223.203988,157309.596628
usa13509,13509,,25047673.205267,1590360148.854726
vm1084,1084,,301469.227592,5338569.767839


In [6]:
df.to_csv('result.csv')