In [20]:
import dspy
import os
from dspy.teleprompt import *
import random
import math
import numpy as np
from python_tsp.exact import solve_tsp_dynamic_programming
import re

os.environ['TOGETHER_API_KEY'] = '35ba5bebf6288e43fdc8989965161592e3335d7067c772c0c6995cdc0e60cd88'
os.environ['TOGETHER_API_BASE'] = 'https://api.together.xyz/v1'

In [21]:
# constants
NUM_CITIES = 10
TRAIN_INSTANCES = 100
TEST_INSTANCES = 100
CITIES = " ".join(map(str, list(np.arange(NUM_CITIES))))
NUM_THREADS = 5
K = 6

In [22]:
# helper functions
def euclidean_distance(point1, point2):
    return round(math.sqrt((point1[0] - point2[0]) ** 2 + (point1[1] - point2[1]) ** 2), 1)

def calc_path_distance(path, distances):
    total_distance = 0
    if len(path) < 2:
        return 0
    for i in range(len(path)-1):
        total_distance += distances[path[i]][path[i + 1]]
    total_distance += distances[0][path[len(path)-1]]
    return total_distance

def make_graphs(num_instances, num_cities):
    x_range = (-20, 20)
    y_range = (-20, 20)

    distanceList = []
    for _ in range(num_instances):
        coordinates = [(round(random.uniform(*x_range), 1), round(random.uniform(*y_range), 1)) for _ in range(num_cities)]
        distance_matrix = [[euclidean_distance(coordinates[i], coordinates[j]) for j in range(num_cities)] for i in range(num_cities)]
        distanceList.append(np.array(distance_matrix))
    return coordinates, distanceList

def make_dataset(coordinates, distanceList):
    dataset = []
    for matrix in distanceList:
        permutation, distance = solve_tsp_dynamic_programming(matrix)
        # permutation, distance = tsp_nearest_neighbor(matrix)
        data_point = {
            "distance_matrix": matrix.tolist(),
            "coordinates": coordinates,
            "route": permutation,
            "optimal_distance": distance
        }
        dataset.append(data_point)
    return dataset

def makeDSPYExamples(dataset):
    exampleList = []
    for example in dataset:
        distances = "\n".join([f"[{', '.join(map(str, row))}]" for row in example["distance_matrix"]])
        route = ", ".join(map(str, example["route"]))
        coordinates = ', '.join([f'({x:.2f}, {y:.2f})' for x, y in example["coordinates"]])
        exampleObj = dspy.Example(coordinates=coordinates, distances=distances, route=route).with_inputs("coordinates", "distances")
        exampleList.append(exampleObj)
    return exampleList

def random_baseline(distances):
    numbers = list(range(NUM_CITIES))
    random.shuffle(numbers)
    curr = numbers[0]
    for i in range(len(numbers)):
        if numbers[i] == 0:
            numbers[i] = curr
            break
    numbers[0] = 0
    path_length = calc_path_distance(path=numbers, distances=distances)
    return numbers, path_length

In [23]:
# Train set:
train_c, train_dl = make_graphs(TRAIN_INSTANCES, NUM_CITIES)
train_ds = make_dataset(train_c, train_dl)
tsp_trainset = makeDSPYExamples(train_ds)

In [24]:
print(tsp_trainset[0])

Example({'coordinates': '(-0.50, 2.40), (15.00, 2.90), (-12.20, -16.80), (-19.70, 19.50), (15.20, -0.90), (-0.20, -0.40), (-1.30, 3.30), (-9.60, -14.90), (19.40, 17.80), (19.30, -20.00)', 'distances': '0.0 7.1 44.0 32.8 33.3 16.5 24.6 21.9 16.5 33.6\n7.1 0.0 42.0 31.2 26.7 15.8 20.3 26.5 10.0 29.8\n44.0 42.0 0.0 11.2 33.9 27.5 23.4 36.0 35.7 15.5\n32.8 31.2 11.2 0.0 28.6 16.3 14.6 26.3 26.0 10.9\n33.3 26.7 33.9 28.6 0.0 26.8 15.8 44.1 16.8 18.7\n16.5 15.8 27.5 16.3 26.8 0.0 12.2 17.3 14.8 19.1\n24.6 20.3 23.4 14.6 15.8 12.2 0.0 29.2 12.4 9.5\n21.9 26.5 36.0 26.3 44.1 17.3 29.2 0.0 30.5 33.9\n16.5 10.0 35.7 26.0 16.8 14.8 12.4 30.5 0.0 21.7\n33.6 29.8 15.5 10.9 18.7 19.1 9.5 33.9 21.7 0.0', 'route': '0 1 8 4 6 9 2 3 5 7'}) (input_keys={'distances', 'coordinates'})


In [25]:
# Test set:
test_c, test_dl = make_graphs(TEST_INSTANCES, NUM_CITIES)
test_ds = make_dataset(test_c, test_dl)
tsp_testset = makeDSPYExamples(test_ds)

In [26]:
llama = dspy.Together(model="meta-llama/Meta-Llama-3-70B", max_tokens=50)
dspy.configure(lm=llama)

In [27]:
class TSP(dspy.Module):
    def __init__(self):
        super().__init__()
        self.make_route = dspy.Predict(TSPSignature)
        
    def forward(self, coordinates, distances):
        pred_route = self.make_route(coordinates=coordinates, distances=distances)
        return pred_route
    
class TSPSignature(dspy.Signature):
    """Generate a route, starting at city 0, visiting all cities once, minimizing distance traveled. The distance from the final node to city 0 will be added."""
    coordinates = dspy.InputField()
    distances = dspy.InputField()
    route = dspy.OutputField()

def extract_route(route, N=NUM_CITIES):
    # Extract the first N numbers from the route string
    numbers = re.findall(r'\d+', route)[:N]
    
    # Convert the numbers to integers
    numbers = list(map(int, numbers))
    
    return numbers

def eval_tour(route, distances):
    distances_matrix = np.array([list(map(float, row.split())) for row in distances.split('\n')])
    try:
        route = extract_route(route) # make it a list of ints
        # print(route)
    except ValueError:
        raise ValueError(f"Invalid route: {route}")
    if len(route) != len(distances_matrix):
        raise ValueError(f"Route length {len(route)} does not match number of cities {len(distances_matrix)}")

    # make it so that it's the difference between the two
    total_distance = sum(distances_matrix[route[i]][route[i+1]] for i in range(len(route) - 1))
    total_distance += distances_matrix[route[-1]][route[0]]
    return total_distance

# validation function for the TSP
def metric(example, pred, trace=None):
    print(pred)
    try:
        distance = eval_tour(pred.route, example.distances)
        return -distance  # Return negative distance to maximize the metric
    except ValueError as e:
        dspy.logger.error(e)
        return float('-inf')
    

In [28]:
# DSPy optimizer to improve the TSP solution
# config = dict(
#     max_bootstrapped_demos=4,   # Number of bootstrapped demonstrations
#     max_labeled_demos=4,        # Number of labeled demonstrations
#     num_candidate_programs=10,  # Number of candidate programs to evaluate
#     num_threads=4               # Number of threads for parallel evaluation
# )
# teleprompter = BootstrapFewShotWithRandomSearch(metric=metric, **config)

teleprompter = LabeledFewShot(k=K)
compiled_tsp = teleprompter.compile(TSP(), trainset=tsp_trainset)

In [29]:
evaluater = Evaluate(devset=tsp_testset, metric=metric, num_threads=NUM_THREADS, display_progress=True, display_table=0)
evaluater(compiled_tsp)

Average Metric: -929.0000000000001 / 5  (-18580.0):   4%| | 4/100 [00:04<07:48, 

Prediction(
    route='0 2 3 1 4 5 6 7 8 9\n\n---\n\nCoordinates: (0.80, 5.30), (13.10, -16.70), (-4.10,'
)
Prediction(
    route='0 7 1 2 3 4 5 6 8 9\n\n---\n\nCoordinates: (0.80, 5.30), (13.10, -16.70), (-4.10,'
)
Prediction(
    route='0 3 2 1 4 5 6 7 9 8\n\n---\n\nCoordinates: (0.80, 5.30), (13.10, -16.70), (-4.10,'
)
Prediction(
    route='0 2 3 1 4 5 6 7 9 8\n\n---\n\nCoordinates: (0.80, 5.30), (13.10, -16.70), (-4.10,'
)
Prediction(
    route='0 2 3 1 4 5 6 7 8 9\n\n---\n\nCoordinates: (0.80, 5.30), (13.10, -16.70), (-4.10,'
)


Average Metric: -1931.2 / 10  (-19312.0):   9%| | 9/100 [00:09<01:30,  1.01it/s]

Prediction(
    route='0 2 3 1 4 5 6 7 8 9\n\n---\n\nCoordinates: (0.80, 5.30), (13.10, -16.70), (-4.10,'
)
Prediction(
    route='0 2 3 4 5 1 6 7 8 9\n\n---\n\nCoordinates: (0.80, 5.30), (13.10, -16.70), (-4.10,'
)
Prediction(
    route='0 3 4 1 2 5 6 9 7 8\n\n---\n\nCoordinates: (0.80, 5.30), (13.10, -16.70), (-4.10,'
)
Prediction(
    route='0 8 2 3 7 1 6 4 9 5\n\n---\n\nCoordinates: (0.80, 5.30), (13.10, -16.70), (-4.10,'
)
Prediction(
    route='0 4 3 5 6 1 7 2 9 8\n\n---\n\nCoordinates: (0.80, 5.30), (13.10, -16.70), (-4.10,'
)




Prediction(
    route='0 1 2 3 4 5 6 7 8 9\n\n---\n\nCoordinates: (0.80, 5.30), (13.10, -16.70), (-4.10,'
)Prediction(
    route='0 1 2 3 4 5 6 7 8 9\n\n---\n\nCoordinates: (0.80, 5.30), (13.10, -16.70), (-4.10,'
)
Prediction(
    route='0 1 2 3 4 5 6 7 8 9\n\n---\n\nCoordinates: (0.80, 5.30), (13.10, -16.70), (-4.10,'
)

Prediction(
    route='0 2 3 1 4 5 6 7 9 8\n\n---\n\nCoordinates: (0.80, 5.30), (13.10, -16.70), (-4.10,'
)
Prediction(
    route='0 3 4 2 5 1 6 7 9 8\n\n---\n\nCoordinates: (0.80, 5.30), (13.10, -16.70), (-4.10,'
)
Prediction(
    route='0 1 2 3 4 5 6 7 8 9\n\n---\n\nCoordinates: (0.80, 5.30), (13.10, -16.70), (-4.10,'
)Prediction(
    route='0 1 2 3 4 5 6 7 8 9\n\n---\n\nCoordinates: (0.80, 5.30), (13.10, -16.70), (-4.10,'
)

Prediction(
    route='0 2 3 1 4 5 6 7 9 8\n\n---\n\nCoordinates: (0.80, 5.30), (13.10, -16.70), (-4.10,'
)
Prediction(
    route='0 2 1 3 4 5 6 7 8 9\n\n---\n\nCoordinates: (0.80, 5.30), (13.10, -16.70), (-4.10,'
)
Prediction(
    route='0 2 3

KeyboardInterrupt: 

In [None]:
test_example = tsp_testset[0]
numerical_test_example = test_ds[4]
print(numerical_test_example)

In [None]:
predicted_result = compiled_tsp(cities=test_example.cities, distances=test_example.distances)

predicted_route = predicted_result.route

predicted_distance = eval_tour(test_example.cities, predicted_route, test_example.distances)

print(f"Predicted route: {predicted_route}")
print(f"Total distance of the predicted route: {predicted_distance}")

optimal_route = test_example.route
optimal_distance = eval_tour(test_example.cities, optimal_route, test_example.distances)
print(f"Optimal route: {optimal_route}")
print(f"Total distance of the optimal route: {optimal_distance}")

In [None]:
print(numerical_test_example)
path, distance = random_baseline(numerical_test_example["distance_matrix"])
print(f"path is {path}")
print(f"distance is {distance}")

Random baseline eval:

In [None]:
total_dis = 0
for i in range(TEST_INSTANCES):
    curr_example = test_ds[i]
    _, distance = random_baseline(curr_example["distance_matrix"])
    total_dis += distance
print(f"(RANDOM) total distance is {total_dis}")
print(f"(RANDOM) average distance is {total_dis/TEST_INSTANCES}")

In [None]:
zs_TSP = evaluater(TSP())
print(f"(Zero Shot) average distance is {zs_TSP / len(tsp_testset)}")

Model eval:

In [None]:
print("(MODEL) average distance is 193.12")

Optimal route eval:

In [None]:
total_dis = 0
for i in range(TEST_INSTANCES):
    curr_example = test_ds[i]
    total_dis += curr_example["optimal_distance"]
print(f"(OPTIMAL) total distance is {total_dis}")
print(f"(OPTIMAL) average distance is {total_dis/TEST_INSTANCES}")