In [151]:
import dspy
import os
from dspy.teleprompt import *
import random
import math
import numpy as np
from python_tsp.exact import solve_tsp_dynamic_programming
import re
import ast

os.environ['TOGETHER_API_KEY'] = '35ba5bebf6288e43fdc8989965161592e3335d7067c772c0c6995cdc0e60cd88'
os.environ['TOGETHER_API_BASE'] = 'https://api.together.xyz/v1'

In [152]:
# constants
NUM_CITIES = 10
TRAIN_INSTANCES = 100
TEST_INSTANCES = 100
CITIES = " ".join(map(str, list(np.arange(NUM_CITIES))))
NUM_THREADS = 5
K = 6

In [153]:
# helper functions
def euclidean_distance(point1, point2):
    return round(math.sqrt((point1[0] - point2[0]) ** 2 + (point1[1] - point2[1]) ** 2), 1)

def calc_path_distance(path, distances):
    total_distance = 0
    if len(path) < 2:
        return 0
    for i in range(len(path) - 1):
        total_distance += distances[path[i]][path[i + 1]]
    total_distance += distances[0][path[len(path) - 1]]
    return total_distance

def make_graphs(num_instances, num_cities):
    x_range = (-20, 20)
    y_range = (-20, 20)

    distanceList = []
    coordinatesList = []
    for _ in range(num_instances):
        coordinates = {i: (round(random.uniform(*x_range), 1), round(random.uniform(*y_range), 1)) for i in range(num_cities)}
        distance_matrix = [[euclidean_distance(coordinates[i], coordinates[j]) for j in range(num_cities)] for i in range(num_cities)]
        distanceList.append(np.array(distance_matrix))
        coordinatesList.append(coordinates)
    return coordinatesList, distanceList

def make_dataset(coordinatesList, distanceList):
    dataset = []
    for coordinates, matrix in zip(coordinatesList, distanceList):
        permutation, distance = solve_tsp_dynamic_programming(matrix)
        data_point = {
            "coordinates": coordinates,
            "route": permutation,
            "optimal_distance": distance
        }
        dataset.append(data_point)
    return dataset

def makeDSPYExamples(dataset):
    exampleList = []
    for example in dataset:
        route = ", ".join(map(str, example["route"]))
        coordinates = ', '.join([f'{index}: ({x:.2f}, {y:.2f})' for index, (x, y) in example["coordinates"].items()])
        exampleObj = dspy.Example(coordinates=coordinates, route=route).with_inputs("coordinates")
        exampleList.append(exampleObj)
    return exampleList

def random_baseline(coordinates):
    num_cities = len(coordinates)
    numbers = list(range(num_cities))
    random.shuffle(numbers)
    curr = numbers[0]
    for i in range(len(numbers)):
        if numbers[i] == 0:
            numbers[i] = curr
            break
    numbers[0] = 0
    distances = [[euclidean_distance(coordinates[i], coordinates[j]) for j in range(num_cities)] for i in range(num_cities)]
    path_length = calc_path_distance(path=numbers, distances=distances)
    return numbers, path_length

In [154]:
# Train set:
train_c, train_dl = make_graphs(TRAIN_INSTANCES, NUM_CITIES)
train_ds = make_dataset(train_c, train_dl)
tsp_trainset = makeDSPYExamples(train_ds)

Average Metric: 0.0 / 3  (0.0):   3%|▏       | 3/100 [12:40<6:49:44, 253.45s/it]
Average Metric: 0.0 / 3  (0.0):   3%|▏       | 3/100 [12:23<6:40:53, 247.98s/it]
Average Metric: 0.0 / 3  (0.0):   3%|▏       | 3/100 [12:02<6:29:27, 240.90s/it]
Average Metric: 0.0 / 4  (0.0):   4%|▎       | 4/100 [11:45<4:42:05, 176.31s/it]
Average Metric: -inf / 5  (-inf):   5%|▎     | 5/100 [11:17<3:34:39, 135.58s/it]
  0%|                                                   | 0/100 [09:53<?, ?it/s]
  0%|                                                   | 0/100 [09:30<?, ?it/s]
  0%|                                                   | 0/100 [08:48<?, ?it/s]
  0%|                                                   | 0/100 [08:00<?, ?it/s]
Average Metric: 0 / 25  (0.0):  25%|██▊        | 25/100 [07:23<22:10, 17.74s/it]
Average Metric: 0 / 5  (0.0):   5%|▌          | 5/100 [06:45<2:08:32, 81.18s/it]
Average Metric: 0 / 10  (0.0):  10%|█          | 10/100 [06:35<59:21, 39.57s/it]
Average Metric: 0 / 5  (0.0)

In [155]:
print(tsp_trainset[0])

Example({'coordinates': '(17.00, 7.10), (0.70, 5.30), (-16.00, -3.50), (1.90, -7.20), (12.80, -3.50), (-3.90, -6.80), (3.90, 16.70), (-4.50, -7.10), (-5.00, -1.60), (-18.80, -2.20)', 'route': '0, 4, 3, 5, 7, 2, 9, 8, 1, 6'}) (input_keys={'coordinates'})


In [156]:
# Test set:
test_c, test_dl = make_graphs(TEST_INSTANCES, NUM_CITIES)
test_ds = make_dataset(test_c, test_dl)
tsp_testset = makeDSPYExamples(test_ds)

In [157]:
llama = dspy.Together(model="meta-llama/Meta-Llama-3-70B", max_tokens=50)
dspy.configure(lm=llama)

In [158]:
class TSP(dspy.Module):
    def __init__(self):
        super().__init__()
        self.make_route = dspy.Predict(TSPSignature)
        
    def forward(self, coordinates):
        pred_route = self.make_route(coordinates=coordinates)
        return pred_route
    
class TSPSignature(dspy.Signature):
    """Generate a route, starting at city 0, visiting all cities once, minimizing distance traveled. The distance from the final node to city 0 will be added."""
    coordinates = dspy.InputField()
    route = dspy.OutputField()

def extract_route(route, N=NUM_CITIES):
    # Extract the first N numbers from the route string
    numbers = re.findall(r'\d+', route)[:N]
    
    # Convert the numbers to integers
    numbers = list(map(int, numbers))
    # print(f"extracted: {numbers}")
    return numbers

def eval_tour(coordinates, route):
    try:
        route = extract_route(route) # make it a list of ints
    except ValueError:
        raise ValueError(f"Invalid route: {route}")
    if len(route) != NUM_CITIES:
        raise ValueError(f"Route length {len(route)} does not match number of cities {NUM_CITIES}")

    coordinates = eval(coordinates)
    total_distance = 0
    for i in range(len(route) - 1):
        total_distance += euclidean_distance(coordinates[route[i]], coordinates[route[i+1]])
    total_distance += euclidean_distance(coordinates[route[-1]], coordinates[route[0]])
    
    return total_distance

# validation function for the TSP
def metric(example, pred, trace=None):
    # print(f"pred.route: {pred.route}")
    try:
        distance = eval_tour(example.coordinates, pred.route)
        return -distance  # Return negative distance to maximize the metric
    except ValueError as e:
        dspy.logger.error(e)
        return float('-inf')
    

In [159]:
teleprompter = LabeledFewShot(k=K)
compiled_tsp = teleprompter.compile(TSP(), trainset=tsp_trainset)

In [160]:
evaluater = Evaluate(devset=tsp_testset, metric=metric, num_threads=NUM_THREADS, display_progress=True, display_table=0)
evaluater(compiled_tsp)

Average Metric: -20652.4 / 100  (-20652.4): 100%|█| 100/100 [00:43<00:00,  2.29i


-20652.4

In [161]:
test_example = tsp_testset[0]
numerical_test_example = test_ds[4]
print(numerical_test_example)

{'coordinates': [(-8.2, 5.6), (-12.0, -15.5), (-16.9, -19.0), (0.7, 18.1), (-5.0, 4.9), (13.6, -1.6), (-11.1, -5.7), (16.7, 17.9), (-12.9, 12.3), (15.0, -1.5)], 'route': [0, 4, 6, 2, 1, 5, 9, 7, 3, 8], 'optimal_distance': np.float64(125.0)}


In [162]:
predicted_result = compiled_tsp(cities=test_example.cities, distances=test_example.distances)

predicted_route = predicted_result.route

predicted_distance = eval_tour(test_example.cities, predicted_route, test_example.distances)

print(f"Predicted route: {predicted_route}")
print(f"Total distance of the predicted route: {predicted_distance}")

optimal_route = test_example.route
optimal_distance = eval_tour(test_example.cities, optimal_route, test_example.distances)
print(f"Optimal route: {optimal_route}")
print(f"Total distance of the optimal route: {optimal_distance}")

AttributeError: 'Example' object has no attribute 'cities'

In [None]:
print(numerical_test_example)
path, distance = random_baseline(numerical_test_example["distance_matrix"])
print(f"path is {path}")
print(f"distance is {distance}")

Random baseline eval:

In [None]:
total_dis = 0
for i in range(TEST_INSTANCES):
    curr_example = test_ds[i]
    _, distance = random_baseline(curr_example["distance_matrix"])
    total_dis += distance
print(f"(RANDOM) total distance is {total_dis}")
print(f"(RANDOM) average distance is {total_dis/TEST_INSTANCES}")

In [None]:
zs_TSP = evaluater(TSP())
print(f"(Zero Shot) average distance is {zs_TSP / len(tsp_testset)}")

Model eval:

In [None]:
print("(MODEL) average distance is 193.12")

Optimal route eval:

In [None]:
total_dis = 0
for i in range(TEST_INSTANCES):
    curr_example = test_ds[i]
    total_dis += curr_example["optimal_distance"]
print(f"(OPTIMAL) total distance is {total_dis}")
print(f"(OPTIMAL) average distance is {total_dis/TEST_INSTANCES}")