In [1]:
import ast
import math
import os
import random
import re
import warnings

import dspy
import numpy as np
from dspy.teleprompt import *
from python_tsp.exact import solve_tsp_dynamic_programming

# The Together API key is a secret: it must be supplied through the environment
# (e.g. `export TOGETHER_API_KEY=...` before launching Jupyter) and must never be
# committed inside the notebook. Any key that was previously hard-coded here
# should be treated as leaked and revoked.
if not os.environ.get('TOGETHER_API_KEY'):
    warnings.warn(
        "TOGETHER_API_KEY is not set; calls to the Together API will fail "
        "until it is exported in the environment.",
        RuntimeWarning,
    )
os.environ['TOGETHER_API_BASE'] = 'https://api.together.xyz/v1'

In [2]:
# Experiment configuration: everything a reader might want to tune lives here.
SEED = 0                # seed for the `random` module so the generated graphs are reproducible
NUM_CITIES = 10         # number of cities in every generated TSP instance
TRAIN_INSTANCES = 100   # number of instances generated for the train set
TEST_INSTANCES = 100    # number of instances generated for the test set
# Space-separated city labels ("0 1 2 ...") passed to the LM as the `cities` input.
CITIES = " ".join(map(str, range(NUM_CITIES)))
NUM_THREADS = 5         # worker threads handed to the evaluator
K = 8                   # `k` passed to the LabeledFewShot optimizer

# Graph generation and the random baseline both draw from the global `random`
# generator; seeding it here makes Restart & Run All produce the same data.
random.seed(SEED)

In [3]:
# helper functions
def euclidean_distance(point1, point2):
    """Return the straight-line distance between two 2-D points, rounded to 0.1.

    Only the first two coordinates of each point are used.
    """
    delta_x = point1[0] - point2[0]
    delta_y = point1[1] - point2[1]
    squared_length = delta_x ** 2 + delta_y ** 2
    return round(math.sqrt(squared_length), 1)

def calc_path_distance(path, distances):
    """Return the length of the closed tour that visits ``path`` in order.

    Args:
        path: Sequence of city indices in visiting order.
        distances: Square matrix (nested lists or array) where
            ``distances[i][j]`` is the distance from city ``i`` to city ``j``.

    Returns:
        The sum of every consecutive leg plus the closing leg from the last
        city back to the first city of ``path``; ``0`` when ``path`` holds
        fewer than two cities.
    """
    if len(path) < 2:
        return 0
    total_distance = 0
    for origin, destination in zip(path, path[1:]):
        total_distance += distances[origin][destination]
    # Closing leg: indexed in the direction of travel (last -> first) and
    # anchored on the path's own start rather than a hard-coded city 0, so the
    # result is also right for asymmetric matrices and matches eval_tour.
    total_distance += distances[path[-1]][path[0]]
    return total_distance

def make_graphs(num_instances, num_cities):
    """Generate random TSP instances as pairwise distance matrices.

    Each instance places ``num_cities`` points uniformly at random in the
    square [-20, 20] x [-20, 20] and stores the rounded Euclidean distance
    between every pair of points.

    Returns:
        A list of ``num_instances`` NumPy arrays, one matrix per instance.
    """
    x_range = (-20, 20)
    y_range = (-20, 20)

    graphs = []
    for _ in range(num_instances):
        # Draw x before y for every city, one city at a time.
        points = []
        for _ in range(num_cities):
            x_coord = random.uniform(*x_range)
            y_coord = random.uniform(*y_range)
            points.append((x_coord, y_coord))

        rows = []
        for source in points:
            rows.append([euclidean_distance(source, target) for target in points])
        graphs.append(np.array(rows))
    return graphs

def make_dataset(distanceList):
    """Solve every instance exactly and pair it with its solution.

    Args:
        distanceList: Iterable of distance matrices (NumPy arrays).

    Returns:
        A list of dicts with keys ``"distance_matrix"`` (nested lists),
        ``"route"`` and ``"optimal_distance"`` (the permutation and distance
        returned by ``solve_tsp_dynamic_programming``).
    """
    def solve_instance(matrix):
        # Build the record for a single distance matrix.
        best_route, best_length = solve_tsp_dynamic_programming(matrix)
        return {
            "distance_matrix": matrix.tolist(),
            "route": best_route,
            "optimal_distance": best_length,
        }

    return [solve_instance(matrix) for matrix in distanceList]

def makeDSPYExamples(dataset):
    """Convert solved instances into ``dspy.Example`` objects.

    The distance matrix is rendered as ``"[[a, b], [c, d]]"`` and the route as
    ``"0, 3, 1"``; ``cities`` and ``distances`` are marked as the inputs.
    """
    examples = []
    for record in dataset:
        row_texts = []
        for row in record["distance_matrix"]:
            row_texts.append("[" + ", ".join(str(value) for value in row) + "]")
        distances = "[" + ", ".join(row_texts) + "]"
        route = ", ".join(str(city) for city in record["route"])

        example = dspy.Example(cities=CITIES, distances=distances, route=route)
        examples.append(example.with_inputs("cities", "distances"))
    return examples

def random_baseline(distances):
    """Build a random tour over ``NUM_CITIES`` cities that starts at city 0.

    Returns:
        ``(path, path_length)`` where ``path`` is the shuffled city order with
        city 0 moved to the front and ``path_length`` comes from
        ``calc_path_distance``.
    """
    order = list(range(NUM_CITIES))
    random.shuffle(order)
    # Move city 0 to the front by putting the displaced first city where 0 was.
    displaced = order[0]
    order[order.index(0)] = displaced
    order[0] = 0
    return order, calc_path_distance(path=order, distances=distances)

In [4]:
# Train set: random graphs -> exact solutions -> DSPy examples.
train_dl = make_graphs(num_instances=TRAIN_INSTANCES, num_cities=NUM_CITIES)
train_ds = make_dataset(distanceList=train_dl)
tsp_trainset = makeDSPYExamples(dataset=train_ds)

In [5]:
# Test set: built the same way as the train set, from freshly drawn graphs.
test_dl = make_graphs(num_instances=TEST_INSTANCES, num_cities=NUM_CITIES)
test_ds = make_dataset(distanceList=test_dl)
tsp_testset = makeDSPYExamples(dataset=test_ds)

In [6]:
# Language model used for every prediction in this notebook, served through Together.
# NOTE(review): max_tokens=50 presumably caps the completion length; confirm it is
# enough for a full NUM_CITIES-long route in the chosen output format.
llama = dspy.Together(model="meta-llama/Meta-Llama-3-70B", max_tokens=50)
# Register the client as DSPy's LM.
dspy.configure(lm=llama)

In [7]:
class TSP(dspy.Module):
    """DSPy program that asks the LM for a TSP route in a single prediction."""

    def __init__(self):
        super().__init__()
        # One Predict step driven by TSPSignature (resolved when TSP() is instantiated).
        self.make_route = dspy.Predict(TSPSignature)

    def forward(self, cities, distances):
        """Return the prediction produced for the given cities and distances."""
        return self.make_route(cities=cities, distances=distances)
    
# Signature describing the TSP task to DSPy: two inputs and one output field.
# NOTE: the class docstring is prompt text, not just documentation. The recorded
# inspect_history output in this notebook starts with exactly this sentence, so
# editing the docstring changes what is sent to the LM.
class TSPSignature(dspy.Signature):
    """Generate a route, starting at city 0, visiting all cities once, minimizing distance traveled. The distance from the final node to city 0 will be added."""
    cities = dspy.InputField()  # city labels, e.g. "0 1 2 ... 9"
    distances = dspy.InputField()  # distance matrix rendered as a nested-list string
    route = dspy.OutputField()  # LM answer; demos use comma-separated city indices

def extract_route(route, N=NUM_CITIES):
    """Parse the first ``N`` runs of digits found in ``route`` into integers.

    Everything that is not a digit acts as a separator, so both
    ``"0, 3, 1"`` and ``"0 3 1"`` yield ``[0, 3, 1]``.
    """
    digit_runs = re.findall(r'\d+', route)
    return [int(run) for run in digit_runs[:N]]

def eval_tour(cities, route, distances):
    """Return the length of the closed tour described by ``route``.

    Args:
        cities: Unused; kept so existing callers keep working.
        route: Text containing the city indices in visiting order (for
            example the raw LM completion); parsed with ``extract_route``.
        distances: String holding a Python literal for the square distance
            matrix, e.g. ``"[[0.0, 1.5], [1.5, 0.0]]"``.

    Returns:
        The sum of all consecutive legs plus the leg from the last city back
        to the first one.

    Raises:
        ValueError: If the route cannot be parsed, has a different number of
            cities than the matrix, or does not visit every city exactly
            once. A malformed ``distances`` literal is also reported by
            ``ast.literal_eval`` as ``ValueError`` (or ``SyntaxError``).
    """
    # literal_eval only accepts Python literals, so unlike eval() it cannot
    # execute code that ends up inside the distances string.
    distances_matrix = np.array(ast.literal_eval(distances))

    try:
        route = extract_route(route)  # make it a list of ints
    except ValueError:
        raise ValueError(f"Invalid route: {route}")
    if len(route) != len(distances_matrix):
        raise ValueError(f"Route length {len(route)} does not match number of cities {len(distances_matrix)}")
    # A valid tour is a permutation of 0..n-1. Without this check a route that
    # repeats cities would be scored (possibly "better" than the optimum) and
    # an out-of-range index would surface as an IndexError below.
    if sorted(route) != list(range(len(distances_matrix))):
        raise ValueError(f"Route {route} does not visit every city exactly once")

    total_distance = sum(distances_matrix[route[i]][route[i+1]] for i in range(len(route) - 1))
    # Close the tour: last city back to the starting city.
    total_distance += distances_matrix[route[-1]][route[0]]
    return total_distance

# validation function for the TSP
def metric(example, pred, trace=None):
    """Score a predicted route for the DSPy evaluator (higher is better).

    Args:
        example: Example providing ``cities`` and ``distances``.
        pred: Prediction whose ``route`` attribute holds the LM answer.
        trace: Unused; accepted because DSPy passes it to metrics.

    Returns:
        The negated tour length, so that maximizing the metric minimizes the
        distance; ``float('-inf')`` when the route is invalid.
    """
    try:
        distance = eval_tour(example.cities, pred.route, example.distances)
        return -distance  # Return negative distance to maximize the metric
    except (ValueError, IndexError) as e:
        # IndexError: the LM named a city index outside the distance matrix.
        # It is as invalid as a malformed route, so it is scored the same way
        # instead of aborting the evaluation.
        dspy.logger.error(e)
        return float('-inf')
    

In [8]:
# DSPy optimizer ("teleprompter") used to turn TSP() into a few-shot program.
#
# Disabled alternative: bootstrapped few-shot with random search.
# config = dict(
#     max_bootstrapped_demos=4,   # Number of bootstrapped demonstrations
#     max_labeled_demos=4,        # Number of labeled demonstrations
#     num_candidate_programs=10,  # Number of candidate programs to evaluate
#     num_threads=4               # Number of threads for parallel evaluation
# )
# teleprompter = BootstrapFewShotWithRandomSearch(metric=metric, **config)

# Active choice: LabeledFewShot with k=K (presumably K labeled train examples as demos).
teleprompter = LabeledFewShot(k=K)
# Disabled alternative: teleprompter = BootstrapFewShot(max_labeled_demos=5, max_bootstrapped_demos=5)
# Compile a fresh TSP program against the train set.
compiled_tsp = teleprompter.compile(TSP(), trainset=tsp_trainset)

In [9]:
# Evaluator over the test set using `metric` (negated tour length) on NUM_THREADS threads.
# The same evaluator object is reused later for the zero-shot program.
evaluater = Evaluate(devset=tsp_testset, metric=metric, num_threads=NUM_THREADS, display_progress=True, display_table=0)
# Score the compiled few-shot program. The recorded run of this cell ended in a
# KeyboardInterrupt, so no result is stored in the notebook.
evaluater(compiled_tsp)



KeyboardInterrupt: 

In [10]:
# Debug aid: show the most recent LM interaction (n=1) to check the prompt format.
llama.inspect_history(n=1)




Generate a route, starting at city 0, visiting all cities once, minimizing distance traveled. The distance from the final node to city 0 will be added.

---

Follow the following format.

Cities: ${cities}
Distances: ${distances}
Route: ${route}

---

Cities: 0 1 2 3 4 5 6 7 8 9
Distances: [[0.0, 27.6, 41.1, 14.0, 30.0, 22.9, 17.9, 34.8, 26.5, 19.1], [27.6, 0.0, 19.3, 24.6, 27.0, 6.8, 33.0, 34.1, 2.3, 9.8], [41.1, 19.3, 0.0, 31.5, 22.4, 25.8, 37.8, 27.6, 21.6, 22.7], [14.0, 24.6, 31.5, 0.0, 16.2, 23.2, 8.5, 20.8, 24.7, 14.9], [30.0, 27.0, 22.4, 16.2, 0.0, 29.6, 18.4, 7.2, 28.3, 20.8], [22.9, 6.8, 25.8, 23.2, 29.6, 0.0, 31.7, 36.7, 4.7, 9.2], [17.9, 33.0, 37.8, 8.5, 18.4, 31.7, 0.0, 20.1, 33.1, 23.4], [34.8, 34.1, 27.6, 20.8, 7.2, 36.7, 20.1, 0.0, 35.5, 27.8], [26.5, 2.3, 21.6, 24.7, 28.3, 4.7, 33.1, 35.5, 0.0, 9.8], [19.1, 9.8, 22.7, 14.9, 20.8, 9.2, 23.4, 27.8, 9.8, 0.0]]
Route: 0, 3, 6, 7, 4, 2, 1, 8, 5, 9

---

Cities: 0 1 2 3 4 5 6 7 8 9
Distances: [[0.0, 13.6, 17.7, 36.8, 22.3,

'\n\n\nGenerate a route, starting at city 0, visiting all cities once, minimizing distance traveled. The distance from the final node to city 0 will be added.\n\n---\n\nFollow the following format.\n\nCities: ${cities}\nDistances: ${distances}\nRoute: ${route}\n\n---\n\nCities: 0 1 2 3 4 5 6 7 8 9\nDistances: [[0.0, 27.6, 41.1, 14.0, 30.0, 22.9, 17.9, 34.8, 26.5, 19.1], [27.6, 0.0, 19.3, 24.6, 27.0, 6.8, 33.0, 34.1, 2.3, 9.8], [41.1, 19.3, 0.0, 31.5, 22.4, 25.8, 37.8, 27.6, 21.6, 22.7], [14.0, 24.6, 31.5, 0.0, 16.2, 23.2, 8.5, 20.8, 24.7, 14.9], [30.0, 27.0, 22.4, 16.2, 0.0, 29.6, 18.4, 7.2, 28.3, 20.8], [22.9, 6.8, 25.8, 23.2, 29.6, 0.0, 31.7, 36.7, 4.7, 9.2], [17.9, 33.0, 37.8, 8.5, 18.4, 31.7, 0.0, 20.1, 33.1, 23.4], [34.8, 34.1, 27.6, 20.8, 7.2, 36.7, 20.1, 0.0, 35.5, 27.8], [26.5, 2.3, 21.6, 24.7, 28.3, 4.7, 33.1, 35.5, 0.0, 9.8], [19.1, 9.8, 22.7, 14.9, 20.8, 9.2, 23.4, 27.8, 9.8, 0.0]]\nRoute: 0, 3, 6, 7, 4, 2, 1, 8, 5, 9\n\n---\n\nCities: 0 1 2 3 4 5 6 7 8 9\nDistances: [[0.0, 

In [None]:
# Pick ONE test instance in both of its representations. tsp_testset is built
# from test_ds in order, so the same index must be used for both; otherwise
# the LM prediction and the random baseline below are computed on different graphs.
EXAMPLE_INDEX = 0
test_example = tsp_testset[EXAMPLE_INDEX]       # dspy.Example (string fields) for the LM
numerical_test_example = test_ds[EXAMPLE_INDEX]  # raw dict with the numeric distance matrix
print(numerical_test_example)

In [None]:
# Run the compiled program on the selected test example (this calls the LM).
predicted_result = compiled_tsp(cities=test_example.cities, distances=test_example.distances)

# Raw text of the `route` output field.
predicted_route = predicted_result.route

# eval_tour parses the route text and raises ValueError if it is not usable.
predicted_distance = eval_tour(test_example.cities, predicted_route, test_example.distances)

print(f"Predicted route: {predicted_route}")
print(f"Total distance of the predicted route: {predicted_distance}")

# Reference: the solver's route stored on the example (a comma-separated string),
# scored with the same function so both numbers are comparable.
optimal_route = test_example.route
optimal_distance = eval_tour(test_example.cities, optimal_route, test_example.distances)
print(f"Optimal route: {optimal_route}")
print(f"Total distance of the optimal route: {optimal_distance}")

In [None]:
# Random baseline on the selected numeric example: a shuffled tour that starts at city 0.
print(numerical_test_example)
# random_baseline returns (path, path_length).
path, distance = random_baseline(numerical_test_example["distance_matrix"])
print(f"path is {path}")
print(f"distance is {distance}")

Random baseline eval:

In [None]:
# Random baseline over the first TEST_INSTANCES test instances:
# one random tour per instance, then the total and the mean tour length.
total_dis = 0
for i in range(TEST_INSTANCES):
    curr_example = test_ds[i]
    # Only the tour length is needed; the path itself is discarded.
    _, distance = random_baseline(curr_example["distance_matrix"])
    total_dis += distance
print(f"(RANDOM) total distance is {total_dis}")
print(f"(RANDOM) average distance is {total_dis/TEST_INSTANCES}")

In [None]:
# Zero-shot reference: evaluate an uncompiled TSP() program with the same evaluator.
zs_TSP = evaluater(TSP())
# NOTE(review): `metric` returns the NEGATED tour length (and -inf for invalid routes),
# so this value is expected to be negative or -inf. Whether the evaluator returns a sum,
# a mean or a percentage is not visible here; confirm that dividing by
# len(tsp_testset) really yields an average distance.
print(f"(Zero Shot) average distance is {zs_TSP / len(tsp_testset)}")

Model eval:

In [None]:
# NOTE(review): this number is a hard-coded literal, not computed by the notebook.
# It will not change when the data, model or optimizer change; TODO derive it
# from the evaluation of compiled_tsp instead.
print("(MODEL) average distance is 159.6")

Optimal route eval:

In [None]:
# Lower bound for comparison: total and mean of the solver's optimal distances
# over the first TEST_INSTANCES test instances. Reuses the name `total_dis`
# from the random-baseline cell, overwriting its value.
total_dis = 0
for i in range(TEST_INSTANCES):
    curr_example = test_ds[i]
    total_dis += curr_example["optimal_distance"]
print(f"(OPTIMAL) total distance is {total_dis}")
print(f"(OPTIMAL) average distance is {total_dis/TEST_INSTANCES}")