In [1]:
import sys
sys.path.append('.')
sys.path.append('..')
from problem_loader import ProblemLoader
from helpers import obfuscate

# Download locations for the assignment data, keyed by problem name.
# NOTE(review): the 'tsp' URL carries `Expires=` and `Signature=` query parameters,
# so it looks like a time-limited signed link -- presumably it stops working once
# expired and the notebook then depends on a previously cached copy; confirm.
data_urls = {
    'tsp': 'https://d18ky98rnyall9.cloudfront.net/_ae5a820392a02042f87e3b437876cf19_nn.txt?Expires=1629676800&Signature=IUbkcfnz4NNlmN3S3Q~WVAfiCzdKHBccrV5bbWLs8vqhYTw-bc0qDYcr~rbwpruXYwOsj~fevQbreOVyxtN1rhDIhHq6CRy-XH93jmJvG11a3t3J9FlHRPpDzHZbJ9rGO9KZLE21kuTwlcr5gwlBn6txOlyZ25qs8wkcesJoh6A_&Key-Pair-Id=APKAJLTNE6QMUY6HBC5A'
}

# Question 1

In this assignment we will revisit an old friend, the traveling salesman problem (TSP).  This week you will implement a heuristic for the TSP, rather than an exact algorithm, and as a result will be able to handle much larger problem sizes.  Here is a data file describing a TSP instance (original source: http://www.math.uwaterloo.ca/tsp/world/bm33708.tsp).
nn.txt

The first line indicates the number of cities. Each city is a point in the plane, and each subsequent line indicates the x- and y-coordinates of a single city.

The distance between two cities is defined as the Euclidean distance --- that is, two cities at locations $(x,y)$ and $(z,w)$ have distance $\sqrt{(x-z)^2 + (y-w)^2}$ between them.  

You should implement the nearest neighbor heuristic:

1.  Start the tour at the first city.
2.  Repeatedly visit the closest city that the tour hasn't visited yet.  In case of a tie, go to the closest city with the lowest index.  For example, if both the third and fifth cities have the same distance from the first city (and are closer than any other city), then the tour should begin by going from the first city to the third city.
3.  Once every city has been visited exactly once, return to the first city to complete the tour.

In the box below, enter the cost of the traveling salesman tour computed by the nearest neighbor heuristic for this instance, rounded down to the nearest integer.

### Hint: 
when constructing the tour, you might find it simpler to work with squared Euclidean distances (i.e., the formula above but without the square root) than Euclidean distances.  But don't forget to report the length of the tour in terms of standard Euclidean distance.

In [2]:
from collections import namedtuple

# One city of the TSP instance: planar coordinates plus the index given in the file.
City = namedtuple('City', ['x', 'y', 'index'])

def process_cities(data):
  """Parse the raw TSP instance into a list of City records.

  Args:
    data: the file content as UTF-8 encoded bytes. Each city line holds
      ``index x y`` separated by whitespace; lines with fewer than two fields
      (the leading city count, blank lines) are skipped.

  Returns:
    A list of City tuples in file order, with float coordinates and int index.

  Raises:
    IndexError: if a line has exactly two fields (no y coordinate).
    ValueError: if a field cannot be converted to a number.
  """
  parsed = []
  for raw_line in data.split(b'\n'):
    # split() with no argument tolerates tabs, repeated spaces and a trailing
    # '\r', whereas split(' ') yields empty fields for those inputs.
    fields = raw_line.decode('utf-8').split()
    if len(fields) > 1:
      # Python floats are IEEE-754 doubles, enough for the 4-decimal coordinates.
      parsed.append(City(x=float(fields[1]), y=float(fields[2]), index=int(fields[0])))
  return parsed

# Load the TSP instance as a list of City records.
# ProblemLoader comes from the local problem_loader module; presumably it downloads
# the URL, caches the result under `fname`, and runs `preprocessor` on the raw bytes
# -- verify against problem_loader.
cities = ProblemLoader(
    data_urls['tsp'], 
    fname="tsp.p", 
    preprocessor=process_cities,
).fetch()
# Sanity check: number of cities and the first ten records.
print(len(cities), cities[:10])
# Note: in the output, city 3 is closer to city 1 than city 2 is (x offset of
# ~16.67 only, versus ~16.67 in both x and y), so the nearest-neighbor tour
# should go from city 1 to city 3 first.

33708 [City(x=9983.3333, y=98550.0, index=1), City(x=10000.0, y=98533.3333, index=2), City(x=10000.0, y=98550.0, index=3), City(x=10000.0, y=98566.6667, index=4), City(x=10016.6667, y=98516.6667, index=5), City(x=10033.3333, y=98533.3333, index=6), City(x=10033.3333, y=98550.0, index=7), City(x=10033.3333, y=98583.3333, index=8), City(x=10050.0, y=98550.0, index=9), City(x=10066.6667, y=98516.6667, index=10)]


In [3]:
def get_distance(left, right):
  """Euclidean distance between two points.

  Both arguments only need to expose numeric ``x`` and ``y`` attributes.
  """
  dx = left.x - right.x
  dy = left.y - right.y
  squared_length = dx ** 2 + dy ** 2
  return squared_length ** 0.5


In [4]:
# 1.  Start the tour at the first city.
# `tour` is the ordered list of visited cities; the nearest-neighbor loop in a
# later cell appends to this same list in place.
tour = [cities[0]]

In [5]:
from math import inf

# 2.  Repeatedly visit the closest city that the tour hasn't visited yet. 
def _is_closer(city, dist, candidate, best_dist):
  """Return True when `city` at distance `dist` should replace `candidate`.

  A city wins when there is no candidate yet, when it is strictly closer, or
  when it ties on distance but has the lower index.
  """
  if candidate is None or dist < best_dist:
    return True
  # In case of a tie, go to the closest city with the lowest index.
  return dist == best_dist and city.index < candidate.index


def nearest_neightbor(graph, node):
  """Return the city in `graph` nearest to `node`, excluding `node` itself.

  (The misspelled name is kept because existing callers use it.)

  Args:
    graph: list of cities (objects with ``x``, ``y`` and ``index``) that must be
      sorted by ascending ``x`` and must contain `node`.
    node: the city whose nearest neighbor is wanted.

  Returns:
    The closest other city, ties broken by lowest ``index``; None when `graph`
    holds no city other than `node`.

  Raises:
    ValueError: if `node` is not in `graph`.
  """
  here = graph.index(node)
  size = len(graph)
  candidate = None
  best_dist = inf

  # Walk outwards from `node` in both directions, alternating left then right.
  # Because the list is sorted by x, once the x-offset alone on one side exceeds
  # the best distance found so far, nothing further on that side can be closer,
  # so that side is abandoned. Indices are walked in place instead of building
  # reversed/sliced copies of the list on every call.
  lo = here - 1  # next index to examine on the left; -1 once that side is done
  hi = here + 1  # next index to examine on the right; `size` once done
  while lo >= 0 or hi < size:
    if lo >= 0:
      city = graph[lo]
      if node.x - city.x > best_dist:
        lo = -1
      else:
        dist = get_distance(city, node)
        if _is_closer(city, dist, candidate, best_dist):
          candidate, best_dist = city, dist
        lo -= 1
    if hi < size:
      city = graph[hi]
      if city.x - node.x > best_dist:
        hi = size
      else:
        dist = get_distance(city, node)
        if _is_closer(city, dist, candidate, best_dist):
          candidate, best_dist = city, dist
        hi += 1
  return candidate
  
# Build the tour greedily. `graph` is a working copy holding the cities not yet
# left behind by the tour, so `cities` itself stays untouched.
graph = cities[:]
while len(tour) < len(cities):
  # The current city must still be in `graph` here: nearest_neightbor locates it
  # with graph.index(node) before scanning its neighbours.
  city = nearest_neightbor(graph, tour[-1])
  # Only now drop the current city so it can never be selected again.
  graph.remove(tour[-1])
  tour.append(city)


In [7]:
# 3.  Once every city has been visited exactly once, return to the first city to complete the tour.
def tour_cost(tour):
  """Return the total Euclidean length of the closed tour.

  Sums the distance between each consecutive pair of cities and adds the final
  leg from the last city back to the first.

  Args:
    tour: sequence of cities (objects with ``x`` and ``y``) in visiting order.

  Returns:
    The tour length as a float; 0.0 for an empty tour.
  """
  if len(tour) == 0:
    # Nothing to visit; avoids indexing tour[-1] on an empty sequence.
    return 0.0
  legs = sum(get_distance(a, b) for a, b in zip(tour, tour[1:]))
  # Close the cycle: return from the last city to the starting city.
  return legs + get_distance(tour[-1], tour[0])

# Spot-check the tour by printing the city index found at a few tour positions.
# NOTE(review): the assignment asks for the tour cost rounded down; this cell
# does not call tour_cost(tour), so that value is not shown here.
print(tour[1].index, tour[100].index, tour[1000].index, tour[10000].index)


3 166 1205 29596
