# <font color="blue">Submitted by: Kaspar Kadalipp </font>
# HW9. Search heuristics and TSP optimization

### <font color='orange'> Less important code is placed here</font>
### <font color='orange'> Report is below </font>

In [1]:
import math
import random
from queue import PriorityQueue
import cv2

In [2]:
inf = float('inf')
ex1_data = """circle 10 10 3 blue
circle 240 240 3 red
text "A" 15 20 20 black
text "B" 225 235 20 black
rectangle 0 50 150 60 pink
rectangle 0 110 50 125 pink
rectangle 70 110 250 125 pink
rectangle 100 125 130 220 pink
rectangle 160 150 180 250 pink"""

ex3_data = """circle 500 920 8 blue
circle 500 80 8 red
circle 100 250 75 lightgreen
circle 300 250 75 lightgreen
circle 500 250 75 lightgreen
circle 700 250 75 lightgreen
circle 900 250 75 lightgreen
rectangle 100 400 900 500 lightblue
rectangle 0 400 100 500 lightgreen
rectangle 900 400 1000 500 lightgreen
rectangle 0 600 400 650 pink
rectangle 600 600 1000 650 pink
rectangle 200 700 800 750 pink
line 200 100 800 100 2 pink
line 200 100 200 0 2 pink
line 1000 1000 700 800 7 pink
line 750 950 600 700 7 pink
text "A" 510 930 20 black
text "B" 510 80 20 black"""


def interpretColor(color):
    if color == 'pink':
        return 'wall'
    if color == 'lightgreen':
        return 'swamp'
    if color == 'lightblue':
        return 'sea'
    return inf


def ex1_graph():
    graph = [[inf for _ in range(250)] for _ in range(250)]
    for line in ex1_data.split("\n"):
        values = line.strip().split(" ")
        if values[0] == "rectangle":
            name, x_start, y_start, x_end, y_end, color =  values
            for x in range(int(x_start), int(x_end)):
                for y in range(int(y_start), int(y_end)):
                    graph[x][y] = 'wall'
    return graph

def ex3_graph():
    graph = [[inf for _ in range(1000)] for _ in range(1000)]
    img = cv2.imread('big_world.png', 0)
    for row, values in enumerate(img):
        for col, val in enumerate(values):
            color = ''
            if val == 204:
                color = 'lightblue'
            if val == 212:
                color = 'pink'
            if val == 199:
                color = 'limegreen'
            graph[col][row] = interpretColor(color)
    return graph

def manhattanDistance(node1, node2):
    x1, y1 = node1
    x2, y2 = node2
    return abs(x1 - x2) + abs (y1 - y2)

def chebyshevDistance(node1, node2):
    x1, y1 = node1
    x2, y2 = node2
    return max(abs(x1 - x2), abs (y1 - y2))

def euclideanDistance(node1, node2):
    x1, y1 = node1
    x2, y2 = node2
    return math.sqrt((x1 - x2)**2 + (y1 - y2)**2)

def getAdjacentManhattan(vertex):
    x, y = vertex
    return [(x-1, y), (x+1, y), (x, y-1), (x, y+1)]

def getAdjacentChebyshev(vertex):
    x, y = vertex
    return [(x-1, y), (x-1, y-1), (x, y-1), (x+1, y-1), (x+1, y), (x+1, y+1), (x, y+1), (x-1, y+1)]

def minimumDistance(graph, adjacent):
    size = len(graph)
    result = []
    for row, col in adjacent:
        if 0 <= row < size and 0 <= col < size:
            if graph[row][col] != 'wall':
                result.append(graph[row][col])
    return min(result)

def adjacentVertices(graph, adjacent):
    result = []
    size = len(graph)
    for row, col in adjacent:
        if 0 <= row < size and 0 <= col < size:
            if graph[row][col] != 'wall':
                result.append((row, col))
    return result

def visited_vertices(result, graph, distance, task):
    with open(f'{task}_{distance}.txt', 'w+') as f:
        for x,y in result:
            f.write(f"{x} {y}\n")

def graphvis_visisted_vertices(result, graph, distance, task):
    if task == 'ex3':
        map = ex3_data
    else:
        map = ex1_data
    with open(f'{task}_visited_{distance}.txt', 'w+') as f:
        for x,y in result:
            f.write(f"circle {x} {y} 1 orange\n")
        f.write(map)

def graphvis_path(result, graph, goal, distance, task, iterations = 1):
    if distance == "manhattan":
        adjacentIndexes = getAdjacentManhattan
    elif distance == "chebyshev":
        adjacentIndexes = getAdjacentChebyshev
    else:
        print("invalid distance type")
        return

    if task == 'ex3':
        map = ex3_data
    else:
        map = ex1_data

    last = goal
    goal_x, goal_y = goal
    if graph[goal_x][goal_y] == 'wall':
        print("Goal can't be a wall")
        return
    path = set()
    for _ in range(iterations):
        min_distance = graph[goal_x][goal_y]
        while min_distance > 0:
            vertices = adjacentVertices(graph, adjacentIndexes(last))
            min_distance = minimumDistance(graph, adjacentIndexes(last))
            next_best = [(x,y) for x, y in vertices if graph[x][y] == min_distance]
            last = random.choice(next_best)
            # last = random.choice([(x,y) for x, y in vertices if graph[x][y] == min_distance])
            # last = [(x,y) for x, y in vertices if graph[x][y] == min_distance][0]
            path.add(last)

    with open(f'{task}_path_{distance}.txt', 'w+') as f:
        for x,y in path:
            f.write(f"circle {x} {y} 1 orange\n")
        f.write(map)

In [3]:
# Exercise 1

def dijkstra(start, distance):
    if distance == "manhattan":
        adjacentIndices = getAdjacentManhattan
    elif distance == "chebyshev":
        adjacentIndices = getAdjacentChebyshev
    else:
        print("invalid distance type")
        return

    result = []
    graph = ex1_graph()
    visited = [[val == 'wall' for val in graph] for row in graph]

    start_x, start_y = start
    if graph[start_x][start_y] == 'wall':
        print("Start can't be a wall")
        return
    graph[start_x][start_y] = 0
    visited[start_x][start_y] = True
    queue = adjacentVertices(graph, adjacentIndices(start))
    while queue:
        vertex = queue.pop(0)
        x,y = vertex
        if visited[x][y]:
            continue

        visited[x][y] = True
        graph[x][y] = minimumDistance(graph, adjacentIndices(vertex)) + 1
        queue += adjacentVertices(graph, adjacentIndices(vertex))
        result.append((x,y))
    return result, graph

def ex1(start, goal, distance):
    vertex_list, distance_matrix = dijkstra(start, distance)
    visited_vertices(vertex_list, distance_matrix, distance, "ex1")
    graphvis_path(vertex_list, distance_matrix, goal, distance, "ex1")
    graphvis_visisted_vertices(vertex_list, distance_matrix, distance, "ex1")
    print(f"Distance from A to B is {distance_matrix[goal[0]][goal[1]]} pixels using {distance} distance.")

In [4]:
# Exercise 2

def prioritizedVertices(graph, start, goal, adjacent, distance, prev):
    result = []
    size = len(graph)
    for row, col in adjacent:
        if 0 <= row < size and 0 <= col < size:
            if graph[row][col] != 'wall':
                priority = prev + distance(goal, (row, col))
                result.append((priority,(row, col)))
    return result

def aStar(start, goal, distance):
    if distance == "manhattan":
        adjacent = getAdjacentManhattan
        distance_ = manhattanDistance
    elif distance == "chebyshev":
        adjacent = getAdjacentChebyshev
        distance_ = chebyshevDistance
    else:
        print("invalid distance type")
        return

    result = []
    graph = ex1_graph()
    visited = [[val == 'wall' for val in row] for row in graph]
    queue = PriorityQueue()
    start_x, start_y = start
    if graph[start_x][start_y] == 'wall':
        print("Start can't be a wall")
        return
    graph[start_x][start_y] = 0
    visited[start_x][start_y] = True
    for vertex in prioritizedVertices(graph, start, goal, adjacent(start), distance_, 0 ):
        queue.put(vertex)

    while not queue.empty():
        priority, vertex = queue.get()
        x,y = vertex
        if visited[x][y]:
            continue
        visited[x][y] = True
        graph[x][y] = minimumDistance(graph, adjacent(vertex)) + 1
        for val in prioritizedVertices(graph, start, goal, adjacent(vertex), distance_, graph[x][y]):
            queue.put(val)
        result.append(vertex)
        if vertex == goal:
            break
    return result, graph

def ex2(start, goal, distance):
    vertex_list, distance_matrix = aStar(start, goal, distance)
    visited_vertices(vertex_list, distance_matrix, distance, "ex2")
    graphvis_path(vertex_list, distance_matrix, goal, distance, "ex2")
    graphvis_visisted_vertices(vertex_list, distance_matrix, distance, "ex2")
    print(f"Distance from A to B is {distance_matrix[goal[0]][goal[1]]} pixels using {distance} distance.")

In [5]:
# Exercise 3

def minimumDistance_ex3(graph, visited, adjacent):
    size = len(graph)
    result = []
    for row, col in adjacent:
        if 0 <= row < size and 0 <= col < size:
            vertex = graph[row][col]
            if vertex != 'wall':
                if vertex == 'swamp' or vertex == 'sea':
                    continue
                else:
                    result.append(graph[row][col])
    return min(result)

def dijkstra_ex3(start, distance):
    if distance == "manhattan":
        adjacentIndices = getAdjacentManhattan
    elif distance == "chebyshev":
        adjacentIndices = getAdjacentChebyshev
    else:
        print("invalid distance type")
        return

    result = []
    graph = ex3_graph()
    visited = [[val == 'wall' for val in graph] for row in graph]

    start_x, start_y = start
    if graph[start_x][start_y] == 'wall':
        print("Start can't be a wall")
        return
    graph[start_x][start_y] = 0
    visited[start_x][start_y] = True
    queue = adjacentVertices(graph, adjacentIndices(start))
    while queue:
        vertex = queue.pop(0)
        x,y = vertex
        if visited[x][y]:
            continue

        visited[x][y] = True
        new_distance = minimumDistance_ex3(graph, visited, adjacentIndices(vertex))
        if graph[x][y] == 'sea':
            new_distance  += 4
        elif graph[x][y] == 'swamp':
            new_distance += 2
        else:
            new_distance += 1
        graph[x][y] = new_distance

        queue += [(x,y) for x,y in adjacentVertices(graph, adjacentIndices(vertex)) if not visited[x][y]]
        result.append((x,y))
    return result, graph

def ex3(start, goal, distance):
    vertex_list, distance_matrix = dijkstra_ex3(start, distance)
    visited_vertices(vertex_list, distance_matrix, distance, "ex3")
    graphvis_path(vertex_list, distance_matrix, goal, distance, "ex3")
    #graphvis_visisted_vertices(vertex_list, distance_matrix, distance, "ex3")
    print(f"Distance from A to B is {distance_matrix[goal[0]][goal[1]]} pixels using using {distance} distance.")

In [6]:
# Exercise 4

def knn(vertices, distance_matrix, start):
    current_vertex = start
    path = [start]
    visited = [False for val in range(len(vertices))] # Initialize all vertices as unvisited.
    total_distance = 0
    for i in range(len(vertices)-1):
        # Select an vertex, set it as the current vertex u. Mark u as visited.
        visited[current_vertex] = True
        # Find out the shortest edge connecting the current vertex u and an unvisited vertex v.
        added_distance, unvisited_vertex = min([(distance, vertex) for distance, vertex in distance_matrix[current_vertex] if not visited[vertex]])
        path.append(unvisited_vertex)
        total_distance += added_distance
         # Set v as the current vertex u
        current_vertex = unvisited_vertex
    total_distance += euclideanDistance(vertices[start], vertices[path[-1]])
    return path, total_distance

def repetitive_nn(size):
    with open(f'{size}.txt', 'r') as f:
        vertices = [tuple(int(val) for val in line.strip().split(" ")) for line in f.readlines()]
    distance_matrix = [[0 for col in range(len(vertices))] for row in range(len(vertices))]
    for row in range(len(vertices)):
        for col in range(row, len(vertices)):
            ed = euclideanDistance(vertices[row], vertices[col])
            distance_matrix[row][col] = (ed, col)
            distance_matrix[col][row] = (ed, row)

    best_path = []
    best_distance = inf
    for start, vertex in enumerate(vertices):
        new_path, new_distance = knn(vertices, distance_matrix, start)
        if new_distance < best_distance:
            best_path = new_path
            best_distance = new_distance
    return best_path, best_distance


def ex4(sizes):
    for size in sizes:
        path, distance = repetitive_nn(size)
        print(f'size = {size}, distance: {distance:.2f}, path: {path}')
        with open(f'{size}_knn.txt', 'w+') as f:
            for index in path:
                f.write(f'{index}\n')

In [1]:
# Exercise 5
def two_opt_switch(distance_matrix, vertices, prev_path, prev_distance):
    best_path = prev_path
    best_distance = prev_distance
    for i in range(len(prev_path)):
        for j in range(len(prev_path)):
            path = best_path[:]
            path[i], path[j] = path[j], path[i]
            distance = 0
            for k in range(1, len(path)):
                distance += distance_matrix[path[k-1]][path[k]][0]
            distance += euclideanDistance(vertices[path[0]], vertices[path[-1]])
            if distance < best_distance:
                best_path = path
                best_distance = distance
    return best_path, best_distance

def ex5_path_optimization(size):
    with open(f'{size}.txt', 'r') as f:
        vertices = [tuple(int(val) for val in line.strip().split(" ")) for line in f.readlines()]
    distance_matrix = [[0 for col in range(len(vertices))] for row in range(len(vertices))]
    for row in range(len(vertices)):
        for col in range(row, len(vertices)):
            ed = euclideanDistance(vertices[row], vertices[col])
            distance_matrix[row][col] = (ed, col)
            distance_matrix[col][row] = (ed, row)

    best_path = []
    best_distance = inf
    for start, vertex in enumerate(vertices):
        new_path, new_distance = knn(vertices, distance_matrix, start)
        new_path, new_distance = two_opt_switch(distance_matrix,vertices, new_path, new_distance)
        if new_distance < best_distance:
            best_path = new_path
            best_distance = new_distance
    return best_path, best_distance

def ex5(sizes):
    for size in sizes:
        path, distance = ex5_path_optimization(size)
        print(f'size = {size}, distance: {distance:.2f}, path: {path}')
        with open(f'{size}_knn_optimised.txt', 'w+') as f:
            for index in path:
                f.write(f'{index}\n')

# EX1

##### Getting started with pathfinding in the small virtual world. The first task is to implement Dijkstra algorithm to find the shortest path from A (blue) to B (red) on the following small scale world (250x250). Pink is a block, you can not go through it! Report the shortest distance from A to B and the time it took to run the algorithm. Visualize the final path and pixels that were visited by the algorithm at any time. Comment on your results.

##### To generate the image for the world use the following <a href=https://abercus.github.io/searchvis/>web tool</a>. Set the world size to 250x250 at the top and use the following object instructons to get the small world:

I implemented Dijkstra algorithm using Manhattan and Chebyshev distance.

Here I reconstructed the map from keywords and coordinates, since the map is rather simple. But in ex3 I detected the colors from the image file.

The results seem correct. The shown paths aren't the only minimal length paths since when I randomly picked the next closest vertex it resulted in a squiggly line.

In [8]:
%timeit dijkstra(start=(10,10), distance="manhattan")

93.2 ms ± 2.27 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [9]:
%timeit dijkstra(start=(10,10), distance="chebyshev")

616 ms ± 29.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [10]:
ex1(start=(10,10), goal=(240,240), distance="manhattan")
ex1(start=(10, 10), goal=(240, 240), distance="chebyshev")

Distance from A to B is 764 pixels using manhattan distance.
Distance from A to B is 550 pixels using chebyshev distance.


![ex1](https://i.imgur.com/yusywV6.png)

# EX2

##### Implement A* search algorithm on the small world (the same as above). Similarly to the previous exercise report: execution time and the shortest distance. Visualize the visited area and the final path. Comment on the differences between Dijkstra and A*!

I used sum of distance traveled and estimated distance to goal as the heuristic. The number of pixels visited is slightly smalled compared to Dijkstra, but this could be reduced with a different heuristic. Some suboptimal and non-admissible heuristic functions resulted in the image looking like a piece of modern art.

Compared to Dijkstra A* finds the path without checking values of all pixels. A* has the advantage of knowing the goal and can use that info to optimise the path. A* is faster than Dijkstra but not for such small worlds in my case, using priority queue slowed down the algorithm quite a bit.

In [11]:
%timeit aStar(start=(10,10), goal=(240, 240), distance="manhattan")

489 ms ± 13.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [12]:
%timeit aStar(start=(10,10), goal=(240, 240), distance="chebyshev")

856 ms ± 20 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [13]:
ex2(start=(10, 10), goal=(240, 240), distance="manhattan")
ex2(start=(10, 10), goal=(240, 240), distance="chebyshev")

Distance from A to B is 764 pixels using manhattan distance.
Distance from A to B is 550 pixels using chebyshev distance.


![ex2](https://i.imgur.com/gbcNEvz.png)

# EX3

##### Now let’s consider the bigger version of the virtual world (1000x1000), which is provided below (Figure 4). The goal is still the same - find the path from A (BLUE) to B (RED). But now with some additional obstacles: SWAMP and SEA. SWAMP in light green, is 2 times slower compared to usual. SEA in light blue and is 4 times slower than the usual (taking one step in a sea, is equal to taking 4 steps on normal land). Measure and report the search time and the shortest distance. Visualize the visited area and the final path. How sea and swamp affect the final trajectory?

##### Hint: make sure to use efficient data structures for keeping visited/discovered nodes, e.g. consider the priority queue for A*. Using inefficient data structures can end up your search running for a really long time!. Another way to speed up your search is by making steps longer. So, instead of considering every immediate nearest neighbor as a potential next step, consider only nodes that are 2-3 pixels away (or even 10-20 pixels).

##### Hint II: for the slower regions like the sea or swamp decide the distance or time spent on each step based on the slow-down in the location of the target pixel.

##### The instructions to build a bigger version of the virtual world are provided in the file and below (<a href=https://raw.githubusercontent.com/PuuraJ/AdvancedAlgorithmics/master/big_world.txt>URL</a>). Feel free to either ignore or remove the “text” lines. The world size is 1000x1000

I used Dijkstra's algorithm to calculate all distances. For swamp and sea I added either +1 or +3 to the manhattan distance to accommodate the slow-down. Despite this it seems with manhattan distance the optimal path goes through the slow sea. However, the swamp was avoided.

Because the larger map was slightly more complex than the previous one I detected elements using color values of the image using OpenCV.

In [14]:
ex3(start=(500, 80), goal=(500, 920), distance="manhattan")

Distance from A to B is 2323 pixels using using manhattan distance.


![ex3](https://i.imgur.com/QwBusAv.png)

-----
#### Traveling Salesman Problem, Optimization

##### Take a look at the coordinates of hypothetical "cities" - <a href=https://courses.cs.ut.ee/2022/algorithmics/fall/uploads/Main/tsp.zip> TSP.zip</a>. Our goal is to find the shortest route through all cities (solve the TSP). Assume that there exists a direct road between any pair of cities (Euclidean distance; two points p1=(100,100), p2=(103,104) will have distance (p1, p2)=5 ).

# EX4

##### Run the Nearest Neighbour (NN) algorithm to generate a route through all cities starting from any node (e.g. from node 0). Your route should also end in this node. If distances tie, choose the first city. Report the shortest tour and time it took to complete it for cities of size 10, 20 and 100. Visualize resulting paths either using a webtool: <a href=https://abercus.github.io/tspvis/>https://abercus.github.io/tspvis/</a> or your own code.

##### To get a better feeling of the algorithm, feel free to try it also "on paper" for small size problems (e.g. 10 or 20-cities task). Implementation is preferred, of course, feeding to the next problems.


I applied NN algorithm with every possible starting vertex and chose the shortest route from the results.

In [15]:
%timeit repetitive_nn(10)

128 µs ± 3.96 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [16]:
%timeit repetitive_nn(20)

536 µs ± 7.76 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [17]:
%timeit repetitive_nn(100)

40.4 ms ± 542 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [18]:
ex4(sizes = [10, 20, 100])

size = 10, distance: 2261.66, path: [2, 0, 4, 8, 1, 3, 6, 5, 9, 7]
size = 20, distance: 4479.17, path: [4, 9, 7, 12, 10, 11, 5, 15, 17, 18, 1, 19, 16, 3, 14, 0, 13, 2, 6, 8]
size = 100, distance: 8449.27, path: [32, 30, 13, 34, 1, 49, 75, 64, 68, 66, 76, 56, 48, 63, 40, 39, 91, 43, 84, 82, 21, 89, 65, 94, 80, 22, 58, 46, 54, 78, 2, 26, 9, 33, 28, 98, 57, 8, 3, 69, 61, 83, 67, 71, 62, 6, 86, 7, 97, 14, 60, 5, 27, 47, 52, 23, 25, 79, 17, 45, 95, 10, 20, 77, 0, 74, 29, 24, 88, 4, 85, 18, 59, 44, 11, 42, 51, 19, 37, 36, 31, 90, 92, 50, 99, 93, 70, 53, 81, 15, 12, 73, 87, 38, 41, 55, 72, 35, 96, 16]


![ex4](https://i.imgur.com/3LQVW70.png)

# EX5

##### Implement an optimization method of your choice (e.g. simulated annealing, genetic algorithm, ant colony optimization, hill climbing, tabu search etc.) on provided cites. For the sake of acceptable performance, you may calculate all possible distances before running your algorithm (distances between each two cities). It is recommended to work on small size problems (10, 20 nodes) first. As usual - report resulting path, its length, and execution time. Comment on comparison with nearest neighbour result.

##### Hint: Many of the heuristic search algorithms described in the lecture, require the notion of "neighbourhood". Think of your current state as a sequence of cities, e.g. 1-2-3-4-5 (the resulting path). Then one way you can describe a neighbourhood of a certain sequence, is to try all possible swaps of two cities (or only sequential ones). Two possible example of a neighbour for 1-2-3-4-5 could then be 2-1-3-4-5, 1-3-2-4-5. Of course, you can describe neighbourhood in some other way as well, but you most likely don't want to consider every possible permutation as your neighbour! If you can generate the neighbours, then for example when implementing hill climb/simulated annealing/..., you would choose your next state to be the best result out of all the neighbours (the sequence with shortest total distance).


I optimised the path by applying 2-opt improvements on each iteration of NN. Distance improved with 20 and 100 nodes compared to NN. Result for 10 nodes was identical to the previous exercise.

In [19]:
%timeit ex5_path_optimization(10)

1.28 ms ± 33.8 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [20]:
%timeit ex5_path_optimization(20)

14.2 ms ± 1.12 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [21]:
%timeit ex5_path_optimization(100)

6.31 s ± 375 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [22]:
ex5([10, 20, 100])

size = 10, distance: 2261.66, path: [2, 0, 4, 8, 1, 3, 6, 5, 9, 7]
size = 20, distance: 4113.34, path: [5, 11, 10, 9, 7, 12, 15, 17, 18, 1, 19, 16, 3, 14, 0, 6, 8, 2, 13, 4]
size = 100, distance: 8119.80, path: [32, 30, 13, 34, 1, 49, 75, 64, 68, 66, 76, 56, 48, 63, 40, 39, 91, 43, 84, 97, 21, 89, 65, 94, 80, 22, 58, 46, 54, 78, 2, 26, 9, 33, 28, 98, 57, 8, 3, 69, 61, 83, 67, 71, 62, 6, 86, 7, 82, 14, 60, 5, 27, 95, 52, 47, 23, 45, 17, 79, 25, 10, 20, 77, 4, 74, 0, 88, 24, 29, 85, 59, 18, 11, 44, 42, 51, 19, 36, 37, 31, 90, 92, 93, 99, 50, 70, 53, 81, 15, 12, 73, 87, 38, 41, 55, 72, 35, 96, 16]


![ex5](https://i.imgur.com/bI3bjLy.png)