# Total Time Complexity: O(n**2)



# Import Libraries

In [None]:
# import pandas as pd
import numpy as np
import pandas as pd
import seaborn as sns
# import library for visualizing data 
import matplotlib.pyplot as plt
import os
print(os.getcwd())
print(os.listdir("../input"))

# Import Data


In [None]:
# Read the competition's cities.csv into the `cities` DataFrame.
cities = pd.read_csv('../input/traveling-santa-2018-prime-paths/cities.csv')

# Display floats with the fixed-point '%f' format (kept from the original as a
# workaround for display-format run time errors).
pd.set_option('display.float_format', lambda x:'%f'%x)
# Convert every column to a numeric dtype; values that cannot be parsed become NaN.
cities = cities.apply(pd.to_numeric, errors='coerce')

# Create a CSV file with 10% of the cities

In [None]:
#function to get 10% data from original dataset
def divide_record(n):
    """Write the first ``n + 1`` cities to 'mini_cites.csv'.

    Parameters
    ----------
    n : int
        Largest row label of the global ``cities`` frame to keep; the rows
        labelled 0 through n (inclusive) are written.

    The output file has the columns CityId, X and Y. CityId is the row label
    itself (0..n); X and Y are copied from ``cities``. Nothing is returned.
    """
    # .loc slices by label and includes the end point, so this selects the
    # rows 0..n in one step instead of one Series lookup per row.
    subset = cities.loc[0:n, ['X', 'Y']]
    records = pd.DataFrame({
        'CityId': np.arange(n + 1),
        'X': subset['X'].values,
        'Y': subset['Y'].values,
    })
    # Write the reduced data set without the DataFrame index column.
    records.to_csv('mini_cites.csv', index=False)

In [None]:
# Keep roughly the first 10% of the cities: the argument is one more than a
# tenth of the largest CityId, and divide_record(n) writes the rows 0..n.
divide_record(max(cities.CityId)//10+1)

In [None]:
# Read the reduced data set written by divide_record into `mini_cities`,
# parsing the X and Y columns as float64.
mini_cities = pd.read_csv('mini_cites.csv', low_memory=False,dtype={'X': np.float64, 'Y': np.float64}) #increase efficiency


# Display floats with the fixed-point '%f' format (kept from the original as a
# workaround for display-format run time errors).
pd.set_option('display.float_format', lambda x:'%f'%x)
# Convert every column to a numeric dtype; values that cannot be parsed become NaN.
mini_cities = mini_cities.apply(pd.to_numeric, errors='coerce')

In [None]:
mini_cities.head()

# Prime Cities

To find the prime cities, we use an algorithm called the "Sieve of Eratosthenes", which loops over the candidate CityIds. For every number i >= 2 that is still marked as prime, every multiple i*k (with k >= 2) that is less than or equal to n is marked as non-prime, i.e. prime[i*k] is set to False. Finally, the algorithm returns a list of Booleans in which the entry at index CityId is True if that city is a prime city and False if it is not.

In [None]:
#function to find which are prime cities
def find_primes(n):
    """Return a primality table for the integers 0..n (Sieve of Eratosthenes).

    Parameters
    ----------
    n : int
        Largest number to classify.

    Returns
    -------
    list of bool
        A list of length ``n + 1`` whose entry ``i`` is True when ``i`` is
        prime. For ``n < 2`` every entry is False (0 and 1 are not prime);
        for negative ``n`` the list is empty.
    """
    n = int(n)
    if n < 2:
        # No primes below 2; also avoids indexing primes[1] on a short list.
        return [False] * max(n + 1, 0)

    primes = [True] * (n + 1)  # start by assuming every number is prime
    primes[0] = False  # 0 is not a prime
    primes[1] = False  # 1 is not a prime
    for i in range(2, int(np.sqrt(n)) + 1):
        if primes[i]:
            # Multiples of i below i*i have a smaller prime factor and were
            # already crossed out, so start at i*i.
            for multiple in range(i * i, n + 1, i):
                primes[multiple] = False
    return primes


In [None]:
# Flag every city whose CityId is a prime number. find_primes returns one entry
# per number 0..max(CityId), so this assumes the frame has exactly one row per
# id in that range (TODO confirm if the data set ever changes).
mini_cities['is_prime'] = find_primes(max(mini_cities.CityId))


In [None]:
# Same primality table as the 'is_prime' column, kept as a plain Python list.
# NOTE(review): no later cell visible here reads prime_cities; confirm it is still needed.
prime_cities = find_primes(max(mini_cities.CityId))


In [None]:
mini_cities.head()

# Visualization of cities

We use a scatter plot to visualize the map of all cities. The North Pole city is marked by a big red circle. Prime cities are drawn as purple circles, while non-prime cities are drawn as blue circles.

In [None]:
%matplotlib notebook
# Scatter plot of the reduced data set on a 10x10 inch figure.
fig = plt.figure(figsize=(10,10))

# Select each group of rows once, then draw the groups one after another.
north_pole = mini_cities[mini_cities['CityId']==0]
prime_rows = mini_cities[mini_cities['is_prime']==True]
other_rows = mini_cities[mini_cities['is_prime']==False]

# North Pole: one large red marker.
plt.scatter(north_pole.X, north_pole.Y, s= 200, color = 'red')
# Prime cities: small purple markers.
plt.scatter(prime_rows.X, prime_rows.Y, s= 0.8, color = 'purple')
# Non-prime cities: tiny markers in matplotlib's default colour.
plt.scatter(other_rows.X, other_rows.Y, s= 0.1)
plt.grid(False)


# Distance between two cities


In [None]:
#function to calculate distance between two cities by using euclidean distance
def pair_distance(x,y):
    """Return the Euclidean distance between two cities of ``mini_cities``.

    ``x`` and ``y`` are the labels used to look up the X and Y columns of the
    global ``mini_cities`` frame.
    """
    delta_x = mini_cities.X[x] - mini_cities.X[y]
    delta_y = mini_cities.Y[x] - mini_cities.Y[y]
    return np.sqrt(delta_x ** 2 + delta_y ** 2)

# Calculate total distance 

In [None]:
def total_distance(path):
    """Return the total length of a route, including the prime-city penalty.

    ``path`` is a sequence of city ids visited in order. Each leg is the
    Euclidean distance between consecutive cities; every 10th leg is 10%
    longer when the city it starts from is not flagged in
    ``mini_cities.is_prime``.
    """
    legs = []
    for step in range(len(path) - 1):
        origin, target = path[step], path[step + 1]
        leg = pair_distance(origin, target)
        # Steps are counted from 1, so step + 1 is the leg number.
        if (step + 1) % 10 == 0 and mini_cities.is_prime[origin] == False:
            leg = leg + 0.1 * leg
        legs.append(leg)
    return np.sum(legs)

# Dumbest Path 

In [None]:
# Baseline route: visit the cities in the order of the CityId column.
dumbest_path = mini_cities['CityId'].values
# Add the North Pole (CityId 0) at the end of the trip so the route returns home.
dumbest_path =  np.append(dumbest_path,0)

In [None]:
print('Total distance with the sorted city path is '+ "{:,}".format(total_distance(dumbest_path)))

# Using Queue with binary tree

**Algorithm 1**

The first algorithm builds a binary tree whose nodes are the CityIds. To build this tree, we first add all CityIds to a deque. Then, using the linked binary tree, we create the nodes of the tree one at a time by popping each CityId off the deque. Next, we list all nodes of the tree in "inorder traversal" order. Finally, we pass this path list to our distance calculation function to get the result. Because the resulting order of the cities is effectively random with respect to their positions, this algorithm does not give a good result.

In [None]:
## import file
os.chdir("../input/external/")
from linked_binary_tree import LinkedBinaryTree
from collections import deque
# Every CityId from 0 up to the largest one, in ascending order.
path = list(range(max(mini_cities.CityId)+1))
# Queue of the ids that still have to be placed in the tree.
array = deque(path)
# CityId 0 is the fixed start of the route, so take it out of the queue.
array.popleft()
# Route produced from the tree nodes; it starts at CityId 0.
tree_path = [0]
def tree_build():
    """Place the queued CityIds in a binary tree and record its inorder walk.

    Ids are taken one at a time from the global deque ``array``. The first id
    becomes the root; after that each node, in creation order, receives a left
    and then a right child until the queue is empty. The elements of the
    tree's inorder traversal are appended to the global list ``tree_path``,
    followed by a closing 0. If ``array`` is empty the tree stays empty and
    ``tree_path`` is left unchanged.
    """
    # Positions of the created nodes, in creation order.
    node = []
    tree = LinkedBinaryTree()
    if array:
        node.append(tree._add_root(array.popleft()))
        parent = 0
        # Stop as soon as the queue is exhausted instead of relying on a
        # swallowed exception to end the loop.
        while array:
            node.append(tree._add_left(node[parent], array.popleft()))
            if array:
                node.append(tree._add_right(node[parent], array.popleft()))
            parent += 1
    # Read the elements back in inorder sequence and close the route with 0.
    if not tree.is_empty():
        for p in tree._subtree_inorder(tree.root()):
            tree_path.append(p.element())
        tree_path.append(0)
tree_build()

In [None]:
print('Total distance with the sorted city path is '+ "{:,}".format(total_distance(tree_path)))

# Using Insertion sort with X value

**Algorithm 2**

This algorithm is found in the "Data Structures and Algorithms in Python" textbook in chapter 5, on page 215 (237 in the PDF). Insertion sort treats the front of the sequence as an already sorted part, which initially contains only the first element. It then takes the next element as the key and compares it with the elements of the sorted part from right to left, shifting every element that is larger than the key one position to the right, and inserts the key into the gap that opens up. The algorithm moves up the sequence one element at a time in this way until it reaches the end, at which point the entire list is arranged in ascending order. Here the insertion sort is applied to the list of CityIds, but the comparisons use the X value of each CityId stored in the list. The result of this algorithm is a list of CityIds in order of increasing X value.

In [None]:
# Ids of every city except CityId 0, in ascending order (to be sorted by X).
sortx_path = list(range(1, max(mini_cities.CityId)+1))

In [None]:
# X coordinate of every city, so that City_X[i] is the X value looked up for label i.
City_X = [mini_cities['X'][city] for city in range(max(mini_cities.CityId)+1)]

In [None]:
def insertionSort(arr, keys=None):
    """Sort the ids in ``arr`` in place by ascending key using insertion sort.

    Parameters
    ----------
    arr : list of int
        Ids to reorder; each id is used as an index into ``keys``.
    keys : sequence, optional
        ``keys[i]`` is the value id ``i`` is ordered by. Defaults to the
        global ``City_X`` list, which keeps existing one-argument calls working.

    Returns None; ``arr`` is modified in place. Ids with equal keys keep their
    relative order because only strictly larger keys are shifted.
    """
    if keys is None:
        keys = City_X
    # Traverse positions 1 .. len(arr) - 1; arr[:i] is always sorted.
    for i in range(1, len(arr)):
        current = arr[i]
        current_key = keys[current]  # invariant during the shifting loop
        j = i - 1
        # Shift every id with a larger key one slot to the right.
        while j >= 0 and current_key < keys[arr[j]]:
            arr[j + 1] = arr[j]
            j -= 1
        arr[j + 1] = current

In [None]:
insertionSort(sortx_path)

In [None]:
#create a path for calculating total distance
sortedx_path=[0]
for each in range(len(sortx_path)):
    sortedx_path.append(sortx_path[each])
sortedx_path.append(0)

In [None]:
print('Total distance with the sorted city path is '+ "{:,}".format(total_distance(sortedx_path)))

# Selection sort with Y value

**Algorithm 3**

The selection sort algorithm sorts an array by repeatedly finding the minimum element (for ascending order) in the unsorted part and putting it at the beginning of that part. Using this algorithm, we sort the CityIds by their Y value. The result of this algorithm is a list of CityIds in order of increasing Y value.

In [None]:
# Ids of every city except CityId 0, in ascending order (to be sorted by Y).
sorty_path = list(range(1, max(mini_cities.CityId)+1))

In [None]:
# Y coordinate of every city, so that City_Y[i] is the Y value looked up for label i.
City_Y = [mini_cities['Y'][city] for city in range(max(mini_cities.CityId)+1)]

In [None]:
def selectionsort(alist, keys=None):
    """Sort the ids in ``alist`` in place by ascending key using selection sort.

    Parameters
    ----------
    alist : list of int
        Ids to reorder; each id is used as an index into ``keys``.
    keys : sequence, optional
        ``keys[i]`` is the value id ``i`` is ordered by. Defaults to the
        global ``City_Y`` list, which keeps existing one-argument calls working.

    Returns None; ``alist`` is modified in place.
    """
    if keys is None:
        keys = City_Y
    size = len(alist)
    for i in range(size):
        # Find the position of the smallest key in the unsorted tail alist[i:].
        minPosition = i
        for j in range(i + 1, size):
            if keys[alist[minPosition]] > keys[alist[j]]:
                minPosition = j

        # Move that minimum to position i, the front of the unsorted tail.
        alist[i], alist[minPosition] = alist[minPosition], alist[i]


In [None]:
selectionsort(sorty_path)

In [None]:
#create a path for calculating total distance
# Route for the distance calculation: start at CityId 0, visit every city in
# the order held by sorty_path, then return to CityId 0.
sortedy_path=[0]
# Copy ALL sorted ids; stopping one short would leave the last city (the one
# with the largest Y value) out of the route.
sortedy_path.extend(sorty_path)
sortedy_path.append(0)

In [None]:
print('Total distance with the sorted city path is '+ "{:,}".format(total_distance(sortedy_path)))

# Using nearest neighbor algorithm

**Algorithm 4**

This algorithm is one of the most suitable for the travelling salesman problem; I found it by going through some example kernels for the project. It works by finding the closest neighbour of the current city, then the closest neighbour of that city, and so on. To apply this algorithm, we initialize a list of CityIds and an array containing the X and Y coordinates. We run a "while" loop as long as there are ids left, and in each pass we get the X and Y of the last city in the path list. Then we calculate the distances from all remaining cities to that last city and add the city with the shortest distance to the path. We also need to remove this city from the id list and from the coordinates array. Finally, we add the id of the North Pole city and pass the final path to the distance calculation function.

In [None]:
def nearest_neighbour():
    """Build a route by always moving to the closest city not yet visited.

    The route starts at 0, repeatedly appends the remaining city whose squared
    Euclidean distance to the last city of the route is smallest, and ends
    with a closing 0. Reads the global ``mini_cities`` frame and returns the
    route as a list.
    """
    # Candidates still to visit: everything after the first row.
    remaining_ids = mini_cities.CityId.values[1:]
    remaining_xy = np.array([mini_cities.X.values, mini_cities.Y.values]).T[1:]
    route = [0,]
    while len(remaining_ids) > 0:
        current = route[-1]
        here = np.array([mini_cities.X[current], mini_cities.Y[current]])
        # Squared distances are enough to find the minimum.
        squared = ((remaining_xy - here)**2).sum(-1)
        pick = squared.argmin()
        route.append(remaining_ids[pick])
        # Drop the chosen city from both candidate arrays.
        remaining_ids = np.delete(remaining_ids, pick, axis=0)
        remaining_xy = np.delete(remaining_xy, pick, axis=0)
    route.append(0)
    return route

nnpath = nearest_neighbour()

In [None]:
print('Total distance with the Nearest Neighbor path '+  "is {:,}".format(total_distance(nnpath)))

In [None]:
print(os.getcwd())
os.chdir("/kaggle/working")

In [None]:
def submission():
    """Write the global ``nnpath`` route to 'Final_Submission.csv'.

    The file has a single 'Path' column and no index column.
    """
    route_frame = pd.DataFrame({'Path': nnpath})
    route_frame.to_csv('Final_Submission.csv', index=False)

In [None]:
submission()

In [None]:
#Visualize the traveling path of Nearest Neighbor algorithm
%matplotlib notebook
# Put the route into a one-column frame and left-merge it with mini_cities
# (the shared column is CityId) so each stop gets its X and Y coordinates.
df_path = pd.DataFrame({'CityId':nnpath}).merge(mini_cities,how = 'left')
fig, ax = plt.subplots(figsize=(10,10))
# Draw the route as one line through the stops, in the row order of df_path.
ax.plot(df_path['X'], df_path['Y'])

In summary, the nearest neighbor algorithm gives the most efficient path, 462,595.62466666184 units long, which is about 99% shorter than the path without sorting, which is 44,304,926.33344268 units long.