In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random 
from rtree import index


https://www.kaggle.com/c/traveling-santa-2018-prime-paths

1. Cost and prime functions taken from https://www.kaggle.com/tylerchenchen/understanding-the-problem-and-some-sample-paths and modified.
2. Greedy version that moves to the nearest unvisited city at each step, using an R-tree (`rtree`) for the nearest-neighbour lookup.
3. Variant that accounts for the 10% penalty applied to every 10th step that does not start from a prime city.

In [None]:
# Load the city table from the Kaggle input directory; it provides the CityId,
# X and Y columns used throughout this notebook.
# For a local run, point the path at '../cities.csv' instead.
df_cities = pd.read_csv(
    '../input/traveling-santa-2018-prime-paths/cities.csv'
)

In [None]:
# (x, y) pair of every city, in the row order of df_cities.
coords = list(zip(df_cities['X'], df_cities['Y']))

## Prime cities

In [None]:
def sieve_of_eratosthenes(n):
    """Return a primality lookup table for the integers 0..n.

    Parameters
    ----------
    n : int
        Largest number to classify (inclusive). NumPy integer scalars are
        accepted as well.

    Returns
    -------
    list of bool
        ``primes[k]`` is True exactly when ``k`` is a prime number. The list
        has ``n + 1`` entries (it is empty when ``n`` is negative).
    """
    n = int(n)  # normalise NumPy integer scalars (e.g. a Series maximum)
    if n < 2:
        # There are no primes below 2, and primes[1] does not even exist for
        # n < 1, so tiny inputs are answered up front.
        return [False] * max(n + 1, 0)

    primes = [True] * (n + 1)  # start by assuming every number is prime
    primes[0] = primes[1] = False  # 0 and 1 are not prime

    i = 2
    while i * i <= n:  # integer-only bound, no floating-point square root
        if primes[i]:
            # Multiples below i*i were already crossed out by smaller primes.
            for multiple in range(i * i, n + 1, i):
                primes[multiple] = False
        i += 1
    return primes

# prime_cities[k] is True when k is a prime number, for every k up to the largest CityId.
prime_cities = sieve_of_eratosthenes(df_cities['CityId'].max())

In [None]:
# Sanity check on a small input: primality flags for the numbers 0..9, shown inside a list.
[sieve_of_eratosthenes(n=9)]

## Cost function

In [None]:
#Every 10th step (stepNumber % 10 == 0) is 10% longer unless it starts from a prime CityId.

In [None]:
def euclidian_distance(origin, destiny):
    """Straight-line distance between two points.

    Both arguments are indexable; position 0 is read as x and position 1 as y.
    """
    delta_x = destiny[0] - origin[0]
    delta_y = destiny[1] - origin[1]
    return (delta_x ** 2 + delta_y ** 2) ** 0.5

In [None]:
def total_distance(coords, ordered_cities, prime_cities):
    """Length of the tour ``ordered_cities``, including the prime-path penalty.

    coords         -- sequence mapping a city id to its (x, y) position
    ordered_cities -- city ids in visiting order
    prime_cities   -- boolean lookup, prime_cities[city] is True for prime ids

    Steps are numbered from 1. A step whose number is a multiple of 10 costs
    10% more when the city it departs from is not prime.
    """
    path_length = 0
    for step_number in range(1, len(ordered_cities)):
        start_city = ordered_cities[step_number - 1]
        end_city = ordered_cities[step_number]
        leg = euclidian_distance(coords[start_city], coords[end_city])
        # Looked up on every step, not only on the 10th ones.
        departs_from_prime = prime_cities[start_city]
        penalised = step_number % 10 == 0 and not departs_from_prime
        path_length += leg * (1.1 if penalised else 1.0)
    return path_length

## Greedy version

In [None]:
# Plain nearest-neighbour tour: starting at city 0, repeatedly move to the
# closest city that has not been visited yet.
l_cities = list(zip(df_cities.X, df_cities.Y))

# Spatial index of the cities still to be visited. City 0 is the starting
# point, so it is never inserted.
tree = index.Index(properties=index.Property())
for city_id in range(1, len(l_cities)):
    tree.add(city_id, l_cities[city_id])

origin = 0
res = [origin]
for counter in range(1, len(l_cities)):
    if counter % 10000 == 0:
        print(counter)  # progress indicator
    # Take the first id returned for the current position.
    destiny = list(tree.nearest(l_cities[origin], 1))[0]
    # Remove the chosen city from the index so it cannot be picked again.
    tree.delete(destiny, l_cities[destiny])
    res.append(destiny)
    origin = destiny

In [None]:
# Close the tour by returning to city 0, then report its penalised length.
ordered_cities = [*res, 0]
print('Total distance with the sorted city path is ' + "{:,}".format(
    total_distance(coords, ordered_cities, prime_cities)))
# Value recorded by the author: 1,812,602

In [None]:
# Attach coordinates to the tour. The left frame only has a CityId column, so
# that is the join key; a left join keeps the visiting order. Then draw the
# whole path.
df_path = pd.merge(pd.DataFrame({'CityId': ordered_cities}), df_cities, how='left')
fig, ax = plt.subplots(figsize=(20, 20))
ax.plot(df_path.X, df_path.Y)

In [None]:
# Zoom in on three parts of the tour: the first `elems` stops (default
# colour), the last `elems` stops (red) and the `elems` stops just before
# those (violet). The large black pentagon marks entry 0 of df_cities.
fig, ax = plt.subplots(figsize=(7, 7))
elems = 1000
ax.plot(df_path['X'].iloc[:elems], df_path['Y'].iloc[:elems], marker='o')
ax.scatter(df_cities.loc[0, 'X'], df_cities.loc[0, 'Y'], c='black', marker='p', s=900)
ax.plot(df_path['X'].iloc[-elems * 2:-elems], df_path['Y'].iloc[-elems * 2:-elems],
        marker='o', c='violet')
ax.plot(df_path['X'].iloc[-elems:], df_path['Y'].iloc[-elems:], marker='o', c='red')



### Incorporating the 10% penalty: on every 9th step, look two steps ahead to decide whether going to a nearby prime city is cheaper than going to the nearest city

In [None]:
def distances(l_cities, origin, destiny, tree):
    """Two-step look-ahead: length of origin -> destiny and of destiny -> follow-up.

    The follow-up city is the first id that ``tree.nearest`` returns around
    ``destiny`` once ``destiny`` itself (and ``origin``, if it shows up) has
    been discarded.

    Returns ``[distance origin -> destiny, distance destiny -> follow-up]``.
    """
    origin_xy = l_cities[origin]
    destiny_xy = l_cities[destiny]

    # Three neighbours are requested so that one is left after the removals.
    candidates = list(tree.nearest(destiny_xy, 3))
    candidates.remove(destiny)
    try:
        candidates.remove(origin)
    except ValueError:
        pass  # origin was not among the returned neighbours
    follow_up = candidates[0]

    first_leg = euclidian_distance(origin_xy, destiny_xy)
    second_leg = euclidian_distance(destiny_xy, l_cities[follow_up])
    return [first_leg, second_leg]





def greedy_with_cost(l_cities, prime_cities, num_options=20):
    """Nearest-neighbour tour that tries to avoid the 10% non-prime penalty.

    The tour starts at city 0. On most steps it simply moves to the nearest
    unvisited city. On every step whose number ends in 9 the chosen city will
    be the departure point of a 10th step, so a two-step look-ahead compares:

    * nearest city + its follow-up leg weighted by 1.1 (only evaluated when
      the nearest city is not prime), against
    * each nearby prime city + its follow-up leg at normal cost.

    Parameters
    ----------
    l_cities : sequence of (x, y)
        City coordinates; the position in the sequence is the city id.
    prime_cities : sequence of bool
        ``prime_cities[city]`` is True when the city id is prime.
    num_options : int, optional
        How many nearest unvisited cities are inspected for primes on a 9th
        step. Defaults to 20.

    Returns
    -------
    list of int
        City ids in visiting order, starting with 0 (the return to city 0 is
        not appended).

    Raises
    ------
    ValueError
        If ``num_options`` is smaller than 1.
    """
    if num_options < 1:
        raise ValueError("num_options must be at least 1")

    # Spatial index of the unvisited cities; city 0 is the start and is left out.
    tree = index.Index(properties=index.Property())
    for city_id in range(1, len(l_cities)):
        tree.add(city_id, l_cities[city_id])

    origin = 0
    res = [origin]

    # `counter` is the 1-based number of the step being decided.
    for counter in range(1, len(l_cities)):
        if counter % 10000 == 0:
            print(counter)  # progress indicator

        coord_origin = l_cities[origin]

        if counter % 10 != 9:
            destiny = list(tree.nearest(coord_origin, 1))[0]
        else:
            # 9th step: the city picked here is where the 10th step departs from.
            options = list(tree.nearest(coord_origin, num_options))
            destiny = options[0]  # default: plain nearest neighbour
            near_primes = [option for option in options if prime_cities[option]]

            # Only worth comparing when the nearest city would incur the
            # penalty and at least one prime alternative is close by.
            if near_primes and not prime_cities[destiny]:
                to_destiny, after_destiny = distances(l_cities, origin, destiny, tree)
                best = to_destiny + after_destiny * 1.1

                for prime in near_primes:
                    to_prime, after_prime = distances(l_cities, origin, prime, tree)
                    cost = to_prime + after_prime
                    if cost < best:
                        best = cost
                        destiny = prime

        # Remove the chosen city from the index so it cannot be picked again.
        tree.delete(destiny, l_cities[destiny])
        res.append(destiny)
        origin = destiny
    return res

# Build the prime-aware greedy tour.
res2 = greedy_with_cost(l_cities=l_cities, prime_cities=prime_cities)

In [None]:
# Close the prime-aware tour by returning to city 0 and score that tour
# (ordered_cities2), not the plain greedy one.
ordered_cities2 = res2 + [0]
print('Total distance with the sorted city path is '+ "{:,}".format(total_distance(coords, ordered_cities2, prime_cities)))
# Value recorded by the author: 1,823,865
# NOTE(review): re-run to confirm this figure corresponds to ordered_cities2.

In [None]:
# Full prime-aware tour. Cities flagged in prime_cities are overlaid as
# translucent red dots; entry 0 of df_cities is drawn as a large black disc.
df_path2 = pd.merge(pd.DataFrame({'CityId': ordered_cities2}), df_cities, how='left')
fig, ax = plt.subplots(figsize=(20, 20))
ax.plot(df_path2.X, df_path2.Y)
ax.scatter(df_cities.X[prime_cities], df_cities.Y[prime_cities],
           color='#ff000080', marker='.')
ax.scatter(df_cities.loc[0, 'X'], df_cities.loc[0, 'Y'], c='black', marker='o', s=1000)

In [None]:
# Zoom in on the prime-aware tour (ordered_cities2): the first `elems` stops
# (default colour), the last `elems` stops (red) and the `elems` stops just
# before those (violet). The black pentagon marks entry 0 of df_cities.
# NOTE(review): this cell used to rebuild and plot the plain greedy tour
# (ordered_cities); in this section the prime-aware tour is assumed to be the
# intended one.
df_path2 = pd.DataFrame({'CityId':ordered_cities2}).merge(df_cities,how = 'left')
fig, ax = plt.subplots(figsize=(20,20))
elems = 1000

ax.plot(df_path2.iloc[:elems,]['X'], df_path2.iloc[:elems,]['Y'],marker = 'o')
ax.scatter(df_cities['X'][0], df_cities['Y'][0], c ='black', marker = 'p', s= 900)
ax.plot(df_path2.iloc[-elems*2: -elems,]['X'], df_path2.iloc[-elems*2: -elems,]['Y'],marker = 'o',c ='violet')
ax.plot(df_path2.iloc[-elems:,]['X'], df_path2.iloc[-elems:,]['Y'],marker = 'o',c ='red')




In [None]:
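# To submit, uncomment the lines below.
# NOTE(review): this line originally referenced `sorted_cities`, which is not defined in this notebook; `ordered_cities` (the plain greedy tour) is assumed — use `ordered_cities2` for the prime-aware tour.
#pd.DataFrame({'Path':ordered_cities}).to_csv('my_submission.csv',index  = False)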
#!kaggle competitions submit -c traveling-santa-2018-prime-paths -f my_submission.csv -m "1st submission - greedy algo"