# Traveling Santa Problem

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

## Loading Data

In [None]:
# Read the cities table (later cells use its CityId, X and Y columns).
cities_csv_path = "../input/cities.csv"
cities_df = pd.read_csv(cities_csv_path)

In [None]:
# Bare last expression: the notebook renders the preview as a rich table
# instead of the plain-text dump produced by print().
cities_df.head()

In [None]:
# The North Pole is the row whose CityId is 0
north_pole = cities_df[cities_df.CityId.eq(0)]

## Getting Prime Cities

In [None]:
# primality test used to flag the Prime Cities
def is_prime(n):
    """Return True when ``n`` is a prime number, False otherwise.

    Uses trial division by the odd numbers from 3 up to the square root
    of ``n``.
    """
    # 2 is the only even prime.
    if n == 2:
        return True
    # Everything below 2 and every other even number is not prime.
    if n < 2 or n % 2 == 0:
        return False
    upper = int(np.sqrt(n) + 1)
    return all(n % divisor != 0 for divisor in range(3, upper, 2))

In [None]:
# create a boolean column flagging the cities whose CityId is a prime number
cities_df['Prime Cities'] = cities_df.CityId.apply(is_prime)
# create a new dataframe with only Prime Cities
Prime_Cities_df = cities_df[cities_df['Prime Cities']]
# distance multiplier per city: 1.0 for prime cities, 1.1 for non-prime ones
# (the previous formula 1.1 * (1 - prime) + 1 produced 2.1 for non-primes)
penalization = 1 + 0.1 * (1 - cities_df['Prime Cities'].astype(int))

In [None]:
# plot cities dataframe with Prime Cities and North Pole
# The seaborn style sheets were renamed with a 'seaborn-v0_8-' prefix in
# matplotlib 3.6 and the old names were later removed, so pick whichever
# name this installation provides.
dark_style = ('seaborn-v0_8-dark'
              if 'seaborn-v0_8-dark' in plt.style.available
              else 'seaborn-dark')
plt.style.use(dark_style)
plt.figure(figsize = (10,6))
plt.scatter(cities_df.X, cities_df.Y, s = 0.1, c = 'Black')
plt.scatter(Prime_Cities_df.X, Prime_Cities_df.Y, marker = '*', 
            c = 'Yellow', s = 0.0009)
plt.scatter(north_pole.X, north_pole.Y, marker = '*', c = 'blue', s = 400)
plt.title('Cities, Prime Cities and the North Pole')
plt.xlabel('X')
plt.ylabel('Y')
plt.grid(False)
plt.show()

Almost a tenth of the cities are prime, which is good because we want to visit a prime city every 10 cities.

Prime cities are spread approximately all around the map, which is a good thing as well.

## Now to the Algorithm

The first function computes the distance from the current city to every other city that is still available.

When the penalization is applied, the distance to each non-prime city is multiplied by $1.1$.

In [None]:
# function to compute the distances from one city to a set of cities
def dist_cities(coords, i, penalize = False):
    """Euclidean distance from city ``i`` to every row of ``coords``.

    Parameters
    ----------
    coords : DataFrame with 'X' and 'Y' columns; one row per candidate city.
    i : CityId of the origin city, looked up in the global ``cities_df``.
    penalize : when True, each distance is multiplied by the candidate
        city's own factor from the global ``penalization`` Series, so that
        the ranking of the candidates can actually change.

    Returns
    -------
    Single-column DataFrame of distances, indexed like ``coords``.

    Raises
    ------
    KeyError if no row of ``cities_df`` has CityId ``i``.
    """
    origin = cities_df.loc[cities_df.CityId == i, ['X', 'Y']]
    if origin.empty:
        raise KeyError(f"CityId {i} not found in cities_df")
    # Keep the coordinates as floats; truncating them to int distorts
    # every distance.
    x0 = float(origin['X'].iloc[0])
    y0 = float(origin['Y'].iloc[0])
    dist = np.sqrt((coords['X'] - x0)**2 + (coords['Y'] - y0)**2)
    if penalize:
        # Align the per-city factors with the candidate rows. A single
        # scalar factor for the origin city would scale all distances
        # equally and leave the nearest city unchanged.
        dist = dist * penalization.reindex(dist.index)
    return pd.DataFrame(dist)

The second function returns the closest city that is still available.

In [None]:
# function to determine the next city
def get_next_city(dist, left_cities):
    """Return the CityId of the closest remaining city.

    Parameters
    ----------
    dist : distances to the remaining cities, as a single-column DataFrame
        or a Series, indexed like ``left_cities``.
    left_cities : DataFrame of unvisited cities with a 'CityId' column.
        A 'Distances' column is written into it as a side effect; the
        caller is expected to drop that column afterwards.

    Returns
    -------
    Series holding the CityId of the nearest city. When several cities tie
    for the minimum distance only the first one is returned, so the result
    never has more than one element.
    """
    # Accept both a one-column frame and a plain Series of distances.
    if isinstance(dist, pd.DataFrame):
        dist = dist.iloc[:, 0]
    left_cities['Distances'] = dist
    dist_min = left_cities['Distances'].min()
    row_city = left_cities.loc[left_cities['Distances'] == dist_min]
    # head(1) resolves ties: one step of the tour visits exactly one city.
    return row_city.CityId.head(1)

In [None]:
# Initialization
# The tour starts at the North Pole (CityId 0).
north_pole = cities_df[cities_df['CityId'] ==0]
current_city = 0
# Candidate coordinates: every city except the row labelled 0.
coordinates = cities_df.drop(0, axis = 0)
# Independent copy of the unvisited cities: get_next_city writes a
# 'Distances' column into this frame, and doing that on a plain slice of
# cities_df triggers pandas' SettingWithCopyWarning.
left_cities = cities_df[1 :].copy()
path = north_pole #our path start from the North Pole
step_numb = 1 # we start at 1 the first step

In [None]:
# Greedy tour: repeatedly move to the closest unvisited city.
# Visited ids are collected in a list and the path frame is assembled once
# at the end; DataFrame.append was removed in pandas 2.0 and growing a
# frame row by row is quadratic.
visited_ids = []
while len(left_cities) > 0:
    # On steps 9, 19, 29, ... the distances are computed with penalize=True.
    favorize_prime = step_numb % 10 == 9
    distances = dist_cities(coordinates, current_city, 
                            penalize = favorize_prime)
    next_city = get_next_city(distances, left_cities)
    # Take a single id even if several cities tie for the minimum distance.
    current_city = int(next_city.iloc[0])
    # Rows are dropped by label, which relies on the index label of each
    # city being equal to its CityId.
    left_cities = left_cities.drop(current_city, axis = 0)
    # get_next_city added this helper column; remove it before the next step.
    left_cities = left_cities.drop(columns = 'Distances')
    coordinates = coordinates.drop(current_city, axis = 0)
    visited_ids.append(current_city)
    step_numb += 1
# Start row(s) already in path, then the visited cities in order, then the
# return to the North Pole.
path = pd.concat([path,
                  cities_df.loc[visited_ids],
                  cities_df[cities_df['CityId'] == 0]])

#### End

Plotting the path

In [None]:
# Draw the tour as a line with a dot per city and mark the North Pole.
fig, ax = plt.subplots(figsize = (13, 8))
ax.plot(path.X, path.Y, linewidth = 1, c = 'black')
ax.scatter(path.X, path.Y, s = 0.1)
ax.scatter(north_pole.X, north_pole.Y, marker = '*', c = 'blue', s = 400)
ax.grid(False)
plt.show()
