In [None]:
import time
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

#from scipy.spatial import distance_matrix
#from concorde.tsp import TSPSolver

In [None]:
# Kaggle input directory of the dataset. The trailing slash matters because
# file names are appended below by plain string concatenation.
path = "/kaggle/input/traveling-santa-problem/"

# City table. `id` is kept as a regular column (not the index) because
# plot_path() merges on it.  Alternative: index_col="id"
df = pd.read_csv(filepath_or_buffer=path + "santa_cities.csv")

# Benchmark paths file shipped alongside the city table.
sub = pd.read_csv(filepath_or_buffer=path + "random_paths_benchmark.csv")

# Last expression of the cell: rich display of the city table.
df

In [None]:
# Overview scatter of every city, drawn with an equal aspect ratio so that
# distances on screen are not distorted between the two axes.
fig, ax = plt.subplots(
    figsize=(16, 10),
    subplot_kw={'adjustable': 'box', 'aspect': 1.0},
)

# ',' is the single-pixel marker; low alpha keeps dense regions readable.
ax.plot(df.x, df.y, 'k,', alpha=0.3)
# Highlight the city stored under index label 0 with a blue cross.
ax.plot(df.x[0], df.y[0], 'bx')

ax.set_xlabel('X', fontsize=16)
ax.set_ylabel('Y', fontsize=16)
ax.set_title('All cities (first in blue)', fontsize=18)
plt.show()

# Auxiliary functions

In [None]:
def total_distance(df_path):
    """Return the length of the polyline through the rows of ``df_path``.

    Parameters
    ----------
    df_path : object exposing ``x`` and ``y`` pandas Series
        Coordinates in visiting order, one row per stop.

    Returns
    -------
    float
        Sum of the Euclidean distances between consecutive rows, rounded to
        two decimals. No closing leg is added: a closed tour must repeat its
        first stop as the last row.
    """
    # Offsets from the previous row. The first row has no predecessor, so its
    # offset is NaN and is skipped by the NaN-ignoring sum below.
    dx = df_path.x - df_path.x.shift()
    dy = df_path.y - df_path.y.shift()

    leg_lengths = np.sqrt(dx * dx + dy * dy)
    total = leg_lengths.sum()
    return round(total, 2)

In [None]:
def plot_path(path, df, size=8, verbose=True):
    """Draw a tour over the cities in ``df`` and show its length in the title.

    Parameters
    ----------
    path : sequence
        City ids in visiting order; each is matched against ``df['id']``.
    df : pandas.DataFrame
        City table with ``id``, ``x`` and ``y`` columns.
    size : number, default 8
        Side length of the square figure.
    verbose : bool, default True
        When true, print the path and label every city of ``df`` with its id.
    """
    # A left merge keeps the rows in the order given by `path`, so df_path
    # lists the coordinates stop by stop.
    df_path = pd.merge(pd.DataFrame({'id': path}), df, on='id', how="left")

    plt.figure(figsize=(size, size))
    plt.subplot(111, adjustable='box', aspect=1.0)
    plt.plot(df_path.x, df_path.y, linewidth=1)
    # Mark the first stop (positional access). r: red, o: circle
    plt.plot(df_path.x.iloc[0], df_path.y.iloc[0], 'ro')

    if verbose:
        print(*path)
        # Walk the rows directly instead of indexing with a counter: integer
        # `[]` on these columns is a label lookup, which fails or mislabels
        # when `df` is a slice whose index does not run 0..n-1.
        for city_id, city_x, city_y in zip(df.id, df.x, df.y):
            plt.annotate(str(city_id), (city_x, city_y))

    plt.title(str(len(set(path)))+' cities: '+str(total_distance(df_path)), fontsize=18)
    plt.show()

In [None]:
# Hand-written closed tour over the first ten cities, to check the plotting helper.
plot_path(path=[0, 2, 5, 1, 8, 3, 4, 9, 6, 7, 0], df=df.iloc[:10], size=4)

# Greedy: Nearest Neighbour

In [None]:
def nearest_neighbour(df):
    """Build a closed tour with the greedy nearest-neighbour heuristic.

    The tour starts at the first row of ``df``, repeatedly moves to the
    closest city not visited yet, and finally returns to the start.

    Parameters
    ----------
    df : pandas.DataFrame
        City table with ``x`` and ``y`` columns. Cities are identified by
        their index labels.

    Returns
    -------
    list
        Index labels in visiting order, with the start label repeated at the
        end. An empty frame yields an empty list.
    """
    if len(df) == 0:
        return []

    # Work on plain arrays by position, so the result does not depend on the
    # index starting at label 0.
    ids = df.index.values
    xy = np.array([df.x.values, df.y.values]).T

    start = ids[0]
    path = [start]
    current = xy[0]
    # Candidate pool: every city except the start.
    ids, xy = ids[1:], xy[1:]

    for _ in tqdm(range(len(ids))):
        # Squared distance is enough for picking the minimum (no sqrt needed).
        dist = ((xy - current)**2).sum(-1)
        nearest_index = dist.argmin()
        path.append(ids[nearest_index])
        current = xy[nearest_index]
        # Drop the visited city from the candidate pool.
        ids = np.delete(ids, nearest_index, axis=0)
        xy  = np.delete(xy,  nearest_index, axis=0)

    path.append(start)  # close the tour
    return path

In [None]:
# Nearest-neighbour tour over the first 100 cities, plotted with city labels.
path = nearest_neighbour(df.iloc[:100])
plot_path(path=path, df=df.iloc[:100], size=8, verbose=True)

In [None]:
# Nearest-neighbour tour over the first 1000 cities; labels are switched off.
path = nearest_neighbour(df.iloc[:1000])
plot_path(path=path, df=df.iloc[:1000], size=8, verbose=False)

In [None]:
# Nearest-neighbour tour over the first 10000 cities; labels are switched off.
path = nearest_neighbour(df.iloc[:10000])
plot_path(path=path, df=df.iloc[:10000], size=8, verbose=False)

In [None]:
# Nearest-neighbour tour over the complete city table. `path` from this cell
# is what the submission cell below writes out.
path = nearest_neighbour(df)
plot_path(path=path, df=df, size=8, verbose=False)

In [None]:
# Store the most recently computed tour as a one-column table and write it
# to sub.csv without the row index.
mySub = pd.DataFrame(data={'path1': path})
mySub.to_csv("sub.csv", index=False)