In [1]:
import datetime
import os
from importlib import reload

import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import silhouette_samples, silhouette_score

import a2a_clustering
import a2a_validation
import a2a_travellingsalesman
import a2a_kmeans_equalsize

# Re-execute the project modules and rebind their names, so that edits made to
# them on disk are picked up without restarting the kernel. The modules are
# reloaded in the order listed.
(
    a2a_clustering,
    a2a_validation,
    a2a_kmeans_equalsize,
    a2a_travellingsalesman,
) = [
    reload(project_module)
    for project_module in (
        a2a_clustering,
        a2a_validation,
        a2a_kmeans_equalsize,
        a2a_travellingsalesman,
    )
]

#################
# CONFIGURATION #
#################

n_clusters = 15
RANDOM_SEED = 0

# RANDOM_SEED used to be declared but never applied. Seed NumPy's global RNG so
# that any code drawing from it is repeatable between runs.
# NOTE(review): if EqualGroupsKMeans accepts a `random_state` argument, pass
# RANDOM_SEED to it explicitly instead -- confirm against a2a_kmeans_equalsize.
np.random.seed(RANDOM_SEED)

# The timer starts here, so the elapsed time reported below also covers the
# CSV load and the feature transform, not only the fit itself.
start = datetime.datetime.now()
PATH = 'output/clustering/'
FILE_PREFIX = PATH + 'kmeans_equal_size_' + str(n_clusters) + '_'

# Make sure the output directory exists *before* the long-running fit, so a
# missing directory cannot make the first write fail after the work is done.
os.makedirs(PATH, exist_ok=True)

###################
# LOAD + TRANSFORM #
###################

# Parse the timestamp column explicitly instead of via
# `date_parser=lambda x: pd.datetime.strptime(...)`: `pd.datetime` is a
# deprecated alias that has been removed from current pandas, and `date_parser`
# itself is deprecated. `pd.to_datetime` with an explicit format works on both
# old and new pandas versions.
df = pd.read_csv("output/data_preparation/first_visit.20190903.csv")
df['created_at'] = pd.to_datetime(df['created_at'], format='%Y-%m-%d %H:%M:%S')

X = a2a_clustering.transform(df)

###################
# CLUSTERING STEP #
###################

print("Clustering started")
clusterer = a2a_kmeans_equalsize.EqualGroupsKMeans(n_clusters=n_clusters).fit(X)

# Attach the cluster label of every row to the dataset.
df = df.assign(Cluster_labels=clusterer.labels_)

# `timedelta.seconds` is only the seconds *component* (it wraps every 24 hours);
# `total_seconds()` is the real elapsed duration.
seconds = int((datetime.datetime.now() - start).total_seconds())
print("Elapsed time for clustering: " + str(seconds) + " seconds")

# Persist the cluster centres as CSV. The "lat,lng" header assumes the centres
# have exactly two columns in that order -- TODO confirm against
# a2a_clustering.transform.
centroid_csv = np.asarray(clusterer.cluster_centers_)
np.savetxt(FILE_PREFIX + "centroids.csv",
    centroid_csv,
    header="lat,lng",
    delimiter=",",
    comments='')

###################
# VALIDATION STEP #
###################

# The validation helper returns a (possibly augmented) frame, which is then
# written out as the clustered dataset.
df = a2a_validation.silhouette(df, clusterer.cluster_centers_, FILE_PREFIX, "KMeans")
df.to_csv(FILE_PREFIX + "clusterized_dataset.csv")

###################
# TSP        STEP #
###################

# Solve the travelling-salesman step on the clustered dataset and store the
# result next to the other outputs.
tsp_solved = a2a_travellingsalesman.tsp(df, FILE_PREFIX)
tsp_solved.to_csv(FILE_PREFIX + 'tsp.csv')



Clustering started
Elapsed time for clustering: 5611 seconds
For n_clusters = 15 The average silhouette_score is : 0.20512462407249066


<Figure size 1800x700 with 2 Axes>

In [2]:
# Display the TSP result table computed and saved at the end of the previous cell.
tsp_solved

Unnamed: 0,cluster,or_dist,meters,time,time_emptying,seconds,bins,waypoints
0,0,31673.697,31 Km 673.70 m.,1:16:23,2:49:23,4583.5,93.0,"[{""serial"": -1, ""coords"": [45.5069182, 9.26845..."
1,1,29465.499,29 Km 465.50 m.,1:07:40,2:40:40,4060.9,93.0,"[{""serial"": -1, ""coords"": [45.5069182, 9.26845..."
2,2,39766.799,39 Km 766.80 m.,1:34:58,3:07:58,5698.3,93.0,"[{""serial"": -1, ""coords"": [45.5069182, 9.26845..."
3,3,36230.199,36 Km 230.20 m.,1:13:48,2:46:48,4428.3,93.0,"[{""serial"": -1, ""coords"": [45.5069182, 9.26845..."
4,4,37530.098,37 Km 530.10 m.,1:28:24,3:01:24,5304.7,93.0,"[{""serial"": -1, ""coords"": [45.5069182, 9.26845..."
5,5,31015.5,31 Km 15.50 m.,1:06:08,2:35:08,3968.6,89.0,"[{""serial"": -1, ""coords"": [45.5069182, 9.26845..."
6,6,43678.6,43 Km 678.60 m.,1:42:38,3:15:38,6158.1,93.0,"[{""serial"": -1, ""coords"": [45.5069182, 9.26845..."
7,7,26573.798,26 Km 573.80 m.,1:02:30,2:35:30,3750.4,93.0,"[{""serial"": -1, ""coords"": [45.5069182, 9.26845..."
8,8,42523.0,42 Km 523.00 m.,1:35:20,3:08:20,5720.8,93.0,"[{""serial"": -1, ""coords"": [45.5069182, 9.26845..."
9,9,31077.497,31 Km 77.50 m.,1:11:09,2:44:09,4269.1,93.0,"[{""serial"": -1, ""coords"": [45.5069182, 9.26845..."
