In [None]:
import json
from datetime import datetime
import numpy as np

import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
# Load every event from the store. The file is read as one JSON document per
# line; the context manager guarantees the file handle is closed afterwards.
with open('eventstore.json', 'rt') as event_file:
    # Blank lines (e.g. a trailing newline at end of file) are skipped because
    # json.loads would raise on them.
    all_events = [json.loads(line) for line in event_file if line.strip()]

# Attach a datetime object to every event, in place.
# NOTE(review): the division by 1000 suggests 'timestamp' is in epoch
# milliseconds -- confirm against the producer of eventstore.json.
# datetime.fromtimestamp() without a tz argument returns naive local time.
for event in all_events:
    event['datetime'] = datetime.fromtimestamp(event['timestamp'] / 1000)

In [None]:
# Keep only the events whose 'class' field marks them as a destination being set.
destination_events = [
    candidate for candidate in all_events
    if candidate['class'] == 'SetDestinationEvent'
]

# Second name bound to the very same list object (not a copy).
destinations = destination_events

In [None]:
from sklearn.cluster import DBSCAN
from sklearn import metrics

from pygeocoder import Geocoder

def cluster_destinations(destinations, eps=0.0005, min_samples=3):
    """Group destination events into spatial clusters and reverse-geocode each one.

    Parameters
    ----------
    destinations : sequence of dict
        Events carrying at least the keys 'latitude' and 'longitude'.
    eps : float, optional
        DBSCAN neighbourhood radius, expressed in the same units as the
        coordinates (presumably decimal degrees -- confirm with the data source).
    min_samples : int, optional
        DBSCAN ``min_samples`` parameter.

    Yields
    ------
    dict
        One entry per cluster with the keys 'latitude' and 'longitude' (mean
        position of the cluster members), 'address' (string form of the first
        reverse-geocoding result for that mean position) and 'points' (list of
        the member events). Points labelled as noise by DBSCAN are dropped.

    Notes
    -----
    Reverse geocoding goes through ``Geocoder.reverse_geocode``, once per
    cluster. Empty input yields nothing instead of failing inside DBSCAN.
    """
    # DBSCAN cannot be fitted on zero samples; there is nothing to cluster.
    if len(destinations) == 0:
        return

    events = np.array(destinations)
    coordinates = np.array([[d['latitude'], d['longitude']] for d in destinations])
    labels = DBSCAN(eps=eps, min_samples=min_samples).fit(coordinates).labels_

    for label in np.unique(labels):
        # Label -1 is how DBSCAN marks noise points that belong to no cluster.
        if label == -1:
            continue
        members = labels == label
        positions = coordinates[members]
        subdestinations = events[members]
        assert len(positions) == len(subdestinations)
        latitude = np.mean(positions[:, 0])
        longitude = np.mean(positions[:, 1])
        address = str(Geocoder.reverse_geocode(latitude, longitude)[0])
        yield {'latitude': latitude, 'longitude': longitude, 'address': address, 'points': subdestinations.tolist()}

In [None]:
from sklearn.neighbors import KernelDensity

# Spatial clusters of the destination events; everything below is per cluster.
clusters = list(cluster_destinations(destination_events))

# Minute-of-day (hour * 60 + minute) of every event in each cluster, shaped as
# a column vector. Kept around because the plotting cell reuses it.
Xs = [
    np.array([e['datetime'].hour * 60 + e['datetime'].minute for e in cluster['points']])[:, np.newaxis]
    for cluster in clusters
]

# One Gaussian kernel density estimate over minute-of-day per cluster.
kernels = [KernelDensity(kernel='gaussian', bandwidth=15).fit(samples) for samples in Xs]

# Unnormalised prior of each cluster: the number of events it contains.
priors = [len(cluster['points']) for cluster in clusters]

In [None]:
# Plot, for every cluster, its time-of-day density scaled by the cluster's
# event count, plus a jittered rug of the raw observations below the axis.
fig, ax = plt.subplots(figsize=(20,10))
xmin = 0
xmax = 24 * 60 - 1

colors = 'bgrcmyk'
# One evaluation point per minute of the day. It is the same for every
# cluster, so it is built once instead of on each loop iteration.
X_plot = np.linspace(xmin, xmax, xmax+1)[:, np.newaxis]
for i, cluster in enumerate(clusters):
    kernel = kernels[i]
    # Wrap around the palette so more clusters than colours does not raise
    # IndexError; colours repeat from the 8th cluster onwards.
    color = colors[i % len(colors)]
    log_density = kernel.score_samples(X_plot)
    density = np.exp(log_density) * priors[i]
    ax.plot(X_plot[:, 0], density, '-', label=cluster['address'], color=color)
    # Random vertical jitter keeps coinciding observations distinguishable.
    ax.plot(Xs[i], -0.01 - 0.03 * np.random.random(Xs[i].shape[0]), '+' + color)

# legend() warns when there are no labelled artists, i.e. no clusters.
if clusters:
    ax.legend()#loc='upper left')
ax.set_xlabel('Minute of day')
ax.set_ylabel('Density weighted by number of events')
ax.set_xlim(xmin, xmax)
plt.show()

In [None]:
def suggest(hour, minute):
    """Print the destination clusters ranked by how likely they are at a given time.

    The time is converted to minute-of-day and each cluster is scored with
    its kernel density at that minute multiplied by its prior, i.e. an
    unnormalised posterior. Clusters are printed best first as
    '<rank>: <address>'.

    Reads the module-level ``clusters``, ``kernels`` and ``priors`` lists,
    which must be index-aligned.

    Parameters
    ----------
    hour : int
        Hour of day.
    minute : int
        Minute within the hour.
    """
    time = hour * 60 + minute
    print(time)
    # scikit-learn estimators expect a 2-D (n_samples, n_features) array, so
    # the single query time is wrapped as a 1x1 array rather than passed as a
    # bare scalar.
    query = np.array([[time]])
    # Likelihood times prior: without the prior factor the ranking would
    # ignore how often each destination is visited and would disagree with
    # the count-weighted curves drawn by the plotting cell.
    posteriors = [
        (i, np.exp(kernels[i].score_samples(query)[0]) * priors[i])
        for i in range(len(clusters))
    ]
    posteriors.sort(key=lambda x: x[1], reverse=True)
    suggested_destinations = [clusters[i] for i, _ in posteriors]

    for i, suggested_destination in enumerate(suggested_destinations):
        print('{0}: {1}'.format(i, suggested_destination['address']))

# Show suggestions for the current time and for two fixed example times,
# with an empty line after each listing.
now = datetime.now()
for query_hour, query_minute in ((now.hour, now.minute), (9, 45), (19, 15)):
    suggest(query_hour, query_minute)
    print()