In [127]:
import json
import numpy as np
import matplotlib.pyplot as plt
import plotly.plotly as py
import pandas as pd
from sklearn.cluster import DBSCAN
%matplotlib inline

import os

# Plotly credentials are read from the environment so that no secret is stored
# in the notebook. The API key that used to be hard-coded on this line has been
# exposed and should be regenerated in the Plotly account settings.
# A missing variable raises KeyError here instead of failing later at upload.
py.sign_in(os.environ['PLOTLY_USERNAME'], os.environ['PLOTLY_API_KEY'])

In [128]:
# Load the recorded locations and keep one (lat, lng) row per record in X.
# The context manager closes the file handle as soon as it has been read
# (the previous open(...).read() left it open until garbage collection).
with open('./../trace/data/sample_locations.json') as locations_file:
    json_data = locations_file.read()
data = json.loads(json_data)
# NOTE(review): the first 500 records are skipped; the reason is not visible
# in this notebook — confirm it is intentional.
X = np.array([(c.get('lat'), c.get('lng')) for c in data[500:]])

In [129]:
# Cluster the coordinate rows of X with DBSCAN.
# eps is expressed in the units of X's columns (presumably degrees, since X
# holds lat/lng pairs — confirm this radius is what is wanted).
db = DBSCAN(min_samples=5, eps=0.3).fit(X)
labels = db.labels_

# Boolean mask, one entry per row of X, that is True only for core samples.
core_samples_mask = np.zeros(labels.shape, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True

In [130]:
# Tabular view of the raw points, with named columns, for plotting.
df = pd.DataFrame(X, columns=['lat', 'lng'])

In [131]:
def get_bounds(coords, buffer):
    """Return padded latitude/longitude ranges that enclose ``coords``.

    Parameters
    ----------
    coords : array-like with at least two columns
        Column 0 is read as latitude and column 1 as longitude. Any input
        accepted by ``np.asarray`` works (ndarray, nested list, ...).
    buffer : float
        Fraction of each axis' span that is added as padding on both ends,
        e.g. ``0.1`` widens each range by 10% of its span on either side.

    Returns
    -------
    tuple
        ``([lat_low, lat_high], [lng_low, lng_high])``.

    Raises
    ------
    IndexError
        If ``coords`` is not two-dimensional with at least two columns.
    ValueError
        If ``coords`` contains no rows.
    """
    points = np.asarray(coords)
    lats, lngs = points[:, 0], points[:, 1]

    # Vectorised reductions: the built-in min()/max() would iterate over the
    # array one element at a time in Python.
    min_lat, max_lat = lats.min(), lats.max()
    min_lng, max_lng = lngs.min(), lngs.max()

    lat_buffer = (max_lat - min_lat) * buffer
    lng_buffer = (max_lng - min_lng) * buffer
    return (
        [min_lat - lat_buffer, max_lat + lat_buffer],
        [min_lng - lng_buffer, max_lng + lng_buffer],
    )

def get_clusters(X, labels, core_samples_mask):
    """Compute one centroid per cluster from its core samples.

    Parameters
    ----------
    X : array-like, one row per point
        Point coordinates; the two columns are reported as 'lat' and 'lng'.
    labels : array-like, one entry per row of ``X``
        Cluster label of each point. The label ``-1`` is treated as noise
        and skipped.
    core_samples_mask : array-like of bool, one entry per row of ``X``
        True for the points that count towards a cluster's centroid.

    Returns
    -------
    pandas.DataFrame
        Columns ``['lat', 'lng']``, one row per cluster that has at least
        one core sample, ordered by ascending label. Empty (but with both
        columns) when no such cluster exists.
    """
    # Coerce to arrays so that the element-wise comparison and boolean
    # indexing below also work when plain lists are passed in.
    points = np.asarray(X)
    labels = np.asarray(labels)
    core_mask = np.asarray(core_samples_mask, dtype=bool)

    centroids = []
    # np.unique returns the labels sorted, which makes the row order of the
    # result deterministic (iteration order of a set is not guaranteed).
    for label in np.unique(labels):
        # skip noise
        if label == -1:
            continue
        core_points = points[(labels == label) & core_mask]
        # A label without any core sample contributes no centroid.
        if len(core_points):
            centroids.append(core_points.mean(axis=0))
    return pd.DataFrame(data=centroids, columns=['lat', 'lng'])

In [132]:
# One centroid row ('lat', 'lng') per DBSCAN cluster found above.
clusters = get_clusters(X=X, labels=labels, core_samples_mask=core_samples_mask)

In [134]:
# Two scattergeo traces: the raw track drawn as a line, and the cluster
# centroids drawn as markers on the second map ('geo2').
# NOTE: `data` is rebound here from the loaded JSON records to the trace list;
# the name is kept because the figure is assembled from `data` further down.
data = [
    # Raw points, connected in order. No `geo` key is given, so this trace is
    # drawn on the default map ('geo').
    dict(
        lat = df['lat'],
        lon = df['lng'],
        type = 'scattergeo',
        mode = 'lines',
    ),
    # Cluster centroids. The valid mode flag is 'markers' (plural); the
    # previous value 'marker' is not a recognised flag and went unnoticed
    # because the figure is sent with validate=False.
    dict(
        lat = clusters['lat'],
        lon = clusters['lng'],
        type = 'scattergeo',
        mode = 'markers',
        geo = 'geo2',
    ),
]


def _geo_panel(x_domain, lat_range, lng_range):
    """Build the settings dict of one map panel.

    x_domain is the horizontal slice of the figure the panel occupies;
    lat_range / lng_range are the [low, high] axis ranges to show.
    """
    return dict(
        showland = True,
        landcolor = 'rgb(243, 243, 243)',
        countrycolor = 'rgb(204, 204, 204)',
        lonaxis = dict(range = lng_range),
        lataxis = dict(range = lat_range),
        domain = dict(x = x_domain, y = [0, 1]),
    )


# Both maps show the same region: the extent of X padded by 10% per side.
lat_bounds, lng_bounds = get_bounds(X, 0.1)

# Side-by-side panels: 'geo' fills the left half of the figure and 'geo2'
# the right half.
layout = dict(
    title = 'Asia Trip',
    showlegend = False,
    autosize = False,
    geo = _geo_panel([0, 0.5], lat_bounds, lng_bounds),
    geo2 = _geo_panel([0.5, 1], lat_bounds, lng_bounds),
)

fig = dict(data = data, layout = layout)
py.iplot(fig, validate=False, filename='test')