In [1]:
%matplotlib notebook

import os
import sys
from operator import itemgetter
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
from ipyleaflet import (Map, GeoJSON)

sns.set()

from sklearn.cluster import DBSCAN

import matplotlib.dates as mdates
from matplotlib.colors import rgb2hex
import json

def get_geojson(features):
    return {
        'type': 'FeatureCollection',
        'features': features
    }

def save_geojson(features, directory, file_name):
    if not os.path.exists(directory): os.makedirs(directory)
    f = os.path.join(directory, file_name + '.geojson')
    geojson = {
      'type': 'FeatureCollection',
      'features': features
    }
    with open(f, 'w') as outfile:
        json.dump(geojson, outfile, indent = 4)
    print('Saved to ' + f)

def to_geojson(df, groupby, lat, lng, cols, dumps=True):

    def get_features(row, color):
        properties = { k: str(v) for k,v in zip(cols,[row[col] for col in cols]) }
        properties['marker-color'] = rgb2hex(color[:3])
        return {
            'type': 'Feature',
                'geometry': {
                'type': 'Point',
                'coordinates': [row[lng], row[lat]]
            },
            'properties': properties
        }

    clusters = df.groupby(groupby)

    features = []
    colors = plt.cm.Spectral(np.linspace(0, 1, len(clusters)))
    for name, group in clusters:
        i = np.random.randint(colors.shape[0])
        color = colors[i]
        group.apply(lambda row: features.append(get_features(row, color)), axis=1)
        colors = np.delete(colors, i, 0)

    if dumps:
        return json.dumps(get_geojson(features))
    return get_geojson(features)

In [2]:
nb_dir = os.path.normpath(os.path.join(os.getcwd(), '..'))
os.listdir(nb_dir)
if nb_dir not in sys.path:
    sys.path.append(nb_dir)

f = '../data/UTSEUS-MOBIKE-shanghai_full.csv'

In [28]:
mobike = pd.read_csv(f, sep=',')

In [42]:
'''
data = mobike[
    (mobike['end_location_x'] >= 121.4637279510498) & 
    (mobike['end_location_x'] <= 121.47806167602539) & 
    (mobike['end_location_y'] >= 31.216912136753745) & 
    (mobike['end_location_y'] <= 31.228435764362718)
].sample(n=400)#[:300]
'''
data = mobike.sample(n=400)
data.head()

Unnamed: 0,orderid,bikeid,userid,start_time,start_location_x,start_location_y,end_time,end_location_x,end_location_y,track
1002506,1769350,139758,15981,2016-08-26 19:02,121.482,31.281,2016-08-26 19:09,121.478,31.27,"121.478,31.270#121.478,31.271#121.479,31.271#1..."
898236,1579365,66603,16918,2016-08-26 13:19,121.494,31.301,2016-08-26 13:26,121.489,31.294,"121.488,31.295#121.488,31.296#121.488,31.297#1..."
563250,981406,89137,4373,2016-08-26 08:28,121.397,31.284,2016-08-26 08:42,121.39,31.268,"121.390,31.268#121.391,31.269#121.391,31.273#1..."
30179,52310,138505,3091,2016-08-16 06:03,121.52,31.32,2016-08-16 06:13,121.506,31.321,"121.506,31.321#121.507,31.320#121.508,31.320#1..."
479660,831163,146084,8153,2016-08-14 14:15,121.464,31.251,2016-08-14 14:23,121.459,31.255,"121.459,31.253#121.459,31.254#121.460,31.253#1..."


In [43]:
endings = data[['bikeid', 'end_location_x', 'end_location_y']].copy()
to_geojson(endings, 'bikeid', 'end_location_y', 'end_location_x', ['bikeid'])

'{"features": [{"properties": {"marker-color": "#fffebe", "bikeid": "308.0"}, "geometry": {"coordinates": [121.456, 31.195999999999998], "type": "Point"}, "type": "Feature"}, {"properties": {"marker-color": "#4b68ae", "bikeid": "3699.0"}, "geometry": {"coordinates": [121.396, 31.224], "type": "Point"}, "type": "Feature"}, {"properties": {"marker-color": "#3b92b9", "bikeid": "4176.0"}, "geometry": {"coordinates": [121.486, 31.198], "type": "Point"}, "type": "Feature"}, {"properties": {"marker-color": "#fee695", "bikeid": "4203.0"}, "geometry": {"coordinates": [121.507, 31.281999999999996], "type": "Point"}, "type": "Feature"}, {"properties": {"marker-color": "#af1446", "bikeid": "4680.0"}, "geometry": {"coordinates": [121.443, 31.198], "type": "Point"}, "type": "Feature"}, {"properties": {"marker-color": "#af1446", "bikeid": "4680.0"}, "geometry": {"coordinates": [121.443, 31.198], "type": "Point"}, "type": "Feature"}, {"properties": {"marker-color": "#fca85e", "bikeid": "6037.0"}, "geo

In [44]:
X = endings[['end_location_x', 'end_location_y']].values

In [47]:
kms_per_radian = 6371.0088

def compute_dbscan(meters):
    epsilon = (meters * 0.001) / kms_per_radian
    db = DBSCAN(eps=epsilon, algorithm='ball_tree', metric='haversine').fit(np.radians(X))
    return db.labels_

computings = map(compute_dbscan, range(50, 300))
n_computings = map(lambda x: (len(set(x)), x), computings)
labels = max(n_computings, key=itemgetter(0))[1]

for label in set(labels):
    df = endings[(labels == label)]
    endings.loc[df.index, 'cluster_num'] = int(label)

In [48]:
to_geojson(endings, 'cluster_num', 'end_location_y', 'end_location_x', ['cluster_num'])

'{"features": [{"properties": {"marker-color": "#fdbf6f", "cluster_num": "-1.0"}, "geometry": {"coordinates": [121.478, 31.27], "type": "Point"}, "type": "Feature"}, {"properties": {"marker-color": "#fdbf6f", "cluster_num": "-1.0"}, "geometry": {"coordinates": [121.48899999999999, 31.294], "type": "Point"}, "type": "Feature"}, {"properties": {"marker-color": "#fdbf6f", "cluster_num": "-1.0"}, "geometry": {"coordinates": [121.39, 31.268], "type": "Point"}, "type": "Feature"}, {"properties": {"marker-color": "#fdbf6f", "cluster_num": "-1.0"}, "geometry": {"coordinates": [121.506, 31.320999999999998], "type": "Point"}, "type": "Feature"}, {"properties": {"marker-color": "#fdbf6f", "cluster_num": "-1.0"}, "geometry": {"coordinates": [121.459, 31.255], "type": "Point"}, "type": "Feature"}, {"properties": {"marker-color": "#fdbf6f", "cluster_num": "-1.0"}, "geometry": {"coordinates": [121.421, 31.328000000000003], "type": "Point"}, "type": "Feature"}, {"properties": {"marker-color": "#fdbf6f