In [None]:
import pandas as pd
from sklearn.cluster import DBSCAN
from pydeck.data_utils import assign_random_colors

# Remote CSV (uber-raw-data-jul14.csv) from FiveThirtyEight's uber-tlc-foil-response repository.
FOIL_DATA = 'https://raw.githubusercontent.com/fivethirtyeight/uber-tlc-foil-response/master/uber-trip-data/uber-raw-data-jul14.csv'

# Download the CSV into a DataFrame; later cells read its 'Date/Time', 'Lon' and 'Lat' columns.
df = pd.read_csv(FOIL_DATA)
# Preview the first rows as the cell output.
df.head()

In [None]:
# Parse the raw 'Date/Time' strings into a datetime column with one vectorized call.
# pd.Timestamp.strptime is not implemented in pandas (it raises NotImplementedError and
# directs callers to to_datetime), so the previous per-row lambda could not run.
df['ts'] = pd.to_datetime(df['Date/Time'], format='%m/%d/%Y %H:%M:%S')

In [None]:
# Build one [Lon, Lat] list per row (longitude first, the order the layers read via
# get_position='position'). The two columns are converted in a single vectorized step
# instead of a row-by-row DataFrame.apply, which is very slow on a frame this size.
# Wrapping in a Series keeps the result as a column of Python lists aligned on df's index.
df['position'] = pd.Series(df[['Lon', 'Lat']].values.tolist(), index=df.index)

In [None]:
import pydeck

# Fill color shared by every point. The list has four components, so despite the
# name it is presumably [R, G, B, A] with a low alpha of 50 - confirm.
ORANGE_RGB = [255, 140, 0, 50]

# Scatterplot layer drawn from the full DataFrame; each point is placed using the
# [Lon, Lat] pair stored in the 'position' column and filled with ORANGE_RGB.
# NOTE(review): deck.gl's ScatterplotLayer documents `getRadius` / `radiusScale`
# rather than `radius` - confirm that radius=5 actually takes effect.
scatterplot = pydeck.Layer(
    'ScatterplotLayer',
    data=df,
    radius=5,
    get_fill_color=ORANGE_RGB,
    get_position='position',
)

In [None]:
# Fits a viewport to the center 50% of the data (view_proportion=0.5), computed from
# the per-row [Lon, Lat] pairs in df['position']. The result is used as the Deck's
# initial_view_state.
# NOTE(review): newer pydeck releases appear to expose this helper as
# `pydeck.data_utils.compute_view` - confirm the name against the installed version.
viewport = pydeck.data_utils.autocompute_viewport(df['position'], view_proportion=0.5)

In [None]:
# Assemble the Deck: the scatterplot is its only layer (so it sits at r.layers[0]),
# and the camera starts at the viewport computed from the data.
r = pydeck.Deck([scatterplot], initial_view_state=viewport)

In [None]:
# Display the configured deck as this cell's output.
r.show()

In [None]:
# Runs a DBSCAN clustering algorithm on the geospatial data.
# The model is fit on the raw [Lon, Lat] pairs, so eps=0.00001 is compared against
# distances computed directly from those coordinate values.
db = DBSCAN(eps=0.00001, min_samples=60).fit(list(df['position']))
df['labels'] = db.labels_  # one cluster label per row; -1 means "no group"

# Mean Lat/Lon of every real cluster (rows labelled -1 are excluded).
# The mean is restricted to the two coordinate columns: the frame also holds string
# and list-valued columns ('Date/Time', 'Base', 'position'), and averaging those
# raises TypeError on pandas >= 2.0 instead of being silently dropped.
centroids = (
    df[df['labels'] != -1]
    .groupby('labels')[['Lat', 'Lon']]
    .mean()
    .reset_index()
)

# Gives each cluster label a random color; the lookup is keyed by str(label)
colors_lookup = assign_random_colors(df['labels'])
colors_lookup['-1'] = [0, 0, 0, 0]  # make -1 (no group) transparent
df['color'] = df['labels'].apply(lambda g: colors_lookup[str(g)])

# Plot the recolored points in place of the original scatterplot.
# Despite its name, this layer draws every row of df colored by cluster, not the
# `centroids` frame computed above.
# NOTE(review): deck.gl's ScatterplotLayer documents `getRadius` / `radiusScale`
# rather than `radius` - confirm that radius=30 actually takes effect.
centroids_plot = pydeck.Layer(
    'ScatterplotLayer',
    data=df,
    radius=30,
    stroked=False,
    filled=True,
    get_fill_color='color',
    get_position='position')
# Swap the first (only) layer of the existing deck and push the change to the
# already-rendered visualization.
r.layers[0] = centroids_plot
r.update()