In [9]:
import numpy as np
import pandas as pd
import plotly.express as px 
import plotly.graph_objects as go
from sklearn.cluster import KMeans
from sklearn.cluster import DBSCAN
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.metrics import silhouette_score

In [10]:
# Load the raw CSV and keep a fixed-seed random sample of 125,000 rows so the
# rest of the notebook is reproducible and the clustering stays tractable.
df = pd.read_csv('uber-raw-data-may14.csv').sample(n=125000, random_state=0)

In [11]:
# "Date/Time" is split on the space: the first token is kept as the date
# (parsed to datetime64), the second token as the raw time string.
df["Date"] = pd.to_datetime(df["Date/Time"].str.split(" ").str.get(0))
df["Time"] = df["Date/Time"].str.split(" ").str.get(1)

# Calendar features derived from the parsed date.
# pandas numbers weekdays from 0 (Monday) to 6 (Sunday).
df["day_of_week"] = df["Date"].dt.day_of_week
df["day"] = df["Date"].dt.day

In [12]:
def split_df_days(df, day:int):
    """Return the clustering inputs for one weekday.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the columns "day_of_week", "Lat", "Lon"
        and "Base".
    day : int
        Value of "day_of_week" to keep.

    Returns
    -------
    pandas.DataFrame
        An independent copy holding only the "Lat", "Lon" and "Base" columns
        of the rows whose "day_of_week" equals ``day``.
    """
    # Select rows and columns in a single .loc call and detach the result with
    # .copy(): callers add a "cluster" column to the returned frame, which
    # would otherwise trigger SettingWithCopyWarning on a chained slice.
    return df.loc[df["day_of_week"] == day, ["Lat", "Lon", "Base"]].copy()

# One feature frame per "day_of_week" value, 0 through 6.
X_0, X_1, X_2, X_3, X_4, X_5, X_6 = (
    split_df_days(df, weekday) for weekday in range(7)
)

In [13]:
def pipe_norm(X):
    """Scale numeric columns and one-hot encode the remaining columns of ``X``.

    Columns whose dtype is an integer or floating-point type (including the
    pandas nullable ``Int64``/``Float64`` extension dtypes) go through a
    ``StandardScaler``; every other column goes through
    ``OneHotEncoder(drop='first')``.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature frame to transform.

    Returns
    -------
    The result of ``ColumnTransformer.fit_transform(X)``: numeric columns
    first, encoded categorical columns after. Whether this is a dense array
    or a sparse matrix is decided by scikit-learn.
    """
    numeric_features = []
    categorical_features = []
    for column, dtype in X.dtypes.items():
        # Use pandas' dtype predicates rather than matching substrings of
        # str(dtype): the substring test is case-sensitive (misses "Int64" /
        # "Float64") and also matches unrelated names such as "interval[...]".
        if pd.api.types.is_integer_dtype(dtype) or pd.api.types.is_float_dtype(dtype):
            numeric_features.append(column)
        else:
            categorical_features.append(column)

    numeric_transformer = Pipeline(steps=[
        ('scaler', StandardScaler())
    ])

    categorical_transformer = Pipeline(
        steps=[
        ('encoder', OneHotEncoder(drop='first'))
        ])

    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, numeric_features),
            ("cat", categorical_transformer, categorical_features)
        ])

    # The preprocessor is fitted on X itself, so scaling statistics and
    # categories are specific to the frame passed in.
    return preprocessor.fit_transform(X)

# Preprocess each weekday frame independently (a separate fit per frame).
X_norm_0, X_norm_1, X_norm_2, X_norm_3, X_norm_4, X_norm_5, X_norm_6 = (
    pipe_norm(day_frame)
    for day_frame in (X_0, X_1, X_2, X_3, X_4, X_5, X_6)
)

In [14]:
# A single DBSCAN estimator is refitted on each weekday's preprocessed matrix;
# the labels from each fit are stored on the matching frame as "cluster".
db = DBSCAN(eps=0.5, min_samples=300, metric="manhattan")

for day_frame, day_features in zip(
    (X_0, X_1, X_2, X_3, X_4, X_5, X_6),
    (X_norm_0, X_norm_1, X_norm_2, X_norm_3, X_norm_4, X_norm_5, X_norm_6),
):
    # fit() returns the estimator itself, so labels_ is read from this fit.
    day_frame["cluster"] = db.fit(day_features).labels_

In [15]:
# Keep only rows that were assigned to a cluster (label -1 is discarded).
X_0, X_1, X_2, X_3, X_4, X_5, X_6 = (
    day_frame[day_frame["cluster"].ne(-1)]
    for day_frame in (X_0, X_1, X_2, X_3, X_4, X_5, X_6)
)

In [16]:
# Interactive map with one scatter trace per weekday and a dropdown that shows
# exactly one trace at a time. Labels are listed in "day_of_week" order
# (0 = first label), matching the order of the frames below.
day_labels = ["Lundi", "Mardi", "Mercredi", "Jeudi", "Vendredi", "Samedi", "Dimanche"]
day_frames = [X_0, X_1, X_2, X_3, X_4, X_5, X_6]

fig = go.Figure()

for position, day_frame in enumerate(day_frames):
    fig.add_trace(go.Scattermapbox(
        lat=day_frame["Lat"],
        lon=day_frame["Lon"],
        mode='markers',
        # Only the first weekday is shown initially; the dropdown toggles the rest.
        visible=(position == 0),
        marker=go.scattermapbox.Marker(
            size=8,
            color=day_frame["cluster"],
            # Same opacity on every trace so the markers look identical
            # whichever weekday is selected.
            opacity=0.7,
        ),
    ))

fig.update_layout(
    title=go.layout.Title(text="Exploring different variables", x=0.5),
    showlegend=False,
)

# One button per weekday; each button's visibility mask is True only at the
# position of its own trace.
day_buttons = [
    go.layout.updatemenu.Button(
        label=label,
        method="update",
        args=[{"visible": [index == position for index in range(len(day_frames))]}],
    )
    for position, label in enumerate(day_labels)
]

fig.update_layout(
    updatemenus=[go.layout.Updatemenu(active=0, buttons=day_buttons)],
    mapbox_style="carto-positron",
    mapbox=dict(
        center=dict(lat=40.77, lon=-73.95),
        zoom=8,
    ),
)

fig.show()