In [None]:
import pandas as pd
import plotly.express as px
from sklearn.cluster import DBSCAN

In [None]:
# Directory that holds the per-time-slot CSV extracts. Edit this single
# constant to run the notebook on another machine instead of touching every
# read_csv call below.
DATA_DIR = '/Users/antoinebertin/Documents/jedha/full_stack/projects_full_stack/uber/db2'

# One raw frame per time slot; the file names mirror the variable names.
afternoon_df = pd.read_csv(f'{DATA_DIR}/afternoon_df.csv')
early_evening_df = pd.read_csv(f'{DATA_DIR}/early_evening_df.csv')
late_evening_df = pd.read_csv(f'{DATA_DIR}/late_evening_df.csv')
morning_df = pd.read_csv(f'{DATA_DIR}/morning_df.csv')
night_df = pd.read_csv(f'{DATA_DIR}/night_df.csv')

In [None]:
# Keep only the coordinate columns of every time-slot frame, cast to float32.
# The order of the targets on the left matches the order of the frames below.
afternoon_X, early_evening_X, late_evening_X, morning_X, night_X = (
    raw_frame[['Lat', 'Lon']].astype('float32')
    for raw_frame in (
        afternoon_df,
        early_evening_df,
        late_evening_df,
        morning_df,
        night_df,
    )
)

In [None]:
# Fraction of each time slot kept for clustering, and the seed that makes the
# draw reproducible.
SAMPLE_FRACTION = 0.3
RANDOM_STATE = 42

# Each sample is derived from the raw frame rather than from the previous value
# of the same variable, so re-running this cell always yields the same 30%
# sample instead of repeatedly shrinking an already-sampled frame.
afternoon_X = afternoon_df[['Lat', 'Lon']].astype('float32').sample(frac=SAMPLE_FRACTION, random_state=RANDOM_STATE)
early_evening_X = early_evening_df[['Lat', 'Lon']].astype('float32').sample(frac=SAMPLE_FRACTION, random_state=RANDOM_STATE)
late_evening_X = late_evening_df[['Lat', 'Lon']].astype('float32').sample(frac=SAMPLE_FRACTION, random_state=RANDOM_STATE)
morning_X = morning_df[['Lat', 'Lon']].astype('float32').sample(frac=SAMPLE_FRACTION, random_state=RANDOM_STATE)
night_X = night_df[['Lat', 'Lon']].astype('float32').sample(frac=SAMPLE_FRACTION, random_state=RANDOM_STATE)

# Function definition: DBSCAN clustering and map plot

In [None]:
def process_and_plot_dbscan(sampled_df, day_name, eps=0.0008, min_samples=50):
    """Cluster the points of ``sampled_df`` with DBSCAN and show them on a map.

    Points that DBSCAN labels as noise (label ``-1``) are left out of the
    figure; the remaining points are coloured by the number of points in the
    cluster they belong to.

    Parameters
    ----------
    sampled_df : pandas.DataFrame
        Frame with ``'Lat'`` and ``'Lon'`` columns. Side effect: the columns
        ``'Cluster_DBSCAN'`` (cluster label) and ``'Cluster_Count'`` (size of
        that cluster, noise included) are added to, or overwritten on, this
        frame.
    day_name : str
        Label used as the figure title and stored in the ``'Day_Name'`` column
        of the plotted frame.
    eps : float, default 0.0008
        DBSCAN neighbourhood radius, expressed in the units of the ``Lat`` /
        ``Lon`` columns and measured with the Manhattan metric.
    min_samples : int, default 50
        DBSCAN ``min_samples`` setting.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    # Colour stops are positions on the normalised Cluster_Count range.
    custom_color_scale = [
        [0, 'rgb(30,144,255)'],     # Blue for the smallest clusters
        [0.05, 'rgb(255,165,0)'],   # Orange for mid-density
        [0.1, 'rgb(255,69,0)'],     # Red-orange
        [1, 'rgb(220,20,60)'],      # Red for the highest density
    ]

    # Fit DBSCAN on the coordinates only, so the function can be called again
    # on a frame that already carries the cluster columns.
    dbscan = DBSCAN(
        eps=eps,
        min_samples=min_samples,
        metric="manhattan",
        algorithm="kd_tree",
        n_jobs=-1,
    ).fit(sampled_df[['Lat', 'Lon']])
    sampled_df['Cluster_DBSCAN'] = dbscan.labels_

    # Attach to every row the number of points sharing its cluster label.
    cluster_counts = sampled_df['Cluster_DBSCAN'].value_counts().to_dict()
    sampled_df['Cluster_Count'] = sampled_df['Cluster_DBSCAN'].map(cluster_counts)

    # Drop the noise points. `.assign` returns a new frame, so adding the
    # label column never writes into a slice of `sampled_df` and cannot raise
    # pandas' SettingWithCopyWarning.
    is_clustered = sampled_df['Cluster_DBSCAN'] != -1
    df_no_outliers = sampled_df.loc[is_clustered].assign(Day_Name=day_name)

    # Plotting
    fig = px.scatter_mapbox(df_no_outliers, lat="Lat", lon="Lon", color="Cluster_Count",
                            hover_data=['Cluster_DBSCAN'],
                            color_continuous_scale=custom_color_scale,
                            size_max=15, zoom=10)

    # Single layout update: map style, margins and title.
    fig.update_layout(
        mapbox_style="open-street-map",
        margin={"r": 0, "t": 0, "l": 0, "b": 0},
        title={
            'text': f"{day_name}",
            'y': 0.9,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': {
                'size': 24,
                'color': 'black',
                'family': "Arial, sans-serif",
            },
        },
    )

    fig.show()

# Cluster maps for each time slot

In [None]:
# Run DBSCAN on the morning sample and display the resulting map.
process_and_plot_dbscan(sampled_df=morning_X, day_name="Morning")

In [None]:
# Run DBSCAN on the afternoon sample and display the resulting map.
process_and_plot_dbscan(sampled_df=afternoon_X, day_name="Afternoon")

In [None]:
# Run DBSCAN on the early-evening sample and display the resulting map.
process_and_plot_dbscan(sampled_df=early_evening_X, day_name="Early Evening")

In [None]:
# Run DBSCAN on the late-evening sample and display the resulting map.
process_and_plot_dbscan(sampled_df=late_evening_X, day_name="Late Evening")

In [None]:
# Run DBSCAN on the night sample and display the resulting map.
process_and_plot_dbscan(sampled_df=night_X, day_name="Night")