In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import pydeck as pdk
from sklearn.cluster import DBSCAN

# Load the report table from a CSV located relative to the notebook's working directory.
# Later cells read its longitude, latitude, subdistrict, district, timestamp and comment columns.
df = pd.read_csv(filepath_or_buffer="traffy_flood.csv")

In [None]:
# Preview only the first rows rather than rendering the whole frame as cell output
df.head()

In [None]:
# Scatter layer: one fixed-size orange dot per row of df
layer = pdk.Layer(
    type="ScatterplotLayer",
    data=df,
    pickable=True,  # picking must be enabled for the Deck tooltip below to appear
    opacity=0.6,
    get_fill_color=[255, 140, 0],
    get_radius=200,
    get_position=["longitude", "latitude"],
)

# Let pydeck fit an initial viewport to the points, then pin the zoom level to 10
view_state = pdk.data_utils.compute_view(df[["longitude", "latitude"]])
view_state.zoom = 10

# Build the deck and export it as a standalone HTML file.
# The tooltip template fills the {column} placeholders from the hovered row.
deck = pdk.Deck(
    initial_view_state=view_state,
    layers=[layer],
    tooltip={"text": "{subdistrict} {district}\n{timestamp}\n{comment}"},
)
deck.to_html("pydeck_traffy.html")

In [None]:
# Heatmap layer: point density over the same rows of df
heatmap_layer = pdk.Layer(
    type="HeatmapLayer",
    data=df,
    pickable=True,
    opacity=0.5,
    get_position="[longitude, latitude]",
)

# Centre the viewport on the mean coordinate of all rows (this rebinds view_state)
view_state = pdk.ViewState(
    zoom=10,
    longitude=df['longitude'].mean(),
    latitude=df['latitude'].mean(),
)

# Last expression of the cell, so the deck is rendered inline
pdk.Deck(initial_view_state=view_state, layers=[heatmap_layer])

In [None]:

# Overlay both layers in one deck: scatter dots first, heatmap listed after them.
# Reuses whichever view_state was assigned most recently.
pdk.Deck(
    initial_view_state=view_state,
    layers=[layer, heatmap_layer],
)


In [None]:
# DBSCAN clustering
# The raw latitude/longitude pairs are clustered as plain 2-D points with DBSCAN's
# default (Euclidean) metric, so eps=0.005 is expressed in degrees, not metres.
# NOTE: this cell adds columns to df and then rebinds df to the noise-free subset;
# cells above will see that filtered frame if they are re-run afterwards.
coords = df[['latitude', 'longitude']]
db = DBSCAN(eps=0.005, min_samples=10).fit(coords)
df['cluster'] = db.labels_

# DBSCAN labels noise points as -1; keep only rows that belong to a real cluster
df = df[df['cluster'] != -1].copy()

# Number of points per cluster, largest first (value_counts sorts descending).
# Noise rows were already dropped above, so no extra -1 filtering is needed here.
clusters_count = df['cluster'].value_counts()

unique_clusters = df['cluster'].unique()
num_clusters = len(unique_clusters)

# Sample the cyclic 'hsv' colormap at i/num_clusters so hues are spread across the
# clusters; each colour is stored as an [R, G, B] list of 0-255 ints.
colormap = plt.get_cmap('hsv')
cluster_colors = {cluster: [int(x*255) for x in colormap(i/num_clusters)[:3]]
                      for i, cluster in enumerate(unique_clusters)}

# Map cluster ID to color for each row in the dataframe
df['color'] = df['cluster'].map(cluster_colors)

# Scatter layer coloured by cluster. get_fill_color is the ScatterplotLayer fill
# accessor (the same one the first scatter layer in this notebook uses); deck.gl's
# getColor is only a deprecated alias for it.
scatter_layer = pdk.Layer(
    "ScatterplotLayer",
    df,
    get_position="[longitude, latitude]",
    get_fill_color='color',
    get_radius=200,
    opacity=0.5,
    pickable=True
)

# Centre the viewport on the mean coordinate of the clustered points
view_state = pdk.ViewState(
    latitude=df['latitude'].mean(),
    longitude=df['longitude'].mean(),
    zoom=10
)
pdk.Deck(layers=[scatter_layer], initial_view_state=view_state, tooltip={"text": "{cluster}\n{subdistrict} {district}\n{timestamp}"})


In [None]:
# Bar chart of cluster sizes (clusters_count is computed in the DBSCAN cell).
# The figure is created at its final size up front and every call targets an explicit
# Axes, instead of resizing pyplot's implicit "current figure" after drawing.
fig, ax = plt.subplots(figsize=(12, 6))
clusters_count.plot(kind='bar', color='blue', ax=ax)

# Small tick labels so that many cluster IDs fit along the x-axis
ax.tick_params(axis='x', labelsize=8)

ax.set_xlabel('Cluster')
ax.set_ylabel('Count')
ax.set_title('Size of Clusters')
plt.show()

