# Spatial DBSCAN Analysis with PyMapGIS

This notebook demonstrates spatial clustering using PyMapGIS's SpatialDBSCAN implementation on simulated incident data for Little Rock, Arkansas.

In [None]:
# ==============================================================================
# CELL 1 – Install required libraries
# ==============================================================================
# PyMapGIS pulls in GeoPandas + scikit-learn; folium/mapclassify handle mapping.
# ==============================================================================
!pip install geopandas pymapgis folium mapclassify -q

print("✅ Required libraries have been installed.")
print("\n🔴 IMPORTANT: PLEASE RESTART THE SESSION NOW! 🔴")
print("Runtime ➜ Restart session  (Ctrl+M .)")

In [None]:
# ==============================================================================
# CELL 2 – Imports for the Spatial DBSCAN analysis
# ==============================================================================
# All third-party dependencies are imported here, once, so that the later cells
# can be re-run in order on a fresh kernel.
import pymapgis as pmg  # top-level package; the cells shown here only use its `ml` submodule
from pymapgis.ml import SpatialDBSCAN  # clustering estimator fitted in step 3
import geopandas as gpd  # GeoDataFrame container, re-projection and .explore() map
from shapely.geometry import Point  # point geometries for the simulated incidents
import pandas as pd  # builds the column-less feature frame passed to the model
import numpy as np  # seeded random sampling of incident coordinates
from IPython.display import display  # renders the interactive map in the output area

print("✅ Supporting libraries imported.")

In [None]:
# ------------------------------------------------------------------ #
# 1. Simulate incidents: two Little Rock hotspots + statewide noise  #
# ------------------------------------------------------------------ #
np.random.seed(42)  # seed NumPy's global generator so reruns give the same points


def _normal_cluster(centre_lon, centre_lat, count, spread=0.002):
    """Return `count` Points scattered normally around a centre.

    `spread` is the standard deviation in degrees (0.002° ≈ 200 m).
    For every point the longitude is sampled before the latitude.
    """
    cluster = []
    for _ in range(count):
        lon = np.random.normal(centre_lon, spread)
        lat = np.random.normal(centre_lat, spread)
        cluster.append(Point(lon, lat))
    return cluster


def _uniform_scatter(lon_bounds, lat_bounds, count):
    """Return `count` Points drawn uniformly from a lon/lat bounding box.

    Each bound is a `(low, high)` pair in degrees; longitude is sampled
    before latitude for every point.
    """
    lon_low, lon_high = lon_bounds
    lat_low, lat_high = lat_bounds
    scatter = []
    for _ in range(count):
        lon = np.random.uniform(lon_low, lon_high)
        lat = np.random.uniform(lat_low, lat_high)
        scatter.append(Point(lon, lat))
    return scatter


# Hotspot 1 – Downtown (River Market area)
downtown = _normal_cluster(-92.2896, 34.7465, count=50)

# Hotspot 2 – West Little Rock (around Chenal Pkwy)
west_lr = _normal_cluster(-92.4300, 34.7500, count=40)

# Background noise across the Arkansas bounding box (longitudes, latitudes)
noise = _uniform_scatter((-94.62, -89.64), (33.0, 36.5), count=40)

# Hotspots first, noise last; coordinates are lon/lat in WGS-84.
incidents_gdf = gpd.GeoDataFrame(
    geometry=[*downtown, *west_lr, *noise],
    crs="EPSG:4326",
)
# Sequential identifier, later shown in the map tooltip.
incidents_gdf["report_id"] = range(len(incidents_gdf))

print(f"✅ Generated {len(incidents_gdf)} simulated incidents.")

In [None]:
# ------------------------------------------------------------------ #
# 2. Re-project to a metre-based CRS                                 #
# ------------------------------------------------------------------ #
# The clustering step expresses its neighbourhood radius in metres, so the
# lon/lat points must first be projected to a CRS whose unit is the metre.
# EPSG:26952 is NAD83 / Arkansas South, a State Plane zone in metres
# (it is not a UTM zone; NAD83 / UTM zone 15N would be EPSG:26915).
incidents_m = incidents_gdf.to_crs(epsg=26952)

print("✅ Data reprojected to NAD83 / Arkansas South (EPSG:26952, metres).")

In [None]:
# ------------------------------------------------------------------ #
# 3. Spatial DBSCAN (eps in metres now)                              #
# ------------------------------------------------------------------ #
print("\n🚀 Running Spatial DBSCAN…")

# Column-less frame that shares the incidents' index: the model is given
# no attribute features, only the geometry.
X_dummy = pd.DataFrame(index=incidents_m.index)

dbscan_settings = {
    "eps": 250,             # 250 m neighbourhood radius
    "min_samples": 5,
    "spatial_weight": 1.0,  # NOTE(review): semantics defined by PyMapGIS – confirm
}
db = SpatialDBSCAN(**dbscan_settings)
db.fit(X_dummy, geometry=incidents_m.geometry)

# Attach the labels to the WGS-84 frame, which is the one that gets mapped.
incidents_gdf["cluster_id"] = db.labels_

cluster_counts = incidents_gdf["cluster_id"].value_counts()

print("   ✅ DBSCAN complete.\n")
print("--- Cluster counts ---")
print(cluster_counts)
print("----------------------")

In [None]:
# ------------------------------------------------------------------ #
# 4. Leaflet map                                                     #
# ------------------------------------------------------------------ #
print("\n🎨 Building interactive map…")

# Keyword arguments for GeoDataFrame.explore, gathered in one place.
explore_options = dict(
    column="cluster_id",                   # colour points by cluster label
    cmap="viridis",
    categorical=True,                      # labels are categories, not a scale
    tooltip=["report_id", "cluster_id"],   # fields shown on hover
    style_kwds={"radius": 6},
    tiles="CartoDB positron",
)
m = incidents_gdf.explore(**explore_options)

display(m)
print("\n🎉 Map ready!  (Cluster −1 = noise)")