# Spatial DBSCAN Analysis - Modesto, California

This notebook demonstrates spatial clustering using DBSCAN on simulated incident data in Modesto, California.

**Running in secure Docker environment:** `nicholaskarlson/pymapgis-jupyter:secure`

In [1]:
# ==============================================================================
# CELL 1 - Environment banner (nothing is imported or installed here)
# ==============================================================================
# PyMapGIS, GeoPandas, folium, and mapclassify ship with the secure Docker
# image, so no package installation is needed. This cell only reports which
# environment the notebook is running in; the actual imports are in CELL 2.
# ==============================================================================

ENVIRONMENT_BANNER = (
    "‚úÖ Using pre-installed libraries from secure Docker environment.",
    "üê≥ Running in nicholaskarlson/pymapgis-jupyter:secure",
    "üîí Security-hardened container with non-root user",
)

# One print per line keeps the stdout identical to three separate prints.
for banner_line in ENVIRONMENT_BANNER:
    print(banner_line)

‚úÖ Using pre-installed libraries from secure Docker environment.
üê≥ Running in nicholaskarlson/pymapgis-jupyter:secure
üîí Security-hardened container with non-root user


In [2]:
# ==============================================================================
# CELL 2 ‚Äì Spatial DBSCAN on simulated Modesto incidents
# ==============================================================================
import pymapgis as pmg
from pymapgis.ml import SpatialDBSCAN
import geopandas as gpd
from shapely.geometry import Point
import pandas as pd
import numpy as np
from IPython.display import display

print("‚úÖ Supporting libraries imported.")

# ------------------------------------------------------------------ #
# 1. Simulate incidents: two Modesto hotspots + regional noise      #
# ------------------------------------------------------------------ #
# Legacy global seed: every np.random.* call below draws from this one
# stream, so the order of the draws determines the generated points.
np.random.seed(42)

HOTSPOT_SPREAD_DEG = 0.002  # std-dev in degrees (roughly 200 m)


def _sample_hotspot(center_lon, center_lat, count, spread=HOTSPOT_SPREAD_DEG):
    """Draw `count` points normally distributed around a centre.

    For each point the longitude is drawn first and the latitude second,
    both from np.random.normal with standard deviation `spread`.
    Returns a list of shapely Points in (lon, lat) order.
    """
    samples = []
    for _ in range(count):
        lon = np.random.normal(center_lon, spread)
        lat = np.random.normal(center_lat, spread)
        samples.append(Point(lon, lat))
    return samples


# Hotspot 1 - Downtown Modesto (around 10th & I Street)
downtown_modesto = _sample_hotspot(-121.0018, 37.6391, 50)

# Hotspot 2 - Vintage Faire Mall area (North Modesto)
vintage_faire = _sample_hotspot(-121.0244, 37.6764, 40)

# Background noise: uniform draws over a lon/lat box for Stanislaus County
noise = []
for _ in range(40):
    noise_lon = np.random.uniform(-121.3, -120.7)  # Stanislaus County longitudes
    noise_lat = np.random.uniform(37.4, 37.8)      # Stanislaus County latitudes
    noise.append(Point(noise_lon, noise_lat))

# Row order is: downtown hotspot, mall hotspot, then noise.
all_points = downtown_modesto + vintage_faire + noise
incidents_gdf = gpd.GeoDataFrame(geometry=all_points, crs="EPSG:4326")
incidents_gdf["report_id"] = range(len(incidents_gdf))

print(f"‚úÖ Generated {len(incidents_gdf)} simulated incidents in Modesto area.")

PyJWT not available - JWT token validation limited
cryptography not available - advanced encryption features disabled
bcrypt not available - using fallback password hashing
WebSockets not available - install websockets package
Kafka not available - install kafka-python package
MQTT not available - install paho-mqtt package
Redis not available - install redis package
pytest not available - some testing features limited
locust not available - load testing limited
memory_profiler not available - memory analysis limited
aiohttp not available - async HTTP testing limited
memory_profiler not available - detailed memory analysis limited


‚úÖ Supporting libraries imported.
‚úÖ Generated 130 simulated incidents in Modesto area.


In [3]:
# ------------------------------------------------------------------ #
# 2. Re-project to metres (needed: the DBSCAN eps is given in metres) #
# ------------------------------------------------------------------ #
# EPSG:26910 is NAD83 / UTM zone 10N, the UTM zone covering Modesto
# and the simulated longitudes. It is a UTM CRS, not a California
# State Plane zone. Its linear unit is the metre.
incidents_m = incidents_gdf.to_crs(epsg=26910)

# Fail early if the frame is still geographic: a metre-based eps would
# be meaningless on degree coordinates.
assert incidents_m.crs.is_projected, "expected a projected CRS for metre-based clustering"

print("‚úÖ Reprojected to UTM Zone 10N for accurate distance calculations.")

‚úÖ Reprojected to UTM Zone 10N for accurate distance calculations.


In [4]:
# ------------------------------------------------------------------ #
# 3. Spatial DBSCAN on the projected points (eps is in metres)       #
# ------------------------------------------------------------------ #
print("\nüöÄ Running Spatial DBSCAN‚Ä¶")

# Geometry-only model: the feature table is an empty frame that just
# carries the same row index as the projected incidents.
X_dummy = pd.DataFrame(index=incidents_m.index)

dbscan_params = {
    "eps": 250,             # 250 m neighbourhood radius
    "min_samples": 5,
    "spatial_weight": 1.0,
}
db = SpatialDBSCAN(**dbscan_params)
db.fit(X_dummy, geometry=incidents_m.geometry)

# Store the labels on the WGS-84 frame, which the map and summary cells read.
incidents_gdf["cluster_id"] = db.labels_

cluster_counts = incidents_gdf["cluster_id"].value_counts()
print(
    "   ‚úÖ DBSCAN complete.\n",
    "--- Cluster counts ---",
    cluster_counts,
    "----------------------",
    sep="\n",
)


üöÄ Running Spatial DBSCAN‚Ä¶
   ‚úÖ DBSCAN complete.

--- Cluster counts ---
cluster_id
 0    49
-1    41
 1    40
Name: count, dtype: int64
----------------------


In [5]:
# ------------------------------------------------------------------ #
# 4. Leaflet map centered on Modesto                                 #
# ------------------------------------------------------------------ #
print("\nüé® Building interactive map of Modesto‚Ä¶")

MODESTO_CENTER = [37.6391, -121.0018]  # Downtown Modesto (lat, lon)
MODESTO_ZOOM = 12

# The view must be set through explore() itself. Without location /
# zoom_start, explore() fits the map to the bounds of all points (the
# county-wide noise included), and assigning m.location / m.zoom_start
# afterwards does not re-center the rendered map.
m = incidents_gdf.explore(
    column="cluster_id",
    cmap="viridis",
    categorical=True,
    tooltip=["report_id", "cluster_id"],
    style_kwds={"radius": 6},
    tiles="CartoDB positron",
    location=MODESTO_CENTER,
    zoom_start=MODESTO_ZOOM,
)

display(m)
print("\nüéâ Map ready! (Cluster ‚àí1 = noise)")
print("üìç Map centered on Downtown Modesto, California")


üé® Building interactive map of Modesto‚Ä¶



üéâ Map ready! (Cluster ‚àí1 = noise)
üìç Map centered on Downtown Modesto, California


In [None]:
# ------------------------------------------------------------------ #
# 5. Analysis Summary                                                 #
# ------------------------------------------------------------------ #
# Labels >= 0 are cluster members; label -1 marks noise points.
labels = incidents_gdf["cluster_id"]
clustered_labels = labels[labels >= 0]
noise_count = int((labels == -1).sum())

print("\nüìä SPATIAL DBSCAN ANALYSIS SUMMARY")
print("=" * 40)
print("Location: Modesto, California")
print(f"Total incidents: {len(incidents_gdf)}")
print(f"Clusters found: {clustered_labels.nunique()}")
print(f"Noise points: {noise_count}")
# These parameter values mirror the SpatialDBSCAN call in the clustering cell.
print("DBSCAN parameters: eps=250m, min_samples=5")
print("Coordinate system: WGS84 (EPSG:4326)")
print("Analysis projection: UTM Zone 10N (EPSG:26910)")

# Show cluster statistics: members per cluster, ordered by cluster id
cluster_stats = clustered_labels.value_counts().sort_index()
if not cluster_stats.empty:
    print("\nüéØ Cluster sizes:")
    for cluster_id, size in cluster_stats.items():
        print(f"   Cluster {cluster_id}: {size} incidents")

print("\n‚úÖ Analysis complete! Check the map above for spatial patterns.")