# In [3]:
import html

import folium
import numpy as np
import pandas as pd
from sklearn.cluster import DBSCAN

# -----------------------------------------
# 1. Load the accident data
# -----------------------------------------
# Rows missing either coordinate are dropped right after loading.
df = pd.read_csv("accidents.csv").dropna(subset=["latitude", "longitude"])

# Only the latitude & longitude columns feed the clustering step
coords = df.loc[:, ["latitude", "longitude"]].to_numpy()

print("Jumlah titik kecelakaan:", len(coords))

# -----------------------------------------
# 2. DBSCAN with the 'haversine' metric
#    -> distance measured on the Earth's surface; the radius is chosen
#       in km and converted to radians below
# -----------------------------------------
# The haversine metric in scikit-learn requires coordinates in radians
coords_rad = np.radians(coords)

# Neighbourhood radius: pick it in km, then express it as an angle (radians)
kms_per_radian = 6371.0088  # mean Earth radius in km
eps_km = 1.0                # cluster radius of about 1 km (tunable)
eps = eps_km / kms_per_radian

min_samples = 5             # at least 5 accidents to count as a hotspot

db = DBSCAN(eps=eps, min_samples=min_samples, metric="haversine")
labels = db.fit(coords_rad).labels_

df["cluster"] = labels

# Label -1 marks noise points and is not counted as a cluster.
n_noise = (labels == -1).sum()
n_clusters = len(set(labels) - {-1})
print(f"Cluster ditemukan : {n_clusters}")
print(f"Titik noise       : {n_noise}")

# -----------------------------------------
# 3. Build the base map (centred on the mean coordinate)
# -----------------------------------------
center_lat = df["latitude"].mean()
center_lon = df["longitude"].mean()

m = folium.Map(location=[center_lat, center_lon], zoom_start=12)

# Colours for the clusters (cycled when there are more clusters than entries).
# The markers are CircleMarkers, whose colour is handed to the browser as a
# CSS colour, so every entry must be a valid CSS colour name. folium.Icon
# names such as "lightred" / "darkpurple" are not valid CSS and were replaced.
# "gray" is left out because it is the colour used for noise points, and
# near-white / light-gray tones are avoided because they are hard to see or
# easy to confuse with noise.
color_palette = [
    "red", "blue", "green", "purple", "orange",
    "darkred", "lightcoral", "beige", "darkblue", "darkgreen",
    "cadetblue", "indigo", "teal", "pink", "lightblue",
    "lightgreen", "saddlebrown", "black", "gold",
]

# -----------------------------------------
# 4. Add every accident to the map
#    - cluster -1 (noise) -> gray
#    - any other cluster  -> colour picked by cluster ID
# -----------------------------------------
def _esc(value):
    """Return *value* as text that is safe to embed in popup HTML."""
    return html.escape(str(value))


# itertuples avoids building a Series per row, unlike iterrows.
for row in df.itertuples(index=False):
    c = int(row.cluster)

    # The popup body is inserted as raw HTML, so every value coming from the
    # CSV is escaped; only the <br> separators written here are markup.
    if c == -1:
        # noise: isolated accident, short popup
        color = "gray"
        popup_text = f"Noise (kecelakaan terisolasi)<br>ID: {_esc(row.accident_id)}"
    else:
        # wrap around the palette when there are more clusters than colours
        color = color_palette[c % len(color_palette)]
        popup_text = (
            f"Cluster: {c}<br>"
            f"ID: {_esc(row.accident_id)}<br>"
            f"Severity: {_esc(row.severity)}<br>"
            f"Tanggal: {_esc(row.date)} {_esc(row.time)}<br>"
            f"Road: {_esc(row.road_type)}<br>"
            f"Light: {_esc(row.light_conditions)}<br>"
            f"Weather: {_esc(row.weather_conditions)}"
        )

    folium.CircleMarker(
        location=[row.latitude, row.longitude],
        radius=5,
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.8,
        popup=folium.Popup(popup_text, max_width=250),
    ).add_to(m)

# -----------------------------------------
# 5. Write the map out as an HTML file
# -----------------------------------------
output_file = "accidents_clusters_map.html"
m.save(outfile=output_file)

# Tell the user where the file is and how to view it.
print(f"Peta disimpan sebagai: {output_file}")
print("Silakan buka file ini di browser (double-click di file explorer).")


# Output:
# Jumlah titik kecelakaan: 65
# Cluster ditemukan : 3
# Titik noise       : 10
# Peta disimpan sebagai: accidents_clusters_map.html
# Silakan buka file ini di browser (double-click di file explorer).
