# In [None]:
import pandas as pd
import folium
from folium.plugins import HeatMap
import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
from shapely.geometry import MultiPoint, Polygon
import json
import os

# Every artifact produced by this script is written under this directory.
output_dir = 'output_files'
os.makedirs(output_dir, exist_ok=True)

# Read the raw crash export, then keep only the columns used below and
# discard any row that is missing one of them.
filename = 'data.csv'
required_columns = [
    'latitude',
    'longitude',
    'crash_sev_id',
    'Crash timestamp (US/Central)',
]
data = pd.read_csv(filename, low_memory=False)
data = data.loc[:, required_columns].dropna()

# Standardize the coordinates (zero mean, unit variance per column).
scaler = StandardScaler()
coords_scaled = scaler.fit_transform(data[['latitude', 'longitude']].values)

# Train clustering model to find high-crash zones
def train_cluster_model(data, eps=0.002, min_samples=20):
    """Cluster crash locations with DBSCAN and label every row of ``data``.

    The clustering features are computed from ``data`` itself (standardized
    latitude/longitude) rather than from a module-level array, so the labels
    always line up with the rows of the DataFrame that was passed in.

    Args:
        data: DataFrame with 'latitude' and 'longitude' columns. A 'Cluster'
            column holding the DBSCAN labels is added to it in place.
        eps: DBSCAN neighbourhood radius. It is applied to the standardized
            coordinates, so it is expressed in standard deviations, not
            degrees.
        min_samples: Minimum number of neighbours for a DBSCAN core point.

    Returns:
        A ``(data, model)`` tuple: the same DataFrame object (now carrying the
        'Cluster' column) and the fitted DBSCAN estimator.
    """
    features = StandardScaler().fit_transform(
        data[['latitude', 'longitude']].values
    )
    model = DBSCAN(eps=eps, min_samples=min_samples).fit(features)
    # Kept as an in-place assignment: existing callers may rely on the input
    # frame gaining the 'Cluster' column.
    data['Cluster'] = model.labels_
    return data, model

clustered_data, model = train_cluster_model(data)

# Build one convex-hull zone per cluster. Rows labelled -1 are excluded
# (DBSCAN uses -1 for points that belong to no cluster).
clusters = clustered_data[clustered_data['Cluster'] != -1].groupby('Cluster')
polygons = []

for cluster_id, points in clusters:
    # GeoJSON positions are ordered (longitude, latitude), i.e. (x, y), so the
    # columns must be selected in that order or every zone ends up transposed.
    cluster_points = points[['longitude', 'latitude']].drop_duplicates().values
    if len(cluster_points) < 4:  # Same minimum of distinct locations as before
        continue

    hull = MultiPoint(cluster_points).convex_hull
    if hull.geom_type != 'Polygon':
        # Collinear locations collapse the hull to a LineString, which cannot
        # describe a zone; skip it instead of failing on the conversion.
        continue

    polygons.append({
        "type": "Feature",
        "geometry": hull.__geo_interface__,
        "properties": {"cluster_id": int(cluster_id)},
    })

geojson_data = {
    "type": "FeatureCollection",
    "features": polygons
}

# Write the high-crash zones to disk as an indented GeoJSON document.
geojson_path = os.path.join(output_dir, 'high_crash_zones.geojson')
with open(geojson_path, 'w') as geojson_file:
    geojson_file.write(json.dumps(geojson_data, indent=2))

# Draw every crash location on a folium map centred on the given coordinates.
austin_map = folium.Map(location=[30.2672, -97.7431], zoom_start=12)

# One row per distinct coordinate, carrying the sum of its crash_sev_id values.
aggregated_crashes = (
    data.groupby(['latitude', 'longitude'])['crash_sev_id']
    .sum()
    .reset_index()
)

# Marker radius grows linearly with the summed value: 5 at zero, 25 at the max.
max_severity = aggregated_crashes['crash_sev_id'].max()
aggregated_crashes['size'] = (
    aggregated_crashes['crash_sev_id'] / max_severity * 20 + 5
)

marker_style = {
    'color': 'red',
    'fill': True,
    'fill_color': 'red',
    'fill_opacity': 0.6,
}
marker_rows = zip(
    aggregated_crashes['latitude'],
    aggregated_crashes['longitude'],
    aggregated_crashes['size'],
)
for lat, lon, radius in marker_rows:
    marker = folium.CircleMarker(location=[lat, lon], radius=radius, **marker_style)
    marker.add_to(austin_map)

html_path = os.path.join(output_dir, "austin_all_crashes_refined.html")
austin_map.save(html_path)

# Persist the labelled crash rows (without the index) as CSV.
csv_path = os.path.join(output_dir, 'processed_crash_data.csv')
clustered_data.to_csv(csv_path, index=False)

# Summarize what was written, one path per line.
print(f"Files saved in '{output_dir}':")
for saved_path in (csv_path, geojson_path, html_path):
    print(f"- {saved_path}")
