# In [2]:

import pandas as pd
import folium
from folium.plugins import HeatMap
import numpy as np


# Load the raw crash records, keep only the columns used by this script,
# and discard every row that has a missing value in any of them.
filename = 'data.csv'
required_columns = [
    'latitude',
    'longitude',
    'crash_sev_id',
    'Crash timestamp (US/Central)',
]
data = pd.read_csv(filename)[required_columns].dropna()

from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Standardize the latitude/longitude pairs with StandardScaler; the fitted
# scaler is kept in `scaler` and the transformed matrix in `coords_scaled`.
scaler = StandardScaler()
lat_lon = data[['latitude', 'longitude']].to_numpy()
coords_scaled = scaler.fit_transform(lat_lon)


def train_cluster_model(data, eps=0.005, min_samples=10):
    """Cluster crash locations with DBSCAN and label every row of *data*.

    The coordinates are taken from the frame that is passed in (not from a
    module-level variable), standardized with ``StandardScaler`` and then
    clustered, so the function works for any frame with the right columns.

    Args:
        data: DataFrame with ``'latitude'`` and ``'longitude'`` columns.
            It is modified in place: a ``'Cluster'`` column holding the
            DBSCAN label of each row is added (or overwritten).
        eps: DBSCAN ``eps`` value, applied to the standardized coordinates.
            The default is deliberately small to get tighter clusters.
        min_samples: DBSCAN ``min_samples`` value.

    Returns:
        A ``(data, model)`` tuple: the same DataFrame object that was passed
        in (now carrying the ``'Cluster'`` column) and the fitted DBSCAN
        model.
    """
    # Scale the coordinates of *this* frame so the label array always has
    # exactly one entry per row of `data`.
    coords = StandardScaler().fit_transform(data[['latitude', 'longitude']].values)
    model = DBSCAN(eps=eps, min_samples=min_samples).fit(coords)
    data['Cluster'] = model.labels_
    return data, model

# Label every crash row with its cluster; keep both the labelled frame and
# the fitted model returned by train_cluster_model.
clustered_data, model = train_cluster_model(data=data)

# Base map that the individual crash markers are drawn on.
map_center = [30.2672, -97.7431]
austin_map = folium.Map(location=map_center, zoom_start=12)


# Collapse crashes that share the exact same coordinates into one row whose
# 'crash_sev_id' is the sum of the values recorded at that location.
aggregated_crashes = data.groupby(['latitude', 'longitude']).agg({'crash_sev_id': 'sum'}).reset_index()

# Normalize the summed severity to a dot radius between 3 and 18.
max_severity = aggregated_crashes['crash_sev_id'].max()
if max_severity == 0:
    # Every location sums to 0: dividing would yield NaN/inf radii, so draw
    # all dots at the minimum size instead.
    aggregated_crashes['size'] = 3.0
else:
    # Vectorized equivalent of scaling each row individually.
    aggregated_crashes['size'] = (aggregated_crashes['crash_sev_id'] / max_severity) * 15 + 3

# Draw one red circle per aggregated location; the circle radius is the
# value stored in the 'size' column. Only the three columns that are used
# are iterated.
marker_columns = zip(
    aggregated_crashes['latitude'],
    aggregated_crashes['longitude'],
    aggregated_crashes['size'],
)
for lat, lon, marker_radius in marker_columns:
    marker = folium.CircleMarker(
        location=[lat, lon],
        radius=marker_radius,
        color='red',
        fill=True,
        fill_color='red',
        fill_opacity=0.6,
    )
    marker.add_to(austin_map)


# Write the marker map to a standalone HTML file.
austin_map.save("austin_all_crashes_refined.html")

# Heat-map input: one [latitude, longitude, weight] triple per aggregated
# location, where the weight is the summed 'crash_sev_id'.
heat_columns = ['latitude', 'longitude', 'crash_sev_id']
heat_data = aggregated_crashes[heat_columns].to_numpy().tolist()

# Render the heat layer on its own map and write it to HTML.
heat_map = folium.Map(location=[30.2672, -97.7431], zoom_start=12)
heat_layer = HeatMap(heat_data)
heat_layer.add_to(heat_map)
heat_map.save("austin_crash_heatmap_refined.html")

# Persist the cluster-labelled crash rows without the DataFrame index.
clustered_data.to_csv('processed_crash_data.csv', index=False)

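# Cell output: ModuleNotFoundError: No module named 'pandas'
# (pandas was not installed in the environment that ran this cell; install it,
# e.g. `pip install pandas`, before running the code above.)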