# Ambulance Posting Project

## Modeling



In [72]:
# Imports
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.cluster import BisectingKMeans

import matplotlib.pyplot as plt
import matplotlib.cm as cm
import folium

In [54]:
# Explicit dtypes for the non-datetime columns read from the CSV
column_dtypes = {
    'incident_type_id': str,
    'latitude_x': np.float64,
    'longitude_x': np.float64,
    'beat': str,
    'priority': np.int64,
}

# Columns that read_csv should parse as datetimes
dt_columns = [
    'create_time_incident',
    'dispatch_time_primary_unit',
    'arrival_time_primary_unit',
    'closed_time_incident',
]

# Load the cleaned incidents and print the column / non-null / dtype summary
df = pd.read_csv(
    'data/cleaned_incidents.csv',
    dtype=column_dtypes,
    parse_dates=dt_columns,
)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 614803 entries, 0 to 614802
Data columns (total 10 columns):
 #   Column                      Non-Null Count   Dtype         
---  ------                      --------------   -----         
 0   create_time_incident        614803 non-null  datetime64[ns]
 1   dispatch_time_primary_unit  614803 non-null  datetime64[ns]
 2   arrival_time_primary_unit   584851 non-null  datetime64[ns]
 3   closed_time_incident        614803 non-null  datetime64[ns]
 4   incident_type_id            614803 non-null  object        
 5   latitude_x                  614803 non-null  float64       
 6   longitude_x                 614803 non-null  float64       
 7   beat                        614760 non-null  object        
 8   priority                    614803 non-null  int64         
 9   shift                       614803 non-null  int64         
dtypes: datetime64[ns](4), float64(2), int64(2), object(2)
memory usage: 46.9+ MB


In [131]:
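# Parameters of get_combined_weights:
#   alpha   = weight of every call that is not priority 1 (e.g. pri 2 calls).  Pri 1 calls are always 1
#   rwratio = share of the combined weight that comes from recency; the remaining (1 - rwratio) comes from priority
#   lmb     = exponential scaling for time decay
#             Good values for lmb are 0.05 to 0.5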

# Work on a copy so the frame loaded from disk is left as-is
results_df = df.copy()

# Fit the scaler on the raw coordinates, then store the standardized values
# in two new columns next to the originals
scaler = StandardScaler().fit(results_df[['latitude_x','longitude_x']])
results_df[['latitude_scaled','longitude_scaled']] = scaler.transform(
    results_df[['latitude_x','longitude_x']]
)

def get_combined_weights(data, alpha=0.5, lmb=0.1, rwratio=0.2):
    """Compute one sample weight per incident from its recency and priority.

    The returned weight is::

        rwratio * recency + (1 - rwratio) * priority

    * ``recency`` is ``exp(-lmb * age)``, where ``age`` is the incident's
      creation time min-max scaled onto [0, 10] with the newest incident at 0
      and the oldest at 10. The newest incident therefore has recency 1.
    * ``priority`` is 1 for priority-1 incidents and ``alpha`` for all others.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``create_time_incident`` column (datetimes, or values
        ``pd.to_datetime`` can parse) and a ``priority`` column.
    alpha : float, default 0.5
        Priority weight given to every incident whose priority is not 1.
    lmb : float, default 0.1
        Exponential decay rate applied to the scaled age.
    rwratio : float, default 0.2
        Share of the combined weight taken by recency; priority gets
        ``1 - rwratio``.

    Returns
    -------
    numpy.ndarray
        1-D array with one weight per row of ``data``, in row order. Rows
        with a missing timestamp get a NaN weight. An empty ``data`` yields
        an empty array.
    """
    created = pd.to_datetime(data['create_time_incident'])

    # Scale the age with explicit datetime arithmetic instead of handing a
    # datetime column to a numeric scaler and relying on implicit coercion.
    newest = created.max()
    span = newest - created.min()
    if pd.notna(span) and span > pd.Timedelta(0):
        age = ((newest - created) / span).to_numpy(dtype=float) * 10
    else:
        # No spread in time (single timestamp or no rows). A (0, 10) min-max
        # scaling maps a zero-range column to 0, i.e. an age of 10 for every
        # row, so that result is kept here.
        age = np.full(len(created), 10.0)
    recency_weights = np.exp(-lmb * age)

    # Priority 1 always counts fully; everything else is down-weighted to alpha
    priority_weights = np.where(data['priority'] == 1, 1, alpha)

    # Blend the two components
    return (rwratio * recency_weights) + ((1 - rwratio) * priority_weights)

num_clusters = 26

# Per-incident sample weights (function defaults for alpha, lmb, rwratio)
results_df['weight'] = get_combined_weights(results_df)

# Weighted bisecting k-means on the standardized coordinates
bisect_kmeans = BisectingKMeans(n_clusters=num_clusters, random_state=0)
bisect_kmeans.fit(
    results_df[['latitude_scaled','longitude_scaled']],
    sample_weight=results_df['weight'],
)

# Cluster centers converted back from scaled units to lat & long for mapping
cluster_centers = scaler.inverse_transform(bisect_kmeans.cluster_centers_)

# Assign every incident to a cluster
results_df['cluster'] = bisect_kmeans.predict(
    results_df[['latitude_scaled','longitude_scaled']]
)

In [132]:
# Map clusters for one month of data
year_month = '2024-12'

# Keep only the incidents created in the selected calendar month
mask = results_df['create_time_incident'].dt.to_period('M') == pd.Period(year_month, freq='M')
month_slice = results_df[mask]
# Plot a 50% sample of the month; the fixed seed makes the sampled points
# (and therefore the rendered map) identical on every run
month_slice = month_slice.sample(frac=0.5, random_state=0)

# Center the map on the mean coordinate of all incidents
m = folium.Map(location=[results_df['latitude_x'].mean(), results_df['longitude_x'].mean()], zoom_start=12)

# One viridis color per cluster index, formatted as a CSS rgba() string
colors = cm.viridis(np.linspace(0, 1, num_clusters))
color_map = {i: f'rgba({int(r*255)}, {int(g*255)}, {int(b*255)}, 1.0)' for i, (r, g, b, _) in enumerate(colors)}

# Add points from the DataFrame to the map
for row in month_slice.itertuples(index=False):
    marker_color = color_map[row.cluster]
    folium.CircleMarker(
        location=(row.latitude_x, row.longitude_x),
        radius=6,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.6,
        # No same-type quotes nested inside the f-string: that form is a
        # SyntaxError on Python versions before 3.12
        popup=f'Time: {row.create_time_incident}, Priority: {row.priority}'
    ).add_to(m)

# Add cluster centers to the map
for index, center in enumerate(cluster_centers):
    folium.Marker(
        location=center,
        icon=folium.Icon(color='black', icon='info-sign'),
        popup=f'Center for Cluster {index}'
    ).add_to(m)

display(m)