### KNN Model â€“ Geospatial Gun Violence Analysis

#### Importing Libraries

In [13]:
import pandas as pd
import geopandas as gpd
import numpy as np
import folium
from folium.plugins import HeatMap
from shapely.geometry import Point
from scipy.spatial import cKDTree
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier #https://www.geeksforgeeks.org/machine-learning/k-nearest-neighbours/
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt

#### Loading and Preparing the Dataset

In [14]:
data = pd.read_csv("GVA_w-injury_2015_2021apr_geocoded.csv")
data = data.rename(columns={"X": "longitude", "Y": "latitude"})
data['n_killed'] = data['n_killed'].fillna(0)
data['n_injured'] = data['n_injured'].fillna(0)

#### Creating GeoDataFrame and Setting Coordinate Reference System

In [15]:
data = data.drop(columns=['state', 'city_or_county', 'address', 'incident_url', 'source_url'])

gdf = gpd.GeoDataFrame(
    data,
    geometry=gpd.points_from_xy(data.longitude, data.latitude),
    crs="EPSG:4326"
)
gdf = gdf.to_crs(epsg=3857)

#### Calculating Distance to Nearest Point

In [16]:
coords = np.array(list(zip(gdf.geometry.x, gdf.geometry.y)))
tree = cKDTree(coords)
distances, indices = tree.query(coords, k=2)
gdf['dist_to_nearest'] = distances[:, 1]

buffer_radius = 2000

#### Calculating Local Neighborhood Statistics (2 km Radius)

In [17]:
gdf['count_2km'] = gdf.geometry.apply(
    lambda point: len(tree.query_ball_point([point.x, point.y], r=buffer_radius)) - 1
)

def mean_in_radius(point, radius=buffer_radius):
    neighbors_idx = tree.query_ball_point([point.x, point.y], r=radius)
    neighbors = gdf.iloc[neighbors_idx]
    return pd.Series({
        'mean_killed_2km': neighbors['n_killed'].mean(),
        'mean_injured_2km': neighbors['n_injured'].mean()
    })

gdf[['mean_killed_2km','mean_injured_2km']] = gdf.geometry.apply(lambda pt: mean_in_radius(pt))


#### Defining Features and Splitting Data for Risk Classification

In [18]:
gdf['risk_level'] = ((gdf['n_killed'] > 0) | (gdf['n_injured'] >= 3)).astype(int)
feature_cols = ['latitude', 'longitude', 'month', 'year', 'dist_to_nearest', 'count_2km']
X = gdf[feature_cols]
y = gdf['risk_level']
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=53)

#### Training K-Nearest Neighbors Classifier

In [25]:
knn = KNeighborsClassifier(
    n_neighbors=15,
    weights='distance',
    n_jobs=-1
)
knn.fit(X_train, y_train)
gdf["pred_risk"] = knn.predict(gdf[feature_cols])

#### Evaulating accuracy

In [26]:
preds = knn.predict(X_val)
print("Validation Accuracy:", accuracy_score(y_val, preds))

Validation Accuracy: 0.5870784663103016


##### The accuracy of KNN model on predictions are pretty high with 58.7%

#### Defining a Function to Predict Safety Risk at a Given Location

In [27]:
def predict_safety(lat, lon, month, year):
    point = Point(lon, lat)
    point_proj = gpd.GeoSeries([point], crs="EPSG:4326").to_crs(epsg=3857)
    x, y_ = point_proj.geometry.x.iloc[0], point_proj.geometry.y.iloc[0]

    dist, idx = tree.query([(x, y_)], k=2)
    dist_to_nearest = dist[0][1]

    count_2km = len(tree.query_ball_point((x, y_), r=buffer_radius)) - 1

    X_new = pd.DataFrame([{
        'latitude': lat,
        'longitude': lon,
        'month': month,
        'year': year,
        'dist_to_nearest': dist_to_nearest,
        'count_2km': count_2km
    }])
    pred = knn.predict(X_new)[0]
    return "HIGH RISK" if pred == 1 else "LOW RISK"

#### Visualizing Local Gun Violence Risk Around a Target Location

In [28]:
gdf_folium = gdf.to_crs(epsg=4326)
gdf_viz = gdf_folium.sample(frac=1/20, random_state=42)


m_knn = folium.Map(
    location=[39.5, -98.35],
    zoom_start=4,
    tiles="CartoDB positron"
)

risk_layer = folium.FeatureGroup(name="High Risk Locations")

for _, row in gdf_viz.iterrows():
    if row["pred_risk"] == 1:
        folium.CircleMarker(
            location=[row.geometry.y, row.geometry.x],
            radius=5,
            color="red",
            fill=True,
            fill_opacity=0.7
        ).add_to(risk_layer)

risk_layer.add_to(m_knn)

heat_data = [
    [row.geometry.y, row.geometry.x, 1]
    for _, row in gdf_viz.iterrows()
    if row["pred_risk"] == 1
]

HeatMap(
    heat_data,
    radius=15,
    blur=20,
    max_zoom=10,
    name="High Risk Density"
).add_to(m_knn)

folium.LayerControl(collapsed=False).add_to(m_knn)

m_knn