In [11]:
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely import wkt
import json
import datetime
import pyproj
import os
# Run from the project root so the relative data/ paths used in this notebook resolve.
# The root can be overridden with the CRIME_PREDICTION_ROOT environment variable;
# the original machine-specific location remains the default.
os.chdir(os.environ.get("CRIME_PREDICTION_ROOT", "D:/Projects/crime-prediction"))

In [12]:
# Input CSV read by parse_preprocessed_data(); relative to the working directory.
pre_processed_file_path = 'data/preprocessed/preprocessed-4.csv'
# GeoJSON file loaded into streetlights_gdf with gpd.read_file(); relative to the working directory.
streetlights_data_file_path = 'data/preprocessed/streetlights_data.geojson'

In [13]:
def parse_preprocessed_data():
    """Load the preprocessed CSV and return it as a GeoDataFrame.

    Reads the file at ``pre_processed_file_path``, parsing ``Offense Date``
    as datetimes, ``Precinct`` / ``Offense Category`` as categoricals and
    ``Latitude`` / ``Longitude`` as 32-bit floats. An ``Id`` column holding
    each row's index label is added, and the WKT strings in the
    ``geometry`` column are converted to shapely geometries.

    Returns:
        geopandas.GeoDataFrame: the table with parsed geometries, tagged
        with CRS ``EPSG:4326``.
    """
    column_types = {
        'Precinct': 'category',
        'Offense Category': 'category',
        'Latitude': np.float32,
        'Longitude': np.float32,
    }
    records = pd.read_csv(
        pre_processed_file_path,
        parse_dates=['Offense Date'],
        dtype=column_types,
    )

    # Keep the row label as an explicit column so it can serve as a join/group key.
    records["Id"] = records.index

    # The geometry column is stored as WKT text; turn each entry into a shapely object.
    shapes = records["geometry"].apply(wkt.loads)
    return gpd.GeoDataFrame(records, geometry=shapes, crs='EPSG:4326')

In [14]:
# Load the preprocessed CSV as a GeoDataFrame (EPSG:4326, per parse_preprocessed_data).
crime_gdf = parse_preprocessed_data()

In [15]:
# Estimate a UTM CRS from the data and reproject into it, so the distance
# threshold used in the streetlight join is in the CRS's linear units
# (metres for UTM) rather than degrees.
utm_crs = crime_gdf.estimate_utm_crs()
crime_gdf_proj = crime_gdf.to_crs(utm_crs)

In [16]:
# Read the streetlights GeoJSON into a GeoDataFrame.
streetlights_gdf = gpd.read_file(streetlights_data_file_path)

In [17]:
# Reproject the streetlights into the same UTM CRS as crime_gdf_proj so the
# two layers share one coordinate system for the spatial join.
streetlights_gdf_proj = streetlights_gdf.to_crs(utm_crs)

In [18]:
# Count every streetlight within the search radius of each crime.
#
# sjoin_nearest() only returns the single closest streetlight (plus exact
# ties) per crime, so counting its matches gives a 0/1 flag rather than the
# number of lights nearby. Instead, buffer each crime point into a disc and
# join all streetlights that fall inside it.
streetlight_search_radius_m = 100  # both layers are in the UTM CRS, so this is in metres

# Only "Id" and the geometry are needed on the left side of the join.
crime_search_areas = crime_gdf_proj[["Id", "geometry"]].copy()
# NOTE: buffer() approximates the circle with a polygon, so lights lying
# within roughly the last 0.1 m of the radius may be excluded.
crime_search_areas["geometry"] = crime_search_areas.geometry.buffer(streetlight_search_radius_m)

# A constant marker column gives count() a non-null value to tally per matched light.
streetlight_points = streetlights_gdf_proj[["geometry"]].assign(streetlight_hit=1)

streetlights_within_100m = (
    crime_search_areas
    .sjoin(streetlight_points, how="left", predicate="intersects")
    .groupby("Id")["streetlight_hit"]
    .count()  # left-join rows without a match hold NaN and are not counted -> 0
    .rename('StreetLight Count')
    .to_frame()
)


In [19]:
# Attach the counts to the unprojected frame. The assignment aligns on index
# labels: streetlights_within_100m is indexed by "Id", which
# parse_preprocessed_data() set equal to crime_gdf's index, so each row
# receives its own count.
crime_gdf['StreetLight Count'] = streetlights_within_100m['StreetLight Count']

In [20]:
# Write the table, including the new 'StreetLight Count' column, to CSV without the index.
# NOTE(review): the geometry column is presumably serialised as WKT text, matching
# what parse_preprocessed_data() expects on read — confirm.
crime_gdf.to_csv('data/preprocessed/preprocessed-5.csv', index=False)