In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from os.path import join
import os
import xarray as xr

import crime_patterns.utilities as utils
import crime_patterns.config as config
import crime_patterns.data_management as dm

from crime_patterns.final import plotting
from crime_patterns.analysis import point_patterns 

%load_ext autoreload
%autoreload 2

In [2]:
# Root locations taken from the project configuration.
src = config.SRC
bld = config.BLD

# Input locations read by the cells below.
data_raw = src / "data"
data_clean = bld / "python" / "data"

# Output locations this notebook writes results and figures to.
results_dir = bld / "python" / "results"
plots_dir = bld / "python" / "figures"

# `exist_ok=True` makes the creation idempotent and avoids the
# check-then-create race of a separate `os.path.isdir` test.
for output_dir in (results_dir, plots_dir):
    os.makedirs(output_dir, exist_ok=True)

In [3]:
## Input file locations

# Directory holding the London boundary shapefiles inside the raw data.
shapefiles_dir = os.path.join(
    data_raw,
    "statistical-gis-boundaries-london",
    "statistical-gis-boundaries-london",
    "ESRI",
)

# Boundary layers at LSOA, ward and borough level.
london_lsoa_shp_path = os.path.join(shapefiles_dir, "LSOA_2011_London_gen_MHW.shp")
london_ward_shp_path = os.path.join(shapefiles_dir, "London_Ward.shp")
london_borough_shp_path = os.path.join(shapefiles_dir, "London_Borough_Excluding_MHW.shp")

# Files read from the cleaned-data directory.
london_greater_area_shp_path = os.path.join(data_clean, "Greater_London_Area.shp")
crime_incidences_path = os.path.join(data_clean, "city-of-london-burglaries-2019-cleaned.csv")

In [4]:
# Read every boundary layer from its shapefile.
london_borough      = gpd.read_file(filename=london_borough_shp_path)
london_ward         = gpd.read_file(filename=london_ward_shp_path)
london_lsoa         = gpd.read_file(filename=london_lsoa_shp_path)
london_greater_area = gpd.read_file(filename=london_greater_area_shp_path)

In [5]:
## Load the cleaned burglary incident records
crime_incidences = pd.read_csv(filepath_or_buffer=crime_incidences_path)

#### Analysis

In [16]:
# Evaluate hotspots from the incident coordinates, using the Greater London
# area geometry as the region.
densities = point_patterns.evaluate_hotspots(
    region=london_greater_area,
    latitudes=crime_incidences["Latitude"],
    longitudes=crime_incidences["Longitude"],
)

In [17]:
# Persist the estimates as a NetCDF4 file, overwriting any earlier copy.
densities.to_netcdf(
    path=os.path.join(results_dir, "kernel_density_estimates.nc"),
    mode="w",
    format="NETCDF4",
    engine="netcdf4",
)

In [20]:
# Read the saved estimates back into memory. `load_dataset` opens the file,
# loads its contents and closes the file again.
densities = xr.load_dataset(os.path.join(results_dir, "kernel_density_estimates.nc"))

In [27]:
# Pull the coordinate and density variables out as NumPy arrays.
# NOTE: `densities` is rebound from the dataset to an array here, so it must
# be assigned last, and this cell cannot be re-run without first re-running
# the cell that loads the dataset.
X_coords = densities["lon"].to_numpy()
Y_coords = densities["lat"].to_numpy()
densities = densities["densities"].to_numpy()

In [30]:
# Display the density array. The abbreviated repr below only shows the corner
# entries of the array, and those are all NaN.
# NOTE(review): presumably grid cells outside the region are left as NaN;
# confirm the interior holds finite values (e.g. np.isfinite(densities).any()).
densities

array([[nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan]])

In [None]:
# Cluster the incident coordinates with DBSCAN.
dbscan = point_patterns.cluster_crime_incidents_dbscan(
    longitudes=crime_incidences["Longitude"],
    latitudes=crime_incidences["Latitude"],
    min_samples=330,
    epsilon=1.5,  # km
)

In [None]:
# Cluster labels taken from the clustering result; used by the cluster plot below.
# NOTE(review): presumably a fitted scikit-learn DBSCAN, where the label -1
# marks noise points — confirm against point_patterns.
labels = dbscan.labels_

In [None]:
# Store the clustering result in the results directory.
dbscan_pickle_path = os.path.join(results_dir, "dbscan.pickle")
utils.save_object_to_pickle(dbscan, dbscan_pickle_path)

In [None]:
# Restore the clustering result from the results directory.
dbscan_pickle_path = os.path.join(results_dir, "dbscan.pickle")
dbscan = utils.load_object_from_pickle(dbscan_pickle_path)

#### Plotting

In [None]:
# Figure dimensions shared by the plotting cells; `width` is three quarters
# of `height`.
# NOTE(review): the plotting cells pass these as figsize=(height, width),
# whereas matplotlib's figsize convention is (width, height) — confirm the
# intended orientation.
height = 8
width = 0.75 * height

In [None]:
# Plot the burglary incident coordinates together with the borough boundaries.
fig, ax = plotting.plot_crime_incidents(
    crime_incidences["Longitude"],
    crime_incidences["Latitude"],
    london_borough,
    figsize=(height, width),
)

# Title and save through the returned figure, not pyplot's "current figure"
# state, so the calls always hit this figure.
fig.suptitle("Burglary Incidences 2019")
fig.savefig(os.path.join(plots_dir, 'burglary_incidents.png'), dpi=300, bbox_inches='tight')

In [None]:
# Plot the density array over the borough boundaries; the helper also returns
# the colorbar so it can be labelled here.
fig, ax, cbar = plotting.plot_hotspots(
    X_coords,
    Y_coords,
    densities,
    london_borough,
    figsize=(height, width),
)

# Rotate the colorbar label and pad it away from the tick labels.
cbar.ax.get_yaxis().labelpad = 15
cbar.ax.set_ylabel("Density (KDE)", rotation=270)

# Title and save through the returned figure (as the other plotting cells do
# for saving), not pyplot's "current figure" state.
fig.suptitle("Burglary Hotspots")
fig.savefig(os.path.join(plots_dir, 'burglary_hotspots.png'), dpi=300, bbox_inches='tight')

In [None]:

# Plot the incidents by DBSCAN cluster label together with the borough boundaries.
fig, ax = plotting.plot_dbscan_clusters(
    crime_incidences,
    labels,
    london_borough,
    figsize=(height, width),
)

# Anchor the legend at the left edge, vertically centred on the axes.
ax.legend(bbox_to_anchor=(0, 0.5))

# Title typo fixed ("DBCAN" -> "DBSCAN"); set on the returned figure, not
# pyplot's "current figure" state.
fig.suptitle("Clustered Burglary Incidences (DBSCAN)")

fig.savefig(os.path.join(plots_dir, 'burglary_clusters.png'), dpi=300, bbox_inches='tight')