In [6]:
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt 

from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans, DBSCAN, OPTICS, AgglomerativeClustering
from esda.adbscan import ADBSCAN

# Reading in data

### 1. Violent crime data

In [13]:
allcrime = pd.read_csv("data/MPS Ward Level Crime (Historical).csv")
print(allcrime.shape)
allcrime.head()

(18598, 155)


Unnamed: 0,WardName,WardCode,MajorText,MinorText,LookUp_BoroughName,201004,201005,201006,201007,201008,...,202112,202201,202202,202203,202204,202205,202206,202207,202208,202209
0,Heathrow Villages,E05013570,ARSON AND CRIMINAL DAMAGE,CRIMINAL DAMAGE,Aviation Security (SO18),0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Heathrow Villages,E05013570,MISCELLANEOUS CRIMES AGAINST SOCIETY,MISC CRIMES AGAINST SOCIETY,Aviation Security (SO18),0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Heathrow Villages,E05013570,THEFT,OTHER THEFT,Aviation Security (SO18),0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Heathrow Villages,E05013570,THEFT,SHOPLIFTING,Aviation Security (SO18),0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Heathrow Villages,E05013570,VEHICLE OFFENCES,THEFT FROM A VEHICLE,Aviation Security (SO18),0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [46]:
# filtering dataset to crimes of "Violence against the person"
df = allcrime[allcrime.MajorText == "VIOLENCE AGAINST THE PERSON"]

# excluding crimes in "Aviation Security" borough
df = df[df.LookUp_BoroughName != "Aviation Security (SO18)"]

# seeing number of wards 
print(f"Number of wards included in this study: {len(df.WardCode.unique())}")


Number of wards included in this study: 679


### 2. London geometry

In [31]:
# note that this dataset contains all 7201 MSOAs in England. This dataset is used as it contains the high resolution boundary and neighbour topology of MSOAs 
url = 'https://github.com/jreades/fsds/raw/master/data/src/Middle_Layer_Super_Output_Areas__December_2011__EW_BGC_V2-shp.zip'
! wget -P data $url

--2025-04-10 14:49:36--  https://github.com/jreades/fsds/raw/master/data/src/Middle_Layer_Super_Output_Areas__December_2011__EW_BGC_V2-shp.zip
Resolving github.com (github.com)... 20.26.156.215, 64:ff9b::141a:9cd7
Connecting to github.com (github.com)|20.26.156.215|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/jreades/fsds/master/data/src/Middle_Layer_Super_Output_Areas__December_2011__EW_BGC_V2-shp.zip [following]
--2025-04-10 14:49:37--  https://raw.githubusercontent.com/jreades/fsds/master/data/src/Middle_Layer_Super_Output_Areas__December_2011__EW_BGC_V2-shp.zip
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.108.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 7381177 (7.0M) [application/zip]
Saving to: ‘data/Middle_Layer_Super_Output_Area

In [47]:
gdf = gpd.read_file(f"zip://data/Middle_Layer_Super_Output_Areas__December_2011__EW_BGC_V2-shp.zip!Middle_Layer_Super_Output_Areas__December_2011__EW_BGC_V2.shp")
gdf.head()

Unnamed: 0,OBJECTID,MSOA11CD,MSOA11NM,MSOA11NMW,BNG_E,BNG_N,LONG,LAT,Shape__Are,Shape__Len,geometry
0,1,E02000001,City of London 001,City of London 001,532384,181355,-0.09349,51.51561,2906361.0,8936.818478,"POLYGON ((532135.138 182198.131, 532158.25 182..."
1,2,E02000002,Barking and Dagenham 001,Barking and Dagenham 001,548267,189685,0.138756,51.58652,2166163.0,8150.405928,"POLYGON ((548881.563 190845.265, 548881.125 19..."
2,3,E02000003,Barking and Dagenham 002,Barking and Dagenham 002,548259,188520,0.138149,51.57605,2143568.0,9118.196243,"POLYGON ((549102.438 189324.625, 548954.517 18..."
3,4,E02000004,Barking and Dagenham 003,Barking and Dagenham 003,551004,186412,0.176828,51.55638,2491467.0,8206.551627,"POLYGON ((551550.056 187364.705, 551478 187263..."
4,5,E02000005,Barking and Dagenham 004,Barking and Dagenham 004,548733,186824,0.144267,51.56069,1186053.0,6949.688798,"POLYGON ((549099.634 187656.076, 549161.375 18..."
