In [1]:
import os
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
import pandas as pd
import altair
import seaborn
import folium
from folium import plugins
from folium.plugins import HeatMap
# Fixed seed so that anything drawn from `rng` is repeatable across runs.
RANDOM_SEED = 511
rng = np.random.default_rng(RANDOM_SEED)
# Plot configuration: seaborn "darkgrid" theme and the altair 'mimetype' renderer.
seaborn.set_theme(style="darkgrid")
altair.renderers.enable('mimetype')
# Read from the environment; os.getenv returns None when DB_URL is not set.
# This value is passed as the connection argument to the PostGIS reads below.
db_url = os.getenv('DB_URL')
# IPython autoreload extension in mode 2, so edits to imported modules are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
import geopandas
import geoplot

# NYPD Complaints

1. Do complaints show a different city-wide trend than arrests?
2. Do we see a different localized trend in Harlem?
3. What if we limit to "well-reported major crimes"? According to the economics literature, serious crimes are reported more often than other crimes, and are reported more often than arrests are carried out.
4. What about "public nuisance" minor crimes, in particular those that occur in the street?

In [7]:
# Complaint counts per census block (bctcb2020), joined to community-district
# attributes.
#   * Only complaints with complaint_date >= 2021-01-01 and
#     distance_to_precinct_meters > 125 are counted.
#   * Blocks with no row in the complaint subquery get ct_complaints = 0
#     (LEFT JOIN + coalesce).
#   * Blocks without a matching community_districts row are dropped by the
#     inner JOIN.
sql = """
SELECT
    cb.bctcb2020,
    cb.cdta_name,
    cd.borough_district_code,
    cb.boundary,
    coalesce(ct_complaints, 0) as ct_complaints,
    cd.district_name
FROM census_blocks as cb
    JOIN community_districts as cd USING (borough_district_code)
    LEFT JOIN (
        SELECT bctcb2020, count(*) AS ct_complaints
        FROM complaints
        WHERE complaint_date >= '2021-01-01'::date
            AND distance_to_precinct_meters > 125
        GROUP BY 1
    ) AS a USING (bctcb2020)
"""

# Read the result as a GeoDataFrame: `boundary` is the geometry column and
# `bctcb2020` becomes the index.
df = geopandas.GeoDataFrame.from_postgis(
    sql=sql,
    con=db_url,
    geom_col='boundary',
    index_col='bctcb2020',
)
print(df.shape)
# Transposed preview so each of the first three blocks reads as a column.
df.head(3).T

(37173, 5)


bctcb2020,10001001000,10001001001,20001001001
cdta_name,MN01 Financial District-Tribeca (CD 1 Equivalent),MN01 Financial District-Tribeca (CD 1 Equivalent),QN01 Astoria-Queensbridge (CD 1 Equivalent)
borough_district_code,101,101,201
boundary,MULTIPOLYGON Z (((-74.03995023067303 40.700890...,MULTIPOLYGON Z (((-74.04387746892309 40.690187...,MULTIPOLYGON Z (((-73.8842970359619 40.7886857...
ct_complaints,0,0,1
district_name,Tribeca/FIDI,Tribeca/FIDI,"Mott Haven, Melrose"


In [4]:
# One row per precinct: name, address, coordinates and a `location` geometry.
precinct_loc_sql = """
SELECT
    precinct,
    precinct_name,
    full_address,
    latitude, 
    longitude,
    location
FROM nypd_precincts
"""

# Read the precincts as a GeoDataFrame indexed by `precinct`, with `location`
# as the geometry column. The CRS is taken from `df` so both frames carry the
# same one.
# NOTE(review): this assumes the stored precinct points are already expressed
# in df's CRS -- confirm against the table definition.
precincts_locs_df = geopandas.GeoDataFrame.from_postgis(
    sql=precinct_loc_sql,
    con=db_url,
    geom_col='location',
    index_col='precinct',
    crs=df.crs,
)
print(precincts_locs_df.shape)
# Transposed preview of the first three precincts.
precincts_locs_df.head(3).T


(77, 5)


precinct,1.0,5.0,6.0
precinct_name,1st Precinct,5th Precinct,6th Precinct
full_address,"16 Ericsson Place New York, New York","19 Elizabeth Street New York, New York","233 West 10 Street New York, New York"
latitude,40.72022,40.71615,40.73396
longitude,-74.00701,-73.99735,-74.00543
location,POINT (-74.00701 40.72022),POINT (-73.99735 40.71615),POINT (-74.00543 40.73396)


In [9]:
# Upper Manhattan only: keep blocks whose borough_district_code is 107-112 and
# colour them by complaint count.
# NOTE(review): `map` shadows the Python built-in of the same name. The name is
# kept here because cells outside this view may reference it; consider renaming
# it across the notebook.
map = (
    df.loc[df['borough_district_code'].isin([107, 108, 109, 110, 111, 112])]
    .explore(
        column='ct_complaints',
        legend=True,
        style_kwds={'stroke': False},
    )
)
# Add the precinct locations as a second layer on the same map object.
map = precincts_locs_df.explore(
    m=map,
    marker_kwds={'radius': 2.5, 'color': 'blue', 'fill': True},
)
map
