In [None]:
import os
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
import pandas as pd
import altair
import seaborn
RANDOM_SEED = 511
rng = np.random.default_rng(RANDOM_SEED)
seaborn.set_theme(style="darkgrid")
altair.renderers.enable('mimetype')
db_url = os.getenv('DB_URL')
%load_ext autoreload
%autoreload 2
import geopandas
import geoplot

In [None]:
## Felony arrests by census geography: tract, or block (smaller than a tract)

# By census tract: one row per tract with its arrest count.
# ct_arrests is coalesced to 0 so tracts without any arrests are not NULL/NaN
# after the LEFT JOIN (same convention as the block-level query below).
# NOTE(review): unlike the block query, this one does not filter on
# distance_to_precinct_meters — confirm whether that difference is intended.
sql = """
SELECT
    boroct2020,
    cdta_name,
    borough_district_code,
    boundary,
    coalesce(ct_arrests, 0) AS ct_arrests,
    districts.district_name
FROM census_tracts
    JOIN districts USING (borough_district_code)
    LEFT JOIN (
        SELECT boroct2020, count(*) AS ct_arrests
        FROM arrests
        GROUP BY 1
    ) AS a USING (boroct2020)
"""

# By block—slow!
# Only counts arrests recorded more than 125 m from a precinct
# (distance_to_precinct_meters > 125).
sql2 = """
SELECT
    bctcb2020,
    cdta_name,
    borough_district_code,
    boundary,
    coalesce(ct_arrests, 0) as ct_arrests,
    districts.district_name
FROM census_blocks
    JOIN districts USING (borough_district_code)
    LEFT JOIN (
        SELECT bctcb2020, count(*) AS ct_arrests
        FROM arrests
        WHERE distance_to_precinct_meters > 125
        GROUP BY 1
    ) AS a USING (bctcb2020)
"""

# Single switch for the geography level, replacing the commented-out
# alternative call. True -> census blocks (slow); False -> census tracts.
USE_CENSUS_BLOCKS = True
if USE_CENSUS_BLOCKS:
    geo_sql, geo_index_col = sql2, 'bctcb2020'
else:
    geo_sql, geo_index_col = sql, 'boroct2020'

# `boundary` is the geometry column; the frame is indexed by the census id.
df = geopandas.GeoDataFrame.from_postgis(geo_sql, db_url, geom_col='boundary', index_col=geo_index_col)

print(df.shape)
df.head(3).T

In [None]:
# Load the nypd_precincts table as a GeoDataFrame indexed by precinct,
# using the `location` column as geometry and the same CRS as `df`.
precinct_loc_sql = """
SELECT
    precinct,
    precinct_name,
    full_address,
    latitude, 
    longitude,
    location
FROM nypd_precincts
"""
precincts_locs_df = geopandas.GeoDataFrame.from_postgis(
    sql=precinct_loc_sql,
    con=db_url,
    geom_col='location',
    crs=df.crs,
    index_col='precinct',
)
# Quick sanity check: row/column counts, then the first rows transposed so
# every column is visible.
print(precincts_locs_df.shape)
precincts_locs_df.head(3).T


In [None]:
# Load precinct boundaries: nypd_precincts joined to nypd_precinct_geometries
# on `precinct`, with a constant `indicator` column of 1 on every row.
# (The variable name's spelling is kept as-is so existing references still work.)
precinect_geom_sql = """
SELECT
    precinct,
    precinct_name,
    full_address,
    boundary,
    1 as indicator
FROM nypd_precincts
    join nypd_precinct_geometries using (precinct)
"""
precincts_geom_df = geopandas.GeoDataFrame.from_postgis(
    sql=precinect_geom_sql,
    con=db_url,
    geom_col='boundary',
    crs=df.crs,
    index_col='precinct',
)
# Quick sanity check: shape, then the first rows transposed.
print(precincts_geom_df.shape)
precincts_geom_df.head(3).T


In [None]:
# Load Opioid Treatment Program locations from the programs table.
# The point is built in SQL from (longitude, latitude) with SRID 4326, so rows
# missing EITHER coordinate are excluded — a NULL longitude would otherwise
# yield a NULL geometry.
otp_locs_sql = """
SELECT
    program_number,
    program_name,
    _record_source,
    capacity_estimate,
    address_full,
    program_status,
    latitude,
    longitude,
    ST_SetSRID(ST_POINT(longitude, latitude), 4326) :: GEOGRAPHY AS location
FROM programs
WHERE program_category = 'Opioid Treatment Program'
    AND latitude IS NOT NULL
    AND longitude IS NOT NULL
"""
# The CRS is stated explicitly to match the SRID assigned in the query, rather
# than borrowed from `df`: the points are lon/lat in EPSG:4326 regardless of
# how the census boundaries are stored, and this cell no longer depends on `df`.
otp_locs_df = geopandas.GeoDataFrame.from_postgis(
    otp_locs_sql, db_url,
    geom_col='location', index_col='program_number',
    crs='EPSG:4326'
)
print(otp_locs_df.shape)
otp_locs_df.head(3).T


In [None]:
# Interactive choropleth of `ct_arrests` per census geography, without polygon
# outlines. Named `arrests_map` rather than `map` so the builtin `map()` is not
# shadowed for the rest of the kernel session.
arrests_map = df.explore(
    column='ct_arrests',
    legend=True,
    color='white',
    style_kwds={'stroke': False}
)
# Overlay point layers on the same map: precincts in blue, OTP sites in red.
# Passing `m=` draws onto the existing map instead of creating a new one.
precincts_locs_df.explore(m=arrests_map, marker_kwds={'radius': 2.5, 'color': 'blue', 'fill': True})
# Last expression of the cell: explore() returns the map it drew on, so the
# combined map is what gets displayed.
otp_locs_df.explore(m=arrests_map, marker_kwds={'radius': 2.5, 'color': 'red', 'fill': True})


In [None]:
# Plan: take the arrests that happened within, say, a 100 m radius of a precinct and spread them evenly throughout the entire precinct,
# in proportion to the area of each census unit (tract/block) times its fraction of the precinct's arrests. (Or just assign them to the precinct that contains the majority.)
# Needed: precinct geographies, and the intersections of precinct geographies with the census geographies (the original note was left unfinished here; presumably tracts/blocks).


# Pull the whole arrests_by_month table and parse `month` as a UTC timestamp.
sql = """
select *
from arrests_by_month
"""
arrests_df = pd.read_sql(sql, con=db_url).assign(
    month=lambda frame: pd.to_datetime(frame["month"], utc=True)
)

# Column groups, both excluding anything with 'Felony' in its name:
#   ct_cols   - names containing 'Arrests' but not '1k'
#   rate_cols - names containing '1k'
non_felony_cols = [name for name in arrests_df.columns if 'Felony' not in name]
ct_cols = [name for name in non_felony_cols if 'Arrests' in name and '1k' not in name]
rate_cols = [name for name in non_felony_cols if '1k' in name]

print(arrests_df.shape)
arrests_df.head(3).T

In [None]:
# Sum the `rate_cols` columns over all rows that share a month, then draw one
# line per column.
# To restrict to particular districts, filter arrests_df first, as the
# original's commented-out step did:
#   arrests_df[arrests_df['borough_district_code'].isin([110, 111])]
# NOTE(review): despite its name, `arrest_counts` aggregates `rate_cols`
# (the '1k' columns), not `ct_cols` — confirm this is intended.
monthly_groups = arrests_df.groupby('month')
arrest_counts = monthly_groups[rate_cols].sum()

seaborn.lineplot(data=arrest_counts)