In [68]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from jupyterthemes import jtplot
import requests
import json
jtplot.style()

# Problem 1: Burglaries

A dataset containing details about Metro Nashville Police Department reported incidents is available at https://data.nashville.gov/Police/Metro-Nashville-Police-Department-Incidents/2u6v-ujjs. Make use of the API to find all aggravated burglary incidents that were reported during the six month period from January 1, 2021 through June 30, 2021.

In [64]:
endpoint = "https://data.nashville.gov/resource/2u6v-ujjs.geojson?"
offset = 0
limit = 10000
burg = gpd.GeoDataFrame()
while True:
    params1 = {
        "$where":"incident_reported between '2021-01-01T00:00:00.000' and '2021-06-30T00:00:00.000'",
        "$offset":offset,
        "$limit":limit
    }
    res = requests.get(endpoint, params1)
    res_gdf = gpd.read_file(res.text)
    burg = gpd.GeoDataFrame(pd.concat([burg, res_gdf], ignore_index = True))
    
    if len(res_gdf) != limit:
        break
        
    offset += limit
    
burg

Unnamed: 0,victim_county_resident,zip_code,victim_number,offense_nibrs,rpa,latitude,victim_race,incident_number,investigation_status,offense_number,...,weapon_primary,report_type_description,victim_type,incident_status_code,incident_status_description,zone,victim_gender,incident_occurred,primary_key,geometry
0,RESIDENT,37206,1,740,1021,36.165,B,20200797495,Closed,1,...,17,DISPATCHED,I,U,UNFOUNDED,211,F,2020-12-31T23:20:00,20200797495_11,POINT (-86.75600 36.16500)
1,RESIDENT,37115,1,740,1727,36.27,B,20200797477,Closed,1,...,17,DISPATCHED,I,U,UNFOUNDED,715,F,2020-12-31T23:56:00,20200797477_11,POINT (-86.69500 36.27000)
2,,37207,1,90Z,3003,36.196,,20210000019,Closed,1,...,17,SUSPECT,S,U,UNFOUNDED,631,,2021-01-01T00:06:00,20210000019_11,POINT (-86.77600 36.19600)
3,RESIDENT,37076,1,740,9509,36.198,W,20200797392,Closed,1,...,17,DISPATCHED,I,U,UNFOUNDED,525,M,2020-12-31T22:00:00,20200797392_11,POINT (-86.60600 36.19800)
4,RESIDENT,37076,2,740,9509,36.198,W,20200797392,Closed,1,...,17,DISPATCHED,I,U,UNFOUNDED,525,F,2020-12-31T22:00:00,20200797392_12,POINT (-86.60600 36.19800)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
52464,RESIDENT,37076,1,740,9601,36.188,B,20210354199,Closed,1,...,17,DISPATCHED,I,U,UNFOUNDED,525,M,2021-06-29T21:30:00,20210354199_11,POINT (-86.63000 36.18800)
52465,,,1,26B,9115,36.15,,20210354130,Open,1,...,17,,B,O,OPEN,517,,2021-06-29T06:00:00,20210354130_11,POINT (-86.69000 36.15000)
52466,RESIDENT,37027,1,740,8619,36.04,W,20210354189,Closed,1,...,17,DISPATCHED,I,U,UNFOUNDED,825,M,2021-06-26T19:00:00,20210354189_11,POINT (-86.76900 36.04000)
52467,RESIDENT,,1,695,4525,36.17,W,20210354177,Open,1,...,17,DISPATCHED,I,O,OPEN,621,M,2021-06-29T14:00:00,20210354177_11,POINT (-86.82000 36.17000)


# Problem 2: Census Tract Shapefiles

Download the 2019 census tract shapefiles for Tennessee from https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.2019.html. (The FIPS code for Tennessee is 47). Perform a spatial join to determine the census tract in which each burglary incident occurred. Which census tract had the highest number of burglaries? Warning - each incident can appear multiple times if there are multiple victims, so be sure that you aren't double-counting any incidents.

In [65]:
tn = gpd.read_file("data/tl_2019_47_tract.shp").to_crs(epsg = 4326)

In [66]:
tn_burgs = gpd.sjoin(burg, tn, op = 'within')

# Problem 3: Populations

For this part, you'll need to request a census API key. Using the 2019 American Community Survey API, obtain, for each census tract, the population (B01001_001E in the detailed tables) and the median income (S1901_C01_012E in the subject tables). Hint: Tennessee's FIPS code is 47 and Davidson County's FIPS code is 37. Merge this new data with the burglaries data above.

In [90]:
with open('C:/Users/jrior/Documents/NSS/api_keys.json') as fi:
    credentials = json.load(fi)
    
api_key = credentials['census_data']

In [91]:
endpoint = "https://api.census.gov/data/2019/acs/acs5?"
query = f"get=NAME,B01001_001E&for=tract:*&in=state:47"#&key={api_key}"
response = requests.get(endpoint+query)
response

<Response [200]>

In [92]:
res_dict = response.json()
tn_pops = pd.DataFrame().from_dict(res_dict)

In [93]:
tn_pops.columns = tn_pops.loc[0]
tn_pops = tn_pops.drop(tn_pops.index[0]).reset_index(drop = True)

In [94]:
tn_pops

Unnamed: 0,NAME,B01001_001E,state,county,tract
0,"Census Tract 156.31, Davidson County, Tennessee",12176,47,037,015631
1,"Census Tract 158.04, Davidson County, Tennessee",4098,47,037,015804
2,"Census Tract 177.01, Davidson County, Tennessee",2466,47,037,017701
3,"Census Tract 177.02, Davidson County, Tennessee",5210,47,037,017702
4,"Census Tract 183.01, Davidson County, Tennessee",8254,47,037,018301
...,...,...,...,...,...
1492,"Census Tract 115, Bradley County, Tennessee",9011,47,011,011500
1493,"Census Tract 112.01, Bradley County, Tennessee",6174,47,011,011201
1494,"Census Tract 101, Bradley County, Tennessee",5468,47,011,010100
1495,"Census Tract 105, Bradley County, Tennessee",3638,47,011,010500


# Problem 4

Create a choropleth showing the number of burglaries per 1000 residents for each census tract.

# Problem 5

Finally, we'll build some statistical models to see how well we can explain the number of aggravated burglaries using the median income of each census tract. For this, we'll be using the Generalized Linear Models module of the statsmodels library.

a. Build a "base model" - a Poisson regression model with just an intercept term with target variable the rate of burglaries per census tract. (Offset using the [log of the] population so that we are looking at the rate of burglaries per population instead of the number of burglaries.)

b. Now, build a Poisson regression model with target variable the rate of burglaries and predictor variable the median income. (Don't forget to offset by the population).

c. Finally, try out a negative binomial model. To get started with a negative binomial model, you can check out this tutorial.

d. How do your models compare? Hint: the fit models have an AIC attribute.

# Problem 6: Presentation