In [1]:
import requests
import matplotlib.pyplot as plt
from IPython.display import Image
import pandas as pd
import json
import geopandas as gpd
import folium
from folium.plugins import MarkerCluster
from folium.plugins import FastMarkerCluster
from io import StringIO
%matplotlib inline

1. A dataset containing details about Metro Nashville Police Department reported incidents is available at https://data.nashville.gov/Police/Metro-Nashville-Police-Department-Incidents/2u6v-ujjs. Make use of the API to find all aggravated burglary incidents that were reported during the six month period from January 1, 2021 through June 30, 2021.

In [2]:
#get geojson from nashville.gov
crime_endpoint = 'https://data.nashville.gov/resource/2u6v-ujjs.geojson'
#params to select for aggravated burglary and dates
crime_params = {
    'offense_description': 'BURGLARY- AGGRAVATED',
    '$where': 'incident_reported between \'2021-01-01\' and\'2021-06-30\'',
    '$limit': 2000
}
crime_response = requests.get(crime_endpoint, params = crime_params)

#convert dictionary to text and convert to dataframe with geopandas
crime = (gpd.read_file(StringIO(crime_response.text)))

#drop duplicates to prevent double counting incidents with multiple victims
crime = crime.drop_duplicates(subset = 'incident_number')

2. Download the 2019 census tract shapefiles for Tennessee from https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.2019.html. (The FIPS code for Tennessee is 47). Perform a spatial join to determine the census tract in which each burglary incident occurred. Which census tract had the highest number of burglaries? Warning - each incident can appear multiple times if there are multiple victims, so be sure that you aren't double-counting any incidents.

In [3]:
#download census tract file and place in data folder. Read the .shp in with Geopandas and assign to census_tract
census_tract = gpd.read_file('../data/tl_2019_47_tract/tl_2019_47_tract.shp')

#convert from geoseries to geodataframe and change crs type to match crime GeoDataFrame 'EPSG:4326'
census_tract = gpd.GeoDataFrame(census_tract, 
                           crs = crime.crs, 
                           geometry = crime['geometry'])

#confirm identical crs type
print(census_tract.crs)
print(crime.crs)
print(type(crime))
print(type(census_tract))

epsg:4326
epsg:4326
<class 'geopandas.geodataframe.GeoDataFrame'>
<class 'geopandas.geodataframe.GeoDataFrame'>


In [25]:
#join crime with census_tract
crime_census_tract = gpd.sjoin(crime, census_tract, op = 'within')

#Find the census tract with the largest number of aggravated burglaries
crime_census_tract.groupby('TRACTCE')['incident_number'].nunique().nlargest(n=1)

TRACTCE
980100    57
Name: incident_number, dtype: int64

In [17]:
crime_census_tract.columns

Index(['victim_county_resident', 'zip_code', 'victim_number', 'offense_nibrs',
       'rpa', 'latitude', 'victim_race', 'incident_number',
       'investigation_status', 'offense_number', 'offense_description',
       'longitude', 'domestic_related', 'victim_ethnicity',
       'incident_location', 'victim_description', 'location_code',
       'report_type', 'incident_reported', 'weapon_description',
       'location_description', 'weapon_primary', 'report_type_description',
       'victim_type', 'incident_status_code', 'incident_status_description',
       'zone', 'victim_gender', 'incident_occurred', 'primary_key', 'geometry',
       'index_right', 'STATEFP', 'COUNTYFP', 'TRACTCE', 'GEOID', 'NAME',
       'NAMELSAD', 'MTFCC', 'FUNCSTAT', 'ALAND', 'AWATER', 'INTPTLAT',
       'INTPTLON'],
      dtype='object')

3. For this part, you'll need to request a [census API key](https://api.census.gov/data/key_signup.html). Using the [2019 American Community Survey API](https://www.census.gov/data/developers/data-sets/acs-5year.html), obtain, for each census tract, the population (B01001_001E in the detailed tables) and the median income (S1901_C01_012E in the subject tables). Hint: Tennessee's FIPS code is 47 and Davidson County's FIPS code is 37. Merge this new data with the burglaries data above.

In [5]:
#get API key
with open('../data/jg_census_API_key.json') as fi:
    credentials = json.load(fi)
    
api_key = credentials['api_key']

In [6]:
#set up endpoint and params for population dataset
pop_endpoint = 'https://api.census.gov/data/2019/acs/acs5'
pop_params = {
    'get': 'B01001_001E',
    'for': 'tract:*',
    'in': 'state:47 county:037',
    'key': api_key
}
pop_response = requests.get(pop_endpoint, params = pop_params)
              

In [7]:
#convert pop_response to json
pop_response = pop_response.json()
#create the dataframe and simplify the name.
pop = pd.DataFrame(data = pop_response[1:], columns = pop_response[0])
#convert population column to int and rename to something that at least looks like "population"
pop['B01001_001E'] = pop['B01001_001E'].astype(int)
pop = pop.rename(columns = {'B01001_001E': 'population'})

In [10]:
#set up endpoint and params for median income dataset 
income_endpoint = 'https://api.census.gov/data/2019/acs/acs5/subject'
income_params = {
    'get': 'S1901_C01_012E',
    'for': 'tract:*',
    'in': 'state:47 county:037',
    'key': api_key
}
income_response = requests.get(income_endpoint, params = income_params)

In [12]:
income_response = income_response.json()
inc = pd.DataFrame(data = income_response[1:], columns = income_response[0])
inc['S1901_C01_012E'] = inc['S1901_C01_012E'].astype(int)
inc = inc.rename(columns = {'S1901_C01_012E': 'Median Income'})

In [13]:
from functools import reduce

In [16]:
#go back and clean up this dataset
crime_census_tract.head()

Unnamed: 0,victim_county_resident,zip_code,victim_number,offense_nibrs,rpa,latitude,victim_race,incident_number,investigation_status,offense_number,...,TRACTCE,GEOID,NAME,NAMELSAD,MTFCC,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON
0,RESIDENT,,1,220,8203,36.15,B,20210249540,Open,1,...,21002,47165021002,210.02,Census Tract 210.02,G5020,S,30654374,207994,36.3521602,-86.6464309
158,RESIDENT,,1,220,8153,36.15,W,20210223220,Open,1,...,21002,47165021002,210.02,Census Tract 210.02,G5020,S,30654374,207994,36.3521602,-86.6464309
630,RESIDENT,,1,220,8201,36.15,B,20210284867,Open,2,...,21002,47165021002,210.02,Census Tract 210.02,G5020,S,30654374,207994,36.3521602,-86.6464309
0,RESIDENT,,1,220,8203,36.15,B,20210249540,Open,1,...,80802,47155080802,808.02,Census Tract 808.02,G5020,S,22956501,0,35.8454674,-83.5220092
158,RESIDENT,,1,220,8153,36.15,W,20210223220,Open,1,...,80802,47155080802,808.02,Census Tract 808.02,G5020,S,22956501,0,35.8454674,-83.5220092


In [15]:
pop.head()

Unnamed: 0,population,state,county,tract
0,12176,47,37,15631
1,4098,47,37,15804
2,2466,47,37,17701
3,5210,47,37,17702
4,8254,47,37,18301


In [14]:
inc.head()

Unnamed: 0,Median Income,state,county,tract
0,75579,47,37,15631
1,45129,47,37,15804
2,134786,47,37,17701
3,107813,47,37,17702
4,87591,47,37,18301


In [19]:
#https://stackoverflow.com/questions/44327999/python-pandas-merge-multiple-dataframes

data = [crime_census_tract, pop, inc]
crime_pop_inc = reduce(lambda left, right: pd.merge(left, right), data)
# incidents_population_income.head()

MergeError: No common columns to perform merge on. Merge options: left_on=None, right_on=None, left_index=False, right_index=False