In [1]:
# Imports.
import io
import json
import warnings

import geopandas as gpd
import numpy as np
import pandas as pd
import requests
from census import Census
from shapely.geometry import Point
from us import states
warnings.filterwarnings('ignore')

In [2]:
# Query the Nashville open-data API for aggravated burglary incidents whose
# incident_reported floating timestamp falls between 2021-01-01 and 2021-07-01.
# The filters go through `params` so requests URL-encodes the spaces and quotes,
# instead of relying on a hand-built query string with a line continuation inside it.
# NOTE(review): Socrata-style endpoints page their results when no `$limit` is given --
# confirm this query is not being silently truncated.
response = requests.get(
    'https://data.nashville.gov/resource/2u6v-ujjs.json',
    params = {
        'offense_description': 'BURGLARY- AGGRAVATED',
        '$where': 'incident_reported between "2021-01-01T00:00:00.000" and "2021-07-01T00:00:00.000"',
    },
    timeout = 60,  # do not hang the kernel forever on a stalled connection
)
# Fail loudly on an HTTP error rather than trying to parse an error payload as data.
response.raise_for_status()

# One row per incident_number. The payload is wrapped in StringIO because newer pandas
# no longer accepts a literal JSON string in read_json; this works on older versions too.
burglaries = (
    pd.read_json(io.StringIO(response.text))
    .drop_duplicates(subset = "incident_number")
)

# Pull out crime dates and times.
# Parse the timestamp once and derive every part from it; unparseable values become
# NaT (and therefore missing Month/Day/Time) instead of raising.
occurred = pd.to_datetime(burglaries['incident_occurred'], errors = 'coerce')
burglaries['Month'] = occurred.dt.month_name()
burglaries['Day'] = occurred.dt.day
burglaries['Time'] = occurred.dt.time

# Persist the tabular version (still carrying longitude/latitude) before converting to geometry.
burglaries.to_csv("../data/Write_Data_Here/burglaries.csv", index = False)

#Create GeoDataFrame using longitude and latitude in burglaries dataframe.
# Points are built positionally in row order, then the raw coordinate columns are dropped.
geometry = [Point(xy) for xy in zip(burglaries['longitude'], burglaries['latitude'])]
burglaries = burglaries.drop(['longitude', 'latitude'], axis = 1)
geo_burglaries = gpd.GeoDataFrame(burglaries, crs = "EPSG:4269", geometry = geometry)

In [3]:
# Use Geopandas to read in census tract files
census_tracts = gpd.read_file("../data/tl_2019_47_tract.shp")

# Spatially join each burglary point to the census tract polygon that contains it.
# NOTE(review): newer geopandas renames the `op` keyword to `predicate`; switch the
# keyword when running this on an upgraded geopandas.
geo_burg_census = (
    gpd.sjoin(geo_burglaries, census_tracts, how = "inner", op = "within")
    .rename(columns = {'NAMELSAD':'Census_Tract'})
)

# Census Tract 163 is where the most incidents have occurred.
# Also create dataframe for number of burglaries per census tract in Davidson County, TN for later question.
# Despite its name this frame holds raw counts per tract; the per-1,000 rate is derived later.
# rename_axis + reset_index(name=...) yields the same two columns regardless of how the
# installed pandas labels the output of value_counts().
burglaries_per_1000 = (
    geo_burg_census['Census_Tract']
    .value_counts()
    .rename_axis('Census_Tract')
    .reset_index(name = 'Burglary_Number')
)

# Display the tract(s) with the highest burglary count (ties are all shown).
burglaries_per_1000[burglaries_per_1000['Burglary_Number'] == burglaries_per_1000['Burglary_Number'].max()]

Unnamed: 0,Census_Tract,Burglary_Number
0,Census Tract 163,52


In [4]:
# Read the Census API key from the local credentials file and open a client with it.
with open('../data/census.json') as fi:
    credentials = json.load(fi)
c = Census(credentials['api_key'])

# Both requests cover the same geography: state '47', county '037', every tract.
DAVIDSON_TRACTS = ('47', '037', Census.ALL)

# 2019 ACS 5-year tract-level pulls: B01001_001E from the detailed tables and
# S1901_C01_012E from the subject tables (renamed to Total_Population and
# Median_Income further down).
davidson_pop = c.acs5.state_county_tract(
    ['NAME', 'B01001_001E'], *DAVIDSON_TRACTS, year = 2019
)
davidson_inc = c.acs5st.state_county_tract(
    ['NAME', 'S1901_C01_012E'], *DAVIDSON_TRACTS, year = 2019
)

# Create GeoDataFrames, clean them up a little and merge them into geo_burg_census with an attribute join.
def convert_clean_and_merge(acquirer, *args):
    """Fold one or more census result sets into `acquirer`, one merge per result set.

    Each positional argument after `acquirer` is turned into a DataFrame and must
    provide the columns 'NAME', 'state', 'county' and 'tract'. For every such table:

    * 'Census_Tract' is set to the text of 'NAME' before its first comma;
    * 'B01001_001E' / 'S1901_C01_012E' are renamed to 'Total_Population' /
      'Median_Income' when present;
    * 'NAME', 'state', 'county' and 'tract' are dropped;
    * the table is merged into the running result with pandas' default merge
      (inner join on whatever column names the two frames share).

    Returns the merged frame; `acquirer` itself is returned when no tables are given.
    """
    friendly_names = {
        'B01001_001E': 'Total_Population',
        'S1901_C01_012E': 'Median_Income',
    }
    api_only_columns = ['NAME', 'state', 'county', 'tract']

    merged = acquirer
    for records in args:
        tract_table = pd.DataFrame(records)
        # Keep only the leading "Census Tract ..." part of the full NAME string.
        name_parts = tract_table['NAME'].str.split(',', expand = True)
        tract_table["Census_Tract"] = name_parts[0]
        tract_table = tract_table.rename(columns = friendly_names)
        tract_table = tract_table.drop(columns = api_only_columns)
        merged = merged.merge(tract_table)
    return merged

# Merge the population and income results into the joined burglary/tract frame.
# Note: this rebinds geo_burg_census to the merged result.
geo_burg_census = convert_clean_and_merge(geo_burg_census, davidson_pop, davidson_inc)

In [5]:
# Merge tracts and population subset of geo_burg_census with burglaries_per_1000.
# geo_burg_census has one row per incident, so only the first row of each tract is kept
# to get a single population / income record per tract.
# `subset` is passed as a plain column label; a set is not an ordered sequence and is
# not a reliable argument for drop_duplicates.
tract_attributes = (
    geo_burg_census[['Census_Tract', 'Total_Population', 'Median_Income', 'incident_occurred']]
    .drop_duplicates(subset = 'Census_Tract')
)
merged_burglaries_per_1000 = pd.merge(burglaries_per_1000, tract_attributes, on = "Census_Tract")

# Then merge Davidson only census tract info with merged_burglaries_per_1000 (for mapping purposes).
# The GeoDataFrame is the left-hand side of the merge so the result keeps its geometry.
davidson_tracts = census_tracts.loc[census_tracts['COUNTYFP'] == '037']
merged_burglaries_per_1000 = davidson_tracts.merge(
    merged_burglaries_per_1000,
    left_on = 'NAMELSAD',
    right_on = 'Census_Tract',
)

# Create a column for burglaries per 1,000 people.
merged_burglaries_per_1000["Burglaries_per_1000"] = (
    merged_burglaries_per_1000["Burglary_Number"]/merged_burglaries_per_1000["Total_Population"] * 1000
)

# Remove potentially hidden infinite or NaN values, then keep only tracts with a
# positive population and a positive median income.
# The filter is a callable so it is evaluated on the frame *after* dropna(), rather than
# on a mask built from the pre-dropna frame. The constant Intercept column is added in
# the same chain, and the final selection keeps only the columns that are exported.
merged_burglaries_per_1000 = (
    merged_burglaries_per_1000
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
    .loc[lambda tracts: (tracts['Total_Population'] > 0) & (tracts['Median_Income'] > 0)]
    .assign(Intercept = 1)
    [['ALAND',
      'AWATER',
      'INTPTLAT',
      'INTPTLON',
      'geometry',
      'Census_Tract',
      'Burglary_Number',
      'Total_Population',
      'Median_Income',
      'Burglaries_per_1000',
      'Intercept']]
)

# NOTE(review): the ESRI Shapefile format limits field names to 10 characters, so the
# longer column names above are expected to be shortened in the written file -- confirm
# the names that downstream readers of this .shp rely on.
merged_burglaries_per_1000.to_file("../data/Write_Data_Here/merged_burglaries_per_1000.shp")