In [None]:
# Dependencies
import pandas as pd
import numpy as np
import requests
import json
import re
from pprint import pprint

### Neighborhood info in the ACS file
http://www.mncompass.org/profiles/neighborhoods/minneapolis-saint-paul

Source: 2014-2018 American Community Survey 5-year estimates, adjusted to fit current neighborhood boundaries using the 2010 Census counts. The 5-year estimates represent averages of data collected over that time period. (courtesy of mncompass.org)

In [None]:
# Read the ACS neighborhood profile download into a DataFrame,
# then show its dimensions and a one-row preview.
ACS = pd.read_csv(
    "resources/profile_download_file_msp_neighborhoods_2014_2018.csv",
    encoding="UTF-8",
)
print(ACS.shape)
ACS.head(1)

In [None]:
# Narrow the ACS frame to the columns selected below.
# Margin-of-error columns are intentionally not included.
# *** Folwell has no info
# Entries that are commented out are candidate columns that are currently not selected.
ACS_reduced = ACS.loc[:, [
    'Neighborhood',
    'City',
    'Total population',
    'White - Share',
    'Of Color - Share',
    # 'Foreign born - Share',
    # 'English only - Share',
    # 'Language other than English - Share',
    # 'Speak English less than "very well" - Share',
    # 'Population with a disability - Share',
    'Total housing units',
    'Total households',
    'Family households - Share',
    'Married-couple family households - Share',
    # 'Single-person family households - Share',
    'Nonfamily households - Share',
    # 'Householder living alone - Share',
    # '65 years and over - Share',
    # 'Households with one or more children under 18 years - Share',
    # 'Households with one or more people 65 years and over',
    'Vacant housing units - Share',
    'Occupied housing units - Share',
    'Average household size (occupied)',
    'Owner-occupied - Share',
    'Average owner-occupied household size',
    'Renter-occupied - Share',
    'Average renter-occupied household size',
    # 'No vehicles - Share',
    # 'Moved in 2010 or later - Share',
    # 'Moved in 2000 to 2009 - Share',
    # 'Cost-burdened households - Share',
    # 'Cost-burdened owner households - Share',
    # 'Cost-burdened renter households - Share',
    # 'Median rent (2008-2012; 2012 dollars)',
    # 'Median household income (2008-2012; 2012 dollars)',
    # 'Less than $35000 - Share',
    # '$35000-$49999 - Share',
    # '$50000-$74999 - Share',
    # '$75000-$99999 - Share',
    # '$100000 or more - Share',
    # 'With income below poverty - Share',
    # 'With income 100-149% of poverty - Share',
    # 'With income 150-199% of poverty - Share',
    # 'With income 200% of poverty or higher - Share',
    # 'Proportion of working age adults who are employed - Share',
    # 'Unemployment rate - Share',
    # 'Population (25 years and older) - Share',
    # 'Less than high school - Share',
    # 'High school diploma or GED - Share',
    # "Some college or associate's degree - Share",
    # "Bachelor's degree - Share",
    # 'Graduate or professional degree - Share',
    # 'Car; truck; or van (including passengers) - Share',
    # 'Public transportation - Share',
    # 'Walked; biked; worked at home; or other - Share',
    # 'Less than 10 minutes - Share',
    # '10-19 minutes - Share',
    # '20-29 minutes - Share',
    # '30 minutes or longer - Share',
    # 'Population without health insurance coverage - Share',
]]
print(ACS_reduced.shape)
ACS_reduced.head(1)

### Neighborhood info in the Census file
http://www.mncompass.org/profiles/neighborhoods/minneapolis-saint-paul

Source: 2010 Census counts (courtesy of mncompass.org)

In [None]:
# Read the 2010 neighborhood profile download into a DataFrame,
# then show its dimensions and a one-row preview.
census = pd.read_csv(
    "resources/profile_download_file_msp_neighborhoods_2010.csv",
    encoding="UTF-8",
)
print(census.shape)
census.head(1)

In [None]:
# Narrow the census frame to the same set of columns that is kept for the ACS frame.
census_reduced = census.loc[:, [
    'Neighborhood',
    'City',
    'Total population',
    'White - Share',
    'Of Color - Share',
    'Total housing units',
    'Total households',
    'Family households - Share',
    'Married-couple family households - Share',
    'Nonfamily households - Share',
    'Vacant housing units - Share',
    'Occupied housing units - Share',
    'Average household size (occupied)',
    'Owner-occupied - Share',
    'Average owner-occupied household size',
    'Renter-occupied - Share',
    'Average renter-occupied household size',
]]
print(census_reduced.shape)
census_reduced.head(1)

In [None]:
# TODO: Decide whether to strip the percent signs from the "Share" columns and whether to rename columns before merging.

In [None]:
# Merge ACS & census files & visualize data
#
# Join on the identifying columns only. Using every shared column as a join key
# (as was done before) requires the measured values themselves to be identical in
# both files for a row to match, and leaves no non-key column to bring over, so
# the result carried no census data at all.
#
# ACS columns keep their original names; the matching columns coming from the
# census frame (loaded from the 2010 file) get a " (2010 Census)" suffix.
# how="left" keeps every ACS row even when no census row matches.
# NOTE(review): assumes each (Neighborhood, City) pair appears at most once in
# census_reduced; duplicates there would duplicate ACS rows - confirm.
merged_census_mini = pd.merge(
    ACS_reduced,
    census_reduced,
    how="left",
    on=['Neighborhood', 'City'],
    suffixes=('', ' (2010 Census)'),
)
print(merged_census_mini.shape)
merged_census_mini.head(1)

In [None]:
# Save merged file to new csv
# Write the merged frame to disk as CSV, leaving out the row index.
merged_census_mini.to_csv(
    "resources/merged_census_mini.csv",
    index=False,
)

### Minneapolis crime data
https://opendata.minneapolismn.gov/datasets/neighborhood-crime-stats

In [None]:
# Read the Minneapolis neighborhood crime stats file into a DataFrame,
# then show its dimensions and a one-row preview.
mplsCrime = pd.read_csv(
    "resources/NEIGHBORHOOD_CRIME_STATS_MPLS.csv",
    encoding="UTF-8",
)
print(mplsCrime.shape)
mplsCrime.head(1)

In [None]:
# Drop rows whose neighborhood is one of the two "not assigned" placeholder labels.
is_unassigned = mplsCrime['neighborhood'].isin(
    ['** NOT ASSIGNED **', 'Z_** NOT ASSIGNED **']
)
mplsCrime = mplsCrime.loc[~is_unassigned, :]

print(mplsCrime.shape)
mplsCrime.head(1)

In [None]:
# Filter to 2018 - 2020 (both years inclusive).
# The previous condition (reportYear >= 2017) did not match this stated window:
# it also kept 2017 and had no upper bound. The St. Paul data in this notebook
# is limited to 2018-01-01 through 2020-12-31, so the same window is used here.
# NOTE(review): if 2017 was intentionally included, restore the lower bound
# and update this comment instead.
mplsCrime = mplsCrime.loc[mplsCrime['reportYear'].between(2018, 2020), :]
print(mplsCrime.shape)
mplsCrime.head()

### St. Paul crime data
https://information.stpaul.gov/Public-Safety/Crime-Incident-Report-Dataset/gppb-g9cg

In [None]:
# Read the St. Paul crime incident report file into a DataFrame,
# then show its dimensions and a one-row preview.
sp_crime = pd.read_csv(
    "resources/Crime_Incident_Report_-_Dataset_StPaul.csv",
    encoding="UTF-8",
)
print(sp_crime.shape)
sp_crime.head(1)

In [None]:
# Keep only the rows whose INCIDENT value is one of the crime types listed here;
# every other incident type is discarded.
sp_crime = sp_crime.loc[
    sp_crime['INCIDENT'].isin([
        'Theft',
        'Auto Theft',
        'Burglary',
        'Agg. Assault',
        'Robbery',
        'Agg. Assault Dom.',
        'Arson',
        'Rape',
        'Homicide',
    ]),
    :,
]

print(sp_crime.shape)
sp_crime.head(1)

In [None]:
# Convert date column.
# .assign builds a new frame instead of writing a column into sp_crime in place;
# sp_crime is the result of an earlier boolean .loc selection, and assigning a
# column on such a frame can trigger pandas' SettingWithCopyWarning.
sp_crime = sp_crime.assign(DATE=pd.to_datetime(sp_crime['DATE']))

# Filter to 2018 - 2020.
# Bounds are written in unambiguous ISO format (YYYY-MM-DD). The upper bound is
# exclusive at 2021-01-01 so that rows on 2020-12-31 carrying a time of day are
# kept; "<= 12-31-2020" compares against midnight and would drop them.
# NOTE(review): whether DATE actually carries a time component is not visible
# here - for date-only values both forms select the same rows.
in_window = (sp_crime['DATE'] >= '2018-01-01') & (sp_crime['DATE'] < '2021-01-01')
sp_crime = sp_crime.loc[in_window, :]
print(sp_crime.shape)
sp_crime.tail()