In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as stats
from scipy.stats import linregress
import requests
import json
from pprint import pprint
import gmaps
from config import g_key


In [2]:
# Read in the Crimes
clean_crime_df = pd.read_csv("./Resources/Crime_clean.csv")
clean_personal_crime_df = pd.read_csv("./Resources/Crime_Persons_clean.csv")
clean_crime_df.head()

Unnamed: 0,X,Y,FID,ActivityNumber,District,Neighborhood,OccurenceStartDate,OccurenceEndDate,ReportDate,OccurenceLocation,OccurenceCity,ZipCode,PrimaryViolation,DateTime,Year,PC
0,0.0001517236,-2.607703e-06,2,"]\=""-0092335",North Central,Carmichael Park,2018/03/19 21:30:00,2018/03/19 21:59:59,2018/03/20 17:00:00,5100 Block of Janell,Carmichael,95608,PC 273.5 Inflict Crpl Inj Sp/Cohab,2018-03-19 21:30:00,2018,273.5
1,6756477.0,1974923.0,3,03/30-0055028,East,Cordova Meadows,2015/03/06 01:11:00,2015/03/06 01:11:00,2015/03/06 00:00:00,10000 Block of Terra,Rancho Cordova,95670,PC 594(B)(2)(A) Vandalism Less Than $400,2015-03-06 01:11:00,2015,594.0
2,6754609.0,1968828.0,226871,2014-0000030,East,Lincoln Village,2014/01/01 00:14:59,2014/01/01 00:14:59,2014/01/01 00:42:00,3300 Block of Explor,Sacramento,95827,PC 246 Shoot At Inhabited Dwelling/Vehicle/Etc,2014-01-01 00:14:59,2014,246.0
3,6747809.0,1969023.0,226872,2014-0000031,Elk Grove,Butterfield,2014/01/01 00:27:00,2014/01/01 00:27:00,2014/01/01 00:30:00,9300 Block of Mira D,Sacramento,95827,PC 459 Burglary,2014-01-01 00:27:00,2014,459.0
4,6728042.0,1985577.0,226874,2014-0000033,North Central,Bellview,2014/01/01 00:24:59,2014/01/01 00:27:00,2014/01/01 00:48:00,2600 Block of Darwin,Sacramento,95821,PC 594(A) Vandalism,2014-01-01 00:24:59,2014,594.0


In [3]:
# Create a new data frame with just the zip codes and the sum of the pc's
clean_crime_min_df = pd.DataFrame()
clean_crime_min_df["PC"] = clean_crime_df["PC"]
clean_crime_min_df["Year"] = clean_crime_df["Year"]
clean_crime_min_df["ZipCode"] = clean_crime_df["ZipCode"]
clean_crime_min_df.head()
clean_crime_sum_group = clean_crime_min_df.groupby(["Year", "ZipCode"]).count()
clean_crime_sum_group

Unnamed: 0_level_0,Unnamed: 1_level_0,PC
Year,ZipCode,Unnamed: 2_level_1
2014,92345,1
2014,94043,1
2014,94571,16
2014,95501,1
2014,95608,1745
...,...,...
2018,95843,925
2018,95864,520
2018,95949,1
2018,95961,1


In [4]:
# Create a new data frame with just the zip codes and the sum of the pc's
clean_crime_personal_min_df = pd.DataFrame()
clean_crime_personal_min_df["PC"] = clean_personal_crime_df["PC"]
clean_crime_personal_min_df["Year"] = clean_personal_crime_df["Year"]
clean_crime_personal_min_df["ZipCode"] = clean_personal_crime_df["ZipCode"]
clean_crime_personal_min_df.head()
clean_crime_personal_sum_group = clean_crime_personal_min_df.groupby(["Year", "ZipCode"]).count()
clean_crime_personal_sum_group

Unnamed: 0_level_0,Unnamed: 1_level_0,PC
Year,ZipCode,Unnamed: 2_level_1
2014,92345,1
2014,94043,1
2014,94571,2
2014,95501,1
2014,95608,229
...,...,...
2018,95842,311
2018,95843,236
2018,95864,83
2018,95961,1


In [5]:
# Read in the Census data
census_df = pd.read_csv("./Resources/census_output_final.csv")
census_df.head()


Unnamed: 0,Year,ZipCode,Population,Poverty Count,Poverty Rate,Family Poverty Count,Median Household Income,Per Capita Income,Median Home Value
0,2014,95608,60729.0,8301.0,13.67,1476.0,55256.0,34606.0,304200.0
1,2014,95610,43572.0,7174.0,16.46,1136.0,50536.0,25288.0,217100.0
2,2014,95621,41474.0,5842.0,14.09,1069.0,51827.0,24379.0,173700.0
3,2014,95624,63131.0,6848.0,10.85,1266.0,80233.0,29752.0,270400.0
4,2014,95626,6335.0,792.0,12.5,182.0,60579.0,22576.0,193900.0


In [6]:
# Merge the crime and the census data
merged_all_df = pd.merge(census_df, clean_crime_sum_group, how="left", left_on=["Year", "ZipCode"], right_on=["Year", "ZipCode"])
merged_all_df["Crime Rate"] = (100 * merged_all_df["PC"]/merged_all_df["Population"]).round(2)
merged_all_df.fillna(0, inplace=True)
merged_all_df.head()

Unnamed: 0,Year,ZipCode,Population,Poverty Count,Poverty Rate,Family Poverty Count,Median Household Income,Per Capita Income,Median Home Value,PC,Crime Rate
0,2014,95608,60729.0,8301.0,13.67,1476.0,55256.0,34606.0,304200.0,1745.0,2.87
1,2014,95610,43572.0,7174.0,16.46,1136.0,50536.0,25288.0,217100.0,8.0,0.02
2,2014,95621,41474.0,5842.0,14.09,1069.0,51827.0,24379.0,173700.0,38.0,0.09
3,2014,95624,63131.0,6848.0,10.85,1266.0,80233.0,29752.0,270400.0,6.0,0.01
4,2014,95626,6335.0,792.0,12.5,182.0,60579.0,22576.0,193900.0,70.0,1.1


In [7]:
# Merge the crime and the census data
merged_personal_df = pd.merge(census_df, clean_crime_personal_sum_group, how="left", left_on=["Year", "ZipCode"], right_on=["Year", "ZipCode"])
merged_personal_df["Crime Rate"] = (100 * merged_personal_df["PC"]/merged_personal_df["Population"]).round(2)
merged_personal_df.fillna(0, inplace=True)
merged_personal_df

Unnamed: 0,Year,ZipCode,Population,Poverty Count,Poverty Rate,Family Poverty Count,Median Household Income,Per Capita Income,Median Home Value,PC,Crime Rate
0,2014,95608,60729.0,8301.0,13.67,1476.0,55256.0,34606.0,304200.0,229.0,0.38
1,2014,95610,43572.0,7174.0,16.46,1136.0,50536.0,25288.0,217100.0,2.0,0.00
2,2014,95621,41474.0,5842.0,14.09,1069.0,51827.0,24379.0,173700.0,10.0,0.02
3,2014,95624,63131.0,6848.0,10.85,1266.0,80233.0,29752.0,270400.0,0.0,0.00
4,2014,95626,6335.0,792.0,12.50,182.0,60579.0,22576.0,193900.0,5.0,0.08
...,...,...,...,...,...,...,...,...,...,...,...
240,2018,95838,39187.0,9989.0,25.49,1847.0,44849.0,17299.0,201900.0,2.0,0.01
241,2018,95841,20586.0,3964.0,19.26,885.0,47215.0,25167.0,233300.0,234.0,1.14
242,2018,95842,32269.0,7050.0,21.85,1386.0,50412.0,22419.0,222300.0,311.0,0.96
243,2018,95843,46709.0,5818.0,12.46,1248.0,78102.0,27488.0,299900.0,236.0,0.51


In [8]:
# Write out the Data Frames
merged_all_df.to_csv("./Resources/merged_all_df.csv")
merged_personal_df.to_csv("./Resources/merged_personal_df.csv")

In [9]:
gmaps.configure(api_key=g_key)
# zipcode = "95608"
# base_url = f"https://maps.googleapis.com/maps/api/geocode/json?address={zipcode}&key={g_key}"
# # print(base_url)
# response = requests.get(base_url)
# data = response.json()
# pprint(data['results'])

In [10]:
merged_all_df["Lat"] = 0
merged_all_df["Lng"] = 0
# Iterate over the rows in the dataframe
for index, row in merged_all_df.iterrows():
    zipcode = int(row["ZipCode"])
    print(f"Gathering Lat/Lng for Zip Code {zipcode} Index = {index}")
    base_url = f"https://maps.googleapis.com/maps/api/geocode/json?address={zipcode}&key={g_key}"
    response = requests.get(base_url)
    data = response.json()
    results = data['results']
    try:
        lat = results[0]["geometry"]["location"]["lat"]
        lng = results[0]["geometry"]["location"]["lng"]
        merged_all_df.loc[index, "Lat"] = lat
        merged_all_df.loc[index, "Lng"] = lng
    except (KeyError, IndexError):
        print(f"Error with Zip Code {zipcode} Index = {index}")
        merged_all_df.loc[index, "Lat"] = 0
        merged_all_df.loc[index, "Lng"] = 0

merged_all_df.head()

Gathering Lat/Lon for Zip Code 95608 Index = 0
Gathering Lat/Lon for Zip Code 95610 Index = 1
Gathering Lat/Lon for Zip Code 95621 Index = 2
Gathering Lat/Lon for Zip Code 95624 Index = 3
Gathering Lat/Lon for Zip Code 95626 Index = 4
Gathering Lat/Lon for Zip Code 95628 Index = 5
Gathering Lat/Lon for Zip Code 95630 Index = 6
Gathering Lat/Lon for Zip Code 95632 Index = 7
Gathering Lat/Lon for Zip Code 95638 Index = 8
Gathering Lat/Lon for Zip Code 95639 Index = 9
Gathering Lat/Lon for Zip Code 95641 Index = 10
Gathering Lat/Lon for Zip Code 95655 Index = 11
Gathering Lat/Lon for Zip Code 95660 Index = 12
Gathering Lat/Lon for Zip Code 95662 Index = 13
Gathering Lat/Lon for Zip Code 95670 Index = 14
Gathering Lat/Lon for Zip Code 95673 Index = 15
Gathering Lat/Lon for Zip Code 95683 Index = 16
Gathering Lat/Lon for Zip Code 95690 Index = 17
Gathering Lat/Lon for Zip Code 95693 Index = 18
Gathering Lat/Lon for Zip Code 95742 Index = 19
Gathering Lat/Lon for Zip Code 95758 Index = 20
Ga

Unnamed: 0,Year,ZipCode,Population,Poverty Count,Poverty Rate,Family Poverty Count,Median Household Income,Per Capita Income,Median Home Value,PC,Crime Rate,Lat,Lng
0,2014,95608,60729.0,8301.0,13.67,1476.0,55256.0,34606.0,304200.0,1745.0,2.87,38.627279,-121.319849
1,2014,95610,43572.0,7174.0,16.46,1136.0,50536.0,25288.0,217100.0,8.0,0.02,38.697745,-121.274081
2,2014,95621,41474.0,5842.0,14.09,1069.0,51827.0,24379.0,173700.0,38.0,0.09,38.694001,-121.302688
3,2014,95624,63131.0,6848.0,10.85,1266.0,80233.0,29752.0,270400.0,6.0,0.01,38.424402,-121.331288
4,2014,95626,6335.0,792.0,12.5,182.0,60579.0,22576.0,193900.0,70.0,1.1,38.719089,-121.457045


In [12]:
merged_personal_df["Lat"] = 0
merged_personal_df["Lng"] = 0
# Iterate over the rows in the dataframe
for index, row in merged_personal_df.iterrows():
    zipcode = int(row["ZipCode"])
    print(f"Gathering Lat/Lng for Zip Code {zipcode} Index = {index}")
    base_url = f"https://maps.googleapis.com/maps/api/geocode/json?address={zipcode}&key={g_key}"
    response = requests.get(base_url)
    data = response.json()
    results = data['results']
    try:
        lat = results[0]["geometry"]["location"]["lat"]
        lng = results[0]["geometry"]["location"]["lng"]
        merged_personal_df.loc[index, "Lat"] = lat
        merged_personal_df.loc[index, "Lng"] = lng
    except (KeyError, IndexError):
        print(f"Error with Zip Code {zipcode} Index = {index}")
        merged_personal_df.loc[index, "Lat"] = 0
        merged_personal_df.loc[index, "Lng"] = 0

merged_personal_df.head()

Gathering Lat/Lng for Zip Code 95608 Index = 0
Gathering Lat/Lng for Zip Code 95610 Index = 1
Gathering Lat/Lng for Zip Code 95621 Index = 2
Gathering Lat/Lng for Zip Code 95624 Index = 3
Gathering Lat/Lng for Zip Code 95626 Index = 4
Gathering Lat/Lng for Zip Code 95628 Index = 5
Gathering Lat/Lng for Zip Code 95630 Index = 6
Gathering Lat/Lng for Zip Code 95632 Index = 7
Gathering Lat/Lng for Zip Code 95638 Index = 8
Gathering Lat/Lng for Zip Code 95639 Index = 9
Gathering Lat/Lng for Zip Code 95641 Index = 10
Gathering Lat/Lng for Zip Code 95655 Index = 11
Gathering Lat/Lng for Zip Code 95660 Index = 12
Gathering Lat/Lng for Zip Code 95662 Index = 13
Gathering Lat/Lng for Zip Code 95670 Index = 14
Gathering Lat/Lng for Zip Code 95673 Index = 15
Gathering Lat/Lng for Zip Code 95683 Index = 16
Gathering Lat/Lng for Zip Code 95690 Index = 17
Gathering Lat/Lng for Zip Code 95693 Index = 18
Gathering Lat/Lng for Zip Code 95742 Index = 19
Gathering Lat/Lng for Zip Code 95758 Index = 20
Ga

Unnamed: 0,Year,ZipCode,Population,Poverty Count,Poverty Rate,Family Poverty Count,Median Household Income,Per Capita Income,Median Home Value,PC,Crime Rate,Lat,Lng
0,2014,95608,60729.0,8301.0,13.67,1476.0,55256.0,34606.0,304200.0,229.0,0.38,38.627279,-121.319849
1,2014,95610,43572.0,7174.0,16.46,1136.0,50536.0,25288.0,217100.0,2.0,0.0,38.697745,-121.274081
2,2014,95621,41474.0,5842.0,14.09,1069.0,51827.0,24379.0,173700.0,10.0,0.02,38.694001,-121.302688
3,2014,95624,63131.0,6848.0,10.85,1266.0,80233.0,29752.0,270400.0,0.0,0.0,38.424402,-121.331288
4,2014,95626,6335.0,792.0,12.5,182.0,60579.0,22576.0,193900.0,5.0,0.08,38.719089,-121.457045


In [13]:
# Write out the Data Frames
merged_all_df.to_csv("./Resources/merged_all_lat_lng_df.csv")
merged_personal_df.to_csv("./Resources/merged_personal_lat_lng_df.csv")

In [29]:
# Prep data frames for a heatmap
merged_all_2014_df = merged_all_df.loc[merged_all_df["Year"] == 2014]
merged_all_2014_lat_lng_df = merged_all_2014_df[["Lat", "Lng"]].astype(float)
merged_all_2014_max_poverty_rate = merged_all_2014_df["Poverty Rate"].max()
merged_all_2014_max_crime_rate = merged_all_2014_df["Crime Rate"].max()
print(f"Max Poverty Rate is {merged_all_2014_max_poverty_rate}")
print(f"Max Crime Rate is {merged_all_2014_max_crime_rate}")

Max Poverty Rate is 39.24
Max Crime Rate is 37.72


In [28]:
# Create the Poverty Rate heatmap for 2014
fig = gmaps.figure(center=(38.55, -121.0), zoom_level=10)
heatmap_layer = gmaps.heatmap_layer(merged_all_2014_lat_lng_df, weights=merged_all_2014_df["Poverty Rate"], max_intensity=40,
                                    point_radius=0.03, dissipating=False, opacity=0.5)
fig.add_layer(heatmap_layer)
fig


Figure(layout=FigureLayout(height='420px'))

In [32]:
# Create the Crime Rate heatmap for 2014
fig = gmaps.figure(center=(38.55, -121.0), zoom_level=10)
heatmap_layer = gmaps.heatmap_layer(merged_all_2014_lat_lng_df, weights=merged_all_2014_df["Crime Rate"], max_intensity=10,
                                    point_radius=0.03, dissipating=False, opacity=0.5)
fig.add_layer(heatmap_layer)
fig

Figure(layout=FigureLayout(height='420px'))