In [3]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as stats
from scipy.stats import linregress
from config import api_key


In [4]:
# Load the two cleaned crime extracts (the all-crimes file and the Crime_Persons file),
# then preview the first rows of the all-crimes frame.
clean_crime_df, clean_personal_crime_df = map(
    pd.read_csv,
    ("./Resources/Crime_clean.csv", "./Resources/Crime_Persons_clean.csv"),
)
clean_crime_df.head()

Unnamed: 0,X,Y,FID,ActivityNumber,District,Neighborhood,OccurenceStartDate,OccurenceEndDate,ReportDate,OccurenceLocation,OccurenceCity,ZipCode,PrimaryViolation,DateTime,Year,PC
0,0.0001517236,-2.607703e-06,2,"]\=""-0092335",North Central,Carmichael Park,2018/03/19 21:30:00,2018/03/19 21:59:59,2018/03/20 17:00:00,5100 Block of Janell,Carmichael,95608,PC 273.5 Inflict Crpl Inj Sp/Cohab,2018-03-19 21:30:00,2018,273.5
1,6756477.0,1974923.0,3,03/30-0055028,East,Cordova Meadows,2015/03/06 01:11:00,2015/03/06 01:11:00,2015/03/06 00:00:00,10000 Block of Terra,Rancho Cordova,95670,PC 594(B)(2)(A) Vandalism Less Than $400,2015-03-06 01:11:00,2015,594.0
2,6754609.0,1968828.0,226871,2014-0000030,East,Lincoln Village,2014/01/01 00:14:59,2014/01/01 00:14:59,2014/01/01 00:42:00,3300 Block of Explor,Sacramento,95827,PC 246 Shoot At Inhabited Dwelling/Vehicle/Etc,2014-01-01 00:14:59,2014,246.0
3,6747809.0,1969023.0,226872,2014-0000031,Elk Grove,Butterfield,2014/01/01 00:27:00,2014/01/01 00:27:00,2014/01/01 00:30:00,9300 Block of Mira D,Sacramento,95827,PC 459 Burglary,2014-01-01 00:27:00,2014,459.0
4,6728042.0,1985577.0,226874,2014-0000033,North Central,Bellview,2014/01/01 00:24:59,2014/01/01 00:27:00,2014/01/01 00:48:00,2600 Block of Darwin,Sacramento,95821,PC 594(A) Vandalism,2014-01-01 00:24:59,2014,594.0


In [5]:
# Count crime records per (Year, ZipCode).
# Note: .count() tallies the non-null "PC" entries in each group; it does not sum the codes.
# Keep only the columns needed for the tally (copy so the source frame is never aliased).
clean_crime_min_df = clean_crime_df[["PC", "Year", "ZipCode"]].copy()
# Result is indexed by (Year, ZipCode) with a single "PC" column holding the record count.
clean_crime_sum_group = clean_crime_min_df.groupby(["Year", "ZipCode"]).count()
clean_crime_sum_group

Unnamed: 0_level_0,Unnamed: 1_level_0,PC
Year,ZipCode,Unnamed: 2_level_1
2014,92345,1
2014,94043,1
2014,94571,16
2014,95501,1
2014,95608,1745
...,...,...
2018,95843,925
2018,95864,520
2018,95949,1
2018,95961,1


In [6]:
# Count Crime_Persons records per (Year, ZipCode).
# Note: .count() tallies the non-null "PC" entries in each group; it does not sum the codes.
# Keep only the columns needed for the tally (copy so the source frame is never aliased).
clean_crime_personal_min_df = clean_personal_crime_df[["PC", "Year", "ZipCode"]].copy()
# Result is indexed by (Year, ZipCode) with a single "PC" column holding the record count.
clean_crime_personal_sum_group = clean_crime_personal_min_df.groupby(["Year", "ZipCode"]).count()
clean_crime_personal_sum_group

Unnamed: 0_level_0,Unnamed: 1_level_0,PC
Year,ZipCode,Unnamed: 2_level_1
2014,92345,1
2014,94043,1
2014,94571,2
2014,95501,1
2014,95608,229
...,...,...
2018,95842,311
2018,95843,236
2018,95864,83
2018,95961,1


In [7]:
# Load the census figures (one row per Year / ZipCode) and preview the first five rows
census_df = pd.read_csv(filepath_or_buffer="./Resources/census_output_final.csv")
census_df.head(n=5)


Unnamed: 0,Year,ZipCode,Population,Poverty Count,Poverty Rate,Family Poverty Count,Median Household Income,Per Capita Income,Median Home Value
0,2014,95608,60729.0,8301.0,13.67,1476.0,55256.0,34606.0,304200.0
1,2014,95610,43572.0,7174.0,16.46,1136.0,50536.0,25288.0,217100.0
2,2014,95621,41474.0,5842.0,14.09,1069.0,51827.0,24379.0,173700.0
3,2014,95624,63131.0,6848.0,10.85,1266.0,80233.0,29752.0,270400.0
4,2014,95626,6335.0,792.0,12.5,182.0,60579.0,22576.0,193900.0


In [8]:
# Attach the per-(Year, ZipCode) crime counts to the census rows.
# A left join keeps every census row, including those with no matching crime group.
merged_all_df = census_df.merge(clean_crime_sum_group, how="left", on=["Year", "ZipCode"])
# A census row without a matching crime group has no recorded crimes, so only the count
# column is zero-filled. A frame-wide fillna(0) would also turn missing census values
# (population, income, home value, ...) into fake zeros; those are left as NaN here.
merged_all_df["PC"] = merged_all_df["PC"].fillna(0)
# Crimes per 100 residents, rounded to two decimals.
# NOTE(review): the rate stays NaN where Population is missing rather than becoming 0.
merged_all_df["Crime Rate"] = (100 * merged_all_df["PC"] / merged_all_df["Population"]).round(2)
merged_all_df.head()

Unnamed: 0,Year,ZipCode,Population,Poverty Count,Poverty Rate,Family Poverty Count,Median Household Income,Per Capita Income,Median Home Value,PC,Crime Rate
0,2014,95608,60729.0,8301.0,13.67,1476.0,55256.0,34606.0,304200.0,1745.0,2.87
1,2014,95610,43572.0,7174.0,16.46,1136.0,50536.0,25288.0,217100.0,8.0,0.02
2,2014,95621,41474.0,5842.0,14.09,1069.0,51827.0,24379.0,173700.0,38.0,0.09
3,2014,95624,63131.0,6848.0,10.85,1266.0,80233.0,29752.0,270400.0,6.0,0.01
4,2014,95626,6335.0,792.0,12.5,182.0,60579.0,22576.0,193900.0,70.0,1.1


In [9]:
# Attach the per-(Year, ZipCode) Crime_Persons counts to the census rows.
# A left join keeps every census row, including those with no matching crime group.
merged_personal_df = census_df.merge(clean_crime_personal_sum_group, how="left", on=["Year", "ZipCode"])
# A census row without a matching crime group has no recorded crimes, so only the count
# column is zero-filled. A frame-wide fillna(0) would also turn missing census values
# (population, income, home value, ...) into fake zeros; those are left as NaN here.
merged_personal_df["PC"] = merged_personal_df["PC"].fillna(0)
# Crimes per 100 residents, rounded to two decimals.
# NOTE(review): the rate stays NaN where Population is missing rather than becoming 0.
merged_personal_df["Crime Rate"] = (100 * merged_personal_df["PC"] / merged_personal_df["Population"]).round(2)
merged_personal_df

Unnamed: 0,Year,ZipCode,Population,Poverty Count,Poverty Rate,Family Poverty Count,Median Household Income,Per Capita Income,Median Home Value,PC,Crime Rate
0,2014,95608,60729.0,8301.0,13.67,1476.0,55256.0,34606.0,304200.0,229.0,0.38
1,2014,95610,43572.0,7174.0,16.46,1136.0,50536.0,25288.0,217100.0,2.0,0.00
2,2014,95621,41474.0,5842.0,14.09,1069.0,51827.0,24379.0,173700.0,10.0,0.02
3,2014,95624,63131.0,6848.0,10.85,1266.0,80233.0,29752.0,270400.0,0.0,0.00
4,2014,95626,6335.0,792.0,12.50,182.0,60579.0,22576.0,193900.0,5.0,0.08
...,...,...,...,...,...,...,...,...,...,...,...
240,2018,95838,39187.0,9989.0,25.49,1847.0,44849.0,17299.0,201900.0,2.0,0.01
241,2018,95841,20586.0,3964.0,19.26,885.0,47215.0,25167.0,233300.0,234.0,1.14
242,2018,95842,32269.0,7050.0,21.85,1386.0,50412.0,22419.0,222300.0,311.0,0.96
243,2018,95843,46709.0,5818.0,12.46,1248.0,78102.0,27488.0,299900.0,236.0,0.51


In [10]:
# Persist both merged frames as CSV files under ./Resources.
# to_csv is called with its defaults, so the row index is written as the first column.
for frame, csv_path in (
    (merged_all_df, "./Resources/merged_all_df.csv"),
    (merged_personal_df, "./Resources/merged_personal_df.csv"),
):
    frame.to_csv(csv_path)