In [1]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
import time
import gmaps
import gmaps.datasets
import zipcodes
import censusgeocode as cg

from census import Census
from us import states

# census key
from api_keys import (ckey, gkey)

# Census API Key
c = Census(ckey, year=2013)

In [2]:
# Read Shooting File and store into Pandas data frame
shooting14_data = pd.read_csv("DataFiles/2014.csv")
shooting15_data = pd.read_csv("DataFiles/2015.csv")
shooting16_data = pd.read_csv("DataFiles/2016.csv")
shooting17_data = pd.read_csv("DataFiles/2017.csv")
shooting18_data = pd.read_csv("DataFiles/2018.csv")

In [3]:
# Merge dataframes
shooting_data = pd.merge(shooting14_data,shooting15_data, how="outer")
shooting_data = pd.merge(shooting_data, shooting16_data, how="outer")
shooting_data = pd.merge(shooting_data, shooting17_data, how="outer")
#shooting_data = pd.merge(shooting_data, shooting18_data, how="outer")

In [4]:
# Find number of incidents in original
len(shooting_data)

1331

In [5]:
# Format dates into datetimes and preview to verify
shooting_data["Incident Date"] = pd.to_datetime(shooting_data["Incident Date"])
shooting_data.head()

Unnamed: 0,Incident Date,State,City Or County,Address,# Killed,# Injured,Operations
0,2014-12-29,Louisiana,New Orleans,Poydras and Bolivar,0,4,
1,2014-12-27,California,Los Angeles,8800 block of South Figueroa Street,1,3,
2,2014-12-27,California,Sacramento,4000 block of May Street,0,4,
3,2014-12-26,Illinois,East St. Louis,2500 block of Summit Avenue,1,3,
4,2014-12-24,Missouri,Saint Louis,18th and Pine,1,3,


In [6]:
# Utilize dictionary to change State names to abbreviations to use in geocoding
us_state_abbrev = {'Alabama': 'AL','Alaska': 'AK','Arizona': 'AZ','Arkansas': 'AR','California': 'CA','Colorado': 'CO','Connecticut': 'CT',
    'Delaware': 'DE','Florida': 'FL','Georgia': 'GA','Hawaii': 'HI','Idaho': 'ID','Illinois': 'IL','Indiana': 'IN','Iowa': 'IA',
    'Kansas': 'KS','Kentucky': 'KY','Louisiana': 'LA','Maine': 'ME','Maryland': 'MD','Massachusetts': 'MA','Michigan': 'MI',
    'Minnesota': 'MN','Mississippi': 'MS','Missouri': 'MO','Montana': 'MT','Nebraska': 'NE','Nevada': 'NV','New Hampshire': 'NH',
    'New Jersey': 'NJ','New Mexico': 'NM','New York': 'NY','North Carolina': 'NC','North Dakota': 'ND','Ohio': 'OH','Oklahoma': 'OK',
    'Oregon': 'OR','Pennsylvania': 'PA','Rhode Island': 'RI','South Carolina': 'SC','South Dakota': 'SD','Tennessee': 'TN',
    'Texas': 'TX','Utah': 'UT','Vermont': 'VT','Virginia': 'VA','Washington': 'WA','West Virginia': 'WV','Wisconsin': 'WI',
    'Wyoming': 'WY', 'District of Columbia': 'DC' ,'Puerto Rico': 'PR'}

# Loop through rows to add new column with abbreviations
for index, row in shooting_data.iterrows():
    #shooting2_data["ST"] = us_state_abbrev[shooting_data["State"][index]]
    shooting_data.loc[index, "ST"] = us_state_abbrev[shooting_data["State"][index]]
    
# Preview to verify
shooting_data.head(20)

Unnamed: 0,Incident Date,State,City Or County,Address,# Killed,# Injured,Operations,ST
0,2014-12-29,Louisiana,New Orleans,Poydras and Bolivar,0,4,,LA
1,2014-12-27,California,Los Angeles,8800 block of South Figueroa Street,1,3,,CA
2,2014-12-27,California,Sacramento,4000 block of May Street,0,4,,CA
3,2014-12-26,Illinois,East St. Louis,2500 block of Summit Avenue,1,3,,IL
4,2014-12-24,Missouri,Saint Louis,18th and Pine,1,3,,MO
5,2014-12-23,Kentucky,Winchester,260 Oxford Drive,1,3,,KY
6,2014-12-22,Michigan,Detroit,Charlevoix and Philip,1,3,,MI
7,2014-12-22,New York,Webster,191 Lake Road,4,2,,NY
8,2014-12-22,Illinois,Chicago,5700 block of South Green Street,0,5,,IL
9,2014-12-21,Florida,Sarasota,4034 N Washington Blvd,2,2,,FL


In [7]:
# Identify dataframe information to be used in geocoding
address = shooting_data['Address']
city = shooting_data['City Or County']
state = shooting_data['ST']

# Run through each incident and use Census geocoding to find zipcodes,
# put identifiable incidents' indices into a list to reference later
problems = []

for x in range(0, len(address)):

    # Try/except to identify index of unidentifiable zipcodes
    try:
        zips = cg.onelineaddress(f'{address[x]}, {city[x]}, {state[x]}', returntype='locations')
        zips = zips[0]['addressComponents']['zip']
        shooting_data.loc[x, "Zipcode"] = zips
        print(f"Processing zipcode {x}: {zips}........")
        
    except:
        problems.append(x)
        print(f"Could not find zipcode {x}...........")
        
print(f"---------------\nFinished Processing\n---------------")

Could not find zipcode 0........
Could not find zipcode 1........
Processing zipcode 2: 95838........
Processing zipcode 3: 62205........
Processing zipcode 4: 63103........
Processing zipcode 5: 40391........
Could not find zipcode 6........
Could not find zipcode 7........
Could not find zipcode 8........
Processing zipcode 9: 34234........
Could not find zipcode 10........
Processing zipcode 11: 60409........
Processing zipcode 12: 61101........
Could not find zipcode 13........
Could not find zipcode 14........
Processing zipcode 15: 33136........
Processing zipcode 16: 33604........
Processing zipcode 17: 97217........
Could not find zipcode 18........
Processing zipcode 19: 63111........
Processing zipcode 20: 23605........
Processing zipcode 21: 11212........
Processing zipcode 22: 37404........
Processing zipcode 23: 07103........
Processing zipcode 24: 30315........
Processing zipcode 25: 38116........
Could not find zipcode 26........
Could not find zipcode 27........
Could n

Could not find zipcode 228........
Processing zipcode 229: 43420........
Could not find zipcode 230........
Processing zipcode 231: 60644........
Processing zipcode 232: 94801........
Could not find zipcode 233........
Could not find zipcode 234........
Processing zipcode 235: 48213........
Could not find zipcode 236........
Could not find zipcode 237........
Could not find zipcode 238........
Processing zipcode 239: 15419........
Processing zipcode 240: 76164........
Could not find zipcode 241........
Processing zipcode 242: 32205........
Could not find zipcode 243........
Could not find zipcode 244........
Could not find zipcode 245........
Processing zipcode 246: 46407........
Could not find zipcode 247........
Could not find zipcode 248........
Could not find zipcode 249........
Processing zipcode 250: 70126........
Processing zipcode 251: 60626........
Processing zipcode 252: 46131........
Could not find zipcode 253........
Could not find zipcode 254........
Could not find zipcode

Processing zipcode 454: 19952........
Could not find zipcode 455........
Could not find zipcode 456........
Processing zipcode 457: 10037........
Could not find zipcode 458........
Could not find zipcode 459........
Processing zipcode 460: 40508........
Processing zipcode 461: 28119........
Could not find zipcode 462........
Processing zipcode 463: 19104........
Processing zipcode 464: 02895........
Processing zipcode 465: 29401........
Could not find zipcode 466........
Processing zipcode 467: 11225........
Processing zipcode 468: 08105........
Could not find zipcode 469........
Could not find zipcode 470........
Could not find zipcode 471........
Could not find zipcode 472........
Could not find zipcode 473........
Processing zipcode 474: 10468........
Processing zipcode 475: 43211........
Could not find zipcode 476........
Processing zipcode 477: 06606........
Processing zipcode 478: 77004........
Could not find zipcode 479........
Could not find zipcode 480........
Could not find z

Could not find zipcode 678........
Processing zipcode 679: 24013........
Could not find zipcode 680........
Could not find zipcode 681........
Processing zipcode 682: 94108........
Could not find zipcode 683........
Processing zipcode 684: 36617........
Processing zipcode 685: 61108........
Processing zipcode 686: 90016........
Could not find zipcode 687........
Could not find zipcode 688........
Processing zipcode 689: 49508........
Processing zipcode 690: 95815........
Processing zipcode 691: 11209........
Processing zipcode 692: 70119........
Could not find zipcode 693........
Processing zipcode 694: 78204........
Could not find zipcode 695........
Could not find zipcode 696........
Processing zipcode 697: 94102........
Could not find zipcode 698........
Processing zipcode 699: 77338........
Processing zipcode 700: 77005........
Could not find zipcode 701........
Could not find zipcode 702........
Processing zipcode 703: 98233........
Could not find zipcode 704........
Processing zi

Could not find zipcode 903........
Could not find zipcode 904........
Could not find zipcode 905........
Processing zipcode 906: 66609........
Could not find zipcode 907........
Processing zipcode 908: 30802........
Processing zipcode 909: 21206........
Processing zipcode 910: 35031........
Could not find zipcode 911........
Could not find zipcode 912........
Could not find zipcode 913........
Processing zipcode 914: 19140........
Processing zipcode 915: 29669........
Could not find zipcode 916........
Processing zipcode 917: 48211........
Processing zipcode 918: 32805........
Could not find zipcode 919........
Could not find zipcode 920........
Could not find zipcode 921........
Could not find zipcode 922........
Could not find zipcode 923........
Processing zipcode 924: 87106........
Could not find zipcode 925........
Could not find zipcode 926........
Could not find zipcode 927........
Processing zipcode 928: 80230........
Could not find zipcode 929........
Could not find zipcode 93

Could not find zipcode 1124........
Processing zipcode 1125: 27260........
Processing zipcode 1126: 32209........
Could not find zipcode 1127........
Processing zipcode 1128: 70807........
Processing zipcode 1129: 45202........
Processing zipcode 1130: 23223........
Could not find zipcode 1131........
Could not find zipcode 1132........
Could not find zipcode 1133........
Could not find zipcode 1134........
Could not find zipcode 1135........
Could not find zipcode 1136........
Processing zipcode 1137: 23868........
Processing zipcode 1138: 62024........
Processing zipcode 1139: 45251........
Could not find zipcode 1140........
Could not find zipcode 1141........
Could not find zipcode 1142........
Processing zipcode 1143: 04950........
Could not find zipcode 1144........
Processing zipcode 1145: 23669........
Processing zipcode 1146: 27601........
Could not find zipcode 1147........
Processing zipcode 1148: 48341........
Could not find zipcode 1149........
Could not find zipcode 1150.

In [8]:
# Print preview to verify
shooting_data.head(25)

Unnamed: 0,Incident Date,State,City Or County,Address,# Killed,# Injured,Operations,ST,Zipcode
0,2014-12-29,Louisiana,New Orleans,Poydras and Bolivar,0,4,,LA,
1,2014-12-27,California,Los Angeles,8800 block of South Figueroa Street,1,3,,CA,
2,2014-12-27,California,Sacramento,4000 block of May Street,0,4,,CA,95838.0
3,2014-12-26,Illinois,East St. Louis,2500 block of Summit Avenue,1,3,,IL,62205.0
4,2014-12-24,Missouri,Saint Louis,18th and Pine,1,3,,MO,63103.0
5,2014-12-23,Kentucky,Winchester,260 Oxford Drive,1,3,,KY,40391.0
6,2014-12-22,Michigan,Detroit,Charlevoix and Philip,1,3,,MI,
7,2014-12-22,New York,Webster,191 Lake Road,4,2,,NY,
8,2014-12-22,Illinois,Chicago,5700 block of South Green Street,0,5,,IL,
9,2014-12-21,Florida,Sarasota,4034 N Washington Blvd,2,2,,FL,34234.0


In [9]:
# Find how many unidentifiable zipcodes
len(problems)

625

In [15]:
# Run through address information with Google geocode to try to find any of the remaining unidentifiable zipcodes
params = {"key": gkey}

for n in problems:
    
    try:
        base_url = "https://maps.googleapis.com/maps/api/geocode/json"

        address = shooting_data['Address'][n]
        city = shooting_data['City Or County'][n]
        state = shooting_data['State'][n]

        # update address key value
        params['address'] = f"{address} {city},{state}"

        # make request
        address = requests.get(base_url, params=params)

        # convert to json
        address = address.json()
        address = address["results"][0]["formatted_address"]
        zipcode = address.split(', ')[2].split(' ')[1]

        shooting_data.loc[n, "Zipcode"] = zipcode
        print(f"Processing zipcode {n}: {zipcode}........")
        
    except:
        shooting_data.loc[n, "Zipcode"] = "NaN"
        print(f"Can't find {n} zipcode")
        
print(f"---------------\nFinished Processing\n---------------")

Processing zipcode 0: 70113........
Processing zipcode 1: 90003........
Can't find 6 zipcode
Processing zipcode 7: 14580........
Processing zipcode 8: 60621........
Processing zipcode 10: 39367........
Processing zipcode 13: 07201........
Processing zipcode 14: 19446........
Processing zipcode 18: 46614........
Processing zipcode 26: 78626........
Processing zipcode 27: 94103........
Can't find 28 zipcode
Processing zipcode 29: 80204........
Processing zipcode 31: 57262........
Processing zipcode 32: 45505........
Processing zipcode 34: 60624........
Can't find 35 zipcode
Processing zipcode 39: 33142........
Processing zipcode 41: 90222........
Processing zipcode 42: 90222........
Processing zipcode 44: 38128........
Processing zipcode 47: 11101........
Can't find 49 zipcode
Processing zipcode 50: 48224........
Processing zipcode 53: 95210........
Processing zipcode 54: 30315........
Can't find 56 zipcode
Processing zipcode 57: 12203........
Processing zipcode 59: 48234........
Process

Processing zipcode 448: 13204........
Processing zipcode 452: 48205........
Processing zipcode 455: 02780........
Processing zipcode 456: 48213........
Processing zipcode 458: 19134........
Processing zipcode 459: 15221........
Processing zipcode 462: 48206........
Processing zipcode 466: 33169........
Processing zipcode 469: 74115........
Processing zipcode 470: 63106........
Processing zipcode 471: 30214........
Processing zipcode 472: 73149........
Processing zipcode 473: 31061........
Processing zipcode 476: 33142........
Processing zipcode 479: 90044........
Processing zipcode 480: 63113........
Processing zipcode 481: 59722........
Processing zipcode 483: 60644........
Processing zipcode 484: 52802........
Processing zipcode 486: 11798........
Processing zipcode 490: 30013........
Processing zipcode 492: 68127........
Processing zipcode 493: 60636........
Processing zipcode 494: 19013........
Processing zipcode 498: 48507........
Can't find 499 zipcode
Processing zipcode 501: 937

Processing zipcode 925: 60649........
Processing zipcode 926: 60636........
Processing zipcode 927: 60624........
Processing zipcode 929: 60644........
Processing zipcode 930: 33325........
Processing zipcode 931: 36092........
Can't find 932 zipcode
Processing zipcode 933: 33907........
Can't find 935 zipcode
Processing zipcode 937: 08618........
Processing zipcode 939: 78202........
Processing zipcode 941: 66106........
Processing zipcode 944: 90220........
Processing zipcode 948: 91752........
Processing zipcode 949: 22192........
Processing zipcode 964: 35462........
Processing zipcode 966: 39571........
Processing zipcode 969: 60621........
Processing zipcode 971: 32703........
Processing zipcode 974: 85303........
Processing zipcode 976: 98134........
Processing zipcode 979: 23061........
Processing zipcode 981: 20003........
Processing zipcode 982: 60623........
Processing zipcode 985: 80126........
Processing zipcode 989: 33147........
Processing zipcode 990: 39183........
Proc

In [17]:
# Check to see how many unidentifiable
remaining = shooting_data[shooting_data.Zipcode == "NaN"]
len(remaining)

75

In [19]:
# Check values of remaining to see why
remaining.head(25)

Unnamed: 0,Incident Date,State,City Or County,Address,# Killed,# Injured,Operations,ST,Zipcode
6,2014-12-22,Michigan,Detroit,Charlevoix and Philip,1,3,,MI,
28,2014-11-23,Virginia,Parksley,Parksley Road,0,4,,VA,
35,2014-11-18,Nevada,North Las Vegas,Englestad Street,0,4,,NV,
49,2014-10-18,New York,Brooklyn,Lenox Road,0,4,,NY,
56,2014-10-08,Georgia,Atlanta,Auburn Avenue and Edgewood Avenue,0,4,,GA,
67,2014-09-26,South Carolina,Darlington County,600 block of Turner Road,1,4,,SC,
71,2014-09-18,Florida,Bell,NW 30th Street and NW 39th Terrace,8,0,,FL,
98,2014-08-13,Louisiana,New Orleans,New Orleans and North Rocheblave streets,0,4,,LA,
99,2014-08-12,Georgia,Cartersville,Brent Cir,2,3,,GA,
102,2014-08-10,Georgia,Wrightsville,Idylwild Dr,0,6,,GA,


In [20]:
# Drop rows with zipcodes that are unidentifiable
shooting_data = shooting_data[shooting_data.Zipcode != "NaN"]

# Veryify number of incidents are correct
len(shooting_data)

1256

In [21]:
# Drop Operations column
shooting_data = shooting_data.drop(["Operations"],axis = 1 )

In [22]:
# Replace state names with abbreviations and drop abbreviations column
shooting_data["State"] = shooting_data["ST"]
shooting_data = shooting_data.drop(["ST"], axis = 1)

In [24]:
# Preview and verify dataframe
shooting_data.head(50)

Unnamed: 0,Incident Date,State,City Or County,Address,# Killed,# Injured,Zipcode
0,2014-12-29,LA,New Orleans,Poydras and Bolivar,0,4,70113
1,2014-12-27,CA,Los Angeles,8800 block of South Figueroa Street,1,3,90003
2,2014-12-27,CA,Sacramento,4000 block of May Street,0,4,95838
3,2014-12-26,IL,East St. Louis,2500 block of Summit Avenue,1,3,62205
4,2014-12-24,MO,Saint Louis,18th and Pine,1,3,63103
5,2014-12-23,KY,Winchester,260 Oxford Drive,1,3,40391
7,2014-12-22,NY,Webster,191 Lake Road,4,2,14580
8,2014-12-22,IL,Chicago,5700 block of South Green Street,0,5,60621
9,2014-12-21,FL,Sarasota,4034 N Washington Blvd,2,2,34234
10,2014-12-21,MS,Waynesboro,Central Avenue,1,4,39367


In [26]:
# Save to csv file
shooting_data.to_csv("OutputFiles/ZipcodeData.csv")