In [19]:
import json
import pandas as pd
import numpy as np
import re
import os
import requests
import dotenv

# Load dotenv
dotenv.load_dotenv()

# Get enviroment variables
# ========================
MAPBOX_KEY = os.getenv("MAPBOX_KEY")

# Create API key for GeoJSON in https://rapidapi.com/VanitySoft/api/boundaries-io-1/
# Then copy and paste the key in .env file
GEOJSON_KEY = os.getenv("GEOJSON_KEY")

# Load Crime Data

In [20]:
# Read CSV file to pandas dataframe
df = pd.read_csv('../crime/KCPD_Crime_Data_Complete.csv')

# Datatypes of columns
"{:,} rows in crime dataset..".format(df.shape[0])

  interactivity=interactivity, compiler=compiler, result=result)


'1,550,285 rows in crime dataset..'

In [21]:
# Remove rows with non-numeric zip codes
df = df[df['Zip Code'].apply(
  lambda x: True if type(x) in [float, int] else re.match(r'^-?\d+(?:\.\d+)?$', x) is not None)
]

# Ensure 5 digit Zip Codes. Convert zip code to string and fill with zeros
df['Zip Code'] = df['Zip Code'].fillna(0).astype(float).astype(int).astype(str).str.zfill(5)

crime_zipcodes = df['Zip Code'].unique()

"{} unique Zip Codes and {:,} rows in Crime Data".format(len(crime_zipcodes), df.shape[0])

'649 unique Zip Codes and 1,550,284 rows in Crime Data'

# Obtain GeoJSON for Kansas City from RapidAPI
For testing ZIP codes, open [https://kepler.gl/demo](https://kepler.gl/demo) and open the geojson file. 

In [22]:
url = "https://vanitysoft-boundaries-io-v1.p.rapidapi.com/reaperfire/rest/v1/public/boundary/zipcode/location"

querystring = {
    # Center of Kansas City
    "latitude": "39.099225",
    "longitude": "-94.5839147",
    # Radius of search (45 miles)
    "radius":"45",
    "showDetails":"true"
}

headers = {
    'x-rapidapi-host': "vanitysoft-boundaries-io-v1.p.rapidapi.com",
    'x-rapidapi-key': GEOJSON_KEY
    }

zipcodes_45mi_from_KC = requests.request("GET", url, headers=headers, params=querystring)

# Convert to JSON
with open('../map/zipcodes_45mi_clean.geojson', 'w') as f:
    f.write(zipcodes_45mi_from_KC.text)

### Alternative plan. Obtain data for all ZIP codes in crime data. 

In [23]:
import requests

url = "https://vanitysoft-boundaries-io-v1.p.rapidapi.com/rest/v1/public/boundary/zipcode"

querystring = {"combine":"false"}

payload = list(crime_zipcodes)
headers = {
    'content-type': "application/json",
    'x-rapidapi-host': "vanitysoft-boundaries-io-v1.p.rapidapi.com",
    'x-rapidapi-key': GEOJSON_KEY
    }

crime_geojson_api = requests.request("POST", url, data=json.dumps(payload), headers=headers, params=querystring)

with open('../map/zipcodes_in_crimeKC.geojson', 'w') as f:
    f.write(crime_geojson_api.text)

In [24]:
# Write all zip codes to file
with open('../map/list_zipcodes_KansasCity.txt', 'w') as f:
    f.write('\n'.join([
    feature['properties']['zipCode']
    for feature in data['features']
]))


### Proportion of crimes within 45 miles range

In [25]:
# Parse JSON response
data = json.loads(zipcodes_45mi_from_KC.text) 

# Filter geojson to only include zip codes with crime data
zip_codes_with_crime = [
    feature['properties']['zipCode']
    for feature in data['features']
    if feature['properties']['zipCode'] in crime_zipcodes
]

print("{}/{} zip codes with crime data within Kansas City".format(len(zip_codes_with_crime), len(crime_zipcodes)))

171/649 zip codes with crime data within Kansas City


In [26]:
# Count number of crimes per zip code
crime_by_zipcode = df.groupby('Zip Code').size().reset_index(name='count').sort_values('count', ascending=False)
crime_by_zipcode['with_zip_code'] = crime_by_zipcode['Zip Code'].isin(zip_codes_with_crime) 

print("{0:10.1f}% of crimes with ZIP code within Kansas City".format( 
  100 * sum(crime_by_zipcode[crime_by_zipcode['with_zip_code'] == True]['count']) / sum(crime_by_zipcode['count']))
)

# If you want to see that in Excel, you can use this code
# crime_by_zipcode.to_csv('../crime/crimes_count_by_zipcode.csv')

      95.2% of crimes with ZIP code within Kansas City


# Prepare crime data for map

In [27]:
# Dictionary of zip codes and count of crimes
zip_code_counts = {
    zipcode: count
    for zipcode, count in zip(crime_by_zipcode['Zip Code'], crime_by_zipcode['count'])
}

# Add count of crimes to geojson
for feature in data['features']:
    zipcode = feature['properties']['zipCode']
    feature['properties']['crime_count'] = zip_code_counts.get(zipcode, 0)

with open('../map/45mi_with_crimes.geojson', 'w') as f:
    json.dump(data, f)

# Work in Progress. Extract lat/lon from LOCATION

In [28]:
# Extract latitude and longitude from address text usign regex
import re


def extract_lat_long(address):
    # \((\d+\.\d+),\s*(-?\d+\.\d+)\)
    # \(([0-9\.]+), ([0-9\.]+)\)
    match = re.search(r'x', address)
    if match:
        return float(match.group(1)), float(match.group(2))
    else:
        return None, None

print(df['Location'][1])
extract_lat_long(df['Location'][1])

1100 LOCUST ST
KANSAS CITY, MO 64106
(39.100854728000456, -94.57737538399965)


(None, None)