# How do socioeconomic factors affect crime rates in Seattle?
## Chantria Im, Ellie Velez, and Nawaf Osman

In [1]:
%%capture
%%script echo skipping # Skip this code cell

# Install packages
import sys
!{sys.executable} -m pip install pandas
!{sys.executable} -m pip install requests

In [2]:
# Import libraries
import pandas as pd
import requests
import re
import time

In [3]:
# Read the three raw CSV inputs from ./data

# Seattle Police Department crime reports
crime_df = pd.read_csv('./data/SPD_Crime_Data__2008-Present_20240226.csv')

# Poverty data; strip surrounding whitespace from the tract identifiers while parsing
poverty_df = pd.read_csv(
    './data/Poverty_and_Near_Poverty_Map_Full_Data_data.csv',
    converters={'Census Tract': str.strip},
)

# ACS table S1903 (income) export
income_df = pd.read_csv('./data/ACSST5Y2018.S1903-Data.csv')

In [4]:
# Remove rows without a usable location (the filter treats a 0 longitude/latitude as missing).
# .copy() makes the result an independent frame, so the column assignment below
# does not operate on a slice of the original (avoids SettingWithCopyWarning).
crime_df = crime_df[(crime_df['Longitude'] != 0) & (crime_df['Latitude'] != 0)].copy()

# Filter for only 2018 entries, based on the parsed report timestamp
crime_df['Report DateTime'] = pd.to_datetime(crime_df['Report DateTime'], format='%m/%d/%Y %I:%M:%S %p')
crime_df = crime_df[crime_df['Report DateTime'].dt.year == 2018].copy()

# Add an all-missing, string-typed Census Tract column to be filled in by the geocoding step
crime_df['Census Tract'] = pd.Series(pd.NA, index=crime_df.index, dtype='string')

In [5]:
%%capture
%%script echo skipping # Skip this code cell

# Fill Census Tract column with corresponding census tract numbers
i = 0
while i < len(crime_df):
    # Make request to Census Geocoder API to get row's census tract data based on row's longtitude and latitude values
    payload = {'benchmark': 'Public_AR_Current', 'vintage': 'ACS2018_Current', 'x': crime_df['Longitude'].iloc[i], 
               'y': crime_df['Latitude'].iloc[i], 'format': 'json', 'layers': 'Census Tracts'}
    try:
        r = requests.get(f'https://geocoding.geo.census.gov/geocoder/geographies/coordinates', params=payload)
    except requests.Timeout:
        time.sleep(300)
        continue
    # Extract the census tract number from the response and store it in the Census Tract column
    crime_df['Census Tract'].iloc[i] = re.findall('\d+\.\d+|\d+', 
                                                  r.json()['result']['geographies']['Census Tracts'][0]['NAME'])[0]
    i += 1

# Backup dataframe to disk 
crime_df.to_csv('./data/crime.csv')

In [6]:
pd.set_option('display.max_colwidth', None)

# Cache the dataframe
# %store crime_df
# Reload the dataframe from cache rather than recomputing (5+ hours)
%store -r crime_df
crime_df

Unnamed: 0,Report Number,Offense ID,Offense Start DateTime,Offense End DateTime,Report DateTime,Group A B,Crime Against Category,Offense Parent Group,Offense,Offense Code,Precinct,Sector,Beat,MCPP,100 Block Address,Longitude,Latitude,Census Tract
41275,2018-487339,7644443754,12/31/2018 11:39:00 PM,,2018-12-31 23:39:00,A,PROPERTY,STOLEN PROPERTY OFFENSES,Stolen Property Offenses,280,W,Q,Q1,MAGNOLIA,26XX BLOCK OF BISHOP PL W,-122.404822,47.644858,56
41276,2018-487178,7696639702,12/31/2018 07:00:00 PM,,2018-12-31 23:14:00,A,PROPERTY,LARCENY-THEFT,All Other Larceny,23H,S,R,R2,CLAREMONT/RAINIER VISTA,35XX BLOCK OF RAINIER AVE S,-122.292905,47.571475,101
41277,2018-487186,7664183259,12/31/2018 09:14:00 PM,,2018-12-31 22:52:00,B,SOCIETY,DRIVING UNDER THE INFLUENCE,Driving Under the Influence,90D,E,E,E2,CAPITOL HILL,14TH AVE / E UNION ST,-122.314167,47.612917,75
41278,2018-487085,7685572628,12/31/2018 03:00:00 PM,12/31/2018 03:30:00 PM,2018-12-31 22:09:00,A,PROPERTY,LARCENY-THEFT,Theft From Motor Vehicle,23F,E,C,C2,MONTLAKE/PORTAGE BAY,LAKE WASHINGTON BLVD E / E INTERLAKEN BLVD,-122.297248,47.630773,62
41279,2018-487216,7631819895,12/31/2018 09:33:00 PM,,2018-12-31 21:33:00,A,PROPERTY,DESTRUCTION/DAMAGE/VANDALISM OF PROPERTY,Destruction/Damage/Vandalism of Property,290,N,J,J2,GREENWOOD,1XX BLOCK OF N 85TH ST,-122.357312,47.690607,17.01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1071747,2018-336488,7663881810,09/08/2018 10:32:00 PM,,2018-09-08 22:32:00,A,SOCIETY,DRUG/NARCOTIC OFFENSES,Drug/Narcotic Violations,35A,S,S,S1,NEW HOLLY,71XX BLOCK OF HOLLY PARK DR S,-122.287444,47.539097,110.01
1071748,2018-336488,7663881807,09/08/2018 10:32:00 PM,,2018-09-08 22:32:00,A,PERSON,ASSAULT OFFENSES,Aggravated Assault,13A,S,S,S1,NEW HOLLY,71XX BLOCK OF HOLLY PARK DR S,-122.287444,47.539097,110.01
1080980,2018-350575,7669099329,09/19/2018 05:43:00 PM,,2018-09-19 17:43:00,A,PROPERTY,BURGLARY/BREAKING&ENTERING,Burglary/Breaking & Entering,220,S,R,R1,MID BEACON HILL,40XX BLOCK OF 14TH AVE S,-122.314717,47.568641,100.01
1080981,2018-103499,7676453184,03/23/2018 01:38:00 PM,,2018-03-23 14:02:00,A,SOCIETY,DRUG/NARCOTIC OFFENSES,Drug/Narcotic Violations,35A,W,M,M3,DOWNTOWN COMMERCIAL,15XX BLOCK OF 3RD AVE,-122.338247,47.610246,81


In [7]:
# Discard the empty percentage/share columns
unused_cols = ['% of Population Under 100% Poverty Line', '% of Population Under 200% Poverty Line',
               'Share Below Selected % of Poverty Level']
poverty_df = poverty_df.drop(columns=unused_cols)

# The two poverty-level population columns handled below
level_cols = ['Population Under 100% Poverty Level', 'Population Under 200% Poverty Level']

# Marks each tract's designated 'Total' row
is_total = poverty_df['Name'] == 'Total'

# Per-tract sums of the poverty populations over all non-'Total' rows
poverty_by_tract = poverty_df[~is_total].groupby('Census Tract').agg({col: 'sum' for col in level_cols})

# Write each tract's summed populations into its 'Total' row, one level at a time
for col in level_cols:
    poverty_df.loc[is_total, col] = poverty_df.loc[is_total, 'Census Tract'].map(poverty_by_tract[col]).values

# Keep only the 'Total' rows
poverty_df = poverty_df[is_total]
poverty_df

Unnamed: 0,Census Tract,Community Reporting Area,Geographic Area Name,Name,Estimate,Geometry,Population Under 100% Poverty Level,Population Under 200% Poverty Level,Total Population
7,66,Cascade/Eastlake,"Census Tract 66, King County, Washington",Total,4243,Polygon,293,581,4243
15,44,University District,"Census Tract 44, King County, Washington",Total,7409,Polygon,2483,3664,7409
23,78,Madrona/Leschi,"Census Tract 78, King County, Washington",Total,5675,Polygon,332,580,5675
31,17.02,Greenwood/Phinney Ridge,"Census Tract 17.02, King County, Washington",Total,4902,Polygon,511,1190,4902
39,47,Ballard,"Census Tract 47, King County, Washington",Total,9409,Polygon,463,1279,9409
...,...,...,...,...,...,...,...,...,...
3160,83,First Hill,"Census Tract 83, King County, Washington",Total,3188,Polygon,451,846,3188
3168,14,North Beach/Blue Ridge,"Census Tract 14, King County, Washington",Total,5674,Polygon,507,1043,5674
3169,88,Central Area/Squire Park,"Census Tract 88, King County, Washington",Total,4406,Polygon,385,826,4406
3184,35,Greenwood/Phinney Ridge,"Census Tract 35, King County, Washington",Total,4169,Polygon,387,614,4169


In [8]:
# Clean income_df
# TODO: none of the cleaning steps below are active yet; they are commented-out drafts,
# so this cell only displays income_df without modifying it.

# Draft step 1: rename the identifier columns to readable names
#income_df.rename(columns={
#    'GEO_ID': 'Geography',
#    'NAME': 'Geographic Area Name',
#}, inplace=True)

# Draft step 2: drop the row labelled 0. In the displayed frame that row holds the
# descriptive column labels ("Geography", "Geographic Area Name", ...) rather than tract data.
# income_df = income_df.drop(0)


# Draft step 3: list of columns to keep
# TODO: the entries after 'Geographic Area Name' are placeholders; fill in real column names
#columns_to_keep = ['Geographic Area Name', 'column',
#                   '','',]

# Draft step 4: keep only the specified columns above
# income_df = income_df.filter(columns_to_keep)

income_df

Unnamed: 0,GEO_ID,NAME,S1903_C01_001E,S1903_C01_001M,S1903_C01_002E,S1903_C01_002M,S1903_C01_003E,S1903_C01_003M,S1903_C01_004E,S1903_C01_004M,...,S1903_C03_038M,S1903_C02_039E,S1903_C02_039M,S1903_C03_039E,S1903_C03_039M,S1903_C02_040E,S1903_C02_040M,S1903_C03_040E,S1903_C03_040M,Unnamed: 242
0,Geography,Geographic Area Name,Estimate!!Number!!HOUSEHOLD INCOME BY RACE AND HISPANIC OR LATINO ORIGIN OF HOUSEHOLDER!!Households,Margin of Error!!Number MOE!!HOUSEHOLD INCOME BY RACE AND HISPANIC OR LATINO ORIGIN OF HOUSEHOLDER!!Households,Estimate!!Number!!HOUSEHOLD INCOME BY RACE AND HISPANIC OR LATINO ORIGIN OF HOUSEHOLDER!!Households!!One race--!!White,Margin of Error!!Number MOE!!HOUSEHOLD INCOME BY RACE AND HISPANIC OR LATINO ORIGIN OF HOUSEHOLDER!!Households!!One race--!!White,Estimate!!Number!!HOUSEHOLD INCOME BY RACE AND HISPANIC OR LATINO ORIGIN OF HOUSEHOLDER!!Households!!One race--!!Black or African American,Margin of Error!!Number MOE!!HOUSEHOLD INCOME BY RACE AND HISPANIC OR LATINO ORIGIN OF HOUSEHOLDER!!Households!!One race--!!Black or African American,Estimate!!Number!!HOUSEHOLD INCOME BY RACE AND HISPANIC OR LATINO ORIGIN OF HOUSEHOLDER!!Households!!One race--!!American Indian and Alaska Native,Margin of Error!!Number MOE!!HOUSEHOLD INCOME BY RACE AND HISPANIC OR LATINO ORIGIN OF HOUSEHOLDER!!Households!!One race--!!American Indian and Alaska Native,...,Margin of Error!!Median income (dollars) MOE!!NONFAMILY HOUSEHOLDS!!Nonfamily households!!Male householder,Estimate!!Percent Distribution!!NONFAMILY HOUSEHOLDS!!Nonfamily households!!Male householder!!Living alone,Margin of Error!!Percent Distribution MOE!!NONFAMILY HOUSEHOLDS!!Nonfamily households!!Male householder!!Living alone,Estimate!!Median income (dollars)!!NONFAMILY HOUSEHOLDS!!Nonfamily households!!Male householder!!Living alone,Margin of Error!!Median income (dollars) MOE!!NONFAMILY HOUSEHOLDS!!Nonfamily households!!Male householder!!Living alone,Estimate!!Percent Distribution!!NONFAMILY HOUSEHOLDS!!Nonfamily households!!Male householder!!Not living alone,Margin of Error!!Percent Distribution MOE!!NONFAMILY HOUSEHOLDS!!Nonfamily households!!Male householder!!Not living alone,Estimate!!Median income (dollars)!!NONFAMILY HOUSEHOLDS!!Nonfamily households!!Male householder!!Not living 
alone,Margin of Error!!Median income (dollars) MOE!!NONFAMILY HOUSEHOLDS!!Nonfamily households!!Male householder!!Not living alone,
1,1400000US53033000100,"Census Tract 1, King County, Washington",3899,175,2410,209,537,169,11,18,...,11598,44.3,8.9,36267,11988,9.8,4.2,79231,13657,
2,1400000US53033000200,"Census Tract 2, King County, Washington",3693,146,2792,207,242,59,49,66,...,17938,31.8,7.6,67556,18643,21.9,7.8,87647,27979,
3,1400000US53033000300,"Census Tract 3, King County, Washington",1159,68,888,86,42,27,0,12,...,30376,19.8,12.6,45625,16842,23.8,12.3,101607,90680,
4,1400000US53033000401,"Census Tract 4.01, King County, Washington",3842,158,2662,201,478,121,35,55,...,3174,28.2,6.6,30163,9505,5.3,3.5,65125,37481,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
394,1400000US53033032702,"Census Tract 327.02, King County, Washington",2497,144,2457,141,0,17,0,17,...,12352,35.4,14.7,69836,25713,25.5,11.9,153644,7353,
395,1400000US53033032703,"Census Tract 327.03, King County, Washington",871,63,798,71,0,12,0,12,...,20701,31.5,14.0,38036,17845,10.4,7.8,101964,98083,
396,1400000US53033032704,"Census Tract 327.04, King County, Washington",2499,226,2273,270,18,26,0,17,...,32514,33.9,14.6,15809,11636,19.7,10.5,107143,53591,
397,1400000US53033032800,"Census Tract 328, King County, Washington",983,80,954,89,0,12,6,7,...,33596,36.0,11.4,48750,17878,19.9,15.7,-,**,
