# How do socioeconomic factors affect crime rates in Seattle?
## Chantria Im, Ellie Velez, and Nawaf Osman

In [18]:
# Import libraries
import pandas as pd
import requests
import re
import dill # Notebook caching
import time

In [19]:
# Read the three raw CSV inputs (SPD crime reports, poverty map data and the
# ACS S1903 income table) into DataFrames.
crime_df = pd.read_csv('./data/SPD_Crime_Data__2008-Present_20240226.csv')
income_df = pd.read_csv('./data/ACSST5Y2018.S1903-Data.csv')
poverty_df = pd.read_csv(
    './data/Poverty_and_Near_Poverty_Map_Full_Data_data.csv',
    # Strip surrounding whitespace from each tract value and keep it as text
    converters={'Census Tract': str.strip},
)

In [20]:
# Keep only rows with a usable location: both coordinates must be present
# (not NaN) and non-zero. A plain `!= 0` test lets NaN through, because
# NaN != 0 evaluates to True.
# NOTE(review): presumably 0 is the dataset's placeholder for an unrecorded
# coordinate — confirm against the SPD data dictionary.
has_location = (
    crime_df['Longitude'].notna()
    & crime_df['Latitude'].notna()
    & (crime_df['Longitude'] != 0)
    & (crime_df['Latitude'] != 0)
)
# .copy() makes the filtered frame independent of the original so the column
# assignments below do not trigger SettingWithCopyWarning.
crime_df = crime_df[has_location].copy()

# Parse the report timestamp, then keep only reports filed in 2018
crime_df['Report DateTime'] = pd.to_datetime(crime_df['Report DateTime'], format='%m/%d/%Y %I:%M:%S %p')
crime_df = crime_df[crime_df['Report DateTime'].dt.year == 2018].copy()

# Add a Census Tract column of nullable-string dtype. Assigning an empty Series
# aligns on the index, so every row starts out as missing.
# Caution: re-running this cell resets any tract values already filled in.
crime_df['Census Tract'] = pd.Series(dtype='string')

In [21]:
# Cache the dataframe (left commented out; uncomment to persist crime_df
# through IPython's %store magic)
# %store crime_df
# Reload the dataframe from cache rather than recomputing (5+ hours)
# NOTE(review): the recorded output of this cell is "no stored variable or
# alias crime_df", i.e. nothing was restored and the frame displayed below is
# the in-memory one with an empty Census Tract column. Confirm the cache has
# been written before relying on this cell.
%store -r crime_df
crime_df

no stored variable or alias crime_df


Unnamed: 0,Report Number,Offense ID,Offense Start DateTime,Offense End DateTime,Report DateTime,Group A B,Crime Against Category,Offense Parent Group,Offense,Offense Code,Precinct,Sector,Beat,MCPP,100 Block Address,Longitude,Latitude,Census Tract
41275,2018-487339,7644443754,12/31/2018 11:39:00 PM,,2018-12-31 23:39:00,A,PROPERTY,STOLEN PROPERTY OFFENSES,Stolen Property Offenses,280,W,Q,Q1,MAGNOLIA,26XX BLOCK OF BISHOP PL W,-122.404822,47.644858,
41276,2018-487178,7696639702,12/31/2018 07:00:00 PM,,2018-12-31 23:14:00,A,PROPERTY,LARCENY-THEFT,All Other Larceny,23H,S,R,R2,CLAREMONT/RAINIER VISTA,35XX BLOCK OF RAINIER AVE S,-122.292905,47.571475,
41277,2018-487186,7664183259,12/31/2018 09:14:00 PM,,2018-12-31 22:52:00,B,SOCIETY,DRIVING UNDER THE INFLUENCE,Driving Under the Influence,90D,E,E,E2,CAPITOL HILL,14TH AVE / E UNION ST,-122.314167,47.612917,
41278,2018-487085,7685572628,12/31/2018 03:00:00 PM,12/31/2018 03:30:00 PM,2018-12-31 22:09:00,A,PROPERTY,LARCENY-THEFT,Theft From Motor Vehicle,23F,E,C,C2,MONTLAKE/PORTAGE BAY,LAKE WASHINGTON BLVD E / E INTERLAKEN BLVD,-122.297248,47.630773,
41279,2018-487216,7631819895,12/31/2018 09:33:00 PM,,2018-12-31 21:33:00,A,PROPERTY,DESTRUCTION/DAMAGE/VANDALISM OF PROPERTY,Destruction/Damage/Vandalism of Property,290,N,J,J2,GREENWOOD,1XX BLOCK OF N 85TH ST,-122.357312,47.690607,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1071747,2018-336488,7663881810,09/08/2018 10:32:00 PM,,2018-09-08 22:32:00,A,SOCIETY,DRUG/NARCOTIC OFFENSES,Drug/Narcotic Violations,35A,S,S,S1,NEW HOLLY,71XX BLOCK OF HOLLY PARK DR S,-122.287444,47.539097,
1071748,2018-336488,7663881807,09/08/2018 10:32:00 PM,,2018-09-08 22:32:00,A,PERSON,ASSAULT OFFENSES,Aggravated Assault,13A,S,S,S1,NEW HOLLY,71XX BLOCK OF HOLLY PARK DR S,-122.287444,47.539097,
1080980,2018-350575,7669099329,09/19/2018 05:43:00 PM,,2018-09-19 17:43:00,A,PROPERTY,BURGLARY/BREAKING&ENTERING,Burglary/Breaking & Entering,220,S,R,R1,MID BEACON HILL,40XX BLOCK OF 14TH AVE S,-122.314717,47.568641,
1080981,2018-103499,7676453184,03/23/2018 01:38:00 PM,,2018-03-23 14:02:00,A,SOCIETY,DRUG/NARCOTIC OFFENSES,Drug/Narcotic Violations,35A,W,M,M3,DOWNTOWN COMMERCIAL,15XX BLOCK OF 3RD AVE,-122.338247,47.610246,


In [22]:
# Clean poverty_df

# Report, for each column, whether it contains any missing values
empty_values = poverty_df.isnull()
print(empty_values.any())

# Drop the two "% of Population Under ..." columns and the
# "Share Below Selected % of Poverty Level" column
share_columns = [
    "% of Population Under 100% Poverty Line",
    "% of Population Under 200% Poverty Line",
    "Share Below Selected % of Poverty Level",
]
poverty_df = poverty_df.drop(columns=share_columns)

poverty_df

Census Tract                               False
Community Reporting Area                    True
Geographic Area Name                       False
Name                                       False
% of Population Under 100% Poverty Line     True
% of Population Under 200% Poverty Line     True
Estimate                                   False
Geometry                                    True
Population Under 100% Poverty Level        False
Population Under 200% Poverty Level        False
Share Below Selected % of Poverty Level     True
Total Population                           False
dtype: bool


Unnamed: 0,Census Tract,Community Reporting Area,Geographic Area Name,Name,Estimate,Geometry,Population Under 100% Poverty Level,Population Under 200% Poverty Level,Total Population
0,66,Cascade/Eastlake,"Census Tract 66, King County, Washington",Total Under .50,147,Polygon,147,147,0
1,66,Cascade/Eastlake,"Census Tract 66, King County, Washington",Total 2.00 and over,3662,Polygon,0,0,0
2,66,Cascade/Eastlake,"Census Tract 66, King County, Washington",Total 1.85 to 1.99,92,Polygon,0,92,0
3,66,Cascade/Eastlake,"Census Tract 66, King County, Washington",Total 1.50 to 1.84,52,Polygon,0,52,0
4,66,Cascade/Eastlake,"Census Tract 66, King County, Washington",Total 1.25 to 1.49,114,Polygon,0,114,0
...,...,...,...,...,...,...,...,...,...
3195,57,Magnolia,"Census Tract 57, King County, Washington",Total 1.25 to 1.49,135,Polygon,0,135,0
3196,57,Magnolia,"Census Tract 57, King County, Washington",Total 1.50 to 1.84,106,Polygon,0,106,0
3197,57,Magnolia,"Census Tract 57, King County, Washington",Total 1.85 to 1.99,15,Polygon,0,15,0
3198,57,Magnolia,"Census Tract 57, King County, Washington",Total 2.00 and over,6249,Polygon,0,0,0


In [23]:
# Clean income_df
# NOTE(review): every cleaning step below is still commented out, so this cell
# currently only displays the raw frame. TODO: finish or remove the draft.

# Edit column names
#income_df.rename(columns={
#    'GEO_ID': 'Geography',
#    'NAME': 'Geographic Area Name',
#}, inplace=True)

# Remove the descriptive label row (index 0 of the frame, which holds the
# human-readable column labels rather than data)
# income_df = income_df.drop(0)


# List of columns to keep
# TODO: 'column' and the empty strings look like placeholders; replace them
# with the real column names before enabling this step.
#columns_to_keep = ['Geographic Area Name', 'column',
#                   '','',]

# Keep only the specified columns above
# income_df = income_df.filter(columns_to_keep)

income_df

Unnamed: 0,GEO_ID,NAME,S1903_C01_001E,S1903_C01_001M,S1903_C01_002E,S1903_C01_002M,S1903_C01_003E,S1903_C01_003M,S1903_C01_004E,S1903_C01_004M,...,S1903_C03_038M,S1903_C02_039E,S1903_C02_039M,S1903_C03_039E,S1903_C03_039M,S1903_C02_040E,S1903_C02_040M,S1903_C03_040E,S1903_C03_040M,Unnamed: 242
0,Geography,Geographic Area Name,Estimate!!Number!!HOUSEHOLD INCOME BY RACE AND...,Margin of Error!!Number MOE!!HOUSEHOLD INCOME ...,Estimate!!Number!!HOUSEHOLD INCOME BY RACE AND...,Margin of Error!!Number MOE!!HOUSEHOLD INCOME ...,Estimate!!Number!!HOUSEHOLD INCOME BY RACE AND...,Margin of Error!!Number MOE!!HOUSEHOLD INCOME ...,Estimate!!Number!!HOUSEHOLD INCOME BY RACE AND...,Margin of Error!!Number MOE!!HOUSEHOLD INCOME ...,...,Margin of Error!!Median income (dollars) MOE!!...,Estimate!!Percent Distribution!!NONFAMILY HOUS...,Margin of Error!!Percent Distribution MOE!!NON...,Estimate!!Median income (dollars)!!NONFAMILY H...,Margin of Error!!Median income (dollars) MOE!!...,Estimate!!Percent Distribution!!NONFAMILY HOUS...,Margin of Error!!Percent Distribution MOE!!NON...,Estimate!!Median income (dollars)!!NONFAMILY H...,Margin of Error!!Median income (dollars) MOE!!...,
1,1400000US53033000100,"Census Tract 1, King County, Washington",3899,175,2410,209,537,169,11,18,...,11598,44.3,8.9,36267,11988,9.8,4.2,79231,13657,
2,1400000US53033000200,"Census Tract 2, King County, Washington",3693,146,2792,207,242,59,49,66,...,17938,31.8,7.6,67556,18643,21.9,7.8,87647,27979,
3,1400000US53033000300,"Census Tract 3, King County, Washington",1159,68,888,86,42,27,0,12,...,30376,19.8,12.6,45625,16842,23.8,12.3,101607,90680,
4,1400000US53033000401,"Census Tract 4.01, King County, Washington",3842,158,2662,201,478,121,35,55,...,3174,28.2,6.6,30163,9505,5.3,3.5,65125,37481,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
394,1400000US53033032702,"Census Tract 327.02, King County, Washington",2497,144,2457,141,0,17,0,17,...,12352,35.4,14.7,69836,25713,25.5,11.9,153644,7353,
395,1400000US53033032703,"Census Tract 327.03, King County, Washington",871,63,798,71,0,12,0,12,...,20701,31.5,14.0,38036,17845,10.4,7.8,101964,98083,
396,1400000US53033032704,"Census Tract 327.04, King County, Washington",2499,226,2273,270,18,26,0,17,...,32514,33.9,14.6,15809,11636,19.7,10.5,107143,53591,
397,1400000US53033032800,"Census Tract 328, King County, Washington",983,80,954,89,0,12,6,7,...,33596,36.0,11.4,48750,17878,19.9,15.7,-,**,
