# DataKind Red Cross Project Phase 2
## Home Fire Risk Data Model
10/28/2019
Tasks:
1. <b>HFC Home Visit Area Profiles</b>: Use SVI data to identify common demographic and economic themes for Census Tract areas where alarms were installed. Document methodology and results
2. <b>Home Fire Area Profiles</b>: Use SVI data to identify common demographic and economic themes for Census Tract areas reporting fires. Document methodolgy and results
3. <b>Lives Saved Area Profiles</b>: Use SVI data to identify common themes for Lives Saved Locations. Document methodology and results

In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline
pd.set_option('display.max_columns',500)
sns.set()

In [2]:
alarms_path = '../data/01_raw/HomeFire_SmokeAlarmInstalls_NEW.csv'
alarms_top = pd.read_csv(alarms_path,
                        nrows=100)

In [3]:
col_dtypes = {'Zip':str,'FIPS':str}
alarms = pd.read_csv(alarms_path,
                     dtype=col_dtypes,
                     index_col = 0)

In [4]:
alarms['FIPS'] = (alarms['FIPS'].str[:-2]
                  .str.zfill(12))

In [5]:
alarms['FIPS'].str.len().value_counts(dropna=False)

12.0    496982
NaN       4420
Name: FIPS, dtype: int64

In [29]:
alarms['In-Home Visit Date'] = pd.to_datetime(alarms['In-Home Visit Date'])

In [32]:
alarms['pre-existing_alarm'] = np.where(alarms['Pre-Existing Alarms'] > 0, 'Yes','No')
alarms['working_pre-existing_alarm'] = np.where(alarms['Pre-Existing Alarms Tested and Working'] > 0, 'Yes','No')

In [31]:
alarms['pre-existing_alarm'].value_counts(dropna=False)

Yes    252455
No     248947
Name: pre-existing_alarm, dtype: int64

In [33]:
alarms['working_pre-existing_alarm'].value_counts(dropna=False)

No     354770
Yes    146632
Name: working_pre-existing_alarm, dtype: int64

# Exploratory Data Analysis

In [22]:
alarms.describe().round(1)

Unnamed: 0,Census Block Group Y,Census Block Group X,Smoke Alarms Installed (10-Year/9-Volt/DHH),10-Year and 9-Volt Alarms Installed,Deaf and Hard of Hearing Alarms Installed,Pre-Existing Alarms,Pre-Existing Alarms Tested and Working,Batteries Replaced,Fire Escape Plans Made,Fire Safety Checklists Completed,Additional Hazard Education Conducted,People Served,Youth Served,Seniors Served,"Veterans, Military Members and Military Family Members Served","Individuals with Disabilities, Access, or Functional Needs Served"
count,496982.0,496982.0,501402.0,501402.0,501402.0,501297.0,501297.0,501402.0,501402.0,501402.0,501402.0,501402.0,501290.0,501295.0,501263.0,501279.0
mean,37.6,-88.7,2.4,2.4,0.0,1.1,0.6,0.1,0.8,0.8,0.5,2.7,0.5,0.4,0.1,0.2
std,5.4,14.7,1.5,1.5,0.2,1.4,1.1,0.6,0.4,0.4,0.5,1.7,1.0,0.7,0.3,0.5
min,18.0,-166.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
25%,34.0,-94.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
50%,38.8,-84.5,2.0,2.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0
75%,40.9,-79.9,3.0,3.0,0.0,2.0,1.0,0.0,1.0,1.0,1.0,4.0,0.0,1.0,0.0,0.0
max,67.2,-65.6,14.0,14.0,6.0,9.0,9.0,9.0,1.0,1.0,1.0,20.0,14.0,15.0,14.0,15.0


In [23]:
alarms.describe(include='O')

Unnamed: 0,Zip,Division,Region,Chapter,FIPS,City,State,County,In-Home Visit Date,Additional Hazard Type
count,501402,501402,501402,501402,496982,501402,501402,501402,501402,501402.0
unique,16011,7,60,267,75138,9597,56,1645,1366,8.0
top,60901,Crossroads Division,Greater New York Region,ARC of Greater New York,340130003002,BROOKLYN,NY,Kings,10/15/2016,
freq,2997,101418,31429,27632,688,9168,41531,9337,4264,231625.0


In [27]:
alarms.info(verbose=False)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 501402 entries, 0 to 501401
Columns: 26 entries, Zip to Individuals with Disabilities, Access, or Functional Needs Served
dtypes: float64(8), int64(8), object(10)
memory usage: 103.3+ MB


In [36]:
alarms.head()

Unnamed: 0,Zip,Division,Region,Chapter,FIPS,Census Block Group Y,Census Block Group X,City,State,County,In-Home Visit Date,Smoke Alarms Installed (10-Year/9-Volt/DHH),10-Year and 9-Volt Alarms Installed,Deaf and Hard of Hearing Alarms Installed,Pre-Existing Alarms,Pre-Existing Alarms Tested and Working,Batteries Replaced,Fire Escape Plans Made,Fire Safety Checklists Completed,Additional Hazard Education Conducted,Additional Hazard Type,People Served,Youth Served,Seniors Served,"Veterans, Military Members and Military Family Members Served","Individuals with Disabilities, Access, or Functional Needs Served",pre-existing_alarm,working_pre-existing_alarm
0,84003,Southwest and Rocky Mountain Division,Utah and Nevada Region,ARC Central and Southern UT,490490107003,40.387018,-111.788729,AMERICAN FORK,UT,Utah,2017-08-19,3,3,0,0.0,0.0,1,1,1,0,,2,0.0,0.0,0.0,0.0,No,No
1,84003,Southwest and Rocky Mountain Division,Utah and Nevada Region,ARC Central and Southern UT,490490107003,40.387018,-111.788729,AMERICAN FORK,UT,Utah,2017-08-12,3,3,0,0.0,0.0,0,1,1,0,,6,0.0,0.0,0.0,0.0,No,No
2,84003,Southwest and Rocky Mountain Division,Utah and Nevada Region,ARC Central and Southern UT,490490107003,40.387018,-111.788729,AMERICAN FORK,UT,Utah,2017-08-19,3,3,0,0.0,0.0,0,1,1,0,,2,0.0,0.0,0.0,0.0,No,No
3,84003,Southwest and Rocky Mountain Division,Utah and Nevada Region,ARC Central and Southern UT,490490107001,40.399519,-111.783987,HIGHLAND,UT,Utah,2015-09-19,3,3,0,0.0,0.0,0,1,1,1,Earthquake,4,0.0,0.0,0.0,0.0,No,No
4,84003,Southwest and Rocky Mountain Division,Utah and Nevada Region,ARC Central and Southern UT,490490107001,40.399519,-111.783987,AMERICAN FORK,UT,Utah,2017-06-03,3,3,0,0.0,0.0,0,1,1,0,,1,0.0,0.0,0.0,0.0,No,No


## Aggregate by census tract

In [37]:
tracts = pd.crosstab(alarms['FIPS'],alarms['pre-existing_alarm'])

In [39]:
tracts.sort_values('Yes',ascending=False).head()

pre-existing_alarm,No,Yes
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1
120559606025,62,285
121170213152,39,248
361031584102,62,233
360610236004,79,231
90093481252,42,216
