In [213]:
import numpy as np
import pandas as pd
import random
import statsmodels.api as sm
import scipy.stats
import warnings
warnings.filterwarnings(action='ignore')

In [131]:
data = pd.read_csv("Incidents_Responded_to_by_Fire_Companies.csv", na_values='nan')
data.head()

Unnamed: 0,IM_INCIDENT_KEY,FIRE_BOX,INCIDENT_TYPE_DESC,INCIDENT_DATE_TIME,ARRIVAL_DATE_TIME,UNITS_ONSCENE,LAST_UNIT_CLEARED_DATE_TIME,HIGHEST_LEVEL_DESC,TOTAL_INCIDENT_DURATION,ACTION_TAKEN1_DESC,...,ZIP_CODE,BOROUGH_DESC,FLOOR,CO_DETECTOR_PRESENT_DESC,FIRE_ORIGIN_BELOW_GRADE_FLAG,STORY_FIRE_ORIGIN_COUNT,FIRE_SPREAD_DESC,DETECTOR_PRESENCE_DESC,AES_PRESENCE_DESC,STANDPIPE_SYS_PRESENT_FLAG
0,55672688,2147,"300 - Rescue, EMS incident, other",01/01/2013 12:00:20 AM,01/01/2013 12:14:23 AM,1.0,01/01/2013 12:20:06 AM,"1 - More than initial alarm, less than Signal 7-5",1186.0,"00 - Action taken, other",...,10454,2 - Bronx,,,,,,,,
1,55672692,818,735A - Unwarranted alarm/defective condition o...,01/01/2013 12:00:37 AM,01/01/2013 12:09:03 AM,3.0,01/01/2013 12:30:06 AM,"1 - More than initial alarm, less than Signal 7-5",1769.0,86 - Investigate,...,10036,1 - Manhattan,,,,,,,,
2,55672693,9656,"300 - Rescue, EMS incident, other",01/01/2013 12:01:17 AM,01/01/2013 12:04:55 AM,1.0,01/01/2013 12:15:18 AM,"1 - More than initial alarm, less than Signal 7-5",841.0,"00 - Action taken, other",...,11418,5 - Queens,,,,,,,,
3,55672695,7412,412 - Gas leak (natural gas or LPG),01/01/2013 12:02:32 AM,01/01/2013 12:07:48 AM,4.0,01/01/2013 12:40:11 AM,"1 - More than initial alarm, less than Signal 7-5",2259.0,44 - Hazardous materials leak control & contai...,...,11103,5 - Queens,1.0,,,,,,,
4,55672697,4019,735A - Unwarranted alarm/defective condition o...,01/01/2013 12:01:49 AM,01/01/2013 12:06:27 AM,6.0,01/01/2013 12:24:56 AM,"1 - More than initial alarm, less than Signal 7-5",1387.0,86 - Investigate,...,11385,5 - Queens,,,,,,,,


In [9]:
# most common type
type_count = data['INCIDENT_TYPE_DESC'].value_counts()
type_count[0] / sum(type_count)

0.3614828304238471

In [12]:
# ratio of average units on scene
avg_onscene = data.groupby('INCIDENT_TYPE_DESC').mean()['UNITS_ONSCENE']
avg_onscene['111 - Building fire'] / avg_onscene['651 - Smoke scare, odor of smoke']

2.7597595139775324

In [16]:
# ratio of false call rate
fc_count = data[data['INCIDENT_TYPE_DESC']=='710 - Malicious, mischievous false call, other'].groupby('BOROUGH_DESC').size()
borough_count = data.groupby('BOROUGH_DESC').size()
fc_count = fc_count / borough_count
fc_count['3 - Staten Island'] / fc_count['1 - Manhattan']

1.6243819471953407

In [155]:
# third quantitle of minutes arrival
data['INCIDENT_DATE_TIME'] = pd.to_datetime(data.loc[:, 'INCIDENT_DATE_TIME'], format="%m/%d/%Y %I:%M:%S %p")
data['ARRIVAL_DATE_TIME'] = pd.to_datetime(data.loc[:, 'ARRIVAL_DATE_TIME'], format="%m/%d/%Y %I:%M:%S %p")
time_df = data[data['INCIDENT_TYPE_DESC']=='111 - Building fire']['ARRIVAL_DATE_TIME'] - data[data['INCIDENT_TYPE_DESC']=='111 - Building fire']['INCIDENT_DATE_TIME']
time_df.quantile(0.75).seconds / 60

4.15

In [81]:
hour_func = lambda df: df.groupby(df['INCIDENT_DATE_TIME'].dt.hour).size()
cook_df = hour_func(data[data['INCIDENT_TYPE_DESC'] == '113 - Cooking fire, confined to container']) / hour_func(data)
cook_df.max()

0.050846824501505385

In [199]:
# residents and incidents correlation
census_df = pd.read_csv("2010+Census+Population+By+Zipcode+(ZCTA).csv", index_col=0)
data['ZIP_CODE'] = pd.to_numeric(data['ZIP_CODE'].fillna(0).apply(lambda x: str(x).split('-')[0])).astype('int64')
incident_df = data[data['INCIDENT_TYPE_DESC'] == '111 - Building fire'].groupby('ZIP_CODE').size()
join_df = pd.merge(pd.DataFrame(incident_df), census_df, how='inner', left_index=True, right_index=True)
res = sm.OLS(join_df.iloc[:, 0], join_df.iloc[:, 1]).fit()
res.rsquared

0.8834755463590017

In [208]:
# proportion of co detector given incident duration
co_df = data.dropna(subset=['CO_DETECTOR_PRESENT_DESC']).copy()
co_df['TOTAL_INCIDENT_DURATION'] = co_df['TOTAL_INCIDENT_DURATION'] / 60
bin_group = pd.cut(co_df['TOTAL_INCIDENT_DURATION'], bins=range(20, 80, 10), include_lowest=True)
co_count = co_df.groupby([bin_group, co_df['CO_DETECTOR_PRESENT_DESC']]).size()
co_count = co_count.unstack()
co_count = co_count['No'] / co_count['Yes']
X = sm.add_constant(np.arange(25, 75, 10))
co_res = sm.OLS(co_count.values, X).fit()
co_res.params[0] + co_res.params[1] * 39

0.33083042838924376

In [214]:
# chi-square test
co_df['GREATER_THAN_60'] = co_df['TOTAL_INCIDENT_DURATION'].apply(lambda x: 1 if x>=60 else 0)
chi_count = co_df.groupby([co_df['GREATER_THAN_60'], co_df['CO_DETECTOR_PRESENT_DESC']]).size()
chi_count = chi_count.unstack()
chi2, p, ddof, expected = scipy.stats.chi2_contingency(chi_count)
chi2

1169.1529727007567