# Assessing fire risk by location in NYC

## Background
Using data provided by NYC OpenData, this notebook walks through the steps of analyzing fire risk in New York City.

## Import Libraries

In [2]:
# Data analysis and visualization
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import datetime as dt

# Interactive maps
import folium
from folium.plugins import HeatMap

# Machine Learning
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_curve, roc_auc_score, auc
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.pipeline import Pipeline

## Load and describe data

Note: Data was filtered on the NYC OpenData site to only include incident classification groups that were fire-related (Structural and NonStructural Fires) prior to export.

In [3]:
alarms_data = pd.read_csv('../data/raw/In-Service_Alarm_Box_Locations.csv')
fires_data = pd.read_csv('../data/raw/Fire_Incident_Dispatch_Data.csv')

In [4]:
alarms_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13008 entries, 0 to 13007
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   BOROBOX           13008 non-null  object 
 1   BOX_TYPE          13008 non-null  object 
 2   LOCATION          13008 non-null  object 
 3   ZIP               12981 non-null  float64
 4   BOROUGH           13004 non-null  object 
 5   COMMUNITYDISTICT  12866 non-null  object 
 6   CITYCOUNCIL       13004 non-null  float64
 7   LATITUDE          13008 non-null  float64
 8   LONGITUDE         13008 non-null  float64
 9   Location Point    13008 non-null  object 
dtypes: float64(4), object(6)
memory usage: 1016.4+ KB


In [5]:
fires_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 134205 entries, 0 to 134204
Data columns (total 29 columns):
 #   Column                          Non-Null Count   Dtype  
---  ------                          --------------   -----  
 0   STARFIRE_INCIDENT_ID            134205 non-null  int64  
 1   INCIDENT_DATETIME               134205 non-null  object 
 2   ALARM_BOX_BOROUGH               134205 non-null  object 
 3   ALARM_BOX_NUMBER                134205 non-null  int64  
 4   ALARM_BOX_LOCATION              134195 non-null  object 
 5   INCIDENT_BOROUGH                134205 non-null  object 
 6   ZIPCODE                         128109 non-null  float64
 7   POLICEPRECINCT                  128114 non-null  float64
 8   CITYCOUNCILDISTRICT             128110 non-null  float64
 9   COMMUNITYDISTRICT               128112 non-null  float64
 10  COMMUNITYSCHOOLDISTRICT         128101 non-null  float64
 11  CONGRESSIONALDISTRICT           128110 non-null  float64
 12  ALARM_SOURCE_DES

In [6]:
merged = pd.merge(left=alarms_data, right=fires_data, left_on='LOCATION', right_on='ALARM_BOX_LOCATION')
merged.head()

Unnamed: 0,BOROBOX,BOX_TYPE,LOCATION,ZIP,BOROUGH,COMMUNITYDISTICT,CITYCOUNCIL,LATITUDE,LONGITUDE,Location Point,...,FIRST_ACTIVATION_DATETIME,FIRST_ON_SCENE_DATETIME,INCIDENT_CLOSE_DATETIME,VALID_DISPATCH_RSPNS_TIME_INDC,VALID_INCIDENT_RSPNS_TIME_INDC,INCIDENT_RESPONSE_SECONDS_QY,INCIDENT_TRAVEL_TM_SECONDS_QY,ENGINES_ASSIGNED_QUANTITY,LADDERS_ASSIGNED_QUANTITY,OTHER_UNITS_ASSIGNED_QUANTITY
0,B2653,ERS,3 AVE & 65 ST,11220.0,Brooklyn,BK07,38.0,40.63932,-74.023549,POINT (-74.02354939 40.63932033),...,02/06/2018 03:43:00 AM,02/06/2018 03:46:35 AM,02/06/2018 04:48:21 AM,N,Y,363,357,2,2,0
1,B2653,ERS,3 AVE & 65 ST,11220.0,Brooklyn,BK07,38.0,40.63932,-74.023549,POINT (-74.02354939 40.63932033),...,07/13/2018 01:56:41 PM,07/13/2018 01:59:36 PM,07/13/2018 02:09:18 PM,N,Y,227,194,3,2,1
2,B2653,ERS,3 AVE & 65 ST,11220.0,Brooklyn,BK07,38.0,40.63932,-74.023549,POINT (-74.02354939 40.63932033),...,08/06/2018 06:33:22 AM,08/06/2018 06:35:39 AM,08/06/2018 06:54:41 AM,N,Y,191,155,3,2,1
3,B2653,ERS,3 AVE & 65 ST,11220.0,Brooklyn,BK07,38.0,40.63932,-74.023549,POINT (-74.02354939 40.63932033),...,09/13/2018 06:28:56 PM,09/13/2018 06:30:12 PM,09/13/2018 07:41:08 PM,N,Y,160,95,4,2,1
4,B2653,ERS,3 AVE & 65 ST,11220.0,Brooklyn,BK07,38.0,40.63932,-74.023549,POINT (-74.02354939 40.63932033),...,09/16/2018 09:52:58 AM,09/16/2018 09:54:42 AM,09/16/2018 10:16:05 AM,N,Y,171,121,3,2,1


In [7]:
# Remove unnecessary columns
data = merged[['STARFIRE_INCIDENT_ID',
               'INCIDENT_DATETIME',
               'ALARM_BOX_BOROUGH',
               'ALARM_BOX_NUMBER',
               'ALARM_BOX_LOCATION',
               'LATITUDE',
               'LONGITUDE',
               'INCIDENT_BOROUGH',
               'ZIPCODE',
               'INCIDENT_CLASSIFICATION',
               'INCIDENT_CLASSIFICATION_GROUP',
               'DISPATCH_RESPONSE_SECONDS_QY',
               'INCIDENT_RESPONSE_SECONDS_QY',
               'INCIDENT_TRAVEL_TM_SECONDS_QY',
               'ENGINES_ASSIGNED_QUANTITY',
               'LADDERS_ASSIGNED_QUANTITY',
               'OTHER_UNITS_ASSIGNED_QUANTITY',]]
data.head()

Unnamed: 0,STARFIRE_INCIDENT_ID,INCIDENT_DATETIME,ALARM_BOX_BOROUGH,ALARM_BOX_NUMBER,ALARM_BOX_LOCATION,LATITUDE,LONGITUDE,INCIDENT_BOROUGH,ZIPCODE,INCIDENT_CLASSIFICATION,INCIDENT_CLASSIFICATION_GROUP,DISPATCH_RESPONSE_SECONDS_QY,INCIDENT_RESPONSE_SECONDS_QY,INCIDENT_TRAVEL_TM_SECONDS_QY,ENGINES_ASSIGNED_QUANTITY,LADDERS_ASSIGNED_QUANTITY,OTHER_UNITS_ASSIGNED_QUANTITY
0,1803726530140110,02/06/2018 03:40:32 AM,BROOKLYN,2653,3 AVE & 65 ST,40.63932,-74.023549,BROOKLYN,11220.0,Automobile Fire,NonStructural Fires,6,363,357,2,2,0
1,1819426530140570,07/13/2018 01:55:49 PM,BROOKLYN,2653,3 AVE & 65 ST,40.63932,-74.023549,BROOKLYN,11220.0,Multiple Dwelling 'A' - Other fire,Structural Fires,33,227,194,3,2,1
2,1821826530140150,08/06/2018 06:32:28 AM,BROOKLYN,2653,3 AVE & 65 ST,40.63932,-74.023549,BROOKLYN,11220.0,Demolition Debris or Rubbish Fire,NonStructural Fires,36,191,155,3,2,1
3,1825626530241090,09/13/2018 06:27:32 PM,BROOKLYN,2653,3 AVE & 65 ST,40.63932,-74.023549,BROOKLYN,11220.0,Multiple Dwelling 'A' - Other fire,Structural Fires,65,160,95,4,2,1
4,1825926530240410,09/16/2018 09:51:51 AM,BROOKLYN,2653,3 AVE & 65 ST,40.63932,-74.023549,BROOKLYN,11220.0,Multiple Dwelling 'A' - Food on the stove fire,Structural Fires,50,171,121,3,2,1


In [8]:
# Export cleaned data to csv
data.to_csv('../data/processed/cleaned_fire_dispatch_data.csv', index=False)

## Mapping alarm boxes

In [None]:
# # Visualizing fire incidents in a map
# fire_map = folium.Map(location=[40.73, -73.94], zoom_start=14, tiles='Stamen Terrain')
# heat = building_fires[['LATITUDE', 'LONGITUDE']]
# heat = heat.dropna(axis=0, subset=['LATITUDE', 'LONGITUDE'])
# heat_data = [[row['LATITUDE'], row['LONGITUDE']]for index, row in heat.iterrows()]
# HeatMap(heat_data).add_to(fire_map)
# fire_map

## Data Wrangling

In [11]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 62111 entries, 0 to 62110
Data columns (total 17 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   STARFIRE_INCIDENT_ID           62111 non-null  int64  
 1   INCIDENT_DATETIME              62111 non-null  object 
 2   ALARM_BOX_BOROUGH              62111 non-null  object 
 3   ALARM_BOX_NUMBER               62111 non-null  int64  
 4   ALARM_BOX_LOCATION             62111 non-null  object 
 5   LATITUDE                       62111 non-null  float64
 6   LONGITUDE                      62111 non-null  float64
 7   INCIDENT_BOROUGH               62111 non-null  object 
 8   ZIPCODE                        61221 non-null  float64
 9   INCIDENT_CLASSIFICATION        62111 non-null  object 
 10  INCIDENT_CLASSIFICATION_GROUP  62111 non-null  object 
 11  DISPATCH_RESPONSE_SECONDS_QY   62111 non-null  int64  
 12  INCIDENT_RESPONSE_SECONDS_QY   62111 non-null 

In [12]:
# Convert INCIDENT_DATETIME column to datetime
data['INCIDENT_DATETIME'] = data['INCIDENT_DATETIME'].apply(lambda x: dt.datetime.strptime(x,'%m/%d/%Y %I:%M:%S %p'))
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 62111 entries, 0 to 62110
Data columns (total 17 columns):
 #   Column                         Non-Null Count  Dtype         
---  ------                         --------------  -----         
 0   STARFIRE_INCIDENT_ID           62111 non-null  int64         
 1   INCIDENT_DATETIME              62111 non-null  datetime64[ns]
 2   ALARM_BOX_BOROUGH              62111 non-null  object        
 3   ALARM_BOX_NUMBER               62111 non-null  int64         
 4   ALARM_BOX_LOCATION             62111 non-null  object        
 5   LATITUDE                       62111 non-null  float64       
 6   LONGITUDE                      62111 non-null  float64       
 7   INCIDENT_BOROUGH               62111 non-null  object        
 8   ZIPCODE                        61221 non-null  float64       
 9   INCIDENT_CLASSIFICATION        62111 non-null  object        
 10  INCIDENT_CLASSIFICATION_GROUP  62111 non-null  object        
 11  DISPATCH_RESPON

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [13]:
# Add a column that splits off the year
data['YEAR'] = data['INCIDENT_DATETIME'].dt.year

# Move that column to the beginning of the frame
year = data['YEAR']
data.drop(labels=['YEAR'], axis=1, inplace=True)
data.insert(0,'YEAR', year)
data.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,YEAR,STARFIRE_INCIDENT_ID,INCIDENT_DATETIME,ALARM_BOX_BOROUGH,ALARM_BOX_NUMBER,ALARM_BOX_LOCATION,LATITUDE,LONGITUDE,INCIDENT_BOROUGH,ZIPCODE,INCIDENT_CLASSIFICATION,INCIDENT_CLASSIFICATION_GROUP,DISPATCH_RESPONSE_SECONDS_QY,INCIDENT_RESPONSE_SECONDS_QY,INCIDENT_TRAVEL_TM_SECONDS_QY,ENGINES_ASSIGNED_QUANTITY,LADDERS_ASSIGNED_QUANTITY,OTHER_UNITS_ASSIGNED_QUANTITY
0,2018,1803726530140110,2018-02-06 03:40:32,BROOKLYN,2653,3 AVE & 65 ST,40.63932,-74.023549,BROOKLYN,11220.0,Automobile Fire,NonStructural Fires,6,363,357,2,2,0
1,2018,1819426530140570,2018-07-13 13:55:49,BROOKLYN,2653,3 AVE & 65 ST,40.63932,-74.023549,BROOKLYN,11220.0,Multiple Dwelling 'A' - Other fire,Structural Fires,33,227,194,3,2,1
2,2018,1821826530140150,2018-08-06 06:32:28,BROOKLYN,2653,3 AVE & 65 ST,40.63932,-74.023549,BROOKLYN,11220.0,Demolition Debris or Rubbish Fire,NonStructural Fires,36,191,155,3,2,1
3,2018,1825626530241090,2018-09-13 18:27:32,BROOKLYN,2653,3 AVE & 65 ST,40.63932,-74.023549,BROOKLYN,11220.0,Multiple Dwelling 'A' - Other fire,Structural Fires,65,160,95,4,2,1
4,2018,1825926530240410,2018-09-16 09:51:51,BROOKLYN,2653,3 AVE & 65 ST,40.63932,-74.023549,BROOKLYN,11220.0,Multiple Dwelling 'A' - Food on the stove fire,Structural Fires,50,171,121,3,2,1


In [14]:
# Split out the 2020 & 2021 data
data20_21 = data.loc[(data.YEAR == 2020) | (data.YEAR == 2021)]
data20_21

Unnamed: 0,YEAR,STARFIRE_INCIDENT_ID,INCIDENT_DATETIME,ALARM_BOX_BOROUGH,ALARM_BOX_NUMBER,ALARM_BOX_LOCATION,LATITUDE,LONGITUDE,INCIDENT_BOROUGH,ZIPCODE,INCIDENT_CLASSIFICATION,INCIDENT_CLASSIFICATION_GROUP,DISPATCH_RESPONSE_SECONDS_QY,INCIDENT_RESPONSE_SECONDS_QY,INCIDENT_TRAVEL_TM_SECONDS_QY,ENGINES_ASSIGNED_QUANTITY,LADDERS_ASSIGNED_QUANTITY,OTHER_UNITS_ASSIGNED_QUANTITY
10,2020,2000626530140006,2020-01-06 00:13:36,BROOKLYN,2653,3 AVE & 65 ST,40.639320,-74.023549,BROOKLYN,11220.0,Automobile Fire,NonStructural Fires,39,170,131,1,1,0
11,2020,2001626530140827,2020-01-16 17:18:31,BROOKLYN,2653,3 AVE & 65 ST,40.639320,-74.023549,BROOKLYN,11220.0,Multiple Dwelling 'A' - Food on the stove fire,Structural Fires,28,179,151,3,2,1
12,2020,2016226530140729,2020-06-10 17:31:54,BROOKLYN,2653,3 AVE & 65 ST,40.639320,-74.023549,BROOKLYN,11220.0,Multiple Dwelling 'A' - Food on the stove fire,Structural Fires,42,197,155,3,2,1
13,2020,2025726530140895,2020-09-13 18:22:48,BROOKLYN,2653,3 AVE & 65 ST,40.639320,-74.023549,BROOKLYN,11220.0,Demolition Debris or Rubbish Fire,NonStructural Fires,42,174,132,3,2,1
14,2020,2033426530140121,2020-11-29 03:54:56,BROOKLYN,2653,3 AVE & 65 ST,40.639320,-74.023549,BROOKLYN,11220.0,Multiple Dwelling 'A' - Other fire,Structural Fires,15,140,125,3,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62103,2021,2105240000000000,2021-02-21 15:15:00,BROOKLYN,3874,AVENUE T & W 13 ST,40.598038,-73.984494,BROOKLYN,11223.0,Manhole Fire - Other,NonStructural Fires,32,382,350,3,2,1
62107,2020,2000347130150135,2020-01-03 04:53:38,QUEENS,4713,MERRICK BLVD & BAISLEY BLVD,40.683778,-73.769742,QUEENS,11434.0,Private Dwelling Fire,Structural Fires,22,236,214,3,3,1
62108,2020,2008047130150389,2020-03-20 17:08:53,QUEENS,4713,MERRICK BLVD & BAISLEY BLVD,40.683778,-73.769742,QUEENS,11434.0,Automobile Fire,NonStructural Fires,24,229,205,1,1,0
62109,2020,2020147130150874,2020-07-19 23:05:15,QUEENS,4713,MERRICK BLVD & BAISLEY BLVD,40.683778,-73.769742,QUEENS,11434.0,Private Dwelling Fire,Structural Fires,34,234,200,3,2,1


In [15]:
data18_19 = data.loc[(data.YEAR == 2018)|(data.YEAR == 2019)]
data18_19

Unnamed: 0,YEAR,STARFIRE_INCIDENT_ID,INCIDENT_DATETIME,ALARM_BOX_BOROUGH,ALARM_BOX_NUMBER,ALARM_BOX_LOCATION,LATITUDE,LONGITUDE,INCIDENT_BOROUGH,ZIPCODE,INCIDENT_CLASSIFICATION,INCIDENT_CLASSIFICATION_GROUP,DISPATCH_RESPONSE_SECONDS_QY,INCIDENT_RESPONSE_SECONDS_QY,INCIDENT_TRAVEL_TM_SECONDS_QY,ENGINES_ASSIGNED_QUANTITY,LADDERS_ASSIGNED_QUANTITY,OTHER_UNITS_ASSIGNED_QUANTITY
0,2018,1803726530140110,2018-02-06 03:40:32,BROOKLYN,2653,3 AVE & 65 ST,40.639320,-74.023549,BROOKLYN,11220.0,Automobile Fire,NonStructural Fires,6,363,357,2,2,0
1,2018,1819426530140570,2018-07-13 13:55:49,BROOKLYN,2653,3 AVE & 65 ST,40.639320,-74.023549,BROOKLYN,11220.0,Multiple Dwelling 'A' - Other fire,Structural Fires,33,227,194,3,2,1
2,2018,1821826530140150,2018-08-06 06:32:28,BROOKLYN,2653,3 AVE & 65 ST,40.639320,-74.023549,BROOKLYN,11220.0,Demolition Debris or Rubbish Fire,NonStructural Fires,36,191,155,3,2,1
3,2018,1825626530241090,2018-09-13 18:27:32,BROOKLYN,2653,3 AVE & 65 ST,40.639320,-74.023549,BROOKLYN,11220.0,Multiple Dwelling 'A' - Other fire,Structural Fires,65,160,95,4,2,1
4,2018,1825926530240410,2018-09-16 09:51:51,BROOKLYN,2653,3 AVE & 65 ST,40.639320,-74.023549,BROOKLYN,11220.0,Multiple Dwelling 'A' - Food on the stove fire,Structural Fires,50,171,121,3,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62098,2019,1933638740140327,2019-12-02 09:19:19,BROOKLYN,3874,AVENUE T & W 13 ST,40.598038,-73.984494,BROOKLYN,11223.0,Manhole Fire - Seeping Smoke,NonStructural Fires,40,415,375,3,2,1
62099,2019,1934638740141089,2019-12-12 21:37:32,BROOKLYN,3874,AVENUE T & W 13 ST,40.598038,-73.984494,BROOKLYN,11223.0,Automobile Fire,NonStructural Fires,21,382,361,1,1,0
62104,2018,1824547130150240,2018-09-02 09:37:33,QUEENS,4713,MERRICK BLVD & BAISLEY BLVD,40.683778,-73.769742,QUEENS,11434.0,Store Fire,Structural Fires,30,252,222,4,2,1
62105,2019,1916447130150628,2019-06-13 19:02:10,QUEENS,4713,MERRICK BLVD & BAISLEY BLVD,40.683778,-73.769742,QUEENS,11434.0,Demolition Debris or Rubbish Fire,NonStructural Fires,32,262,230,1,1,0


In [None]:
# Calculate all fire incidents for 2018-2019
data18_19.STARFIRE_INCIDENT_ID.value_counts()