# Assessing fire risk by location in NYC

## Background
Using data provided by NYC OpenData, this notebook walks through the steps of analyzing fire risk in New York City.

## Import Libraries

In [None]:
# Data analysis and visualization
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import datetime as dt

# Interactive maps
import folium
from folium.plugins import HeatMap

# Machine Learning
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_curve, roc_auc_score, auc
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.pipeline import Pipeline

## Load and describe data

Note: Data was filtered on the NYC OpenData site to only include incident classification groups that were fire-related (Structural and NonStructural Fires) prior to export.

In [3]:
# Connection to azure database (disabled)
# SECURITY: a plaintext username and password used to be embedded in this cell.
# They have been redacted here; treat the old login as leaked and rotate it.
# Supply credentials at run time (e.g. environment variables) instead of
# committing them with the notebook.
# import os
# import pandas as pd, pyodbc
# server = 'finalprojectdata.database.windows.net'
# database = 'v2-project-data'
# username = os.environ['AZURE_SQL_USER']
# password = os.environ['AZURE_SQL_PASSWORD']
# driver= '{ODBC Driver 17 for SQL Server}'
# # con_string = 'DRIVER='+driver+';SERVER='+server+';PORT=1433;DATABASE='+database+';UID='+username+';PWD='+ password
# # con_string = 'DRIVER={SQL Server};SERVER='+ <server> +';DATABASE=' + <database>
# # cnxn = pyodbc.connect(con_string)
# cnxn = pyodbc.connect(
#     'DRIVER={ODBC Driver 17 for SQL Server};'
#     'SERVER=finalprojectdata.database.windows.net;'
#     'PORT=1433;'
#     'DATABASE=v2-project-data;'
#     f'UID={username};'
#     f'PWD={password};'
# )
# Smoke-test query: fetch the first three rows of the cleaned dispatch table
# query = """
# SELECT TOP 3 * FROM cleaned_fire_dispatch_data
# """
# result_port_map = pd.read_sql(query, cnxn)
# result_port_map

Unnamed: 0,STARFIRE_INCIDENT_ID,INCIDENT_DATETIME,ALARM_BOX_BOROUGH,ALARM_BOX_NUMBER,ALARM_BOX_LOCATION,LATITUDE,LONGITUDE,INCIDENT_BOROUGH,ZIPCODE,INCIDENT_CLASSIFICATION,...,DISPATCH_RESPONSE_SECONDS_QY,INCIDENT_RESPONSE_SECONDS_QY,INCIDENT_TRAVEL_TM_SECONDS_QY,ENGINES_ASSIGNED_QUANTITY,LADDERS_ASSIGNED_QUANTITY,OTHER_UNITS_ASSIGNED_QUANTITY,Column 17,Column 18,Column 19,Column 20
0,1803730000000000.0,2018-02-06 03:40:00,BROOKLYN,2653,3 AVE & 65 ST,40.63932,-74.023549,BROOKLYN,11220,Automobile Fire,...,6,363,357,2,2,0,,,,1899-12-30
1,1819430000000000.0,2018-07-13 13:55:00,BROOKLYN,2653,3 AVE & 65 ST,40.63932,-74.023549,BROOKLYN,11220,Multiple Dwelling 'A' - Other fire,...,33,227,194,3,2,1,,,,1899-12-30
2,1821830000000000.0,2018-08-06 06:32:00,BROOKLYN,2653,3 AVE & 65 ST,40.63932,-74.023549,BROOKLYN,11220,Demolition Debris or Rubbish Fire,...,36,191,155,3,2,1,,,,1899-12-30


In [None]:
# Load the raw CSV extracts into DataFrames

# Fire Dispatch: alarm box locations and fire incident dispatch records
alarms_df = pd.read_csv('../data/raw/In-Service_Alarm_Box_Locations.csv')
fires_df = pd.read_csv('../data/raw/Fire_Incident_Dispatch_Data.csv')

# NYPD Complaints (file names suggest one extract for 2018-19 and one for 2020-21)
nypd1819_df = pd.read_csv('../data/raw/2018_19_crime_data.csv')
nypd2021_df = pd.read_csv('../data/raw/2020_21_crime_data.csv')

# DOB/ECB Violations
# TODO: the statement that belonged here was left unfinished (`DOB18_df = pd.`).
# That is a SyntaxError and prevented this whole cell from running, so it is
# parked as a comment until the DOB/ECB source file path is known.

In [None]:
# Join alarm-box records to fire incidents on the alarm-box location text
# (pd.merge defaults to an inner join, so only locations present in both frames survive).
# The loading cell names these frames `alarms_df` and `fires_df`; the previous
# `alarms_data` / `fires_data` were never defined and raised NameError on a fresh kernel.
merged = pd.merge(left=alarms_df, right=fires_df, left_on='LOCATION', right_on='ALARM_BOX_LOCATION')
merged.head()

In [None]:
# Narrow the merged frame down to the columns of interest
keep_columns = [
    'STARFIRE_INCIDENT_ID',
    'INCIDENT_DATETIME',
    'ALARM_BOX_BOROUGH',
    'ALARM_BOX_NUMBER',
    'ALARM_BOX_LOCATION',
    'LATITUDE',
    'LONGITUDE',
    'INCIDENT_BOROUGH',
    'ZIPCODE',
    'INCIDENT_CLASSIFICATION',
    'INCIDENT_CLASSIFICATION_GROUP',
    'DISPATCH_RESPONSE_SECONDS_QY',
    'INCIDENT_RESPONSE_SECONDS_QY',
    'INCIDENT_TRAVEL_TM_SECONDS_QY',
    'ENGINES_ASSIGNED_QUANTITY',
    'LADDERS_ASSIGNED_QUANTITY',
    'OTHER_UNITS_ASSIGNED_QUANTITY',
]

# .copy() makes `data` an independent frame instead of a selection still tied to
# `merged`; later cells assign columns on `data`, which would otherwise trigger
# pandas' SettingWithCopyWarning.
data = merged[keep_columns].copy()
data.head()

In [None]:
# Persist the trimmed frame as a CSV file, leaving the row index out of the file
data.to_csv(
    path_or_buf='../data/processed/cleaned_fire_dispatch_data.csv',
    index=False,
)

## Mapping alarm boxes

In [None]:
# NOTE(review): this cell is disabled. It reads `building_fires`, which is not
# defined in any of the cells visible above; define it (or substitute another
# frame that has LATITUDE and LONGITUDE columns) before re-enabling.
# # Visualizing fire incidents in a map
# fire_map = folium.Map(location=[40.73, -73.94], zoom_start=14, tiles='Stamen Terrain')
# heat = building_fires[['LATITUDE', 'LONGITUDE']]
# heat = heat.dropna(axis=0, subset=['LATITUDE', 'LONGITUDE'])
# heat_data = [[row['LATITUDE'], row['LONGITUDE']]for index, row in heat.iterrows()]
# HeatMap(heat_data).add_to(fire_map)
# fire_map

## Data Wrangling

In [None]:
# Print a summary of the frame (columns, dtypes, non-null counts) before wrangling
data.info()

In [None]:
# Convert INCIDENT_DATETIME from text to datetime64.
# pd.to_datetime parses the whole column in one vectorized call, turns missing
# values into NaT, and passes an already-converted column through unchanged, so
# this cell can safely be re-run. The previous row-by-row strptime raised
# TypeError on any value that was not a string (NaN, or a Timestamp on re-run).
data['INCIDENT_DATETIME'] = pd.to_datetime(
    data['INCIDENT_DATETIME'],
    format='%m/%d/%Y %I:%M:%S %p',
)
data.info()

In [None]:
# Derive the calendar year of every incident from its timestamp
data['YEAR'] = data['INCIDENT_DATETIME'].dt.year

# Relocate YEAR to the front: pop it out of the frame, then re-insert at position 0
year = data.pop('YEAR')
data.insert(0, 'YEAR', year)
data.head()

In [None]:
# Keep only the rows whose YEAR is 2020 or 2021
data20_21 = data[data['YEAR'].isin([2020, 2021])]
data20_21

In [None]:
# Keep only the rows whose YEAR is 2018 or 2019
data18_19 = data[data['YEAR'].isin([2018, 2019])]
data18_19

In [None]:
# Tally how many 2018-2019 rows carry each STARFIRE_INCIDENT_ID
data18_19['STARFIRE_INCIDENT_ID'].value_counts()