# Assessing fire risk factors in NYC

## Background
Using data provided by NYC OpenData, this notebook walks through the steps of analyzing fire-related incidents and some possible contributing factors in New York City.

## Import Libraries

In [1]:
# Data analysis and visualization
import numpy as np
import pandas as pd
import datetime as dt


## Load and describe data

Note: Data was filtered on the NYC OpenData site to only include incident classification groups that were fire-related (Structural and NonStructural Fires) prior to export.

In [3]:
# Load the raw CSV exports into pandas DataFrames

# Directory holding the raw exports, relative to this notebook. Defined once so
# that relocating the data folder only requires a single edit.
RAW_DATA_DIR = '../static/data/raw/'

# NYPD Complaints
nypd_df = pd.read_csv(RAW_DATA_DIR + 'NYPD_Complaint_18-21.csv')

# Housing Maintenance Code Violations
codev_df = pd.read_csv(RAW_DATA_DIR + 'Housing_Maintenance_Code_Violations_18-21.csv')

# Orders to repair/vacate
vacate_df = pd.read_csv(RAW_DATA_DIR + 'Order_to_Repair_Vacate_18-21.csv')

FileNotFoundError: [Errno 2] File ../data/raw/In-Service_Alarm_Box_Locations.csv does not exist: '../data/raw/In-Service_Alarm_Box_Locations.csv'

## ELT

### NYPD Complaints

In [None]:
# Preview the NYPD complaints frame as loaded, before any columns are renamed
nypd_df

In [None]:
# Rename columns for easier usage: the raw NYPD headers become the
# DATE / BOROUGH / count names used for the other datasets as well
nypd_column_names = {
    'CMPLNT_FR_DT': 'DATE',
    'BORO_NM': 'BOROUGH',
    'CMPLNT_NUM': 'NYPD_COMPLAINT_COUNT',
}
nypd_df.rename(columns=nypd_column_names, inplace=True)
nypd_df.head()

In [None]:
# Summarize the frame's columns and dtypes ahead of the DATE conversion
nypd_df.info()

In [None]:
# Convert the DATE column (strings such as '01/31/2019 12:00:00 AM') to datetime.
# pd.to_datetime parses the whole column at once and turns missing values into
# NaT, whereas a per-row strptime raises TypeError on the first NaN. Strings that
# do not match the format still raise, so malformed data is not silently hidden.
nypd_df['DATE'] = pd.to_datetime(nypd_df['DATE'], format='%m/%d/%Y %I:%M:%S %p')
nypd_df.info()

In [None]:
# Derive the calendar year of every complaint from its DATE value
nypd_df['YEAR'] = nypd_df['DATE'].dt.year

# Re-seat YEAR as the leading column: lift it out, then put it back at position 0
year = nypd_df['YEAR']
nypd_df.drop(columns=['YEAR'], inplace=True)
nypd_df.insert(loc=0, column='YEAR', value=year)
nypd_df.head()

### Housing Maintenance Code Violations

In [None]:
# Preview the housing code violations frame as loaded, before any columns are renamed
codev_df

In [None]:
# Rename columns: the raw code-violation headers become the
# DATE / BOROUGH / count names used for the other datasets as well
codev_column_names = {
    'NOVIssuedDate': 'DATE',
    'Borough': 'BOROUGH',
    'ViolationID': 'CODE_VIOLATION_COUNT',
}
codev_df.rename(columns=codev_column_names, inplace=True)
codev_df.head()

In [None]:
# Summarize the frame's columns and dtypes ahead of the DATE conversion
codev_df.info()

In [None]:
# Convert the DATE column (strings such as '01/31/2019 12:00:00 AM') to datetime.
# pd.to_datetime parses the whole column at once and turns missing values into
# NaT, whereas a per-row strptime raises TypeError on the first NaN. Strings that
# do not match the format still raise, so malformed data is not silently hidden.
codev_df['DATE'] = pd.to_datetime(codev_df['DATE'], format='%m/%d/%Y %I:%M:%S %p')
codev_df.info()

In [None]:
# Derive the calendar year of every violation from its DATE value
codev_df['YEAR'] = codev_df['DATE'].dt.year

# Re-seat YEAR as the leading column: lift it out, then put it back at position 0
year = codev_df['YEAR']
codev_df.drop(columns=['YEAR'], inplace=True)
codev_df.insert(loc=0, column='YEAR', value=year)
codev_df.head()

### Orders to Vacate

In [None]:
# Preview the orders to repair/vacate frame as loaded, before any columns are renamed
vacate_df

In [None]:
# Rename columns. The raw BOROUGH column becomes BORO, which leaves the
# BOROUGH name free for the full borough names added in a later step.
vacate_column_names = {
    'VACATE EFFECTIVE DATE': 'DATE',
    'BOROUGH': 'BORO',
    'VACATE ORDER NUMBER': 'VACATE_ORDER_COUNT',
}
vacate_df.rename(columns=vacate_column_names, inplace=True)
vacate_df.head()

In [None]:
# Replace Borough abbreviations with names based on file schema
# MN = Manhattan
# BX = Bronx
# BK = Brooklyn
# QN = Queens
# SI = Staten Island

def f(x):
    """Return the upper-case borough name for the code stored in ``x['BORO']``.

    ``x`` is anything indexable by the key ``'BORO'`` (for example a DataFrame
    row). Values other than MN, BX, BK, QN and SI yield an empty string.
    """
    borough_by_code = {
        'MN': 'MANHATTAN',
        'BX': 'BRONX',
        'BK': 'BROOKLYN',
        'QN': 'QUEENS',
        'SI': 'STATEN ISLAND',
    }
    return borough_by_code.get(x['BORO'], '')

# Run f over every row and store the resulting borough names as BOROUGH
borough_names = vacate_df.apply(f, axis='columns')
vacate_df['BOROUGH'] = borough_names
vacate_df.head()

In [None]:
# Drop unneeded columns, keeping only DATE, BOROUGH and VACATE_ORDER_COUNT.
# The explicit .copy() makes the result an independent frame, so the column
# assignments in later cells do not trigger SettingWithCopyWarning.
vacate_df = vacate_df[['DATE',
                       'BOROUGH',
                       'VACATE_ORDER_COUNT']].copy()
vacate_df.head()

In [None]:
# Summarize the frame's columns and dtypes ahead of the DATE conversion
vacate_df.info()

In [None]:
# Convert the DATE column (strings such as '01/31/2019 12:00:00 AM') to datetime.
# pd.to_datetime parses the whole column at once and turns missing values into
# NaT, whereas a per-row strptime raises TypeError on the first NaN. Strings that
# do not match the format still raise, so malformed data is not silently hidden.
vacate_df['DATE'] = pd.to_datetime(vacate_df['DATE'], format='%m/%d/%Y %I:%M:%S %p')
vacate_df.info()

In [None]:
# Derive the calendar year of every vacate order from its DATE value
vacate_df['YEAR'] = vacate_df['DATE'].dt.year

# Re-seat YEAR as the leading column: lift it out, then put it back at position 0
year = vacate_df['YEAR']
vacate_df.drop(columns=['YEAR'], inplace=True)
vacate_df.insert(loc=0, column='YEAR', value=year)
vacate_df.head()