 ## Project 2
 ### Team:4
    
    Anji Asthana
    Eugene Witherspoon
    Fatma Butun
    Matt Keeley
    Shay Masood


 

## Overview 

####  The purpose of this project is to examine the distribution of fire and rescue incident calls across different neighborhoods in Cincinnati, OH and inform residents about the fire department's performance, including: 
#####   a. Ambulance headquarters placement 
#####   b. Fire hydrant pressure adjustments
#####   c. Allocation of firefighter/rescue resources
#####   d. Helping high-risk citizens stay informed about response time
#####   e. Serving as a resource for fire department managers/administration


In [1]:
import os
from datetime import datetime

import pandas as pd
from sqlalchemy import create_engine, inspect



## Extract the Data 

### This part uses the original data source to produce a smaller file

### The code here is commented out because the original file is not available on GitHub

In [2]:
# # read the csv into a dataframe
# cincinnati_fire = pd.read_csv("Cincinnati_Fire_Incidents__CAD___including_EMS__ALS_BLS_.csv")

In [3]:
# cincinnati_fire.tail()

In [4]:
# # Check out how many rows and columns are in the dataframe

# cincinnati_fire.shape

## Transform and Clean the Data

### Reduce the original Data

In [5]:
# # check the column types
# cincinnati_fire.dtypes

In [6]:
# # Change the CREATE_TIME_INCIDENT  column into datetime64 in order to do further analysis

# cincinnati_fire["CREATE_TIME_INCIDENT"] = pd.to_datetime(cincinnati_fire["CREATE_TIME_INCIDENT"])


In [7]:
# # check out the types of the columns to see if the transformation has worked
# cincinnati_fire.dtypes

In [8]:
# # remove the rows with incidents before the year 2019

# cincinnati_filtered_df = cincinnati_fire.loc[(cincinnati_fire['CREATE_TIME_INCIDENT'] >= '01/01/2019')]
# cincinnati_filtered_df.shape  

In [9]:
# # save the filtered dataframe and use this as your source data

# cincinnati_filtered_df.to_csv('cincinnati_filtered_df.csv', index=False)

## Load the filtered data for the analysis


In [10]:
# Load the reduced incidents file into a DataFrame

cincinnati_fire_filtered = pd.read_csv(filepath_or_buffer="cincinnati_filtered_df.csv")

In [11]:
# Parse CREATE_TIME_INCIDENT into datetime64 so it can be used in date arithmetic

cincinnati_fire_filtered["CREATE_TIME_INCIDENT"] = cincinnati_fire_filtered["CREATE_TIME_INCIDENT"].pipe(pd.to_datetime)

In [12]:
# Parse ARRIVAL_TIME_PRIMARY_UNIT into datetime64 so it can be used in date arithmetic

cincinnati_fire_filtered["ARRIVAL_TIME_PRIMARY_UNIT"] = cincinnati_fire_filtered["ARRIVAL_TIME_PRIMARY_UNIT"].pipe(pd.to_datetime)

In [13]:
# Parse CLOSED_TIME_INCIDENT into datetime64, then display the column dtypes
# to confirm the conversion

cincinnati_fire_filtered["CLOSED_TIME_INCIDENT"] = cincinnati_fire_filtered["CLOSED_TIME_INCIDENT"].pipe(pd.to_datetime)
cincinnati_fire_filtered.dtypes

ADDRESS_X                                 object
LATITUDE_X                               float64
LONGITUDE_X                              float64
AGENCY                                    object
CREATE_TIME_INCIDENT              datetime64[ns]
DISPOSITION_TEXT                          object
EVENT_NUMBER                              object
INCIDENT_TYPE_ID                          object
INCIDENT_TYPE_DESC                        object
NEIGHBORHOOD                              object
ARRIVAL_TIME_PRIMARY_UNIT         datetime64[ns]
BEAT                                      object
CLOSED_TIME_INCIDENT              datetime64[ns]
DISPATCH_TIME_PRIMARY_UNIT                object
CFD_INCIDENT_TYPE                         object
CFD_INCIDENT_TYPE_GROUP                   object
COMMUNITY_COUNCIL_NEIGHBORHOOD            object
dtype: object

In [14]:
# Keep only the rows that have no missing value in any column,
# then show the resulting (rows, columns) shape

cincinnati_fire_filtered = cincinnati_fire_filtered.dropna(axis=0, how="any")
cincinnati_fire_filtered.shape

(127528, 17)

In [15]:
# Calculate the duration (in whole minutes) between the incident being reported
# and the primary unit arriving.
# `.astype('timedelta64[m]')` is rejected by pandas 2.x (only s/ms/us/ns
# resolutions are supported), so the minutes are derived from total seconds.
# The column remains float, with partial minutes floored.

cincinnati_fire_filtered["ARRIVAL_DURATION"] = (
    cincinnati_fire_filtered["ARRIVAL_TIME_PRIMARY_UNIT"] - cincinnati_fire_filtered["CREATE_TIME_INCIDENT"]
).dt.total_seconds() // 60



In [16]:
# Calculate the duration (in whole minutes) between the incident being reported
# and the incident being closed.
# `.astype('timedelta64[m]')` is rejected by pandas 2.x (only s/ms/us/ns
# resolutions are supported), so the minutes are derived from total seconds.
# The column remains float, with partial minutes floored.

cincinnati_fire_filtered["CLOSING_DURATION"] = (
    cincinnati_fire_filtered["CLOSED_TIME_INCIDENT"] - cincinnati_fire_filtered["CREATE_TIME_INCIDENT"]
).dt.total_seconds() // 60



In [17]:
# cincinnati_fire_filtered.head()

In [18]:
# Drop duplicate rows (the first occurrence of each is kept),
# then show the resulting (rows, columns) shape
cincinnati_fire_filtered = cincinnati_fire_filtered.drop_duplicates(keep="first")
cincinnati_fire_filtered.shape

(120546, 19)

In [19]:
# Build 3 new dataframes, each holding only the columns it needs

# 1- incidents vs neighborhood: event id, neighborhood, coordinates and incident type

neighborhood_incidents = cincinnati_fire_filtered.loc[
    :,
    ["EVENT_NUMBER", "NEIGHBORHOOD", "LATITUDE_X", "LONGITUDE_X", "INCIDENT_TYPE_DESC"],
]
neighborhood_incidents.head()

Unnamed: 0,EVENT_NUMBER,NEIGHBORHOOD,LATITUDE_X,LONGITUDE_X,INCIDENT_TYPE_DESC
0,CFD211116000125,WALNUT HILLS,39.12604,-84.487895,ABNORMAL BREATHING
2,CFD211116000123,ROSELAWN,39.205596,-84.456781,NOT ALERT
5,CFD211116000120,ROSELAWN,39.191268,-84.45761,PUBLIC ASSIST (NO INJURIES AND NO PRIORITY SYM...
7,CFD211116000118,WINTON HILLS,39.1849,-84.51245,ALERT WITH ABNORMAL BREATHING
10,CFD211116000115,DOWNTOWN,39.104139,-84.508171,POSSIBLY DANGEROUS BODY AREA


In [20]:
# 2- incidents vs arrival time: event id, arrival duration, report time,
#    neighborhood and incident type

incidents_time_duration = cincinnati_fire_filtered.loc[
    :,
    ["EVENT_NUMBER", "ARRIVAL_DURATION", "CREATE_TIME_INCIDENT", "NEIGHBORHOOD", "INCIDENT_TYPE_DESC"],
]
incidents_time_duration.head(15)

Unnamed: 0,EVENT_NUMBER,ARRIVAL_DURATION,CREATE_TIME_INCIDENT,NEIGHBORHOOD,INCIDENT_TYPE_DESC
0,CFD211116000125,3.0,2021-11-16 01:41:00,WALNUT HILLS,ABNORMAL BREATHING
2,CFD211116000123,6.0,2021-11-16 01:29:00,ROSELAWN,NOT ALERT
5,CFD211116000120,5.0,2021-11-16 01:12:00,ROSELAWN,PUBLIC ASSIST (NO INJURIES AND NO PRIORITY SYM...
7,CFD211116000118,4.0,2021-11-16 01:07:00,WINTON HILLS,ALERT WITH ABNORMAL BREATHING
10,CFD211116000115,2.0,2021-11-16 01:02:00,DOWNTOWN,POSSIBLY DANGEROUS BODY AREA
11,CFD211116000114,4.0,2021-11-16 01:00:00,AVONDALE,ABNORMAL BREATHING
12,CFD211116000113,4.0,2021-11-16 12:51:00,WESTWOOD,ALTERED LEVEL OF CONSCIOUSNESS
14,CFD211116000111,7.0,2021-11-16 12:48:00,MADISONVILLE,NOT ALERT
15,CFD211116000110,2.0,2021-11-16 12:48:00,WALNUT HILLS,SUDDEN SPEECH PROBLEMS
18,CFD211116000107,3.0,2021-11-16 12:41:00,COLLEGE HILL,CHANGING COLOR


In [21]:
# 3- prepare a grouped neighborhood dataframe: the number of incidents for each
#    (neighborhood, incident type) pair, stored in a "counts" column.
#    (A discarded `.unique()` call that had no effect was removed from this cell.)

neighborhood_incidents_grouped = (
    neighborhood_incidents
    .groupby(["NEIGHBORHOOD", "INCIDENT_TYPE_DESC"])
    .size()
    .reset_index(name='counts')
)
neighborhood_incidents_grouped.head(15)

Unnamed: 0,NEIGHBORHOOD,INCIDENT_TYPE_DESC,counts
0,AVONDALE,1ST TRIMESTER HEMORRHAGE OR MISCARRIAGE,8
1,AVONDALE,1ST TRIMESTER SERIOUS HEMORRHAGE,9
2,AVONDALE,2ND TRIMESTER HEMORRHAGE OR MISCARRIAGE,8
3,AVONDALE,3RD TRIMESTER HEMORRHAGE,7
4,AVONDALE,ABDOMINAL PAIN,225
5,AVONDALE,ABDOMINAL PAIN/CRAMPING (< 6 MONTHS/24 WEEKS A...,25
6,AVONDALE,ABNORMAL BEHAVIOR,33
7,AVONDALE,ABNORMAL BREATHING,782
8,AVONDALE,ABNORMAL BREATHING (PARTIAL OBSTRUCTION),24
9,AVONDALE,ABNORMAL BREATHING WITH MULTIPLE FLU SYMPTOMS,25


In [22]:
# put similar incidents under the same incident type description. 

# what can we put together?

## Create Connection to SQL database and load the tables

In [23]:
# create a connection string to postgres
# The "user:password@host:port/database" part can be supplied through the
# CINCINNATI_FIRE_DB environment variable so real credentials need not be
# committed with the notebook; the local development value is the fallback.
connection_string = os.environ.get(
    "CINCINNATI_FIRE_DB",
    "postgres:bootcamp@localhost:5432/cincinnatifire",
)
engine = create_engine(f'postgresql://{connection_string}')

In [24]:
# check the tables in the cincinnatifire database
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the Inspector returns the same list of table names.
inspect(engine).get_table_names()

  engine.table_names()


['neighborhood_incidents',
 'incidents_time_duration',
 'neighborhood_incidents_grouped']

In [25]:
# Append the rows of neighborhood_incidents to the table of the same name.
# NOTE: with if_exists='append', re-running this cell inserts the same rows
# again (or fails if the table enforces uniqueness).
neighborhood_incidents.to_sql(
    name='neighborhood_incidents',
    con=engine,
    if_exists='append',
    index=False,
)



In [26]:
# Append the rows of incidents_time_duration to the table of the same name.
# NOTE: with if_exists='append', re-running this cell inserts the same rows
# again (or fails if the table enforces uniqueness).
incidents_time_duration.to_sql(
    name='incidents_time_duration',
    con=engine,
    if_exists='append',
    index=False,
)

In [27]:
# Append the rows of neighborhood_incidents_grouped to the table of the same name.
# NOTE: with if_exists='append', re-running this cell inserts the same rows
# again (or fails if the table enforces uniqueness).
neighborhood_incidents_grouped.to_sql(
    name='neighborhood_incidents_grouped',
    con=engine,
    if_exists='append',
    index=False,
)

In [28]:
# check if the tables are correctly loaded into the database
# LIMIT makes the database return only the preview rows instead of
# transferring the whole table just to display the first five.

pd.read_sql_query('select * from incidents_time_duration limit 5', con=engine)

Unnamed: 0,EVENT_NUMBER,ARRIVAL_DURATION,CREATE_TIME_INCIDENT,NEIGHBORHOOD,INCIDENT_TYPE_DESC
0,CFD211116000125,3.0,2021-11-16 01:41:00,WALNUT HILLS,ABNORMAL BREATHING
1,CFD211116000123,6.0,2021-11-16 01:29:00,ROSELAWN,NOT ALERT
2,CFD211116000120,5.0,2021-11-16 01:12:00,ROSELAWN,PUBLIC ASSIST (NO INJURIES AND NO PRIORITY SYM...
3,CFD211116000118,4.0,2021-11-16 01:07:00,WINTON HILLS,ALERT WITH ABNORMAL BREATHING
4,CFD211116000115,2.0,2021-11-16 01:02:00,DOWNTOWN,POSSIBLY DANGEROUS BODY AREA
