 ## Project 2
 ### Team: 4
    
    Anji Asthana
    Eugene Witherspoon
    Fatma Butun
    Matt Keeley
    Shay Masood


 

## Overview 

####  The purpose of this project is to examine the distribution of fire and rescue incident calls across different neighborhoods in Cincinnati, OH, and to inform residents about the fire department’s performance. The results can support: 
#####   a. Ambulance headquarters placement 
#####   b. Fire hydrant pressure adjustments
#####   c. Allocation of firefighter/rescue resources
#####   d. Keeping high-risk citizens informed about response times
#####   e. Fire department managers/administration, as a reference resource


In [None]:
import os
from datetime import datetime

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import inspect



## Extract the Data 

### This part uses the original data source to produce a smaller file 

### The code here is commented out because the original file is not available on GitHub

In [None]:
# # read the csv into a dataframe
# cincinnati_fire = pd.read_csv("Cincinnati_Fire_Incidents__CAD___including_EMS__ALS_BLS_.csv")

In [None]:
# cincinnati_fire.tail()

In [None]:
# # Check out how many rows and columns are in the dataframe

# cincinnati_fire.shape

## Transform and Clean the Data

### Reduce the original Data

In [None]:
# # check the column types
# cincinnati_fire.dtypes

In [None]:
# # Change the CREATE_TIME_INCIDENT  column into datetime64 in order to do further analysis

# cincinnati_fire["CREATE_TIME_INCIDENT"] = pd.to_datetime(cincinnati_fire["CREATE_TIME_INCIDENT"])


In [None]:
# # check out the types of the columns to see if the transformation has worked
# cincinnati_fire.dtypes

In [None]:
# # remove the rows with incidents before the year 2019

# cincinnati_filtered_df = cincinnati_fire.loc[(cincinnati_fire['CREATE_TIME_INCIDENT'] >= '01/01/2019')]
# cincinnati_filtered_df.shape  

In [None]:
# # save the filtered dataframe and use this as your source data

# cincinnati_filtered_df.to_csv('cincinnati_filtered_df.csv', index=False)

## Load the filtered data for the analysis


In [None]:
# Load the reduced incident extract (cincinnati_filtered_df.csv) into a
# DataFrame; this is the source data for the rest of the notebook.
cincinnati_fire_filtered = pd.read_csv(filepath_or_buffer="cincinnati_filtered_df.csv")

In [None]:
# Parse CREATE_TIME_INCIDENT into datetime64 so it can be used in the
# duration arithmetic further down.
cincinnati_fire_filtered["CREATE_TIME_INCIDENT"] = (
    cincinnati_fire_filtered["CREATE_TIME_INCIDENT"].pipe(pd.to_datetime)
)

In [None]:
# Parse ARRIVAL_TIME_PRIMARY_UNIT into datetime64 so it can be used in the
# duration arithmetic further down.
cincinnati_fire_filtered["ARRIVAL_TIME_PRIMARY_UNIT"] = (
    cincinnati_fire_filtered["ARRIVAL_TIME_PRIMARY_UNIT"].pipe(pd.to_datetime)
)

In [None]:
# Parse CLOSED_TIME_INCIDENT into datetime64 so it can be used in the
# duration arithmetic further down.
cincinnati_fire_filtered["CLOSED_TIME_INCIDENT"] = (
    cincinnati_fire_filtered["CLOSED_TIME_INCIDENT"].pipe(pd.to_datetime)
)

# Display the column dtypes to confirm the conversion.
cincinnati_fire_filtered.dtypes

In [None]:
# Discard every row that has a missing value in any column, then report the
# remaining (rows, columns).
cincinnati_fire_filtered = cincinnati_fire_filtered.dropna(axis=0, how="any")
cincinnati_fire_filtered.shape

In [None]:
# Minutes between the incident being created and the primary unit arriving.
#
# The previous `.astype('timedelta64[m]')` cast is rejected by pandas >= 2.0,
# which only supports the s/ms/us/ns resolutions for timedelta data. Going
# through total seconds works on every pandas version; floor division keeps
# the whole-minute float values that the cast produced on older pandas
# (where astyping a timedelta was equivalent to floor division).
cincinnati_fire_filtered["ARRIVAL_DURATION"] = (
    cincinnati_fire_filtered["ARRIVAL_TIME_PRIMARY_UNIT"]
    - cincinnati_fire_filtered["CREATE_TIME_INCIDENT"]
).dt.total_seconds() // 60



In [None]:
# Minutes between the incident being created and the incident being closed.
#
# The previous `.astype('timedelta64[m]')` cast is rejected by pandas >= 2.0,
# which only supports the s/ms/us/ns resolutions for timedelta data. Going
# through total seconds works on every pandas version; floor division keeps
# the whole-minute float values that the cast produced on older pandas
# (where astyping a timedelta was equivalent to floor division).
cincinnati_fire_filtered["CLOSING_DURATION"] = (
    cincinnati_fire_filtered["CLOSED_TIME_INCIDENT"]
    - cincinnati_fire_filtered["CREATE_TIME_INCIDENT"]
).dt.total_seconds() // 60



In [None]:
# cincinnati_fire_filtered.head()

In [None]:
# Drop duplicate rows (the first occurrence of each is kept), then report
# the remaining (rows, columns).
cincinnati_fire_filtered = cincinnati_fire_filtered.drop_duplicates(keep="first")
cincinnati_fire_filtered.shape

In [None]:
# Build three new dataframes that carry only the columns each table needs.

# 1- Incidents by neighborhood: event id, neighborhood, coordinates and
#    incident type.
neighborhood_incidents = cincinnati_fire_filtered[
    [
        "EVENT_NUMBER",
        "NEIGHBORHOOD",
        "LATITUDE_X",
        "LONGITUDE_X",
        "INCIDENT_TYPE_DESC",
    ]
]
neighborhood_incidents.head(n=5)

In [None]:
# 2- Incidents by arrival time: event id, arrival duration, creation
#    timestamp, neighborhood and incident type.
incidents_time_duration = cincinnati_fire_filtered[
    [
        "EVENT_NUMBER",
        "ARRIVAL_DURATION",
        "CREATE_TIME_INCIDENT",
        "NEIGHBORHOOD",
        "INCIDENT_TYPE_DESC",
    ]
]
incidents_time_duration.head(n=15)

In [None]:
# 3- Grouped neighborhood dataframe: number of incidents for each
#    (NEIGHBORHOOD, INCIDENT_TYPE_DESC) pair, stored in a "counts" column.
#    (A stray `.unique()` call whose result was neither displayed nor stored
#    has been removed from this cell.)
neighborhood_incidents_grouped = (
    neighborhood_incidents
    .groupby(["NEIGHBORHOOD", "INCIDENT_TYPE_DESC"])
    .size()
    .reset_index(name='counts')
)
neighborhood_incidents_grouped.head(15)

In [None]:
# TODO: consolidate similar incidents under a shared incident type description.

# Open question: which incident types can be grouped together?

## Create Connection to SQL database and load the tables

In [None]:
# Create the SQLAlchemy engine for the PostgreSQL database.
#
# The "user:password@host:port/database" part can be supplied through the
# CINCINNATI_FIRE_DB environment variable so that real credentials do not
# have to be committed with the notebook. When the variable is not set, the
# original local development connection is used.
connection_string = os.environ.get(
    "CINCINNATI_FIRE_DB",
    "postgres:bootcamp@localhost:5432/cincinnatifire",
)
engine = create_engine(f'postgresql://{connection_string}')

In [None]:
# List the tables in the cincinnatifire database.
# Engine.table_names() was deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector API is the supported way to list tables.
inspect(engine).get_table_names()

In [None]:
# Append the neighborhood_incidents rows to the table of the same name,
# without writing the DataFrame index as a column.
neighborhood_incidents.to_sql(
    "neighborhood_incidents",
    engine,
    index=False,
    if_exists="append",
)



In [None]:
# Append the incidents_time_duration rows to the table of the same name,
# without writing the DataFrame index as a column.
incidents_time_duration.to_sql(
    "incidents_time_duration",
    engine,
    index=False,
    if_exists="append",
)

In [None]:
# Append the neighborhood_incidents_grouped rows to the table of the same
# name, without writing the DataFrame index as a column.
neighborhood_incidents_grouped.to_sql(
    "neighborhood_incidents_grouped",
    engine,
    index=False,
    if_exists="append",
)

In [None]:
# Sanity check: read incidents_time_duration back from the database and
# show its first rows.
pd.read_sql_query(
    sql='select * from incidents_time_duration',
    con=engine,
).head(n=5)