# **911 Calls Capstone Project**

Data is taken from [Kaggle](https://www.kaggle.com/mchirico/montcoalert). The data contains the following fields:

- lat: String variable, Latitude
- lng: String variable, Longitude
- desc: String variable, Description of the Emergency Call
- zip: String variable, Zipcode
- title: String variable, Title
- timeStamp: String variable, YYYY-MM-DD HH:MM:SS
- twp: String variable, Township
- addr: String variable, Address
- e: String variable, Dummy variable (always 1)

## **1.1 Data and Setup**

### 1.1.1 Loading the required libraries

In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.ticker as tkr
from matplotlib import pyplot
from matplotlib.font_manager import FontProperties
import calendar 
import datetime
import seaborn as sns
import folium
%matplotlib inline

### 1.1.2 Loading the Dataset

In [None]:
# Read the 911 call log from the Kaggle input directory into a DataFrame.
csv_path = "../input/montcoalert/911.csv"
df = pd.read_csv(csv_path)

# Show five randomly chosen rows as a first look at the columns.
df.sample(n=5)

### 1.1.3 Concise summary of the DataFrame

In [None]:
# Print a concise summary of the DataFrame (columns, non-null counts, dtypes).
df.info()

### 1.1.4 Converting timeStamp to DateTime64 format

In [None]:
# Parse the raw timestamp strings into pandas datetime values so that the
# .dt accessor can be used on the column in the following cells.
parsed_timestamps = pd.to_datetime(df["timeStamp"])
df["timeStamp"] = parsed_timestamps

# Re-print the summary to confirm the new dtype of timeStamp.
df.info()

## **1.2 Feature Creation and Data Exploration**

### 1.2.1 Missing Value Count

In [None]:
# Boolean mask of missing cells, computed once and reused below.
missing_mask = df.isnull()

# Total number of missing cells in the whole DataFrame.
print('Missing values:', missing_mask.values.sum())

# Number of missing cells per column (displayed as the cell output).
missing_mask.sum()

### 1.2.2 Top 5 zipcodes for 911 calls

In [None]:
# Number of calls per zip code, five busiest zip codes first.
# Only the timeStamp column is counted, instead of counting every column of
# the DataFrame and then discarding all but one.
top5_zip = (
    df.groupby("zip")["timeStamp"]
    .count()
    .sort_values(ascending=False)
    .head(5)
    .to_frame("Frequency of Calls")
)

# Display the table with a red gradient; darker cells mean more calls.
top5_zip.style.background_gradient(cmap='Reds')

### 1.2.3 Top 5 Towns for 911 calls

In [None]:
# Number of calls per township, sorted from busiest to quietest.
calls_per_town = df.groupby("twp")["timeStamp"].count().sort_values(ascending=False)

# Keep the five busiest townships as a one-column table.
top5_towns = calls_per_town.head(5).to_frame("Frequency of Calls")
top5_towns.style.background_gradient(cmap='Reds')

### 1.2.4 Top 5 Hours of the Day for 911 calls

In [None]:
# Hour of the day taken from each call's timestamp
df["Hour"] = df["timeStamp"].dt.hour

# Show three random rows to check the new column.
df.sample(n=3)

In [None]:
# Number of calls per hour of the day, busiest hours first.
calls_per_hour = df.groupby("Hour")["timeStamp"].count().sort_values(ascending=False)

# Keep the five busiest hours as a one-column table.
top5_hour = calls_per_hour.head(5).to_frame(name="Frequency of Calls")
top5_hour.style.background_gradient(cmap='Reds')

### 1.2.5 Top 5 Days of the Week for 911 Calls

In [None]:
# Three-letter weekday abbreviation of each call: the first three characters
# of the full day name returned by dt.day_name().
full_day_names = df["timeStamp"].dt.day_name().astype(str)
df["DayOfWeek"] = full_day_names.str.slice(0, 3)

# Show three random rows to check the new column.
df.sample(n=3)

In [None]:
# Number of calls per day of the week, busiest days first.
calls_per_weekday = df.groupby("DayOfWeek")["timeStamp"].count().sort_values(ascending=False)

# Keep the five busiest weekdays as a one-column table.
top5_DayOfWeek = calls_per_weekday.head(5).to_frame(name="Frequency of Calls")
top5_DayOfWeek.style.background_gradient(cmap='Reds')

### 1.2.6 Top 5 Months for 911 Calls

In [None]:
call_times = df["timeStamp"]

# Month name of each call, stored as a plain string
df["MonthName"] = call_times.dt.month_name().astype(str)

# Calendar year of each call
df["Year"] = call_times.dt.year

# Show three random rows to check the new columns.
df.sample(n=3)

In [None]:
# Number of calls per month name (all years pooled), busiest months first.
calls_per_month = df.groupby("MonthName")["timeStamp"].count().sort_values(ascending=False)

# Keep the five busiest months as a one-column table.
top5_Months = calls_per_month.head(5).to_frame(name="Frequency of Calls")
top5_Months.style.background_gradient(cmap='Reds')

### 1.2.7 Adding a column for the reason of 911 Call

In [None]:
# The reason of a call is the part of its title before the first ':'.
# The vectorized .str accessor replaces a row-by-row Python lambda; it also
# propagates a missing title as NaN instead of raising AttributeError.
df["Reason"] = df["title"].str.split(":", n=1).str[0]

# Show three random rows to check the new column.
df.sample(3)

### 1.2.8 Dates Range

In [None]:
# Earliest and latest call timestamps in the dataset.
# Series.min()/max() are vectorized and skip missing timestamps, whereas the
# builtin min()/max() loop over every element in Python and give
# order-dependent results when NaT is present.
dates_range = {'Date': [df.timeStamp.min(), df.timeStamp.max()]}
df_dates_range = pd.DataFrame(dates_range, index=['Earliest Date', 'Latest Date'])
df_dates_range

## **1.3 Data Visualization**

### 1.3.1 Most Common reasons for 911 Calls

In [None]:
# Number of calls per reason, most common reason first. The single column
# keeps the name "timeStamp", which the plotting cells below rely on.
calls_per_reason = df.groupby("Reason")["timeStamp"].count()
ReasonsCount = calls_per_reason.sort_values(ascending=False).to_frame()

ReasonsCount

In [None]:
# Bar chart of call counts per reason on axes that span the whole figure.
fig = plt.figure(dpi=120)
ax = fig.add_axes([0, 0, 1, 1])
ax.bar(ReasonsCount.index, ReasonsCount.timeStamp)

# Format the y tick labels as integers with thousands separators.
thousands_formatter = tkr.FuncFormatter(
    lambda value, pos: "{:,}".format(int(value)))
ax.yaxis.set_major_formatter(thousands_formatter)

title_font = {'fontname': 'Georgia', 'fontsize': 15, 'fontweight': 'bold'}
ax.set_title("Common Reasons for 911 Calls", fontdict=title_font)
ax.set_xlabel("Reasons")
ax.set_ylabel("Frequency of Calls")
plt.show()

In [None]:
# Share of each reason in the total number of calls, in percent (2 decimals)
total_calls = ReasonsCount.timeStamp.sum()
ReasonsCount["Percent"] = (ReasonsCount.timeStamp * 100 / total_calls).round(2)

fig, ax = plt.subplots(figsize=(6, 3), dpi=150)

reason_labels = ReasonsCount.index
call_counts = ReasonsCount.timeStamp

# Donut chart: a pie whose wedges are only half a radius wide, with the
# percentage printed on every wedge.
wedges, _, _ = ax.pie(call_counts, wedgeprops=dict(width=0.5),
                      autopct="%.1f%%")

# Shared styling of the boxed labels and the lines connecting them to wedges
label_box = dict(boxstyle="square,pad=0.2", fc="w", ec="k", lw=0.72)
annotate_kw = dict(arrowprops=dict(arrowstyle="-"),
                   bbox=label_box, zorder=0, va="center")

for wedge, reason in zip(wedges, reason_labels):
    # Direction of the wedge's angular midpoint, as a point on the unit circle
    mid_angle = (wedge.theta2 - wedge.theta1) / 2. + wedge.theta1
    mid_rad = np.deg2rad(mid_angle)
    x, y = np.cos(mid_rad), np.sin(mid_rad)

    # Labels on the left half are right-aligned and vice versa, so the text
    # always grows away from the chart.
    side = int(np.sign(x))
    text_align = {-1: "right", 1: "left"}[side]

    annotate_kw["arrowprops"].update(
        {"connectionstyle": "angle,angleA=0,angleB={}".format(mid_angle)})
    ax.annotate(reason, xy=(x, y), xytext=(1.35 * np.sign(x), 1.4 * y),
                horizontalalignment=text_align, **annotate_kw)

title_font = {'fontname': 'Georgia', 'fontsize': 15, 'fontweight': 'bold'}
ax.set_title("Common Reasons for 911 Call", y=1.3, pad=-19,
             fontdict=title_font)

plt.show()

### 1.3.2 Variation of 911 Calls over time (Jan 2016 - Dec 2019)

In [None]:
# Keep only the calls from years strictly between the first and the last year
# present in the data (NOTE(review): presumably because the boundary years
# are only partially covered -- confirm against the date range above).
# The year Series is computed once and reduced with the vectorized
# Series.min()/max() instead of the builtin min()/max().
call_years = df["timeStamp"].dt.year
inner_years = (call_years > call_years.min()) & (call_years < call_years.max())

# Number of calls per (month name, reason) over the retained years
df_monthly_calls = pd.DataFrame(
    df[inner_years].groupby(["MonthName", "Reason"])["timeStamp"].count()
)

# One row per month, one column per reason. Each (month, reason) pair occurs
# exactly once after the groupby, so unstacking is sufficient and keeps the
# counts as integers (pivot_table's default mean would turn them into floats).
df_monthly_calls_pivot = df_monthly_calls["timeStamp"].unstack("Reason")

# Put the rows in calendar order (the groupby sorts month names alphabetically)
month_order = ["January", "February", "March", "April", "May", "June",
               "July", "August", "September", "October", "November",
               "December"]
df_monthly_calls_pivot = df_monthly_calls_pivot.reindex(month_order)

# Conditional formatting: darker red means more calls
df_monthly_calls_pivot.style.background_gradient(cmap='Reds')

In [None]:
# One line per call reason, showing the number of calls in each month.
fig, ax = plt.subplots(dpi=200)
month_labels = df_monthly_calls_pivot.index.to_list()
for reason in df_monthly_calls_pivot.columns.to_list():
    ax.plot(month_labels, df_monthly_calls_pivot[reason], label=reason)

# Fixed y ticks every 3,000 calls, shown with thousands separators
ax.yaxis.set_major_formatter(tkr.StrMethodFormatter('{x:,.0f}'))
plt.yticks(np.arange(0, 35000, step=3000))

# Slanted month labels, anchored at their right end
plt.xticks(rotation=45, ha="right", rotation_mode="anchor")

ax.set_title("Variation of 911 Calls over time (Jan 2016 - Dec 2019)", fontweight="bold")
ax.set_ylabel("Frequency of Calls")
ax.set_xlabel("Month")

# Small-font legend placed just outside the upper-right corner of the axes
fontP = FontProperties()
fontP.set_size('small')
ax.legend(title='Reasons', bbox_to_anchor=(1, 1.015), loc='upper left', prop=fontP)
ax.grid()
plt.show()

In [None]:
# Keep only the calls from years strictly between the first and the last year
# present in the data (NOTE(review): presumably because the boundary years
# are only partially covered -- confirm against the date range above).
# The year Series is computed once and reduced with the vectorized
# Series.min()/max() instead of the builtin min()/max().
call_years = df["timeStamp"].dt.year
inner_years = (call_years > call_years.min()) & (call_years < call_years.max())

# Number of calls per (year, reason) over the retained years
df_yearly_calls = pd.DataFrame(
    df[inner_years].groupby(["Year", "Reason"])["timeStamp"].count()
)

# One row per year, one column per reason. Each (year, reason) pair occurs
# exactly once after the groupby, so unstacking is sufficient and keeps the
# counts as integers (pivot_table's default mean would turn them into floats).
df_yearly_calls_pivot = df_yearly_calls["timeStamp"].unstack("Reason")

# Conditional formatting: darker red means more calls
df_yearly_calls_pivot.style.background_gradient(cmap='Reds')

In [None]:
# One line per call reason, showing the number of calls in each year.
fig, ax = plt.subplots(dpi=200)
year_labels = df_yearly_calls_pivot.index.to_list()
for reason in df_yearly_calls_pivot.columns.to_list():
    ax.plot(year_labels, df_yearly_calls_pivot[reason], label=reason)

# Fixed y ticks every 12,500 calls, shown with thousands separators
ax.yaxis.set_major_formatter(tkr.StrMethodFormatter('{x:,.0f}'))
plt.yticks(np.arange(10000, 87000, step=12500))

# One x tick per year from 2016 to 2019
plt.xticks(np.arange(2016, 2020, step=1), rotation_mode="anchor")

ax.set_title("Variation of 911 Calls over time (Jan 2016 - Dec 2019)", fontweight="bold")
ax.set_ylabel("Frequency of Calls")
ax.set_xlabel("Year")

# Small-font legend placed just outside the upper-right corner of the axes
fontP = FontProperties()
fontP.set_size('small')
ax.legend(title='Reasons', bbox_to_anchor=(1, 1.015), loc='upper left', prop=fontP)
ax.grid()
plt.show()

### 1.3.3 Heatmap

In [None]:
# Keep only the calls from years strictly between the first and the last year
# present in the data (NOTE(review): presumably because the boundary years
# are only partially covered -- confirm against the date range above).
# The year Series is computed once and reduced with the vectorized
# Series.min()/max() instead of the builtin min()/max().
call_years = df["timeStamp"].dt.year
inner_years = (call_years > call_years.min()) & (call_years < call_years.max())

# Number of calls per (year, month name) over the retained years
df_month_Year_calls = pd.DataFrame(
    df[inner_years].groupby(["Year", "MonthName"])["timeStamp"].count()
)

# One row per year, one column per month. Each (year, month) pair occurs
# exactly once after the groupby, so unstacking is sufficient and keeps the
# counts as integers (pivot_table's default mean would turn them into floats).
df_month_Year_calls_pivot = df_month_Year_calls["timeStamp"].unstack("MonthName")

# Put the columns in calendar order (the groupby sorts month names
# alphabetically)
month_order = ["January", "February", "March", "April", "May", "June",
               "July", "August", "September", "October", "November",
               "December"]
df_month_Year_calls_pivot = df_month_Year_calls_pivot[month_order]

# Conditional formatting: darker red means more calls
df_month_Year_calls_pivot.style.background_gradient(cmap='Reds')

In [None]:
# Heatmap of call counts with one row per year and one column per month.
plt.figure(figsize=(8, 2), dpi=150)
ax = sns.heatmap(df_month_Year_calls_pivot, linewidths=.5)

# Slanted month labels anchored at their right end; year labels kept upright
tick_size = 8.5
plt.xticks(rotation=45, ha="right", rotation_mode="anchor", size=tick_size)
plt.yticks(size=tick_size, rotation=360)

ax.set_xlabel("Months")
ax.set_ylabel("Years")
ax.set_title("Variation of 911 Calls with Months and Years (Jan 2016 - Dec 2019)", fontweight="bold")
plt.show()

In [None]:
# Keep only the calls from years strictly between the first and the last year
# present in the data (NOTE(review): presumably because the boundary years
# are only partially covered -- confirm against the date range above).
# The year Series is computed once and reduced with the vectorized
# Series.min()/max() instead of the builtin min()/max().
call_years = df["timeStamp"].dt.year
inner_years = (call_years > call_years.min()) & (call_years < call_years.max())

# Number of calls per (day of week, hour) over the retained years
df_day_hour_calls = pd.DataFrame(
    df[inner_years].groupby(["DayOfWeek", "Hour"])["timeStamp"].count()
)

# One row per hour, one column per weekday. Each (weekday, hour) pair occurs
# exactly once after the groupby, so unstacking is sufficient and keeps the
# counts as integers (pivot_table's default mean would turn them into floats).
df_day_hour_calls_pivot = df_day_hour_calls["timeStamp"].unstack("DayOfWeek")

# Put the columns in Monday-to-Sunday order (the groupby sorts the
# abbreviations alphabetically)
weekday_order = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
df_day_hour_calls_pivot = df_day_hour_calls_pivot[weekday_order]

# Conditional formatting: darker red means more calls
df_day_hour_calls_pivot.style.background_gradient(cmap='Reds')

In [None]:
# Heatmap of call counts with one row per hour and one column per weekday.
plt.figure(figsize=(8, 5.5), dpi=150)
ax = sns.heatmap(df_day_hour_calls_pivot, linewidths=.5)

# Slanted weekday labels anchored at their right end; hour labels kept upright
tick_size = 8.5
plt.xticks(rotation=45, ha="right", rotation_mode="anchor", size=tick_size)
plt.yticks(size=tick_size, rotation=360)

ax.set_xlabel("Days of the Week")
ax.set_ylabel("Hours")
ax.set_title("Variation of 911 Calls with Days and Hours (Jan 2016 - Dec 2019)", fontweight="bold")
plt.show()


### 1.3.4 Locations from which 911 calls were placed

In [None]:
# Keep only the calls whose longitude lies strictly between -80 and -70.
lng_in_range = (df["lng"] > -80) & (df["lng"] < -70)
df_coords_filter = df[lng_in_range]

In [None]:
# Plot the locations of the first 40,000 calls that passed the longitude
# filter.
# prefer_canvas is an option of folium.Map (it makes Leaflet draw vector
# layers on a canvas, which suits tens of thousands of markers); passing it to
# each Circle, as before, does not enable canvas rendering.
m = folium.Map(location=[40.5762327, -76.6158772], width=400, height=400,
               prefer_canvas=True)

calls_to_plot = df_coords_filter.iloc[:40000]
for la, lo in zip(calls_to_plot["lat"].values, calls_to_plot["lng"].values):
    # Unfilled crimson circle with a radius of 60 around the call location
    folium.Circle(
        radius=60,
        location=[la, lo],
        color="crimson",
        fill=False,
    ).add_to(m)

# Display the map as the cell output
m