## In this report we will be analysing the call data provided by Montgomery County.

In [None]:
# Section 1: importing libraries.

import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns 
import os 
# Plot style used by the seaborn charts in this notebook:
sns.set(style= "whitegrid")

# plotly / cufflinks imports for the interactive plots
from plotly import __version__
import cufflinks as cf

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Set plotly up for offline use inside the notebook.
init_notebook_mode(connected=True)

# Switch cufflinks to offline mode; the DataFrame.iplot calls further down rely on cufflinks.
cf.go_offline()

import plotly.figure_factory as ff
import plotly.offline as py
# For online plotting, use `import plotly.plotly as py` instead.
import plotly.graph_objs as go
# `py` is plotly.offline, so this repeats the init_notebook_mode call made above.
py.init_notebook_mode(connected=True)
from plotly import tools

In [None]:
# Load the raw 911 call records into the working data frame.
df = pd.read_csv(filepath_or_buffer="../input/911.csv")

## 2. Exploring Data set:

In [None]:
# Peek at the first two rows of the data frame as loaded.
df.head(n=2)

In [None]:
# Number of distinct entries in each column.
df.nunique(axis=0)

In [None]:
# Drop the non-useful / dummy column "e" from the data frame in place.
df.drop(columns=["e"], inplace=True)

In [None]:
# Column names, non-null counts and dtypes of the data frame.
df.info(verbose=None)

In [None]:
# "timeStamp" is read in as plain objects (strings), not as datetimes,
# so parse it once and store the parsed values back in the same column.
parsed_stamps = pd.to_datetime(df["timeStamp"])
df["timeStamp"] = parsed_stamps

# Show the first parsed timestamp.
df.loc[0, "timeStamp"]

# 3 Creating New Features and Columns for data analysis:

### Creating columns like year, month, date etc on the basis of timeStamp column:



In [None]:
# Derive the broad call category from "title": the text before the first ":".
# The vectorised .str accessor avoids a Python-level lambda call per row and
# passes missing titles through as NaN instead of raising AttributeError.
df["reason category"] = df["title"].str.split(":", n=1).str[0]

# Number of calls in each reason category.
df["reason category"].value_counts()

In [None]:
# Inspect the first timestamp and the components that can be read from it.
p = df["timeStamp"].loc[0]
for component in (p, p.year, p.month, p.date(), p.dayofweek, p.time()):
    print(component)

In [None]:
# Add date/time feature columns derived from "timeStamp".
# The .dt accessor computes each component for the whole column at once,
# replacing five per-row Python lambda calls.
timestamps = df["timeStamp"].dt

df["year"] = timestamps.year

df["month"] = timestamps.month

df["date"] = timestamps.date

# Numeric weekday as reported by pandas (0 = Monday ... 6 = Sunday).
df["day of week"] = timestamps.dayofweek

df["time"] = timestamps.time

In [None]:
# Translate the numeric "day of week" column (0 -> "Mon" ... 6 -> "Sun")
# into a categorical label column.
dow = dict(enumerate(["Mon", "Tues", "Wed", "Thurs", "Fri", "Sat", "Sun"]))

df["Day of Week"] = df["day of week"].map(dow)

In [None]:
# Label every call "day" or "night" on the basis of sunlight, using the hour
# of its timestamp. Night wraps around midnight: it runs from NIGHT_START_HOUR
# in the evening until NIGHT_END_HOUR the next morning, so calls at
# 00:00-05:59 are "night" too, not only those at 20:00-23:59.
NIGHT_START_HOUR = 20
NIGHT_END_HOUR = 6  # assumed start of daylight -- adjust if a different cut-off is wanted

call_hour = df["timeStamp"].dt.hour
is_night = (call_hour >= NIGHT_START_HOUR) | (call_hour < NIGHT_END_HOUR)
df["day/night"] = np.where(is_night, "night", "day")


#  4 Basic Q/A section:


In [None]:
# Which five zip codes produced the most 911 calls?
df["zip"].value_counts().iloc[:5]

In [None]:
# The five most frequent call titles (reasons).
df["title"].value_counts().iloc[:5]

In [None]:
# The five townships from which the most calls were received.
df["twp"].value_counts().iloc[:5]

In [None]:
# The ten dates with the highest number of calls across all years.
df["date"].value_counts().iloc[:10]

In [None]:
# The busiest year together with its total number of calls.
df["year"].value_counts().iloc[:1]

# 5 Data Visualizing and Analysing 

In [None]:
# First row of the data frame, now including the added feature columns.
df.head(n=1)

In [None]:
# Count plot of the call reason categories over the whole data set.

plt.figure(figsize=(14, 7))
sns.set_context("paper", font_scale=2)
ax = sns.countplot(data=df, x="reason category", palette="bright")
ax.set_title(" Calls Reason ALL Combined")
plt.show()

In [None]:
# Call reason categories, with one bar per year inside each category.
plt.figure(figsize=(14, 7))
sns.set_context("paper", font_scale=2)
ax = sns.countplot(data=df, x="reason category", hue="year", palette="bright")
ax.set_title(" Calls Reason Yearly")
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc="upper left", borderaxespad=0.0)
plt.show()

In [None]:
# Plot of the calls received per year.
plt.figure(figsize=(14, 7))
sns.set_context("paper", font_scale=2)
sns.countplot(x="year", data=df, palette="Paired")
# This plot has no reason dimension, so the title must not mention reasons
# (the earlier title was copied from the reason-by-year plot).
plt.title(" Calls Received Yearly")
plt.show()

In [None]:
# Calls per year, with one bar per reason category inside each year.
plt.figure(figsize=(14, 7))
sns.set_context("paper", font_scale=2)
ax = sns.countplot(data=df, x="year", hue="reason category", palette="Paired")
ax.set_title(" Calls Reason Yearly having the hue of reasons")
plt.show()

In [None]:
# Calls received per calendar month, all years combined.
plt.figure(figsize=(14, 7))
sns.set_context("paper", font_scale=2)

ax = sns.countplot(data=df, x="month", palette="magma")
ax.set_title(" Monthly Calls Combined All Years")

plt.show()

In [None]:
# Calls per calendar month, with one bar per year inside each month.
plt.figure(figsize=(14, 7))
sns.set_context("paper", font_scale=2)

ax = sns.countplot(x="month", hue="year", data=df, palette="magma")
ax.set_title(" Monthly Calls Yearly")
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc="upper left", borderaxespad=0.0)

plt.show()

In [None]:
# Calls per calendar month (all years), with one bar per reason category.
plt.figure(figsize=(14, 7))
sns.set_context("paper", font_scale=2)
ax = sns.countplot(data=df, x="month", hue="reason category", palette="magma")
ax.set_title(" Monthly Calls Category Combined All Years")
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc="upper left", borderaxespad=0.0)
plt.show()

In [None]:
# Day and Night Call Reports: one facet per (day/night, reason category),
# each showing the number of calls per month.
# countplot needs an explicit `order` when mapped over a FacetGrid; otherwise
# every facet orders only the months present in its own subset and the bars
# of different facets may not line up on the same month.
month_order = sorted(df["month"].unique())
g = sns.FacetGrid(df, row="day/night", col="reason category", height=7)
g.map(sns.countplot, "month", order=month_order, palette="magma")
# plt.title() would only title the last facet; put the title on the figure.
g.fig.subplots_adjust(top=0.92)
g.fig.suptitle("Day and Night Call Reports")


In [None]:
# Calls per day of the week.

plt.figure(figsize=(14, 7))
sns.set_context("paper", font_scale=2)
ax = sns.countplot(data=df, x="Day of Week", palette="pastel")
ax.set_title(" Day Of Week Calls")
plt.show()

In [None]:
# Calls per day of the week, with one bar per reason category.
plt.figure(figsize=(14, 7))
sns.set_context("paper", font_scale=2)
ax = sns.countplot(data=df, x="Day of Week", hue="reason category", palette="pastel")
ax.set_title(" Day Calls By Reason ")
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc="upper left", borderaxespad=0.0)
plt.show()

In [None]:
# Calls per day of the week, with one bar per year.
plt.figure(figsize=(14, 7))
sns.set_context("paper", font_scale=2)
ax = sns.countplot(data=df, x="Day of Week", hue="year", palette="pastel")
ax.set_title(" Daily Calls By Year ")
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc="upper left", borderaxespad=0.0)
plt.show()

# 5 Line Plots on basis of aggregation:
## Using interactive plots: Plotly


In [None]:
# Per-year count of the non-null values in every column;
# the resulting data frame has the years as its index.
df.groupby(by="year").count()

In [None]:
# Interactive plot of the calls per year; the per-year count of "lat" is
# used as the number of calls.
by_year = df.groupby(by="year").count()
# Move "year" from the index back into a regular column so it can be the x axis.
df_year = by_year.reset_index()

df_year.iplot(
    x="year",
    y="lat",
    title=" Calls",
    xTitle="Years",
    yTitle="Counts",
    colors="purple",
)

In [None]:
# Interactive plot of the calls per calendar month over all years.
by_month = df.groupby(by="month").count()
# Replace the month index with the default integer index (0, 1, ...),
# which turns "month" into a regular column.
df_month = by_month.reset_index()
df_month.iplot(
    x="month",
    y="lat",
    title="Monthly calls in all years",
    xTitle="Month",
    yTitle="Counts",
    colors="pink",
)

In [None]:
# Interactive plot of the total number of calls on every single date.
date_gr = df.groupby("date").count()
df_date = date_gr.reset_index()

# `colors` is the keyword every other iplot call in this notebook uses for
# the trace colour; use it here as well instead of `color`.
df_date.iplot(x="date", y="lat", size=6, colors="red", title="Every Day Total Calls",
              xTitle="Days", yTitle="counts")

In [None]:
# Interactive plot of the EMS calls per date.
ems_gr = df.loc[df["reason category"] == "EMS"].groupby("date").count()

df_ems = ems_gr.reset_index()

df_ems.iplot(
    x="date",
    y="lat",
    title="EMS Calls",
    xTitle="Days",
    yTitle="Counts",
    colors="orange",
)

In [None]:
# Interactive plot of the Traffic calls per date.
tra_gr = df.loc[df["reason category"] == "Traffic"].groupby("date").count()

df_tra = tra_gr.reset_index()

df_tra.iplot(
    x="date",
    y="lat",
    title="Traffic Calls",
    xTitle="Days",
    yTitle="Counts",
    colors="pink",
)

In [None]:
# Interactive plot of the Fire calls per date.
fire_gr = df.loc[df["reason category"] == "Fire"].groupby("date").count()

df_fire = fire_gr.reset_index()

df_fire.iplot(
    x="date",
    y="lat",
    title="Fire Calls",
    xTitle="Days",
    yTitle="Counts",
    colors="purple",
)

In [None]:
# Number of calls in each year, most frequent year first.
df["year"].value_counts(sort=True)

In [None]:
# Hour of the day (0-23) of every call, computed once for the whole column.
df["hour"] = df["timeStamp"].dt.hour

# Lookup that shifts the hours to 1-24, used only for the printed summary.
tmap = {hour: hour + 1 for hour in range(24)}
# Map into a temporary Series: the "hour" column itself keeps its 0-23
# values, so it does not have to be recomputed after printing.
print(df["hour"].map(tmap).value_counts())

# Interactive plot of the calls per hour over all years.
hr_grp = df.groupby("hour").count()
df_time = hr_grp.reset_index()


df_time.iplot(x="hour", y="lat", title="Total number of calls  Hourly: In all years :",
              colors="pink")

In [None]:
# Interactive plot of the calls per hour of the day, restricted to 2015.
y15_grp = df.loc[df["year"] == 2015].groupby("hour").count()

df_y15 = y15_grp.reset_index()

df_y15.iplot(
    x="hour",
    y="lat",
    title="Total number of calls hourly in Year: 2015",
    colors="lightgreen",
)

In [None]:
# Interactive plot of the calls per hour of the day, restricted to 2016.
y16_grp = df.loc[df["year"] == 2016].groupby("hour").count()

df_y16 = y16_grp.reset_index()

df_y16.iplot(
    x="hour",
    y="lat",
    title="Total number of calls hourly in Year: 2016",
    colors="brown",
)

In [None]:
# Interactive plot of the calls per hour of the day, restricted to 2017.
y17_grp = df.loc[df["year"] == 2017].groupby("hour").count()

df_y17 = y17_grp.reset_index()

df_y17.iplot(
    x="hour",
    y="lat",
    title="Total number of calls hourly in Year: 2017",
    colors="blue",
)

In [None]:
# Interactive plot of the calls per hour of the day, restricted to 2018
# (no explicit colour is given for this one).
y18_grp = df.loc[df["year"] == 2018].groupby("hour").count()

df_y18 = y18_grp.reset_index()

df_y18.iplot(
    x="hour",
    y="lat",
    title="Total number of calls hourly in Year: 2018",
)

# 6 Heat Maps:



In [None]:
# Calls per (weekday, hour): one row per weekday, one column per hour.
# Only "lat" is counted rather than every column of the frame.
dayvshour = df.groupby(["Day of Week", "hour"])["lat"].count().unstack()
# groupby sorts the weekday labels alphabetically (Fri, Mon, Sat, ...);
# put the rows back into calendar order so the heat map reads Mon -> Sun.
# Labels that do not occur in the data are skipped rather than added as NaN rows.
weekday_order = ["Mon", "Tues", "Wed", "Thurs", "Fri", "Sat", "Sun"]
dayvshour = dayvshour.reindex([day for day in weekday_order if day in dayvshour.index])
dayvshour.head()

In [None]:
# Heat map of the weekday-by-hour call counts.
plt.figure(figsize=(15, 10))
sns.heatmap(data=dayvshour, linewidths=0.1, cmap="coolwarm")

In [None]:
# Clustered heat map of the weekday-by-hour call counts.
# clustermap creates its own figure, so a preceding plt.figure(figsize=...)
# would not size it and would only leave an empty extra figure behind;
# the size is therefore passed to clustermap directly.
sns.clustermap(dayvshour, cmap="coolwarm", figsize=(15, 10))

In [None]:
# Calls per (hour, month): one row per hour, one column per month,
# using the count of "lat" as the number of calls.
hour_month_counts = df.groupby(["hour", "month"])["lat"].count()
monthvshour = hour_month_counts.unstack()
monthvshour.head()

In [None]:
# Heat map of the hour-by-month call counts.
plt.figure(figsize=(15, 10))
sns.heatmap(data=monthvshour, linewidths=0.5, cmap="viridis")

In [None]:
# Clustered heat map of the hour-by-month call counts.
sns.clustermap(data=monthvshour, cmap="viridis")

## Please feel free to leave any comments or feedback.
 ## Thanks!