In [None]:
# The Kaggle Python 3 docker image (https://github.com/kaggle/docker-python)
# comes with many analytics libraries installed; load the ones used here.

import os

import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory; list them so
# the available file names are visible before anything is loaded.
input_files = os.listdir("../input")
print(input_files)

# Any results written to the current directory are saved as output.

For this capstone project I will be analyzing some 911 call data from [Kaggle](https://www.kaggle.com/mchirico/montcoalert). The data contains the following fields:

* lat : String variable, Latitude
* lng: String variable, Longitude
* desc: String variable, Description of the Emergency Call
* zip: String variable, Zipcode
* title: String variable, Title
* timeStamp: String variable, YYYY-MM-DD HH:MM:SS
* twp: String variable, Township
* addr: String variable, Address
* e: String variable, Dummy variable (always 1)


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
%matplotlib inline

In [None]:
# Read the 911 call records from the input directory into a DataFrame.
calls_csv = '../input/911.csv'
df = pd.read_csv(calls_csv)

In [None]:
# Print a summary of the frame: column names, non-null counts and dtypes.
df.info()

In [None]:
# Preview the first five rows of the loaded data.
df.head()

### Top 5 zipcodes for 911 calls

In [None]:
# value_counts() sorts by frequency (descending); head() keeps the first five.
zip_counts = df['zip'].value_counts()
zip_counts.head()

### Top 5 Townships for 911 calls

In [None]:
# Call frequency per township, most frequent first; show the first five.
township_counts = df['twp'].value_counts()
township_counts.head()

### Number of unique title codes in the data

In [None]:
# Number of distinct values in the title column (missing values are not counted).
title_codes = df['title']
title_codes.nunique()

## Creating some new features
### In the title column, a "Reason" (the responding department category) is specified before the title code, separated from it by a colon.
For example, if the title column value is EMS : Backpains/Injury, then the Reason column would be EMS and the Departments column would be Backpains/Injury.


### Creating the Reason column

In [None]:
# The reason is the part of the title that precedes the first ':'.
df['Reason'] = df['title'].map(lambda text: text.partition(':')[0])
df.head()

### Creating the Departments Column

In [None]:
# The department/description is the text that follows the first ':' in the title.
# - n=1 splits on the first colon only, so a description that itself contains
#   a ':' is kept whole instead of being cut off.
# - .str.strip() removes the whitespace that surrounds the colon separator, so
#   the labels do not start with a blank.
# - The vectorised .str accessor yields NaN (rather than raising) for a title
#   that is missing or contains no ':' at all.
df['Departments'] = df['title'].str.split(':', n=1).str[1].str.strip()
df.head()

## The most common reason for the 911 based calls

In [None]:
# value_counts() sorts by frequency, so the first entry is the most common reason.
reason_counts = df['Reason'].value_counts()
reason_counts.iloc[:1]

In [None]:
# Bar chart of the number of calls per reason, drawn without the left spine.
reason_ax = sns.countplot(data=df, x='Reason', palette='magma')
sns.despine(left=True)

In [None]:
# Frequency of each department/description value, most frequent first.
department_counts = df['Departments'].value_counts()
department_counts

In [None]:
# Inspect the Python type of a single timeStamp entry.
first_stamp = df['timeStamp'].iat[0]
type(first_stamp)

### As the timeStamp column values are strings, I will convert them to DateTime objects

In [None]:
# Parse the timeStamp column into pandas datetime values and store it back.
parsed_stamps = pd.to_datetime(df['timeStamp'])
df['timeStamp'] = parsed_stamps

#### As we can see it is changed to DateTime Object

In [None]:
# Check the type of a single timeStamp entry again, after the conversion.
converted_stamp = df['timeStamp'].iat[0]
type(converted_stamp)

### Now I will create three new columns as Hour, Month and Day of Week

In [None]:
# Derive the calendar features with the vectorised .dt accessor instead of a
# per-row Python lambda; the resulting values are the same.
# dayofweek is numeric here: Monday = 0 ... Sunday = 6.
call_time = df['timeStamp'].dt
df['Hour'] = call_time.hour
df['Month'] = call_time.month
df['Day of Week'] = call_time.dayofweek

In [None]:
# Preview the frame with the new Hour, Month and Day of Week columns.
df.head()

### The Day of Week is stored as a number. I will map it to the actual names of the days of the week


In [None]:
# Weekday number (Monday = 0 ... Sunday = 6) -> short weekday name.
day_map = {0:'Mon',1:'Tue',2:'Wed',3:'Thu',4:'Fri',5:'Sat',6:'Sun'}
# Build the names from timeStamp rather than from the existing 'Day of Week'
# column: mapping the column onto itself would turn every value into NaN if
# this cell were executed a second time (the names are not keys of day_map).
df['Day of Week'] = df['timeStamp'].dt.dayofweek.map(day_map)

In [None]:
# Preview the frame; Day of Week now holds weekday names instead of numbers.
df.head()

### Creating a count plot of Day of Week with Reason column

In [None]:
# Draw the weekdays in calendar order. Without an explicit `order`, seaborn
# infers the category order from the data, which is not Mon..Sun here.
weekday_order = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
sns.countplot(x='Day of Week', data=df, hue='Reason', palette='viridis', order=weekday_order)
sns.despine(left=True)
plt.title('911 calls by day of week and reason')
#To keep the legend out of the plot
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.show()

### Doing the same with Month column

In [None]:
# Calls per month, with one bar per reason inside each month.
month_ax = sns.countplot(data=df, x='Month', hue='Reason', palette='magma')
sns.despine(left=True)
# Anchor the legend just outside the axes so it does not cover the bars.
month_ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.show()

### Counting every instance of the column by the month

In [None]:
# For each month, count the non-null entries of every column.
Month_grouping = df.groupby(by='Month').count()
Month_grouping.head()

#### Aggregating by month, it seems that some of the column count values are different as there are some values missing.
#### So I will use the 'lat' (Latitude) column, assuming that if a latitude value is present, then the call actually took place.

### Hence, I will try to create a plot showing count of calls per month

In [None]:
# Use the 'lat' count as the number of calls per month, which is the column
# this analysis treats as evidence that a call took place. count() skips
# missing values, so a column with gaps (possibly 'twp') could undercount.
calls_per_month = Month_grouping['lat']
month_line_ax = calls_per_month.plot()
month_line_ax.set(xlabel='Month', ylabel='Number of calls', title='911 calls per month')
plt.show()
#Here we will understand the curve or trend a little bit better than the bar plot

### Creating a new column Date

In [None]:
# Keep only the calendar date (without the time of day) of each call.
df['Date'] = df['timeStamp'].dt.date
df.head()

### Now I will count the number of calls per date. Again, we will assume that if the 'lat' (Latitude) value is present, then the call actually took place

In [None]:
#Importing plotly and cufflinks for creating interactive plots
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import cufflinks as cf
import plotly.plotly as py
import plotly.graph_objs as go

# For Notebooks
init_notebook_mode(connected=True)

# For offline use
cf.go_offline()

In [None]:
# Calls per date (non-null 'lat' entries), drawn as an interactive line chart.
calls_per_day = df.groupby('Date')['lat'].count()
calls_per_day.iplot(kind='line')

## Recreating this plot as three separate plots, each representing one reason for the 911 call.
### The Three Reasons are EMS, Traffic and Fire

##  911 calls for EMS

In [None]:
# Restrict to EMS calls, then count the non-null 'lat' entries per date.
ems_calls = df.loc[df['Reason'] == 'EMS']
ems_calls.groupby('Date')['lat'].count().iplot(kind='line')

## 911 calls for Fire

In [None]:
# Restrict to Fire calls, then count the non-null 'lat' entries per date.
fire_calls = df.loc[df['Reason'] == 'Fire']
fire_calls.groupby('Date')['lat'].count().iplot(kind='line')

## 911 calls for Traffic

In [None]:
# Restrict to Traffic calls, then count the non-null 'lat' entries per date.
traffic_calls = df.loc[df['Reason'] == 'Traffic']
traffic_calls.groupby('Date')['lat'].count().iplot(kind='line')

## I am restructuring the Dataframe so that the column becomes the Hours and Index becomes the Day of Week

### We will do this by grouping by the 'Day of Week' and 'Hour' columns and counting the 'Reason' column, which gives a multilevel indexed Series of call counts

In [None]:
# Calls per (weekday, hour) pair: a Series of non-null 'Reason' counts with a
# two-level index.
df.groupby(['Day of Week', 'Hour'])['Reason'].count()

### Now I will convert this multilevel indexed Dataframe to a matrix form

In [None]:
# Pivot the (weekday, hour) call counts into a weekday x hour table.
# groupby sorts the weekday names alphabetically (Fri, Mon, Sat, ...), so the
# rows are reindexed into calendar order for the table and the plots below.
# A (weekday, hour) slot with no calls is a count of 0, not a missing value.
weekday_order = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
df_hour = (
    df.groupby(by=['Day of Week', 'Hour'])['Reason']
    .count()
    .unstack(fill_value=0)
    .reindex(weekday_order, fill_value=0)
)
df_hour.head()

### Creating a Heatmap with this df_hour Dataframe

In [None]:
# Heatmap of call counts: one row per weekday, one column per hour.
hour_fig, hour_ax = plt.subplots(figsize=(15, 7))
sns.heatmap(df_hour, ax=hour_ax, cmap='magma', linewidths=1, linecolor='white')
plt.show()

### Creating a Clustermap with df_hour

In [None]:
# Clustered heatmap of the weekday x hour call counts.
hour_grid = sns.clustermap(df_hour, cmap='coolwarm', linewidths=1, linecolor='white')
plt.show()

## Now I will repeat the operations to create a new dataframe that shows Month as a column

In [None]:
# Pivot the (weekday, month) call counts into a weekday x month table.
# groupby sorts the weekday names alphabetically (Fri, Mon, Sat, ...), so the
# rows are reindexed into calendar order for the table and the plots below.
# A (weekday, month) slot with no calls is a count of 0, not a missing value.
weekday_order = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
df_month = (
    df.groupby(by=['Day of Week', 'Month'])['Reason']
    .count()
    .unstack(fill_value=0)
    .reindex(weekday_order, fill_value=0)
)
df_month.head()

### Creating a Heatmap for df_month

In [None]:
# Heatmap of call counts: one row per weekday, one column per month.
month_fig, month_heat_ax = plt.subplots(figsize=(15, 7))
sns.heatmap(df_month, ax=month_heat_ax, cmap='magma', linewidths=1, linecolor='white')
plt.show()

### Creating a clustermap for df_month

In [None]:
# Clustered heatmap of the weekday x month call counts.
month_grid = sns.clustermap(df_month, cmap='inferno', linewidths=1, linecolor='white')
plt.show()