# Introduction
Analysis conducted by Lion SHI

This notebook explores the stories hidden in the 911 Calls dataset using data visualization.

### Environment Set-up

In [None]:
import numpy as np
import pandas as pd

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Load the 911 calls CSV into a DataFrame; the path is relative to the
# notebook's working directory.
df = pd.read_csv('../input/911data/911.csv')

### Preview data

In [None]:
# Peek at the first five records.
df.head(n=5)

In [None]:
# Print pandas' concise summary of the frame.
df.info()

In [None]:
# Descriptive statistics as produced by describe() with its default settings.
df.describe()

In [None]:
# List the column labels.
df.columns

### Study the data from different aspects
Top 5 zipcodes for 911 calls

In [None]:
# value_counts() is sorted by descending frequency, so the first five
# entries are the five most frequent zip codes.
df['zip'].value_counts().iloc[:5]

Top 5 townships (twp) for 911 calls

In [None]:
# Five most frequent townships (value_counts() sorts by descending frequency).
df['twp'].value_counts().head(n=5)

No. of unique title codes

In [None]:
# Count the distinct non-null titles.
len(df['title'].dropna().unique())

In the `title` column, a "Reason/Department" is specified before the title code: EMS, Fire, or Traffic. We create a new column called "Reason" that contains this string value.

In [None]:
# Keep the text before the first ":" of each title as the call reason.
# The vectorized .str accessor passes missing titles through as NaN, whereas
# the per-row lambda would raise AttributeError on a non-string value.
df['Reason'] = df['title'].str.split(':', n=1).str[0]
# Show a sample rather than the whole column.
df['Reason'].head()

Find out the most common reason for a 911 call

In [None]:
# One bar per reason, bar height = number of calls with that reason.
sns.countplot(data=df, x='Reason')

In [None]:
# Preview the parsed timestamps; the result is only displayed, not stored.
pd.to_datetime(df['timeStamp'])

In [None]:
# Parse the timestamps once and store them back so the .dt accessor can be
# used below (the original parsed the whole column twice).
df['timeStamp'] = pd.to_datetime(df['timeStamp'])
# First parsed timestamp, kept under its original name as a sample value;
# the derived columns below do not depend on it.
time = df['timeStamp'].loc[0]
# Derive calendar features with the vectorized .dt accessor instead of a
# Python-level lambda per row.
df['Hour'] = df['timeStamp'].dt.hour
df['Month'] = df['timeStamp'].dt.month
df['dayofweek'] = df['timeStamp'].dt.dayofweek

In [None]:
# Weekday number -> short name (0 is Monday in pandas' dayofweek numbering).
dmap = {0:'Mon',1:'Tue',2:'Wed',3:'Thu',4:'Fri',5:'Sat',6:'Sun'}
# Derive the names from the timestamp rather than from the 'dayofweek' column
# itself: mapping the column in place turns every value into NaN when this
# cell is run a second time, because the names are not keys of dmap.
df['dayofweek'] = pd.to_datetime(df['timeStamp']).dt.dayofweek.map(dmap)

In [None]:
# Check the newly added columns on the first five records.
df.head(n=5)

### Relationship between the "Reason" and the "Day of Week"

In [None]:
# Pin the x-axis to calendar order; without an explicit order the weekday
# labels are not guaranteed to appear Monday-to-Sunday.
day_order = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
sns.countplot(x='dayofweek', data=df, hue='Reason', order=day_order)

# Anchor the legend just outside the right edge of the axes so it does not
# cover the bars.
plt.legend(bbox_to_anchor=(1.02, 1), loc=2)

### Changes in call reasons across months

In [None]:
# Calls per month, split by reason.
sns.countplot(data=df, x='Month', hue='Reason')
# Legend anchored by its upper-left corner just outside the axes.
plt.legend(loc='upper left', bbox_to_anchor=(1.02, 1))

In [None]:
# Non-null count of every column, one row per month.
df_groupbymonth = df.groupby(by='Month').count()
df_groupbymonth.head(n=5)

In [None]:
# Line plot of the per-month 'twp' counts.
df_groupbymonth['twp'].plot(kind='line')

In [None]:
# Rebuild the monthly counts from df with 'Month' as a regular column.
# Resetting the existing frame in place is not idempotent: running the cell a
# second time would add a stray 'index' column.
df_groupbymonth = df.groupby('Month').count().reset_index()
df_groupbymonth.head(12)

In [None]:
# Scatter of monthly 'twp' counts with a fitted linear regression line.
sns.lmplot(x='Month', y='twp', data=df_groupbymonth)

In [None]:
# Calendar date of each call, taken with the vectorized .dt accessor instead
# of a per-row lambda.
df['date'] = df['timeStamp'].dt.date
df.head()

### Frequency of call reasons over the period 2015-12 to 2016-09

In [None]:
# Non-null count of every column per calendar day, with 'date' as a column.
date_groupby = df.groupby(by='date').count().reset_index()
fig = plt.figure(figsize=(12, 2))
# Axes covering the whole figure canvas.
axes = fig.add_axes([0, 0, 1, 1])
axes.plot(date_groupby['date'], date_groupby['twp'])
axes.set_title('Reason')

In [None]:
# Per-day counts restricted to calls whose reason is EMS.
df_EMS = (
    df.loc[df['Reason'] == 'EMS']
    .groupby(by='date')
    .count()
    .reset_index()
)
df_EMS.head(n=5)

In [None]:
# Daily EMS call counts on axes that fill the whole figure.
fig = plt.figure(figsize=(12, 4))
axes = fig.add_axes([0, 0, 1, 1])
axes.plot(df_EMS['date'], df_EMS['Reason'])
axes.set_title('EMS')

In [None]:
# Per-day counts restricted to calls whose reason is Traffic.
df_traffic = (
    df.loc[df['Reason'] == 'Traffic']
    .groupby(by='date')
    .count()
    .reset_index()
)
df_traffic.head(n=5)

In [None]:
# Daily Traffic call counts on axes that fill the whole figure.
fig = plt.figure(figsize=(12, 4))
axes = fig.add_axes([0, 0, 1, 1])
axes.plot(df_traffic['date'], df_traffic['Reason'])
axes.set_title('Traffic')


In [None]:
# Per-day counts restricted to calls whose reason is Fire.
df_fire = (
    df.loc[df['Reason'] == 'Fire']
    .groupby(by='date')
    .count()
    .reset_index()
)
df_fire.head(n=5)

In [None]:
# Daily Fire call counts, drawn without a legend.
df_fire.plot(x='date', y='Reason', legend=None)
plt.title('Fire')
# tight_layout must be called; the bare attribute reference in the original
# was a no-op, so the layout was never adjusted.
plt.tight_layout()

### Relationship between 'dayofweek', 'Hour' and the count of 'Reason'

In [None]:
# Calls per (day of week, hour): rows are days, columns are hours.
# Only the 'Reason' column is counted, instead of counting every column and
# discarding all but one.
day_hour = df.groupby(by=['dayofweek', 'Hour'])['Reason'].count().unstack()
# groupby sorts the day names alphabetically (Fri, Mon, Sat, ...); put the
# rows in calendar order so the plots built from this table read naturally.
# Any label that is not a known day name is kept, after the known ones.
day_order = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
day_hour = day_hour.reindex(
    [d for d in day_order if d in day_hour.index]
    + [d for d in day_hour.index if d not in day_order]
)
day_hour.head()

In [None]:
# Heatmap of call counts by day of week (rows) and hour (columns).
plt.figure(figsize=(12, 6))
sns.heatmap(data=day_hour)

In [None]:
# clustermap creates its own figure, so a preceding plt.figure(figsize=...)
# only leaves an empty extra figure behind; pass the size to clustermap.
# A (day, hour) pair with no calls is NaN after unstack, which the
# hierarchical clustering cannot handle, so treat it as a count of 0.
sns.clustermap(day_hour.fillna(0), cmap="viridis", figsize=(12, 6))

In [None]:
# Calls per (day of week, month): rows are days, columns are months.
# Only the 'Reason' column is counted, instead of counting every column and
# discarding all but one.
day_month = df.groupby(by=['dayofweek', 'Month'])['Reason'].count().unstack()
# groupby sorts the day names alphabetically (Fri, Mon, Sat, ...); put the
# rows in calendar order so the plots built from this table read naturally.
# Any label that is not a known day name is kept, after the known ones.
day_order = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
day_month = day_month.reindex(
    [d for d in day_order if d in day_month.index]
    + [d for d in day_month.index if d not in day_order]
)
day_month.head()

In [None]:
# Heatmap of call counts by day of week (rows) and month (columns).
sns.heatmap(data=day_month)

In [None]:
# Hierarchically clustered heatmap of the day-of-week by month counts.
sns.clustermap(data=day_month)