# Exploratory Data Analysis - 911 Call Dataset

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file under the Kaggle input directory,
# then print one path per line.
input_files = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_files:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Import dataset with pandas

In [None]:
# Load the 911 call CSV from the Kaggle input directory and preview the first rows.
DATA_PATH = '../input/montcoalert/911.csv'
df = pd.read_csv(DATA_PATH)
df.head()

# Prepare the data

## Dataset column explanation
* lat: Latitude
* lng: Longitude
* desc: Description of the Emergency Call
* zip: Zipcode
* title: Title
* timeStamp: YYYY-MM-DD HH:MM:SS
* twp: Township
* addr: Address
* e: Dummy variable (always 1)

## Check the data type of each column in the dataset

In [None]:
# Print a concise summary of the DataFrame (columns, non-null counts, dtypes).
df.info()

## Drop the dummy 'e' column

In [None]:
# 'e' is a dummy column that is always 1, so it adds nothing to the analysis.
df = df.drop(columns=['e'])
df.head()

## Check each column for null values

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# The text before the first ':' in 'title' is the call category
# (compared against 'Traffic', 'Fire' and 'EMS' later on); store it as 'reason'.
df['reason'] = df['title'].map(lambda text: text.partition(':')[0])
df.head()

# Visualize the data with graphs to make it easier to understand

## Visualize the top 5 zip codes by number of 911 calls

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# The five zip codes with the most calls, most frequent first.
top_zip_codes = df['zip'].value_counts().head(5).index
plt.figure(figsize=(12, 6))
sns.countplot(data=df, x='zip', order=top_zip_codes, palette='pastel')

## Visualize the top 5 townships by number of 911 calls

In [None]:
# The five townships with the most calls, most frequent first.
top_townships = df['twp'].value_counts().head(5).index
plt.figure(figsize=(12, 6))
sns.countplot(data=df, x='twp', order=top_townships, palette='pastel')

## Visualize the most common reasons for calling 911

In [None]:
# One bar per call reason, showing how many calls fall into each.
plt.figure(figsize=(12, 6))
sns.countplot(data=df, x='reason', palette='pastel')

# Convert the 'timeStamp' column into datetime objects

In [None]:
# Parse the timestamp strings once, then overwrite the column with the datetime values.
parsed_timestamps = pd.to_datetime(df['timeStamp'])
df['timeStamp'] = parsed_timestamps

In [None]:
# Derive calendar features from the parsed timestamps with the vectorised `.dt`
# accessor instead of calling a Python lambda once per row.
timestamps = df['timeStamp']
df['hour'] = timestamps.dt.hour
df['month'] = timestamps.dt.month      # 1 = January ... 12 = December
df['day'] = timestamps.dt.dayofweek    # 0 = Monday ... 6 = Sunday
df['year'] = timestamps.dt.year
df.head()

# Visualize 911 call counts by day of week, month, and year

In [None]:
# 'day' holds the day-of-week number (0 = Monday ... 6 = Sunday); replace it with
# a short label by pairing each position in the list with its index.
weekday_labels = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
dayMap = dict(enumerate(weekday_labels))
df['day'] = df['day'].map(dayMap)

In [None]:
# Calls per weekday, split by reason.
plt.figure(figsize=(12, 6))
sns.countplot(data=df, x='day', hue='reason', palette='pastel')
# Anchor the legend's upper-left corner just outside the axes so it does not cover the bars.
plt.legend(loc='upper left', bbox_to_anchor=(1.05, 1), borderaxespad=0.0)

In [None]:
# 'month' was filled from the timestamp's `.month`, which is 1-based
# (January == 1, December == 12), so the lookup keys must run from 1 to 12.
# With 0-based keys every call was labelled one month late and December (12)
# had no entry, turning it into NaN.
monthMap = {
    1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr', 5: 'Mei', 6: 'Jun',
    7: 'Jul', 8: 'Aug', 9: 'Sep', 10: 'Okt', 11: 'Nov', 12: 'Des',
}
df['month'] = df['month'].map(monthMap)

In [None]:
# Calls per month, split by reason.
plt.figure(figsize=(12, 6))
sns.countplot(data=df, x='month', hue='reason', palette='pastel')
# Anchor the legend's upper-left corner just outside the axes so it does not cover the bars.
plt.legend(loc='upper left', bbox_to_anchor=(1.05, 1), borderaxespad=0.0)

In [None]:
# Calls per year, split by reason.
plt.figure(figsize=(12, 6))
sns.countplot(data=df, x='year', hue='reason', palette='pastel')
# Anchor the legend's upper-left corner just outside the axes so it does not cover the bars.
plt.legend(loc='upper left', bbox_to_anchor=(1.05, 1), borderaxespad=0.0)

# Visualize 911 call counts over time, with and without the reason breakdown

In [None]:
# Two panels sharing the x axis: left split by reason, right showing the totals.
figure, axes = plt.subplots(nrows=1, ncols=2, sharex=True, figsize=(16, 6))
by_reason_ax, total_ax = axes
figure.suptitle('911 Call by Year Graph')
sns.countplot(data=df, x='year', hue='reason', palette='pastel', ax=by_reason_ax)
sns.countplot(data=df, x='year', palette='pastel', ax=total_ax)

In [None]:
# Two panels sharing the x axis: left split by reason, right showing the totals.
figure, axes = plt.subplots(nrows=1, ncols=2, sharex=True, figsize=(16, 6))
by_reason_ax, total_ax = axes
figure.suptitle('911 Call by Month Graph')
sns.countplot(data=df, x='month', hue='reason', palette='pastel', ax=by_reason_ax)
sns.countplot(data=df, x='month', palette='pastel', ax=total_ax)

In [None]:
# Two panels sharing the x axis: left split by reason, right showing the totals.
figure, axes = plt.subplots(nrows=1, ncols=2, sharex=True, figsize=(16, 6))
by_reason_ax, total_ax = axes
figure.suptitle('911 Call by Days Graph')
sns.countplot(data=df, x='day', hue='reason', palette='pastel', ax=by_reason_ax)
sns.countplot(data=df, x='day', palette='pastel', ax=total_ax)

# Visualize 911 calls by date for each reason

In [None]:
# Keep only the calendar date of each timestamp so calls can be grouped per day.
df['date'] = df['timeStamp'].dt.date

In [None]:
# Preview the first rows to confirm the new 'date' column was added.
df.head()

## Visualize 'Traffic' reason for 911 call

In [None]:
# Daily number of 'Traffic' calls, counted as rows with a non-null township.
traffic_calls = df.loc[df['reason'] == 'Traffic']
daily_traffic = traffic_calls.groupby('date')['twp'].count()
plt.figure(figsize=(12, 6))
daily_traffic.plot()
plt.title('Traffic')
plt.tight_layout()

## Visualize 'Fire' reason for 911 call

In [None]:
# Daily number of 'Fire' calls, counted as rows with a non-null township.
fire_calls = df.loc[df['reason'] == 'Fire']
daily_fire = fire_calls.groupby('date')['twp'].count()
plt.figure(figsize=(12, 6))
daily_fire.plot()
plt.title('Fire')
plt.tight_layout()

## Visualize 'EMS' reason for 911 call

In [None]:
# Daily number of 'EMS' calls, counted as rows with a non-null township.
ems_calls = df.loc[df['reason'] == 'EMS']
daily_ems = ems_calls.groupby('date')['twp'].count()
plt.figure(figsize=(12, 6))
daily_ems.plot()
plt.title('EMS')
plt.tight_layout()

# Visualize at what times the most 911 calls occur

In [None]:
# Pivot the number of calls into a weekday (rows) x hour (columns) table.
dayHour = df.groupby(by=['day', 'hour'])['reason'].count().unstack()
# groupby sorts the weekday labels alphabetically (Fri, Mon, Sat, ...); put the
# rows back in calendar order, keeping only the labels that are actually present.
weekday_order = [label for label in dayMap.values() if label in dayHour.index]
dayHour = dayHour.reindex(weekday_order)

## Visualize 911 calls by day of week and hour

In [None]:
# Heatmap of call counts: one row per weekday, one column per hour.
plt.figure(figsize=(12, 6))
sns.heatmap(data=dayHour, cmap='coolwarm')

## Visualize 911 calls by day of week and month

In [None]:
# Pivot the number of calls into a weekday (rows) x month (columns) table.
dayMonth = df.groupby(by=['day', 'month'])['reason'].count().unstack()
# groupby sorts both label sets alphabetically; restore calendar order on each
# axis, keeping only the labels that are actually present in the table.
weekday_order = [label for label in dayMap.values() if label in dayMonth.index]
month_order = [label for label in monthMap.values() if label in dayMonth.columns]
dayMonth = dayMonth.reindex(index=weekday_order, columns=month_order)
dayMonth.head()

In [None]:
# Heatmap of call counts: one row per weekday, one column per month.
plt.figure(figsize=(12, 6))
sns.heatmap(data=dayMonth, cmap='coolwarm')