In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file below the Kaggle input directory,
# then print them one per line.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Loading Data

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Read the 911 call log from the Kaggle input directory into a DataFrame.
df = pd.read_csv('/kaggle/input/montcoalert/911.csv')

In [None]:
# Print the column names, dtypes and non-null counts of the loaded frame.
df.info()

In [None]:
# Preview the first five rows.
df.head()

## Basic Analysis

In [None]:
# Five ZIP codes with the most 911 calls
zip_counts = df['zip'].value_counts()
zip_counts.head(5)

In [None]:
# Five townships with the most 911 calls
township_counts = df['twp'].value_counts()
township_counts.head(5)

In [None]:
# Count the distinct values in 'title' (missing values are not counted).
df['title'].nunique() #number of unique call reasons

## Feature Engineering

In [None]:
# The text before the first ':' in 'title' names the responding department
# (the values filtered on later in this notebook are 'EMS', 'Fire' and 'Traffic').
# The vectorised .str accessor replaces the per-row lambda: it is faster and a
# missing title becomes NaN instead of raising AttributeError.
df['Reason'] = df['title'].str.split(':', n=1).str[0]
df['Reason'].head()

In [None]:
# Number of calls per department, most frequent first.
df['Reason'].value_counts() #EMS is the most common reason for 911 calls

In [None]:
# Bar chart of the number of calls handled by each department.
sns.set_style('whitegrid')
ax = sns.countplot(data=df, x='Reason')
ax.set_ylabel('Calls')
ax.set_title('Total number of calls per department')

In [None]:
# Inspect the Python type of the first 'timeStamp' entry (a plain string)
type(df.loc[0, 'timeStamp'])

In [None]:
# Parse the string timestamps into a new 'DateTime' column;
# the original 'timeStamp' column is left as it is.
parsed_timestamps = pd.to_datetime(df['timeStamp'])
df['DateTime'] = parsed_timestamps
type(df.loc[0, 'DateTime'])  # entries of the new column are Timestamp objects

In [None]:
# Derive calendar features from 'DateTime' with the vectorised .dt accessor
# instead of calling a Python lambda once per row.
df['Hour'] = df['DateTime'].dt.hour
df['Month'] = df['DateTime'].dt.month
df['Week Day'] = df['DateTime'].dt.dayofweek  # 0 = Monday ... 6 = Sunday
df.head()

In [None]:
dmap = {0:'Mon',1:'Tue',2:'Wed',3:'Thu',4:'Fri',5:'Sat',6:'Sun'}
# Build the day names from 'DateTime' rather than from the current 'Week Day' values:
# mapping the column onto itself turns every entry into NaN when the cell is run a
# second time, because the names are not keys of dmap.
df['Week Day'] = df['DateTime'].dt.dayofweek.map(dmap)

In [None]:
# Look at the first few 'Week Day' values after the mapping.
df['Week Day'].head()

In [None]:
# Without an explicit `order`, seaborn draws string categories in the order they
# first appear in the data, which scrambles the week. Pin the calendar order.
weekday_order = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
sns.countplot(x='Week Day', data=df, hue='Reason', order=weekday_order)
plt.legend(bbox_to_anchor=(1, 1)) #move legend outside of plot
plt.ylabel('Calls')
plt.title('Total number of calls, per department, per week day')

In [None]:
# Calls per month, split by department; the legend is anchored to the right of the axes.
ax = sns.countplot(data=df, x='Month', hue='Reason')
ax.legend(bbox_to_anchor=(1.25, 1))
ax.set_ylabel('Calls')
ax.set_title('Total number of calls, per department, per month')

In [None]:
# Per-month count of non-null entries in every column
month_groups = df.groupby('Month')
byMonth = month_groups.count()
byMonth.head()

In [None]:
# Line plot of the monthly call count ('lat' non-null count stands in for the number of calls).
ax = byMonth['lat'].plot()
ax.set_xlim(1, 12)
ax.set_ylabel('Calls')
ax.set_title('Calls per month')

In [None]:
# Calendar date of each call, taken with the vectorised .dt accessor instead of a
# per-row lambda.
df['Date'] = df['DateTime'].dt.date

In [None]:
# Per-day count of non-null entries in every column
date_groups = df.groupby('Date')
byDate = date_groups.count()
byDate.head()

In [None]:
# Daily call volume over the whole period, drawn on an explicit wide Axes.
fig, ax = plt.subplots(figsize=(10, 3))
byDate['lat'].plot(ax=ax)
ax.set_ylabel('Calls')
ax.set_title('Total calls as a function of time')

In [None]:
# Daily call volume restricted to rows whose 'Reason' is 'Traffic'.
is_traffic = df['Reason'] == 'Traffic'
byDate_traffic = df.loc[is_traffic].groupby('Date').count()

fig, ax = plt.subplots(figsize=(10, 3))
byDate_traffic['lat'].plot(ax=ax)
ax.set_ylabel('Calls')
ax.set_title('Total traffic calls as a function of time')

In [None]:
# Daily call volume restricted to rows whose 'Reason' is 'Fire'.
is_fire = df['Reason'] == 'Fire'
byDate_fire = df.loc[is_fire].groupby('Date').count()

fig, ax = plt.subplots(figsize=(10, 3))
byDate_fire['lat'].plot(ax=ax)
ax.set_ylabel('Calls')
ax.set_title('Total fire calls as a function of time')

In [None]:
# Daily call volume restricted to rows whose 'Reason' is 'EMS'.
is_ems = df['Reason'] == 'EMS'
byDate_ems = df.loc[is_ems].groupby('Date').count()

fig, ax = plt.subplots(figsize=(10, 3))
byDate_ems['lat'].plot(ax=ax)
ax.set_ylabel('Calls')
ax.set_title('Total EMS calls as a function of time')

## Heatmaps

In [None]:
# Weekday x hour table of call counts.
# groupby sorts the day names alphabetically (Fri, Mon, Sat, ...), so the rows are
# reindexed into calendar order. Day/hour slots without any call are filled with 0
# rather than left as NaN, since the cells are counts.
weekday_order = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
day_hour = (
    df.groupby(by=['Week Day', 'Hour'])['lat']
    .count()
    .unstack(fill_value=0)
    .reindex(weekday_order, fill_value=0)
)
day_hour.head()

In [None]:
# Heatmap of the weekday x hour call counts on an explicit Axes.
fig, ax = plt.subplots(figsize=(12, 6))
sns.heatmap(day_hour, cmap='coolwarm', ax=ax)
ax.set_title('Heatmap of calls for a given hour of a weekday')

In [None]:
# clustermap creates its own figure with several axes (dendrograms, heatmap, colour bar),
# so plt.title() would label whichever of them happens to be current rather than the plot
# as a whole. Set the title on the top (column-dendrogram) axes of the returned grid.
grid = sns.clustermap(data=day_hour,cmap='coolwarm')
grid.ax_col_dendrogram.set_title('Cluster of calls per hour of a given weekday')

In [None]:
# Weekday x month table of call counts.
# groupby sorts the day names alphabetically (Fri, Mon, Sat, ...), so the rows are
# reindexed into calendar order. Day/month slots without any call are filled with 0
# rather than left as NaN, since the cells are counts.
weekday_order = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
day_month = (
    df.groupby(by=['Week Day', 'Month'])['lat']
    .count()
    .unstack(fill_value=0)
    .reindex(weekday_order, fill_value=0)
)
day_month.head()

In [None]:
# Heatmap of the weekday x month call counts on an explicit Axes.
fig, ax = plt.subplots(figsize=(12, 6))
sns.heatmap(day_month, cmap='coolwarm', ax=ax)
ax.set_title('Heatmap of calls per month, per weekday')

In [None]:
# clustermap creates its own figure with several axes (dendrograms, heatmap, colour bar),
# so plt.title() would label whichever of them happens to be current rather than the plot
# as a whole. Set the title on the top (column-dendrogram) axes of the returned grid.
grid = sns.clustermap(data=day_month,cmap='coolwarm')
grid.ax_col_dendrogram.set_title('Clustermap of calls per month, per weekday')