In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere beneath the Kaggle input directory.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# Use seaborn's 'whitegrid' style for the figures drawn in the cells below.
sns.set_style('whitegrid')
# IPython magic: select matplotlib's inline backend so figures render in the cell output.
%matplotlib inline

In [None]:
# Load the 911-call CSV from the Kaggle input directory into the working DataFrame.
csv_path = '/kaggle/input/montcoalert/911.csv'
df = pd.read_csv(csv_path)

In [None]:
# Print a summary of the frame: column names, non-null counts and dtypes.
df.info()

In [None]:
# DataFrame.head(n) is a thin wrapper around iloc[:n], so slicing with iloc
# directly gives the same rows while skipping one function call.
# Note that loc[:2] is NOT the same thing: loc slices by index *label* and
# includes the end label, so it only matches "first three rows" while the
# index is the default 0..n-1 range.  Positional iloc[:3] is always correct.

df.iloc[:3]

In [None]:
# Five most frequent ZIP codes.  value_counts() returns the counts sorted in
# descending order, so the first five entries are the top five.

zip_counts = df['zip'].value_counts()
zip_counts.head(5)

In [None]:
# Five most frequent townships (value_counts() sorts by descending count).
twp_counts = df['twp'].value_counts()
twp_counts.head(5)

In [None]:
# unique() returns an array holding each distinct title once, so the
# length of that array is the number of distinct title codes.

distinct_titles = df['title'].unique()
len(distinct_titles)

In [None]:
# Split every title of the form "<Reason>:<Department>" into two new columns.
#
# An earlier version unpacked the `.str` accessor of a Series of lists
# (`a, b = series.apply(lambda x: x.split(':')).str`).  That relied on
# iterating over `.str`, which newer pandas releases no longer allow
# (it raises "TypeError: 'StringMethods' object is not iterable").
# The vectorised `str.split(..., expand=True)` does the same job without a
# Python-level lambda and returns one DataFrame column per piece.
#
# n=1 splits at the first colon only, so a department text that itself
# contains a colon cannot produce a third column.
#
# The guard makes the cell safe to re-run: once 'title' has been dropped the
# split is skipped instead of failing with a KeyError.
if 'title' in df.columns:
    title_parts = df['title'].str.split(':', n=1, expand=True)
    df['Reason'] = title_parts[0]
    df['Dept'] = title_parts[1]
    df = df.drop(columns=['title'])
df.iloc[:3]

In [None]:
# Number of calls recorded for each Reason, most frequent first.
reason_counts = df['Reason'].value_counts()
reason_counts

In [None]:
# Bar chart of call counts per Reason.
# countplot looks the column up inside `data` by name, so `x` only needs the
# column label ('Reason'); passing the Series itself as `data` is not needed.

reason_ax = sns.countplot(data=df, x='Reason', palette="Set2")
reason_ax.set_title('911 Calls by Reason')

In [None]:
# Asking the column for its dtype is not very informative here:
#   df['timeStamp'].dtype          returns dtype('O')
#   df['timeStamp'].dtypes         returns dtype('O')
#   type(df['timeStamp'].values)   returns numpy.ndarray
#   df['timeStamp'].values.dtype   returns dtype('O')
# dtype('O') is the letter "O" for "object" (not a zero): pandas only says
# the column holds generic Python objects, not which kind.
#
# So look at the type of one actual element instead.  iloc[0] picks the first
# row by position; a label lookup such as .get(0) would quietly return None
# if the index had no label 0.
type(df['timeStamp'].iloc[0])

In [None]:
# Convert the timeStamp column to pandas datetime values so that date/time
# components can be extracted from it later.
parsed_stamps = pd.to_datetime(df['timeStamp'])
df['timeStamp'] = parsed_stamps

In [None]:
# Derive hour-of-day, month number and weekday number (Monday=0 ... Sunday=6)
# from the parsed timestamps.
# The `.dt` accessor is vectorised and replaces the earlier pattern of
# building a list per row and unpacking the `.str` accessor, which newer
# pandas releases reject because `.str` is no longer iterable.
# Requires df['timeStamp'] to already be a datetime column.
df['Hour'] = df['timeStamp'].dt.hour
df['Month'] = df['timeStamp'].dt.month
df['Day of Week'] = df['timeStamp'].dt.dayofweek

In [None]:
# Weekday number (Monday=0 ... Sunday=6) -> short weekday name.
dmap = {0:'Mon',1:'Tue',2:'Wed',3:'Thu',4:'Fri',5:'Sat',6:'Sun'}
# Build the names from the timestamps rather than from the existing
# 'Day of Week' column.  Mapping the column onto itself only works once: on a
# second run the values are already names, match no key in dmap, and would
# all turn into NaN.  Deriving from timeStamp keeps the cell re-runnable.
# Requires df['timeStamp'] to already be a datetime column.
df['Day of Week'] = df['timeStamp'].dt.dayofweek.map(dmap)

In [None]:
# Call counts per weekday, split by Reason.
# Without `order`, the bars appear in whatever order the weekday names first
# occur in the data; passing the names from dmap fixes the x-axis to Mon..Sun.
weekday_order = list(dmap.values())
plot = sns.countplot(x="Day of Week", hue="Reason", data=df,
                     order=weekday_order, palette="Set2")
# Anchor the legend outside the right edge of the axes so it does not cover the bars.
plot.legend(loc='upper right', bbox_to_anchor=(1.25,1),ncol=1)
plot.set_title('911 Calls by Day of Week and Reason')

In [None]:
# Call counts per month, with one bar colour per Reason.
plot = sns.countplot(data=df, x="Month", hue="Reason", palette="Set2")
# Anchor the legend past the right edge of the axes.
plot.legend(ncol=1, bbox_to_anchor=(1.25, 1), loc='upper right')
plot.set_title('911 Calls by Month and Reason')

In [None]:
# Per-month count of non-null values in every column; Month becomes the index.
byMonth = df.groupby('Month').count()
byMonth.head(5)

In [None]:
# count() only tallies non-null values, so columns with missing data can show
# different totals for the same month (compare the zip and latitude counts for
# January, for example).  Plotting the whole frame would draw one line per
# column; `y` restricts the plot to a single column label or a list of them.

byMonth.plot(title='911 Calls per Month', y=['lat', 'zip'])

In [None]:
# lmplot takes its x/y variables as column names of `data`, and Month is the
# index of byMonth, so copy the index into a regular column first.

byMonth['Month'] = byMonth.index
month_grid = sns.lmplot(data=byMonth, x='Month', y='twp', palette="Set2")
# Note: linfit receives the return value of suptitle(), not the grid itself.
linfit = month_grid.fig.suptitle('Linear Fit of 911 Calls by Month')

In [None]:
# Keep only the calendar-date part of each timestamp in a new 'Date' column.
df['Date'] = df['timeStamp'].dt.date
df.head(3)

In [None]:
# Daily call volume: count the rows for each calendar date and plot the
# township count as a line over time.  DataFrame.plot draws it in one call.

byDate = df.groupby('Date').count()
byDate.plot(title='911 Calls by Date', y=['twp'])
plt.tight_layout()

In [None]:
# Prepare per-date, per-reason counts for the plots that follow.
byDateReason = (
    df.groupby(['Date', 'Reason'])
      .count()                       # non-null counts for each (Date, Reason) pair
      .reset_index(level='Date')     # move the Date index level into a column
      .reset_index(level='Reason')   # move the Reason index level into a column
)

In [None]:
# Daily call counts for the 'EMS' reason only.
ems_rows = byDateReason[byDateReason['Reason'] == 'EMS']
ems_rows.plot(title='911 Calls for EMS by Date', x='Date', y='twp')
plt.tight_layout()

In [None]:
# Daily call counts for the 'Traffic' reason only.
traffic_rows = byDateReason[byDateReason['Reason'] == 'Traffic']
traffic_rows.plot(title='911 Calls for Traffic Accident by Date', x='Date', y='twp')
plt.tight_layout()

In [None]:
# Daily call counts for the 'Fire' reason only.
fire_rows = byDateReason[byDateReason['Reason'] == 'Fire']
fire_rows.plot(title='911 Calls for Fires by Date', x='Date', y='twp')
plt.tight_layout()