# Performing data analysis first

## Importing necessary libraries

In [None]:
import pandas as pd  # pandas: loading the CSV and manipulating the DataFrame
import matplotlib.pyplot as mpt  # matplotlib's pyplot plotting interface (aliased as mpt throughout this notebook)
import seaborn as sb  # seaborn: plot theming and statistical plots built on matplotlib
import datetime as dt  # standard-library datetime module (not pandas' Timestamp); the cells visible here only use the Series .dt accessor
from decimal import Decimal  # Decimal is used below to build the bar-chart annotation labels

In [None]:
sb.set()  # apply seaborn's default theme and scaling to every plot drawn after this cell

## Reading data and storing it in a variable named data

In [None]:
# Load the 911 calls CSV into a DataFrame; the relative path expects the file under ../input/
data = pd.read_csv("../input/911.csv")
data.head()  # head() defaults to n=5, so this previews the first 5 rows

## Checking the dimensions of the dataset with the `shape` attribute

In [None]:
data.shape

In [None]:
data.shape[0]                                        #gives the number of rows

In [None]:
data.shape[1]                                          #gives the number of columns

## Knowing the information of the columns in the datasets

In [None]:
data.info()

## Making a list of all the columns

In [None]:
# Collect the DataFrame's column labels into a plain Python list
column_names = data.columns.tolist()

In [None]:
column_names

## Adding a column that holds the call type

In [None]:
data.title.head()  # preview the title column; the call type is separated out of it in the next cells

In [None]:
def call_seperator(x):
    """Return the call type, i.e. the text of title *x* before its first ':'.

    If *x* contains no ':' the whole string is returned unchanged.
    """
    # partition splits once at the first delimiter: (head, ':', tail)
    call_type, _, _ = x.partition(':')
    return call_type

In [None]:
# New call_type column: call_seperator is applied to every title, element by element
data['call_type'] = data['title'].map(call_seperator)

In [None]:
data.head()

In [None]:
data['call_type'].nunique()                 #shows the number of unique type of calls 

In [None]:
data['call_type'].unique()                                      #shows the different values or calls

In [None]:
data['call_type'].value_counts()                           #counts all the unique value

## Converting the timeStamp column to the pandas datetime dtype, since it is not loaded as one

In [None]:
# Parse the timeStamp values into pandas datetimes so the .dt accessor can be
# used on the column in the following cells.
# infer_datetime_format is no longer passed: it is deprecated since pandas 2.0
# (format inference is the default there) and only triggers a warning, while
# older pandas versions parse the column to the same values without it.
data['timeStamp'] = pd.to_datetime(data['timeStamp'])

In [None]:
data['timeStamp'].head()

## Extracting the year, month, day of the week and hour from the timestamp into separate columns.

In [None]:
data['year'] = data['timeStamp'].dt.year  # calendar year of each call

In [None]:
data['month'] = data['timeStamp'].dt.month_name()  # month as a name (e.g. 'January'); month_order further down reindexes by these names

In [None]:
data['day'] = data['timeStamp'].dt.day_name()  # day of the week as a name (e.g. 'Monday'), not the day of the month

In [None]:
data['hour'] = data['timeStamp'].dt.hour  # hour of the day of each call; grouped on in the hourly analysis below

## Fetching the call details

### Making a function to get the type of emergency from the title column of the dataset

In [None]:
def type_of_emergency(x):
    """Return the emergency description that follows the first ':' in title *x*.

    The title is split on ':' and the second segment is returned exactly as it
    appears (surrounding whitespace is kept). A title that contains no ':' has
    no emergency segment, so an empty string is returned instead of raising
    IndexError; this keeps a single malformed row from aborting a whole
    ``Series.apply`` pass.
    """
    parts = x.split(':')
    if len(parts) < 2:
        # No delimiter present: there is nothing after the call type
        return ''
    return parts[1]

In [None]:
# Store the emergency text extracted from every title in a new column.
# NOTE(review): the column key is spelled 'emergecy_type' (missing 'n'); use that exact spelling when reading the column back.
data['emergecy_type'] = data['title'].apply(type_of_emergency)

In [None]:
data.head()

## Performing data visualization

In [None]:
call_types = data['call_type'].value_counts()  # number of calls per call type; plotted as a bar chart below

In [None]:
call_types

In [None]:
mpt.figure(figsize=(12, 8))
ax = call_types.plot.bar()
# ax.patches holds the rectangles drawn for the bars; label each one with its
# height, anchored at the bar's left edge and top
for bar in ax.patches:
    bar_height = bar.get_height()
    ax.annotate(Decimal(str(bar_height)), (bar.get_x(), bar_height))
mpt.xticks(rotation=0)  # keep the x tick labels horizontal
mpt.savefig("Emergency type vs Frequency.png")

In [None]:
data.info()                            #fetching info again to visualize in some other way

## Using the groupby function to group specific fields of the data.

In [None]:
calls_data = data.groupby(['month', 'call_type'])['call_type'].count()  # number of calls for every (month, call_type) pair

In [None]:
calls_data.head()

## Calculating percentage of calls

In [None]:
# Share of each call type within its month, rounded to a whole-number percentage.
# transform('sum') broadcasts every month's total back onto the original
# (month, call_type) index, so the result keeps exactly those two index levels.
# groupby(level=0).apply(...) is avoided here: on pandas >= 2.0 it prepends the
# group key as an extra index level, which makes the reindex(..., level=1) call
# further down act on the month level instead of the call type.
call_percentage = round(
    100 * calls_data / calls_data.groupby(level=0).transform('sum')
)

In [None]:
call_percentage.head()

### Plotting a graph accordingly

In [None]:
# Shared text styling passed as fontdict to the axis labels and titles below
font = dict(size='x-large', weight='bold')

# Month names in calendar order, used to reindex the monthly data
month_order = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]

In [None]:
call_percentage = call_percentage.reindex(month_order,  level=0)  # put index level 0 in calendar order (month_order) — assumes level 0 holds the month names

In [None]:
call_percentage = call_percentage.reindex(['EMS', 'Traffic', 'Fire'], level=1)  # fix the order of index level 1 to EMS, Traffic, Fire — assumes level 1 holds the call type

In [None]:
call_percentage.head()

In [None]:
# The figure size is set through seaborn's rc because DataFrame.plot opens its own figure
sb.set(rc={'figure.figsize': (12, 8)})
# unstack() moves the inner index level into columns, giving one bar per column within each x group
monthly_ax = call_percentage.unstack().plot(kind='bar')
monthly_ax.set_xlabel('Name of the Month', fontdict=font)
monthly_ax.set_ylabel('Percentage of Calls', fontdict=font)
mpt.xticks(rotation=0)  # keep the x tick labels horizontal
monthly_ax.set_title('Calls/Month', fontdict=font)

### Creating a pie chart of the same

In [None]:
call_percentage = call_percentage.sort_values(ascending=False)  # largest percentages first; this is the order the pie chart below receives them in

In [None]:
# One wedge per entry of call_percentage, labelled with that entry's index value;
# autopct prints each wedge's share of the whole pie with two decimals
fig, pie_ax = mpt.subplots(figsize=(12, 8))
pie_ax.pie(call_percentage, labels=call_percentage.index, autopct="%.2f")
mpt.savefig("call percentage pie chart.png")

## Visualizing hourly data

In [None]:
hours_data = data.groupby(['hour', 'call_type'])['call_type'].count()  # number of calls for every (hour, call_type) pair

In [None]:
hours_data.head()

## Calculating the percentage of calls per hour

In [None]:
# Share of each call type within its hour, rounded to a whole-number percentage.
# transform('sum') broadcasts every hour's total back onto the original
# (hour, call_type) index, so the result keeps exactly those two index levels.
# groupby(level=0).apply(...) is avoided here: on pandas >= 2.0 it prepends the
# group key as an extra index level, which makes the reindex(..., level=1) call
# further down act on the hour level instead of the call type.
hours_percentage = round(
    100 * hours_data / hours_data.groupby(level=0).transform('sum')
)

In [None]:
hours_percentage.head()

In [None]:
hours_percentage = hours_percentage.reindex(['EMS', 'Traffic', 'Fire'], level=1)  # fix the order of index level 1 to EMS, Traffic, Fire — assumes level 1 holds the call type

### Creating a plot accordingly

In [None]:
# The figure size is set through seaborn's rc because DataFrame.plot opens its own figure
sb.set(rc={'figure.figsize': (18, 8)})
# unstack() moves the inner index level into columns, giving one bar per column within each x group
hourly_ax = hours_percentage.unstack().plot(kind='bar')
hourly_ax.set_xlabel('Hour of the day', fontdict=font)
hourly_ax.set_ylabel('Percentage of Calls', fontdict=font)
mpt.xticks(rotation=0)  # keep the x tick labels horizontal
hourly_ax.set_title('Calls/Hour', fontdict=font)
mpt.savefig("Percentage of calls vs hours.png")