In [None]:
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import cufflinks as cf

import plotly.graph_objects as go

In [None]:
# Load the CSV at ../input/montcoalert/911.csv into a DataFrame named df.
df = pd.read_csv("../input/montcoalert/911.csv")

### Understanding the columns of the data

In [None]:
# Preview the first three rows of the loaded data.

df.head(3)

Now let's look at the columns.

In [None]:
# List the column labels of df.
df.columns

We will not be using the `desc`, `zip`, or `addr` columns, so let's remove them.

In [None]:
# Keep only the columns used in the rest of the analysis.
# .copy() makes the result an independent DataFrame, so the column assignments
# in later cells modify it directly instead of a slice of the originally loaded
# frame (which is what triggers pandas' SettingWithCopyWarning).
df = df[['lat', 'lng', 'title', 'timeStamp', 'twp', 'e']].copy()

df.head(3)

Check for the nulls


In [None]:
# Report the number of missing values per column, then the total row count.
missing_per_column = df.isna().sum()
total_rows = len(df)
print(missing_per_column)
print(total_rows)

There are 663522 rows. Removing the rows where `twp` is null would not do much harm, since the township cannot be recovered for those rows anyway.
The other option is to introduce an "Unspecified" township for them, which is what we do here.

In [None]:
# Label calls that have no township as "Unspecified" rather than dropping them.
# The result is assigned back to the column: calling fillna(inplace=True) on a
# selected column operates on an intermediate object, which is deprecated in
# recent pandas and leaves df unchanged under copy-on-write.
df['twp'] = df['twp'].fillna("Unspecified")

In [None]:
# Re-count missing values per column after filling the township column.
df.isna().sum()

Handling of missing values ends here!

In [None]:
# Show the distinct values found in the title column.
df['title'].unique()

Looking at the output above, each title starts with the type of call, followed by a colon and further information. We need to separate these two pieces.

In [None]:
# Split each title once, at its first colon, into the call type (text before
# the colon) and the explanation (everything after it).
# str.partition is vectorised and always returns three columns
# (before, separator, after), so a title without a colon yields an empty
# explanation instead of raising, and an explanation that itself contains a
# colon is kept whole rather than truncated.
# The explanation is not stripped, so any whitespace following the colon is kept.
title_parts = df['title'].str.partition(":")
df['type'] = title_parts[0]
df['type explanation'] = title_parts[2]

Now we need to do something with time.

In [None]:
# Inspect the Python type of the first value in the timeStamp column.
type(df['timeStamp'].iloc[0])

We do not want it stored as a string, so let's convert it to a datetime.

In [None]:
# Replace the timeStamp column with its values parsed by pd.to_datetime.
df['timeStamp'] = pd.to_datetime(df['timeStamp'])

In [None]:
# Keep the first timestamp as a sample value.
# NOTE(review): no later cell in view reads this variable (the lambdas below
# use their own `time` parameter) — confirm whether it is still needed.
time = df['timeStamp'].iloc[0]

In [None]:
# Display df re-indexed by its timeStamp values.
# set_index returns a new DataFrame and the result is not assigned here, so df
# itself keeps its original index.
df.set_index(df['timeStamp'])

In [None]:
# Derive calendar features from the parsed timeStamp column.
# The vectorised .dt accessor replaces the per-row Python lambdas, and
# day_name()/month_name() return the English names directly, which also removes
# the hand-written lookup tables (one of which misspelled "February").
timestamps = df['timeStamp']
df['Day of Week'] = timestamps.dt.day_name()
df['Month No'] = timestamps.dt.month
df['Month'] = timestamps.dt.month_name()
df['Hour'] = timestamps.dt.hour

In [None]:
# Bucket each call's hour into a named part of the day:
#   06-11 Morning, 12-14 Afternoon, 15-17 Evening, 18-05 Night.
hour = df['Hour']
time_of_day_masks = {
    'Morning': (hour >= 6) & (hour < 12),
    'Afternoon': (hour >= 12) & (hour < 15),
    'Evening': (hour >= 15) & (hour < 18),
    'Night': (hour >= 18) | (hour < 6),
}
for label, mask in time_of_day_masks.items():
    df.loc[mask, 'Time of Day'] = label

#### Plot Data Geographically

We plot the data geographically to see at a glance where most calls are made from. (The map itself is drawn at the end of the notebook; the next cell only sets up offline plotting.)

In [None]:
# Initialise Plotly's notebook mode and put cufflinks in offline mode; the
# .iplot() calls in the following cells rely on this setup.
init_notebook_mode(connected=True)
cf.go_offline()

### Exploring Type

First, let's see which types of calls are received most often.

In [None]:
# Bar chart of how many calls fall under each call type.
calls_per_type = df['type'].value_counts()
calls_per_type.iplot(kind='bar')

Now let's take each type in turn and look at its trend per day of the week.

In [None]:
# Count calls per (call type, weekday).
# crosstab sorts the weekday columns alphabetically (Friday, Monday, ...), so
# reindex them into calendar order to make the resulting chart readable.
# fill_value=0 keeps a weekday that has no calls as an explicit zero column.
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
type_vs_days = pd.crosstab(df['type'], df['Day of Week']).reindex(columns=weekday_order, fill_value=0)

In [None]:
# Bar chart of the type_vs_days cross-tabulation.
type_vs_days.iplot(kind='bar')

#### Type vs Time of Day

In [None]:
# Count calls per (time of day, call type).
# crosstab sorts the row labels alphabetically (Afternoon, Evening, Morning,
# Night), so reindex them into chronological order for the chart.
# fill_value=0 keeps a period that has no calls as an explicit zero row.
time_of_day_order = ['Morning', 'Afternoon', 'Evening', 'Night']
type_vs_daytime = pd.crosstab(df['Time of Day'], df['type']).reindex(time_of_day_order, fill_value=0)

In [None]:
# Bar chart of the type_vs_daytime cross-tabulation.
type_vs_daytime.iplot(kind="bar")

#### Type vs Months

In [None]:
# Count calls per (month number, call type).
type_vs_month = pd.crosstab(df['Month No'], df['type'])

# Add a readable month label for plotting. The label is looked up from each
# row's month number rather than assigned as a fixed 12-item list, so this
# still works (and stays correctly aligned) when some months are absent from
# the data.
months = ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"]
month_name_by_number = dict(enumerate(months, start=1))
type_vs_month['Month'] = [month_name_by_number[month_no] for month_no in type_vs_month.index]



In [None]:
# Display the type_vs_month table.
type_vs_month

In [None]:
# Plot the type_vs_month table, using its "Month" column for the x axis.
type_vs_month.iplot(x="Month")

### Type VS Hour

In [None]:
# Count calls per (hour of day, call type).
type_vs_hour = pd.crosstab(index=df['Hour'], columns=df['type'])

In [None]:
# Plot the type_vs_hour table; no chart kind is passed, so cufflinks' default is used.
type_vs_hour.iplot()

### Finally we have the Plot for the Geography

In [None]:
# Count the calls made from each distinct (lat, lng) coordinate pair.
geo_data = df.groupby(['lat', 'lng']).size().reset_index(name="no of calls")

# One marker per coordinate pair, coloured by the number of calls from it.
# The hover text is the call count for that location.
fig = go.Figure(data=go.Scattergeo(
        locationmode = 'USA-states',
        lon = geo_data['lng'],
        lat = geo_data['lat'],
        text = geo_data['no of calls'],
        mode = 'markers',
        marker = dict(
            size = 2,
            opacity = 0.8,
            reversescale = True,
            autocolorscale = False,
            line = dict(
                width=1,
                # Three components, so this is an rgb() colour, not rgba().
                color='rgb(102, 102, 102)'
            ),
            colorscale = 'Blues',
            # Colour scale runs from zero to the busiest location.
            cmin = 0,
            color = geo_data['no of calls'],
            cmax = geo_data['no of calls'].max(),
            colorbar_title="Plot of the Calls"
        )))

fig.update_layout(
        # The hover text is the call count, not a county name.
        title = 'Number of 911 calls by location (hover for call count)',
        geo = dict(
            scope='usa',
            projection_type='albers usa',
            showland = True,
            landcolor = "rgb(250, 250, 250)",
            subunitcolor = "rgb(217, 217, 217)",
            countrycolor = "rgb(217, 217, 217)",
            countrywidth = 0.5,
            subunitwidth = 0.5
        ),
    )
fig.show()