# Time Series Visualization using `plotly`

Report prepared by **Kunal Kotian** for the Data Visualization Course Assignment 3.

In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np

import plotly
import plotly.plotly as py
import plotly.graph_objs as go

# Switch plotly to offline notebook mode before any `plotly.offline.iplot` call below.
plotly.offline.init_notebook_mode(connected=True)

## Introduction to the Dataset

The dataset explored in this notebook contains monthly property-crime counts for San Francisco. It is sourced from the [Data SF portal](https://data.sfgov.org/Public-Safety/Monthly-Property-Crime-2005-to-2015/k5vw-3yuz).

In [2]:
# Load the monthly property-crime export (expected in the working directory)
# and preview the first rows.
csv_path = 'Monthly_Property_Crime_2005_to_2015.csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,Date,Category,IncidntNum
0,02/01/2014 12:00:00 AM,BURGLARY,506
1,02/01/2007 12:00:00 AM,VANDALISM,531
2,07/01/2012 12:00:00 AM,BURGLARY,522
3,07/01/2013 12:00:00 AM,LARCENY/THEFT,3318
4,08/01/2010 12:00:00 AM,VANDALISM,694


In [3]:
# Summary statistics for every column; include='all' also covers the non-numeric ones.
df.describe(include='all')

Unnamed: 0,Date,Category,IncidntNum
count,792,792,792.0
unique,132,6,
top,04/01/2007 12:00:00 AM,ARSON,
freq,6,132,
mean,,,713.981061
std,,,867.135416
min,,,6.0
25%,,,59.0
50%,,,496.5
75%,,,658.0


There are 6 unique levels in the `Category` column.

In [4]:
# List the distinct crime categories present in the data.
df['Category'].unique()

array(['BURGLARY', 'VANDALISM', 'LARCENY/THEFT', 'VEHICLE THEFT',
       'STOLEN PROPERTY', 'ARSON'], dtype=object)

In [5]:
# Inspect the column dtypes; `Date` is still stored as plain strings (object) here.
df.dtypes

Date          object
Category      object
IncidntNum     int64
dtype: object

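Let's convert `Date` to a datetime column and extract the calendar month into a new `month` column, which is used later to aggregate incidents by month.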

In [6]:
# The raw strings look like '02/01/2014 12:00:00 AM' (month/day/year, 12-hour clock).
# Passing the format explicitly avoids per-value format inference and rules out
# a day-first misreading of dates such as 02/01/2014.
df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%Y %I:%M:%S %p')

# Calendar month number (1-12), used later to aggregate incidents by month.
df['month'] = df['Date'].dt.month

In [7]:
# Re-check the dtypes after the conversion: `Date` and the new `month` column.
df.dtypes

Date          datetime64[ns]
Category              object
IncidntNum             int64
month                  int64
dtype: object

In [8]:
# Preview the frame again, now with parsed dates and the `month` column.
df.head()

Unnamed: 0,Date,Category,IncidntNum,month
0,2014-02-01,BURGLARY,506,2
1,2007-02-01,VANDALISM,531,2
2,2012-07-01,BURGLARY,522,7
3,2013-07-01,LARCENY/THEFT,3318,7
4,2010-08-01,VANDALISM,694,8


## Exploration of Burglary & Larceny Data

### Time History

In [46]:
# Burglary records only, in chronological order for the line plot.
is_burglary = df['Category'] == 'BURGLARY'
df_burgl = df.loc[is_burglary].sort_values(by='Date')

# Grey Helvetica styling shared by both axis titles.
axis_title_font = {'family': 'Helvetica', 'size': 18, 'color': '#7f7f7f'}

data = [go.Scatter(x=df_burgl['Date'], y=df_burgl['IncidntNum'])]

layout = go.Layout(
    title='Time history of the number of burglaries in San Francisco from 2005 - 2015',
    xaxis={
        'title': 'Date',
        'titlefont': dict(axis_title_font),
        'type': 'date',
    },
    yaxis={
        'title': 'Number of Burglaries',
        'titlefont': dict(axis_title_font),
    },
)

fig = go.Figure(data=data, layout=layout)

# Render the figure inline in the notebook.
plotly.offline.iplot(fig)

In [45]:
# One chronologically ordered frame per category to compare.
df_burgl = df.loc[df['Category'] == 'BURGLARY'].sort_values(by='Date')
df_larc = df.loc[df['Category'] == 'LARCENY/THEFT'].sort_values(by='Date')

# Grey Helvetica styling shared by both axis titles.
axis_title_font = {'family': 'Helvetica', 'size': 18, 'color': '#7f7f7f'}

# One line per category; `name` is the label shown in the legend.
burglary_trace = go.Scatter(x=df_burgl['Date'], y=df_burgl['IncidntNum'], name='Burglary')
larceny_trace = go.Scatter(x=df_larc['Date'], y=df_larc['IncidntNum'], name='Larceny')
data = [burglary_trace, larceny_trace]

layout = go.Layout(
    title='Comparison of the time histories of the number of burglaries & larcenies in San Francisco from 2005 - 2015',
    xaxis={
        'title': 'Date',
        'titlefont': dict(axis_title_font),
        'type': 'date',
    },
    yaxis={
        'title': 'Number of Incidents',
        'titlefont': dict(axis_title_font),
    },
)

fig = go.Figure(data=data, layout=layout)

# Render the figure inline in the notebook.
plotly.offline.iplot(fig)

### Monthly Counts

In [27]:
# Total incidents per calendar month, summed over all categories and years.
df_sum = (
    df.groupby('month')['IncidntNum']
    .sum()
    .rename('total_number_of_incidents')
    .reset_index()
)
df_sum.head()

Unnamed: 0,month,total_number_of_incidents
0,1,46369
1,2,40947
2,3,46709
3,4,46204
4,5,46753


In [47]:
# Month labels for the x-axis. They line up with `df_sum` because its rows come
# out of a groupby on the month number and are therefore ordered 1-12.
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

y_data = df_sum['total_number_of_incidents']

# Guard against a month missing from the data, which would silently shift the labels.
assert len(y_data) == len(months), "expected one aggregated row per calendar month"

# Single bar trace: one bar per calendar month.
# Plain dicts replace the deprecated generic go.Marker / go.XAxis / go.YAxis
# classes, matching how the earlier figures in this notebook are built.
trace1 = go.Bar(
    x=months,                      # month labels as x-coordinates
    y=y_data,                      # incident totals as y-coordinates
    marker=dict(color='#E3BA22'),  # bar colour (hex)
)

title = "Total number of property-related crimes in San Francisco by month (2005 - 2015)"  # plot's title

layout = go.Layout(
    title=title,
    showlegend=False,                  # a single trace needs no legend
    yaxis=dict(title='Number of Incidents'),
    xaxis=dict(title='Month'),
    paper_bgcolor='rgb(233,233,233)',  # grey outside the plotting area
    plot_bgcolor='rgb(233,233,233)',   # same grey inside the plotting area
)

fig = go.Figure(data=[trace1], layout=layout)

# Render inline with the offline renderer.
plotly.offline.iplot(fig)

In [29]:
# Total burglaries per calendar month, summed over all years.
df_burgl_grp_sum = (
    df_burgl.groupby('month')['IncidntNum']
    .sum()
    .rename('total_number_of_burglaries')
    .reset_index()
)
df_burgl_grp_sum.head()

Unnamed: 0,month,total_number_of_burglaries
0,1,5555
1,2,4795
2,3,5473
3,4,5438
4,5,5550


In [15]:
# Larceny/theft rows in date order, then summed per calendar month.
df_larceny = df.loc[df['Category'] == 'LARCENY/THEFT'].sort_values(by='Date')
df_larceny_sum = (
    df_larceny.groupby('month')['IncidntNum']
    .sum()
    .rename('total_number_of_larcenies')
    .reset_index()
)
df_larceny_sum.head()

Unnamed: 0,month,total_number_of_larcenies
0,1,26476
1,2,23427
2,3,26866
3,4,26277
4,5,26897


In [40]:
# Month labels for the x-axis; the per-month aggregates used below come out of
# a groupby on the month number, so their rows are ordered 1-12.
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# Only the two series that are plotted are read here. The original cell also
# read `df_arson_sum`, which is never defined in this notebook and whose value
# was unused, so the cell failed with a NameError on a fresh kernel.
y_data_burgl = df_burgl_grp_sum['total_number_of_burglaries']
y_data_larceny = df_larceny_sum['total_number_of_larcenies']

# One bar trace per category. No explicit marker colour is set: the previous
# value 'autocolorscale' is not a colour, so plotly's default trace colours
# are used to tell the two series apart.
trace1 = go.Bar(
    x=months,         # month labels as x-coordinates
    y=y_data_burgl,   # burglary totals as y-coordinates
    name='Burglary',
)

trace2 = go.Bar(
    x=months,          # month labels as x-coordinates
    y=y_data_larceny,  # larceny totals as y-coordinates
    name='Larceny',
)

title = "Comparison of the number of burglaries & larcenies in San Francisco by month (2005 - 2015)"  # plot's title

# Plain dicts replace the deprecated generic go.XAxis / go.YAxis classes,
# matching how the earlier figures in this notebook are built.
layout = go.Layout(
    barmode='stack',                   # stack the two categories within each month
    title=title,
    showlegend=True,                   # the legend identifies the two traces
    yaxis=dict(title='Number of Incidents'),
    xaxis=dict(title='Month'),
    paper_bgcolor='rgb(233,233,233)',  # grey outside the plotting area
    plot_bgcolor='rgb(233,233,233)',   # same grey inside the plotting area
)

fig = go.Figure(data=[trace1, trace2], layout=layout)

# Render inline with the offline renderer.
plotly.offline.iplot(fig)

Overall, we can see that the number of larcenies/thefts is much higher than the number of burglaries. Also, the total number of incidents does not show significant variation by month; the lower February totals in the tables above coincide with February being the shortest month.