# Time-Series Visualization using plotly
### Nimesh Sinha

In [1]:
import plotly
import pandas as pd
import plotly.plotly as py
import plotly.graph_objs as go
from bokeh.palettes import Spectral11
from bokeh.plotting import figure, show, output_file
import plotly.offline as offline
from plotly.offline import init_notebook_mode, iplot
from IPython.display import display, HTML
init_notebook_mode(connected=True)

In [2]:
# Location of the monthly property-crime CSV.
# NOTE(review): this is an absolute path on the author's machine; the notebook
# will not run elsewhere until it is pointed at a local copy of the file.
CRIME_CSV_PATH = '/Users/nimesh/Documents/spring2/viz/Monthly_Property_Crime_2005_to_2015.csv'

# Load the raw table into the frame every later cell works from.
df = pd.read_csv(CRIME_CSV_PATH)

In [3]:
# Preview the first rows to check the column layout before deriving new fields.
df.head()

Unnamed: 0,Date,Category,IncidntNum
0,02/01/2014 12:00:00 AM,BURGLARY,506
1,02/01/2007 12:00:00 AM,VANDALISM,531
2,07/01/2012 12:00:00 AM,BURGLARY,522
3,07/01/2013 12:00:00 AM,LARCENY/THEFT,3318
4,08/01/2010 12:00:00 AM,VANDALISM,694


In [4]:
# Distinct crime categories, in order of first appearance in the file.
df['Category'].unique()

array(['BURGLARY', 'VANDALISM', 'LARCENY/THEFT', 'VEHICLE THEFT',
       'STOLEN PROPERTY', 'ARSON'], dtype=object)

In [5]:
# Date values look like 'MM/DD/YYYY hh:mm:ss AM'; slice out the fixed-width
# year and month pieces. Both stay as strings (month is zero-padded), which
# later cells rely on when they compare against values such as '2005'.
df['Year'] = df['Date'].str.slice(6, 10)
df['Month'] = df['Date'].str.slice(0, 2)

I have used plotly for all the visualizations.

# 1. Line charts

In [6]:
# Monthly burglary counts in chronological order.
df1 = df[df['Category'] == 'BURGLARY'].sort_values(by=['Year', 'Month'])

# Parse the 'MM/DD/YYYY hh:mm:ss AM' strings so plotly draws a true date axis
# instead of treating every string as a separate category label.
burglary_dates = pd.to_datetime(df1['Date'], format='%m/%d/%Y %I:%M:%S %p')

data1 = [go.Scatter(
    x=burglary_dates,
    y=df1.IncidntNum,
    mode='lines',
    name='BURGLARY')]

# A figure should be readable on its own, so give it a title and axis labels.
burglary_layout = go.Layout(
    title='Monthly burglary incidents, 2005-2015',
    xaxis=dict(title='Month'),
    yaxis=dict(title='Number of incidents'))

iplot(go.Figure(data=data1, layout=burglary_layout))

The plot shows the monthly number of burglary cases from 2005 to 2015. Burglary cases decrease from 2005 to 2011 and then increase from 2011 to 2015.

# 2. Multiple line charts

In [7]:
# Total incidents per category for each year (`new`) and for each calendar
# month (`new1`).
# Only IncidntNum is aggregated: summing the whole frame would also add up the
# 'Unnamed: 0' row-index column, and how the string columns (Date, Month/Year)
# are handled by a frame-wide sum differs between pandas versions.
new = df.groupby(['Category', 'Year'], as_index=False)['IncidntNum'].sum()
new1 = df.groupby(['Category', 'Month'], as_index=False)['IncidntNum'].sum()

In [8]:
# Categories in the order they should appear in the legend.
line_categories = ['VANDALISM', 'BURGLARY', 'LARCENY/THEFT',
                   'VEHICLE THEFT', 'STOLEN PROPERTY', 'ARSON']

# One line per category. Building the traces in a loop guarantees they differ
# only in the category they select, instead of six hand-copied definitions.
data = []
for category in line_categories:
    yearly = new[new['Category'] == category]
    data.append(go.Scatter(
        x=yearly.Year,
        y=yearly.IncidntNum,
        mode='lines',
        name=category,
    ))

# Title and axis labels so the figure can be read without the surrounding text.
iplot(go.Figure(
    data=data,
    layout=go.Layout(
        title='Yearly incidents by category',
        xaxis=dict(title='Year'),
        yaxis=dict(title='Number of incidents'),
    ),
))

The line plot above shows the year-wise variation in total crime cases for each category, summed across months. Larceny/theft is the most frequent crime in every year, and it shows a major rise after 2011. Vehicle theft declined sharply in 2006, which may be due to changes in rules and policies.

# 3. Bar plots

In [9]:
# Categories in the order they should appear in the legend.
bar_categories = ['VANDALISM', 'BURGLARY', 'LARCENY/THEFT',
                  'VEHICLE THEFT', 'STOLEN PROPERTY', 'ARSON']

# One bar series per category, built in a loop rather than six copied blocks.
data = []
for category in bar_categories:
    yearly = new[new['Category'] == category]
    data.append(go.Bar(
        x=yearly.Year,
        y=yearly.IncidntNum,
        name=category,
    ))

# 'group' is plotly's default bar mode; it is spelled out here to make the
# contrast with the stacked variant explicit.
fig = go.Figure(
    data=data,
    layout=go.Layout(
        title='Yearly incidents by category',
        barmode='group',
        xaxis=dict(title='Year'),
        yaxis=dict(title='Number of incidents'),
    ),
)
iplot(fig)


The bar plot shows the year-wise variation in total crime cases for each category, summed across months. Larceny/theft is the most frequent crime in every year. The number of vandalism cases stays roughly constant from year to year.

# 4. Stacked bar plot

In [10]:
# Order of the bar series, bottom of the stack first.
stack_order = ('VANDALISM', 'BURGLARY', 'LARCENY/THEFT',
               'VEHICLE THEFT', 'STOLEN PROPERTY', 'ARSON')


def _yearly_bar(category):
    """Return a bar trace of the yearly incident totals for one category."""
    rows = new.loc[new['Category'] == category]
    return go.Bar(x=rows['Year'], y=rows['IncidntNum'], name=category)


trace1, trace2, trace3, trace4, trace5, trace6 = map(_yearly_bar, stack_order)
data = [trace1, trace2, trace3, trace4, trace5, trace6]

# Stack the series on top of each other instead of placing them side by side.
layout = go.Layout(barmode='stack')

fig = go.Figure(data=data, layout=layout)
#plotly.offline.plot(fig, filename='stack bar')
iplot(fig)

The stacked bar plot shows the year-wise variation in total crime cases for each category, summed across months. Vehicle theft declined sharply in 2006, which may be due to changes in rules and policies.

# 5. Stacked area charts

In [11]:
# Order of the bands, bottom of the stack first.
area_categories = ['VANDALISM', 'BURGLARY', 'LARCENY/THEFT',
                   'VEHICLE THEFT', 'STOLEN PROPERTY', 'ARSON']

# Month x Category table of incident totals; a missing combination counts as 0.
monthly_counts = (
    new1.pivot(index='Month', columns='Category', values='IncidntNum')
        .reindex(columns=area_categories)
        .fillna(0)
)

# fill='tonexty' only shades the gap down to the previous trace; it does not
# add the series together. Plotting the raw counts therefore produces
# overlapping areas, not a stack. Plot the running total across categories so
# that each line is the upper edge of its own band.
stacked_tops = monthly_counts.cumsum(axis=1)

data = [
    go.Scatter(
        x=list(stacked_tops.index),
        y=list(stacked_tops[category]),
        # Hover shows the category's own count rather than the running total.
        text=['{:,.0f}'.format(count) for count in monthly_counts[category]],
        hoverinfo='x+text+name',
        fill='tonexty',
        name=category,
    )
    for category in area_categories
]

# This cell defines its own layout instead of reusing whatever `layout` an
# earlier cell left behind.
layout = go.Layout(
    title='Incidents by calendar month, stacked by category',
    xaxis=dict(title='Month'),
    yaxis=dict(title='Number of incidents (cumulative)'),
)

fig = go.Figure(data=data, layout=layout)
iplot(fig)

The stacked area plot shows how the number of cases in each crime category varies by calendar month, summed across all years. Larceny/theft records the highest number of cases in every month.

# 6. Heatmap

In [12]:
# One cell per (month, category) pair, coloured by the total incident count.
trace = go.Heatmap(z=new1['IncidntNum'],
                   x=new1['Month'],
                   y=new1['Category'])
data = [trace]

# new1 is aggregated by calendar month, so the title says "monthly".
# nticks is an upper bound on the number of ticks; 12 leaves room for one
# tick per month.
layout = go.Layout(
    title='Category wise monthly crime',
    xaxis=dict(title='Month', ticks='', nticks=12),
    yaxis=dict(ticks=''),
)

fig = go.Figure(data=data, layout=layout)
iplot(fig)

The heatmap shows how the number of cases in each crime category varies by calendar month, summed across all years. Larceny/theft has fewer cases in January and February than in other months.

# 7. Animated plot

In [13]:
# Distinct year strings present in the data, ascending, as a plain list.
years = sorted(df['Year'].unique())
years


['2005',
 '2006',
 '2007',
 '2008',
 '2009',
 '2010',
 '2011',
 '2012',
 '2013',
 '2014',
 '2015']

In [14]:
# Second name for the same frame (an alias, not a copy); the animation cell
# reads the data through `dataset`.
dataset = df

In [15]:
# Categories in order of first appearance. This fixes the trace order (and so
# the colours) shared by the initial view and every animation frame.
cats = list(df['Category'].unique())


def _month_markers(year, cat):
    """Return the marker-trace dict for one category in one year.

    Rows are sorted by month so that, provided no month is missing, point i
    refers to the same month in every frame and animated transitions move
    each marker vertically instead of swapping months.
    """
    rows = dataset[(dataset['Year'] == year) & (dataset['Category'] == cat)]
    rows = rows.sort_values('Month')
    return {
        'x': list(rows['Month']),
        'y': list(rows['IncidntNum']),
        'mode': 'markers',
        'text': list(rows['Category']),
        'name': cat
    }


# Fixed y-range so the axis does not rescale between frames. It is derived
# from the data (plus 5% headroom) because a hard-coded upper limit of 1000
# hides any category whose monthly counts exceed it.
y_upper = float(dataset['IncidntNum'].max()) * 1.05

# NOTE(review): `figure` rebinds the name imported from bokeh.plotting in the
# import cell; none of the cells shown call the bokeh function afterwards.
figure = {
    # Initial view: the first year on the slider.
    'data': [_month_markers(years[0], cat) for cat in cats],
    'layout': {
        # Months run 1..12; pad the range so January and December markers
        # are not drawn on the plot border.
        'xaxis': {'range': [0, 13], 'title': 'Month'},
        'yaxis': {'title': 'Number of incidents', 'range': [0, y_upper]},
        'hovermode': 'closest',
        'updatemenus': [
            {
                'buttons': [
                    {
                        'args': [None, {'frame': {'duration': 500, 'redraw': False},
                                 'fromcurrent': True, 'transition': {'duration': 300, 'easing': 'quadratic-in-out'}}],
                        'label': 'Play',
                        'method': 'animate'
                    },
                    {
                        # Animating to [None] in immediate mode stops playback.
                        'args': [[None], {'frame': {'duration': 0, 'redraw': False}, 'mode': 'immediate',
                        'transition': {'duration': 0}}],
                        'label': 'Pause',
                        'method': 'animate'
                    }
                ],
                'direction': 'left',
                'pad': {'r': 10, 't': 87},
                'showactive': False,
                'type': 'buttons',
                'x': 0.1,
                'xanchor': 'right',
                'y': 0,
                'yanchor': 'top'
            }
        ]
    },
    'frames': []
}

# Year slider; 'active': 0 matches the initial view built from years[0].
sliders_dict = {
    'active': 0,
    'yanchor': 'top',
    'xanchor': 'left',
    'currentvalue': {
        'font': {'size': 20},
        'prefix': 'Year:',
        'visible': True,
        'xanchor': 'right'
    },
    'transition': {'duration': 300, 'easing': 'cubic-in-out'},
    'pad': {'b': 10, 't': 50},
    'len': 0.9,
    'x': 0.1,
    'y': 0,
    'steps': []
}

# One frame and one slider step per year; the step refers to its frame by name.
for year in years:
    figure['frames'].append({
        'data': [_month_markers(year, cat) for cat in cats],
        'name': str(year)
    })
    sliders_dict['steps'].append({
        'args': [
            [year],
            {'frame': {'duration': 300, 'redraw': False},
             'mode': 'immediate',
             'transition': {'duration': 300}}
        ],
        'label': year,
        'method': 'animate'
    })

figure['layout']['sliders'] = [sliders_dict]
iplot(figure)

The animated plot above shows the number of incidents in each category for every month, with one frame per year.