In [37]:
import pandas as pd
import numpy as np

import plotly.graph_objects as go
from plotly.subplots import make_subplots

from crime_analysis import *

In [3]:
# Load the 2019 data through the crime_analysis helper (star-imported above).
# The preview in the next cell shows the result is a DataFrame of tweets with
# Date/Year/Month/Time, the raw Tweet and Link, plus tokens and Cleaned_Tweet.
df = crime_analysis(2019)

In [4]:
# Preview the first rows to check the columns returned by crime_analysis.
df.head()

Unnamed: 0,Date,Year,Month,Time,Tweet,Link,tokens,Cleaned_Tweet
0,2019-11-12,2019,11,01:00:39,A badly broken leg could have ended #Brampton ...,https://t.co/mgndlYyBhz,"[badly, broken, leg, could, end, brampton, nat...",badly broken leg could end brampton native tyl...
1,2019-11-12,2019,11,00:00:26,NEW BUSINESS: Align Custom Fit Footwear &amp; ...,https://t.co/hFvGVGsF8n,"[new, business, align, custom, fit, footwear, ...",new business align custom fit footwear amp foo...
2,2019-11-11,2019,11,21:05:53,‘Right time for him to immediately step down’:...,https://t.co/x4oxb4LaDD,"[right, time, immediately, step, sportsnet, cu...",right time immediately step sportsnet cut tie ...
3,2019-11-11,2019,11,20:00:19,Where were you 40 years ago during the #Missis...,https://t.co/UmHGqFToqh,"[40, year, ago, mississauga, miracle, train, d...",40 year ago mississauga miracle train derailment
4,2019-11-11,2019,11,19:57:01,Canadian Broadcast Standards Council says it c...,https://t.co/6l2RIg8EpP,"[canadian, broadcast, standard, council, say, ...",canadian broadcast standard council say take c...


In [5]:
# Testing to see if we are able to get the frequency count for January.
# crime_topics is called with the zero-padded month string '01' and the full
# 2019 frame; its result has a 'word' column, and value_counts() tallies how
# often each keyword occurs in it.
# NOTE(review): the SettingWithCopyWarning and the concat sort warning printed
# below are emitted while this cell runs, presumably inside crime_topics --
# worth fixing there rather than ignoring here.
jan = crime_topics('01', df)

jan['word'].value_counts()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.





police     34
assault     7
robbery     4
shoot       3
stab        2
Name: word, dtype: int64

In [6]:
# Zero-padded month numbers '01' .. '12', the string format crime_topics is
# called with. str.zfill does the padding, so no manual length check is needed
# and no loop temporaries are left behind in the notebook namespace.
months = [str(month_number).zfill(2) for month_number in range(1, 13)]

months

['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12']

## 2019 Monthly Analysis

In [38]:
# Keywords tracked in this analysis; they define the columns of the monthly
# table and the x-axis categories of the yearly bar chart further down.
crimes = ['police', 'shoot', 'stab', 'robbery', 'assault']

In [39]:
# Empty frame with one column per tracked keyword (no rows yet); displayed to
# confirm the column layout before the monthly counts are computed.
yr_crimes = pd.DataFrame(columns=crimes)

yr_crimes

Unnamed: 0,police,shoot,stab,robbery,assault


In [40]:
# Collect one keyword-frequency Series per month, then build the table in a
# single step. This replaces the row-by-row DataFrame.append (deprecated in
# pandas 1.4, removed in 2.0) and makes the cell idempotent: re-running it
# rebuilds yr_crimes instead of stacking another twelve rows onto it.
monthly_counts = [crime_topics(month, df)['word'].value_counts() for month in months]

# columns=crimes keeps the tracked keywords in a fixed order; a keyword with no
# matching tweets in a month (or a month with no data at all) shows up as NaN.
yr_crimes = pd.DataFrame(monthly_counts, columns=crimes)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.





In [41]:
# Label the rows with calendar-month abbreviations; the twelve labels line up
# positionally with the twelve entries of `months` used to build the rows.
month_labels = [
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
]
yr_crimes.index = month_labels

In [42]:
# Let's view our data: one row per month, one column per tracked keyword
yr_crimes

Unnamed: 0,police,shoot,stab,robbery,assault
Jan,34.0,3.0,2.0,4.0,7.0
Feb,48.0,6.0,4.0,2.0,2.0
Mar,39.0,13.0,5.0,6.0,3.0
Apr,35.0,8.0,3.0,2.0,3.0
May,41.0,4.0,4.0,4.0,4.0
Jun,37.0,2.0,10.0,1.0,2.0
Jul,35.0,4.0,1.0,2.0,1.0
Aug,36.0,9.0,1.0,3.0,2.0
Sep,39.0,12.0,5.0,4.0,2.0
Oct,53.0,8.0,3.0,13.0,4.0


As we can see here, we have the frequency of tweets containing each of those words for every month so far. Note that the November data is not complete yet, and we do not have any December data yet.

In [43]:
# Total each keyword over all months, cast the totals to whole numbers and
# store them as a one-column frame labelled with the year.
yearly_data = (
    yr_crimes.sum()
    .astype('int')
    .to_frame(name='2019')
)

yearly_data

Unnamed: 0,2019
police,416
shoot,69
stab,41
robbery,46
assault,31


In [44]:
# Raw array of the yearly totals, in the same keyword order as `crimes`;
# these are the values used as bar heights in the plot below.
yearly_data['2019'].values

array([416,  69,  41,  46,  31])

## Visualization with Plot.ly
We want to use the Plotly helpers imported above to present these results as a monthly analysis table, together with a bar chart of the overall keyword frequencies for the year.

In [46]:
# Initialize the figure: a table in the top row and a bar chart underneath
fig = make_subplots(rows=2, cols=1,
                    specs=[[{'type': 'table'}], [{'colspan': 1}]])

# Monthly table. The month labels live in the index of yr_crimes, so they are
# added explicitly as the first column; without them the rows are unlabeled.
# The keyword columns are derived from the frame instead of being listed one
# by one, so the table follows any change to the tracked keywords.
table_columns = list(yr_crimes.columns)
monthly_analysis = go.Table(
    header=dict(values=['Month'] + table_columns),
    cells=dict(values=[yr_crimes.index.tolist()]
               + [yr_crimes[column] for column in table_columns]))

# Yearly bar chart. The labels are taken from yearly_data itself so that every
# bar is guaranteed to sit under the keyword its total belongs to.
yearly_freq = go.Bar(x=yearly_data.index.tolist(), y=yearly_data['2019'].values,
                     name='Overall Year Statistics')

# Add all the traces
fig.add_trace(monthly_analysis, row=1, col=1)
fig.add_trace(yearly_freq, row=2, col=1)

# Update layout
fig.update_layout(height=1000, width=1000, title_text='Mississauga 2019 Crime Analysis')

fig.show()