In [1]:
import numpy as np
import urllib.request
import pandas as pd
from bokeh.plotting import *
from bokeh.models import *
from bokeh.io import output_file, show
import seaborn as sns

In [2]:
# Load the police incident reports from the local CSV export
# (per the file name, the records run from 2003 to May 2018).
incident_csv = '/Users/rasmuskongsted/Documents/Danmarks Tekniske Universitet/DTU/10. semester/Dataanalyse/Datasæt/Police_Department_Incident_Reports__Historical_2003_to_May_2018.csv'
df = pd.read_csv(incident_csv)

In [3]:
# Crime categories the rest of the analysis is restricted to.
focuscrimes = {
    'WEAPON LAWS',
    'PROSTITUTION',
    'DRIVING UNDER THE INFLUENCE',
    'ROBBERY',
    'BURGLARY',
    'ASSAULT',
    'DRUNKENNESS',
    'DRUG/NARCOTIC',
    'TRESPASS',
    'LARCENY/THEFT',
    'VANDALISM',
    'VEHICLE THEFT',
    'STOLEN PROPERTY',
    'DISORDERLY CONDUCT',
}

# Keep only the incidents whose 'Category' is one of the focus crimes.
is_focus_crime = df['Category'].isin(focuscrimes)
df = df[is_focus_crime]

In [4]:
# Parse the 'Date' strings into timestamps; values that cannot be parsed
# become NaT instead of raising (errors='coerce').
# .assign returns a new frame rather than writing a column into `df`, which
# at this point is a boolean-filtered selection of the loaded data; assigning
# into it directly can trigger pandas' SettingWithCopyWarning.
df = df.assign(Date=pd.to_datetime(df['Date'], errors='coerce'))

# Calendar year of each incident, used for the per-year aggregation.
df = df.assign(Year=df['Date'].dt.year)

# Drop 2018: the source file name indicates the data stops in May 2018, so
# that year is presumably incomplete and not comparable to the full years.
df = df[df['Year'] != 2018]

In [5]:
# Count incidents for every (year, crime) pair, then pivot the crime level
# into columns: one row per year, one column per crime category.
crime_counts = df.groupby(['Year', 'Category']).size()
year_crimes = crime_counts.unstack('Category')

In [6]:
# Normalize for easy comparison: divide every crime's yearly count by that
# crime's total over all years, so each column sums to 1 and crimes with very
# different volumes share one scale.
category_totals = year_crimes.sum(axis=0)
normalized_crimes = year_crimes.div(category_totals, axis=1)

# The plot uses string year labels as categorical x-axis factors ('2003', ...).
# Cast through int first: if date parsing produced any NaT, 'Year' is a float
# column and astype(str) alone would give '2003.0', which matches no factor.
normalized_crimes.index = normalized_crimes.index.astype(int).astype(str)

In [7]:
# Wrap the normalized table in a Bokeh data source; the frame's index and
# each crime column become named columns the glyphs can reference.
# (Pass year_crimes instead to plot the raw counts.)
df_bokeh = ColumnDataSource(data=normalized_crimes)

In [8]:
# Configure Bokeh to render plots inline in the notebook when show() is called.
output_notebook()

In [9]:
# Categorical x-axis factors: one string label per year, 2003 through 2017.
years = [str(year) for year in range(2003, 2018)]

# Plot the crimes in sorted order. `focuscrimes` is a set, and the iteration
# order of a set of strings can differ between kernel restarts, which would
# otherwise reshuffle the colour assignment and legend order from run to run.
crime_names = sorted(focuscrimes)

# One distinct hue per crime; seaborn's (r, g, b) float triples are scaled by
# 255 and formatted as '#rrggbb' hex strings for Bokeh.
colors = sns.color_palette("husl", len(crime_names))
colors = ['#%02x%02x%02x' % (int(r * 255), int(g * 255), int(b * 255)) for r, g, b in colors]

p = figure(x_range=FactorRange(factors=years), title="Crimes per year (Normalized values)", x_axis_label='Year', y_axis_label='Crimes (Normalized)', height=400, width=1000)
k = 0  # not used in this cell; kept in case a later cell reads it

bar = {}    # crime name -> the vbar renderer drawn for it
items = []  # (label, [renderer]) pairs for the custom legend
for indx, i in enumerate(crime_names):
    # Every series starts muted (alpha 0.05); clicking its legend entry
    # toggles the muted state.
    bar[i] = p.vbar(x='Year', top=i, source=df_bokeh, width=0.7,
                    muted_alpha=0.05, muted=True, fill_color=colors[indx])
    items.append((i, [bar[i]]))

# Build the legend once, after all renderers exist, instead of re-creating it
# on every loop iteration; this also keeps `legend` defined when there are no
# crimes to plot.
legend = Legend(items=items, location=(0, 0))
p.add_layout(legend, 'left')  # place the legend outside the plot area, on the left
p.legend.click_policy = "mute"  # clicking an entry mutes/unmutes it ("hide" is the alternative)

# Register the standalone HTML file to write, then render the figure.
output_file("/Users/rasmuskongsted/Documents/Danmarks Tekniske Universitet/DTU/10. semester/Dataanalyse/Gitpage/Madsem2.github.io/bokeh.html")
show(p)

The Bokeh plot above is based on the one from week 6. It shows the development of every focus crime over the years 2003 to 2017. Each crime's yearly counts are normalized by that crime's total over the whole period, so that crimes with very different volumes can be compared on the same scale.
By using the interactive legend in the plot we have been able to investigate interesting patterns for each of the crimes. Some of the more interesting ones are 'Drug/narcotic' falling from 2009 to 2017, 'Vehicle theft' suddenly dropping from 2005 to 2006, and 'Prostitution' falling from 2007 to 2017.
For this project we have chosen to continue with 'Vehicle theft'.