In [1]:
# Data processing and plotting libraries
import pandas as pd
import copy
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
from folium.plugins import MarkerCluster, FastMarkerCluster, HeatMapWithTime

In [2]:
from bokeh.io import show, output_notebook, push_notebook
from bokeh.plotting import figure
from bokeh.models import CategoricalColorMapper, HoverTool, ColumnDataSource, Panel
from bokeh.models.widgets import CheckboxGroup, Slider, RangeSlider, Tabs,CheckboxButtonGroup
from bokeh.layouts import column, row, WidgetBox
from bokeh.palettes import Category20_16, Viridis256
from bokeh.application.handlers import FunctionHandler
from bokeh.application import Application

# Configure Bokeh to render its output inline in the notebook's cell outputs.
output_notebook()

In [3]:
# Locations of the raw input files, given relative to the notebook's working directory.
Path_Socio = "../data/raw/Census_Data_-_Selected_socioeconomic_indicators_in_Chicago__2008___2012.csv"
Path_Crime = "../data/raw/Crimes_-_2001_to_present.csv"
# Path to the police-district GeoJSON; it is not read by any of the cells visible here.
Path_geo_json = "../data/raw/chicago_police_districts.geojson"
# The census file is parsed with pandas' default separator, while the crime
# export is parsed as semicolon-delimited.
df_Socio = pd.read_csv(Path_Socio)
df_Crime = pd.read_csv(Path_Crime, sep=';')

In [4]:
# Colour pool used to tint the per-community bars. It is the first 3, 4, ..., 12
# entries of one 12-colour base palette laid end to end: 75 entries in total,
# with many repeats (the base looks like ColorBrewer "Paired" - TODO confirm).
my_color = [
    shade
    for prefix_len in range(3, 13)
    for shade in (
        '#a6cee3', '#1f78b4', '#b2df8a', '#33a02c', '#fb9a99', '#e31a1c',
        '#fdbf6f', '#ff7f00', '#cab2d6', '#6a3d9a', '#ffff99', '#b15928',
    )[:prefix_len]
]

In [5]:
def rename_columns_socio():
    """Return the rename mapping for the socioeconomic (census) table.

    Keys are source column headers (presumably as they appear in the raw CSV);
    values are the snake_case names used by the rest of the notebook.

    Returns
    -------
    dict
        A fresh ``{source_header: snake_case_name}`` dictionary on every call.
    """
    header_pairs = [
        ('Community Area Number', 'community_area_number'),
        ('COMMUNITY AREA NAME', 'community_area_name'),
        ('PERCENT OF HOUSING CROWDED', 'pct_housing_crowded'),
        ('PERCENT HOUSEHOLDS BELOW POVERTY', 'pct_households_below_poverty'),
        ('PERCENT AGED 16+ UNEMPLOYED', 'pct_age16_unemployed'),
        ('PERCENT AGED 25+ WITHOUT HIGH SCHOOL DIPLOMA', 'pct_age25_no_highschool'),
        ('PERCENT AGED UNDER 18 OR OVER 64', 'pct_not_working_age'),
        # Identity entry: a column already carrying the target name is left as is.
        ('per_capita_income', 'per_capita_income'),
        ('HARDSHIP INDEX', 'hardship_index'),
        # The trailing space is part of this key and must be kept.
        ('PER CAPITA INCOME ', 'per_capita_income'),
    ]
    return dict(header_pairs)

def rename_columns_crimes():
    """Return the rename mapping for the crimes table.

    Most headers map to their lower-cased form with spaces replaced by
    underscores; the two headers that do not follow that rule are listed
    explicitly.

    Returns
    -------
    dict
        A fresh ``{source_header: snake_case_name}`` dictionary on every call.
    """
    source_headers = (
        'ID', 'Case Number', 'Date', 'Block', 'IUCR', 'Primary Type',
        'Description', 'Location Description', 'Arrest', 'Domestic', 'Beat',
        'District', 'Ward', 'Community Area', 'FBI Code', 'X Coordinate',
        'Y Coordinate', 'Year', 'Updated On', 'Latitude', 'Longitude',
        'Location',
    )
    irregular = {
        # NOTE(review): 'cas_number' looks like a typo for 'case_number'; it is
        # kept as is because other code may already rely on this name.
        'Case Number': 'cas_number',
        # Named to match the join key of the socioeconomic table.
        'Community Area': 'community_area_number',
    }
    return {
        header: irregular.get(header, header.lower().replace(' ', '_'))
        for header in source_headers
    }

In [6]:
def df_by_year(year):
    """Count crimes per community area and crime type for one year.

    Reads the module-level ``df_Crime`` and ``df_Socio`` frames, which must
    already carry the renamed (snake_case) columns.

    Parameters
    ----------
    year
        Value compared for equality with the ``year`` column of ``df_Crime``.

    Returns
    -------
    pandas.DataFrame
        One row per (``community_area_name``, ``primary_type``) pair, with the
        number of non-null ``id`` values in the ``nb_crimes`` column. Crimes
        whose community area has no match in ``df_Socio`` get a missing name
        from the left join and are therefore dropped by the groupby.
    """
    # Only these columns take part in the join and the aggregation, so the rest
    # are left out of the merge to keep the intermediate frame small.
    crimes_of_year = df_Crime.loc[
        df_Crime['year'] == year,
        ['id', 'primary_type', 'community_area_number'],
    ]
    area_names = df_Socio[['community_area_number', 'community_area_name']]
    merged = pd.merge(crimes_of_year, area_names,
                      on='community_area_number', how='left')
    return (
        merged
        .groupby(['community_area_name', 'primary_type'], as_index=False)['id']
        .count()
        .rename(columns={'id': 'nb_crimes'})
    )

In [7]:
# Rename the columns of both tables in place to the snake_case names returned by
# the mapping helpers; both mappings produce `community_area_number`, the key
# the merges in this notebook join on.
df_Socio.rename(columns=rename_columns_socio(), inplace=True)
df_Crime.rename(columns=rename_columns_crimes(), inplace=True)

In [8]:
# Crimes recorded in 2012, left-joined to the census table on the community
# area number and trimmed to the three columns used for the aggregation.
df_Crime_2012 = df_Crime.loc[df_Crime['year'] == 2012]
df_src = df_Crime_2012.merge(df_Socio, on='community_area_number', how='left')
df_src = df_src.loc[:, ['id', 'primary_type', 'community_area_name']]

In [9]:
# Number of crimes per (community area, crime type) pair for the 2012 subset.
df_grouped = (
    df_src
    .groupby(['community_area_name', 'primary_type'], as_index=False)['id']
    .count()
    .rename(columns={'id': 'nb_crimes'})
)

In [10]:
# df_grouped is already computed by the previous cell; this cell used to repeat
# the identical groupby, so it now only previews the result.
df_grouped.head()

In [11]:
# Distinct crime types and community-area names, in order of first appearance
# in df_grouped; they feed the plot's x-axis categories and the selector labels.
list_primary_type = pd.unique(df_grouped['primary_type']).tolist()
list_community_area = pd.unique(df_grouped['community_area_name']).tolist()

In [12]:
# Stand-alone selector with the first two community areas pre-selected.
# modify_doc builds its own widget under the same name, so this module-level
# instance is not the one wired into the app.
carrier_selection = CheckboxButtonGroup(
    labels=list_community_area,
    active=[0, 1],
)

In [13]:
def modify_doc(doc):
    """Populate a Bokeh document with the interactive "crimes by community" chart.

    The app shows a year slider and one toggle button per community area next to
    a bar chart; changing either control rebuilds the data behind the bars.
    Relies on the module-level ``df_by_year``, ``my_color``,
    ``list_community_area`` and ``list_primary_type``.

    Parameters
    ----------
    doc
        Bokeh document to which the layout (controls + plot) is added as a root.
    """
    import random

    def make_dataset(list_community_area, year):
        """Build the plot's data source for the selected communities and year.

        Parameters
        ----------
        list_community_area : list of str
            Names of the community areas to include, in drawing order.
        year
            Year passed on to ``df_by_year``.

        Returns
        -------
        ColumnDataSource
            Rows of ``df_by_year(year)`` restricted to the selected communities,
            with an extra ``color`` column (one colour per community). Empty,
            but with the same columns, when nothing is selected.
        """
        df_grouped = df_by_year(year)
        # Draw the colours from a shuffled *copy*: the shared my_color list is
        # left untouched, and random.shuffle's second argument (removed in
        # Python 3.11) is no longer needed.
        palette = random.sample(my_color, len(my_color))
        frames = []
        for position, community in enumerate(list_community_area):
            subset = df_grouped[df_grouped['community_area_name'] == community]
            frames.append(subset.assign(
                nb_crimes=subset['nb_crimes'].fillna(0),
                # Wrap around so selecting more communities than there are
                # colours reuses colours instead of raising IndexError.
                color=palette[position % len(palette)],
            ))
        if frames:
            combined = pd.concat(frames)
        else:
            # Nothing selected: pd.concat([]) would raise, so hand back an empty
            # frame with the columns the glyph and hover tool expect.
            combined = pd.DataFrame(columns=['community_area_name', 'primary_type',
                                             'nb_crimes', 'color'])
        return ColumnDataSource(combined)

    def style(p):
        """Apply axis and grid styling to figure ``p`` and return it."""
        p.y_range.start = 0
        p.x_range.range_padding = 0.05
        p.xgrid.grid_line_color = None
        p.xaxis.axis_label = "type of crime"
        p.xaxis.major_label_orientation = 1.2
        return p

    def make_plot(src):
        """Create the bar chart of ``nb_crimes`` per ``primary_type`` backed by ``src``."""
        # Categorical x-axis over the module-level list of crime types. The
        # x_axis_label given here is overridden by style() below.
        p = figure(plot_width=700, plot_height=900, title='crimes by community',
                   x_axis_label='community', y_axis_label='nb_crimes',
                   x_range=list_primary_type)

        p.vbar(x='primary_type', top='nb_crimes', width=1, source=src,
               color='color', hover_fill_color='color', line_color="white")
        # Hover tool in vline mode, showing community, crime type and count.
        hover = HoverTool(tooltips=[('community_area_name', '@community_area_name'),
                                    ('primary_type', '@primary_type'),
                                    ('nb_crimes', '@nb_crimes')], mode='vline')
        p.add_tools(hover)
        return style(p)

    def update(attr, old, new):
        """Widget callback: rebuild the dataset from the current widget state."""
        carriers_to_plot = [carrier_selection.labels[i]
                            for i in carrier_selection.active]
        new_src = make_dataset(carriers_to_plot, year=year_select.value)

        # Update the source used by the vbar glyphs.
        src.data.update(new_src.data)

    carrier_selection = CheckboxButtonGroup(labels=list_community_area, active=[0])
    carrier_selection.on_change('active', update)
    year_select = Slider(start=2001, end=2017,
                         step=1, value=2001,
                         title='year')
    year_select.on_change('value', update)
    controls = WidgetBox(year_select, carrier_selection)

    initial_carriers = [carrier_selection.labels[i] for i in carrier_selection.active]
    src = make_dataset(initial_carriers, year=year_select.value)
    p = make_plot(src)
    layout = row(controls, p)
    doc.add_root(layout)

In [14]:
# Set up a Bokeh application whose documents are populated by modify_doc
# (wrapped in a FunctionHandler).
handler = FunctionHandler(modify_doc)
app = Application(handler)

In [15]:
# Display the interactive application.
show(app)