In [2]:
# Data processing
import pandas as pd
import numpy as np
# Visualisation
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
from folium import plugins
from folium.plugins import MarkerCluster, FastMarkerCluster, HeatMapWithTime

In [3]:
from bokeh.application import Application
from bokeh.application.handlers import FunctionHandler
from bokeh.core.properties import value
from bokeh.io import show, output_file, output_notebook, push_notebook
from bokeh.layouts import column, row, WidgetBox
from bokeh.models import CategoricalColorMapper, HoverTool, ColumnDataSource, CustomJS, Panel
from bokeh.models.widgets import CheckboxGroup, Slider, RangeSlider, Tabs
from bokeh.palettes import Category20_16, Viridis256
from bokeh.plotting import figure

# Configure Bokeh to render plots inline in the notebook's output cells.
output_notebook()

In [4]:
# Raw inputs are referenced through relative paths; each tabular file is
# loaded right after its path is declared.

# Census socioeconomic indicators (read with pandas' default delimiter).
Path_Socio = "../data/raw/Census_Data_-_Selected_socioeconomic_indicators_in_Chicago__2008___2012.csv"
df_Socio = pd.read_csv(Path_Socio)

# Crime records (this export is read as semicolon-separated).
Path_Crime = "../data/raw/Crimes_-_2001_to_present.csv"
df_Crime = pd.read_csv(Path_Crime, sep=';')

# GeoJSON path; only declared here, not loaded in this cell.
Path_geo_json = "../data/raw/chicago_police_districts.geojson"

In [5]:
def rename_columns_socio():
    """
    Build the column-rename mapping for the socioeconomic indicators table.

    Returns:
        dict: original column header -> snake_case column name, in a form
        that can be passed to ``DataFrame.rename(columns=...)``.
    """
    header_pairs = [
        ('Community Area Number', 'community_area_number'),
        ('COMMUNITY AREA NAME', 'community_area_name'),
        ('PERCENT OF HOUSING CROWDED', 'pct_housing_crowded'),
        ('PERCENT HOUSEHOLDS BELOW POVERTY', 'pct_households_below_poverty'),
        ('PERCENT AGED 16+ UNEMPLOYED', 'pct_age16_unemployed'),
        ('PERCENT AGED 25+ WITHOUT HIGH SCHOOL DIPLOMA', 'pct_age25_no_highschool'),
        ('PERCENT AGED UNDER 18 OR OVER 64', 'pct_not_working_age'),
        # Identity entry: leaves an already-normalised column name unchanged.
        ('per_capita_income', 'per_capita_income'),
        ('HARDSHIP INDEX', 'hardship_index'),
        # The trailing space inside this key is intentional and must be kept.
        ('PER CAPITA INCOME ', 'per_capita_income'),
    ]
    return dict(header_pairs)

def rename_columns_crimes():
    """
    Build the column-rename mapping for the crime records table.

    Most headers are normalised by lower-casing and replacing spaces with
    underscores; the two headers that do not follow that rule are listed
    explicitly.

    Returns:
        dict: original column header -> snake_case column name, in a form
        that can be passed to ``DataFrame.rename(columns=...)``.
    """
    headers = (
        'ID', 'Case Number', 'Date', 'Block', 'IUCR', 'Primary Type',
        'Description', 'Location Description', 'Arrest', 'Domestic', 'Beat',
        'District', 'Ward', 'Community Area', 'FBI Code', 'X Coordinate',
        'Y Coordinate', 'Year', 'Updated On', 'Latitude', 'Longitude',
        'Location',
    )
    mapping = {header: header.lower().replace(' ', '_') for header in headers}

    # Headers whose target name is not the plain normalised form.
    # NOTE(review): 'cas_number' looks like a typo for 'case_number'; it is
    # kept as-is because other code may rely on this name - confirm.
    mapping.update({
        'Case Number': 'cas_number',
        'Community Area': 'community_area_number',
    })
    return mapping

In [6]:
# Normalise column names on both tables. The renamed frames are rebound to
# the same names instead of mutating in place (`inplace=True` brings no
# performance benefit and prevents method chaining).
df_Socio = df_Socio.rename(columns=rename_columns_socio())
df_Crime = df_Crime.rename(columns=rename_columns_crimes())

In [11]:
# Keep only the crime records whose `year` column equals 2012.
df_Crime_2012 = df_Crime.loc[df_Crime['year'] == 2012]

In [12]:
# Attach the socioeconomic indicators to each 2012 crime record. A left join
# keeps every crime row, including those without a matching community area.
df_src = df_Crime_2012.merge(df_Socio, how='left', on='community_area_number')

In [13]:
# Number of crime records per (community area, primary crime type) pair,
# stored in a column named `nb_crimes`.
df_grouped = (
    df_src
    .groupby(['community_area_name', 'primary_type'], as_index=False)['id']
    .count()
    .rename(columns={'id': 'nb_crimes'})
)

In [14]:
# Unseeded random preview of 10 rows; the rows shown differ on every run.
df_grouped.sample(n=10)

Unnamed: 0,community_area_name,primary_type,nb_crimes
77,Ashburn,DECEPTIVE PRACTICE,137
754,Humboldt park,CRIMINAL DAMAGE,1014
1702,West Garfield Park,OTHER OFFENSE,249
1414,Riverdale,SEX OFFENSE,12
1461,Roseland,PROSTITUTION,135
111,Auburn Gresham,PROSTITUTION,18
502,Edgewater,BATTERY,525
692,Greater Grand Crossing,KIDNAPPING,4
1245,North Lawndale,KIDNAPPING,4
143,Austin,THEFT,3276


In [22]:
# Distinct crime types and community area names, in order of first appearance.
list_primary_type = df_grouped['primary_type'].unique().tolist()
list_community_area = df_grouped['community_area_name'].unique().tolist()

In [23]:
# Empty Bokeh figure created with default settings; `p` is rebound to a
# configured figure in the following cell.
p = figure()

In [None]:
# Bokeh demo: a stacked bar chart whose number of visible categories is
# driven by a slider through a client-side (JavaScript) callback.
fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
years = ["2015", "2016", "2017"]
colors = ["#c9d9d3", "#718dbf", "#e84d60"]

data = {'fruits' : fruits,
        '2015'   : [2, 1, 4, 3, 2, 4],
        '2016'   : [5, 3, 4, 2, 4, 6],
        '2017'   : [3, 2, 4, 4, 5, 3]}

source = ColumnDataSource(data=data)

# Number of categories displayed before the slider is touched. The same value
# seeds both the figure's x_range and the slider so they cannot drift apart.
n_visible = 3

p = figure(x_range=fruits[:n_visible], plot_height=350, title="Fruit Counts by Year",
           toolbar_location=None, tools="")

# One stacked glyph renderer per year; `value(...)` marks each legend entry as
# a literal label rather than a column name of `source`.
renderers = p.vbar_stack(years, x='fruits', width=0.9, color=colors, source=source,
                         legend=[value(year) for year in years], name=years)

p.y_range.start = 0
p.x_range.range_padding = 0.1
p.xgrid.grid_line_color = None
p.axis.minor_tick_line_color = None
p.outline_line_color = None
p.legend.location = "top_left"
p.legend.orientation = "horizontal"

# The callback is triggered by the slider (`cb_obj` is the slider), not by the
# x_range it modifies. It assigns a NEW array to `factors`: mutating the
# existing array in place does not notify BokehJS, so nothing would redraw.
callback = CustomJS(args=dict(fig=p, source=source), code="""
    var all_factors = source.data['fruits'];
    fig.x_range.factors = all_factors.slice(0, cb_obj.value);
""")

slider = Slider(start=1, end=len(fruits), value=n_visible, step=1, title="Range")
# `js_on_change` replaces the `callback=` constructor argument, which newer
# Bokeh releases no longer accept.
slider.js_on_change('value', callback)

layout = column(p, WidgetBox(children=[slider]))
output_file("ChangenumCat.html")
show(layout)