### Objectifs de ce notebook :
> Utilisation des données sur la criminalité du site https://data.cityofchicago.org/ ainsi que des données socio-économiques du ministère de la Santé de Chicago : https://data.cityofchicago.org/Health-Human-Services/Census-Data-Selected-socioeconomic-indicators-in-C/kn9c-c2s2.
- Explication des variables (nom des colonnes)
- Visualisation des différentes données 
- Détection des différentes corrélations entre les variables
- Agrégation du nombre de crimes par mois, par région et par type
- Visualisation

In [1]:
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import IFrame
import datetime
import warnings; warnings.simplefilter('ignore')

In [2]:
# traitement des données
import pandas as pd
import numpy as np
# Visualisation
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
from folium import plugins
from folium.plugins import MarkerCluster, FastMarkerCluster, HeatMapWithTime
import folium

In [3]:
# Map centre for Chicago as [latitude, longitude]; passed to folium.Map as the
# initial location.
# NOTE(review): `location` is a second module-level name bound to the same list.
# It looks like a copy/paste leftover from a `folium.Map(location=...)` call;
# confirm nothing relies on it before removing.
location = [41.85, -87.68]
chicago = location

In [4]:
# Directory (relative to the notebook) where generated HTML maps are written.
Path_figures = '../reports/figures/'

In [5]:
# Locations of the raw inputs, relative to the notebook.
Path_Socio = (
    "../data/raw/"
    "Census_Data_-_Selected_socioeconomic_indicators_in_Chicago__2008___2012.csv"
)
Path_Crime = "../data/raw/" "Crimes_-_2001_to_present.csv"
Path_geo_json = "../data/raw/" "chicago_police_districts.geojson"

# Load both tables; the crimes file is read with ';' as the field separator,
# the socio-economic file with the default ','.
df_Crime = pd.read_csv(Path_Crime, sep=';')
df_Socio = pd.read_csv(Path_Socio)

In [6]:
def rename_columns_socio():
    """
    Return the rename mapping for the socio-economic (census) dataset.

    Keys are column headers as they appear in the CSV; values are the
    snake_case names used in the rest of the notebook. The mapping is meant
    to be passed to ``DataFrame.rename(columns=...)``, which silently ignores
    keys that are not present in the frame, so listing several spellings of
    the same header is harmless.

    Returns
    -------
    dict
        ``{csv_header: snake_case_name}``.
    """
    return {
        'Community Area Number': 'community_area_number',
        'COMMUNITY AREA NAME': 'community_area_name',
        'PERCENT OF HOUSING CROWDED': 'pct_housing_crowded',
        'PERCENT HOUSEHOLDS BELOW POVERTY': 'pct_households_below_poverty',
        'PERCENT AGED 16+ UNEMPLOYED': 'pct_age16_unemployed',
        'PERCENT AGED 25+ WITHOUT HIGH SCHOOL DIPLOMA': 'pct_age25_no_highschool',
        'PERCENT AGED UNDER 18 OR OVER 64': 'pct_not_working_age',
        # NOTE(review): the data-portal export is believed to name this column
        # 'PER CAPITA INCOME ' (with a trailing space) -- confirm against the
        # local CSV. Both upper-case spellings are accepted; the already
        # snake_case key is kept so existing behaviour is unchanged.
        'PER CAPITA INCOME ': 'per_capita_income',
        'PER CAPITA INCOME': 'per_capita_income',
        'per_capita_income': 'per_capita_income',
        'HARDSHIP INDEX': 'hardship_index',
    }

def rename_columns_crimes():
    """
    Return the rename mapping for the crimes dataset.

    Each CSV header is paired with the snake_case column name used by the
    rest of the notebook. The result is intended for
    ``DataFrame.rename(columns=...)``.

    Returns
    -------
    dict
        ``{csv_header: snake_case_name}``.
    """
    header_pairs = [
        ('ID', 'id'),
        # NOTE(review): 'cas_number' looks like a typo for 'case_number'; kept
        # as-is because renaming it would change the resulting column name.
        ('Case Number', 'cas_number'),
        ('Date', 'date'),
        ('Block', 'block'),
        ('IUCR', 'iucr'),
        ('Primary Type', 'primary_type'),
        ('Description', 'description'),
        ('Location Description', 'location_description'),
        ('Arrest', 'arrest'),
        ('Domestic', 'domestic'),
        ('Beat', 'beat'),
        ('District', 'district'),
        ('Ward', 'ward'),
        # Named like the socio-economic key so the two frames share a column name.
        ('Community Area', 'community_area_number'),
        ('FBI Code', 'fbi_code'),
        ('X Coordinate', 'x_coordinate'),
        ('Y Coordinate', 'y_coordinate'),
        ('Year', 'year'),
        ('Updated On', 'updated_on'),
        ('Latitude', 'latitude'),
        ('Longitude', 'longitude'),
        ('Location', 'location'),
    ]
    return dict(header_pairs)

# Normalise the column names of both frames to snake_case. Rebinding (rather
# than inplace=True) keeps the cell safe to re-run.
df_Socio = df_Socio.rename(columns=rename_columns_socio())
df_Crime = df_Crime.rename(columns=rename_columns_crimes())

# Parse timestamps of the form "MM/DD/YYYY HH:MM:SS AM/PM" in one vectorised
# call instead of a per-row strptime: far faster on a large frame, missing
# values become NaT instead of raising, and re-running the cell on an
# already-parsed column is a no-op.
df_Crime['date'] = pd.to_datetime(df_Crime['date'], format="%m/%d/%Y %I:%M:%S %p")

In [7]:
# Distinct values of `primary_type`; used as the options of the crime-type
# dropdowns in the interactive cells.
list_crimes = df_Crime['primary_type'].unique().tolist()

In [8]:
def toString(x):
    """
    Convert a number-like value to the string of its integer part.

    The value is first passed through ``int()`` (which truncates floats
    toward zero and parses numeric strings) and the result is then turned
    into a ``str``; e.g. ``11.0`` becomes ``'11'``.
    """
    whole = int(x)
    return str(whole)

In [9]:
def plot_bar(type_crime):
    """
    Draw a bar chart of the yearly number of incidents of one crime type.

    Reads the module-level ``df_Crime`` frame, keeps the rows whose
    ``primary_type`` equals ``type_crime`` and plots one red bar per year.

    Parameters
    ----------
    type_crime : str
        Value of ``primary_type`` to plot (e.g. an entry of ``list_crimes``).
    """
    plt.figure(figsize=(12, 6))
    selected = df_Crime[df_Crime['primary_type'] == type_crime]

    # One row per year with the incident count; the count column is named
    # after the crime type so it also serves as the y-axis label.
    yearly = selected.groupby('year').size().reset_index(name=type_crime)

    ax = sns.barplot(x='year', y=type_crime, data=yearly, color='red')
    # The title follows the selected crime type instead of always announcing
    # murder rates.
    ax.set_title("CHICAGO " + str(type_crime) + " COUNTS PER YEAR")

In [10]:
def plot_crimes(type_crime, year):
    """
    Scatter the incidents of one crime type for one year, coloured by district.

    Reads the module-level ``df_Crime`` frame and plots ``x_coordinate``
    against ``y_coordinate`` for the matching rows, one colour per
    ``district``.

    Parameters
    ----------
    type_crime : str
        Value of ``primary_type`` to plot.
    year : int
        Value of ``year`` to plot.
    """
    # Rows with x_coordinate == 0 are excluded; rows with a missing coordinate
    # are not matched by this test and are left for seaborn to handle.
    keep = (
        (df_Crime['primary_type'] == type_crime)
        & (df_Crime['year'] == year)
        & (df_Crime['x_coordinate'] != 0)
    )
    df = df_Crime[keep]

    # x / y are passed by keyword: recent seaborn releases no longer accept
    # them positionally. `ci` is omitted because no regression is fitted
    # (fit_reg=False), so it had no effect.
    sns.lmplot(
        x='x_coordinate',
        y='y_coordinate',
        data=df,
        fit_reg=False,
        hue="district",
        palette='Dark2',
        height=15,
        scatter_kws={"marker": "D", "s": 10})
    ax = plt.gca()
    ax.set_title("All " + type_crime + " " + str(year) + " per District")
    
def plot_crime_heatmap(type_crime):
    """
    Show an animated, year-by-year heat map of one crime type over Chicago.

    Builds a folium map centred on ``chicago`` with a police-district
    choropleth layer and a ``HeatMapWithTime`` layer holding one frame per
    year from 2001 to 2017. The map is saved to ``map4.html`` in the current
    working directory and embedded in the notebook through an ``IFrame``.

    Parameters
    ----------
    type_crime : str
        Value of ``primary_type`` to display.
    """
    # Fixed animation window (inclusive); one heat-map frame per year.
    first_year, last_year = 2001, 2017
    years = range(first_year, last_year + 1)

    df = df_Crime[df_Crime.primary_type == type_crime]

    m = folium.Map(chicago, zoom_start=9.5, control_scale=False)
    plugins.Fullscreen(
        position='topright',
        title='Expand me',
        title_cancel='Exit me',
        force_separate_button=True).add_to(m)

    # folium.Choropleth replaces the Map.choropleth method, which recent
    # folium releases no longer provide.
    # NOTE(review): `data` is the raw per-incident frame, so `district` is not
    # unique here, while a choropleth normally expects one value per key.
    # Consider aggregating per district, and check that the `district` values
    # have the same type as the GeoJSON `dist_num` property -- TODO confirm.
    folium.Choropleth(
        geo_data=Path_geo_json,
        name='choropleth',
        data=df,
        columns=['district', 'arrest'],
        key_on='feature.properties.dist_num',
        fill_color='YlOrRd',
        fill_opacity=0.2,
        line_opacity=0.2,
        legend_name=str(type_crime) + ' : ' + str(first_year) + '-' + str(last_year),
        highlight=True).add_to(m)

    # Keep only rows that can be placed on the map and assigned to a frame.
    points = df[['latitude', 'longitude', 'year']].dropna()
    # One list of [lat, lon] pairs per year, selected with a boolean mask
    # rather than a Python-level row loop.
    heat_data = [
        points.loc[points['year'] == y, ['latitude', 'longitude']].values.tolist()
        for y in years
    ]

    m.add_child(plugins.HeatMapWithTime(data=heat_data,
                                        # label each frame with its year
                                        # instead of its position
                                        index=[str(y) for y in years],
                                        auto_play=True,
                                        max_opacity=0.8,
                                        display_index=True,
                                        radius=9,
                                        name='HeatMapWithTime'))
    folium.TileLayer('cartodbpositron').add_to(m)
    folium.LayerControl().add_to(m)
    m.save("map4.html")
    display(IFrame('map4.html', width=990, height=700))
# Marker-cluster map of the incidents of one crime type for one year

def plot_crime_map(type_crime, year):
    """
    Show a clustered marker map of one crime type for one year.

    Builds a folium map with a ``FastMarkerCluster`` of the matching
    incidents and a police-district choropleth layer, saves it under
    ``Path_figures`` as ``<type_crime>_<year>.html`` and embeds that file in
    the notebook through an ``IFrame``.

    Parameters
    ----------
    type_crime : str
        Value of ``primary_type`` to display.
    year : int
        Value of ``year`` to display.
    """
    selected = (df_Crime.primary_type == type_crime) & (df_Crime['year'] == year)
    # Only the coordinates are required to place a marker, so rows are dropped
    # on missing latitude/longitude only, not on any missing column.
    df = df_Crime[selected].dropna(subset=['latitude', 'longitude'])
    lats = list(df.latitude)
    longs = list(df.longitude)

    # Centre on the incidents; with no matching rows the mean would be NaN,
    # so fall back to the fixed city centre.
    if lats:
        center = [np.mean(lats), np.mean(longs)]
    else:
        center = chicago

    m = folium.Map(
        location=center,
        zoom_start=10.3)
    plugins.Fullscreen(
        position='topright',
        title='Expand me',
        title_cancel='Exit me',
        force_separate_button=True).add_to(m)

    FastMarkerCluster(data=list(zip(lats, longs))).add_to(m)

    if not df.empty:
        # folium.Choropleth replaces the Map.choropleth method, which recent
        # folium releases no longer provide.
        # NOTE(review): `data` is the raw per-incident frame, so `district`
        # is not unique here, while a choropleth normally expects one value
        # per key. Consider aggregating per district, and check that the
        # `district` values have the same type as the GeoJSON `dist_num`
        # property -- TODO confirm.
        folium.Choropleth(
            geo_data=Path_geo_json,
            name='choropleth',
            data=df,
            columns=['district', 'arrest'],
            key_on='feature.properties.dist_num',
            fill_color='YlOrRd',
            fill_opacity=0.4,
            line_opacity=0.2,
            legend_name=str(type_crime) + ' : ' + str(year),
            highlight=False).add_to(m)

    folium.TileLayer('cartodbpositron').add_to(m)
    folium.LayerControl().add_to(m)

    # Build the output path once and use it for both saving and embedding.
    out_path = Path_figures + type_crime + "_" + str(year) + ".html"
    m.save(out_path)
    display(IFrame(out_path, width=700, height=700))
    

In [11]:
# Interactive panels: a crime-type dropdown for each plot, plus a year slider
# (2001-2018) for the two per-year views. Each panel gets its own slider
# instance so the panels stay independent.
interact(
    plot_crimes,
    type_crime=list_crimes,
    year=widgets.IntSlider(value=2001, min=2001, max=2018, step=1),
);
interact(
    plot_bar,
    type_crime=list_crimes,
);
interact(
    plot_crime_map,
    type_crime=list_crimes,
    year=widgets.IntSlider(value=2001, min=2001, max=2018, step=1),
);

interactive(children=(Dropdown(description='type_crime', options=('BATTERY', 'OTHER OFFENSE', 'ROBBERY', 'NARC…

interactive(children=(Dropdown(description='type_crime', options=('BATTERY', 'OTHER OFFENSE', 'ROBBERY', 'NARC…

interactive(children=(Dropdown(description='type_crime', options=('BATTERY', 'OTHER OFFENSE', 'ROBBERY', 'NARC…

In [12]:
# Interactive animated heat map: pick the crime type from a dropdown.
interact(
    plot_crime_heatmap,
    type_crime=list_crimes,
);

interactive(children=(Dropdown(description='type_crime', options=('BATTERY', 'OTHER OFFENSE', 'ROBBERY', 'NARC…