In [1]:
%matplotlib inline
import pandas as pd
import geopandas as gpd
import numpy as np
import folium
from shapely.geometry import Polygon, Point 
import folium.colormap as cm

In [2]:
# Load the crime records from a local pickle file.
# NOTE(review): unpickling can execute arbitrary code, so only load this file
# when it comes from a trusted source.
crime_chicago = pd.read_pickle('crime_chicago_with_timestamp.pkl')

#### Binning

In this part we tried different numbers of bins and checked how much detail the resulting map shows.

In [27]:
# Grid resolution: the bounding box of the data is split into `number_of_bins`
# steps along each axis; `no` is the short alias used by the binning code.
number_of_bins = no = 100

# Lower corner of the bounding box.
lat_min, lon_min = crime_chicago['Latitude'].min(), crime_chicago['Longitude'].min()

# Extent of the bounding box along each axis.
lat_span = crime_chicago['Latitude'].max() - lat_min
lon_span = crime_chicago['Longitude'].max() - lon_min

# Size of a single grid cell along each axis.
lat_step, lon_step = lat_span / no, lon_span / no

In [28]:
# Flatten each record's 2-D grid cell into a single id:
#     bin = no * longitude_index + latitude_index
# which is the encoding getCornerLatLonForBin() inverts.
#
# Records lying exactly on the maximum latitude/longitude floor to index `no`,
# one past the last cell. Without the clip, a max-latitude point would be
# aliased into row 0 of the next column and a max-longitude point would land
# in a column outside the grid, so both indices are capped at `no - 1`.
# Rows with missing coordinates keep a NaN bin.
crime_chicago['bin'] = \
    no*np.floor((crime_chicago['Longitude']-lon_min)/lon_step).clip(upper=no-1) + \
       np.floor((crime_chicago['Latitude'] -lat_min)/lat_step).clip(upper=no-1)

In [29]:
def getCornerLatLonForBin(bin_no):
    """Return the (latitude, longitude) of the lowest corner of a grid bin.

    The bin id is decoded as ``bin_no = no * column + row``: the remainder
    selects the latitude step and the quotient selects the longitude step.
    Reads the module-level grid parameters ``no``, ``lat_min``, ``lon_min``,
    ``lat_step`` and ``lon_step``.
    """
    row = bin_no % no
    column = np.floor(bin_no / no)
    corner_lat = lat_min + row * lat_step
    corner_lon = lon_min + column * lon_step
    return (corner_lat, corner_lon)

In [30]:
def getPolygonForBin(bin_no):
    """Build the rectangle covered by a grid bin with (lat, lon) vertices.

    Each vertex is passed to ``Polygon`` as a (latitude, longitude) pair, so
    latitude is the first coordinate. The rectangle starts at the corner
    returned by ``getCornerLatLonForBin`` and spans one ``lat_step`` by one
    ``lon_step``.
    """
    lat_lo, lon_lo = getCornerLatLonForBin(bin_no)
    lat_hi = lat_lo + lat_step
    lon_hi = lon_lo + lon_step
    corners = [
        (lat_lo, lon_lo),
        (lat_lo, lon_hi),
        (lat_hi, lon_hi),
        (lat_hi, lon_lo),
    ]
    return Polygon(corners)

In [31]:
def getPolygonForBinReverse(bin_no):
    """Build the rectangle covered by a grid bin with (lon, lat) vertices.

    Same rectangle as the (lat, lon) variant, but each vertex is passed to
    ``Polygon`` as a (longitude, latitude) pair, so longitude is the first
    coordinate.
    """
    lat_lo, lon_lo = getCornerLatLonForBin(bin_no)
    lat_hi = lat_lo + lat_step
    lon_hi = lon_lo + lon_step
    corners = [
        (lon_lo, lat_lo),
        (lon_hi, lat_lo),
        (lon_hi, lat_hi),
        (lon_lo, lat_hi),
    ]
    return Polygon(corners)

#### Grouping

In [32]:
# Number of crime records per grid bin, as a two-column frame (bin, count).
crime_chicago_count = (
    crime_chicago.groupby('bin')
    .size()
    .reset_index(name='count')
)

We explore the frequency of crimes per bin over the whole period. We want each bin to have a reasonable number of crimes per day.

In [2]:
# Summary statistics of the number of crimes recorded per bin.
crime_chicago_count['count'].describe()

NameError: name 'crime_chicago_count' is not defined

In [3]:
# Distinct values found in the 'Primary Type' column.
crime_chicago['Primary Type'].unique()

NameError: name 'crime_chicago' is not defined

Again, we explore the crimes, this time by type; the types counted below are some of the most common.

In [35]:
# Total number of crime records loaded.
len(crime_chicago)

5846691

In [36]:
# Number of BATTERY records, counted by summing the boolean mask.
int((crime_chicago['Primary Type'] == 'BATTERY').sum())

1073217

In [37]:
# Number of CRIMINAL DAMAGE records, counted by summing the boolean mask.
int((crime_chicago['Primary Type'] == 'CRIMINAL DAMAGE').sum())

675922

In [38]:
# Preview the first few THEFT records. The filter matches a very large number
# of rows, so only a small sample is rendered instead of the whole frame.
crime_chicago[crime_chicago['Primary Type']=='THEFT'].head(10)

Unnamed: 0,ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,...,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location,day,datetime,bin
7,10362148,HY553207,12/28/2015 11:08:00 PM,016XX S HOMAN AVE,0820,THEFT,$500 AND UNDER,VEHICLE NON-COMMERCIAL,False,False,...,30499,77017,2015,01/04/2016 04:02:14 PM,41.858409,-87.710317,"(41.858408833, -87.710317158)",12/28/2015,2015-12-28,3757
13,10363299,HY555030,12/28/2015 11:00:00 PM,034XX N NATOMA AVE,0810,THEFT,OVER $500,STREET,False,False,...,7995,107607,2015,01/04/2016 04:02:14 PM,41.942770,-87.792214,"(41.942770267, -87.792214317)",12/28/2015,2015-12-28,979
17,10362487,HY553386,12/28/2015 10:45:00 PM,028XX W ARDMORE AVE,0810,THEFT,OVER $500,STREET,False,False,...,33066,123808,2015,01/04/2016 04:02:14 PM,41.986755,-87.699625,"(41.986755468, -87.699625337)",12/28/2015,2015-12-28,4191
20,10363518,HY555428,12/28/2015 10:30:00 PM,031XX S INDIANA AVE,0820,THEFT,$500 AND UNDER,STREET,False,False,...,54643,69717,2015,01/04/2016 04:02:14 PM,41.837862,-87.621916,"(41.837862186, -87.621915865)",12/28/2015,2015-12-28,6751
38,10363555,HY555425,12/28/2015 09:28:00 PM,066XX S WESTERN AVE,0810,THEFT,OVER $500,GAS STATION,False,False,...,38049,45875,2015,01/04/2016 04:02:14 PM,41.772798,-87.683468,"(41.77279778, -87.683468276)",12/28/2015,2015-12-28,4634
49,10361580,HY553363,12/28/2015 09:00:00 PM,010XX N ASHLAND AVE,0820,THEFT,$500 AND UNDER,STREET,False,False,...,42073,92249,2015,01/04/2016 04:02:14 PM,41.899968,-87.667399,"(41.899968354, -87.667399375)",12/28/2015,2015-12-28,5268
50,10361648,HY553400,12/28/2015 09:00:00 PM,058XX W FILLMORE ST,0820,THEFT,$500 AND UNDER,STREET,False,False,...,14212,80161,2015,01/04/2016 04:02:14 PM,41.867345,-87.770026,"(41.867345159, -87.77002594)",12/28/2015,2015-12-28,1759
57,10362194,HY553946,12/28/2015 09:00:00 PM,078XX S EVANS AVE,0820,THEFT,$500 AND UNDER,STREET,False,False,...,59113,38654,2015,01/04/2016 04:02:14 PM,41.752520,-87.606477,"(41.752519908, -87.606477362)",12/28/2015,2015-12-28,7228
62,10361412,HY553133,12/28/2015 08:45:00 PM,079XX S PHILLIPS AVE,0820,THEFT,$500 AND UNDER,APARTMENT,False,True,...,70433,38240,2015,01/04/2016 04:02:14 PM,41.751114,-87.565009,"(41.751113941, -87.565008576)",12/28/2015,2015-12-28,8628
76,10361917,HY553584,12/28/2015 08:00:00 PM,002XX S CANAL ST,0870,THEFT,POCKET-PICKING,OTHER RAILROAD PROP / TRAIN DEPOT,False,False,...,49707,84643,2015,01/04/2016 04:02:14 PM,41.878931,-87.639586,"(41.878931004, -87.639585621)",12/28/2015,2015-12-28,6162


In [39]:
# Number of THEFT records, counted by summing the boolean mask.
int((crime_chicago['Primary Type'] == 'THEFT').sum())

1202328

#### Transforming to GeoDataFrame

In [40]:
# Reset the name before the GeoDataFrame is (re)built.
# NOTE(review): the value is overwritten by the very next assignment to this
# name, so this cell looks redundant. Confirm and remove.
crime_chicago_count_gpd = None

In [41]:
# Wrap the per-bin counts in a GeoDataFrame so a geometry can be attached to each bin.
crime_chicago_count_gpd = gpd.GeoDataFrame(crime_chicago_count)

In [42]:
# Give every bin the rectangle it covers, built with (lon, lat) vertices.
crime_chicago_count_gpd.geometry = \
    crime_chicago_count_gpd['bin'].map(getPolygonForBinReverse)

In [43]:
# Scale every bin's count by the busiest bin so the values fall in (0, 1].
bin_counts = crime_chicago_count_gpd['count']
max_count = bin_counts.max()
crime_chicago_count_gpd['relative_count'] = bin_counts / max_count

In [44]:
# Three-stop colour ramp (dark green -> yellow -> dark red) used to shade the bins.
# NOTE(review): no vmin/vmax is passed, so this relies on the colormap's default
# domain covering the 0..1 range of `relative_count`. Confirm against the folium docs.
linear = cm.LinearColormap(['darkgreen','yellow','darkred'])
# Bare last expression: shows the colormap as the cell output.
linear

In [45]:
def _style_for_relative_count(relative):
    """Return the style dict for one bin: colormap fill colour, no outline."""
    return {'fillColor' : linear(relative), 'weight' : 0}

# One style dict per bin, derived from its relative crime count.
crime_chicago_count_gpd['style'] = \
    crime_chicago_count_gpd['relative_count'].map(_style_for_relative_count)

In [46]:
# Peek at the first row to check the columns built so far.
crime_chicago_count_gpd.head(1)

Unnamed: 0,bin,count,geometry,relative_count,style
0,78,370,"POLYGON ((-87.82109968499999 41.93618273014, -...",0.017246,"{u'fillColor': u'#086900', u'weight': 0}"


In [47]:
# Corner coordinates of the new rectangle, as (latitude, longitude):
# (41.910902, -87.653335) (41.911151, -87.626320)
# (41.867355, -87.644117) (41.867477, -87.623653)

# So the boundaries run from 41.867355 to 41.911151 in latitude
# and from -87.653335 to -87.623653 in longitude.

In [48]:
# Declare the layer's CRS as EPSG:4326 (WGS 84 geographic coordinates).
# NOTE(review): this assigns CRS metadata only; presumably it matches the
# Latitude/Longitude columns the bins were built from. Confirm.
crime_chicago_count_gpd.crs = {'init': 'epsg:4326', 'no_defs': True}

In [49]:
# Base map centred on (41.80, -87.75) with the light 'cartodbpositron' tiles.
m = folium.Map(location=[41.80, -87.75], tiles='cartodbpositron', zoom_start=11)

# Overlay the binned crime counts as a GeoJSON layer.
bins_layer = folium.GeoJson(crime_chicago_count_gpd)
bins_layer.add_to(m)

# Bare last expression: renders the map as the cell output.
m

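The commented-out cell below is an unfinished sketch of styling the bins through a `style_function`; it may be useful later.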

In [50]:
# m = folium.Map([41.80,-87.75], zoom_start=11, tiles='cartodbpositron')

# folium.GeoJson(
#     crime_chicago_count_gpd,
#     style_function=lambda feature: {
#         'fillColor': linear(crime_chicago_count_gpd[]),
#         'color' : 'black',
#         'weight' : 2,
#         'dashArray' : '5, 5'
#         }).add_to(m)

# m