# Download the Data

In [None]:
import requests

# Source: City of Chicago open-data portal, CSV export of dataset kf7e-cur8.
file_url = 'https://data.cityofchicago.org/api/views/kf7e-cur8/rows.csv?accessType=DOWNLOAD'

# Stream the download so the whole file is never held in memory.
# - timeout: a stalled connection raises instead of hanging the kernel forever.
# - the `with` block releases the connection even if writing fails.
with requests.get(file_url, stream=True, timeout=60) as r:
    # Fail loudly on 4xx/5xx instead of saving an error page as "chicago.csv".
    r.raise_for_status()
    with open("chicago.csv", "wb") as file:
        # 1 MiB chunks: far fewer Python-level iterations than 1 KiB chunks.
        for block in r.iter_content(chunk_size=1024 * 1024):
            if block:  # skip keep-alive chunks
                file.write(block)

# Imports

In [166]:
# Core analysis / plotting stack used by the rest of the notebook.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns  #advanced visualization library
import warnings
# Silences every warning category for the whole session, which also hides
# pandas warnings raised by the in-place column assignments further down.
warnings.filterwarnings('ignore')

# Fast EDA 

In [167]:
import pandas as pd

# Load the CSV written by the download step into a DataFrame.
data = pd.read_csv(filepath_or_buffer='chicago.csv')

### Data description

* TIME : Timestamp of the record 

* REGION_ID : Unique arbitrary number to represent each region 

* SPEED	: Estimated congestion level. Although expressed in miles per hour, this value is more a reflection of the congestion level in the region than it is indicative of the average raw speed vehicles are travelling within the region.

* REGION : 	Name of the region.

* BUS_COUNT	: The number of buses used to estimate traffic.

* NUM_READS : Number of GPS probes received(or used) for estimating the speed for that segment.

* WEST: Approximate longitude of the west edge of the region.

* EAST: Approximate longitude of the east edge of the region.

* SOUTH : Approximate latitude of the south edge of the region.

* NORTH : Approximate latitude of the north edge of the region.

* NW_LOCATION : The location corresponding to the intersection of NORTH and WEST in a format that allows for creation of maps and other geographic operations on this data portal.

* SE_LOCATION	: The location corresponding to the intersection of SOUTH and EAST in a format that allows for creation of maps and other geographic operations on this data portal.

In [168]:
# Parse the raw TIME strings (12-hour clock with AM/PM) into real timestamps.
time_format = "%m/%d/%Y %I:%M:%S %p"
data["TIME"] = pd.to_datetime(data["TIME"], format=time_format)

In [169]:
# Drop records with a SPEED of exactly 0 and every record taken between
# 21:00 and 06:59, keeping only the 07:00-20:59 hours.
excluded_hours = [21, 22, 23, 0, 1, 2, 3, 4, 5, 6]
keep_rows = ~data["SPEED"].isin([0]) & ~data["HOUR"].isin(excluded_hours)
data = data[keep_rows]

In [171]:
# Split the parsed timestamp into calendar components used for grouping later.
timestamp_parts = data['TIME'].dt
data = data.assign(
    DAY=timestamp_parts.day,
    MONTH=timestamp_parts.month,
    YEAR=timestamp_parts.year,
)


Build a list of the region names (needed later for the maps).

In [172]:
# Name of each region, indexed so that list_REGION[k] belongs to REGION_ID k + 1.
# The first REGION value found for an id is used; ids 1..29 are looked up.
list_REGION = [
    data.loc[data['REGION_ID'] == region_id, 'REGION'].unique()[0]
    for region_id in range(1, 30)
]

In [173]:
# Collapse the records to one row per region and hour by averaging the
# measurements; the bounding-box and calendar columns are carried as group keys.
group_keys = ['REGION_ID', 'MONTH', 'DAY', 'YEAR', 'HOUR',
              'NORTH', 'WEST', 'EAST', 'SOUTH', 'DAY_OF_WEEK']
averaged_columns = ['SPEED', 'BUS_COUNT', 'NUM_READS']
data = data.groupby(group_keys)[averaged_columns].mean().reset_index()

In [174]:
# SPEED is expressed in miles per hour (see the data description);
# multiply by 1.609 to get kilometres per hour.
data["SPEEDKM"] = data["SPEED"].mul(1.609)

In [175]:
# Rebuild one timestamp per aggregated row, pinned to the start of the hour.
data['MINUTE'] = '00'
stamp_parts = data[['YEAR', 'MONTH', 'DAY', 'HOUR', 'MINUTE']].astype(str)
stamp_text = stamp_parts['YEAR'].str.cat(
    [stamp_parts['MONTH'], stamp_parts['DAY'], stamp_parts['HOUR'], stamp_parts['MINUTE']],
    sep='-',
)
data['Time'] = pd.to_datetime(stamp_text, format='%Y-%m-%d-%H-%M')

In [176]:
# Centre of each region's bounding box (midpoint of its edges).
data['CENTER_LAT']=data['NORTH']*0.5+0.5*data['SOUTH']
data['CENTER_LON']=data['EAST']*0.5+0.5*data['WEST']

# Replace the timestamp with a human-readable label, e.g.
# "Tue, 01 Jan, 2019 at 7:00 AM"; it is used as the frame caption of the
# animated heat maps.
# The original format used "%l" (12-hour clock without a leading zero), which
# is a C-library extension and not one of Python's documented strftime
# directives, so it raises on platforms that lack it (e.g. Windows).
# The hour is therefore computed explicitly. Unlike glibc's "%l", it is not
# padded with a blank for single-digit hours.
timestamps = data['Time']
hour_12 = ((timestamps.dt.hour + 11) % 12 + 1).astype(str)  # 0 -> 12, 13 -> 1, ...
data['Time'] = (
    timestamps.dt.strftime("%a, %d %b, %Y at ")
    + hour_12
    + timestamps.dt.strftime(":%M %p")
)
data.head()


Unnamed: 0,REGION_ID,MONTH,DAY,YEAR,HOUR,NORTH,WEST,EAST,SOUTH,DAY_OF_WEEK,SPEED,BUS_COUNT,NUM_READS,SPEEDKM,MINUTE,Time,CENTER_LAT,CENTER_LON
0,1,1,1,2019,7,42.026444,-87.709645,-87.654561,41.997946,3,25.556667,13.166667,321.833333,41.120677,0,"Tue, 01 Jan, 2019 at 7:00 AM",42.012195,-87.682103
1,1,1,1,2019,8,42.026444,-87.709645,-87.654561,41.997946,3,25.183333,15.333333,346.666667,40.519983,0,"Tue, 01 Jan, 2019 at 8:00 AM",42.012195,-87.682103
2,1,1,1,2019,9,42.026444,-87.709645,-87.654561,41.997946,3,24.568333,16.666667,372.833333,39.530448,0,"Tue, 01 Jan, 2019 at 9:00 AM",42.012195,-87.682103
3,1,1,1,2019,10,42.026444,-87.709645,-87.654561,41.997946,3,23.805,16.333333,370.5,38.302245,0,"Tue, 01 Jan, 2019 at 10:00 AM",42.012195,-87.682103
4,1,1,1,2019,11,42.026444,-87.709645,-87.654561,41.997946,3,23.521667,19.5,402.166667,37.846362,0,"Tue, 01 Jan, 2019 at 11:00 AM",42.012195,-87.682103


# first steps with Folium 

some things to know about Folium 
* Maps are defined as a folium.Map object, and we can add other folium objects on top of the folium.Map to improve/add to the map rendered
* Folium allows the users to select different map projections. We will be using the Spherical Mercator projection in this article since we are visualizing a relatively small area size, and it is a more commonly used projection.
* We can use different map tiles for the map rendered by Folium, such as from OpenStreetMap (The one I will be using in this tutorial), MapBox (Using tiles from MapBox require you to put the API Key that you have for MapBox licence as one of the arguments), and several other tiles that you can see from this github repo folder or this documentation page

In [177]:
import folium 
from folium import plugins
from folium.plugins import HeatMapWithTime

In [178]:
def generateBaseMap(default_location=[40.693943, -73.985880] ,default_zoom_start=10):
    """Create an empty folium map with a scale bar.

    Parameters
    ----------
    default_location : [lat, lon]
        Point the map is centred on.
        NOTE(review): every call in this notebook passes Chicago coordinates;
        the default value is a different location - confirm it is intended.
    default_zoom_start : int
        Initial zoom level.

    Returns
    -------
    The new ``folium.Map`` object.
    """
    return folium.Map(
        location=default_location,
        control_scale=True,
        zoom_start=default_zoom_start,
    )

In [179]:
# Base map centred on Chicago. It is bound to `base_map` (the name every later
# cell uses) rather than `map`, which would shadow Python's builtin `map()`
# for the rest of the session.
base_map = generateBaseMap([41.881832, -87.623177])


In [180]:
# Inverse of the averaged speed, used as the heat-map weight for the speed
# maps: the lower the speed, the larger the weight.
data['speed'] = data['SPEED'].rdiv(1)

### HeatMapWithTime

We can also animate our heat maps to change the data being shown on it based on certain dimensions (hour,month) using class method called HeatMapWithTime. 

The SPEED in the 29 regions for a whole week (Monday -> Sunday).

I chose the week of the 8th to the 14th of April 2019.

In [182]:
# Week shown on the SPEED map: Monday 8 April to Sunday 14 April 2019, as
# stated in the text above. The previous filter selected 17-23 March, which is
# a Sunday-to-Saturday week and did not match the description.
in_selected_week = (
    (data['YEAR'] == 2019)
    & (data['MONTH'] == 4)
    & data['DAY'].between(8, 14)
)
sub_set = data[in_selected_week].copy()
sub_set = sub_set.replace([np.inf, -np.inf], 0)

# Order rows by time (then region) so that `sub_set.Time.unique()` - used here
# and as the frame index in the map cell - is chronological even if the first
# region has no record for some hour.
sub_set = sub_set.sort_values(['YEAR', 'MONTH', 'DAY', 'HOUR', 'REGION_ID'])

# One heat-map frame per hour: a list of [lat, lon, weight] per region.
l = [
    sub_set.loc[sub_set['Time'] == date, ['CENTER_LAT', 'CENTER_LON', 'speed']].values.tolist()
    for date in sub_set.Time.unique()
]
    

List of the coordinates of the 29 regions

In [183]:
# Marker position ([lat, lon]) of each region; l1[k] belongs to REGION_ID k + 1,
# the same indexing as list_REGION.
# The centres are read straight from `data`, keyed by REGION_ID, instead of
# from the first heat-map frame `l[0]`: that frame puts region k + 1 at
# position k only if all 29 regions reported in that hour, and `l` is
# overwritten by later cells.
region_centers = (
    data.drop_duplicates('REGION_ID')
        .set_index('REGION_ID')[['CENTER_LAT', 'CENTER_LON']]
)
# A missing region id raises KeyError here rather than misplacing markers.
l1 = [region_centers.loc[region_id].tolist() for region_id in range(1, 30)]
   
    

In [184]:
# Start from a fresh map at the Chicago coordinates used throughout the notebook.
base_map = generateBaseMap([41.881832, -87.623177])

# Animated heat layer: one frame per entry of `l`, captioned with the matching label.
frame_labels = sub_set.Time.unique().tolist()
heat_layer = HeatMapWithTime(
    l,
    index=frame_labels,
    radius=40,
    gradient={0.2: 'blue', 0.4: 'lime', 0.6: 'orange', 1: 'red'},
    min_opacity=0.5,
    max_opacity=0.8,
    use_local_extrema=True,
)
heat_layer.add_to(base_map)

# Parent group holding one sub-group (with a single marker) per region.
regions_group = folium.FeatureGroup(name='The 29 regions of Chicago')
base_map.add_child(regions_group)

for region_idx in range(29):
    region_layer = plugins.FeatureGroupSubGroup(regions_group, str(list_REGION[region_idx]))
    base_map.add_child(region_layer)
    folium.Marker(l1[region_idx]).add_to(region_layer)

# Layer control, shown expanded.
folium.LayerControl(collapsed=False).add_to(base_map)

base_map

In [185]:
import os

# Export the current map as a standalone HTML page; the path is relative,
# so the file lands in the working directory.
html_path = os.path.join('SPEED.html')
base_map.save(html_path)

The BUS_COUNT in the 29 regions for a whole week (Monday -> Sunday).

I chose the week of the 21st to the 27th of October 2019.

In [186]:
# Rows of the week 21-27 October 2019.
in_selected_week = (
    (data['YEAR'] == 2019)
    & (data['MONTH'] == 10)
    & data['DAY'].between(21, 27)
)
sub_set = data[in_selected_week].copy()

# One heat-map frame per time label: [lat, lon, bus count] for every region row.
l = [
    sub_set.loc[sub_set['Time'] == date, ['CENTER_LAT', 'CENTER_LON', 'BUS_COUNT']].values.tolist()
    for date in sub_set.Time.unique()
]
    

In [187]:
# Start from a fresh map at the Chicago coordinates used throughout the notebook.
base_map = generateBaseMap([41.881832, -87.623177])

# Animated heat layer: one frame per entry of `l`, captioned with the matching label.
frame_labels = sub_set.Time.unique().tolist()
heat_layer = HeatMapWithTime(
    l,
    index=frame_labels,
    radius=40,
    gradient={0.2: 'blue', 0.4: 'lime', 0.6: 'orange', 1: 'red'},
    min_opacity=0.5,
    max_opacity=0.8,
    use_local_extrema=True,
)
heat_layer.add_to(base_map)

# Parent group holding one sub-group (with a single marker) per region.
regions_group = folium.FeatureGroup(name='The 29 regions of Chicago')
base_map.add_child(regions_group)

for region_idx in range(29):
    region_layer = plugins.FeatureGroupSubGroup(regions_group, str(list_REGION[region_idx]))
    base_map.add_child(region_layer)
    folium.Marker(l1[region_idx]).add_to(region_layer)

# Layer control, shown expanded.
folium.LayerControl(collapsed=False).add_to(base_map)

base_map

In [188]:
import os

# Export the current map as a standalone HTML page; the path is relative,
# so the file lands in the working directory.
html_path = os.path.join('BUS_COUNT.html')
base_map.save(html_path)

In this part I am only going to focus on Sunday traffic in 2018 & 2019.

In [189]:
# Sundays (DAY_OF_WEEK == 1) of 2018 and 2019.
sub_set = data[data['YEAR'].isin([2018, 2019]) & (data['DAY_OF_WEEK'] == 1)].copy()

# `Time` holds display strings ("Sun, 01 Apr, 2018 at ..."), so a plain
# sort_values() orders the frames alphabetically rather than chronologically.
# Sort the rows by the numeric date parts and make `Time` an ordered
# categorical whose category order is that chronological order. Every
# `sub_set.Time.sort_values().unique()` - here and in the map cell that builds
# the frame index - then returns the labels in time order.
sub_set = sub_set.sort_values(['YEAR', 'MONTH', 'DAY', 'HOUR', 'REGION_ID'])
sub_set['Time'] = pd.Categorical(
    sub_set['Time'], categories=sub_set['Time'].unique(), ordered=True
)

# One heat-map frame per hour: [lat, lon, weight] for every region row.
l = [
    sub_set.loc[sub_set['Time'] == date, ['CENTER_LAT', 'CENTER_LON', 'speed']].values.tolist()
    for date in sub_set.Time.sort_values().unique()
]

In [190]:
# Start from a fresh map at the Chicago coordinates used throughout the notebook.
base_map = generateBaseMap([41.881832, -87.623177])

# Animated heat layer: one frame per entry of `l`; the captions use the same
# sorted label sequence that `l` was built from.
frame_labels = sub_set.Time.sort_values().unique().tolist()
heat_layer = HeatMapWithTime(
    l,
    index=frame_labels,
    radius=40,
    gradient={0.2: 'blue', 0.4: 'lime', 0.6: 'orange', 1: 'red'},
    min_opacity=0.5,
    max_opacity=0.8,
    use_local_extrema=True,
)
heat_layer.add_to(base_map)

# Parent group holding one sub-group (with a single marker) per region.
regions_group = folium.FeatureGroup(name='The 29 regions of Chicago')
base_map.add_child(regions_group)

for region_idx in range(29):
    region_layer = plugins.FeatureGroupSubGroup(regions_group, str(list_REGION[region_idx]))
    base_map.add_child(region_layer)
    folium.Marker(l1[region_idx]).add_to(region_layer)

# Layer control, shown expanded.
folium.LayerControl(collapsed=False).add_to(base_map)

base_map

In [191]:
import os

# Export the current map as a standalone HTML page; the path is relative,
# so the file lands in the working directory.
html_path = os.path.join('SPEED_SUNDAYS.html')
base_map.save(html_path)

In this part I am only going to focus on Monday traffic in 2018 & 2019.

In [192]:
# Mondays (DAY_OF_WEEK == 2) of 2018 and 2019.
sub_set = data[data['YEAR'].isin([2018, 2019]) & (data['DAY_OF_WEEK'] == 2)].copy()

# `Time` holds display strings ("Mon, 01 Apr, 2019 at ..."), so a plain
# sort_values() orders the frames alphabetically rather than chronologically.
# Sort the rows by the numeric date parts and make `Time` an ordered
# categorical whose category order is that chronological order. Every
# `sub_set.Time.sort_values().unique()` - here and in the map cell that builds
# the frame index - then returns the labels in time order.
sub_set = sub_set.sort_values(['YEAR', 'MONTH', 'DAY', 'HOUR', 'REGION_ID'])
sub_set['Time'] = pd.Categorical(
    sub_set['Time'], categories=sub_set['Time'].unique(), ordered=True
)

# One heat-map frame per hour: [lat, lon, weight] for every region row.
l = [
    sub_set.loc[sub_set['Time'] == date, ['CENTER_LAT', 'CENTER_LON', 'speed']].values.tolist()
    for date in sub_set.Time.sort_values().unique()
]

In [193]:
# Start from a fresh map at the Chicago coordinates used throughout the notebook.
base_map = generateBaseMap([41.881832, -87.623177])

# Animated heat layer: one frame per entry of `l`; the captions use the same
# sorted label sequence that `l` was built from.
frame_labels = sub_set.Time.sort_values().unique().tolist()
heat_layer = HeatMapWithTime(
    l,
    index=frame_labels,
    radius=40,
    gradient={0.2: 'blue', 0.4: 'lime', 0.6: 'orange', 1: 'red'},
    min_opacity=0.5,
    max_opacity=0.8,
    use_local_extrema=True,
)
heat_layer.add_to(base_map)

# Parent group holding one sub-group (with a single marker) per region.
regions_group = folium.FeatureGroup(name='The 29 regions of Chicago')
base_map.add_child(regions_group)

for region_idx in range(29):
    region_layer = plugins.FeatureGroupSubGroup(regions_group, str(list_REGION[region_idx]))
    base_map.add_child(region_layer)
    folium.Marker(l1[region_idx]).add_to(region_layer)

# Layer control, shown expanded.
folium.LayerControl(collapsed=False).add_to(base_map)

base_map

In [194]:
import os

# Export the current map as a standalone HTML page; the path is relative,
# so the file lands in the working directory.
html_path = os.path.join('SPEED_MONDAYS.html')
base_map.save(html_path)

The traffic on Christmas Eve (i.e. the 24th of December 2019) starting from 5 PM, and on Christmas Day (the 25th of December).

In [195]:
# December 2019: 24 December from 17:00 onwards plus all of 25 December.
in_december_2019 = (data['YEAR'] == 2019) & (data['MONTH'] == 12)
sub_set = data[in_december_2019].copy()
sub_set = sub_set[((sub_set['DAY'] == 24) & (sub_set['HOUR'] >= 17)) | (sub_set['DAY'] == 25)]

# `Time` holds display strings ("Tue, 24 Dec, 2019 at ..."), so a plain
# sort_values() orders the frames alphabetically (e.g. "1:00 PM" ahead of
# "7:00 AM") rather than chronologically.
# Sort the rows by the numeric date parts and make `Time` an ordered
# categorical whose category order is that chronological order. Every
# `sub_set.Time.sort_values().unique()` - here and in the map cell that builds
# the frame index - then returns the labels in time order.
sub_set = sub_set.sort_values(['YEAR', 'MONTH', 'DAY', 'HOUR', 'REGION_ID'])
sub_set['Time'] = pd.Categorical(
    sub_set['Time'], categories=sub_set['Time'].unique(), ordered=True
)

# One heat-map frame per hour: [lat, lon, weight] for every region row.
l = [
    sub_set.loc[sub_set['Time'] == date, ['CENTER_LAT', 'CENTER_LON', 'speed']].values.tolist()
    for date in sub_set.Time.sort_values().unique()
]
    

In [196]:
# Start from a fresh map at the Chicago coordinates used throughout the notebook.
base_map = generateBaseMap([41.881832, -87.623177])

# Animated heat layer: one frame per entry of `l`; the captions use the same
# sorted label sequence that `l` was built from.
frame_labels = sub_set.Time.sort_values().unique().tolist()
heat_layer = HeatMapWithTime(
    l,
    index=frame_labels,
    radius=40,
    gradient={0.2: 'blue', 0.4: 'lime', 0.6: 'orange', 1: 'red'},
    min_opacity=0.5,
    max_opacity=0.8,
    use_local_extrema=True,
)
heat_layer.add_to(base_map)

# Parent group holding one sub-group (with a single marker) per region.
regions_group = folium.FeatureGroup(name='The 29 regions of Chicago')
base_map.add_child(regions_group)

for region_idx in range(29):
    region_layer = plugins.FeatureGroupSubGroup(regions_group, str(list_REGION[region_idx]))
    base_map.add_child(region_layer)
    folium.Marker(l1[region_idx]).add_to(region_layer)

# Layer control, shown expanded.
folium.LayerControl(collapsed=False).add_to(base_map)

base_map

In [197]:
import os

# Export the current map as a standalone HTML page; the path is relative,
# so the file lands in the working directory.
html_path = os.path.join('christmas.html')
base_map.save(html_path)

The traffic at rush hours (8 AM and 5 PM) on Mondays (2018 & 2019).

In [198]:
# Mondays (DAY_OF_WEEK == 2) of 2018 and 2019, restricted to the 08:00 and 17:00 hours.
is_rush_hour_monday = (
    data['YEAR'].isin([2018, 2019])
    & (data['DAY_OF_WEEK'] == 2)
    & data['HOUR'].isin([8, 17])
)
sub_set = data[is_rush_hour_monday].copy()

# `Time` holds display strings ("Mon, 01 Apr, 2019 at ..."), so a plain
# sort_values() orders the frames alphabetically rather than chronologically.
# Sort the rows by the numeric date parts and make `Time` an ordered
# categorical whose category order is that chronological order. Every
# `sub_set.Time.sort_values().unique()` - here and in the map cell that builds
# the frame index - then returns the labels in time order.
sub_set = sub_set.sort_values(['YEAR', 'MONTH', 'DAY', 'HOUR', 'REGION_ID'])
sub_set['Time'] = pd.Categorical(
    sub_set['Time'], categories=sub_set['Time'].unique(), ordered=True
)

# One heat-map frame per hour: [lat, lon, weight] for every region row.
l = [
    sub_set.loc[sub_set['Time'] == date, ['CENTER_LAT', 'CENTER_LON', 'speed']].values.tolist()
    for date in sub_set.Time.sort_values().unique()
]

In [199]:
# Start from a fresh map at the Chicago coordinates used throughout the notebook.
base_map = generateBaseMap([41.881832, -87.623177])

# Animated heat layer: one frame per entry of `l`; the captions use the same
# sorted label sequence that `l` was built from.
frame_labels = sub_set.Time.sort_values().unique().tolist()
heat_layer = HeatMapWithTime(
    l,
    index=frame_labels,
    radius=40,
    gradient={0.2: 'blue', 0.4: 'lime', 0.6: 'orange', 1: 'red'},
    min_opacity=0.5,
    max_opacity=0.8,
    use_local_extrema=True,
)
heat_layer.add_to(base_map)

# Parent group holding one sub-group (with a single marker) per region.
regions_group = folium.FeatureGroup(name='The 29 regions of Chicago')
base_map.add_child(regions_group)

for region_idx in range(29):
    region_layer = plugins.FeatureGroupSubGroup(regions_group, str(list_REGION[region_idx]))
    base_map.add_child(region_layer)
    folium.Marker(l1[region_idx]).add_to(region_layer)

# Layer control, shown expanded.
folium.LayerControl(collapsed=False).add_to(base_map)

base_map

In [200]:
import os

# Export the current map as a standalone HTML page; the path is relative,
# so the file lands in the working directory.
html_path = os.path.join('MONDAYS_RUSH_HOURS.html')
base_map.save(html_path)