## Notebook setup

In [None]:
# Restart Kernel after installation
!pip install --user geopandas

In [1]:
import pandas as pd
import geopandas as gpd
import folium
from folium import plugins
import numpy as np
import json, requests
import pprint
import matplotlib.pyplot as plt
from tqdm import tqdm

## Visual overview of the twenty boroughs of Paris

In [2]:
# Visualize the 20 Paris boroughs obtained through geojson file
paris_boroughs = gpd.read_file('arrondissements.geojson')

# Create blank map
paris_map = folium.Map(location=[48.8534, 2.3488], zoom_start=12)

# Map borough areas onto map
paris_map.choropleth(geo_data=paris_boroughs,
                     name='choropleth',
                     fill_color='beige')
folium.LayerControl().add_to(paris_map)

# Display
paris_map

## Visual overview of the 1,220 Velib stations

In [3]:
# Visualize the 1,220 Velib stations obtained through geojson file
velib_stations = gpd.read_file('velib-emplacement-des-stations.geojson')

# Add a 100 meter radius circle onto map for each station
for idx in tqdm(range(0,len(velib_stations))):
    folium.Circle([velib_stations.iloc[idx]['lat'],
                   velib_stations.iloc[idx]['lon']],
                  color='#3186cc',
                  fill=True,
                  fill_color='#3186cc',
                  radius=100).add_to(paris_map)

# Display
paris_map

100%|██████████| 1221/1221 [00:11<00:00, 108.56it/s]


## Clean up of Velib stations data

In [4]:
velib_stations.drop(['name', 'capacity', 'geometry', 'station_id'], axis=1, inplace=True)


In [5]:
velib_stations.head()

Unnamed: 0,lon,lat
0,2.334743,48.856604
1,2.33736,48.879296
2,2.366143,48.871212
3,2.34367,48.851519
4,2.387555,48.840855


## Add each station\'s borough and keep only intra-Paris locations

In [6]:
# Add an empty Borough column to the dataframe
velib_stations['borough'] = np.nan

In [7]:
# Get postal code through Google API and add to dataframe for each station
KEY = 'AIzaSyDqg9jD_INCX_HUfuOJaHdF3WCohJRX45Y'

# Requests are split into chunks to avoid timeout - takes approx. 6 minutes total
for idx in tqdm(range(0, 300)):
    url = 'https://maps.googleapis.com/maps/api/geocode/json?latlng={},{}&key={}'.format(velib_stations.loc[idx, 'lat'], velib_stations.loc[idx, 'lon'], KEY)
    resp = requests.get(url)
    data = json.loads(resp.text)
    postal_code = data['results'][0]['address_components'][-1]['short_name']
    velib_stations.loc[idx, 'borough'] = postal_code
print('0 to 300 done')

for idx in tqdm(range(300, 600)):
    url = 'https://maps.googleapis.com/maps/api/geocode/json?latlng={},{}&key={}'.format(velib_stations.loc[idx, 'lat'], velib_stations.loc[idx, 'lon'], KEY)
    resp = requests.get(url)
    data = json.loads(resp.text)
    postal_code = data['results'][0]['address_components'][-1]['short_name']
    velib_stations.loc[idx, 'borough'] = postal_code
print('300 to 600 done')

for idx in tqdm(range(600, 900)):
    url = 'https://maps.googleapis.com/maps/api/geocode/json?latlng={},{}&key={}'.format(velib_stations.loc[idx, 'lat'], velib_stations.loc[idx, 'lon'], KEY)
    resp = requests.get(url)
    data = json.loads(resp.text)
    postal_code = data['results'][0]['address_components'][-1]['short_name']
    velib_stations.loc[idx, 'borough'] = postal_code
print('600 to 900 done')

for idx in tqdm(range(900, len(velib_stations))):
    url = 'https://maps.googleapis.com/maps/api/geocode/json?latlng={},{}&key={}'.format(velib_stations.loc[idx, 'lat'], velib_stations.loc[idx, 'lon'], KEY)
    resp = requests.get(url)
    data = json.loads(resp.text)
    postal_code = data['results'][0]['address_components'][-1]['short_name']
    velib_stations.loc[idx, 'borough'] = postal_code
print('all done !!!')

100%|██████████| 300/300 [01:35<00:00,  2.90it/s]
  0%|          | 0/300 [00:00<?, ?it/s]

0 to 300 done


100%|██████████| 300/300 [01:32<00:00,  3.00it/s]
  0%|          | 0/300 [00:00<?, ?it/s]

300 to 600 done


100%|██████████| 300/300 [01:33<00:00,  3.23it/s]
  0%|          | 0/321 [00:00<?, ?it/s]

600 to 900 done


100%|██████████| 321/321 [01:40<00:00,  3.34it/s]

all done !!!





In [8]:
velib_stations.head()

Unnamed: 0,lon,lat,borough
0,2.334743,48.856604,75006
1,2.33736,48.879296,75009
2,2.366143,48.871212,75010
3,2.34367,48.851519,75005
4,2.387555,48.840855,75012


In [9]:
# Keep only Parisian postcodes (starting with 75)
velib_stations['borough'] = velib_stations['borough'].astype(str)
pattern = velib_stations['borough'].str.contains('^75')
velib_stations = velib_stations[pattern]
velib_stations['borough'].value_counts()

75015    81
75020    65
75013    60
75017    59
75019    58
75018    55
75012    53
75011    52
75010    50
75008    50
75009    45
75014    44
75005    38
75016    33
75007    31
75006    30
75116    26
75001    23
75004    23
75002    22
75003    16
Name: borough, dtype: int64

In [10]:
# Save csv file
velib_stations.to_csv(r'velib_stations.csv', index=False)

In [12]:
velib_stations.head()

Unnamed: 0,lon,lat,borough
0,2.334743,48.856604,75006
1,2.33736,48.879296,75009
2,2.366143,48.871212,75010
3,2.34367,48.851519,75005
4,2.387555,48.840855,75012


In [14]:
# New map displaying only Parisian stations
paris_map = folium.Map(location=[48.8534, 2.3488], zoom_start=12)

paris_map.choropleth(geo_data=paris_boroughs,
                     name='choropleth',
                     fill_color='beige')
folium.LayerControl().add_to(paris_map)

for idx in tqdm(range(0,len(velib_stations))):
    folium.Circle([velib_stations.iloc[idx]['lat'],
                   velib_stations.iloc[idx]['lon']],
                  radius=100, popup=str(idx)).add_to(paris_map)

paris_map

100%|██████████| 914/914 [00:15<00:00, 57.64it/s]


## Remove outlier

In [15]:
velib_stations.drop(velib_stations.index[615], inplace=True)

In [16]:
# New map displaying only Parisian stations, clean
paris_map = folium.Map(location=[48.8534, 2.3488], zoom_start=12)

paris_map.choropleth(geo_data=paris_boroughs,
                     name='choropleth',
                     fill_color='beige')
folium.LayerControl().add_to(paris_map)

for idx in tqdm(range(0,len(velib_stations))):
    folium.Circle([velib_stations.iloc[idx]['lat'],
                   velib_stations.iloc[idx]['lon']],
                  radius=100, popup=str(idx)).add_to(paris_map)

paris_map

100%|██████████| 913/913 [00:15<00:00, 59.25it/s]


## Get venues for each borough

In [17]:
# Keep only the surface of each borough
paris_boroughs.drop(['n_sq_co', 'objectid', 'l_ar', 'longueur', 'n_sq_ar', 'l_aroff', 'c_arinsee', 'geometry'], axis=1, inplace=True)

# Replace borough number by postcode
for br in paris_boroughs['c_ar']:
    if br < 10:
        paris_boroughs.loc[paris_boroughs['c_ar'] == br, 'c_ar'] = '7500{}'.format(br)
    else:
        paris_boroughs.loc[paris_boroughs['c_ar'] == br, 'c_ar'] = '750{}'.format(br)

# Rename postcode column
paris_boroughs.rename(index=str, columns={'c_ar': 'postcode'}, inplace=True)
        
# Place postcode column at index 0
paris_boroughs = paris_boroughs[['postcode', 'perimetre', 'surface']]

# Sort by postcode
paris_boroughs.sort_values('postcode', axis=0, inplace=True)
paris_boroughs.reset_index(drop=True, inplace=True)

In [18]:
# Display
paris_boroughs.head()

Unnamed: 0,postcode,perimetre,surface
0,75001,6054.936862,1824613.0
1,75002,4554.10436,991153.7
2,75003,4519.263648,1170883.0
3,75004,5420.908434,1600586.0
4,75005,6239.195396,2539375.0


In [19]:
# Add center coordinates to dataframe for each borough through Google API 
KEY = 'AIzaSyDqg9jD_INCX_HUfuOJaHdF3WCohJRX45Y'

paris_boroughs['lat'] = np.nan
paris_boroughs['lng'] = np.nan

for postcode in tqdm(paris_boroughs['postcode']):
    url = 'https://maps.googleapis.com/maps/api/geocode/json?address={}&key={}'.format(postcode + ',Paris', KEY)
    resp = requests.get(url)
    data = json.loads(resp.text)
    results = data['results'][0]['geometry']['location']
    lat = results.get('lat')
    lng = results.get('lng')
    paris_boroughs.loc[paris_boroughs['postcode'] == postcode, 'lat'] = lat
    paris_boroughs.loc[paris_boroughs['postcode'] == postcode, 'lng'] = lng

100%|██████████| 20/20 [00:13<00:00,  1.58it/s]


In [20]:
# Display
paris_boroughs.head()

Unnamed: 0,postcode,perimetre,surface,lat,lng
0,75001,6054.936862,1824613.0,48.864049,2.331053
1,75002,4554.10436,991153.7,48.867564,2.34399
2,75003,4519.263648,1170883.0,48.86348,2.359115
3,75004,5420.908434,1600586.0,48.853428,2.358279
4,75005,6239.195396,2539375.0,48.843491,2.351834


In [21]:
# Search for venues within each borough's radius through Foursquare API
CLIENT_ID = 'RAYTIFT1ZANF5XJOUIIC4G0VQFNOZQVJ0YD0UOTCFXHI1D2X'
CLIENT_SECRET = 'AAAORVPBONUXTKW2S3Z4AD10AKXBOYDA5QCFMFDJWSWI1KOZ'
VERSION = '20190215'
LIMIT = 50

venues_list = []

for postcode, perimetre, lat, lng in zip(paris_boroughs['postcode'], paris_boroughs['perimetre'], paris_boroughs['lat'], paris_boroughs['lng']):

    url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
        CLIENT_ID, 
        CLIENT_SECRET, 
        VERSION, 
        lat, 
        lng, 
        perimetre,
        LIMIT)

    request = requests.get(url).json()
    results = request['response']['groups'][0]['items']
    
    for v in results:
        venues_list.append([postcode,
                            v['venue']['name'],
                            v['venue']['location']['lat'],
                            v['venue']['location']['lng'],
                            v['venue']['categories'][0]['name']])

In [23]:
# Create venues dataframe from results
columns=['postcode','venue_name','venue_lat','venue_lng','venue_category']
nearby_venues = pd.DataFrame(data=venues_list, columns=columns)
nearby_venues.head()

Unnamed: 0,postcode,venue_name,venue_lat,venue_lng,venue_category
0,75001,Sanukiya,48.864713,2.334059,Udon Restaurant
1,75001,Jardin des Tuileries,48.863642,2.326484,Garden
2,75001,Place Vendôme,48.867798,2.329741,Plaza
3,75001,Hôtel Costes,48.866666,2.327908,Hotel
4,75001,Jardin du Palais Royal,48.864941,2.337728,Garden


In [24]:
print('There are {} uniques categories.'.format(len(nearby_venues['venue_category'].unique())))

There are 89 uniques categories.


In [25]:
nearby_venues['venue_category'].value_counts()

French Restaurant           88
Hotel                       72
Plaza                       65
Art Museum                  46
Wine Bar                    31
Garden                      30
Bookstore                   29
Cocktail Bar                28
Italian Restaurant          24
Sandwich Place              22
Park                        20
Pastry Shop                 18
Historic Site               18
Bakery                      17
Music Venue                 16
Seafood Restaurant          16
Bistro                      16
Concert Hall                14
Restaurant                  14
Gourmet Shop                14
Ice Cream Shop              12
Church                      11
Pizza Place                 11
Pedestrian Plaza            11
Theater                     11
Monument / Landmark         11
Coffee Shop                 11
Department Store            10
Indie Movie Theater         10
Japanese Restaurant         10
                            ..
Tennis Court                 5
Cosmetic

## Regroup similar categories

In [26]:
nearby_venues['venue_category'].replace(to_replace=(r'\w.+Restaurant',
                                                    'Restaurant Store',
                                                    'Restaurant & Drink Shop',
                                                    'Gastropub',
                                                    'Bistro',
                                                    'Steakhouse',
                                                    'Food',
                                                    'Restaurant Truck',
                                                    'Restaurant Court',
                                                    'Diner',
                                                    'Trattoria/Osteria'),
                                                    value='Restaurant', regex=True, inplace=True)

nearby_venues['venue_category'].replace(to_replace=('Pizza Place',
                                                    'Sandwich Place',
                                                    'Burger Joint',
                                                    'Burrito Place',
                                                    'Ice Cream Shop',
                                                    'Mac & Cheese Joint',
                                                    'Salad Place',
                                                    'Creperie',
                                                    'Noodle House',
                                                    'Bagel Shop',
                                                    'BBQ Joint',
                                                    'Wings Joint',
                                                    'Fried Chicken Joint',
                                                    'Deli / Bodega',
                                                    'Food Truck',
                                                    'Fish & Chips Shop',
                                                    'Snack Place',
                                                    'Taco Place',
                                                    'Poke Place',
                                                    'Poutine Place'),
                                                    value='Fast Food', inplace=True)

nearby_venues['venue_category'].replace(to_replace=('Bubble Tea Shop',
                                                    'Tea Room',
                                                    'Juice Bar',
                                                    'Smoothie Shop',
                                                    'Café',
                                                    'Breakfast Spot'),
                                                    value='Coffee Shop', inplace=True)

nearby_venues['venue_category'].replace(to_replace=('Dessert Shop',
                                                    'Donut Shop',
                                                    'Cupcake Shop',
                                                    'Pastry Shop',
                                                    'Chocolate Shop'),
                                                    value='Bakery', inplace=True)

nearby_venues['venue_category'].replace(to_replace=('Farmers Market',
                                                    'Grocery Store',
                                                    'Liquor Store',
                                                    'Fish Market',
                                                    'Food Court',
                                                    'Cheese Shop',
                                                    'Soup Place',
                                                    'Convenience Store',
                                                    'Food & Drink Shop',
                                                    'Health Food Store',
                                                    'Gourmet Shop',
                                                    'Organic Grocery',
                                                    'Fruit & Vegetable Store',
                                                    'Butcher',
                                                    'Beer Store',
                                                    'Wine Shop',
                                                    'Supermarket',
                                                    'Market',
                                                    'Miscellaneous Shop'),
                                                    value='Groceries', inplace=True)

nearby_venues['venue_category'].replace(to_replace=('Shopping Mall',
                                                    'Gift Shop',
                                                    'Smoke Shop',
                                                    'Men\'s Store',
                                                    'Clothing Store',
                                                    'Lingerie Store',
                                                    'Thrift / Vintage Store',
                                                    'Plaza',
                                                    'Department Store',
                                                    'Furniture / Home Store',
                                                    'Auto Workshop',
                                                    'Electronics Store',
                                                    'Pet Store',
                                                    'Women\'s Store',
                                                    'Boutique',
                                                    'Shoe Store',
                                                    'Costume Shop',
                                                    'Flea Market',
                                                    'Tailor Shop',
                                                    'Jewelry Store',
                                                    'Optical Shop',
                                                    'Video Game Store',
                                                    'Rental Car Location',
                                                    'Garden Center',
                                                    'Hobby Shop',
                                                    'Discount Store',
                                                    'Stationery Store',
                                                    'Toy / Game Store',
                                                    'Adult Boutique',
                                                    'Plaza',
                                                    'Arts & Crafts Store',
                                                    'Antique Shop',
                                                    'Pop-Up Shop',
                                                    'Souvenir Shop',
                                                    'Cosmetics Shop'),
                                                    value='Shopping', inplace=True)

nearby_venues['venue_category'].replace(to_replace=('Spa',
                                                    'Tanning Salon',
                                                    'Salon / Barbershop',
                                                    'Health & Beauty Service',
                                                    'Massage Studio'),
                                                    value='Commerce', inplace=True)

nearby_venues['venue_category'].replace(to_replace=('Gym',
                                                    'Gym / Fitness Center',
                                                    'College Gym',
                                                    'Yoga Studio',
                                                    'Pool',
                                                    'Martial Arts Dojo',
                                                    'Sporting Goods Shop',
                                                    'Climbing Gym',
                                                    'Trail',
                                                    'Spa',
                                                    'Beauty',
                                                    'Tanning Salon',
                                                    'Salon / Barbershop',
                                                    'Health & Beauty Service',
                                                    'Massage Studio',
                                                    'Dance Studio',
                                                    'Fitness'),
                                                    value='Wellness', inplace=True)

nearby_venues['venue_category'].replace(to_replace=('Theater',
                                                    'Movie Theater',
                                                    'Bookstore',
                                                    'Comic Shop',
                                                    'Record Shop',
                                                    'Music Store',
                                                    'Recording Studio',
                                                    'Concert Hall',
                                                    'Museum',
                                                    'Art Gallery',
                                                    'Aquarium',
                                                    'Event Space',
                                                    'Church',
                                                    'Art Museum',
                                                    'Multiplex',
                                                    'General Entertainment',
                                                    'Performing Arts Venue',
                                                    'Opera House',
                                                    'Exhibit',
                                                    'Indie Movie Theater',
                                                    'Science Museum',
                                                    'College Arts Building',
                                                    'History Museum'),
                                                    value='Culture', inplace=True)

nearby_venues['venue_category'].replace(to_replace=('Monument / Landmark',
                                                    'Fountain',
                                                    'Scenic Lookout',
                                                    'Lake',
                                                    'Building',
                                                    'Skating Rink',
                                                    'Skate Park',
                                                    'Harbor / Marina',
                                                    'Garden',
                                                    'Historic Site',
                                                    'Botanical Garden',
                                                    'Park',
                                                    'Bridge',
                                                    'Castle',
                                                    'Forest',
                                                    'Canal',
                                                    'Pedestrian Plaza',
                                                    'Roof Deck',
                                                    'Neighborhood',
                                                    'Other Great Outdoors',
                                                    'Sculpture Garden'),
                                                    value='Outdoor', inplace=True)

nearby_venues['venue_category'].replace(to_replace=('Gay Bar',
                                                    'Sports Bar',
                                                    'Gaming Cafe',
                                                    'Wine Bar',
                                                    'Jazz Club',
                                                    'Music Venue',
                                                    'Brewery',
                                                    'Bar',
                                                    'Beer Bar',
                                                    'Hotel Bar',
                                                    'Cocktail Bar',
                                                    'Pub',
                                                    'Hookah Bar',
                                                    'Speakeasy',
                                                    'Lounge',
                                                    'Piano Bar',
                                                    'Nightclub',
                                                    'Sake Bar',
                                                    'Strip Club',
                                                    'Irish Pub'),
                                                    value='Nightlife', inplace=True)

nearby_venues['venue_category'].replace(to_replace=('Stadium',
                                                    'Athletics & Sports',
                                                    'Soccer Stadium',
                                                    'Tennis Court',
                                                    'Basketball Stadium'),
                                                    value='Sports', inplace=True)

nearby_venues['venue_category'].replace(to_replace=('Light Rail Station',
                                                    'Train Station',
                                                    'Bus Line',
                                                    'Bike Rental / Bike Share',
                                                    'Metro Station',
                                                    'General Travel'),
                                                    value='Transports', inplace=True)

nearby_venues['venue_category'].replace(to_replace='Hostel',
                                                    value='Hotel', inplace=True)
                                                    
nearby_venues['venue_category'].replace(to_replace='Coworking Space',
                                                    value='Office', inplace=True)

print('There are {} uniques categories.'.format(len(nearby_venues['venue_category'].unique())))

There are 14 uniques categories.


In [27]:
nearby_venues['venue_category'].value_counts()

Restaurant     230
Culture        157
Outdoor        133
Shopping       115
Nightlife       95
Hotel           72
Fast Food       50
Bakery          45
Groceries       42
Coffee Shop     33
Wellness        14
Sports           7
Transports       4
Commerce         3
Name: venue_category, dtype: int64

In [28]:
nearby_venues.head()

Unnamed: 0,postcode,venue_name,venue_lat,venue_lng,venue_category
0,75001,Sanukiya,48.864713,2.334059,Restaurant
1,75001,Jardin des Tuileries,48.863642,2.326484,Outdoor
2,75001,Place Vendôme,48.867798,2.329741,Shopping
3,75001,Hôtel Costes,48.866666,2.327908,Hotel
4,75001,Jardin du Palais Royal,48.864941,2.337728,Outdoor


In [30]:
# Add venues and heatmap onto Paris map
paris_map = folium.Map(location=[48.8534, 2.3488], zoom_start=12)
velib_stations = pd.read_csv('velib_stations.csv')

for idx in tqdm(range(0,len(nearby_venues))):
    folium.Circle([nearby_venues.iloc[idx]['venue_lat'],
                   nearby_venues.iloc[idx]['venue_lng']],
                  popup=nearby_venues.iloc[idx]['venue_category'],
                  fill_color='red',
                  radius=1).add_to(paris_map)

heatmap_data = nearby_venues[['venue_lat', 'venue_lng']].values.tolist()
paris_map.add_child(plugins.HeatMap(heatmap_data, radius=25))

for idx in tqdm(range(0,len(velib_stations))):
    folium.Circle([velib_stations.iloc[idx]['lat'],
                   velib_stations.iloc[idx]['lon']],
                  radius=100).add_to(paris_map)

paris_map

100%|██████████| 1000/1000 [00:17<00:00, 57.52it/s]
