# 1. Data Scraping for Paris Districts (Neighborhoods)

In [1]:
# importing the libraries for data scraping
import pandas as pd
import numpy as np

- We're going to use the data from an HTML link of a Wikipedia page. It has information about all the neighborhoods of the city of Paris, France.
- The link is : https://fr.geneawiki.com/index.php/Liste_des_quartiers_de_Paris
- We will extract the data from the url with pandas.

In [2]:
url = 'https://fr.geneawiki.com/index.php/Liste_des_quartiers_de_Paris'

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

data = pd.read_html(url) # extracting the data from the html link
# We look for the table we need from the data previously extracted
df = data[1]

df.head()

Unnamed: 0,0,1,2,3,4
0,Code INSEE 1,Code Postal,Arrondissements,Arrondissements,Quartiers
1,75101,75001,I,Le Louvre,01 - Saint-Germain-l'Auxerrois 02 - Les Halles...
2,75102,75002,II,La Bourse,05 - Gaillon 06 - Vivienne 07 - Le Mail 08 - B...
3,75103,75003,III,Le Temple,09 - Les Arts-et-Métiers 10 - Les Enfants-Roug...
4,75104,75004,IV,L'Hôtel-de-Ville,13 - Saint-Merri 14 - Saint-Gervais 15 - L'Ars...


In [3]:
# Making some edits to the dataframe 
df.drop(0, axis = 0, inplace = True)
df.drop([0,2], axis = 1, inplace = True)
df.reset_index(inplace = True, drop = True)

In [4]:
# Renaming the columns
df.rename({1 : 'Postal Code', 3 : 'District', 4 : 'Neighborhoods'}, axis = 1, inplace = True)
df['Postal Code'] = df['Postal Code'].astype(int)
df.head()

Unnamed: 0,Postal Code,District,Neighborhoods
0,75001,Le Louvre,01 - Saint-Germain-l'Auxerrois 02 - Les Halles...
1,75002,La Bourse,05 - Gaillon 06 - Vivienne 07 - Le Mail 08 - B...
2,75003,Le Temple,09 - Les Arts-et-Métiers 10 - Les Enfants-Roug...
3,75004,L'Hôtel-de-Ville,13 - Saint-Merri 14 - Saint-Gervais 15 - L'Ars...
4,75005,Le Panthéon,17 - Saint-Victor 18 - Le Jardin-des-Plantes 1...


In [5]:
print(df.shape)
print(df.dtypes)

(20, 3)
Postal Code       int32
District         object
Neighborhoods    object
dtype: object


# 2. Merging the Geospacial Coordinates with the dataframe

For each postal code, we're going to associate coordinates (lat, long) using the geospatial coordiantes csv file.

In [6]:
coordinates = pd.read_csv("Geospacial_data.csv")

In [7]:
# merging the coordinates with the dataframe with the .merge() function
df = df.merge(coordinates, on = 'Postal Code', how = 'left')
df.head()

Unnamed: 0,Postal Code,District,Neighborhoods,Latitude,Longitude,Population Density ha/km²
0,75001,Le Louvre,01 - Saint-Germain-l'Auxerrois 02 - Les Halles...,48.8592,2.3417,9 041
1,75002,La Bourse,05 - Gaillon 06 - Vivienne 07 - Le Mail 08 - B...,48.8655,2.3426,21 006
2,75003,Le Temple,09 - Les Arts-et-Métiers 10 - Les Enfants-Roug...,48.8637,2.3615,29 956
3,75004,L'Hôtel-de-Ville,13 - Saint-Merri 14 - Saint-Gervais 15 - L'Ars...,48.8601,2.3507,16 966
4,75005,Le Panthéon,17 - Saint-Victor 18 - Le Jardin-des-Plantes 1...,48.8448,2.3471,23 359


# 3. Segmenting and Clustering 

In [8]:
import requests # library to handle requests
import matplotlib.cm as cm
import matplotlib.colors as colors
import folium # for map visualization
from sklearn.cluster import KMeans # clustering algorithm

print('Libraries imported')

Libraries imported


In [9]:
# Visualizing all the Districts of Paris on a folium map

latitude = 48.8566
longitude = 2.3522 # the geospatial coordinates of Paris

map_Paris= folium.Map(location = [latitude, longitude], zoom_start = 11)
for lat, lng, District in zip(df['Latitude'], df['Longitude'], df['District']):
    
    label = '{}'.format(District)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Paris)  
    
map_Paris                       

Now we will start using the Foursquare API to explore the Districts of Paris and segment them.

### Define Foursquare Credentials and Version

In [19]:
# @hidden_cell
CLIENT_ID = 'LFWIVGKLFVHTSTOSWB00B0O0ODDZ0HYAK53AU3NRPEK53VRJ' # Foursquare ID
CLIENT_SECRET = '5L2WQY3QGGDJERHVVYKHIW2TUPVDEOJVHWIWCCO1NQS024WC' # Foursquare Secret
VERSION = '20200105' # Foursquare API version

### Explore the districts of Paris

In [51]:
LIMIT = 50 # limit the number of venues returned by the Foursquare API

radius = 500 # define radius

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['District', 
                  'District Latitude', 
                  'District Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [53]:
paris_venues = getNearbyVenues(names = df['District'],
                                   latitudes = df['Latitude'],
                                   longitudes = df['Longitude']
                                  )

Le Louvre
La Bourse
Le Temple
L'Hôtel-de-Ville
Le Panthéon
Le Luxembourg
Le Palais-Bourbon
L'Élysée
L'Opéra
L'Enclos-Saint-Laurent
Popincourt
Reuilly
Les Gobelins
L'Observatoire
Vaugirard
Passy
Les Batignolles-Monceau
La Butte-Montmartre
Les Buttes-Chaumont
Ménilmontant


In [54]:
paris_venues.shape # the number of venues 

(935, 7)

In [55]:
paris_venues.head()

Unnamed: 0,District,District Latitude,District Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Le Louvre,48.8592,2.3417,Place du Louvre,48.859841,2.340822,Plaza
1,Le Louvre,48.8592,2.3417,Église Saint-Germain-l'Auxerrois (Église Saint...,48.85952,2.341306,Church
2,Le Louvre,48.8592,2.3417,Coffee Crêpes,48.858841,2.340802,Coffee Shop
3,Le Louvre,48.8592,2.3417,Le Fumoir,48.860424,2.340868,Cocktail Bar
4,Le Louvre,48.8592,2.3417,Cour Carrée du Louvre,48.86036,2.338543,Pedestrian Plaza


Lets check the number of venues returned for each district.

In [56]:
paris_venues.groupby('District').count()

Unnamed: 0_level_0,District Latitude,District Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
District,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
L'Enclos-Saint-Laurent,50,50,50,50,50,50
L'Hôtel-de-Ville,50,50,50,50,50,50
L'Observatoire,47,47,47,47,47,47
L'Opéra,50,50,50,50,50,50
L'Élysée,42,42,42,42,42,42
La Bourse,50,50,50,50,50,50
La Butte-Montmartre,50,50,50,50,50,50
Le Louvre,50,50,50,50,50,50
Le Luxembourg,50,50,50,50,50,50
Le Palais-Bourbon,50,50,50,50,50,50


Lets find the number of unique venues in all the districts.

In [57]:
print('There are {} uniques categories.'.format(len(paris_venues['Venue Category'].unique())))

There are 162 uniques categories.


 ## Analyzing each district in Paris

In [58]:
# one hot encoding
paris_onehot = pd.get_dummies(paris_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
paris_onehot['District'] = paris_venues['District'] 

# move neighborhood column to the first column
fixed_columns = [paris_onehot.columns[-1]] + list(paris_onehot.columns[:-1])
paris_onehot = paris_onehot[fixed_columns]

paris_onehot.head()

Unnamed: 0,District,African Restaurant,American Restaurant,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,BBQ Joint,Bakery,Bar,Basketball Court,Basque Restaurant,Beer Bar,Beer Garden,Beer Store,Bike Rental / Bike Share,Bistro,Bookstore,Boutique,Brasserie,Breakfast Spot,Bridge,Bubble Tea Shop,Burger Joint,Café,Cambodian Restaurant,Cemetery,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Clothing Store,Cocktail Bar,Coffee Shop,Concert Hall,Convenience Store,Corsican Restaurant,Cosmetics Shop,Creperie,Cultural Center,Cupcake Shop,Deli / Bodega,Department Store,Dessert Shop,Diner,Doner Restaurant,Electronics Store,Embassy / Consulate,Ethiopian Restaurant,Exhibit,Farmers Market,Fast Food Restaurant,Fish Market,Flower Shop,Food & Drink Shop,Fountain,French Restaurant,Furniture / Home Store,Gaming Cafe,Garden,Gastropub,Gay Bar,Gift Shop,Gluten-free Restaurant,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Health Food Store,Historic Site,Hostel,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Irish Pub,Italian Restaurant,Japanese Restaurant,Juice Bar,Korean Restaurant,Latin American Restaurant,Lebanese Restaurant,Liquor Store,Lounge,Market,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Modern European Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,Multiplex,Museum,Music Store,Music Venue,Noodle House,Okonomiyaki Restaurant,Organic Grocery,Park,Pastry Shop,Pedestrian Plaza,Performing Arts Venue,Perfume Shop,Peruvian Restaurant,Pharmacy,Pizza Place,Playground,Plaza,Pool,Portuguese Restaurant,Pub,Ramen Restaurant,Record Shop,Resort,Restaurant,Russian Restaurant,Salad Place,Sandwich Place,Savoyard Restaurant,Scandinavian Restaurant,Scenic Lookout,Science Museum,Seafood Restaurant,Shanxi Restaurant,Shoe Store,Smoke Shop,Soup Place,South American Restaurant,Southwestern French Restaurant,Souvenir Shop,Souvlaki Shop,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Steakhouse,Supermarket,Sushi Restaurant,Szechuan Restaurant,Taco Place,Tailor Shop,Tapas Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Trail,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store
0,Le Louvre,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Le Louvre,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Le Louvre,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Le Louvre,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Le Louvre,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [59]:
paris_onehot.shape

(935, 163)

### Grouping rows by neighborhood and by taking the mean frequency of occurence of each category

In [60]:
paris_grouped = paris_onehot.groupby('District').mean().reset_index()
paris_grouped.head()

Unnamed: 0,District,African Restaurant,American Restaurant,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,BBQ Joint,Bakery,Bar,Basketball Court,Basque Restaurant,Beer Bar,Beer Garden,Beer Store,Bike Rental / Bike Share,Bistro,Bookstore,Boutique,Brasserie,Breakfast Spot,Bridge,Bubble Tea Shop,Burger Joint,Café,Cambodian Restaurant,Cemetery,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Clothing Store,Cocktail Bar,Coffee Shop,Concert Hall,Convenience Store,Corsican Restaurant,Cosmetics Shop,Creperie,Cultural Center,Cupcake Shop,Deli / Bodega,Department Store,Dessert Shop,Diner,Doner Restaurant,Electronics Store,Embassy / Consulate,Ethiopian Restaurant,Exhibit,Farmers Market,Fast Food Restaurant,Fish Market,Flower Shop,Food & Drink Shop,Fountain,French Restaurant,Furniture / Home Store,Gaming Cafe,Garden,Gastropub,Gay Bar,Gift Shop,Gluten-free Restaurant,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Health Food Store,Historic Site,Hostel,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Irish Pub,Italian Restaurant,Japanese Restaurant,Juice Bar,Korean Restaurant,Latin American Restaurant,Lebanese Restaurant,Liquor Store,Lounge,Market,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Modern European Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,Multiplex,Museum,Music Store,Music Venue,Noodle House,Okonomiyaki Restaurant,Organic Grocery,Park,Pastry Shop,Pedestrian Plaza,Performing Arts Venue,Perfume Shop,Peruvian Restaurant,Pharmacy,Pizza Place,Playground,Plaza,Pool,Portuguese Restaurant,Pub,Ramen Restaurant,Record Shop,Resort,Restaurant,Russian Restaurant,Salad Place,Sandwich Place,Savoyard Restaurant,Scandinavian Restaurant,Scenic Lookout,Science Museum,Seafood Restaurant,Shanxi Restaurant,Shoe Store,Smoke Shop,Soup Place,South American Restaurant,Southwestern French Restaurant,Souvenir Shop,Souvlaki Shop,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Steakhouse,Supermarket,Sushi Restaurant,Szechuan Restaurant,Taco Place,Tailor Shop,Tapas Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Trail,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store
0,L'Enclos-Saint-Laurent,0.02,0.0,0.0,0.02,0.02,0.0,0.0,0.02,0.0,0.06,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.06,0.0,0.0,0.02,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.08,0.04,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.06,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.06,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.02,0.0
1,L'Hôtel-de-Ville,0.0,0.0,0.0,0.0,0.08,0.02,0.0,0.0,0.0,0.06,0.04,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.04,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.08,0.02,0.0,0.02,0.0,0.04,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.06,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.04,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02
2,L'Observatoire,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06383,0.06383,0.0,0.0,0.0,0.0,0.0,0.021277,0.042553,0.021277,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.021277,0.0,0.0,0.276596,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.06383,0.0,0.021277,0.0,0.0,0.0,0.06383,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.021277,0.021277,0.021277,0.0
3,L'Opéra,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.18,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.18,0.0,0.0,0.0,0.0,0.0,0.08,0.02,0.02,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.04,0.0,0.0
4,L'Élysée,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.047619,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.214286,0.0,0.0,0.0,0.0,0.0,0.095238,0.02381,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.02381,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0


In [61]:
# confirm the new size 
paris_grouped.shape 

(20, 163)

### Lets print each neighborhood along with the top 5 most commun venues

In [62]:
num_top_venues = 5

for hood in paris_grouped['District']:
    print("----"+hood+"----")
    temp = paris_grouped[paris_grouped['District'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')
    

----L'Enclos-Saint-Laurent----
               venue  freq
0       Cocktail Bar  0.08
1             Bakery  0.06
2     Breakfast Spot  0.06
3  French Restaurant  0.06
4              Hotel  0.06


----L'Hôtel-de-Ville----
               venue  freq
0  French Restaurant  0.08
1        Art Gallery  0.08
2     Ice Cream Shop  0.06
3             Bakery  0.06
4                Pub  0.04


----L'Observatoire----
                venue  freq
0   French Restaurant  0.28
1               Hotel  0.06
2              Bakery  0.06
3                 Bar  0.06
4  Italian Restaurant  0.06


----L'Opéra----
                venue  freq
0   French Restaurant  0.18
1               Hotel  0.18
2  Italian Restaurant  0.08
3        Cocktail Bar  0.08
4              Lounge  0.04


----L'Élysée----
                venue  freq
0               Hotel  0.21
1  Italian Restaurant  0.10
2                Café  0.07
3              Bakery  0.05
4           Wine Shop  0.05


----La Bourse----
                venue  freq
0   

### Let's put this into a dataframe

In [64]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [66]:
# lets create the new dataframe and display the top 10 venues for each neighborhood

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['District']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['District'] = paris_grouped['District']

for ind in np.arange(paris_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(paris_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,L'Enclos-Saint-Laurent,Cocktail Bar,Hotel,French Restaurant,Seafood Restaurant,Bar,Breakfast Spot,Pizza Place,Bakery,Coffee Shop,Bistro
1,L'Hôtel-de-Ville,French Restaurant,Art Gallery,Ice Cream Shop,Bakery,Burger Joint,Pub,Bar,Plaza,Gay Bar,Park
2,L'Observatoire,French Restaurant,Hotel,Bar,Bakery,Italian Restaurant,Bistro,Garden,Bookstore,Brasserie,Southwestern French Restaurant
3,L'Opéra,French Restaurant,Hotel,Cocktail Bar,Italian Restaurant,Wine Bar,Lounge,Bistro,Café,Museum,Juice Bar
4,L'Élysée,Hotel,Italian Restaurant,Café,French Restaurant,Bar,Thai Restaurant,Theater,Bakery,Wine Shop,Modern European Restaurant


## Clustering the neighborhoods

 We will run k-means to cluster the neighborhoods into 5 clusters.

In [67]:
# set number of clusters
kclusters = 5

paris_grouped_clustering = paris_grouped.drop('District', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(paris_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 3, 2, 1, 1, 2, 2, 3, 2, 2])

In [68]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
paris_merged = df.join(neighborhoods_venues_sorted.set_index('District'), on='District')

paris_merged.head() # check the last columns!

Unnamed: 0,Postal Code,District,Neighborhoods,Latitude,Longitude,Population Density ha/km²,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,75001,Le Louvre,01 - Saint-Germain-l'Auxerrois 02 - Les Halles...,48.8592,2.3417,9 041,3,Plaza,Hotel,French Restaurant,Bar,Tea Room,Theater,Wine Bar,Clothing Store,Restaurant,Church
1,75002,La Bourse,05 - Gaillon 06 - Vivienne 07 - Le Mail 08 - B...,48.8655,2.3426,21 006,2,French Restaurant,Hotel,Bistro,Italian Restaurant,Restaurant,Spa,Clothing Store,Bakery,Pizza Place,Plaza
2,75003,Le Temple,09 - Les Arts-et-Métiers 10 - Les Enfants-Roug...,48.8637,2.3615,29 956,0,Japanese Restaurant,Italian Restaurant,Vietnamese Restaurant,Cocktail Bar,Sandwich Place,Art Gallery,Burger Joint,Bistro,Café,Bookstore
3,75004,L'Hôtel-de-Ville,13 - Saint-Merri 14 - Saint-Gervais 15 - L'Ars...,48.8601,2.3507,16 966,3,French Restaurant,Art Gallery,Ice Cream Shop,Bakery,Burger Joint,Pub,Bar,Plaza,Gay Bar,Park
4,75005,Le Panthéon,17 - Saint-Victor 18 - Le Jardin-des-Plantes 1...,48.8448,2.3471,23 359,2,French Restaurant,Italian Restaurant,Pub,Wine Bar,Coffee Shop,Plaza,Bakery,Bar,Creperie,Hotel


Finally we visualize the clusters.

In [69]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(paris_merged['Latitude'],paris_merged['Longitude'], paris_merged['District'], paris_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Examining the clusters

- ### Cluster 1

In [76]:
paris_merged.loc[paris_merged['Cluster Labels'] == 0, paris_merged.columns[[1] + list(range(5, paris_merged.shape[1]))]]

Unnamed: 0,District,Population Density ha/km²,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Le Temple,29 956,0,Japanese Restaurant,Italian Restaurant,Vietnamese Restaurant,Cocktail Bar,Sandwich Place,Art Gallery,Burger Joint,Bistro,Café,Bookstore
9,L'Enclos-Saint-Laurent,31 754,0,Cocktail Bar,Hotel,French Restaurant,Seafood Restaurant,Bar,Breakfast Spot,Pizza Place,Bakery,Coffee Shop,Bistro
10,Popincourt,40 827,0,French Restaurant,Cocktail Bar,Italian Restaurant,Bistro,Café,Wine Bar,Pastry Shop,Bar,Gluten-free Restaurant,Pizza Place
11,Reuilly,22 345,0,Hotel,Bistro,Supermarket,Chinese Restaurant,French Restaurant,Japanese Restaurant,Bar,Cheese Shop,Middle Eastern Restaurant,Soup Place


- ### Cluster 2

In [77]:
paris_merged.loc[paris_merged['Cluster Labels'] == 1, paris_merged.columns[[1] + list(range(5, paris_merged.shape[1]))]]

Unnamed: 0,District,Population Density ha/km²,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,L'Élysée,9 457,1,Hotel,Italian Restaurant,Café,French Restaurant,Bar,Thai Restaurant,Theater,Bakery,Wine Shop,Modern European Restaurant
8,L'Opéra,27 251,1,French Restaurant,Hotel,Cocktail Bar,Italian Restaurant,Wine Bar,Lounge,Bistro,Café,Museum,Juice Bar


- ### Cluster 3

In [78]:
paris_merged.loc[paris_merged['Cluster Labels'] == 2, paris_merged.columns[[1] + list(range(5, paris_merged.shape[1]))]]

Unnamed: 0,District,Population Density ha/km²,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,La Bourse,21 006,2,French Restaurant,Hotel,Bistro,Italian Restaurant,Restaurant,Spa,Clothing Store,Bakery,Pizza Place,Plaza
4,Le Panthéon,23 359,2,French Restaurant,Italian Restaurant,Pub,Wine Bar,Coffee Shop,Plaza,Bakery,Bar,Creperie,Hotel
5,Le Luxembourg,19 734,2,French Restaurant,Bakery,Hotel,Wine Bar,Art Gallery,Chocolate Shop,Dessert Shop,Cocktail Bar,Coffee Shop,Cupcake Shop
6,Le Palais-Bourbon,13 235,2,French Restaurant,Hotel,Bistro,Italian Restaurant,Coffee Shop,Art Museum,Park,Chocolate Shop,Bakery,Pizza Place
13,L'Observatoire,24 821,2,French Restaurant,Hotel,Bar,Bakery,Italian Restaurant,Bistro,Garden,Bookstore,Brasserie,Southwestern French Restaurant
14,Vaugirard,27 712,2,French Restaurant,Hotel,Italian Restaurant,Coffee Shop,Japanese Restaurant,Supermarket,Korean Restaurant,Lebanese Restaurant,Bar,Ethiopian Restaurant
17,La Butte-Montmartre,32 875,2,French Restaurant,Bar,Pizza Place,Café,Wine Bar,Italian Restaurant,Gastropub,Restaurant,Deli / Bodega,Coffee Shop
18,Les Buttes-Chaumont,27 342,2,French Restaurant,Italian Restaurant,Bar,Pool,Restaurant,Beer Garden,Dessert Shop,Concert Hall,Park,Scenic Lookout


- ### Cluster 4

In [79]:
paris_merged.loc[paris_merged['Cluster Labels'] == 3, paris_merged.columns[[1] + list(range(5, paris_merged.shape[1]))]]

Unnamed: 0,District,Population Density ha/km²,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Le Louvre,9 041,3,Plaza,Hotel,French Restaurant,Bar,Tea Room,Theater,Wine Bar,Clothing Store,Restaurant,Church
3,L'Hôtel-de-Ville,16 966,3,French Restaurant,Art Gallery,Ice Cream Shop,Bakery,Burger Joint,Pub,Bar,Plaza,Gay Bar,Park
12,Les Gobelins,25 625,3,French Restaurant,Thai Restaurant,Hotel,Vietnamese Restaurant,Italian Restaurant,Asian Restaurant,Bakery,Indian Restaurant,Bar,Cambodian Restaurant
16,Les Batignolles-Monceau,29 724,3,Wine Bar,French Restaurant,Bar,Thai Restaurant,Coffee Shop,Italian Restaurant,Korean Restaurant,Pizza Place,Bookstore,Restaurant
19,Ménilmontant,32 702,3,French Restaurant,Bar,Bakery,Japanese Restaurant,Bistro,Plaza,Park,Food & Drink Shop,Italian Restaurant,Café


- ### Cluster 5

In [80]:
paris_merged.loc[paris_merged['Cluster Labels'] == 4, paris_merged.columns[[1] + list(range(5, paris_merged.shape[1]))]]

Unnamed: 0,District,Population Density ha/km²,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
15,Passy,20 921,4,French Restaurant,Bakery,Café,Grocery Store,Supermarket,Garden,Sandwich Place,Chinese Restaurant,Restaurant,Asian Restaurant
