## A.1 Reading some useful packages

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import geopandas as gpd
import contextily
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors
import folium 
import urllib
import requests
from geopy.geocoders import Nominatim 
from shapely.geometry import Point, Polygon

%matplotlib inline

plt.rcParams['figure.figsize'] = 10,8

## A.2 Data Preparation

### A.2.1 Paris Restaurants

In [6]:
paris_restaurants = gpd.read_file('../../data/paris_restaurants.csv')
paris_restaurants.drop(['geometry'], axis=1, inplace=True)
paris_restaurants.head()

Unnamed: 0,type,x,y
0,European restuarant,259641.69164572324,6251867.062617987
1,Traditional French restaurant,259572.33960295672,6252029.683163137
2,Traditional French restaurant,259657.2763744336,6252143.400946027
3,Indian / Middle Eastern restaurant,259684.4383301869,6252203.137238394
4,Traditional French restaurant,259597.94308584128,6252230.044091299


In [7]:
paris_restaurants.shape

(5735, 3)

### A.2.2 Paris Borough

In [8]:
paris_arrondissements = gpd.read_file('../../data/paris_arrondissements.geojson')

col_to_keep = ['l_aroff', 'c_ar', 'geometry']

paris_arrondissements.sort_values(by='c_ar')

paris_arrondissements = paris_arrondissements\
                        .loc [:, col_to_keep]\
                        .rename(columns={"l_aroff":"Borough", 
                                         "c_ar":'Borough_nbr'})


paris_arrondissements.head()

Unnamed: 0,Borough,Borough_nbr,geometry
0,Temple,3,"POLYGON ((2.36383 48.86750, 2.36389 48.86747, ..."
1,Panthéon,5,"POLYGON ((2.36443 48.84614, 2.36484 48.84584, ..."
2,Luxembourg,6,"POLYGON ((2.34459 48.85405, 2.34428 48.85375, ..."
3,Reuilly,12,"POLYGON ((2.41388 48.83357, 2.41401 48.83357, ..."
4,Bourse,2,"POLYGON ((2.35152 48.86443, 2.35095 48.86341, ..."


### A.2.2 Paris Neighborhoods 

In [9]:
paris_districts = pd.read_csv('../../data/paris_distrcts.csv', sep=';')

col_to_keep = ['C_QU' , 'L_QU', 'C_AR','Geometry X Y']

paris_districts = paris_districts\
                    .loc[:, col_to_keep]\
                    .rename(columns={"C_QU":"Neighborhood_nbr", 
                                     "L_QU":"Neighborhood", 
                                     "C_AR":"Borough_nbr"})

paris_districts['Latitude']  = paris_districts['Geometry X Y'].apply(lambda x: x.split(',')[0])
paris_districts['Longitude'] = paris_districts['Geometry X Y'].apply(lambda x: x.split(',')[1])

paris_districts.drop(['Geometry X Y'], axis=1)

paris_districts.head()

Unnamed: 0,Neighborhood_nbr,Neighborhood,Borough_nbr,Geometry X Y,Latitude,Longitude
0,7,Mail,2,"48.8680083374,2.34469912743",48.8680083374,2.34469912743
1,8,Bonne-Nouvelle,2,"48.8671501183,2.35008019041",48.8671501183,2.35008019041
2,50,Gare,13,"48.8275274578,2.37239773692",48.8275274578,2.37239773692
3,70,Clignancourt,18,"48.8916675911,2.34597875459",48.8916675911,2.34597875459
4,71,Goutte-d'Or,18,"48.8921381876,2.3555361633",48.8921381876,2.3555361633


### A.2.3 Merge of Two Dataframes

In [469]:
paris_arrondissements_districts = paris_districts.merge(paris_arrondissements.loc[:, ['Borough_nbr', 'Borough']], on='Borough_nbr')

col_to_keep = [ 'Borough', 'Neighborhood', 'Latitude', 'Longitude']

paris_arrondissements_districts = paris_arrondissements_districts.loc[:, col_to_keep]

paris_arrondissements_districts.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bourse,Mail,48.8680083374,2.34469912743
1,Bourse,Bonne-Nouvelle,48.8671501183,2.35008019041
2,Bourse,Gaillon,48.8693066381,2.33343180766
3,Bourse,Vivienne,48.8691001998,2.33946074375
4,Gobelins,Gare,48.8275274578,2.37239773692


# A.3 Build and Plot maps using Foursquare API

### A.3.1 Define Foursquare Credentials and Version

In [470]:
CLIENT_ID = 'P52DXE3CKHPXKZWXWS4HUULKQSOJB5KVUPM55HT30N11015R'     # your Foursquare ID
CLIENT_SECRET = 'PYJMVGLZSRWCIVW03MY3OEKHNYYQCSXTLCOTVKMGHAU3ELZ5' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: P52DXE3CKHPXKZWXWS4HUULKQSOJB5KVUPM55HT30N11015R
CLIENT_SECRET:PYJMVGLZSRWCIVW03MY3OEKHNYYQCSXTLCOTVKMGHAU3ELZ5


In [471]:
#address = '102 North End Ave, New York, NY'
address = 'Paris, FR'

geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

print(f'The geograpical coordinate of {address} are {latitude}, {longitude}.')

The geograpical coordinate of Paris, FR are 48.8566969, 2.3514616.


In [472]:
paris_arrondissements

Unnamed: 0,Borough,Borough_nbr,geometry
0,Temple,3,"POLYGON ((2.36383 48.86750, 2.36389 48.86747, ..."
1,Panthéon,5,"POLYGON ((2.36443 48.84614, 2.36484 48.84584, ..."
2,Luxembourg,6,"POLYGON ((2.34459 48.85405, 2.34428 48.85375, ..."
3,Reuilly,12,"POLYGON ((2.41388 48.83357, 2.41401 48.83357, ..."
4,Bourse,2,"POLYGON ((2.35152 48.86443, 2.35095 48.86341, ..."
5,Batignolles-Monceau,17,"POLYGON ((2.29517 48.87396, 2.29504 48.87378, ..."
6,Opéra,9,"POLYGON ((2.33978 48.88203, 2.33982 48.88202, ..."
7,Buttes-Chaumont,19,"POLYGON ((2.38943 48.90122, 2.39014 48.90108, ..."
8,Hôtel-de-Ville,4,"POLYGON ((2.36851 48.85573, 2.36900 48.85374, ..."
9,Élysée,8,"POLYGON ((2.32584 48.86956, 2.32569 48.86954, ..."


### A.3.2 Map of Paris neighborhoods superimposed on top.

In [473]:
#paris_arrondissements_districts_data = paris_arrondissements_districts[paris_arrondissements_districts['Borough'] == 'Bourse']\
#                                        .reset_index(drop=True)

list_of_borough = ['Louvre', 'Bourse', 'Temple', 'Hôtel-de-Ville', 'Panthéon', 'Luxembourg', 'Palais-Bourbon']
paris_arrondissements_districts_data = paris_arrondissements_districts[paris_arrondissements_districts['Borough'].isin(list_of_borough)].reset_index(drop=True)

paris_arrondissements_districts_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bourse,Mail,48.8680083374,2.34469912743
1,Bourse,Bonne-Nouvelle,48.8671501183,2.35008019041
2,Bourse,Gaillon,48.8693066381,2.33343180766
3,Bourse,Vivienne,48.8691001998,2.33946074375
4,Hôtel-de-Ville,Arsenal,48.851585175,2.36476795387


In [474]:
# create map of New York using latitude and longitude values
map_paris = folium.Map(location=[latitude, longitude], zoom_start=15)

# add markers to map
for lat, lng, borough, neighborhood in zip(
            paris_arrondissements_districts_data['Latitude'], 
            paris_arrondissements_districts_data['Longitude'], 
            paris_arrondissements_districts_data['Borough'], 
            paris_arrondissements_districts_data['Neighborhood']):
    
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_paris)  
    
map_paris

# Save map on html file
#map_paris.save('plot_data.html')

### A.3.3 Foursquare venues

In [475]:
def getNearbyVenues(names, latitudes, longitudes, radius=5000, categoryIds=''):
    '''
        Given a name latitude, longitude and categotyIds, this unction will return venues within 
        5000 around the names.
    '''
    try:
        venues_list=[]
        for name, lat, lng in zip(names, latitudes, longitudes):
            #print(name)

            # create the API request URL
            url = f'https://api.foursquare.com/v2/venues/search?&client_id={CLIENT_ID}\
                                                                &client_secret={CLIENT_SECRET}\
                                                                &v={VERSION}\
                                                                &ll={lat},{lng}\
                                                                &radius={radius}\
                                                                &limit={LIMIT}'

            if (categoryIds != ''):
                url = url + '&categoryId={}'
                url = url.format(categoryIds)

            # make the GET request
            response = requests.get(url).json()
            results = response["response"]['venues']

            # return only relevant information for each nearby venue
            for v in results:
                success = False
                try:
                    category = v['categories'][0]['name']
                    success = True
                except:
                    pass

                if success:
                    venues_list.append([(
                        name, 
                        lat, 
                        lng, 
                        v['name'], 
                        v['location']['lat'], 
                        v['location']['lng'], 
                        v['location']['distance'],
                        v['categories'][0]['name']
                    )])

        nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
        nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue distance' , 
                  'Venue Category']
    
    except:
        print(url)
        print(response)
        print(results)
        print(nearby_venues)

    return(nearby_venues)

In [476]:
LIMIT = 500 
radius = 5000 

In [477]:
#neighborhoods = paris_arrondissements_districts.copy()
neighborhoods = paris_arrondissements_districts_data.copy()
neighborhoods.shape

(28, 4)

In [478]:
#https://developer.foursquare.com/docs/resources/categories
#Sushi = 4bf58dd8d48988d1d2941735
#Japanese food =4bf58dd8d48988d111941735
#Asian= 4bf58dd8d48988d142941735
#Chinese Restaurant = '4bf58dd8d48988d145941735'

paris_venues_cn = getNearbyVenues(names=neighborhoods['Neighborhood'], 
                                     latitudes=neighborhoods['Latitude'], 
                                     longitudes=neighborhoods['Longitude'], 
                                     radius=1000, 
                                  categoryIds='4bf58dd8d48988d145941735'
                                    #categoryIds='4bf58dd8d48988d142941735'
                                    #categoryIds='4bf58dd8d48988d111941735'
                                  
                                    )
paris_venues_cn 

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue distance,Venue Category
0,Mail,48.8680083374,2.34469912743,Mr Zhao,48.869766,2.343644,210,Chinese Restaurant
1,Mail,48.8680083374,2.34469912743,Au Piment du Centre,48.859342,2.348966,1014,Chinese Restaurant
2,Mail,48.8680083374,2.34469912743,Hao Long,48.865660,2.336954,624,Chinese Restaurant
3,Mail,48.8680083374,2.34469912743,Traiteur Chez Zhang,48.861904,2.344043,681,Chinese Restaurant
4,Mail,48.8680083374,2.34469912743,Tafa,48.867023,2.336098,639,Chinese Restaurant
...,...,...,...,...,...,...,...,...
1223,Notre-Dame-des-Champs,48.846427594,2.32735687823,La Maison de thé,48.850964,2.332814,644,Chinese Restaurant
1224,Notre-Dame-des-Champs,48.846427594,2.32735687823,Han Yuan Xuan 翰苑蓒,48.841003,2.337439,954,Chinese Restaurant
1225,Notre-Dame-des-Champs,48.846427594,2.32735687823,Feng Man,48.843813,2.315072,945,Chinese Restaurant
1226,Notre-Dame-des-Champs,48.846427594,2.32735687823,Taokan,48.852635,2.331434,752,Chinese Restaurant


In [479]:
paris_venues_cn.loc[:,'Venue Category'].value_counts()

Chinese Restaurant       948
Szechuan Restaurant       57
Asian Restaurant          51
Noodle House              50
Dim Sum Restaurant        48
Cantonese Restaurant      21
Taiwanese Restaurant      15
Tea Room                  14
Shandong Restaurant        5
Fondue Restaurant          4
BBQ Joint                  3
Shanxi Restaurant          3
Sushi Restaurant           2
Karaoke Bar                2
Dessert Shop               1
Dongbei Restaurant         1
Bubble Tea Shop            1
Thai Restaurant            1
Vietnamese Restaurant      1
Name: Venue Category, dtype: int64

Let's check how many venues were returned for each neighborhood

In [480]:
paris_venues_cn.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue distance,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Archives,50,50,50,50,50,50,50
Arsenal,34,34,34,34,34,34,34
Arts-et-Métiers,50,50,50,50,50,50,50
Bonne-Nouvelle,49,49,49,49,49,49,49
Ecole-Militaire,32,32,32,32,32,32,32
Enfants-Rouges,50,50,50,50,50,50,50
Gaillon,49,49,49,49,49,49,49
Gros-Caillou,27,27,27,27,27,27,27
Halles,50,50,50,50,50,50,50
Invalides,20,20,20,20,20,20,20


In [481]:
def give_geograpical_coordinate(address):
    '''
        Given an address this function return the geograpical coordinate
    '''

    geolocator = Nominatim(user_agent="foursquare_agent")
    location = geolocator.geocode(address)
    latitude = location.latitude
    longitude = location.longitude
    print(f'The geograpical coordinate of {address} are {latitude}, {longitude}.')
    return latitude, longitude

In [482]:
give_geograpical_coordinate("46 rue de la Roquette, 75011 Paris, France")

The geograpical coordinate of 46 rue de la Roquette, 75011 Paris, France are 48.8546902, 2.3726206.


(48.8546902, 2.3726206)

In [483]:
paris_venues_cn.shape

(1228, 8)

Let's add venues to Map

In [484]:
def addToMap(df, color, existingMap):
    for lat, lng, local, venue, venueCat in zip(df['Venue Latitude'], 
                                                df['Venue Longitude'], 
                                                df['Neighborhood'], 
                                                df['Venue'], 
                                                df['Venue Category']):
        
        label = f'{venue} ({venueCat}) - {local}'
        label = folium.Popup(label, parse_html=True)
        folium.CircleMarker(
            [lat, lng],
            radius=5,
            popup=label,
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7).add_to(existingMap)

In [510]:
map_paris_cn = folium.Map(location=[latitude, longitude], zoom_start=13.5)

addToMap(paris_venues_cn, 'red', map_paris_cn)

map_paris_cn

In [486]:
def addColumn(startDf, columnTitle, dataDf):
    grouped = dataDf.groupby('Neighborhood').count()
    
    for n in startDf['Neighborhood']:
        try:
            startDf.loc[startDf['Neighborhood'] == n,columnTitle] = grouped.loc[n, 'Venue']
        except:
            startDf.loc[startDf['Neighborhood'] == n,columnTitle] = 0

In [487]:
paris_grouped = paris_venues_cn.groupby('Neighborhood').count()
paris_grouped

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue distance,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Archives,50,50,50,50,50,50,50
Arsenal,34,34,34,34,34,34,34
Arts-et-Métiers,50,50,50,50,50,50,50
Bonne-Nouvelle,49,49,49,49,49,49,49
Ecole-Militaire,32,32,32,32,32,32,32
Enfants-Rouges,50,50,50,50,50,50,50
Gaillon,49,49,49,49,49,49,49
Gros-Caillou,27,27,27,27,27,27,27
Halles,50,50,50,50,50,50,50
Invalides,20,20,20,20,20,20,20


Let's find out how many unique categories can be curated from all the returned venues


In [488]:
print('There are {} uniques categories.'.format(len(paris_venues_cn['Venue Category'].unique())))

There are 19 uniques categories.


In [489]:
paris_venues_cn

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue distance,Venue Category
0,Mail,48.8680083374,2.34469912743,Mr Zhao,48.869766,2.343644,210,Chinese Restaurant
1,Mail,48.8680083374,2.34469912743,Au Piment du Centre,48.859342,2.348966,1014,Chinese Restaurant
2,Mail,48.8680083374,2.34469912743,Hao Long,48.865660,2.336954,624,Chinese Restaurant
3,Mail,48.8680083374,2.34469912743,Traiteur Chez Zhang,48.861904,2.344043,681,Chinese Restaurant
4,Mail,48.8680083374,2.34469912743,Tafa,48.867023,2.336098,639,Chinese Restaurant
...,...,...,...,...,...,...,...,...
1223,Notre-Dame-des-Champs,48.846427594,2.32735687823,La Maison de thé,48.850964,2.332814,644,Chinese Restaurant
1224,Notre-Dame-des-Champs,48.846427594,2.32735687823,Han Yuan Xuan 翰苑蓒,48.841003,2.337439,954,Chinese Restaurant
1225,Notre-Dame-des-Champs,48.846427594,2.32735687823,Feng Man,48.843813,2.315072,945,Chinese Restaurant
1226,Notre-Dame-des-Champs,48.846427594,2.32735687823,Taokan,48.852635,2.331434,752,Chinese Restaurant


## 3. Analyze Each Neighborhood

In [490]:
# one hot encoding
paris_onehot = pd.get_dummies(paris_venues_cn[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
paris_onehot['Neighborhood'] = paris_venues_cn['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [paris_onehot.columns[-1]] + list(paris_onehot.columns[:-1])
paris_onehot = paris_onehot[fixed_columns]

paris_onehot.head()

Unnamed: 0,Neighborhood,Asian Restaurant,BBQ Joint,Bubble Tea Shop,Cantonese Restaurant,Chinese Restaurant,Dessert Shop,Dim Sum Restaurant,Dongbei Restaurant,Fondue Restaurant,Karaoke Bar,Noodle House,Shandong Restaurant,Shanxi Restaurant,Sushi Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Tea Room,Thai Restaurant,Vietnamese Restaurant
0,Mail,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Mail,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Mail,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Mail,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Mail,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [491]:
paris_grouped = paris_onehot.groupby('Neighborhood').mean().reset_index()
paris_grouped

Unnamed: 0,Neighborhood,Asian Restaurant,BBQ Joint,Bubble Tea Shop,Cantonese Restaurant,Chinese Restaurant,Dessert Shop,Dim Sum Restaurant,Dongbei Restaurant,Fondue Restaurant,Karaoke Bar,Noodle House,Shandong Restaurant,Shanxi Restaurant,Sushi Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Tea Room,Thai Restaurant,Vietnamese Restaurant
0,Archives,0.04,0.02,0.0,0.02,0.78,0.0,0.04,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0
1,Arsenal,0.029412,0.029412,0.0,0.029412,0.794118,0.0,0.029412,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0
2,Arts-et-Métiers,0.06,0.0,0.0,0.0,0.76,0.0,0.04,0.0,0.0,0.0,0.04,0.0,0.02,0.0,0.08,0.0,0.0,0.0,0.0
3,Bonne-Nouvelle,0.061224,0.0,0.0,0.0,0.714286,0.0,0.040816,0.0,0.0,0.0,0.061224,0.0,0.020408,0.0,0.040816,0.040816,0.020408,0.0,0.0
4,Ecole-Militaire,0.09375,0.0,0.0,0.0,0.875,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Enfants-Rouges,0.04,0.0,0.0,0.02,0.76,0.0,0.04,0.0,0.0,0.0,0.04,0.0,0.02,0.0,0.08,0.0,0.0,0.0,0.0
6,Gaillon,0.040816,0.0,0.0,0.020408,0.653061,0.0,0.061224,0.0,0.0,0.0,0.081633,0.0,0.0,0.0,0.061224,0.040816,0.020408,0.020408,0.0
7,Gros-Caillou,0.037037,0.0,0.0,0.0,0.925926,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Halles,0.04,0.0,0.0,0.0,0.74,0.0,0.04,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.08,0.04,0.02,0.0,0.0
9,Invalides,0.1,0.0,0.0,0.0,0.85,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [492]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [493]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = paris_grouped['Neighborhood']

for ind in np.arange(paris_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(paris_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Archives,Chinese Restaurant,Szechuan Restaurant,Asian Restaurant,Noodle House,Dim Sum Restaurant,BBQ Joint,Cantonese Restaurant,Dongbei Restaurant,Bubble Tea Shop,Dessert Shop
1,Arsenal,Chinese Restaurant,Szechuan Restaurant,Asian Restaurant,BBQ Joint,Cantonese Restaurant,Noodle House,Dim Sum Restaurant,Dongbei Restaurant,Bubble Tea Shop,Dessert Shop
2,Arts-et-Métiers,Chinese Restaurant,Szechuan Restaurant,Asian Restaurant,Noodle House,Dim Sum Restaurant,Shanxi Restaurant,Dongbei Restaurant,BBQ Joint,Bubble Tea Shop,Cantonese Restaurant
3,Bonne-Nouvelle,Chinese Restaurant,Asian Restaurant,Noodle House,Dim Sum Restaurant,Taiwanese Restaurant,Szechuan Restaurant,Tea Room,Shanxi Restaurant,BBQ Joint,Bubble Tea Shop
4,Ecole-Militaire,Chinese Restaurant,Asian Restaurant,Dessert Shop,Fondue Restaurant,BBQ Joint,Bubble Tea Shop,Cantonese Restaurant,Dim Sum Restaurant,Dongbei Restaurant,Vietnamese Restaurant


## A.4 Cluster Neighborhoods

In [494]:
# set number of clusters
kclusters = 5

paris_grouped_clustering = paris_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(paris_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([4, 4, 4, 2, 0, 4, 2, 0, 4, 0], dtype=int32)

In [495]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

paris_merged = paris_arrondissements_districts_data.copy()
#paris_merged = paris_arrondissements_districts.copy()
paris_merged = paris_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
paris_merged.dropna(inplace=True)
paris_merged.loc[:, 'Cluster Labels'] = paris_merged.loc[:, 'Cluster Labels'].astype(int)
paris_merged

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bourse,Mail,48.8680083374,2.34469912743,2,Chinese Restaurant,Dim Sum Restaurant,Noodle House,Asian Restaurant,Szechuan Restaurant,Tea Room,Taiwanese Restaurant,Cantonese Restaurant,BBQ Joint,Bubble Tea Shop
1,Bourse,Bonne-Nouvelle,48.8671501183,2.35008019041,2,Chinese Restaurant,Asian Restaurant,Noodle House,Dim Sum Restaurant,Taiwanese Restaurant,Szechuan Restaurant,Tea Room,Shanxi Restaurant,BBQ Joint,Bubble Tea Shop
2,Bourse,Gaillon,48.8693066381,2.33343180766,2,Chinese Restaurant,Noodle House,Dim Sum Restaurant,Szechuan Restaurant,Asian Restaurant,Taiwanese Restaurant,Tea Room,Cantonese Restaurant,Thai Restaurant,BBQ Joint
3,Bourse,Vivienne,48.8691001998,2.33946074375,2,Chinese Restaurant,Asian Restaurant,Dim Sum Restaurant,Szechuan Restaurant,Noodle House,Taiwanese Restaurant,Tea Room,Cantonese Restaurant,BBQ Joint,Bubble Tea Shop
4,Hôtel-de-Ville,Arsenal,48.851585175,2.36476795387,4,Chinese Restaurant,Szechuan Restaurant,Asian Restaurant,BBQ Joint,Cantonese Restaurant,Noodle House,Dim Sum Restaurant,Dongbei Restaurant,Bubble Tea Shop,Dessert Shop
5,Hôtel-de-Ville,Saint-Merri,48.8585213723,2.35166696714,4,Chinese Restaurant,Noodle House,Asian Restaurant,Szechuan Restaurant,Tea Room,Cantonese Restaurant,Dim Sum Restaurant,Dongbei Restaurant,BBQ Joint,Bubble Tea Shop
6,Hôtel-de-Ville,Notre-Dame,48.8528955862,2.35277501212,4,Chinese Restaurant,Szechuan Restaurant,Asian Restaurant,Cantonese Restaurant,Noodle House,Tea Room,Dim Sum Restaurant,Dongbei Restaurant,BBQ Joint,Bubble Tea Shop
7,Hôtel-de-Ville,Saint-Gervais,48.8557186509,2.35816233385,4,Chinese Restaurant,Szechuan Restaurant,Asian Restaurant,Cantonese Restaurant,BBQ Joint,Noodle House,Dim Sum Restaurant,Dongbei Restaurant,Bubble Tea Shop,Dessert Shop
8,Panthéon,Jardin-des-Plantes,48.8419401934,2.35689388962,4,Chinese Restaurant,Szechuan Restaurant,Shandong Restaurant,Noodle House,Dongbei Restaurant,Bubble Tea Shop,Dim Sum Restaurant,Vietnamese Restaurant,BBQ Joint,Cantonese Restaurant
9,Panthéon,Sorbonne,48.8490447659,2.34574660019,1,Chinese Restaurant,Asian Restaurant,Szechuan Restaurant,Dim Sum Restaurant,Noodle House,Cantonese Restaurant,Karaoke Bar,Tea Room,Shandong Restaurant,Shanxi Restaurant


### A.4.1 Create cluster map

In [496]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(paris_merged['Latitude'], 
                                  paris_merged['Longitude'], 
                                  paris_merged['Neighborhood'], 
                                  paris_merged['Cluster Labels']):
    
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [497]:
paris_merged.loc[paris_merged['Cluster Labels'] == 0, paris_merged.columns[[1] + list(range(5, paris_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,Invalides,Chinese Restaurant,Asian Restaurant,Dim Sum Restaurant,Fondue Restaurant,BBQ Joint,Bubble Tea Shop,Cantonese Restaurant,Dessert Shop,Dongbei Restaurant,Vietnamese Restaurant
13,Ecole-Militaire,Chinese Restaurant,Asian Restaurant,Dessert Shop,Fondue Restaurant,BBQ Joint,Bubble Tea Shop,Cantonese Restaurant,Dim Sum Restaurant,Dongbei Restaurant,Vietnamese Restaurant
14,Gros-Caillou,Chinese Restaurant,Asian Restaurant,Dim Sum Restaurant,Fondue Restaurant,BBQ Joint,Bubble Tea Shop,Cantonese Restaurant,Dessert Shop,Dongbei Restaurant,Vietnamese Restaurant


In [498]:
paris_merged.loc[paris_merged['Cluster Labels'] == 1, paris_merged.columns[[1] + list(range(5, paris_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,Sorbonne,Chinese Restaurant,Asian Restaurant,Szechuan Restaurant,Dim Sum Restaurant,Noodle House,Cantonese Restaurant,Karaoke Bar,Tea Room,Shandong Restaurant,Shanxi Restaurant
21,Palais-Royal,Chinese Restaurant,Tea Room,Taiwanese Restaurant,Noodle House,Asian Restaurant,Dim Sum Restaurant,Szechuan Restaurant,Cantonese Restaurant,BBQ Joint,Bubble Tea Shop
22,Saint-Germain-l'Auxerrois,Chinese Restaurant,Dim Sum Restaurant,Taiwanese Restaurant,Asian Restaurant,Tea Room,Szechuan Restaurant,Sushi Restaurant,Noodle House,Cantonese Restaurant,BBQ Joint
23,Place-Vendôme,Chinese Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Noodle House,Vietnamese Restaurant,Dim Sum Restaurant,Sushi Restaurant,Cantonese Restaurant,BBQ Joint,Bubble Tea Shop
25,Monnaie,Chinese Restaurant,Noodle House,Dim Sum Restaurant,Tea Room,Szechuan Restaurant,Cantonese Restaurant,Vietnamese Restaurant,Dongbei Restaurant,BBQ Joint,Bubble Tea Shop


In [499]:
paris_merged.loc[paris_merged['Cluster Labels'] == 2, paris_merged.columns[[1] + list(range(5, paris_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Mail,Chinese Restaurant,Dim Sum Restaurant,Noodle House,Asian Restaurant,Szechuan Restaurant,Tea Room,Taiwanese Restaurant,Cantonese Restaurant,BBQ Joint,Bubble Tea Shop
1,Bonne-Nouvelle,Chinese Restaurant,Asian Restaurant,Noodle House,Dim Sum Restaurant,Taiwanese Restaurant,Szechuan Restaurant,Tea Room,Shanxi Restaurant,BBQ Joint,Bubble Tea Shop
2,Gaillon,Chinese Restaurant,Noodle House,Dim Sum Restaurant,Szechuan Restaurant,Asian Restaurant,Taiwanese Restaurant,Tea Room,Cantonese Restaurant,Thai Restaurant,BBQ Joint
3,Vivienne,Chinese Restaurant,Asian Restaurant,Dim Sum Restaurant,Szechuan Restaurant,Noodle House,Taiwanese Restaurant,Tea Room,Cantonese Restaurant,BBQ Joint,Bubble Tea Shop


In [500]:
paris_merged.loc[paris_merged['Cluster Labels'] == 3, paris_merged.columns[[1] + list(range(5, paris_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
15,Saint-Thomas-d'Aquin,Chinese Restaurant,Asian Restaurant,Fondue Restaurant,Dim Sum Restaurant,BBQ Joint,Bubble Tea Shop,Cantonese Restaurant,Dessert Shop,Dongbei Restaurant,Vietnamese Restaurant
24,Saint-Germain-des-Prés,Chinese Restaurant,Asian Restaurant,Cantonese Restaurant,Dim Sum Restaurant,Noodle House,Fondue Restaurant,Karaoke Bar,Tea Room,Shandong Restaurant,Shanxi Restaurant
27,Notre-Dame-des-Champs,Chinese Restaurant,Asian Restaurant,Dim Sum Restaurant,Fondue Restaurant,Cantonese Restaurant,Noodle House,BBQ Joint,Bubble Tea Shop,Dessert Shop,Dongbei Restaurant


In [501]:
paris_merged.loc[paris_merged['Cluster Labels'] == 4, paris_merged.columns[[1] + list(range(5, paris_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Arsenal,Chinese Restaurant,Szechuan Restaurant,Asian Restaurant,BBQ Joint,Cantonese Restaurant,Noodle House,Dim Sum Restaurant,Dongbei Restaurant,Bubble Tea Shop,Dessert Shop
5,Saint-Merri,Chinese Restaurant,Noodle House,Asian Restaurant,Szechuan Restaurant,Tea Room,Cantonese Restaurant,Dim Sum Restaurant,Dongbei Restaurant,BBQ Joint,Bubble Tea Shop
6,Notre-Dame,Chinese Restaurant,Szechuan Restaurant,Asian Restaurant,Cantonese Restaurant,Noodle House,Tea Room,Dim Sum Restaurant,Dongbei Restaurant,BBQ Joint,Bubble Tea Shop
7,Saint-Gervais,Chinese Restaurant,Szechuan Restaurant,Asian Restaurant,Cantonese Restaurant,BBQ Joint,Noodle House,Dim Sum Restaurant,Dongbei Restaurant,Bubble Tea Shop,Dessert Shop
8,Jardin-des-Plantes,Chinese Restaurant,Szechuan Restaurant,Shandong Restaurant,Noodle House,Dongbei Restaurant,Bubble Tea Shop,Dim Sum Restaurant,Vietnamese Restaurant,BBQ Joint,Cantonese Restaurant
10,Val-de-Grâce,Chinese Restaurant,Noodle House,Szechuan Restaurant,Shandong Restaurant,Dim Sum Restaurant,Vietnamese Restaurant,Dongbei Restaurant,BBQ Joint,Bubble Tea Shop,Cantonese Restaurant
11,Saint-Victor,Chinese Restaurant,Asian Restaurant,Szechuan Restaurant,Noodle House,Dim Sum Restaurant,Cantonese Restaurant,Shandong Restaurant,Dongbei Restaurant,BBQ Joint,Bubble Tea Shop
16,Sainte-Avoie,Chinese Restaurant,Szechuan Restaurant,Asian Restaurant,Noodle House,Dim Sum Restaurant,Tea Room,Dongbei Restaurant,BBQ Joint,Bubble Tea Shop,Cantonese Restaurant
17,Arts-et-Métiers,Chinese Restaurant,Szechuan Restaurant,Asian Restaurant,Noodle House,Dim Sum Restaurant,Shanxi Restaurant,Dongbei Restaurant,BBQ Joint,Bubble Tea Shop,Cantonese Restaurant
18,Archives,Chinese Restaurant,Szechuan Restaurant,Asian Restaurant,Noodle House,Dim Sum Restaurant,BBQ Joint,Cantonese Restaurant,Dongbei Restaurant,Bubble Tea Shop,Dessert Shop


In [502]:
map_clusters.save('plot_data.html')

# Appendicites

* [Vasserot 'Quartiers' (Neighbourhoods) (1790-1860)](https://geo.nyu.edu/catalog/stanford-ry227xq8127)