# How to predict the best business to open in Rome

### Paolo Mammoliti

August 13, 2020


Importing All the packages and libraries needed for this project

In [2]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
import numpy as np
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
from pandas.io.json import json_normalize  # tranform JSON file into a pandas dataframe

!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library

# import k-means from clustering stage
from sklearn.cluster import KMeans

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    openssl-1.1.1g             |       h516909a_1         2.1 MB  conda-forge
    ca-certificates-2020.6.20  |       hecda079_0         145 KB  conda-forge
    certifi-2020.6.20          |   py36h9f0ad1d_0         151 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    branca-0.4.1               |             py_0          26 KB  conda-forge
    altair-4.1.0               |             py_1         614 KB  conda-forge
    ------------------------------------------------------------
                       

I'll now use a wikipedia page to get the Roma's borough data located in a table

In [3]:
source = requests.get("https://it.wikipedia.org/wiki/Municipi_di_Roma").text
soup = BeautifulSoup(source, 'lxml')

table = soup.find("table")
table_rows = table.tbody.find_all("tr")

res = []
for tr in table_rows:
    td = tr.find_all("td")
    row = [tr.text for tr in td]
    
    # Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.
    if row != [] and row[1] != "Not assigned":
        # If a cell has a borough but a "Not assigned" neighborhood, then the neighborhood will be the same as the borough.
        if "Not assigned" in row[2]: 
            row[2] = row[1]
        res.append(row)

# Dataframe with 3 columns
df = pd.DataFrame(res, columns = ["Borough","Population","Area","Density","President"])
df.head()

Unnamed: 0,Borough,Population,Area,Density,President
0,I Centro Storico\n,170 328\n,"20,09\n","8 478,25\n",Sabrina Alfonsi (PD)\n
1,II Parioli/Nomentano\n,168 410\n,"19,66\n","8 566,12\n",Francesca Del Bello (PD)\n
2,III Monte Sacro\n,205 832\n,"98,03\n","2 099,68\n",Giovanni Caudo (centrosinistra)\n
3,IV Tiburtino\n,175 921\n,"48,94\n","3 594,63\n",Virginia Raggi (Commissario Straordinario)\n
4,V Prenestino/Centocelle\n,245 073\n,"26,92\n","9 103,75\n",Giovanni Boccuzzi (M5S)\n


Replacing some weirds characters with the empty ones

In [4]:
df["Population"] = df["Population"].str.replace("\n","")
df["Borough"] = df["Borough"].str.replace("\n","")
df["Area"] = df["Area"].str.replace("\n","")
df["Density"] = df["Density"].str.replace("\n","")
df["President"] = df["President"].str.replace("\n","")
df.head()

Unnamed: 0,Borough,Population,Area,Density,President
0,I Centro Storico,170 328,2009,"8 478,25",Sabrina Alfonsi (PD)
1,II Parioli/Nomentano,168 410,1966,"8 566,12",Francesca Del Bello (PD)
2,III Monte Sacro,205 832,9803,"2 099,68",Giovanni Caudo (centrosinistra)
3,IV Tiburtino,175 921,4894,"3 594,63",Virginia Raggi (Commissario Straordinario)
4,V Prenestino/Centocelle,245 073,2692,"9 103,75",Giovanni Boccuzzi (M5S)


Dropping a no needed data frame column

In [5]:
df.drop('President', axis=1, inplace=True)

Dropping the last two rows of the data frame with some info not needed

In [6]:
df.drop([15, 16])

Unnamed: 0,Borough,Population,Area,Density
0,I Centro Storico,170 328,2009,"8 478,25"
1,II Parioli/Nomentano,168 410,1966,"8 566,12"
2,III Monte Sacro,205 832,9803,"2 099,68"
3,IV Tiburtino,175 921,4894,"3 594,63"
4,V Prenestino/Centocelle,245 073,2692,"9 103,75"
5,VI Roma delle Torri,257 556,11388,"2 261,64"
6,VII Appio-Latino/Tuscolana/Cinecittà,307 184,4584,"6 701,22"
7,VIII Appia Antica,130 784,4715,"2 773,79"
8,IX Eur,183 343,18331,"1 000,18"
9,X Ostia/Acilia,231 701,15074,"1 537,09"


Now we'll load and import a csv file with the Roma's boroughs latitude and longitude data. I'll remove the sentitive credentials to share the notebook

In [7]:

import types
import pandas as pd
from botocore.client import Config
import ibm_boto3

def __iter__(self): return 0

# @hidden_cell
# The following code accesses a file in your IBM Cloud Object Storage. It includes your credentials.
# You might want to remove those credentials before you share the notebook.

# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )

df_data_3 = pd.read_csv(body)
df_data_3.head()


Unnamed: 0,Borough,Latitude,Longitude
0,I Centro Storico,41.893056,12.482778
1,II Parioli/Nomentano,41.929958,12.518931
2,III Monte Sacro,41.93608,12.535116
3,IV Tiburtino,41.933491,12.598746
4,V Prenestino/Centocelle,41.890665,12.548488


In [8]:
df_data_3

Unnamed: 0,Borough,Latitude,Longitude
0,I Centro Storico,41.893056,12.482778
1,II Parioli/Nomentano,41.929958,12.518931
2,III Monte Sacro,41.93608,12.535116
3,IV Tiburtino,41.933491,12.598746
4,V Prenestino/Centocelle,41.890665,12.548488
5,VI Roma delle Torri,41.869657,12.632731
6,VII Appio-Latino/Tuscolana/Cinecitta,41.8817,12.5228
7,VIII Appia Antica,41.841228,12.48429
8,IX Eur,41.814879,12.47998
9,X Ostia/Acilia,41.73066,12.280531


I'll now merge the two dataframe created above

In [9]:
df_Roma = pd.merge(df, df_data_3, how='right', left_on = 'Borough', right_on = 'Borough')
# remove the "Postal Code" column
df_Roma.head()

Unnamed: 0,Borough,Population,Area,Density,Latitude,Longitude
0,I Centro Storico,170 328,2009,"8 478,25",41.893056,12.482778
1,II Parioli/Nomentano,168 410,1966,"8 566,12",41.929958,12.518931
2,III Monte Sacro,205 832,9803,"2 099,68",41.93608,12.535116
3,IV Tiburtino,175 921,4894,"3 594,63",41.933491,12.598746
4,V Prenestino/Centocelle,245 073,2692,"9 103,75",41.890665,12.548488


In [10]:
df_Roma

Unnamed: 0,Borough,Population,Area,Density,Latitude,Longitude
0,I Centro Storico,170 328,2009.0,"8 478,25",41.893056,12.482778
1,II Parioli/Nomentano,168 410,1966.0,"8 566,12",41.929958,12.518931
2,III Monte Sacro,205 832,9803.0,"2 099,68",41.93608,12.535116
3,IV Tiburtino,175 921,4894.0,"3 594,63",41.933491,12.598746
4,V Prenestino/Centocelle,245 073,2692.0,"9 103,75",41.890665,12.548488
5,VI Roma delle Torri,257 556,11388.0,"2 261,64",41.869657,12.632731
6,VIII Appia Antica,130 784,4715.0,"2 773,79",41.841228,12.48429
7,IX Eur,183 343,18331.0,"1 000,18",41.814879,12.47998
8,X Ostia/Acilia,231 701,15074.0,"1 537,09",41.73066,12.280531
9,XI Arvalia/Portuense,155 652,7148.0,"2 177,56",41.855282,12.444762


Removing 3 more columns

In [11]:
df_Roma.drop(["Population","Area","Density"], axis=1, inplace=True)

Using the geolocator library to find the geocrapical coordinate of Rome

In [12]:
address = "Rome, IT"

geolocator = Nominatim(user_agent="roma_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Rome are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Rome are 41.8933203, 12.4829321.


Creating the Roma map with Folium

In [13]:
# create map of Rome using latitude and longitude values
map_Roma = folium.Map(location=[latitude, longitude], zoom_start=10)
map_Roma

Adding the boroughs circle marker with Folium using the data frame created

In [14]:
for lat, lng, borough in zip(
        df_Roma['Latitude'], 
        df_Roma['Longitude'], 
        df_Roma['Borough']):
    label = '{}'.format(borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Roma)  

map_Roma

Adding my Foursquare API credentials to find the venues (this will be removed to share the notebook)

In [15]:
CLIENT_ID = ''
CLIENT_SECRET = ''
VERSION = ''

In [16]:
borough_name = df_Roma.loc[0, 'Borough']
print(f"The first neighborhood's name is '{borough_name}'.")

The first neighborhood's name is 'I Centro Storico'.


In [17]:
borough_latitude = df_Roma.loc[0, 'Latitude'] # neighborhood latitude value
borough_longitude = df_Roma.loc[0, 'Longitude'] # neighborhood longitude value

print('Latitude and longitude values of {} are {}, {}.'.format(borough_name, 
                                                               borough_latitude, 
                                                               borough_longitude))

Latitude and longitude values of I Centro Storico are 41.893056, 12.482778.


In [18]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    borough_latitude, 
    borough_longitude, 
    radius, 
    LIMIT)

# get the result to a json file
results = requests.get(url).json()

In [19]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

Finding a venues list situated in the first borough "I Centro Storico"

In [20]:
venues = results['response']['groups'][0]['items']
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues

Unnamed: 0,name,categories,lat,lng
0,Piazza del Campidoglio,Plaza,41.893321,12.482956
1,Terrazza delle Quadrighe,Scenic Lookout,41.894346,12.483336
2,Foro di Cesare,Historic Site,41.894128,12.485232
3,Musei Capitolini,Art Museum,41.893343,12.482885
4,Capitoline Hill (Campidoglio),Scenic Lookout,41.893462,12.483588
5,Teatro Della Cometa,Theater,41.893399,12.481593
6,Foro di Traiano,Historic Site,41.894729,12.484871
7,Tempio di Vespasiano e Tito,Temple,41.892494,12.483688
8,Arco di Settimio Severo,Monument / Landmark,41.892894,12.484658
9,Terrazza Caffarelli,Scenic Lookout,41.892589,12.481666


In [21]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    venues_list=[]
    
    for name, lat, lng in zip(names, latitudes, longitudes):
        # print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Borough', 
                  'Borough Latitude', 
                  'Borough Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [22]:
df_Roma_venues = getNearbyVenues(names=df_Roma['Borough'],
                                   latitudes=df_Roma['Latitude'],
                                   longitudes=df_Roma['Longitude']
                                  )

In [23]:
df_Roma_venues.head()

Unnamed: 0,Borough,Borough Latitude,Borough Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,I Centro Storico,41.893056,12.482778,Piazza del Campidoglio,41.893321,12.482956,Plaza
1,I Centro Storico,41.893056,12.482778,Terrazza delle Quadrighe,41.894346,12.483336,Scenic Lookout
2,I Centro Storico,41.893056,12.482778,Foro di Cesare,41.894128,12.485232,Historic Site
3,I Centro Storico,41.893056,12.482778,Musei Capitolini,41.893343,12.482885,Art Museum
4,I Centro Storico,41.893056,12.482778,Capitoline Hill (Campidoglio),41.893462,12.483588,Scenic Lookout


Calculating the number of venue's categories on each borough

In [24]:
df_Roma_venues.groupby('Borough').count()

Unnamed: 0_level_0,Borough Latitude,Borough Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
I Centro Storico,70,70,70,70,70,70
II Parioli/Nomentano,30,30,30,30,30,30
III Monte Sacro,25,25,25,25,25,25
IV Tiburtino,9,9,9,9,9,9
IX Eur,5,5,5,5,5,5
V Prenestino/Centocelle,11,11,11,11,11,11
VI Roma delle Torri,7,7,7,7,7,7
VII Appio-Latino/Tuscolana/Cinecitta,14,14,14,14,14,14
VIII Appia Antica,14,14,14,14,14,14
X Ostia/Acilia,45,45,45,45,45,45


In [25]:
print('There are {} uniques categories.'.format(len(df_Roma_venues['Venue Category'].unique())))

There are 83 uniques categories.


In [26]:
# one hot encoding
df_Roma_onehot = pd.get_dummies(df_Roma_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
df_Roma_onehot['Borough'] = df_Roma_venues['Borough'] 

# move neighborhood column to the first column
fixed_columns = [df_Roma_onehot.columns[-1]] + list(df_Roma_onehot.columns[:-1])
df_Roma_onehot = df_Roma_onehot[fixed_columns]

df_Roma_onehot.head()

Unnamed: 0,Borough,African Restaurant,American Restaurant,Art Gallery,Art Museum,Asian Restaurant,Bakery,Bar,Basketball Court,Beach,...,Steakhouse,Supermarket,Temple,Thai Restaurant,Theater,Tourist Information Center,Trattoria/Osteria,Turkish Restaurant,Vegetarian / Vegan Restaurant,Wine Bar
0,I Centro Storico,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,I Centro Storico,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,I Centro Storico,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,I Centro Storico,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,I Centro Storico,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Calculatering the percentage of the venues categories on each borough

In [27]:
df_Roma_grouped = df_Roma_onehot.groupby('Borough').mean().reset_index()
df_Roma_grouped.head()

Unnamed: 0,Borough,African Restaurant,American Restaurant,Art Gallery,Art Museum,Asian Restaurant,Bakery,Bar,Basketball Court,Beach,...,Steakhouse,Supermarket,Temple,Thai Restaurant,Theater,Tourist Information Center,Trattoria/Osteria,Turkish Restaurant,Vegetarian / Vegan Restaurant,Wine Bar
0,I Centro Storico,0.0,0.0,0.0,0.014286,0.0,0.0,0.028571,0.0,0.0,...,0.0,0.0,0.085714,0.0,0.014286,0.014286,0.028571,0.0,0.0,0.0
1,II Parioli/Nomentano,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,III Monte Sacro,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0
3,IV Tiburtino,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,IX Eur,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Discovering the most commons venue's categories on each borough

In [28]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Borough']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
Borough_venues_sorted = pd.DataFrame(columns=columns)
Borough_venues_sorted['Borough'] = df_Roma_grouped['Borough']

for ind in np.arange(df_Roma_grouped.shape[0]):
    Borough_venues_sorted.iloc[ind, 1:] = return_most_common_venues(df_Roma_grouped.iloc[ind, :], num_top_venues)

Borough_venues_sorted

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,I Centro Storico,Historic Site,Hotel,Temple,Monument / Landmark,Pub,Scenic Lookout,Italian Restaurant,Fountain,Garden,Museum
1,II Parioli/Nomentano,Ice Cream Shop,Dessert Shop,Italian Restaurant,Pastry Shop,Plaza,Café,Bookstore,Japanese Restaurant,Park,Pizza Place
2,III Monte Sacro,Pizza Place,Cocktail Bar,Café,Hotel,Beer Garden,Mexican Restaurant,Plaza,Pub,Japanese Restaurant,Boutique
3,IV Tiburtino,Café,Shoe Store,American Restaurant,Hotel,Rental Car Location,Italian Restaurant,Fast Food Restaurant,Motorcycle Shop,Fountain,Department Store
4,IX Eur,Pizza Place,Hotel,Restaurant,Dessert Shop,Fountain,Cocktail Bar,Convention Center,Department Store,Falafel Restaurant,Fast Food Restaurant
5,V Prenestino/Centocelle,Gym,Sandwich Place,Italian Restaurant,Light Rail Station,Market,Noodle House,Pizza Place,Café,African Restaurant,Art Gallery
6,VI Roma delle Torri,Plaza,Theater,Bus Station,Shopping Mall,Supermarket,Pizza Place,Hotel,German Restaurant,Falafel Restaurant,Clothing Store
7,VII Appio-Latino/Tuscolana/Cinecitta,Plaza,Hotel,Trattoria/Osteria,Italian Restaurant,Bistro,Photography Lab,Pizza Place,Hostel,Thai Restaurant,Asian Restaurant
8,VIII Appia Antica,Café,Italian Restaurant,Plaza,Park,Salad Place,Mexican Restaurant,Japanese Restaurant,Bistro,Garden,Furniture / Home Store
9,X Ostia/Acilia,Pizza Place,Café,Beach,Italian Restaurant,Seafood Restaurant,Hotel,Ice Cream Shop,Fast Food Restaurant,Cocktail Bar,Restaurant


Creating 5 random clusters showing the closest boroughs based on the venues discriminance

In [29]:
# set number of clusters
kclusters = 5

df_Roma_grouped_clustering = df_Roma_grouped.drop('Borough', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(df_Roma_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 2, 2, 1, 3, 2, 2, 2, 1, 2], dtype=int32)

In [30]:
# add clustering labels
Borough_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

df_Roma_merged = df_Roma

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
df_Roma_merged = df_Roma_merged.join(Borough_venues_sorted.set_index('Borough'), on='Borough')

df_Roma_merged.head() # check the last columns!

Unnamed: 0,Borough,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,I Centro Storico,41.893056,12.482778,0,Historic Site,Hotel,Temple,Monument / Landmark,Pub,Scenic Lookout,Italian Restaurant,Fountain,Garden,Museum
1,II Parioli/Nomentano,41.929958,12.518931,2,Ice Cream Shop,Dessert Shop,Italian Restaurant,Pastry Shop,Plaza,Café,Bookstore,Japanese Restaurant,Park,Pizza Place
2,III Monte Sacro,41.93608,12.535116,2,Pizza Place,Cocktail Bar,Café,Hotel,Beer Garden,Mexican Restaurant,Plaza,Pub,Japanese Restaurant,Boutique
3,IV Tiburtino,41.933491,12.598746,1,Café,Shoe Store,American Restaurant,Hotel,Rental Car Location,Italian Restaurant,Fast Food Restaurant,Motorcycle Shop,Fountain,Department Store
4,V Prenestino/Centocelle,41.890665,12.548488,2,Gym,Sandwich Place,Italian Restaurant,Light Rail Station,Market,Noodle House,Pizza Place,Café,African Restaurant,Art Gallery


In [31]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(
        df_Roma_merged['Latitude'], 
        df_Roma_merged['Longitude'], 
        df_Roma_merged['Borough'], 
        df_Roma_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [32]:
df_Roma_merged.loc[df_Roma_merged['Cluster Labels'] == 0, df_Roma_merged.columns[[0] + list(range(3, df_Roma_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,I Centro Storico,0,Historic Site,Hotel,Temple,Monument / Landmark,Pub,Scenic Lookout,Italian Restaurant,Fountain,Garden,Museum


In [33]:
df_Roma_merged.loc[df_Roma_merged['Cluster Labels'] == 1, df_Roma_merged.columns[[0] + list(range(3, df_Roma_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,IV Tiburtino,1,Café,Shoe Store,American Restaurant,Hotel,Rental Car Location,Italian Restaurant,Fast Food Restaurant,Motorcycle Shop,Fountain,Department Store
6,VIII Appia Antica,1,Café,Italian Restaurant,Plaza,Park,Salad Place,Mexican Restaurant,Japanese Restaurant,Bistro,Garden,Furniture / Home Store


In [34]:
df_Roma_merged.loc[df_Roma_merged['Cluster Labels'] == 2, df_Roma_merged.columns[[0] + list(range(3, df_Roma_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,II Parioli/Nomentano,2,Ice Cream Shop,Dessert Shop,Italian Restaurant,Pastry Shop,Plaza,Café,Bookstore,Japanese Restaurant,Park,Pizza Place
2,III Monte Sacro,2,Pizza Place,Cocktail Bar,Café,Hotel,Beer Garden,Mexican Restaurant,Plaza,Pub,Japanese Restaurant,Boutique
4,V Prenestino/Centocelle,2,Gym,Sandwich Place,Italian Restaurant,Light Rail Station,Market,Noodle House,Pizza Place,Café,African Restaurant,Art Gallery
5,VI Roma delle Torri,2,Plaza,Theater,Bus Station,Shopping Mall,Supermarket,Pizza Place,Hotel,German Restaurant,Falafel Restaurant,Clothing Store
8,X Ostia/Acilia,2,Pizza Place,Café,Beach,Italian Restaurant,Seafood Restaurant,Hotel,Ice Cream Shop,Fast Food Restaurant,Cocktail Bar,Restaurant
9,XI Arvalia/Portuense,2,Gym / Fitness Center,Park,Pizza Place,Supermarket,Café,Bistro,Gym,Italian Restaurant,Fast Food Restaurant,Clothing Store
10,XII Monte Verde,2,Pizza Place,Café,Ice Cream Shop,Italian Restaurant,Restaurant,Sandwich Place,Garden,Food,Market,Plaza
11,XIII Aurelio,2,Hotel,Café,Plaza,Italian Restaurant,Supermarket,Steakhouse,Clothing Store,Gym / Fitness Center,Convention Center,Department Store
13,XV Milvio,2,Café,Trattoria/Osteria,Restaurant,Brewery,Basketball Court,Light Rail Station,Sandwich Place,Bookstore,Fountain,Department Store
14,VII Appio-Latino/Tuscolana/Cinecitta,2,Plaza,Hotel,Trattoria/Osteria,Italian Restaurant,Bistro,Photography Lab,Pizza Place,Hostel,Thai Restaurant,Asian Restaurant


In [35]:
df_Roma_merged.loc[df_Roma_merged['Cluster Labels'] == 3, df_Roma_merged.columns[[0] + list(range(3, df_Roma_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,IX Eur,3,Pizza Place,Hotel,Restaurant,Dessert Shop,Fountain,Cocktail Bar,Convention Center,Department Store,Falafel Restaurant,Fast Food Restaurant


In [36]:
df_Roma_merged.loc[df_Roma_merged['Cluster Labels'] == 4, df_Roma_merged.columns[[0] + list(range(3, df_Roma_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,XIV Monte Mario,4,Chinese Restaurant,Park,Italian Restaurant,Supermarket,Wine Bar,French Restaurant,Department Store,Dessert Shop,Falafel Restaurant,Fast Food Restaurant
