# Searching where live in Lima


In [30]:
import numpy as np
import pandas as pd
from geopy.geocoders import Nominatim
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import folium
import requests
from tqdm import tqdm
from collections import deque
import matplotlib.cm as cm
import matplotlib.colors as colors

## 1. Load data

I load the data generated in the previous notebook and work with only boroughs that contain the word Toronto  

In [6]:
data = pd.read_csv("lima.csv")
data.head()


Unnamed: 0,Código UBIGEO,Centro poblado,Distrito,Provincia,Región,LATITUD,LONGITUD,CLASIFICACIÓN INEI
0,101010001,CHACHAPOYAS,CHACHAPOYAS,CHACHAPOYAS,AMAZONAS,-6.2293,-77.87241,URBANO
1,102010001,BAGUA,BAGUA,BAGUA,AMAZONAS,-5.638645,-78.53141,URBANO
2,103070007,CHAQUIL,JAZAN,BONGARA,AMAZONAS,-5.934738,-77.97297,RURAL
3,103070022,BARRIO PUERTO SAN JERONIMO,JAZAN,BONGARA,AMAZONAS,-5.950358,-77.97885,RURAL
4,103070052,PEDRO RUIZ,JAZAN,BONGARA,AMAZONAS,-5.944999,-77.97896,RURAL


Select central districts of Lima

In [47]:
LimaData = data[data.Provincia=='LIMA']
centralDistricts = ['LIMA', 'BARRANCO', 'BREÑA','CHORRILLOS',
       'JESUS MARIA', 'LA MOLINA', 'LA VICTORIA',
       'LINCE', 'LOS OLIVOS', 
       'MAGDALENA DEL MAR', 'PUEBLO LIBRE', 'MIRAFLORES',
       'SAN BORJA', 'SAN ISIDRO', 'SAN LUIS',
       'SAN MIGUEL', 'SANTA ANITA',
       'SANTIAGO DE SURCO', 'SURQUILLO']
LimaData = LimaData[LimaData['Distrito'].isin(centralDistricts)]
LimaData.head()

Unnamed: 0,Código UBIGEO,Centro poblado,Distrito,Provincia,Región,LATITUD,LONGITUD,CLASIFICACIÓN INEI
717,1501010001,LIMA,LIMA,LIMA,LIMA,-12.046679,-77.0323,URBANO
719,1501040001,BARRANCO,BARRANCO,LIMA,LIMA,-12.149599,-77.02474,URBANO
720,1501050001,BREÑA,BREÑA,LIMA,LIMA,-12.05691,-77.05366,URBANO
737,1501080001,CHORRILLOS,CHORRILLOS,LIMA,LIMA,-12.174429,-77.02482,URBANO
741,1501130001,JESUS MARIA,JESUS MARIA,LIMA,LIMA,-12.069999,-77.04524,URBANO


Drop columns and rename columns 

In [48]:
LimaData.drop(['Provincia','Región'], axis=1, inplace=True)

In [93]:

LimaData.rename(columns={'Código UBIGEO':'code',
                          'Centro poblado':'neighborhood',
                          'Distrito':'district',
                         'LATITUD':'latitude',
                         'LONGITUD':'longitude',
                         'CLASIFICACIÓN INEI':'zone'}, 
                 inplace=True)
LimaData = LimaData.reset_index()
LimaData.head()

Unnamed: 0,level_0,index,code,neighborhood,district,latitude,longitude,zone
0,0,717,1501010001,LIMA,LIMA,-12.046679,-77.0323,URBANO
1,1,719,1501040001,BARRANCO,BARRANCO,-12.149599,-77.02474,URBANO
2,2,720,1501050001,BREÑA,BREÑA,-12.05691,-77.05366,URBANO
3,3,737,1501080001,CHORRILLOS,CHORRILLOS,-12.174429,-77.02482,URBANO
4,4,741,1501130001,JESUS MARIA,JESUS MARIA,-12.069999,-77.04524,URBANO


In [95]:
print('The dataframe has {} districts and {} neighborhoods.'.format(
        len(LimaData['district'].unique()),
        LimaData.shape[0]
    )
)

The dataframe has 19 districts and 19 neighborhoods.


## 2. Explore data

I just use geolocator to get the geographical coordinates of Toronto to plot the information using Folium

In [51]:
address = 'Lima, Peru'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Lima are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Lima are -12.0621065, -77.0365256.


In [110]:
# create map of New York using latitude and longitude values
map_lima = folium.Map(location=[latitude, longitude], zoom_start=11)

neighborhoods = LimaData

# add markers to map
for lat, lng, district, neighborhood in zip(neighborhoods['latitude'],
                                           neighborhoods['longitude'],
                                           neighborhoods['district'],
                                           neighborhoods['neighborhood']):
    label = '{}, {}'.format(neighborhood, district)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_lima)  
    
map_lima

### Configure Foursquare access

In [53]:
CLIENT_ID = 'xxx' # your Foursquare ID
CLIENT_SECRET = 'yyy' # your Foursquare Secret
VERSION = '20180604' # Foursquare API version


I make use of a function that takes the names and locations of the neighborhoods in Toronto and obtains the 100 top venues around.

In [54]:
def getNearbyVenues(names, zones, latitudes, longitudes, radius=500, LIMIT = 100):
    
    venues_list=[]
    for name, zone, lat, lng in tqdm(zip(names, zones, latitudes, longitudes), total = names.size):
        #print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            zone,
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 'Zone',
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [55]:
Lima_venues = getNearbyVenues(LimaData['neighborhood'],
                              LimaData['zone'],
                            LimaData.latitude,
                            LimaData.longitude)

100%|██████████| 19/19 [00:05<00:00,  3.79it/s]


In [113]:
Lima_venues.shape

(607, 8)

I print the shape of my data and also a sample of the initial rows of information

In [56]:
print(Lima_venues.shape)
Lima_venues.head()

(607, 8)


Unnamed: 0,Neighborhood,Zone,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,LIMA,URBANO,-12.046679,-77.0323,Plaza Mayor de Lima,-12.045983,-77.030565,Plaza
1,LIMA,URBANO,-12.046679,-77.0323,Casa Bernardo O'Higgins,-12.047588,-77.032498,Art Gallery
2,LIMA,URBANO,-12.046679,-77.0323,Palacio Municipal de Lima,-12.045283,-77.030917,City Hall
3,LIMA,URBANO,-12.046679,-77.0323,Teatro Municipal de Lima,-12.04577,-77.034839,Theater
4,LIMA,URBANO,-12.046679,-77.0323,Galería Municipal Pancho Fierro,-12.045495,-77.031341,Art Gallery


I can check the amount of venues per neighboorhood

In [57]:
Lima_venues.groupby("Neighborhood").Venue.count().sort_values(ascending=False).head()

Neighborhood
LIMA            100
SAN ISIDRO       82
BARRANCO         74
MIRAFLORES       61
PUEBLO LIBRE     49
Name: Venue, dtype: int64

And the total amount of unique categories in my data

In [58]:
print('There are {} uniques categories.'.format(len(Lima_venues['Venue Category'].unique())))

There are 156 uniques categories.


Let us now take the venue category and zone information and create a dataframe with a one hot enconding of these data.

In [59]:
Lima_OHE = Lima_venues
Lima_OHE = pd.concat([Lima_OHE.get(['Neighborhood']),
                           pd.get_dummies(Lima_OHE['Venue Category'], prefix='Venue Category'),
                           pd.get_dummies(Lima_OHE['Zone'], prefix='Zone')],axis=1)

Lima_OHE.head()


Unnamed: 0,Neighborhood,Venue Category_American Restaurant,Venue Category_Arcade,Venue Category_Arepa Restaurant,Venue Category_Art Gallery,Venue Category_Art Museum,Venue Category_Arts & Crafts Store,Venue Category_Athletics & Sports,Venue Category_Auto Workshop,Venue Category_BBQ Joint,...,Venue Category_Theme Park Ride / Attraction,Venue Category_Toy / Game Store,Venue Category_Trail,Venue Category_Vegetarian / Vegan Restaurant,Venue Category_Veterinarian,Venue Category_Wine Bar,Venue Category_Winery,Venue Category_Wings Joint,Venue Category_Yoga Studio,Zone_URBANO
0,LIMA,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,LIMA,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,LIMA,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,LIMA,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,LIMA,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


As there are 607 unique categories, the datagrame contains all the neighborhoods and 158 columns

In [60]:
Lima_OHE.shape

(607, 158)

In [None]:
All columns:

In [61]:
Lima_OHE.columns

Index(['Neighborhood', 'Venue Category_American Restaurant',
       'Venue Category_Arcade', 'Venue Category_Arepa Restaurant',
       'Venue Category_Art Gallery', 'Venue Category_Art Museum',
       'Venue Category_Arts & Crafts Store',
       'Venue Category_Athletics & Sports', 'Venue Category_Auto Workshop',
       'Venue Category_BBQ Joint',
       ...
       'Venue Category_Theme Park Ride / Attraction',
       'Venue Category_Toy / Game Store', 'Venue Category_Trail',
       'Venue Category_Vegetarian / Vegan Restaurant',
       'Venue Category_Veterinarian', 'Venue Category_Wine Bar',
       'Venue Category_Winery', 'Venue Category_Wings Joint',
       'Venue Category_Yoga Studio', 'Zone_URBANO'],
      dtype='object', length=158)

I can compute the average number of venue categories per neighborhood

In [62]:
Lima_grouped = Lima_OHE.groupby('Neighborhood').mean().reset_index()
Lima_grouped.head()

Unnamed: 0,Neighborhood,Venue Category_American Restaurant,Venue Category_Arcade,Venue Category_Arepa Restaurant,Venue Category_Art Gallery,Venue Category_Art Museum,Venue Category_Arts & Crafts Store,Venue Category_Athletics & Sports,Venue Category_Auto Workshop,Venue Category_BBQ Joint,...,Venue Category_Theme Park Ride / Attraction,Venue Category_Toy / Game Store,Venue Category_Trail,Venue Category_Vegetarian / Vegan Restaurant,Venue Category_Veterinarian,Venue Category_Wine Bar,Venue Category_Winery,Venue Category_Wings Joint,Venue Category_Yoga Studio,Zone_URBANO
0,BARRANCO,0.013514,0.0,0.0,0.040541,0.0,0.013514,0.0,0.0,0.013514,...,0.0,0.0,0.0,0.027027,0.0,0.013514,0.0,0.013514,0.0,1.0
1,BREÑA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,CHORRILLOS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,JESUS MARIA,0.028571,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,LA MOLINA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,1.0


In total I have 19 Neighborhoods, grouped by district, and 158 distinct categories

In [63]:
Lima_grouped.shape

(19, 158)

The number of Neighborhoods obtained from the venues is smaller than the original number of postcodes, what means that in some postcodes there are no venues

For neighborhood clustering let us select only the N most frequent venue categories per neighborhood

In [64]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [65]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Lima_grouped['Neighborhood']

for ind in np.arange(Lima_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Lima_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,BARRANCO,Zone_URBANO,Venue Category_Bar,Venue Category_Restaurant,Venue Category_Peruvian Restaurant,Venue Category_Art Gallery,Venue Category_Ice Cream Shop,Venue Category_Cocktail Bar,Venue Category_Café,Venue Category_Health Food Store,Venue Category_Performing Arts Venue
1,BREÑA,Zone_URBANO,Venue Category_Restaurant,Venue Category_Juice Bar,Venue Category_Fried Chicken Joint,Venue Category_Fast Food Restaurant,Venue Category_Bus Stop,Venue Category_Seafood Restaurant,Venue Category_Latin American Restaurant,Venue Category_Pizza Place,Venue Category_Dessert Shop
2,CHORRILLOS,Zone_URBANO,Venue Category_Seafood Restaurant,Venue Category_BBQ Joint,Venue Category_Restaurant,Venue Category_Park,Venue Category_Burger Joint,Venue Category_Fried Chicken Joint,Venue Category_Football Stadium,Venue Category_Historic Site,Venue Category_Grocery Store
3,JESUS MARIA,Zone_URBANO,Venue Category_Chinese Restaurant,Venue Category_Peruvian Restaurant,Venue Category_Coffee Shop,Venue Category_Convenience Store,Venue Category_Department Store,Venue Category_Fast Food Restaurant,Venue Category_Fried Chicken Joint,Venue Category_Italian Restaurant,Venue Category_Shopping Mall
4,LA MOLINA,Zone_URBANO,Venue Category_Garden Center,Venue Category_Ice Cream Shop,Venue Category_Gym / Fitness Center,Venue Category_Gym,Venue Category_Yoga Studio,Venue Category_Arts & Crafts Store,Venue Category_Dance Studio,Venue Category_Fried Chicken Joint,Venue Category_French Restaurant


## 3. Neighborhood clustering

Before clustering, let us use a PCA decomposition to reduce the noise of the signal and improve clustering efficiency

In [100]:
pca = PCA(.95)
grouped_clustering = pca.fit_transform(Lima_grouped.drop('Neighborhood', 1))
grouped_clustering = Lima_grouped.drop('Neighborhood', 1)

The dimensionality reduction has kept only a reduced number of the total dimensions

In [67]:
grouped_clustering.head()

Unnamed: 0,Venue Category_American Restaurant,Venue Category_Arcade,Venue Category_Arepa Restaurant,Venue Category_Art Gallery,Venue Category_Art Museum,Venue Category_Arts & Crafts Store,Venue Category_Athletics & Sports,Venue Category_Auto Workshop,Venue Category_BBQ Joint,Venue Category_Bakery,...,Venue Category_Theme Park Ride / Attraction,Venue Category_Toy / Game Store,Venue Category_Trail,Venue Category_Vegetarian / Vegan Restaurant,Venue Category_Veterinarian,Venue Category_Wine Bar,Venue Category_Winery,Venue Category_Wings Joint,Venue Category_Yoga Studio,Zone_URBANO
0,0.013514,0.0,0.0,0.040541,0.0,0.013514,0.0,0.0,0.013514,0.027027,...,0.0,0.0,0.0,0.027027,0.0,0.013514,0.0,0.013514,0.0,1.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,0.028571,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,1.0


In [101]:
grouped_clustering.shape

(19, 158)

With reduced dimensional dataset we carry out the KMeans clustering

In [102]:
# set number of clusters
kclusters = 5

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(grouped_clustering)

# check cluster labels generated for each row in the dataframe
print(kmeans.labels_[0:10])
print(kmeans.labels_.shape)

[0 1 1 0 3 1 1 0 1 0]
(19,)


Let us now create a dataframe that containes the neighborhood, the location and the cluster information, together with the top 10 venues

In [103]:
Lima_grouped["Cluster Labels"] = kmeans.labels_

# add clustering labels
Lima_combined = LimaData.merge(Lima_grouped, left_on = "neighborhood", right_on = "Neighborhood", how = "outer")

# merge Lima_grouped with lima_data to add latitude/longitude for each neighborhood
Lima_combined = Lima_combined.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

Lima_combined["Cluster Labels"] = Lima_combined["Cluster Labels"].fillna(5).astype("int")

Lima_combined[['neighborhood','Cluster Labels']]


Unnamed: 0,neighborhood,Cluster Labels
0,LIMA,0
1,BARRANCO,0
2,BREÑA,1
3,CHORRILLOS,1
4,JESUS MARIA,0
5,LA MOLINA,3
6,LA VICTORIA,1
7,LINCE,1
8,LAS PALMERAS,1
9,MAGDALENA DEL MAR,0


In [104]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

kclusters = kclusters + 1

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Lima_combined['latitude'],
                                  Lima_combined['longitude'],
                                  Lima_combined['neighborhood'],
                                  Lima_combined['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Analyze clusters

## Cluster 1

Cluster 1 contains many restaurants and diners, as well as some other services, like art gallery

In [105]:
Lima_combined.loc[Lima_combined['Cluster Labels'] == 0, 
                     "1st Most Common Venue":"10th Most Common Venue"].head()

Unnamed: 0,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Zone_URBANO,Venue Category_Peruvian Restaurant,Venue Category_Restaurant,Venue Category_Art Gallery,Venue Category_Museum,Venue Category_Café,Venue Category_Gastropub,Venue Category_Seafood Restaurant,Venue Category_Sandwich Place,Venue Category_Breakfast Spot
1,Zone_URBANO,Venue Category_Bar,Venue Category_Restaurant,Venue Category_Peruvian Restaurant,Venue Category_Art Gallery,Venue Category_Ice Cream Shop,Venue Category_Cocktail Bar,Venue Category_Café,Venue Category_Health Food Store,Venue Category_Performing Arts Venue
4,Zone_URBANO,Venue Category_Chinese Restaurant,Venue Category_Peruvian Restaurant,Venue Category_Coffee Shop,Venue Category_Convenience Store,Venue Category_Department Store,Venue Category_Fast Food Restaurant,Venue Category_Fried Chicken Joint,Venue Category_Italian Restaurant,Venue Category_Shopping Mall
9,Zone_URBANO,Venue Category_Theme Park Ride / Attraction,Venue Category_Concert Hall,Venue Category_Scenic Lookout,Venue Category_Seafood Restaurant,Venue Category_Burger Joint,Venue Category_Beach,Venue Category_Bakery,Venue Category_Theater,Venue Category_Cupcake Shop
10,Zone_URBANO,Venue Category_Chinese Restaurant,Venue Category_Bar,Venue Category_Coffee Shop,Venue Category_Bakery,Venue Category_Fried Chicken Joint,Venue Category_Japanese Restaurant,Venue Category_Park,Venue Category_Restaurant,Venue Category_Burger Joint


## Cluster 2

Cluster 2 seems to be dominated by cofee shop and mall

In [106]:
Lima_combined.loc[Lima_combined['Cluster Labels'] == 1, 
                     "1st Most Common Venue":"10th Most Common Venue"].head()

Unnamed: 0,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Zone_URBANO,Venue Category_Restaurant,Venue Category_Juice Bar,Venue Category_Fried Chicken Joint,Venue Category_Fast Food Restaurant,Venue Category_Bus Stop,Venue Category_Seafood Restaurant,Venue Category_Latin American Restaurant,Venue Category_Pizza Place,Venue Category_Dessert Shop
3,Zone_URBANO,Venue Category_Seafood Restaurant,Venue Category_BBQ Joint,Venue Category_Restaurant,Venue Category_Park,Venue Category_Burger Joint,Venue Category_Fried Chicken Joint,Venue Category_Football Stadium,Venue Category_Historic Site,Venue Category_Grocery Store
6,Zone_URBANO,Venue Category_Seafood Restaurant,Venue Category_Nightclub,Venue Category_Restaurant,Venue Category_Soccer Stadium,Venue Category_South American Restaurant,Venue Category_Shopping Mall,Venue Category_Electronics Store,Venue Category_Big Box Store,Venue Category_Latin American Restaurant
7,Zone_URBANO,Venue Category_Peruvian Restaurant,Venue Category_Seafood Restaurant,Venue Category_Bar,Venue Category_Restaurant,Venue Category_Music Venue,Venue Category_Café,Venue Category_Sandwich Place,Venue Category_Coffee Shop,Venue Category_Historic Site
8,Zone_URBANO,Venue Category_Park,Venue Category_Restaurant,Venue Category_Steakhouse,Venue Category_Soccer Field,Venue Category_Cafeteria,Venue Category_Seafood Restaurant,Venue Category_Department Store,Venue Category_Dessert Shop,Venue Category_Diner


## Cluster 3

Cluster 3 is mostly linked to open areas for bar,parks and music

In [107]:
Lima_combined.loc[Lima_combined['Cluster Labels'] == 2, 
                     "1st Most Common Venue":"10th Most Common Venue"].head()

Unnamed: 0,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
15,Zone_URBANO,Venue Category_Peruvian Restaurant,Venue Category_Scenic Lookout,Venue Category_Fried Chicken Joint,Venue Category_Soccer Field,Venue Category_Gym,Venue Category_Go Kart Track,Venue Category_Donut Shop,Venue Category_Food,Venue Category_Fast Food Restaurant


## Cluster 4

Cluster 4 is the most homogeneous one (it only contains one neighborhood)

In [108]:
Lima_combined.loc[Lima_combined['Cluster Labels'] == 3, 
                     "1st Most Common Venue":"10th Most Common Venue"].head()

Unnamed: 0,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Zone_URBANO,Venue Category_Garden Center,Venue Category_Ice Cream Shop,Venue Category_Gym / Fitness Center,Venue Category_Gym,Venue Category_Yoga Studio,Venue Category_Arts & Crafts Store,Venue Category_Dance Studio,Venue Category_Fried Chicken Joint,Venue Category_French Restaurant


## Cluster 5

Cluster 5 is characterized by shops, gym  and restaurants

In [109]:
Lima_combined.loc[Lima_combined['Cluster Labels'] == 4, 
                     "1st Most Common Venue":"10th Most Common Venue"].head()

Unnamed: 0,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
16,Zone_URBANO,Venue Category_Auto Workshop,Venue Category_Soccer Field,Venue Category_Bridge,Venue Category_Gastropub,Venue Category_Convenience Store,Venue Category_Hardware Store,Venue Category_Electronics Store,Venue Category_History Museum,Venue Category_Food & Drink Shop
