# Finding convenience stores' optimal locations in Montevideo
#### A first approximation.

In [2]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

import requests # library to handle requests
import random # library for random number generation

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

# install the Geocoder
!pip -q install geocoder
import geocoder
# import time
import time

print('Libraries imported.')
print('Folium installed')
print('Libraries imported.')

Collecting package metadata: done
Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda

  added / updated specs:
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2019.3.9   |       hecc5488_0         146 KB  conda-forge
    certifi-2019.3.9           |           py36_0         149 KB  conda-forge
    conda-4.6.13               |           py36_0         2.1 MB  conda-forge
    geographiclib-1.49         |             py_0          32 KB  conda-forge
    geopy-1.19.0               |             py_0          53 KB  conda-forge
    openssl-1.1.1b             |       h14c3975_1         4.0 MB  conda-forge
    ------------------------------------------------------------
                                           Total:         6.4 MB

The following NEW packages will be INSTALLED:

  geographiclib      conda-forge/noarch::g

In [3]:
df_base = pd.read_csv('/resources/base.csv', error_bad_lines=False, encoding = "ISO-8859-2", delimiter=';', decimal=',')

df_base.replace(to_replace='VILLA MUNOZ RETIRO', value='VILLA MUÑOZ RETIRO', inplace=True, regex=True)
df_base.replace(to_replace='BANADOS DE CARRASCO', value='BAÑADOS DE CARRASCO', inplace=True, regex=True)

for col in df_base:
    df_base[col] = pd.to_numeric(pd.Series(df_base[col]), errors='ignore')

#Transform FREQ_HOM to float64 for print circles in map later.
FREQ_HOM = df_base['FREQ_HOM'].astype('float64', copy=False)

df_base.head()


Unnamed: 0,BARRIO,MEDIA_08,MEDIANA_08,3_08,MEDIA_09,MEDIANA_09,3_09,VAR,FREQ_HOM,U_PERS,_U_AREA,U_DENS,SUB_PERS,SUB_AREA,SUB_DENS,RU_PERS,RU_AREA,RU_DENS,TOT_PERS,TOT_AREA,TOT_DENS
0,AGUADA,2.55,1.962,3.35,3.529,3.08,4.311,38.40%,186,76607.0,641.67,117.63,0,0,??,0,0,??,76607.0,641.67,117.63
1,AIRES PUROS,4.621,4.363,6.503,4.731,4.391,6.125,2.40%,186,29091.0,433.0,67.2,0,0,??,0,0,??,29091.0,433.0,67.2
2,ATAHUALPA,4.161,3.793,5.319,6.561,6.258,8.61,57.70%,186,29091.0,433.0,67.2,0,0,??,0,0,??,29091.0,433.0,67.2
3,BAÑADOS DE CARRASCO,3.99,3.529,5.16,3.843,3.449,5.402,-3.70%,186,145190.0,2513.0,57.8,6253,921,6.8,17434,5089,3.4,168877.0,8522.0,19.8
4,BARRIO SUR,3.99,3.529,5.16,3.843,3.449,5.402,-3.70%,186,46361.0,460.0,100.8,0,0,??,0,0,??,46361.0,460.0,100.8


'MEDIA_X' and 'MEDIANA_X' stand for "common property" squared meter price's mean and median respectively, in indexed units. The number indicates the year (2008 and 2009). '3_X' stands for the third quartile of the same variable. This project use the price's mean from 2009 as purchasing power indicator.

'FREQ_HOM' stands for rapine complaint frequency from January 1, 2018 to December 31, 2018. This will be used as a criminality indicator.

'X_PERS', 'X_AREA', 'X_DENS' stand for total population, area in hectares, and population density respectively. Also, this base considers Urban, Sub-urban, Rural and Total area. Total density will be used for display a choropleth map. 


### Extract neighborhood's centric coordinates:

This will be useful for print bubbles that describes the price per squared meter and rapine frequency in each neighborhood.

In [4]:
def get_latlng(arcgis_geocoder):
    lat_lng_coords = None
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Montevideo, Uruguay'.format(arcgis_geocoder))
        lat_lng_coords = g.latlng
    return lat_lng_coords

start = time.time()
postal_codes = df_base['BARRIO']    
coordinates = [get_latlng(postal_code) for postal_code in postal_codes.tolist()]
end = time.time()
print("Time of execution: ", end - start, "seconds")

df_se_coordinates = pd.DataFrame(coordinates, columns = ['Latitud', 'Longitud'])
df_base['Latitud'] = df_se_coordinates['Latitud']
df_base['Longitud'] = df_se_coordinates['Longitud']
df_base.head(5)


Time of execution:  34.835649728775024 seconds


Unnamed: 0,BARRIO,MEDIA_08,MEDIANA_08,3_08,MEDIA_09,MEDIANA_09,3_09,VAR,FREQ_HOM,U_PERS,_U_AREA,U_DENS,SUB_PERS,SUB_AREA,SUB_DENS,RU_PERS,RU_AREA,RU_DENS,TOT_PERS,TOT_AREA,TOT_DENS,Latitud,Longitud
0,AGUADA,2.55,1.962,3.35,3.529,3.08,4.311,38.40%,186,76607.0,641.67,117.63,0,0,??,0,0,??,76607.0,641.67,117.63,-34.89041,-56.18783
1,AIRES PUROS,4.621,4.363,6.503,4.731,4.391,6.125,2.40%,186,29091.0,433.0,67.2,0,0,??,0,0,??,29091.0,433.0,67.2,-34.85136,-56.18841
2,ATAHUALPA,4.161,3.793,5.319,6.561,6.258,8.61,57.70%,186,29091.0,433.0,67.2,0,0,??,0,0,??,29091.0,433.0,67.2,-34.86504,-56.18992
3,BAÑADOS DE CARRASCO,3.99,3.529,5.16,3.843,3.449,5.402,-3.70%,186,145190.0,2513.0,57.8,6253,921,6.8,17434,5089,3.4,168877.0,8522.0,19.8,-34.84062,-56.08985
4,BARRIO SUR,3.99,3.529,5.16,3.843,3.449,5.402,-3.70%,186,46361.0,460.0,100.8,0,0,??,0,0,??,46361.0,460.0,100.8,-34.91145,-56.18963


# First stage: 

### Discard regions according to three socioeconomic criterions: Population density, criminality and purchasing power per neighborhood. 

The main idea consists in select those neighborhoods where population density is higher, and, at the same time, the ratio "common property" squared meter price against rapine frequency is higher. 

In [5]:
#Load JSON file for define Neighborhood's location. 
with open('barrios.json', encoding='utf-8-sig') as fh:
    data = json.load(fh)
#Save the file as a variable    
barrios_json = data

In [6]:
#Create and fill a new column with squared meter price/rapine frequency. 
df_base['Price_Rapine_Ratio'] = ''
df_base['Price_Rapine_Ratio'] = list(map(lambda x,y: x/y, df_base['MEDIA_09'], df_base['FREQ_HOM']))
largest_ratio = df_base.sort_values(by='Price_Rapine_Ratio', ascending=False)
largest_ratio_dens = largest_ratio.sort_values(by=['TOT_DENS'], ascending=False)
largest_ratio_dens = largest_ratio.sort_values(by=['TOT_DENS', 'Price_Rapine_Ratio'], ascending=False)
largest_ratio_dens = largest_ratio_dens.loc[largest_ratio_dens['Price_Rapine_Ratio'] > 0.02]
largest_ratio_dens = largest_ratio_dens.head(5)
largest_ratio_dens

Unnamed: 0,BARRIO,MEDIA_08,MEDIANA_08,3_08,MEDIA_09,MEDIANA_09,3_09,VAR,FREQ_HOM,U_PERS,_U_AREA,U_DENS,SUB_PERS,SUB_AREA,SUB_DENS,RU_PERS,RU_AREA,RU_DENS,TOT_PERS,TOT_AREA,TOT_DENS,Latitud,Longitud,Price_Rapine_Ratio
51,PUNTA CARRETAS,9.455,9.385,12.25,10.763,11.376,13.372,13.80%,186,121885.0,820.0,148.6,0,0,??,0,0,??,121885.0,820.0,148.6,-34.9203,-56.16154,0.057866
48,POCITOS,8.168,8.583,11.008,9.367,9.84,12.35,14.70%,369,121885.0,820.0,148.6,0,0,??,0,0,??,121885.0,820.0,148.6,-34.90853,-56.15087,0.025385
43,PARQUE RODO,7.427,7.877,11.224,7.365,6.621,10.02,-0.80%,186,101216.0,700.0,144.6,0,0,??,0,0,??,101216.0,700.0,144.6,-34.91862,-56.16416,0.039597
42,PALERMO,4.402,4.24,5.748,5.437,4.888,7.729,23.50%,186,101216.0,700.0,144.6,0,0,??,0,0,??,101216.0,700.0,144.6,-34.9115,-56.1786,0.029231
28,LA COMERCIAL,4.665,4.235,6.16,5.495,5.231,7.529,17.80%,186,91730.0,732.5,126.05,0,0,??,0,0,??,91730.0,732.5,126.1,-34.88732,-56.1704,0.029543


According to this data frame, the best neighborhoods based on the three criterion defined previously are Punta Carretas, Pocitos, Parque Rodo, Palermo and La Comercial. 

## This results can be visualized in a map:

#### FOURSQUARE TOOLS

In [7]:
CLIENT_ID = 'QWZ3AKPBXV1HBGW2KDKDSHJJKCMXJHOKQEIRKXGBS3UWQP5Z' # Foursquare ID
CLIENT_SECRET = 'LW45MBFXTQPAPZADKRMVY11VL2QWT42CHZQ30XQXOXJL23M3' # Foursquare Secret
VERSION = '20180604'
LIMIT = 50
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: QWZ3AKPBXV1HBGW2KDKDSHJJKCMXJHOKQEIRKXGBS3UWQP5Z
CLIENT_SECRET:LW45MBFXTQPAPZADKRMVY11VL2QWT42CHZQ30XQXOXJL23M3


In [8]:
address1 = 'Montevideo' #Define a centric address in Montevideo

geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address1)
latitude1 = location.latitude
longitude1 = location.longitude
print(latitude1, longitude1)

-34.9059039 -56.1913569


In [9]:
# create a numpy array of length 6 and has linear spacing from the minium total to the maximum total 
threshold_scale = np.linspace(df_base['TOT_DENS'].min(),
                              df_base['TOT_DENS'].max(),
                              6, dtype=int)
threshold_scale = threshold_scale.tolist() # change the numpy array to a list
threshold_scale[-1] = threshold_scale[-1] + 1 

In [10]:
montevideo1_map = folium.Map(location=[latitude1, longitude1], zoom_start=11)

# add choropleth map with population density    
montevideo1_map.choropleth(
    geo_data=barrios_json,
    data=df_base,
    columns=['BARRIO', 'TOT_DENS'],
    key_on='feature.properties.nombre',
    threshold_scale=threshold_scale,
    fill_color='YlOrRd', 
    fill_opacity=0.6, 
    line_opacity=0.2,
    legend_name='Population density per squared hectare'
)
folium.LayerControl().add_to(montevideo1_map)

#Add crimson circles per neighborhood. It's radius correspond to the "common property" squared meter price (2009) and 
# is used as a purchasing power indicator.
for i in range(0,len(df_base)):
      folium.Circle(
      location=[df_base.iloc[i]['Latitud'], df_base.iloc[i]['Longitud']],
      popup=df_base.iloc[i]['BARRIO'],
      radius=df_base.iloc[i]['MEDIA_09']*60,
      color='crimson',
      fill=True,
      fill_color='crimson'
   ).add_to(montevideo1_map)

#Add green circles per neighborhood. It's radius correspond to the rapine frequency in 2018. This is used as a criminality
# index indicator. 
for i in range(0,len(df_base)):
      folium.Circle(
      location=[df_base.iloc[i]['Latitud'], df_base.iloc[i]['Longitud']],
      popup=df_base.iloc[i]['BARRIO'],
      radius=FREQ_HOM.iloc[i],
      color='Green',
      fill=True,
      fill_color='Green'
   ).add_to(montevideo1_map)       

        

montevideo1_map

As we can see, in the most populated region (purple area), the five neighborhoods defined previously have the biggest red circles in relation to green circles. **From here I'll be working just with this area**.

In [11]:
#Re-load modified JSON file that contains only the area of interest. 
with open('barrios_final.json', encoding='utf-8-sig') as fg:
    data2 = json.load(fg)
#Save the file as a variable    
barrios_final = data2

# Second stage:

### Explore neighborhoods in the defined area.

The main idea here is get the top 5 most common venues in the pre-defined five neighborhoods. 

In [12]:
#Function that get the top 100 venues that are in the defined area within a radius of 1000 meters
def getNearbyVenues(Nombre, Latitud, Longitud, radius=1000):
    
    venues_list=[]
    for name, lat, lng in zip(Nombre, Latitud, Longitud):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Barrio', 
                  'Latitud', 
                  'Longitud', 
                  'Venue', 
                  'Venue Latitud', 
                  'Venue Longitud', 
                  'Venue Categoria']
    
    return(nearby_venues)

In [13]:
venues = getNearbyVenues(Nombre=largest_ratio_dens['BARRIO'],
                                   Latitud=largest_ratio_dens['Latitud'],
                                   Longitud=largest_ratio_dens['Longitud']
                                  )

PUNTA CARRETAS
POCITOS
PARQUE RODO
PALERMO
LA COMERCIAL


In [14]:
print(venues.shape)
venues.head()

(235, 7)


Unnamed: 0,Barrio,Latitud,Longitud,Venue,Venue Latitud,Venue Longitud,Venue Categoria
0,PUNTA CARRETAS,-34.9203,-56.16154,Francis,-34.919479,-56.160065,Mediterranean Restaurant
1,PUNTA CARRETAS,-34.9203,-56.16154,La Pulpería,-34.921036,-56.160193,BBQ Joint
2,PUNTA CARRETAS,-34.9203,-56.16154,Pellegrin Boutique Gourmet,-34.918407,-56.159667,Deli / Bodega
3,PUNTA CARRETAS,-34.9203,-56.16154,Club de Golf del Uruguay,-34.922122,-56.163377,Golf Course
4,PUNTA CARRETAS,-34.9203,-56.16154,Pizza Don Ciccio,-34.918344,-56.161532,Pizza Place


In [15]:
# Count how many venues were returned for each neighborhood
venues.groupby('Barrio').count()

Unnamed: 0_level_0,Latitud,Longitud,Venue,Venue Latitud,Venue Longitud,Venue Categoria
Barrio,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
LA COMERCIAL,35,35,35,35,35,35
PALERMO,50,50,50,50,50,50
PARQUE RODO,50,50,50,50,50,50
POCITOS,50,50,50,50,50,50
PUNTA CARRETAS,50,50,50,50,50,50


In [16]:
print('There are {} uniques categories.'.format(len(venues['Venue Categoria'].unique())))

There are 79 uniques categories.


In [17]:
# one hot encoding
venues_onehot = pd.get_dummies(venues[['Venue Categoria']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
venues_onehot['Barrio'] = venues['Barrio'] 

# move neighborhood column to the first column
fixed_columns = [venues_onehot.columns[-1]] + list(venues_onehot.columns[:-1])
venues_onehot = venues_onehot[fixed_columns]

venues_onehot.head()

Unnamed: 0,Barrio,Asian Restaurant,Auto Garage,BBQ Joint,Bakery,Bar,Beach,Bed & Breakfast,Beer Bar,Beer Garden,Bistro,Boat or Ferry,Bookstore,Boutique,Brewery,Burger Joint,Café,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Convenience Store,Creperie,Deli / Bodega,Department Store,Dessert Shop,Diner,Eastern European Restaurant,Electronics Store,Falafel Restaurant,Fish & Chips Shop,Flea Market,Food Court,French Restaurant,Fruit & Vegetable Store,Furniture / Home Store,Gastropub,Golf Course,Grocery Store,Gym,Gym / Fitness Center,Hostel,Hot Dog Joint,Hotel,Ice Cream Shop,Italian Restaurant,Japanese Restaurant,Latin American Restaurant,Mediterranean Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Museum,Music Venue,Other Great Outdoors,Paper / Office Supplies Store,Park,Pharmacy,Pizza Place,Plaza,Pub,Public Art,Restaurant,Sandwich Place,Scenic Lookout,Shop & Service,Shopping Mall,Skate Park,Soccer Field,South American Restaurant,Southern / Soul Food Restaurant,Spanish Restaurant,Sports Club,Steakhouse,Supermarket,Sushi Restaurant,Tea Room,Theater,Vegetarian / Vegan Restaurant,Waterfront
0,PUNTA CARRETAS,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,PUNTA CARRETAS,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,PUNTA CARRETAS,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,PUNTA CARRETAS,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,PUNTA CARRETAS,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category:

In [18]:
venues_grouped = venues_onehot.groupby('Barrio').mean().reset_index()
venues_grouped

Unnamed: 0,Barrio,Asian Restaurant,Auto Garage,BBQ Joint,Bakery,Bar,Beach,Bed & Breakfast,Beer Bar,Beer Garden,Bistro,Boat or Ferry,Bookstore,Boutique,Brewery,Burger Joint,Café,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Convenience Store,Creperie,Deli / Bodega,Department Store,Dessert Shop,Diner,Eastern European Restaurant,Electronics Store,Falafel Restaurant,Fish & Chips Shop,Flea Market,Food Court,French Restaurant,Fruit & Vegetable Store,Furniture / Home Store,Gastropub,Golf Course,Grocery Store,Gym,Gym / Fitness Center,Hostel,Hot Dog Joint,Hotel,Ice Cream Shop,Italian Restaurant,Japanese Restaurant,Latin American Restaurant,Mediterranean Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Museum,Music Venue,Other Great Outdoors,Paper / Office Supplies Store,Park,Pharmacy,Pizza Place,Plaza,Pub,Public Art,Restaurant,Sandwich Place,Scenic Lookout,Shop & Service,Shopping Mall,Skate Park,Soccer Field,South American Restaurant,Southern / Soul Food Restaurant,Spanish Restaurant,Sports Club,Steakhouse,Supermarket,Sushi Restaurant,Tea Room,Theater,Vegetarian / Vegan Restaurant,Waterfront
0,LA COMERCIAL,0.0,0.028571,0.085714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.028571,0.028571,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.028571,0.057143,0.028571,0.028571,0.0,0.028571,0.0,0.028571,0.028571,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.057143,0.0,0.0,0.028571,0.0,0.028571,0.0,0.0,0.028571,0.0,0.0,0.0,0.028571,0.028571,0.028571,0.028571,0.0,0.114286,0.057143,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0
1,PALERMO,0.0,0.0,0.0,0.06,0.14,0.02,0.0,0.02,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.04,0.02,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.04,0.02,0.02,0.02,0.02,0.02,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.02,0.0,0.0,0.02,0.0,0.06,0.0,0.1,0.0,0.04,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0
2,PARQUE RODO,0.0,0.0,0.04,0.04,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.04,0.0,0.0,0.0,0.06,0.0,0.0,0.02,0.04,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.02,0.0,0.0,0.12,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.02,0.02,0.02,0.0,0.06,0.0,0.06,0.0,0.0,0.02,0.06,0.0,0.02,0.02,0.02,0.0,0.0,0.02,0.02,0.0,0.0,0.06,0.0,0.04,0.0,0.0,0.0,0.0
3,POCITOS,0.02,0.0,0.04,0.0,0.04,0.02,0.02,0.0,0.0,0.02,0.0,0.02,0.0,0.04,0.02,0.02,0.0,0.02,0.0,0.06,0.02,0.02,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.06,0.0,0.0,0.02,0.04,0.06,0.02,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.02,0.0,0.08,0.02,0.02,0.0,0.02,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0
4,PUNTA CARRETAS,0.0,0.0,0.04,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.04,0.0,0.0,0.02,0.06,0.0,0.0,0.02,0.06,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.02,0.0,0.04,0.04,0.0,0.0,0.12,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.02,0.0,0.0,0.0,0.06,0.0,0.04,0.0,0.0,0.02,0.06,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.06,0.0,0.04,0.02,0.0,0.0,0.02


Now we print each neighborhood along with the top 5 most common venues

In [19]:
num_top_venues = 5

for barrio in venues_grouped['Barrio']:
    print("----"+barrio+"----")
    temp = venues_grouped[venues_grouped['Barrio'] == barrio].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----LA COMERCIAL----
               venue  freq
0         Restaurant  0.11
1          BBQ Joint  0.09
2     Ice Cream Shop  0.06
3  Electronics Store  0.06
4     Sandwich Place  0.06


----PALERMO----
        venue  freq
0         Bar  0.14
1  Restaurant  0.10
2         Pub  0.06
3      Bakery  0.06
4         Gym  0.04


----PARQUE RODO----
         venue  freq
0        Hotel  0.12
1   Steakhouse  0.06
2  Coffee Shop  0.06
3   Restaurant  0.06
4  Pizza Place  0.06


----POCITOS----
                  venue  freq
0            Restaurant  0.08
1  Gym / Fitness Center  0.06
2           Coffee Shop  0.06
3    Italian Restaurant  0.06
4           Pizza Place  0.06


----PUNTA CARRETAS----
           venue  freq
0          Hotel  0.12
1  Deli / Bodega  0.06
2    Coffee Shop  0.06
3     Steakhouse  0.06
4           Park  0.06




Then, we proceed to store this in a dataframe, extending to top 10 most common venues:

In [20]:
#Function to sort the venues in descending order:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

#New dataframe that displays the top 10 venues for each neighborhood:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Barrio']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
venues_sorted = pd.DataFrame(columns=columns)
venues_sorted['Barrio'] = venues_grouped['Barrio']

for ind in np.arange(venues_grouped.shape[0]):
    venues_sorted.iloc[ind, 1:] = return_most_common_venues(venues_grouped.iloc[ind, :], num_top_venues)

venues_sorted

Unnamed: 0,Barrio,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,LA COMERCIAL,Restaurant,BBQ Joint,Sandwich Place,Ice Cream Shop,Electronics Store,Coffee Shop,Middle Eastern Restaurant,Music Venue,Chinese Restaurant,Café
1,PALERMO,Bar,Restaurant,Pub,Bakery,Gym,Coffee Shop,Scenic Lookout,Italian Restaurant,Other Great Outdoors,Hotel
2,PARQUE RODO,Hotel,Restaurant,Steakhouse,Park,Pizza Place,Coffee Shop,Bakery,Sushi Restaurant,Café,BBQ Joint
3,POCITOS,Restaurant,Italian Restaurant,Pizza Place,Gym / Fitness Center,Coffee Shop,Dessert Shop,BBQ Joint,Bar,Ice Cream Shop,Brewery
4,PUNTA CARRETAS,Hotel,Deli / Bodega,Coffee Shop,Restaurant,Steakhouse,Park,Gym / Fitness Center,Gastropub,Gym,Café


## Cluster Neighborhoods:

### Using k-means clustering.

Although We are working with just five neighborhoods, it could be useful to apply k-means clustering for identify some differences between them. I decided to set 3 clusters.

In [21]:
# set number of clusters
kclusters = 3

venues_grouped_clustering = venues_grouped.drop('Barrio', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(venues_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 1, 0, 1, 0], dtype=int32)

In [22]:
venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

venues_merged = largest_ratio_dens

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
venues_merged = venues_merged.join(venues_sorted.set_index('Barrio'), on='BARRIO')

venues_merged.head()

Unnamed: 0,BARRIO,MEDIA_08,MEDIANA_08,3_08,MEDIA_09,MEDIANA_09,3_09,VAR,FREQ_HOM,U_PERS,_U_AREA,U_DENS,SUB_PERS,SUB_AREA,SUB_DENS,RU_PERS,RU_AREA,RU_DENS,TOT_PERS,TOT_AREA,TOT_DENS,Latitud,Longitud,Price_Rapine_Ratio,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
51,PUNTA CARRETAS,9.455,9.385,12.25,10.763,11.376,13.372,13.80%,186,121885.0,820.0,148.6,0,0,??,0,0,??,121885.0,820.0,148.6,-34.9203,-56.16154,0.057866,0,Hotel,Deli / Bodega,Coffee Shop,Restaurant,Steakhouse,Park,Gym / Fitness Center,Gastropub,Gym,Café
48,POCITOS,8.168,8.583,11.008,9.367,9.84,12.35,14.70%,369,121885.0,820.0,148.6,0,0,??,0,0,??,121885.0,820.0,148.6,-34.90853,-56.15087,0.025385,1,Restaurant,Italian Restaurant,Pizza Place,Gym / Fitness Center,Coffee Shop,Dessert Shop,BBQ Joint,Bar,Ice Cream Shop,Brewery
43,PARQUE RODO,7.427,7.877,11.224,7.365,6.621,10.02,-0.80%,186,101216.0,700.0,144.6,0,0,??,0,0,??,101216.0,700.0,144.6,-34.91862,-56.16416,0.039597,0,Hotel,Restaurant,Steakhouse,Park,Pizza Place,Coffee Shop,Bakery,Sushi Restaurant,Café,BBQ Joint
42,PALERMO,4.402,4.24,5.748,5.437,4.888,7.729,23.50%,186,101216.0,700.0,144.6,0,0,??,0,0,??,101216.0,700.0,144.6,-34.9115,-56.1786,0.029231,1,Bar,Restaurant,Pub,Bakery,Gym,Coffee Shop,Scenic Lookout,Italian Restaurant,Other Great Outdoors,Hotel
28,LA COMERCIAL,4.665,4.235,6.16,5.495,5.231,7.529,17.80%,186,91730.0,732.5,126.05,0,0,??,0,0,??,91730.0,732.5,126.1,-34.88732,-56.1704,0.029543,2,Restaurant,BBQ Joint,Sandwich Place,Ice Cream Shop,Electronics Store,Coffee Shop,Middle Eastern Restaurant,Music Venue,Chinese Restaurant,Café


In [23]:
#Find Punta Carretas coordinates
address_pc = 'Punta Carretas, Montevideo' 

geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address_pc)
latitude_pc= location.latitude
longitude_pc = location.longitude
print(latitude_pc, longitude_pc)

-34.9202023 -56.1600649


In [24]:
map_clusters = folium.Map(location=[latitude_pc, longitude_pc], zoom_start=13)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(venues_merged['Latitud'], venues_merged['Longitud'], venues_merged['BARRIO'], venues_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examine clusters:

In [25]:
venues_merged.loc[venues_merged['Cluster Labels'] == 2, venues_merged.columns[[1] + list(range(5, venues_merged.shape[1]))]]

Unnamed: 0,MEDIA_08,MEDIANA_09,3_09,VAR,FREQ_HOM,U_PERS,_U_AREA,U_DENS,SUB_PERS,SUB_AREA,SUB_DENS,RU_PERS,RU_AREA,RU_DENS,TOT_PERS,TOT_AREA,TOT_DENS,Latitud,Longitud,Price_Rapine_Ratio,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
28,4.665,5.231,7.529,17.80%,186,91730.0,732.5,126.05,0,0,??,0,0,??,91730.0,732.5,126.1,-34.88732,-56.1704,0.029543,2,Restaurant,BBQ Joint,Sandwich Place,Ice Cream Shop,Electronics Store,Coffee Shop,Middle Eastern Restaurant,Music Venue,Chinese Restaurant,Café


In [26]:
venues_merged.loc[venues_merged['Cluster Labels'] == 1, venues_merged.columns[[1] + list(range(5, venues_merged.shape[1]))]]

Unnamed: 0,MEDIA_08,MEDIANA_09,3_09,VAR,FREQ_HOM,U_PERS,_U_AREA,U_DENS,SUB_PERS,SUB_AREA,SUB_DENS,RU_PERS,RU_AREA,RU_DENS,TOT_PERS,TOT_AREA,TOT_DENS,Latitud,Longitud,Price_Rapine_Ratio,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
48,8.168,9.84,12.35,14.70%,369,121885.0,820.0,148.6,0,0,??,0,0,??,121885.0,820.0,148.6,-34.90853,-56.15087,0.025385,1,Restaurant,Italian Restaurant,Pizza Place,Gym / Fitness Center,Coffee Shop,Dessert Shop,BBQ Joint,Bar,Ice Cream Shop,Brewery
42,4.402,4.888,7.729,23.50%,186,101216.0,700.0,144.6,0,0,??,0,0,??,101216.0,700.0,144.6,-34.9115,-56.1786,0.029231,1,Bar,Restaurant,Pub,Bakery,Gym,Coffee Shop,Scenic Lookout,Italian Restaurant,Other Great Outdoors,Hotel


In [27]:
venues_merged.loc[venues_merged['Cluster Labels'] == 0, venues_merged.columns[[1] + list(range(5, venues_merged.shape[1]))]]

Unnamed: 0,MEDIA_08,MEDIANA_09,3_09,VAR,FREQ_HOM,U_PERS,_U_AREA,U_DENS,SUB_PERS,SUB_AREA,SUB_DENS,RU_PERS,RU_AREA,RU_DENS,TOT_PERS,TOT_AREA,TOT_DENS,Latitud,Longitud,Price_Rapine_Ratio,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
51,9.455,11.376,13.372,13.80%,186,121885.0,820.0,148.6,0,0,??,0,0,??,121885.0,820.0,148.6,-34.9203,-56.16154,0.057866,0,Hotel,Deli / Bodega,Coffee Shop,Restaurant,Steakhouse,Park,Gym / Fitness Center,Gastropub,Gym,Café
43,7.427,6.621,10.02,-0.80%,186,101216.0,700.0,144.6,0,0,??,0,0,??,101216.0,700.0,144.6,-34.91862,-56.16416,0.039597,0,Hotel,Restaurant,Steakhouse,Park,Pizza Place,Coffee Shop,Bakery,Sushi Restaurant,Café,BBQ Joint


As We can see in the map, k-means clustering divided three groups: La Comercial, Palermo, and Parque Rodo, Pocitos and Punta Carretas as other group. If We scroll some lines above, in the first stage map, We can find the meaning of this clustering: Parque Rodo, Pocitos and Punta Carretas are the three neighborhoods with more purchasing power (according to our indicator). On the other hand, Palermo and La Comercial does not show any differences at simple view, but its purchasing power is much less than the first cluster. 

Given this information and the dataframes above, We can associate Cluster 1 with the most suitable for establish a convenience store. Although this conclusion is debatable, a Hotel is related with a great flow of people with medium-high / high income level, which fits the public target of this stores. 

The third stage of this project will try to find a more specific area inside the region that comprises Cluster 1, namely Parque Rodo, Pocitos and Punta Carretas.

# Third stage:

#### Define limits:

In [28]:
#Re-load modified JSON file that contains only Punta Carretas, Pocitos and Parque Rodo. 
with open('cluster1.json', encoding='utf-8-sig') as fi:
    data3 = json.load(fi)
#Save the file as a variable    
cluster1 = data3

#### Find Pocitos and Parque Rodo's coordinates (Punta Carretas' ones have been already defined)

In [29]:
#Find Pocitos' coordinates
address_po = 'Pocitos, Montevideo' 

geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address_po)
latitude_po= location.latitude
longitude_po = location.longitude

#Find Parque Rodo's coordinates
address_pr = 'Parque Rodo, Montevideo' 

geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address_pr)
latitude_pr= location.latitude
longitude_pr = location.longitude

print(latitude_po, longitude_po)
print(latitude_pr, longitude_pr)

-34.90744 -56.1461843
-34.9100822 -56.1697776


#### Next step is use the Foursquare API to find Hotels in the three neighborhoods:

In [30]:
search_query = 'Hotel'
radius = 2200

url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude_pc, longitude_pc,  VERSION, search_query, radius, LIMIT)

results = requests.get(url).json()

venues = results['response']['venues']
dataframe = json_normalize(venues)

# keep only columns that include venue name, and anything that is associated with location
filtered_columns = ['name', 'categories'] + [col for col in dataframe.columns if col.startswith('location.')] + ['id']
hotelpc_dataframe_filtered = dataframe.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# filter the category for each row
hotelpc_dataframe_filtered['categories'] = hotelpc_dataframe_filtered.apply(get_category_type, axis=1)

# clean column names by keeping only last term
hotelpc_dataframe_filtered.columns = [column.split('.')[-1] for column in hotelpc_dataframe_filtered.columns]

print('Hotels:')
print(hotelpc_dataframe_filtered.shape)

Hotels:
(36, 16)


#### Let's display them in a map:

In [31]:
hotels_map = folium.Map(location=[latitude_pc, longitude_pc], zoom_start=13)

# add Hotels as orange circle markers
for lat, lng, label in zip(hotelpc_dataframe_filtered.lat, hotelpc_dataframe_filtered.lng, hotelpc_dataframe_filtered.categories):
    folium.features.CircleMarker(
        [lat, lng],
        radius=4,
        color='orange',
        popup=label,
        fill = True,
        fill_color='orange',
        fill_opacity=4
    ).add_to(hotels_map)

hotels_map.choropleth(
    geo_data=cluster1,
    data=df_base,
    columns=['BARRIO', 'TOT_DENS'],
    key_on='feature.properties.nombre',
    threshold_scale=threshold_scale,
    fill_color='PuBuGn', 
    fill_opacity=0.3, 
    line_opacity=0.2)
folium.LayerControl().add_to(hotels_map)

hotels_map

The next step consists in define the last criterion for establish the convenience store. This will be based on search  those convenience stores already installed in the same zone where we displayed the hotels, and find some area where the distance between a hotel and the nearest convenience store exceed three blocks. By the way, this is the convenience store's radius of consumption according to GPA(2019).

Thus, We proceed to define the three most competitive convenience stores in Montevideo: "Kinko", "Frog" and "Devoto Express", and display them in the area of interest.

In [32]:
# Kinko stores:
search_query = 'Kinko'
radius = 2500

url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude_pc, longitude_pc,  VERSION, search_query, radius, LIMIT)

results = requests.get(url).json()

venues = results['response']['venues']
dataframe = json_normalize(venues)

# keep only columns that include venue name, and anything that is associated with location
filtered_columns = ['name', 'categories'] + [col for col in dataframe.columns if col.startswith('location.')] + ['id']
kinko_dataframe_filtered = dataframe.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# filter the category for each row
kinko_dataframe_filtered['categories'] = kinko_dataframe_filtered.apply(get_category_type, axis=1)

# clean column names by keeping only last term
kinko_dataframe_filtered.columns = [column.split('.')[-1] for column in kinko_dataframe_filtered.columns]

# Then, repeat the process for Frog and Devoto Express stores:
search_query = 'Devoto_Express'
radius = 2500

url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude_pc, longitude_pc,  VERSION, search_query, radius, LIMIT)

results = requests.get(url).json()

venues = results['response']['venues']
dataframe = json_normalize(venues)

# keep only columns that include venue name, and anything that is associated with location
filtered_columns = ['name', 'categories'] + [col for col in dataframe.columns if col.startswith('location.')] + ['id']
devoto_dataframe_filtered = dataframe.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# filter the category for each row
devoto_dataframe_filtered['categories'] = devoto_dataframe_filtered.apply(get_category_type, axis=1)

# clean column names by keeping only last term
devoto_dataframe_filtered.columns = [column.split('.')[-1] for column in devoto_dataframe_filtered.columns]

search_query = 'Frog'
radius = 2500

url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude_pc, longitude_pc,  VERSION, search_query, radius, LIMIT)

results = requests.get(url).json()

venues = results['response']['venues']
dataframe = json_normalize(venues)

# keep only columns that include venue name, and anything that is associated with location
filtered_columns = ['name', 'categories'] + [col for col in dataframe.columns if col.startswith('location.')] + ['id']
frog_dataframe_filtered = dataframe.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# filter the category for each row
frog_dataframe_filtered['categories'] = frog_dataframe_filtered.apply(get_category_type, axis=1)

# clean column names by keeping only last term
frog_dataframe_filtered.columns = [column.split('.')[-1] for column in frog_dataframe_filtered.columns]

print('Kinko:')
print(kinko_dataframe_filtered.shape)
print('Devoto:')
print(devoto_dataframe_filtered.shape)
print('Frog:')
print(frog_dataframe_filtered.shape)
print('First number indicates how many stores had been found')

Kinko:
(12, 15)
Devoto:
(18, 15)
Frog:
(8, 16)
First number indicates how many stores had been found


In [33]:
final_map = folium.Map(location=[latitude_pc, longitude_pc], zoom_start=13)

# add Hotels as orange circle markers
for lat, lng, label in zip(hotelpc_dataframe_filtered.lat, hotelpc_dataframe_filtered.lng, hotelpc_dataframe_filtered.categories):
    folium.features.CircleMarker(
        [lat, lng],
        radius=4,
        color='orange',
        popup=label,
        fill = True,
        fill_color='orange',
        fill_opacity=4
    ).add_to(final_map)

# add Kinko stores as brown circle markers
for lat, lng, label in zip(kinko_dataframe_filtered.lat, kinko_dataframe_filtered.lng, kinko_dataframe_filtered.categories):
    folium.features.CircleMarker(
        [lat, lng],
        radius=4,
        color='brown',
        popup=label,
        fill = True,
        fill_color='brown',
        fill_opacity=4
    ).add_to(final_map)

# add Devoto stores as brown circle markers
for lat, lng, label in zip(devoto_dataframe_filtered.lat, devoto_dataframe_filtered.lng, devoto_dataframe_filtered.categories):
    folium.features.CircleMarker(
        [lat, lng],
        radius=4,
        color='brown',
        popup=label,
        fill = True,
        fill_color='brown',
        fill_opacity=4
    ).add_to(final_map)
    
# add Frog stores as brown circle markers
for lat, lng, label in zip(frog_dataframe_filtered.lat, frog_dataframe_filtered.lng, frog_dataframe_filtered.categories):
    folium.features.CircleMarker(
        [lat, lng],
        radius=4,
        color='brown',
        popup=label,
        fill = True,
        fill_color='brown',
        fill_opacity=4
    ).add_to(final_map)
    
final_map.choropleth(
    geo_data=cluster1,
    data=df_base,
    columns=['BARRIO', 'TOT_DENS'],
    key_on='feature.properties.nombre',
    threshold_scale=threshold_scale,
    fill_color='PuBuGn', 
    fill_opacity=0.3, 
    line_opacity=0.2)
folium.LayerControl().add_to(final_map)

final_map

**Map references:**


Yellow circles: Hotels

Brown circles: Installed convenience stores

**As conclusion, the best area for establish a convenience store is the center or south-center area of Punta Carretas, according to all the standards defined previously and considering Foursquare's accuracy locating venues.**