Part 1 (Problem):
Where in Rio de Janeiro would it be best to open a restaurant. What is the place that has the most reviews, what location seems to be the most popular. Is there a type of restaurant that is more popular than another?

The problem will be solved using the Foursquare API, looking at the usercount and tipcount. This problem will help everyone that wants to open up a restaurant in Rio de Janeiro and wants to know if it is a viable option where they intend on opening.

Part 2 (Data):
The data that will be used is Foursquare data for Rio de Janeiro city. It will focus on venues of restaurant type. The popularity will be an indicator for how good an area is for a restaurant.

In [3]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [4]:
# Get longitude and latitude for Toronto
address = 'Rio de Janeiro, Rio de Janeiro'

geolocator = Nominatim(user_agent="rio_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Rio de Janeiro is {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Rio de Janeiro is -22.9110137, -43.2093727.


In [5]:
CLIENT_ID = 'XBVDVFI3BB3LBMSPWBVCYUDG0AGUZRQP1MFUXOS0SKACIAGK' # your Foursquare ID
CLIENT_SECRET = 'HD34GAYDCO0GOUM1Z3NFCY4JH4FO0V5F4FMOLO4CEDSZO5AV' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: XBVDVFI3BB3LBMSPWBVCYUDG0AGUZRQP1MFUXOS0SKACIAGK
CLIENT_SECRET:HD34GAYDCO0GOUM1Z3NFCY4JH4FO0V5F4FMOLO4CEDSZO5AV


In [6]:
search_query = 'Restaurant'
radius = 50000
print(search_query + ' .... OK!')

Restaurant .... OK!


In [7]:
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, search_query, radius, LIMIT)
url

'https://api.foursquare.com/v2/venues/search?client_id=XBVDVFI3BB3LBMSPWBVCYUDG0AGUZRQP1MFUXOS0SKACIAGK&client_secret=HD34GAYDCO0GOUM1Z3NFCY4JH4FO0V5F4FMOLO4CEDSZO5AV&ll=-22.9110137,-43.2093727&v=20180605&query=Restaurant&radius=50000&limit=100'

In [8]:
results = requests.get(url).json()

In [9]:
# assign relevant part of JSON to venues
venues = results['response']['venues']

# tranform venues into a dataframe
dataframe = json_normalize(venues)
dataframe.head()

  """


Unnamed: 0,id,name,categories,referralId,hasPerk,location.address,location.lat,location.lng,location.labeledLatLngs,location.distance,location.cc,location.city,location.state,location.country,location.formattedAddress,location.crossStreet,location.postalCode
0,56e04e1d38fa3ae11155b863,Riá Restaurant,"[{'id': '52e81612bcbc57f1066b79f1', 'name': 'B...",v-1592090730,False,"Rua Riachuelo, 242",-22.914733,-43.188829,"[{'label': 'display', 'lat': -22.914733, 'lng'...",2146,BR,Rio de Janeiro,RJ,Brasil,"[Rua Riachuelo, 242, Rio de Janeiro, RJ, Brasil]",,
1,4ece6782cc219860f521dd5d,Restaurante Pitada,"[{'id': '4bf58dd8d48988d16b941735', 'name': 'B...",v-1592090730,False,,-22.909734,-43.191598,"[{'label': 'display', 'lat': -22.9097338088607...",1828,BR,Rio de Janeiro,RJ,Brasil,"[Rio de Janeiro, RJ, Brasil]",,
2,50e0b634e4b0f94bd8a38f47,Restaurant El Duero,"[{'id': '4bf58dd8d48988d1c4941735', 'name': 'R...",v-1592090730,False,Navio Soberano,-22.892851,-43.189795,"[{'label': 'display', 'lat': -22.8928508209919...",2849,BR,Rio de Janeiro,RJ,Brasil,"[Navio Soberano, Rio de Janeiro, RJ, Brasil]",,
3,4e0209e4b0fbdf39a4592821,Restaurante Braga,"[{'id': '4bf58dd8d48988d1c4941735', 'name': 'R...",v-1592090730,False,Silva paulet esquina com costa barros,-22.914891,-43.209306,"[{'label': 'display', 'lat': -22.9148909, 'lng...",431,BR,Fortaleza,Ce,Brasil,[Silva paulet esquina com costa barros (Aldeot...,Aldeota,
4,54fc90bc498e67112301efff,Restaurante Drink's,"[{'id': '52e81612bcbc57f1066b7a00', 'name': 'C...",v-1592090730,False,Travessa Angustura,-22.913848,-43.213167,"[{'label': 'display', 'lat': -22.9138483642793...",500,BR,Rio de Janeiro,RJ,Brasil,"[Travessa Angustura (Rua do Matoso), Rio de Ja...",Rua do Matoso,


In [10]:
# keep only columns that include venue name, and anything that is associated with location
filtered_columns = ['name', 'categories'] + [col for col in dataframe.columns if col.startswith('location.')] + ['id']
dataframe_filtered = dataframe.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# filter the category for each row
dataframe_filtered['categories'] = dataframe_filtered.apply(get_category_type, axis=1)

# clean column names by keeping only last term
dataframe_filtered.columns = [column.split('.')[-1] for column in dataframe_filtered.columns]

dataframe_filtered.head()

Unnamed: 0,name,categories,address,lat,lng,labeledLatLngs,distance,cc,city,state,country,formattedAddress,crossStreet,postalCode,id
0,Riá Restaurant,Bistro,"Rua Riachuelo, 242",-22.914733,-43.188829,"[{'label': 'display', 'lat': -22.914733, 'lng'...",2146,BR,Rio de Janeiro,RJ,Brasil,"[Rua Riachuelo, 242, Rio de Janeiro, RJ, Brasil]",,,56e04e1d38fa3ae11155b863
1,Restaurante Pitada,Brazilian Restaurant,,-22.909734,-43.191598,"[{'label': 'display', 'lat': -22.9097338088607...",1828,BR,Rio de Janeiro,RJ,Brasil,"[Rio de Janeiro, RJ, Brasil]",,,4ece6782cc219860f521dd5d
2,Restaurant El Duero,Restaurant,Navio Soberano,-22.892851,-43.189795,"[{'label': 'display', 'lat': -22.8928508209919...",2849,BR,Rio de Janeiro,RJ,Brasil,"[Navio Soberano, Rio de Janeiro, RJ, Brasil]",,,50e0b634e4b0f94bd8a38f47
3,Restaurante Braga,Restaurant,Silva paulet esquina com costa barros,-22.914891,-43.209306,"[{'label': 'display', 'lat': -22.9148909, 'lng...",431,BR,Fortaleza,Ce,Brasil,[Silva paulet esquina com costa barros (Aldeot...,Aldeota,,4e0209e4b0fbdf39a4592821
4,Restaurante Drink's,Comfort Food Restaurant,Travessa Angustura,-22.913848,-43.213167,"[{'label': 'display', 'lat': -22.9138483642793...",500,BR,Rio de Janeiro,RJ,Brasil,"[Travessa Angustura (Rua do Matoso), Rio de Ja...",Rua do Matoso,,54fc90bc498e67112301efff


In [12]:
venues_map = folium.Map(location=[latitude, longitude], zoom_start=13) # generate map centred around the Conrad Hotel

# add a red circle marker to represent the Conrad Hotel
folium.features.CircleMarker(
    [latitude, longitude],
    radius=10,
    color='red',
    popup='Conrad Hotel',
    fill = True,
    fill_color = 'red',
    fill_opacity = 0.6
).add_to(venues_map)

# add the Italian restaurants as blue circle markers
for lat, lng, label in zip(dataframe_filtered.lat, dataframe_filtered.lng, dataframe_filtered.categories):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        popup=label,
        fill = True,
        fill_color='blue',
        fill_opacity=0.6
    ).add_to(venues_map)

# display map
venues_map