In [1]:
import numpy as np # library to handle data in a vectorized manner
import geocoder # import geocoder
from geopy.geocoders import Nominatim

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

#!conda install -c anaconda beautifulsoup4

print('Libraries imported.')

Libraries imported.


#### Import Lisbon neighborhood data and copy to dataframe

In [2]:


from bs4 import BeautifulSoup
import requests

# Download the page that lists the Lisbon neighborhoods.
r = requests.get("https://www.visitarportugal.pt/d-lisboa/c-lisboa", timeout=30)
# Stop on an HTTP error instead of parsing an error page.
r.raise_for_status()
data = r.text
# Name the parser explicitly so the parse does not depend on which optional
# parsers happen to be installed (bs4 warns when none is given).
soup = BeautifulSoup(data, "html.parser")

# The names are read from the <li> items inside the first <div class="fmcol">.
uls = soup.find("div", {'class': 'fmcol'})
if uls is None:
    raise ValueError("Could not find <div class='fmcol'> on the page; its layout may have changed.")

rows = [row.text for row in uls.find_all("li")]

# Coordinates/Latitude/Longitude start empty and are filled in by the geocoding step.
lisbon_df = pd.DataFrame({'Neighborhood': rows},
                         columns=['Neighborhood', 'SearchName', 'Coordinates', 'Latitude', 'Longitude'])
# Search string handed to the geocoder.
lisbon_df['SearchName'] = lisbon_df.Neighborhood + ', Lisbon'

#### The neighborhoods of Alcântara and Benfica are removed because no new bar licenses are being granted there

In [3]:
# Neighborhoods left out of the analysis.
excluded_neighborhoods = ['Alcântara', 'Benfica']

# Keep every other row and renumber the index from zero.
lisbon_df = lisbon_df[~lisbon_df.Neighborhood.isin(excluded_neighborhoods)].reset_index(drop=True)
lisbon_df

Unnamed: 0,Neighborhood,SearchName,Coordinates,Latitude,Longitude
0,Ajuda,"Ajuda, Lisbon",,,
1,Alvalade,"Alvalade, Lisbon",,,
2,Areeiro,"Areeiro, Lisbon",,,
3,Arroios,"Arroios, Lisbon",,,
4,Avenidas Novas,"Avenidas Novas, Lisbon",,,
5,Beato,"Beato, Lisbon",,,
6,Belém,"Belém, Lisbon",,,
7,Campo de Ourique,"Campo de Ourique, Lisbon",,,
8,Campolide,"Campolide, Lisbon",,,
9,Carnide,"Carnide, Lisbon",,,


#### Get geolocation

In [4]:
from geopy.extra.rate_limiter import RateLimiter

# Nominatim must be given an identifying user agent: geopy warns about the
# default one and geopy 2.x refuses to run with it.
geolocator = Nominatim(user_agent='lisbon-neighborhood-analysis', timeout=None)
# Space the lookups out (one per second) instead of sending them back to back.
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)
lisbon_df['Coordinates'] = lisbon_df['SearchName'].apply(geocode)

# geocode() returns None when nothing matches; report those names explicitly
# rather than failing later with an AttributeError on `.latitude`.
not_geocoded = lisbon_df.loc[lisbon_df['Coordinates'].apply(lambda place: place is None), 'SearchName']
if not not_geocoded.empty:
    raise ValueError('Could not geocode: {}'.format('; '.join(not_geocoded)))

lisbon_df['Latitude'] = lisbon_df['Coordinates'].apply(lambda place: place.latitude)
lisbon_df['Longitude'] = lisbon_df['Coordinates'].apply(lambda place: place.longitude)

print(lisbon_df[['Neighborhood','Latitude','Longitude']])
    

  """Entry point for launching an IPython kernel.


               Neighborhood   Latitude  Longitude
0                     Ajuda  38.712186  -9.201217
1                  Alvalade  38.753341  -9.149140
2                   Areeiro  38.741910  -9.132269
3                   Arroios  38.726185  -9.136683
4            Avenidas Novas  38.736436  -9.149825
5                     Beato  38.732622  -9.110240
6                     Belém  38.697769  -9.209432
7          Campo de Ourique  38.718213  -9.165223
8                 Campolide  38.731827  -9.167911
9                   Carnide  38.759893  -9.189619
10                  Estrela  38.707502  -9.163198
11                   Lumiar  38.771330  -9.163768
12                  Marvila  38.748259  -9.112754
13             Misericórdia  38.710684  -9.148209
14        Parque das Naçoes  38.775031  -9.097135
15          Penha de França  38.728392  -9.123655
16        Santa Maria Maior  38.712440  -9.132814
17            Santo António  38.721491  -9.148968
18  São Domingos de Benfica  38.746210  -9.176222


#### Create map with Lisbon neighborhoods

In [5]:
# Look up the city itself; its coordinates are used to centre the maps.
address = 'Lisbon, Portugal'
location = geolocator.geocode(address)
latitude, longitude = location.latitude, location.longitude
print('The geograpical coordinates of Lisbon are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Lisbon are 38.7077507, -9.1365919.


In [6]:
# Make sure the coordinate columns are numeric before plotting.
coordinate_columns = ["Latitude", "Longitude"]
lisbon_df[coordinate_columns] = lisbon_df[coordinate_columns].apply(pd.to_numeric)

# Base map centred on the city coordinates.
map_lisbon = folium.Map(location=[latitude, longitude], zoom_start=12)

# Appearance shared by every neighborhood marker.
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# One circle per neighborhood, with the neighborhood name as its popup.
neighborhood_points = zip(lisbon_df['Latitude'], lisbon_df['Longitude'], lisbon_df['Neighborhood'])
for point_lat, point_lng, neighborhood_name in neighborhood_points:
    folium.CircleMarker(
        [point_lat, point_lng],
        popup=folium.Popup(neighborhood_name, parse_html=True),
        **marker_style
    ).add_to(map_lisbon)

map_lisbon

#### Call Foursquare to return venues

In [7]:
import os

# Foursquare credentials are read from the environment so they are never stored
# in the notebook. Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before
# starting the kernel; a missing variable raises KeyError naming it.
# NOTE(review): the credentials that used to be hardcoded here were committed
# in plain text and should be rotated.
CLIENT_ID = os.environ['FOURSQUARE_CLIENT_ID'] # your Foursquare ID
CLIENT_SECRET = os.environ['FOURSQUARE_CLIENT_SECRET'] # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 300 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

In [8]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare "explore" endpoint around each point and tabulate the venues.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Label and coordinates of each search centre; they are consumed in
        parallel with ``zip``.
    radius : int, default 500
        Value sent as the ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty (but has these columns) when no venue is found.

    Raises
    ------
    requests.HTTPError
        If Foursquare answers with an HTTP error status.

    Notes
    -----
    Reads the module-level ``CLIENT_ID``, ``CLIENT_SECRET``, ``VERSION`` and
    ``LIMIT``. Venues that come back without any category are skipped, because
    only the first category name is kept for each venue.
    """
    venue_columns = ['Neighborhood',
                     'Neighborhood Latitude',
                     'Neighborhood Longitude',
                     'Venue',
                     'Venue Latitude',
                     'Venue Longitude',
                     'Venue Category']
    explore_url = 'https://api.foursquare.com/v2/venues/explore'

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and escape the query string.
        query = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }
        response = requests.get(explore_url, params=query, timeout=30)
        # Surface API failures here instead of as a KeyError on the payload.
        response.raise_for_status()

        groups = response.json()['response'].get('groups') or []
        items = groups[0]['items'] if groups else []

        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            if not categories:
                # No category to record for this venue.
                continue
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name']))

    # Passing the column names here keeps the schema even when there are no rows.
    return pd.DataFrame(venue_rows, columns=venue_columns)

In [9]:
# Fetch the venues around every neighborhood centre (default search radius).
search_names = lisbon_df['SearchName']
search_latitudes = lisbon_df['Latitude']
search_longitudes = lisbon_df['Longitude']
lisbon_venues = getNearbyVenues(search_names, search_latitudes, search_longitudes)

Ajuda, Lisbon
Alvalade, Lisbon
Areeiro, Lisbon
Arroios, Lisbon
Avenidas Novas, Lisbon
Beato, Lisbon
Belém, Lisbon
Campo de Ourique, Lisbon
Campolide, Lisbon
Carnide, Lisbon
Estrela, Lisbon
Lumiar, Lisbon
Marvila, Lisbon
Misericórdia, Lisbon
Parque das Naçoes, Lisbon
Penha de França, Lisbon
Santa Maria Maior, Lisbon
Santo António, Lisbon
São Domingos de Benfica, Lisbon
São Vicente, Lisbon


In [10]:
# Alphabetical list of the distinct venue categories that were returned.
sorted_categories = lisbon_venues['Venue Category'].sort_values()
sorted_categories.unique()

array(['Accessories Store', 'African Restaurant', 'American Restaurant',
       'Amphitheater', 'Argentinian Restaurant', 'Art Gallery',
       'Art Museum', 'Arts & Crafts Store', 'Asian Restaurant',
       'BBQ Joint', 'Bagel Shop', 'Bakery', 'Bar', 'Basketball Stadium',
       'Bed & Breakfast', 'Bistro', 'Bookstore', 'Boutique',
       'Breakfast Spot', 'Brewery', 'Burger Joint', 'Bus Station', 'Café',
       'Casino', 'Castle', 'Cheese Shop', 'Chinese Restaurant', 'Church',
       'Clothing Store', 'Cocktail Bar', 'Coffee Shop',
       'Comfort Food Restaurant', 'Convenience Store', 'Cosmetics Shop',
       'Creperie', 'Cultural Center', 'Dance Studio', 'Deli / Bodega',
       'Department Store', 'Dessert Shop', 'Dim Sum Restaurant', 'Diner',
       'Dive Bar', 'Electronics Store', 'Empanada Restaurant',
       'Event Space', 'Exhibit', 'Farmers Market', 'Fast Food Restaurant',
       'Fish & Chips Shop', 'Flea Market', 'Flower Shop', 'Food',
       'Food & Drink Shop', 'Food Cour

In [11]:
# Keep only venues whose category name matches one of these keywords.
food_category_pattern = 'Restaurant|Diner|Steakhouse|Burger|Pizza'
is_food_venue = lisbon_venues['Venue Category'].str.contains(food_category_pattern)
lisbon_venues = lisbon_venues[is_food_venue]

#### Clean venues data and order by most common venues

In [12]:
# one hot encoding
lisbon_onehot = pd.get_dummies(lisbon_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
lisbon_onehot['Neighborhood'] = lisbon_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [lisbon_onehot.columns[-1]] + list(lisbon_onehot.columns[:-1])
lisbon_onehot = lisbon_onehot[fixed_columns]

lisbon_grouped = lisbon_onehot.groupby('Neighborhood').mean().reset_index()

def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest values in ``row``, skipping its first entry.

    The first element of ``row`` is ignored, the rest is sorted in descending
    order, and the index labels of the first ``num_top_venues`` entries are
    returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

num_top_venues = 3

indicators = ['st', 'nd', 'rd']

# Column headers: 'Neighborhood' followed by '1st', '2nd', '3rd', then 'Nth'.
columns = ['Neighborhood']
for ind in range(num_top_venues):
    # Explicit bounds check instead of a bare `except`, which would also hide unrelated errors.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# Top categories of every neighborhood, in the row order of lisbon_grouped.
top_venues = [
    return_most_common_venues(lisbon_grouped.iloc[ind, :], num_top_venues)
    for ind in range(lisbon_grouped.shape[0])
]

# Build the table in one go rather than filling it row by row.
neighborhoods_venues_sorted = pd.DataFrame(top_venues, columns=columns[1:], index=lisbon_grouped.index)
neighborhoods_venues_sorted.insert(0, 'Neighborhood', lisbon_grouped['Neighborhood'])

# Work on a copy without the short name: deleting the column from an alias of
# lisbon_df would also remove it from lisbon_df and break re-running earlier cells.
lisbon_tomerge = lisbon_df.drop(columns='Neighborhood')

#### Cluster Lisbon neighborhoods based on most common restaurants and bars and display map

In [13]:
# set number of clusters
kclusters = 3

lisbon_grouped_clustering = lisbon_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(lisbon_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:3] 

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

lisbon_tomerge.rename(columns={"SearchName": "Neighborhood"}, inplace=True)

# merge lisbon_grouped with lisbon_df to add latitude/longitude for each neighborhood
lisbon_merged = lisbon_tomerge.join(neighborhoods_venues_sorted
                                     .set_index('Neighborhood'), on='Neighborhood')

In [14]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(lisbon_merged['Latitude'], lisbon_merged['Longitude'], lisbon_merged['Neighborhood'], lisbon_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

#### 3 clusters:<br><br>- Cluster 0 - Mostly meatless restaurants;<br>- Cluster 1 - Portuguese/Classic restaurants;<br>- Cluster 2 - Informal dining.

In [15]:
# Rows of cluster 0, showing column 1 plus every column from position 5 onwards.
in_cluster_0 = lisbon_merged['Cluster Labels'] == 0
cluster_0_columns = lisbon_merged.columns[[1] + list(range(5, lisbon_merged.shape[1]))]
lisbon_merged.loc[in_cluster_0, cluster_0_columns]

Unnamed: 0,Coordinates,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
0,"(Ajuda, LSB, Lisboa, Grande Lisboa, Área Metro...",Seafood Restaurant,Restaurant,Vegetarian / Vegan Restaurant
5,"(Beato, LSB, Lisboa, Grande Lisboa, Área Metro...",Restaurant,Vegetarian / Vegan Restaurant,Empanada Restaurant


In [16]:
# Rows of cluster 1, showing column 1 plus every column from position 5 onwards.
in_cluster_1 = lisbon_merged['Cluster Labels'] == 1
cluster_1_columns = lisbon_merged.columns[[1] + list(range(5, lisbon_merged.shape[1]))]
lisbon_merged.loc[in_cluster_1, cluster_1_columns]

Unnamed: 0,Coordinates,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
2,"(Areeiro, LSB, Lisboa, Grande Lisboa, Área Met...",Portuguese Restaurant,Restaurant,Argentinian Restaurant
3,"(Arroios, LSB, Lisboa, Grande Lisboa, Área Met...",Portuguese Restaurant,Indian Restaurant,Restaurant
6,"(Belém, LSB, Lisboa, Grande Lisboa, Área Metro...",Portuguese Restaurant,Restaurant,Seafood Restaurant
7,"(Campo de Ourique, LSB, Lisboa, Grande Lisboa,...",Portuguese Restaurant,Italian Restaurant,Seafood Restaurant
8,"(Campolide, LSB, Lisboa, Grande Lisboa, Área M...",Portuguese Restaurant,Japanese Restaurant,Vegetarian / Vegan Restaurant
9,"(Carnide, LSB, Lisboa, Grande Lisboa, Área Met...",Portuguese Restaurant,Restaurant,Seafood Restaurant
13,"(Misericórdia, LSB, Lisboa, Grande Lisboa, Áre...",Portuguese Restaurant,Restaurant,Seafood Restaurant
15,"(Penha de França, LSB, Lisboa, Grande Lisboa, ...",Portuguese Restaurant,Vegetarian / Vegan Restaurant,Empanada Restaurant
16,"(Santa Maria Maior, LSB, Lisboa, Grande Lisboa...",Portuguese Restaurant,Restaurant,Mediterranean Restaurant
17,"(Santo António, LSB, Lisboa, Grande Lisboa, Ár...",Portuguese Restaurant,Restaurant,Sushi Restaurant


In [17]:
# Rows of cluster 2, showing column 1 plus every column from position 5 onwards.
in_cluster_2 = lisbon_merged['Cluster Labels'] == 2
cluster_2_columns = lisbon_merged.columns[[1] + list(range(5, lisbon_merged.shape[1]))]
lisbon_merged.loc[in_cluster_2, cluster_2_columns]

Unnamed: 0,Coordinates,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
1,"(Alvalade, LSB, Lisboa, Grande Lisboa, Área Me...",Restaurant,Asian Restaurant,Burger Joint
4,"(Avenidas Novas, LSB, Lisboa, Grande Lisboa, Á...",Restaurant,Portuguese Restaurant,Vegetarian / Vegan Restaurant
10,"(Estrela, LSB, Lisboa, Grande Lisboa, Área Met...",Restaurant,Italian Restaurant,Portuguese Restaurant
11,"(Lumiar, LSB, Lisboa, Grande Lisboa, Área Metr...",Argentinian Restaurant,Tapas Restaurant,Restaurant
12,"(Marvila, LSB, Lisboa, Grande Lisboa, Área Met...",Restaurant,Fast Food Restaurant,Middle Eastern Restaurant
14,"(Parque das Nações, LSB, Lisboa, Grande Lisboa...",Burger Joint,Sushi Restaurant,Restaurant
18,"(São Domingos de Benfica, LSB, Lisboa, Grande ...",Burger Joint,Chinese Restaurant,Vegetarian / Vegan Restaurant


#### The insight from clustering is limited. Get the number of Japanese and sushi restaurants per neighborhood

In [43]:
def _count_japanese_venues(categories):
    """Number of category names in the group that match 'Jap' or 'Sushi'."""
    return categories.str.contains('Jap|Sushi').sum()

# Matching venues per neighborhood, largest count first.
categories_by_neighborhood = lisbon_venues.groupby('Neighborhood')['Venue Category']
japanese_counts = categories_by_neighborhood.apply(_count_japanese_venues)
lisbon_top = (
    japanese_counts
    .reset_index(name='Jap/Sushi counts')
    .sort_values('Jap/Sushi counts', ascending = False)
)
lisbon_top.head()

Unnamed: 0,Neighborhood,Jap/Sushi counts
17,"Santo António, Lisbon",4
4,"Avenidas Novas, Lisbon",4
14,"Parque das Naçoes, Lisbon",3
10,"Estrela, Lisbon",1
16,"Santa Maria Maior, Lisbon",1


#### For the neighborhoods with the most Japanese and sushi restaurants, find the rental price per sqm

In [79]:
# Download the article that lists a price per square metre for each parish.
r2 = requests.get("http://www.apartamentos-lisboa.com/p2955-saiba-qual-preco-por-lisboa-nas-diferentes-zonas.html", timeout=30)
# Stop on an HTTP error instead of parsing an error page.
r2.raise_for_status()
data = r2.text
# Name the parser explicitly so the parse does not depend on installed extras.
soup = BeautifulSoup(data, "html.parser")

neighborhoods = []
price_sqm = []
for header in soup.find_all('h3'):
    paragraph = header.find_next('p')
    if paragraph is None:
        # A heading with no following paragraph carries no price text.
        continue
    neighborhoods.append(header.get_text())
    # The price is taken from the last 15 characters of the paragraph that follows the heading.
    price_sqm.append(paragraph.get_text()[-15:])

prices_df = pd.DataFrame({'Neighborhood': neighborhoods, 'Price_sqm': price_sqm},
                         columns=['Neighborhood', 'Price_sqm'])
# Keep the result: dropna() returns a new frame and does not modify in place.
prices_df = prices_df.dropna()

# Sort on the numeric part of the price. Sorting the raw strings compares
# character by character, which is only correct while every price has the
# same number of digits. Rows without a number are placed last.
price_value = pd.to_numeric(prices_df['Price_sqm'].str.extract(r'(\d+)', expand=False), errors='coerce')
prices_df.loc[price_value.sort_values(ascending=False).index]

Unnamed: 0,Neighborhood,Price_sqm
0,Freguesia de Santo António:,5153 euros/m2.
1,Freguesia da Misericórdia:,4917 euros/m2.
2,Freguesia das Avenidas Novas:,4040 euros/m2.
3,Freguesia de Belém:,3743 euros/m2.
4,Freguesia da Estrela:,3611 euros/m2.
5,Freguesia de Santa Maria Maior:,3451 euros/m2.
6,Freguesia de Alcântara:,2984 euros/m2.
7,Freguesia de Campo de Ourique:,2797 euros/m2.
8,Freguesia de Alvalade:,2787 euros/m2.
11,Freguesia de São Vicente:,2580 euros/m2.


#### Avenidas Novas is selected as the location for the Sake Bar, as it has the highest number of Japanese and sushi restaurants but a lower rental price than Santo António.