In [287]:
import os
import warnings

import numpy as np
import pandas as pd
import requests
from sklearn import preprocessing
from sklearn.cluster import KMeans

In [288]:
# Load the neighborhood table (postal code, city, population and coordinates)
neighborhoods_path = "tarragona_neighborhoods.csv"
df = pd.read_csv(neighborhoods_path)
df.head()

Unnamed: 0,Postalcode,Neighborhood,City,Population,Latitude,Longitude
0,43002,Nou Eixample Nord,Tarragona,26080,41.122781,1.247971
1,43006,Torreforta,Tarragona,17010,41.118823,1.216954
2,43007,Sant Pere i Sant Pau,Tarragona,16694,41.138271,1.252438
3,43005,Nou Eixample Sud,Tarragona,15884,41.116315,1.245997
4,43007,Urbanitzacions de Llevant,Tarragona,13152,41.118099,1.272726


In [289]:
# Foursquare API credentials are read from the environment so the secrets are
# never stored in the notebook file or in version control. Export
# FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE(review): the key pair that used to be hardcoded here should be rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')  # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')  # your Foursquare Secret
VERSION = '20180604'  # sent as the `v` query parameter of every API request

# Fail loudly (but without aborting the notebook) when the credentials are missing;
# the API calls further down cannot succeed without them.
if not (CLIENT_ID and CLIENT_SECRET):
    warnings.warn(
        'Foursquare credentials are not set: define FOURSQUARE_CLIENT_ID and '
        'FOURSQUARE_CLIENT_SECRET in the environment before calling the API.'
    )

In [290]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, limit=100, timeout=30):
    """Collect the venues Foursquare recommends around each neighborhood.

    One request is sent to the Foursquare ``venues/explore`` endpoint per
    (name, latitude, longitude) triple, authenticated with the module-level
    CLIENT_ID, CLIENT_SECRET and VERSION values.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Neighborhood names and their coordinates; iterated in lockstep with zip().
    radius : int, default 500
        Forwarded as the ``radius`` query parameter
        (presumably metres -- see the Foursquare API docs).
    limit : int, default 100
        Forwarded as the ``limit`` query parameter.
    timeout : float, default 30
        Seconds to wait for each HTTP response before requests raises.

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venues',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame has these columns even when no venue was found.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status (e.g. invalid credentials).
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venues',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # make the GET request; `params` lets requests do the URL encoding
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': limit,
            },
            timeout=timeout,
        )
        # surface API errors as HTTPError instead of a KeyError on the payload
        response.raise_for_status()

        groups = response.json().get('response', {}).get('groups', [])
        items = groups[0].get('items', []) if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            # a venue may come without any category; keep it with a missing label
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None,
            ))

    # naming the columns at construction time also works for an empty result
    return pd.DataFrame(rows, columns=columns)

In [291]:
# Query Foursquare once per neighborhood, using the names and coordinates stored in df
venues = getNearbyVenues(df['Neighborhood'], df['Latitude'], df['Longitude'])


Nou Eixample Nord
Torreforta
Sant Pere i Sant Pau
Nou Eixample Sud
Urbanitzacions de Llevant
Campclar
Eixample Tarragona
Barris Marítims
Bonavista
Sant Salvador
Part Alta
Districte 1
Districte 2
Districte 3
Districte 4
Districte 5
Districte 6
Districte 7
Districte 8
Districte 9
Districte 10
El Vendrell
Cambrils
Salou
Valls
Calafell
Vila-seca


In [292]:
# Preview the venue table. 'Neighborhood' is deliberately kept as a regular
# column (not the index): later cells read venues['Neighborhood'] and group by it.
venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venues,Venue Latitude,Venue Longitude,Venue Category
0,Nou Eixample Nord,41.122781,1.247971,Tradicionarius,41.120784,1.25155,Bakery
1,Nou Eixample Nord,41.122781,1.247971,Pizza Nova,41.118857,1.246968,Pizza Place
2,Nou Eixample Nord,41.122781,1.247971,Plaça de la Imperial Tàrraco,41.118683,1.24567,Plaza
3,Nou Eixample Nord,41.122781,1.247971,Casa Boada,41.119042,1.250761,Sandwich Place
4,Nou Eixample Nord,41.122781,1.247971,Panishop,41.120327,1.246174,Coffee Shop


In [293]:
# Non-null entries in every column, counted per neighborhood
venue_groups = venues.groupby('Neighborhood')
venue_groups.count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venues,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Barris Marítims,21,21,21,21,21,21
Bonavista,7,7,7,7,7,7
Calafell,5,5,5,5,5,5
Cambrils,70,70,70,70,70,70
Campclar,8,8,8,8,8,8
Districte 1,90,90,90,90,90,90
Districte 10,4,4,4,4,4,4
Districte 2,19,19,19,19,19,19
Districte 3,8,8,8,8,8,8
Districte 4,73,73,73,73,73,73


In [294]:
# Number of venues found for each neighborhood, as a one-column frame named 'Venues'
venues_per_neighborhood = venues.groupby('Neighborhood')['Venues'].count().to_frame()

# Adding column 'Venues' to the neighborhood table (left join on the name, so a
# neighborhood without any venue gets NaN). A 'Venues' column left over from an
# earlier run is dropped first: joining it a second time would raise
# "columns overlap but no suffix specified", which made the cell fail on re-run.
df = df.drop(columns='Venues', errors='ignore').join(venues_per_neighborhood, on='Neighborhood')
df.head()

Unnamed: 0,Postalcode,Neighborhood,City,Population,Latitude,Longitude,Venues
0,43002,Nou Eixample Nord,Tarragona,26080,41.122781,1.247971,13
1,43006,Torreforta,Tarragona,17010,41.118823,1.216954,6
2,43007,Sant Pere i Sant Pau,Tarragona,16694,41.138271,1.252438,9
3,43005,Nou Eixample Sud,Tarragona,15884,41.116315,1.245997,42
4,43007,Urbanitzacions de Llevant,Tarragona,13152,41.118099,1.272726,9


In [295]:
# one hot encoding: one indicator column per venue category
onehot = pd.get_dummies(venues[['Venue Category']], prefix="", prefix_sep="")

# A venue category may itself be called 'Neighborhood'; insert() below would
# then raise ValueError because the column already exists. Rename the category
# column so the neighborhood name column can always be added.
if 'Neighborhood' in onehot.columns:
    onehot = onehot.rename(columns={'Neighborhood': 'Neighborhood (Category)'})

# put the neighborhood name in front so the rows can be grouped by it
onehot.insert(loc=0, column='Neighborhood', value=venues['Neighborhood'])
onehot.shape

(661, 101)

In [296]:
# Share of each venue category among a neighborhood's venues
# (mean of the one-hot indicators), with the name back as a regular column
category_share = onehot.groupby('Neighborhood').mean()
grouped = category_share.reset_index()
grouped.head()

Unnamed: 0,Neighborhood,Accessories Store,Art Gallery,Art Museum,Asian Restaurant,Athletics & Sports,BBQ Joint,Bakery,Bar,Beach,...,Sushi Restaurant,Tapas Restaurant,Thai Restaurant,Theater,Theme Park,Train Station,Vegetarian / Vegan Restaurant,Wine Bar,Wings Joint,Women's Store
0,Barris Marítims,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.047619,0.0,...,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bonavista,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,...,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Calafell,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Cambrils,0.014286,0.0,0.0,0.0,0.0,0.014286,0.0,0.014286,0.014286,...,0.0,0.085714,0.0,0.0,0.0,0.0,0.0,0.014286,0.0,0.0
4,Campclar,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [297]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first entry of ``row`` is skipped (it holds the neighborhood name in
    the frames this notebook passes in); the remaining entries are sorted in
    descending order and the index labels of the first ``num_top_venues`` of
    them are returned as a NumPy array.
    """
    ranked_labels = row.iloc[1:].sort_values(ascending=False).index.values
    return ranked_labels[:num_top_venues]

In [298]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues: ranks 1-3 take their
# suffix from `indicators`, every later rank gets 'th'. An explicit length
# check is used instead of catching the IndexError with a bare `except:`,
# which would also hide unrelated errors.
columns = ['Neighborhood']
for ind in range(num_top_venues):
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# rank the venue categories of every neighborhood ...
top_venues = [
    return_most_common_venues(grouped.iloc[ind, :], num_top_venues)
    for ind in range(grouped.shape[0])
]

# ... and build the new dataframe in one go instead of filling it row by row
neighborhoods_venues_sorted = pd.DataFrame(top_venues, columns=columns[1:], index=grouped.index)
neighborhoods_venues_sorted.insert(0, 'Neighborhood', grouped['Neighborhood'])

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Barris Marítims,Seafood Restaurant,Mediterranean Restaurant,Japanese Restaurant,Ice Cream Shop,Tapas Restaurant,Park,Restaurant,Supermarket,Bar,Pool
1,Bonavista,Soccer Stadium,Gym,Restaurant,Tapas Restaurant,Bar,Farmers Market,Soccer Field,Fish Market,Department Store,Dessert Shop
2,Calafell,Park,Fruit & Vegetable Store,Falafel Restaurant,Food Court,Mediterranean Restaurant,Women's Store,Fish Market,Dessert Shop,Diner,Donut Shop
3,Cambrils,Seafood Restaurant,Mediterranean Restaurant,Tapas Restaurant,Ice Cream Shop,Italian Restaurant,Café,Spanish Restaurant,Pizza Place,Gastropub,Hotel
4,Campclar,Plaza,Gym / Fitness Center,Department Store,Athletics & Sports,Restaurant,Supermarket,Pharmacy,Soccer Field,Farmers Market,Creperie


# Clustering the neighborhoods

In [299]:
# set number of clusters
kclusters = 10

# feature matrix: the per-category frequencies only, without the name column.
# The `columns=` keyword is required: the positional axis argument of
# DataFrame.drop() was deprecated and then removed in pandas 2.0.
grouped_clustering = grouped.drop(columns='Neighborhood')

# run k-means clustering; n_init is pinned to 10 because newer scikit-learn
# releases changed its default, which would alter the resulting labels
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 1, 8, 1, 1, 1, 5, 1, 1, 1])

In [300]:
# add clustering labels; labels left over from an earlier run are dropped
# first because insert() raises ValueError when the column already exists,
# which made this cell fail when executed twice
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# attach the cluster label and the top venues to the neighborhood table, which
# carries latitude/longitude (left join on the name; df itself is not modified)
merged = df.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

merged.head()

Unnamed: 0,Postalcode,Neighborhood,City,Population,Latitude,Longitude,Venues,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,43002,Nou Eixample Nord,Tarragona,26080,41.122781,1.247971,13,1,Pizza Place,Coffee Shop,Café,Food & Drink Shop,Sandwich Place,Burger Joint,Mediterranean Restaurant,Plaza,Bakery,Hotel
1,43006,Torreforta,Tarragona,17010,41.118823,1.216954,6,4,Hotel,Market,Department Store,Supermarket,Stadium,Women's Store,Dessert Shop,Diner,Donut Shop,Electronics Store
2,43007,Sant Pere i Sant Pau,Tarragona,16694,41.138271,1.252438,9,2,Bakery,Brewery,Grocery Store,Bar,Spanish Restaurant,Soccer Field,Women's Store,Dessert Shop,Diner,Donut Shop
3,43005,Nou Eixample Sud,Tarragona,15884,41.116315,1.245997,42,1,Hotel,Pizza Place,Supermarket,Bakery,Tapas Restaurant,Restaurant,Plaza,Clothing Store,Bar,Coffee Shop
4,43007,Urbanitzacions de Llevant,Tarragona,13152,41.118099,1.272726,9,1,Restaurant,Breakfast Spot,Supermarket,Café,Spanish Restaurant,Hotel,Beach Bar,Beach,Fast Food Restaurant,Farmers Market


In [301]:
# First rows of the per-neighborhood ranking table
neighborhoods_venues_sorted.head(n=5)

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,1,Barris Marítims,Seafood Restaurant,Mediterranean Restaurant,Japanese Restaurant,Ice Cream Shop,Tapas Restaurant,Park,Restaurant,Supermarket,Bar,Pool
1,1,Bonavista,Soccer Stadium,Gym,Restaurant,Tapas Restaurant,Bar,Farmers Market,Soccer Field,Fish Market,Department Store,Dessert Shop
2,8,Calafell,Park,Fruit & Vegetable Store,Falafel Restaurant,Food Court,Mediterranean Restaurant,Women's Store,Fish Market,Dessert Shop,Diner,Donut Shop
3,1,Cambrils,Seafood Restaurant,Mediterranean Restaurant,Tapas Restaurant,Ice Cream Shop,Italian Restaurant,Café,Spanish Restaurant,Pizza Place,Gastropub,Hotel
4,1,Campclar,Plaza,Gym / Fitness Center,Department Store,Athletics & Sports,Restaurant,Supermarket,Pharmacy,Soccer Field,Farmers Market,Creperie


In [302]:
from geopy.geocoders import Nominatim 

# Resolve the map center by geocoding the city name
address = 'Tarragona, ES'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() yields None when nothing matches; stop with a clear message
# instead of an AttributeError on the attribute access below
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Tarragona are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Tarragona are 41.1172364, 1.2546057.


#### Creating Map

In [303]:
import folium 
import matplotlib.cm as cm
import matplotlib.colors as colors

# map centered on the geocoded city coordinates
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow color per cluster
rainbow = [colors.rgb2hex(rgba) for rgba in cm.rainbow(np.linspace(0, 1, kclusters))]

# add markers to the map; rows without a cluster label (NaN after the left
# join) are skipped, and labels are cast to int so they can index `rainbow`
# even when the column was upcast to float
clustered = merged.dropna(subset=['Cluster Labels'])
for lat, lon, poi, cluster in zip(clustered['Latitude'], clustered['Longitude'], clustered['Neighborhood'], clustered['Cluster Labels']):
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # index cluster-1: label 0 wraps around to the last color of the palette
    marker_color = rainbow[cluster-1]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

# Selecting neighborhoods similar to Reus District 1

In [304]:
# Look up the cluster of the first row whose neighborhood is 'Districte 1'
is_district_1 = merged['Neighborhood'] == 'Districte 1'
cluster_label = merged.loc[is_district_1, 'Cluster Labels'].values[0]
print('Reus District 1 is assigned to cluster number', cluster_label)

Reus District 1 is assigned to cluster number 1


In [305]:
# Keep only the neighborhoods that share a cluster with Reus District 1
in_same_cluster = merged['Cluster Labels'].eq(cluster_label)
selected_neighborhoods = merged[in_same_cluster]
selected_neighborhoods.head()

Unnamed: 0,Postalcode,Neighborhood,City,Population,Latitude,Longitude,Venues,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,43002,Nou Eixample Nord,Tarragona,26080,41.122781,1.247971,13,1,Pizza Place,Coffee Shop,Café,Food & Drink Shop,Sandwich Place,Burger Joint,Mediterranean Restaurant,Plaza,Bakery,Hotel
3,43005,Nou Eixample Sud,Tarragona,15884,41.116315,1.245997,42,1,Hotel,Pizza Place,Supermarket,Bakery,Tapas Restaurant,Restaurant,Plaza,Clothing Store,Bar,Coffee Shop
4,43007,Urbanitzacions de Llevant,Tarragona,13152,41.118099,1.272726,9,1,Restaurant,Breakfast Spot,Supermarket,Café,Spanish Restaurant,Hotel,Beach Bar,Beach,Fast Food Restaurant,Farmers Market
5,43006,Campclar,Tarragona,11705,41.120789,1.21111,8,1,Plaza,Gym / Fitness Center,Department Store,Athletics & Sports,Restaurant,Supermarket,Pharmacy,Soccer Field,Farmers Market,Creperie
6,43001,Eixample Tarragona,Tarragona,11333,41.11515,1.251046,100,1,Mediterranean Restaurant,Ice Cream Shop,Bar,Cocktail Bar,Plaza,Tapas Restaurant,Burger Joint,Restaurant,Frozen Yogurt Shop,Hotel


# Finding the neighborhood with the most residents per venue

In [307]:
# Residents per venue: population divided by the number of venues found
df['Target'] = df['Population'].div(df['Venues'])
df.head()

Unnamed: 0,Postalcode,Neighborhood,City,Population,Latitude,Longitude,Venues,Target
0,43002,Nou Eixample Nord,Tarragona,26080,41.122781,1.247971,13,2006.153846
1,43006,Torreforta,Tarragona,17010,41.118823,1.216954,6,2835.0
2,43007,Sant Pere i Sant Pau,Tarragona,16694,41.138271,1.252438,9,1854.888889
3,43005,Nou Eixample Sud,Tarragona,15884,41.116315,1.245997,42,378.190476
4,43007,Urbanitzacions de Llevant,Tarragona,13152,41.118099,1.272726,9,1461.333333


In [310]:
# The 5 neighborhoods with the highest Target value
df.nlargest(n=5, columns='Target')

Unnamed: 0,Postalcode,Neighborhood,City,Population,Latitude,Longitude,Venues,Target
16,43203,Districte 6,Reus,19568,41.162105,1.100983,4,4892.0
25,43820,Calafell,Calafell,24265,41.193233,1.564578,5,4853.0
21,43700,El Vendrell,El Vendrell,35821,41.2173,1.530048,11,3256.454545
1,43006,Torreforta,Tarragona,17010,41.118823,1.216954,6,2835.0
18,43205,Districte 8,Reus,11050,41.148612,1.093174,4,2762.5


In [312]:
# Row(s) whose Target equals the overall maximum
highest_target = df['Target'].max()
df.loc[df['Target'].eq(highest_target)]

Unnamed: 0,Postalcode,Neighborhood,City,Population,Latitude,Longitude,Venues,Target
16,43203,Districte 6,Reus,19568,41.162105,1.100983,4,4892.0
