# The optimal location for a new shopping center - Code

## Data collection and preparation

In [248]:
# list of postal codes for Milano
CAPs = [20121, 20122, 20123, 20124, 20125, 20126, 20127, 20128, 20129, 20131, 
        20132, 20133, 20134, 20135, 20136, 20137, 20138, 20139, 20141, 20142,
        20143, 20144, 20145, 20146, 20147, 20148, 20149, 20151, 20152, 20153,
        20154, 20155, 20156, 20157, 20158, 20159, 20161, 20162]

In [249]:
# construct a list that stores the data to construct the dataframe
# getting the latitude and longitude information with the Nominatim API
from geopy.geocoders import Nominatim
rows = []
geolocator = Nominatim(user_agent="coursera_project")
for c in CAPs:
    try:
        loc = geolocator.geocode("{0} Italy".format(c))
        rows.append([c, loc.address.split(",")[0], loc.latitude, loc.longitude])
    except:
        pass

In [250]:
# create the dataframe to store the information about the areas
import pandas as pd
milanAreas = pd.DataFrame(rows, columns=["CAP", "name", "latitude", "longitude"])
print(milanAreas.shape)
milanAreas

(32, 4)


Unnamed: 0,CAP,name,latitude,longitude
0,20121,Municipio 1,45.46874,9.182879
1,20122,Municipio 1,45.461931,9.19636
2,20123,Municipio 1,45.463221,9.177478
3,20124,Municipio 2,45.48478,9.202348
4,20125,Greco,45.499673,9.204903
5,20126,Segnano,45.513329,9.217614
6,20127,Gorla,45.50251,9.222735
7,20128,Precotto,45.51493,9.225498
8,20129,Municipio 3,45.471425,9.213726
9,20131,Città Studi,45.48376,9.222421


In [53]:
address = 'Piazza del Duomo, Duomo, Municipio 1, Milano, Lombardia, Italia'
loc = geolocator.geocode(address)
center_lat, center_long = loc.latitude, loc.longitude
print('The geograpical coordinate of the center of Milano are {}, {}.'.format(center_lat, center_long))

The geograpical coordinate of the center of Milano are 45.46420795, 9.190010308888969.


In [54]:
import folium
map_milano = folium.Map(location=[center_lat, center_long], zoom_start=12)
# add markers to map
for lat, lng, name, cap in zip(milanAreas['latitude'], milanAreas['longitude'], 
                                           milanAreas['name'], milanAreas['CAP']):
    label = '{}, {}'.format(cap, name)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker([lat, lng], radius=8, popup=label, color='red', fill=True,
                        fill_color='#FF5733', fill_opacity=0.5, parse_html=False).add_to(map_milano)  
    
map_milano

In [45]:
CLIENT_ID = '0WSSLBASAZEOMXIEUC5W4UDTLYADATYKYH1J0GBY4KE0LXVN' # your Foursquare ID
CLIENT_SECRET = 'XIFFL2HAJOUBHMQ2ABZDOUOLQE4MUIFZHEIJ05DS254KEYTD' # your Foursquare Secret
VERSION = '20200330' # Foursquare API version
LIMIT = 100

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 0WSSLBASAZEOMXIEUC5W4UDTLYADATYKYH1J0GBY4KE0LXVN
CLIENT_SECRET:XIFFL2HAJOUBHMQ2ABZDOUOLQE4MUIFZHEIJ05DS254KEYTD


In [20]:
import requests

def getNearbyVenues(names, latitudes, longitudes, radius=1000, categoryID=None):
    
    venues_list=[]
    
    for n, name, lat, lng in zip(range(len(names)), names, latitudes, longitudes):
            
        if categoryID is None:
            # create the API request URL
            url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(
                CLIENT_ID, CLIENT_SECRET, lat, lng, VERSION, radius, LIMIT)
        else:
            url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}&categoryId={}'.format(
                CLIENT_ID, CLIENT_SECRET, lat, lng, VERSION, radius, LIMIT, categoryID)   
            
        # make the GET request
        results = requests.get(url).json()
        results = results["response"]['groups'][0]['items']
        if len(results) == 0:
            print("Warning! found no venue for neighboorhood nr. {1}: {0}".format(name, n))
        
        # return only relevant information for each nearby venue
        venues_list.append([ (name, lat, lng, v['venue']['name'], v['venue']['location']['lat'], 
                              v['venue']['location']['lng'], v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['CAP', 
                  'CAP Latitude', 
                  'CAP Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    print("\n Finished... a total number of {0} venues has been found!".format(len(nearby_venues)))
    return(nearby_venues)


In [21]:
# get all the venues
allVenues = getNearbyVenues(milanAreas["CAP"], milanAreas["latitude"], milanAreas["longitude"])


 Finished... a total number of 2572 venues has been found!


In [22]:
print(allVenues.shape)
allVenues.head()

(2572, 7)


Unnamed: 0,CAP,CAP Latitude,CAP Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,20121,45.46874,9.182879,Piazza Castello,45.468965,9.181312,Plaza
1,20121,45.46874,9.182879,Castello Sforzesco,45.469545,9.180424,Castle
2,20121,45.46874,9.182879,Fontana del Castello Sforzesco,45.469237,9.180917,Fountain
3,20121,45.46874,9.182879,Giovanni Cova & C.,45.468816,9.184121,Bakery
4,20121,45.46874,9.182879,Antonia,45.46889,9.184799,Accessories Store


In [25]:
# category ID of the category Shop & Service
categoryID = "4d4b7105d754a06378d81259"
# get all the Shop & Service venues
shopVenue = getNearbyVenues(milanAreas["CAP"], milanAreas["latitude"], milanAreas["longitude"], categoryID=categoryID)


 Finished... a total number of 1188 venues has been found!


In [26]:
print(shopVenue.shape)
shopVenue.head()

(1188, 7)


Unnamed: 0,CAP,CAP Latitude,CAP Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,20121,45.46874,9.182879,Antonia,45.46889,9.184799,Accessories Store
1,20121,45.46874,9.182879,Gay Odin,45.466187,9.180801,Chocolate Shop
2,20121,45.46874,9.182879,New Old Camera,45.467359,9.184171,Camera Store
3,20121,45.46874,9.182879,Fabriano Boutique,45.470025,9.184987,Stationery Store
4,20121,45.46874,9.182879,Flying Tiger,45.465945,9.18485,Gift Shop


## First step: clustering of all the areas of Milano

In [69]:
# count the total number of shops per each area defined and labelled by its CAP
shopVenue_count = shopVenue[["CAP","Venue"]].groupby('CAP').count().reset_index()
shopVenue_count.columns = ["CAP", "Shop Total Nr"]
shopVenue_count.set_index("CAP", inplace=True)

# count the number of shop types present in each area and labelled by its CAP
shopVenue_countunique = shopVenue[["CAP","Venue Category"]].groupby('CAP')["Venue Category"].nunique().reset_index()
shopVenue_countunique.columns = ["CAP", "Shop Type Nr"]
shopVenue_countunique.set_index("CAP", inplace=True)

# join the two columns in a single dataframe
shopVenue_count = shopVenue_count.join(shopVenue_countunique, on="CAP", lsuffix='', rsuffix='')
shopVenue_count.reset_index()

print(shopVenue_count.shape)
shopVenue_count.head()

(34, 2)


Unnamed: 0_level_0,Shop Total Nr,Shop Type Nr
CAP,Unnamed: 1_level_1,Unnamed: 2_level_1
20121,100,34
20122,100,27
20123,81,29
20124,86,36
20125,23,16


In [70]:
# now determine the total number of venues in each area, by counting the elements in the allVenues dataframe
allVenue_count = allVenues[["CAP","Venue"]].groupby('CAP').count()
allVenue_count.columns = ["Venue Total Nr"]

print(allVenue_count.shape)
allVenue_count.head()

(34, 1)


Unnamed: 0_level_0,Venue Total Nr
CAP,Unnamed: 1_level_1
20121,100
20122,100
20123,100
20124,82
20125,82


In [71]:
# now compute the distance between each neighborhood center and the city center using the geidesic distance
# implemented in the geopy library

import geopy.distance

# coordinates of the city center
center = (center_lat, center_long)

# loop over the neighborhoods and compute the distance
rows = []
for cap, lat, long in zip(milanAreas["CAP"], milanAreas["latitude"], milanAreas["longitude"]):
    neighborhood = (lat, long)
    rows.append([cap, geopy.distance.distance(center, neighborhood).km])
    
# create the dataframe to store the information about the areas
distances = pd.DataFrame(rows, columns=["CAP", "distance"]).set_index("CAP")

print(distances.shape)
distances.head()

(34, 1)


Unnamed: 0_level_0,distance
CAP,Unnamed: 1_level_1
20121,0.751457
20122,0.557364
20123,0.986282
20124,2.48161
20125,4.110022


In [144]:
# now join all the tables in a single dataframe
df1 = shopVenue_count.join(allVenue_count, on="CAP", lsuffix='', rsuffix='').join(
    distances, on="CAP", lsuffix='', rsuffix='')

print(df1.shape)
df1

(34, 4)


Unnamed: 0_level_0,Shop Total Nr,Shop Type Nr,Venue Total Nr,distance
CAP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
20121,100,34,100,0.751457
20122,100,27,100,0.557364
20123,81,29,100,0.986282
20124,86,36,82,2.48161
20125,23,16,82,4.110022
20126,20,15,94,5.870387
20127,13,10,64,4.966618
20128,18,13,55,6.282979
20129,47,27,100,2.020685
20131,64,35,78,3.338398


In [145]:
# now cluster data

# set number of clusters
kclusters = 5

# import k-means from clustering stage
from sklearn.cluster import KMeans

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(df1)

# check cluster labels generated for each row in the dataframe
kmeans.labels_


array([2, 2, 2, 2, 0, 0, 3, 3, 4, 2, 3, 0, 3, 4, 4, 4, 1, 0, 1, 4, 4, 3,
       3, 4, 4, 1, 4, 0, 1, 1, 0, 4, 1, 3], dtype=int32)

In [146]:
# add the cluster column to the dataframe
df1["cluster"] = kmeans.labels_

# add the latitude and longitude columns to the dataframe
milanAreas
df1 = df1.join(milanAreas.set_index("CAP"), on="CAP", lsuffix='', rsuffix='')
df1.head()

df1.reset_index(inplace=True)

In [147]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np

# define a function to plot map with colored clusters
def plot_map_clusters(df):
    
    # create map
    map_milano = folium.Map(location=[center_lat, center_long], zoom_start=11)
    
    # set color scheme for the clusters
    x = np.arange(kclusters)
    ys = [i + x + (i*x)**2 for i in range(kclusters)]
    colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
    rainbow = [colors.rgb2hex(i) for i in colors_array]

    # add markers to the map
    markers_colors = []
    for lat, lon, cap, name, cluster in zip(df['latitude'], df['longitude'], df['CAP'], df["name"], df['cluster']):
        label = folium.Popup("{0}, {1} - Cluster {2}".format(name, cap, cluster), parse_html=True)
        folium.CircleMarker([lat, lon], radius=8, popup=label, color=rainbow[cluster-1], fill=True,
                        fill_color=rainbow[cluster-1], fill_opacity=0.7, parse_html=False).add_to(map_milano)  
       
    return map_milano

In [148]:
plot_map_clusters(df1)

In [149]:
df1.loc[df1['cluster'] == 0, :] # red, group 3

Unnamed: 0,CAP,Shop Total Nr,Shop Type Nr,Venue Total Nr,distance,cluster,name,latitude,longitude
4,20125,23,16,82,4.110022,0,Greco,45.499673,9.204903
5,20126,20,15,94,5.870387,0,Segnano,45.513329,9.217614
11,20133,29,19,100,2.91031,0,Municipio 4,45.467552,9.226919
17,20139,31,22,80,3.490976,0,Municipio 4,45.439911,9.218294
27,20155,30,19,100,3.721048,0,Bullona,45.491527,9.162498
30,20158,32,19,90,4.365273,0,Dergano,45.501537,9.172645


In [150]:
df1.loc[df1['cluster'] == 1, :] # violet, group 5

Unnamed: 0,CAP,Shop Total Nr,Shop Type Nr,Venue Total Nr,distance,cluster,name,latitude,longitude
16,20138,8,6,21,5.042223,1,Morsenchio,45.444595,9.248136
18,20142,6,4,23,4.497375,1,Quartiere Sant'Ambrogio,45.427615,9.16547
25,20152,5,3,19,6.608144,1,Sella Nuova,45.452183,9.10727
28,20156,11,9,40,5.338463,1,Villapizzone,45.503402,9.150537
29,20157,8,3,16,6.704582,1,Quarto Oggiaro,45.512253,9.138147
32,20161,14,8,32,6.210545,1,Affori,45.518615,9.171888


In [151]:
df1.loc[df1['cluster'] == 2, :] # cyan, group 1

Unnamed: 0,CAP,Shop Total Nr,Shop Type Nr,Venue Total Nr,distance,cluster,name,latitude,longitude
0,20121,100,34,100,0.751457,2,Municipio 1,45.46874,9.182879
1,20122,100,27,100,0.557364,2,Municipio 1,45.461931,9.19636
2,20123,81,29,100,0.986282,2,Municipio 1,45.463221,9.177478
3,20124,86,36,82,2.48161,2,Municipio 2,45.48478,9.202348
9,20131,64,35,78,3.338398,2,Città Studi,45.48376,9.222421


In [152]:
df1.loc[df1['cluster'] == 3, :] # green, group 4

Unnamed: 0,CAP,Shop Total Nr,Shop Type Nr,Venue Total Nr,distance,cluster,name,latitude,longitude
6,20127,13,10,64,4.966618,3,Gorla,45.50251,9.222735
7,20128,18,13,55,6.282979,3,Precotto,45.51493,9.225498
10,20132,20,15,54,5.525985,3,Cimiano,45.500142,9.23886
12,20134,15,13,74,4.441639,3,Lambrate,45.476655,9.243984
21,20146,35,18,65,4.102013,3,Lorenteggio,45.45511,9.139183
22,20147,18,14,55,4.696594,3,Arzaga,45.45677,9.130899
33,20162,9,7,58,5.465445,3,Pratocentenaro,45.513087,9.197671


In [153]:
df1.loc[df1['cluster'] == 4, :] # orange, group 2

Unnamed: 0,CAP,Shop Total Nr,Shop Type Nr,Venue Total Nr,distance,cluster,name,latitude,longitude
8,20129,47,27,100,2.020685,4,Municipio 3,45.471425,9.213726
13,20135,51,28,100,1.968077,4,Porta Romana,45.454655,9.211198
14,20136,43,23,100,1.651284,4,Porta Ticinese,45.451155,9.179927
15,20137,34,20,100,2.769024,4,Calvairate,45.455834,9.223354
19,20143,41,25,100,3.131315,4,San Cristoforo,45.44669,9.158657
20,20144,39,24,100,2.219633,4,Municipio 6,45.457383,9.163339
23,20149,38,26,100,3.36155,4,Tre Torri-Fiera,45.479528,9.152944
24,20151,37,23,90,5.447322,4,Boldinasco,45.495575,9.136475
26,20154,37,24,100,2.431102,4,Municipio 8,45.48301,9.174123
31,20159,45,26,100,3.18437,4,Municipio 9,45.492812,9.187665


The analysis show that the areas of Milano can be classified in this way:

1. central areas, with a **large number of venues**, a **large number of shops** of diverse type (cluster 2, color cyan)
2. non central areas, with a **large number of venues**, and a **relatively high number of shops** still of diverse type (cluster 4, color orange)
3. non central areas, with a **moderate number of shops** and a **large number of venues** (cluster 0, color red)
4. non central areas, with a **moderate number of shops** and a **moderate number of venues** (cluster 3, color green)
5. non central areas, with a **small number of shops and venues** (cluster 1, violet)

Evidently, the interesting areas for opening a new shopping center would be the 3rd and 4th group, with a particular preference for the 3rd group. Areas of the 1st and 2nd group are already too crowded in terms of shops, while areas of the 5th group are too peripheral, as shown by the small number of venues.

### Second step: analysis of the shops in the interesting areas

We now extract the interesting areas and create a new dataframe with the shops from these areas only

In [228]:
interestingCAPs = list(df1.loc[df1['cluster'] == 3, :]["CAP"]) + list(df1.loc[df1['cluster'] == 0, :]["CAP"])
print(interestingCAPs)

[20127, 20128, 20132, 20134, 20146, 20147, 20162, 20125, 20126, 20133, 20139, 20155, 20158]


In [229]:
shopVenue_selected = shopVenue[shopVenue["CAP"].isin(interestingCAPs)]

print(shopVenue_selected.shape)
shopVenue_selected.head()

(293, 7)


Unnamed: 0,CAP,CAP Latitude,CAP Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
367,20125,45.499673,9.204903,Farmacia Falqui,45.50067,9.197202,Pharmacy
368,20125,45.499673,9.204903,Esselunga,45.499101,9.195828,Supermarket
369,20125,45.499673,9.204903,Carrefour Express,45.492574,9.206591,Convenience Store
370,20125,45.499673,9.204903,LD Market,45.499847,9.210104,Supermarket
371,20125,45.499673,9.204903,Pet Stop,45.500918,9.209864,Pet Store


In [230]:
print('There are {} uniques categories.'.format(len(shopVenue_selected['Venue Category'].unique())))

There are 59 uniques categories.


In [231]:
# one hot encoding
shopVenue_dummy = pd.get_dummies(shopVenue_selected[['Venue Category']], prefix="", prefix_sep="")
print(shopVenue_dummy.shape)

# add neighborhood column back to dataframe
shopVenue_dummy['CAP'] = shopVenue_selected['CAP'] 

# move neighborhood column to the first column
collist = list(shopVenue_dummy.columns)
collist.remove('CAP')
shopVenue_dummy = shopVenue_dummy[['CAP'] + collist]

print(shopVenue_dummy.shape)
shopVenue_dummy.head()


(293, 59)
(293, 60)


Unnamed: 0,CAP,Accessories Store,Arts & Crafts Store,Auto Garage,Auto Workshop,Betting Shop,Bookstore,Boutique,Business Service,Candy Store,...,Shop & Service,Shopping Mall,Smoke Shop,Sporting Goods Shop,Supermarket,Thrift / Vintage Store,Toy / Game Store,Video Game Store,Wine Shop,Women's Store
367,20125,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
368,20125,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
369,20125,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
370,20125,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
371,20125,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [232]:
# now sum the data by CAP
shopVenue_grouped = shopVenue_dummy.groupby('CAP').sum().reset_index()
shopVenue_grouped.set_index("CAP", inplace=True)

shopVenue_grouped.shape
shopVenue_grouped

Unnamed: 0_level_0,Accessories Store,Arts & Crafts Store,Auto Garage,Auto Workshop,Betting Shop,Bookstore,Boutique,Business Service,Candy Store,Cheese Shop,...,Shop & Service,Shopping Mall,Smoke Shop,Sporting Goods Shop,Supermarket,Thrift / Vintage Store,Toy / Game Store,Video Game Store,Wine Shop,Women's Store
CAP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
20125,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,5,0,0,0,0,0
20126,0,1,0,0,0,0,0,0,0,0,...,1,1,0,0,4,0,0,1,0,1
20127,0,2,0,0,0,0,0,0,0,0,...,0,0,0,0,2,0,0,0,2,0
20128,0,0,0,0,0,0,0,0,0,0,...,1,1,0,0,3,0,1,0,0,1
20132,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,5,0,0,0,0,0
20133,0,0,0,0,0,1,0,1,0,0,...,0,1,1,0,4,0,0,0,2,1
20134,0,1,0,0,0,0,0,0,1,0,...,0,0,0,0,2,1,0,0,0,1
20139,0,0,1,0,0,2,1,2,0,0,...,0,0,1,0,4,0,0,0,0,0
20146,0,0,0,0,0,0,0,0,0,0,...,0,0,0,2,7,0,0,0,0,0
20147,0,1,0,0,0,0,0,0,0,0,...,0,0,0,2,4,0,0,0,0,0


In [233]:
# set number of clusters
kclusters = 4

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(shopVenue_grouped)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([3, 0, 0, 0, 0, 3, 0, 3, 1, 0, 0, 2, 0], dtype=int32)

In [234]:
# add cluster label to the shopVenue_grouped dataframe
shopVenue_grouped["cluster"] = [ int(cluster) for cluster in kmeans.labels_]
shopVenue_grouped.head()

Unnamed: 0_level_0,Accessories Store,Arts & Crafts Store,Auto Garage,Auto Workshop,Betting Shop,Bookstore,Boutique,Business Service,Candy Store,Cheese Shop,...,Shopping Mall,Smoke Shop,Sporting Goods Shop,Supermarket,Thrift / Vintage Store,Toy / Game Store,Video Game Store,Wine Shop,Women's Store,cluster
CAP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
20125,0,0,0,0,0,1,0,0,0,0,...,0,0,0,5,0,0,0,0,0,3
20126,0,1,0,0,0,0,0,0,0,0,...,1,0,0,4,0,0,1,0,1,0
20127,0,2,0,0,0,0,0,0,0,0,...,0,0,0,2,0,0,0,2,0,0
20128,0,0,0,0,0,0,0,0,0,0,...,1,0,0,3,0,1,0,0,1,0
20132,0,0,0,0,0,0,0,0,0,0,...,0,0,1,5,0,0,0,0,0,0


In [235]:
milanAreas_selected = milanAreas[milanAreas["CAP"].isin(interestingCAPs)]
milanAreas_selected.set_index("CAP", inplace=True)

milanAreas_selected = milanAreas_selected.join(shopVenue_grouped["cluster"], on="CAP", lsuffix='', rsuffix='')
milanAreas_selected.reset_index(inplace=True)
milanAreas_selected

Unnamed: 0,CAP,name,latitude,longitude,cluster
0,20125,Greco,45.499673,9.204903,3
1,20126,Segnano,45.513329,9.217614,0
2,20127,Gorla,45.50251,9.222735,0
3,20128,Precotto,45.51493,9.225498,0
4,20132,Cimiano,45.500142,9.23886,0
5,20133,Municipio 4,45.467552,9.226919,3
6,20134,Lambrate,45.476655,9.243984,0
7,20139,Municipio 4,45.439911,9.218294,3
8,20146,Lorenteggio,45.45511,9.139183,1
9,20147,Arzaga,45.45677,9.130899,0


In [236]:
plot_map_clusters(milanAreas_selected)

In [242]:
# create a new dataframe
shoplist = pd.DataFrame()

for cap in shopVenue_grouped.index:
    row_sorted = shopVenue_grouped.loc[cap, :].sort_values(ascending=False)
    shoplist.loc[cap,"cluster"] = shopVenue_grouped.loc[cap,"cluster"]
    i = 1
    for shop in row_sorted.index:
        if row_sorted[shop] > 0 and shop != "cluster":
            shoplist.loc[cap,i] = "{} ({})".format(shop, row_sorted[shop])
            i += 1

shoplist["cluster"] = shoplist["cluster"].astype(int)


In [243]:
shoplist.loc[shoplist['cluster'] == 0, :] # red, group 

Unnamed: 0,cluster,1,2,3,4,5,6,7,8,9,...,13,14,15,16,17,18,19,20,21,22
20126,0,Supermarket (4),Furniture / Home Store (2),Food & Drink Shop (2),Pet Store (1),Shop & Service (1),Electronics Store (1),Market (1),Paper / Office Supplies Store (1),Pharmacy (1),...,Health & Beauty Service (1),Video Game Store (1),Arts & Crafts Store (1),,,,,,,
20127,0,Wine Shop (2),Arts & Crafts Store (2),Supermarket (2),Pharmacy (1),Insurance Office (1),Pet Store (1),Convenience Store (1),Kids Store (1),Food & Drink Shop (1),...,,,,,,,,,,
20128,0,Supermarket (3),Furniture / Home Store (2),Food & Drink Shop (2),Cosmetics Shop (2),Shop & Service (1),Women's Store (1),Market (1),Electronics Store (1),Convenience Store (1),...,Toy / Game Store (1),,,,,,,,,
20132,0,Supermarket (5),Electronics Store (2),Recording Studio (1),Outlet Mall (1),Department Store (1),Cosmetics Shop (1),Mobile Phone Shop (1),Construction & Landscaping (1),Clothing Store (1),...,Food & Drink Shop (1),Health & Beauty Service (1),Shoe Store (1),,,,,,,
20134,0,Farmers Market (2),Supermarket (2),Pharmacy (1),Candy Store (1),Women's Store (1),Mobile Phone Shop (1),Optical Shop (1),Electronics Store (1),Paper / Office Supplies Store (1),...,Thrift / Vintage Store (1),,,,,,,,,
20147,0,Supermarket (4),Sporting Goods Shop (2),Hobby Shop (1),Shoe Store (1),Insurance Office (1),Flea Market (1),Optical Shop (1),Electronics Store (1),Cosmetics Shop (1),...,Grocery Store (1),Arts & Crafts Store (1),,,,,,,,
20155,0,Supermarket (4),Smoke Shop (3),Health Food Store (2),Market (2),Wine Shop (2),Electronics Store (2),Cheese Shop (2),IT Services (2),Convenience Store (1),...,Accessories Store (1),Betting Shop (1),Shop & Service (1),Sporting Goods Shop (1),Toy / Game Store (1),Food & Drink Shop (1),Mobile Phone Shop (1),,,
20162,0,Supermarket (3),Pharmacy (1),Other Repair Shop (1),Shopping Mall (1),Paper / Office Supplies Store (1),Department Store (1),Arts & Crafts Store (1),,,...,,,,,,,,,,


In [244]:
shoplist.loc[shoplist['cluster'] == 1, :] # violet, group 

Unnamed: 0,cluster,1,2,3,4,5,6,7,8,9,...,13,14,15,16,17,18,19,20,21,22
20146,1,Supermarket (7),Pharmacy (5),Mobile Phone Shop (2),Shoe Store (2),Photography Studio (2),Sporting Goods Shop (2),Electronics Store (2),Optical Shop (2),Grocery Store (2),...,Department Store (1),Cosmetics Shop (1),Convenience Store (1),Clothing Store (1),Food & Drink Shop (1),Hobby Shop (1),,,,


In [245]:
shoplist.loc[shoplist['cluster'] == 2, :] # cyan, group 

Unnamed: 0,cluster,1,2,3,4,5,6,7,8,9,...,13,14,15,16,17,18,19,20,21,22
20158,2,Supermarket (6),Smoke Shop (4),Pharmacy (2),Bookstore (2),Cosmetics Shop (2),Mobile Phone Shop (2),Paper / Office Supplies Store (2),Auto Workshop (1),Boutique (1),...,Outlet Store (1),Pet Store (1),Hobby Shop (1),Accessories Store (1),Shipping Store (1),Shop & Service (1),Print Shop (1),,,


In [246]:
shoplist.loc[shoplist['cluster'] == 3, :] # yellow, group 

Unnamed: 0,cluster,1,2,3,4,5,6,7,8,9,...,13,14,15,16,17,18,19,20,21,22
20125,3,Supermarket (5),Clothing Store (3),Pharmacy (2),Gift Shop (1),Convenience Store (1),Other Repair Shop (1),Farmers Market (1),Electronics Store (1),Fish Market (1),...,Optical Shop (1),Kids Store (1),Flower Shop (1),Bookstore (1),,,,,,
20133,3,Convenience Store (4),Supermarket (4),Optical Shop (2),Wine Shop (2),Pet Store (2),Clothing Store (2),Business Service (1),Insurance Office (1),IT Services (1),...,Food & Drink Shop (1),Bookstore (1),Shopping Mall (1),Smoke Shop (1),Grocery Store (1),Gift Shop (1),Flower Shop (1),,,
20139,3,Supermarket (4),Bookstore (2),Electronics Store (2),Kids Store (2),Clothing Store (2),Business Service (2),Film Studio (2),Smoke Shop (1),Boutique (1),...,Flower Shop (1),Health Food Store (1),Pharmacy (1),Flea Market (1),Convenience Store (1),Auto Garage (1),Department Store (1),Health & Beauty Service (1),Lingerie Store (1),Shipping Store (1)
