# Assignment : Segmenting and Clustering Neighborhoods in Toronto

In [1]:
import urllib3
import bs4 as BeautifulSoup
import requests

import pandas as pd
import numpy as np
import geocoder # To get latitude and longitude for our postal codes

from sklearn.cluster import KMeans

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

import folium

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
http = urllib3.PoolManager()
response = http.request('GET', url)

soup = BeautifulSoup.BeautifulSoup(response.data)



In [3]:
# <table class="wikitable sortable">
# ...
# </table>
postalcodes_table = soup.find('table', {'class': 'wikitable sortable'})

In [4]:
headers = postalcodes_table.find_all('th')
headers = [header.text.strip() for header in headers]
headers

['Postcode', 'Borough', 'Neighbourhood']

In [5]:
cells = postalcodes_table.find_all('td')

In [6]:
num_rows = range(int(len(cells)/3))
data = {}

for num_row in num_rows:
    postal_code, borough, neighborhood = cells[num_row * 3:(num_row + 1) * 3]
    # Remove uncessary '\n'
    postal_code, borough, neighborhood = postal_code.text.strip(), borough.text.strip(), neighborhood.text.strip()
    if borough.lower() == 'not assigned':
        continue # 2nd item from the listbox of the 3) of the Assignment instructions
    if neighborhood.lower() == 'not assigned':
        neighborhood = borough # 4th item from the listbox of the 3) of the Assignment instructions

    # Normally the 2nd step of this condition does not occur. But just for security case create a new row
    # if the postal_code is already registered BUT not with the current borough
    if postal_code not in data or (postal_code in data and data[postal_code][headers[1]] != borough):
        data[postal_code] = {headers[1]: borough, headers[2]: neighborhood}
    else: # postal_code and borough already registered, let's append the neighbourhood
        data[postal_code][headers[2]] += ', {}'.format(neighborhood)

In [7]:
# Add missing column Postal Code before building the df
for postalcode in data:
    data[postalcode][headers[0]] = postalcode

In [8]:
toronto_df = pd.DataFrame(data.values())

In [9]:
toronto_coords = pd.read_csv('Geospatial_Coordinates.csv')

In [10]:
toronto_df = toronto_df.merge(toronto_coords, left_on='Postcode', right_on='Postal Code')

In [11]:
toronto_df.drop(columns='Postal Code', inplace=True)
toronto_df.rename(columns={'Neighbourhood': 'Neighborhood'}, inplace=True)

In [12]:
toronto_df[toronto_df['Borough'].str.contains('Toronto')]

Unnamed: 0,Borough,Neighborhood,Postcode,Latitude,Longitude
2,Downtown Toronto,Harbourfront,M5A,43.65426,-79.360636
9,Downtown Toronto,"Ryerson, Garden District",M5B,43.657162,-79.378937
15,Downtown Toronto,St. James Town,M5C,43.651494,-79.375418
19,East Toronto,The Beaches,M4E,43.676357,-79.293031
20,Downtown Toronto,Berczy Park,M5E,43.644771,-79.373306
24,Downtown Toronto,Central Bay Street,M5G,43.657952,-79.387383
25,Downtown Toronto,Christie,M6G,43.669542,-79.422564
30,Downtown Toronto,"Adelaide, King, Richmond",M5H,43.650571,-79.384568
31,West Toronto,"Dovercourt Village, Dufferin",M6H,43.669005,-79.442259
36,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",M5J,43.640816,-79.381752


In [13]:
CLIENT_ID = 'CILKB0UQP' # your Foursquare ID
CLIENT_SECRET = '0JRL0H' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

In [14]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT=100):
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
        
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    columns = ['Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude', 
              'Venue', 'Venue Latitude', 'Venue Longitude', 'Venue Category']
    data = [item for venue_list in venues_list for item in venue_list]
    
    return pd.DataFrame(data=data, columns=columns)

In [15]:
#toronto_venues = getNearbyVenues(toronto_df['Neighborhood'], latitudes=toronto_df['Latitude'], longitudes=toronto_df['Longitude'])
#toronto_venues.to_csv('toronto_venues.csv', index=False)

In [16]:
toronto_venues = pd.read_csv('toronto_venues.csv') # To avoid to call Foursquare API too much time

In [17]:
toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
3,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop
4,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant


In [18]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [19]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped.head(10)

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0
1,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Agincourt North, L'Amoreaux East, Milliken, St...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Bathurst Manor, Downsview North, Wilson Heights",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0
6,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Birch Cliff, Cliffside West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
neigh_freqs = {}
n_most_common_shops = 5
for neigh_name in toronto_grouped['Neighborhood']:
    neigh_freq = toronto_grouped[toronto_grouped['Neighborhood'] == neigh_name].T.reset_index()
    neigh_freq.columns = ['venue','freq']
    neigh_freq = neigh_freq[neigh_freq['venue']!='Neighborhood']
    neigh_freq['freq'] = neigh_freq['freq'].astype(float)
    neigh_freq['freq'] = neigh_freq['freq'] * 100
    neigh_freq = neigh_freq.round({'freq': 2})
    neigh_freq.sort_values('freq', ascending=False, inplace=True)
    neigh_freqs[neigh_name] = neigh_freq.iloc[:5, 0].tolist()

In [21]:
for neigh_freq in neigh_freqs:
    print(neigh_freq, end='\n')
    print(' => ', ' / '.join(neigh_freqs[neigh_freq]), end='\n')
    print()

Adelaide, King, Richmond
 =>  Coffee Shop / Café / Steakhouse / Bar / Cosmetics Shop

Agincourt
 =>  Latin American Restaurant / Skating Rink / Breakfast Spot / Lounge / Molecular Gastronomy Restaurant

Agincourt North, L'Amoreaux East, Milliken, Steeles East
 =>  Playground / Park / Coffee Shop / Arts & Crafts Store / Mobile Phone Shop

Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown
 =>  Pizza Place / Fried Chicken Joint / Sandwich Place / Fast Food Restaurant / Liquor Store

Alderwood, Long Branch
 =>  Pizza Place / Pharmacy / Sandwich Place / Gym / Pool

Bathurst Manor, Downsview North, Wilson Heights
 =>  Coffee Shop / Sushi Restaurant / Shopping Mall / Diner / Supermarket

Bayview Village
 =>  Café / Japanese Restaurant / Bank / Chinese Restaurant / Yoga Studio

Bedford Park, Lawrence Manor East
 =>  Italian Restaurant / Pizza Place / Coffee Shop / Pharmacy / Indian Restaurant

Berczy Park
 =>  Coffee Shop / Bakery / B

In [22]:
def most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [23]:
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [24]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in range(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in range(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

# Some neighborhoods do not have any most common venues. Let's remove them. Also as their Cluster Labels is NaN, the column have been converted to float.
# Let's put it back to int
toronto_merged = toronto_merged[~toronto_merged['Cluster Labels'].isnull()]
toronto_merged['Cluster Labels'] = toronto_merged['Cluster Labels'].astype(int)

toronto_merged.head() # check the last columns!

Unnamed: 0,Borough,Neighborhood,Postcode,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,Parkwoods,M3A,43.753259,-79.329656,0,Park,Food & Drink Shop,Women's Store,Drugstore,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant
1,North York,Victoria Village,M4A,43.725882,-79.315572,4,Intersection,Coffee Shop,Portuguese Restaurant,French Restaurant,Hockey Arena,Doner Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store
2,Downtown Toronto,Harbourfront,M5A,43.65426,-79.360636,4,Coffee Shop,Park,Bakery,Pub,Theater,Mexican Restaurant,Restaurant,Breakfast Spot,Café,Performing Arts Venue
3,North York,"Lawrence Heights, Lawrence Manor",M6A,43.718518,-79.464763,4,Furniture / Home Store,Clothing Store,Coffee Shop,Event Space,Miscellaneous Shop,Arts & Crafts Store,Women's Store,Boutique,Accessories Store,Vietnamese Restaurant
4,Queen's Park,Queen's Park,M7A,43.662301,-79.389494,4,Coffee Shop,Park,Gym,Diner,Nightclub,Seafood Restaurant,Sandwich Place,Burger Joint,Burrito Place,Café


In [25]:
# create map
longitude = -79.3831843
latitude = 43.653226

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

![Toronto Map](https://raw.githubusercontent.com/thoscc/Coursera_Capstone/master/toronto.png)

# Cluster Examination

In [26]:
cluster0 = toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]
cluster0

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Parkwoods,0,Park,Food & Drink Shop,Women's Store,Drugstore,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant
21,Caledonia-Fairbanks,0,Park,Women's Store,Market,Fast Food Restaurant,Drugstore,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop
32,Scarborough Village,0,Convenience Store,Playground,Drugstore,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Women's Store
35,East Toronto,0,Park,Convenience Store,Metro Station,Drugstore,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Women's Store
40,"CFB Toronto, Downsview East",0,Park,Airport,Snack Place,Women's Store,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant
46,Downsview West,0,Grocery Store,Convenience Store,Park,Bank,Drugstore,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop
49,"Downsview, North Park, Upwood Park",0,Park,Bakery,Construction & Landscaping,Women's Store,Drugstore,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop
61,Lawrence Park,0,Park,Bus Line,Swim School,Women's Store,Donut Shop,Diner,Discount Store,Dog Run,Doner Restaurant,Drugstore
66,York Mills West,0,Park,Bank,Bar,Convenience Store,Women's Store,Dumpling Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore
68,"Forest Hill North, Forest Hill West",0,Park,Trail,Bus Line,Sushi Restaurant,Jewelry Store,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop


In [27]:
cluster0.groupby('1st Most Common Venue')['1st Most Common Venue'].count()

1st Most Common Venue
Convenience Store     1
Grocery Store         1
Park                 12
Name: 1st Most Common Venue, dtype: int64

=> For Cluster 0, we can see that the most common venue is <b>'Park'</b>. <br />
We can reproduce this examination for the remaining clusters

In [28]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,"Rouge, Malvern",1,Fast Food Restaurant,Women's Store,Drugstore,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant


In [29]:
cluster2 = toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]
cluster2

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
52,"Newtonbrook, Willowdale",2,Piano Bar,Department Store,Event Space,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Donut Shop


In [30]:
cluster3 = toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]
cluster3

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
101,"Humber Bay, King's Mill Park, Kingsway Park So...",3,Baseball Field,Breakfast Spot,Women's Store,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant


For the cluster 2 and 3, the most common venues are respectively: Piano Bar and Baseball Field.

In [31]:
cluster4 = toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]
cluster4.head(10)

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Victoria Village,4,Intersection,Coffee Shop,Portuguese Restaurant,French Restaurant,Hockey Arena,Doner Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store
2,Harbourfront,4,Coffee Shop,Park,Bakery,Pub,Theater,Mexican Restaurant,Restaurant,Breakfast Spot,Café,Performing Arts Venue
3,"Lawrence Heights, Lawrence Manor",4,Furniture / Home Store,Clothing Store,Coffee Shop,Event Space,Miscellaneous Shop,Arts & Crafts Store,Women's Store,Boutique,Accessories Store,Vietnamese Restaurant
4,Queen's Park,4,Coffee Shop,Park,Gym,Diner,Nightclub,Seafood Restaurant,Sandwich Place,Burger Joint,Burrito Place,Café
5,Queen's Park,4,Coffee Shop,Park,Gym,Diner,Nightclub,Seafood Restaurant,Sandwich Place,Burger Joint,Burrito Place,Café
7,Don Mills North,4,Café,Gym / Fitness Center,Caribbean Restaurant,Japanese Restaurant,Donut Shop,Diner,Discount Store,Dog Run,Doner Restaurant,Drugstore
8,"Woodbine Gardens, Parkview Hill",4,Fast Food Restaurant,Pizza Place,Pharmacy,Athletics & Sports,Gastropub,Intersection,Bus Line,Breakfast Spot,Bank,Gym / Fitness Center
9,"Ryerson, Garden District",4,Clothing Store,Coffee Shop,Cosmetics Shop,Bakery,Fast Food Restaurant,Café,Italian Restaurant,Bubble Tea Shop,Pizza Place,Restaurant
10,Glencairn,4,Pizza Place,Pub,Japanese Restaurant,Women's Store,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop
12,"Highland Creek, Rouge Hill, Port Union",4,Construction & Landscaping,Bar,Women's Store,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant


In [32]:
cluster4.groupby('1st Most Common Venue')['1st Most Common Venue'].count().sort_values(ascending=False)

1st Most Common Venue
Coffee Shop                   18
Café                           8
Pizza Place                    4
Pharmacy                       4
Grocery Store                  3
Sporting Goods Shop            2
Gym                            2
Fast Food Restaurant           2
Clothing Store                 2
Middle Eastern Restaurant      2
Sandwich Place                 2
Bakery                         2
Business Service               1
Cafeteria                      1
Gift Shop                      1
Burger Joint                   1
Beer Store                     1
Baseball Field                 1
College Stadium                1
Construction & Landscaping     1
Discount Store                 1
Drugstore                      1
Bar                            1
Furniture / Home Store         1
Garden                         1
Sushi Restaurant               1
Golf Course                    1
Greek Restaurant               1
Skating Rink                   1
Ramen Restaurant     

For cluster 4, we can see that the "Coffee Shop" is the 1st most common venue