In [1]:
# importing the useful libraries for this project. 

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

### Extracting the data from wikipedia page 
* Extract the HTML of the webpage using requests. 
* Parse HTML using BeautifulSoup.


In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
website_url = requests.get(url).text     # This returns the HTML of website
soup = BeautifulSoup(website_url, 'lxml')
print(soup.prettify())                   # To check how tags are nested. 

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   List of postal codes of Canada: M - Wikipedia
  </title>
  <script>
   document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"XrOB1ApAAEIAAFRLP3wAAAEX","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_postal_codes_of_Canada:_M","wgTitle":"List of postal codes of Canada: M","wgCurRevisionId":955308368,"wgRevisionId":955308368,"wgArticleId":539066,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Articles with short description","Communications in Ontario","Postal codes in Canada","Toronto","Ontario

#### The useful data is of the class type "wikitable sorttable". Therefore, we find that data. 

In [3]:
my_table = soup.find('table',{'class':'wikitable sortable'})
table_rows = my_table.findAll('tr')
table_rows

[<tr>
 <th>Postal Code
 </th>
 <th>Borough
 </th>
 <th>Neighborhood
 </th></tr>,
 <tr>
 <td>M1A
 </td>
 <td>Not assigned
 </td>
 <td>
 </td></tr>,
 <tr>
 <td>M2A
 </td>
 <td>Not assigned
 </td>
 <td>
 </td></tr>,
 <tr>
 <td>M3A
 </td>
 <td>North York
 </td>
 <td>Parkwoods
 </td></tr>,
 <tr>
 <td>M4A
 </td>
 <td>North York
 </td>
 <td>Victoria Village
 </td></tr>,
 <tr>
 <td>M5A
 </td>
 <td>Downtown Toronto
 </td>
 <td>Regent Park / Harbourfront
 </td></tr>,
 <tr>
 <td>M6A
 </td>
 <td>North York
 </td>
 <td>Lawrence Manor / Lawrence Heights
 </td></tr>,
 <tr>
 <td>M7A
 </td>
 <td>Downtown Toronto
 </td>
 <td>Queen's Park / Ontario Provincial Government
 </td></tr>,
 <tr>
 <td>M8A
 </td>
 <td>Not assigned
 </td>
 <td>
 </td></tr>,
 <tr>
 <td>M9A
 </td>
 <td>Etobicoke
 </td>
 <td>Islington Avenue
 </td></tr>,
 <tr>
 <td>M1B
 </td>
 <td>Scarborough
 </td>
 <td>Malvern / Rouge
 </td></tr>,
 <tr>
 <td>M2B
 </td>
 <td>Not assigned
 </td>
 <td>
 </td></tr>,
 <tr>
 <td>M3B
 </td>
 <td>North Yor

In [30]:
# This part of code takes table_rows as input and output the data in dataframe. 
data = []
for row in table_rows:
    data.append([t.text.strip() for t in row.find_all('td')])

df = pd.DataFrame(data, columns=['PostalCode', 'Borough', 'Neighbourhood'])
df = df[~df['PostalCode'].isnull()]  # to filter out bad rows
df.head(5)

Unnamed: 0,PostalCode,Borough,Neighbourhood
1,M1A,Not assigned,
2,M2A,Not assigned,
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Regent Park / Harbourfront


In [33]:
df = df[~(df['Borough'] == 'Not assigned')]   # To drop the rows with (Borough = Not assigned). 
df.reset_index(inplace = True)
df = df.drop(columns = 'index')
df.rename(columns = {'PostalCode': 'Postal Code'}, inplace = True)
df.head(5)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Regent Park / Harbourfront
3,M6A,North York,Lawrence Manor / Lawrence Heights
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


In [34]:
df.shape

(103, 3)

### Latitude and longitude for the postal code corresponding to neighbourhood

In [15]:
# import geocoder
# print('Library imported')
# #initialize your variable to None
# def get_geocoder(postal_code_from_df):
#     lat_lng_coords = None
#     # loop until you get the coordinates
#     while(lat_lng_coords is None):
#         g = geocoder.google('{}, Toronto, Ontario'.format(postal_code_from_df))
#         lat_lng_coords = g.latlng  
#     latitude = lat_lng_coords[0]
#     longitude = lat_lng_coords[1]
#     return(latitude, longitude)

# for i in df.index:
#     df['Latitude'][i], df['Longitude'][i] = get_geocoder(df['PostalCode'][i])


In [35]:
df_locations = pd.read_csv('Geospatial_Coordinates.csv')  #Loading the csv file the geocode locations. 
df_locations.head(5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [37]:
# Merging two dataframe on a common column
df_merge = pd.merge(df, df_locations, on = 'Postal Code', how = 'outer' ) 
df_merge.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Regent Park / Harbourfront,43.65426,-79.360636
3,M6A,North York,Lawrence Manor / Lawrence Heights,43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.662301,-79.389494


In [45]:
import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [120]:
df_cluster = df_merge[['Borough','Latitude', 'Longitude']]
df_cluster = df_cluster[(df_cluster['Borough'].str.contains('Toronto'))]   #borough with Toronto mentioned in name
df_cluster.head()

Unnamed: 0,Borough,Latitude,Longitude
2,Downtown Toronto,43.65426,-79.360636
4,Downtown Toronto,43.662301,-79.389494
9,Downtown Toronto,43.657162,-79.378937
15,Downtown Toronto,43.651494,-79.375418
19,East Toronto,43.676357,-79.293031


In [121]:
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [122]:
# Create map of Toronto using latitude and longitude
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough in zip(df_cluster['Latitude'], df_cluster['Longitude'], df_cluster['Borough']):
    label = '{}'.format(borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [123]:
CLIENT_ID = '3MTNLOUO4MKYHQP5YGBM353DBBFGI4X5O1FWNUELWYVOXI0I' #  Foursquare ID
CLIENT_SECRET = 'WIDMUIGUAUCSEUWRNNRLEKNEETSRSHHDBLVTBNWL41MFPYRI' #  Foursquare Secret
VERSION = '20200505' # Foursquare API version

print('My credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

My credentails:
CLIENT_ID: 3MTNLOUO4MKYHQP5YGBM353DBBFGI4X5O1FWNUELWYVOXI0I
CLIENT_SECRET:WIDMUIGUAUCSEUWRNNRLEKNEETSRSHHDBLVTBNWL41MFPYRI


### Explore the nearby venues

In [124]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT=100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Borough', 
                  'Borough Latitude', 
                  'Borough Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [125]:
borough_venues = getNearbyVenues(names=df_cluster['Borough'], latitudes=df_cluster['Latitude'], 
                                 longitudes=df_cluster['Longitude'])

Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
East Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
West Toronto
Downtown Toronto
West Toronto
East Toronto
Downtown Toronto
West Toronto
East Toronto
Downtown Toronto
East Toronto
Central Toronto
Central Toronto
Central Toronto
Central Toronto
West Toronto
Central Toronto
Central Toronto
West Toronto
Central Toronto
Downtown Toronto
West Toronto
Central Toronto
Downtown Toronto
Central Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
East Toronto


In [126]:
borough_venues.head(5)

Unnamed: 0,Borough,Borough Latitude,Borough Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Downtown Toronto,43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,Downtown Toronto,43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,Downtown Toronto,43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot
3,Downtown Toronto,43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
4,Downtown Toronto,43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa


In [128]:
# Count no of venues for each borough
borough_venues.groupby('Borough').count()

Unnamed: 0_level_0,Borough Latitude,Borough Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Central Toronto,115,115,115,115,115,115
Downtown Toronto,1225,1225,1225,1225,1225,1225
East Toronto,124,124,124,124,124,124
West Toronto,159,159,159,159,159,159


In [132]:
# one hot encoding
borough_onehot = pd.get_dummies(borough_venues[['Venue Category']], prefix="", prefix_sep="")

# add borough column back to dataframe
borough_onehot['Borough'] = borough_venues['Borough'] 

# move borough column to the first column
fixed_columns = [borough_onehot.columns[-1]] + list(borough_onehot.columns[:-1])
borough_onehot = borough_onehot[fixed_columns]

borough_onehot.head()

Unnamed: 0,Borough,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,Downtown Toronto,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Downtown Toronto,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Downtown Toronto,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Downtown Toronto,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Downtown Toronto,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [133]:
borough_grouped = borough_onehot.groupby('Borough').mean().reset_index()
borough_grouped.head(5)

Unnamed: 0,Borough,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,Central Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017391,0.0,...,0.0,0.008696,0.008696,0.0,0.008696,0.0,0.008696,0.0,0.0,0.008696
1,Downtown Toronto,0.000816,0.000816,0.000816,0.000816,0.001633,0.001633,0.001633,0.014694,0.001633,...,0.000816,0.0,0.000816,0.002449,0.010612,0.001633,0.003265,0.004898,0.000816,0.005714
2,East Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.024194,0.0,...,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.008065,0.0,0.024194
3,West Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.018868,0.0,0.012579,0.006289,0.0,0.018868


In [134]:
num_top_venues = 5

for hood in borough_grouped['Borough']:
    print("----"+hood+"----")
    temp = borough_grouped[borough_grouped['Borough'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Central Toronto----
              venue  freq
0       Coffee Shop  0.07
1    Sandwich Place  0.06
2              Park  0.05
3              Café  0.05
4  Sushi Restaurant  0.03


----Downtown Toronto----
                 venue  freq
0          Coffee Shop  0.10
1                 Café  0.06
2           Restaurant  0.04
3                Hotel  0.03
4  Japanese Restaurant  0.03


----East Toronto----
                venue  freq
0    Greek Restaurant  0.07
1         Coffee Shop  0.06
2                Park  0.04
3                Café  0.04
4  Italian Restaurant  0.04


----West Toronto----
                venue  freq
0                Café  0.07
1                 Bar  0.06
2          Restaurant  0.04
3  Italian Restaurant  0.04
4         Coffee Shop  0.04




In [135]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [136]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Borough']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
borough_venues_sorted = pd.DataFrame(columns=columns)
borough_venues_sorted['Borough'] = borough_grouped['Borough']

for ind in np.arange(borough_grouped.shape[0]):
    borough_venues_sorted.iloc[ind, 1:] = return_most_common_venues(borough_grouped.iloc[ind, :], num_top_venues)

borough_venues_sorted.head()

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,Coffee Shop,Sandwich Place,Park,Café,Restaurant,Pizza Place,Dessert Shop,Sushi Restaurant,Gym,Pub
1,Downtown Toronto,Coffee Shop,Café,Restaurant,Japanese Restaurant,Hotel,Italian Restaurant,Bakery,Park,Gym,Seafood Restaurant
2,East Toronto,Greek Restaurant,Coffee Shop,Brewery,Café,Italian Restaurant,Park,Ice Cream Shop,Restaurant,American Restaurant,Bakery
3,West Toronto,Café,Bar,Coffee Shop,Restaurant,Italian Restaurant,Bakery,Bookstore,Pizza Place,Gym,Gift Shop


### Cluster neighbourhood

In [138]:
# set number of clusters
kclusters = 4

borough_grouped_clustering = borough_grouped.drop('Borough', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(borough_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 3, 1, 2], dtype=int32)

In [139]:
# add clustering labels
borough_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

borough_merged = df_cluster

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
borough_merged = borough_merged.join(borough_venues_sorted.set_index('Borough'), on='Borough')

borough_merged.head() # check the last columns!

Unnamed: 0,Borough,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,43.65426,-79.360636,3,Coffee Shop,Café,Restaurant,Japanese Restaurant,Hotel,Italian Restaurant,Bakery,Park,Gym,Seafood Restaurant
4,Downtown Toronto,43.662301,-79.389494,3,Coffee Shop,Café,Restaurant,Japanese Restaurant,Hotel,Italian Restaurant,Bakery,Park,Gym,Seafood Restaurant
9,Downtown Toronto,43.657162,-79.378937,3,Coffee Shop,Café,Restaurant,Japanese Restaurant,Hotel,Italian Restaurant,Bakery,Park,Gym,Seafood Restaurant
15,Downtown Toronto,43.651494,-79.375418,3,Coffee Shop,Café,Restaurant,Japanese Restaurant,Hotel,Italian Restaurant,Bakery,Park,Gym,Seafood Restaurant
19,East Toronto,43.676357,-79.293031,1,Greek Restaurant,Coffee Shop,Brewery,Café,Italian Restaurant,Park,Ice Cream Shop,Restaurant,American Restaurant,Bakery


In [140]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(borough_merged['Latitude'], borough_merged['Longitude'], borough_merged['Borough'], borough_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examine cluster = 0

In [141]:
borough_merged.loc[borough_merged['Cluster Labels'] == 0, borough_merged.columns[[1] + list(range(5, borough_merged.shape[1]))]]

Unnamed: 0,Latitude,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
61,43.72802,Sandwich Place,Park,Café,Restaurant,Pizza Place,Dessert Shop,Sushi Restaurant,Gym,Pub
62,43.711695,Sandwich Place,Park,Café,Restaurant,Pizza Place,Dessert Shop,Sushi Restaurant,Gym,Pub
67,43.712751,Sandwich Place,Park,Café,Restaurant,Pizza Place,Dessert Shop,Sushi Restaurant,Gym,Pub
68,43.696948,Sandwich Place,Park,Café,Restaurant,Pizza Place,Dessert Shop,Sushi Restaurant,Gym,Pub
73,43.715383,Sandwich Place,Park,Café,Restaurant,Pizza Place,Dessert Shop,Sushi Restaurant,Gym,Pub
74,43.67271,Sandwich Place,Park,Café,Restaurant,Pizza Place,Dessert Shop,Sushi Restaurant,Gym,Pub
79,43.704324,Sandwich Place,Park,Café,Restaurant,Pizza Place,Dessert Shop,Sushi Restaurant,Gym,Pub
83,43.689574,Sandwich Place,Park,Café,Restaurant,Pizza Place,Dessert Shop,Sushi Restaurant,Gym,Pub
86,43.686412,Sandwich Place,Park,Café,Restaurant,Pizza Place,Dessert Shop,Sushi Restaurant,Gym,Pub


### Examine cluster = 1

In [142]:
borough_merged.loc[borough_merged['Cluster Labels'] == 1, borough_merged.columns[[1] + list(range(5, borough_merged.shape[1]))]]

Unnamed: 0,Latitude,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,43.676357,Coffee Shop,Brewery,Café,Italian Restaurant,Park,Ice Cream Shop,Restaurant,American Restaurant,Bakery
41,43.679557,Coffee Shop,Brewery,Café,Italian Restaurant,Park,Ice Cream Shop,Restaurant,American Restaurant,Bakery
47,43.668999,Coffee Shop,Brewery,Café,Italian Restaurant,Park,Ice Cream Shop,Restaurant,American Restaurant,Bakery
54,43.659526,Coffee Shop,Brewery,Café,Italian Restaurant,Park,Ice Cream Shop,Restaurant,American Restaurant,Bakery
100,43.662744,Coffee Shop,Brewery,Café,Italian Restaurant,Park,Ice Cream Shop,Restaurant,American Restaurant,Bakery


### Examine cluster = 2

In [143]:
borough_merged.loc[borough_merged['Cluster Labels'] == 2, borough_merged.columns[[1] + list(range(5, borough_merged.shape[1]))]]

Unnamed: 0,Latitude,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
31,43.669005,Bar,Coffee Shop,Restaurant,Italian Restaurant,Bakery,Bookstore,Pizza Place,Gym,Gift Shop
37,43.647927,Bar,Coffee Shop,Restaurant,Italian Restaurant,Bakery,Bookstore,Pizza Place,Gym,Gift Shop
43,43.636847,Bar,Coffee Shop,Restaurant,Italian Restaurant,Bakery,Bookstore,Pizza Place,Gym,Gift Shop
69,43.661608,Bar,Coffee Shop,Restaurant,Italian Restaurant,Bakery,Bookstore,Pizza Place,Gym,Gift Shop
75,43.64896,Bar,Coffee Shop,Restaurant,Italian Restaurant,Bakery,Bookstore,Pizza Place,Gym,Gift Shop
81,43.651571,Bar,Coffee Shop,Restaurant,Italian Restaurant,Bakery,Bookstore,Pizza Place,Gym,Gift Shop


### Examine cluster = 3

In [144]:
borough_merged.loc[borough_merged['Cluster Labels'] == 3, borough_merged.columns[[1] + list(range(5, borough_merged.shape[1]))]]

Unnamed: 0,Latitude,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,43.65426,Café,Restaurant,Japanese Restaurant,Hotel,Italian Restaurant,Bakery,Park,Gym,Seafood Restaurant
4,43.662301,Café,Restaurant,Japanese Restaurant,Hotel,Italian Restaurant,Bakery,Park,Gym,Seafood Restaurant
9,43.657162,Café,Restaurant,Japanese Restaurant,Hotel,Italian Restaurant,Bakery,Park,Gym,Seafood Restaurant
15,43.651494,Café,Restaurant,Japanese Restaurant,Hotel,Italian Restaurant,Bakery,Park,Gym,Seafood Restaurant
20,43.644771,Café,Restaurant,Japanese Restaurant,Hotel,Italian Restaurant,Bakery,Park,Gym,Seafood Restaurant
24,43.657952,Café,Restaurant,Japanese Restaurant,Hotel,Italian Restaurant,Bakery,Park,Gym,Seafood Restaurant
25,43.669542,Café,Restaurant,Japanese Restaurant,Hotel,Italian Restaurant,Bakery,Park,Gym,Seafood Restaurant
30,43.650571,Café,Restaurant,Japanese Restaurant,Hotel,Italian Restaurant,Bakery,Park,Gym,Seafood Restaurant
36,43.640816,Café,Restaurant,Japanese Restaurant,Hotel,Italian Restaurant,Bakery,Park,Gym,Seafood Restaurant
42,43.647177,Café,Restaurant,Japanese Restaurant,Hotel,Italian Restaurant,Bakery,Park,Gym,Seafood Restaurant
