In [62]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup 

In [2]:
URL = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
r = requests.get(URL)

#### Step1: 
    Use BeautifulSoup to work on webpage

In [3]:
soup = BeautifulSoup(r.content, 'html5lib') 
#print(soup.prettify())

#### Step2: 
    Extract only the table that is needed

In [4]:
table = soup.find('table', attrs = {'class':'wikitable sortable'}) 
#print(table.prettify())  


#### Step3: 
    Fetch the header row to create the dataframe

In [5]:
cols=[]
for row in table.findAll('tr'):
    for headers in row.findAll('th'):
        cols.append(headers.text.rstrip("\n\r"))
        
cols

['Postcode', 'Borough', 'Neighbourhood']

#### Step4: 
    Create a dataframe with the data from table

In [6]:
df = pd.DataFrame(columns=cols)
row_data = []

for row in table.findAll('tr'):
    for data in row.findAll('td'):
        row_data.append(data.text.rstrip("\n\r"))
    if len(row_data)==3:
        df=df.append(pd.DataFrame([row_data], columns=cols), ignore_index = True)
    row_data = []

df.head(11)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


In [7]:
df.shape

(288, 3)

#### Step5: 
    Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.

In [8]:
df = df[df.Borough != 'Not assigned']
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


In [9]:
df.shape

(211, 3)

#### Step6:
 More than one neighborhood can exist in one postal code area.  For example, in the table on the Wikipedia page, you will notice that M5A is listed twice and has two neighborhoods: Harbourfront and Regent Park.   These two rows will be combined into one row with the neighborhoods separated with a comma as shown in row 11 in the above table.

In [10]:
df1=df.groupby(['Postcode'], as_index=False, sort=False)['Borough'].first()
df2=df.groupby(['Postcode'], as_index=False, sort=False)['Neighbourhood'].agg(lambda x: ', '.join(x.values))
df=pd.merge(df1, df2, on=["Postcode"], how="inner")
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront, Regent Park"
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Queen's Park,Not assigned


In [11]:
df.shape

(103, 3)

#### Step7: 
    If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough

In [12]:
df['Neighbourhood'] = df.apply(lambda x: x['Borough'] if x['Neighbourhood'] == 'Not assigned' else x['Neighbourhood'], axis=1)
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront, Regent Park"
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Queen's Park,Queen's Park


In [13]:
df.shape

(103, 3)

#### Step8: 
    Import and get coordinates from geocoder

In [16]:
!conda install -c conda-forge geocoder --yes

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geocoder


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ratelim-0.1.6              |             py_2           6 KB  conda-forge
    certifi-2019.9.11          |           py36_0         147 KB  conda-forge
    openssl-1.1.1c             |       h516909a_0         2.1 MB  conda-forge
    geocoder-1.38.1            |             py_1          53 KB  conda-forge
    ca-certificates-2019.9.11  |       hecc5488_0         144 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geocoder:        1.38.1-py_1       conda-forge
    ratelim:         0.1.6-py_2        conda-forge

The following packages will be UPDATED:

    

In [17]:
import geocoder 

In [18]:
# Class definition to generate latitude and logitude for a given postal_code
def geo_loc(postal_code):
        # initialize your variable to None
    lat_lng_coords = None
    i=0
         # loop until you get the coordinates
    print(postal_code)
    while(lat_lng_coords is None):
        g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
        lat_lng_coords = g.latlng
        print("Queried Geocode :", i)
        i += 1
    return(lat_lng_coords)

In [19]:
df['Latitude'] = df['Postcode'].apply(lambda x: (geo_loc(x))[0])
df['Longitude'] = df['Postcode'].apply(lambda x: (geo_loc(x))[1])

M3A
Queried Geocode : 0
Queried Geocode : 1
Queried Geocode : 2
Queried Geocode : 3
Queried Geocode : 4
Queried Geocode : 5
Queried Geocode : 6
Queried Geocode : 7
Queried Geocode : 8
Queried Geocode : 9
Queried Geocode : 10
Queried Geocode : 11
Queried Geocode : 12
Queried Geocode : 13
Queried Geocode : 14
Queried Geocode : 15
Queried Geocode : 16
Queried Geocode : 17
Queried Geocode : 18
Queried Geocode : 19
Queried Geocode : 20
Queried Geocode : 21
Queried Geocode : 22
Queried Geocode : 23
Queried Geocode : 24
Queried Geocode : 25
Queried Geocode : 26
Queried Geocode : 27
Queried Geocode : 28
Queried Geocode : 29
Queried Geocode : 30
Queried Geocode : 31
Queried Geocode : 32
Queried Geocode : 33
Queried Geocode : 34
Queried Geocode : 35
Queried Geocode : 36
Queried Geocode : 37
Queried Geocode : 38
Queried Geocode : 39
Queried Geocode : 40
Queried Geocode : 41
Queried Geocode : 42
Queried Geocode : 43
Queried Geocode : 44
Queried Geocode : 45
Queried Geocode : 46
Queried Geocode : 4

KeyboardInterrupt: 

#### Step9:
    Get the coordinates from csv as the geocoder package is not responding

In [20]:
coor_df = pd.read_csv("http://cocl.us/Geospatial_data")

In [21]:
coor_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [22]:
coor_df.columns = ['Postcode', 'Latitude', 'Longitude']

In [23]:
df = pd.merge(df, coor_df, on=["Postcode"], how="inner")

In [24]:
df.head(11)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937


#### Step 10:  Use geopy library to get the latitude and longitude values of Toronto.

In [30]:
!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    folium-0.5.0               |             py_0          45 KB  conda-forge
    altair-3.2.0               |           py36_0         770 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    branca-0.3.1               |             py_0          25 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         868 KB

The following NEW packages will be INSTALLED:

    altair:  3.2.0-py36_0 conda-forge
    branca:  0.3.1-py_0   conda-forge
    folium:  0.5.0-py_0   conda-forge
    vincent: 0.4.4-py_1   conda-forge


Downloading and Extracting Packages
folium-0.5.0         | 45 KB    

In [33]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import folium # map rendering library

In [31]:
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [35]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighbourhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

#### Step 11: Next, we are going to start utilizing the Foursquare API to explore the neighborhoods and segment them.

In [81]:
CLIENT_ID = 'DCOTPELHV4P3U30CYZBHPN5QZL32VFBSKYXXBA0QC1U3A33H' 
CLIENT_SECRET = 'QJ5SGMJGUVGSHEQO0ZDUAEBKBVTDGZ3ZWYJVRGJRT1OTLAHE' 
VERSION = '20180605' # Foursquare API version


In [82]:
neighborhood_latitude = df.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = df.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = df.loc[0, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Parkwoods are 43.7532586, -79.3296565.


In [83]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT=2):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [84]:
toronto_venues = getNearbyVenues(names=df['Neighbourhood'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )
                         


Parkwoods
Victoria Village
Harbourfront, Regent Park
Lawrence Heights, Lawrence Manor
Queen's Park
Islington Avenue
Rouge, Malvern
Don Mills North
Woodbine Gardens, Parkview Hill
Ryerson, Garden District
Glencairn
Cloverdale, Islington, Martin Grove, Princess Gardens, West Deane Park
Highland Creek, Rouge Hill, Port Union
Flemingdon Park, Don Mills South
Woodbine Heights
St. James Town
Humewood-Cedarvale
Bloordale Gardens, Eringate, Markland Wood, Old Burnhamthorpe
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Downsview North, Wilson Heights
Thorncliffe Park
Adelaide, King, Richmond
Dovercourt Village, Dufferin
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto
Harbourfront East, Toronto Islands, Union Station
Little Portugal, Trinity
East Birchmount Park, Ionview, Kennedy Park
Bayview Village
CFB Toronto, Downsview East
The D

In [85]:
toronto_venues.head()

Unnamed: 0,Neighbourhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
3,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop
4,"Harbourfront, Regent Park",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery


In [86]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 93 uniques categories.


### Step12: Analyze each Neighbourhood

In [87]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighbourhood'] = toronto_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighbourhood,Airport,Airport Lounge,American Restaurant,Arts & Crafts Store,Bakery,Bank,Bar,Baseball Field,Basketball Court,...,Steakhouse,Summer Camp,Supermarket,Sushi Restaurant,Theme Restaurant,Toy / Game Store,Trail,Warehouse Store,Wings Joint,Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Harbourfront, Regent Park",0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [88]:
toronto_grouped = toronto_onehot.groupby('Neighbourhood').mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Neighbourhood,Airport,Airport Lounge,American Restaurant,Arts & Crafts Store,Bakery,Bank,Bar,Baseball Field,Basketball Court,...,Steakhouse,Summer Camp,Supermarket,Sushi Restaurant,Theme Restaurant,Toy / Game Store,Trail,Warehouse Store,Wings Joint,Yoga Studio
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Agincourt North, L'Amoreaux East, Milliken, St...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Let's print each neighborhood along with the top 5 most common venues

In [89]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [90]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,"Adelaide, King, Richmond",Concert Hall,Steakhouse,Yoga Studio,Drugstore,Curling Ice
1,Agincourt,Sandwich Place,Breakfast Spot,Yoga Studio,Curling Ice,Dance Studio
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Park,Playground,Yoga Studio,Cosmetics Shop,Creperie
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Pharmacy,Sandwich Place,Construction & Landscaping,Cosmetics Shop,Creperie
4,"Alderwood, Long Branch",Gym,Pizza Place,Yoga Studio,Dog Run,Creperie


### Step 13: Lets cluster Neighbourhoods

Run *k*-means to cluster the neighborhood into 5 clusters

In [91]:
from sklearn.cluster import KMeans

In [92]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 2, 0, 3, 0, 0, 0, 0, 0], dtype=int32)

In [None]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [106]:
toronto_merged = df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

toronto_merged.head(10) # check the last columns!

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,4.0,Food & Drink Shop,Park,Yoga Studio,Dog Run,Creperie
1,M4A,North York,Victoria Village,43.725882,-79.315572,1.0,Coffee Shop,Hockey Arena,Yoga Studio,Drugstore,Curling Ice
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636,1.0,Coffee Shop,Bakery,Yoga Studio,Drugstore,Curling Ice
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763,0.0,Furniture / Home Store,Boutique,Yoga Studio,Drugstore,Curling Ice
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494,4.0,Italian Restaurant,Park,Yoga Studio,Creperie,Curling Ice
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242,,,,,,
6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,0.0,Print Shop,Fast Food Restaurant,Yoga Studio,Dog Run,Creperie
7,M3B,North York,Don Mills North,43.745906,-79.352188,0.0,Gym / Fitness Center,Caribbean Restaurant,Yoga Studio,Drugstore,Curling Ice
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937,0.0,Gym / Fitness Center,Gastropub,Yoga Studio,Drugstore,Curling Ice
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937,3.0,Pizza Place,Clothing Store,Yoga Studio,Cosmetics Shop,Curling Ice


In [111]:
toronto_merged.dropna(axis=0, inplace=True)

In [112]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

In [114]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Step 14: Examine clusters

#### Cluster1

In [116]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]].head()

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
3,North York,0.0,Furniture / Home Store,Boutique,Yoga Studio,Drugstore,Curling Ice
6,Scarborough,0.0,Print Shop,Fast Food Restaurant,Yoga Studio,Dog Run,Creperie
7,North York,0.0,Gym / Fitness Center,Caribbean Restaurant,Yoga Studio,Drugstore,Curling Ice
8,East York,0.0,Gym / Fitness Center,Gastropub,Yoga Studio,Drugstore,Curling Ice
10,North York,0.0,Pub,Japanese Restaurant,Yoga Studio,Convenience Store,Creperie


#### Cluster2

In [117]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]].head()

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
1,North York,1.0,Coffee Shop,Hockey Arena,Yoga Studio,Drugstore,Curling Ice
2,Downtown Toronto,1.0,Coffee Shop,Bakery,Yoga Studio,Drugstore,Curling Ice
17,Etobicoke,1.0,Coffee Shop,Liquor Store,Yoga Studio,Cosmetics Shop,Curling Ice
22,Scarborough,1.0,Coffee Shop,Yoga Studio,Drugstore,Curling Ice,Dance Studio
24,Downtown Toronto,1.0,Coffee Shop,Yoga Studio,Drugstore,Curling Ice,Dance Studio


#### Cluster3

In [118]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]].head()

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
16,York,2.0,Playground,Field,Yoga Studio,Dog Run,Creperie
32,Scarborough,2.0,Playground,Yoga Studio,Convenience Store,Creperie,Curling Ice
85,Scarborough,2.0,Park,Playground,Yoga Studio,Cosmetics Shop,Creperie
91,Downtown Toronto,2.0,Park,Playground,Yoga Studio,Cosmetics Shop,Creperie


#### Cluster4

In [119]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]].head()

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
9,Downtown Toronto,3.0,Pizza Place,Clothing Store,Yoga Studio,Cosmetics Shop,Curling Ice
18,Scarborough,3.0,Pizza Place,Electronics Store,Yoga Studio,Dog Run,Creperie
37,West Toronto,3.0,Brewery,Pizza Place,Cosmetics Shop,Curling Ice,Dance Studio
50,North York,3.0,Pizza Place,Caribbean Restaurant,Yoga Studio,Cosmetics Shop,Curling Ice
72,North York,3.0,Pharmacy,Pizza Place,Construction & Landscaping,Cosmetics Shop,Creperie


#### Cluster5

In [120]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]].head()

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,North York,4.0,Food & Drink Shop,Park,Yoga Studio,Dog Run,Creperie
4,Queen's Park,4.0,Italian Restaurant,Park,Yoga Studio,Creperie,Curling Ice
21,York,4.0,Fast Food Restaurant,Park,Yoga Studio,Dog Run,Creperie
35,East York,4.0,Park,Yoga Studio,Drugstore,Creperie,Curling Ice
36,Downtown Toronto,4.0,Neighborhood,Park,Dog Run,Cosmetics Shop,Creperie
