In [1]:
# Import libraries
import requests
from bs4 import BeautifulSoup
import urllib.request as ur
import time
import lxml.html as lh
header = {'User-Agent': 'Mozilla/5.0'}

In [2]:
#read and open URL to scrape
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
response = requests.get(url) #Connect to the URL
response #200 means it went through

<Response [200]>

In [3]:
#Parse HTML and save to BeautifulSoup object
soup = BeautifulSoup(response.text, "html.parser")
print(soup.prettify())

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   List of postal codes of Canada: M - Wikipedia
  </title>
  <script>
   document.documentElement.className=document.documentElement.className.replace(/(^|\s)client-nojs(\s|$)/,"$1client-js$2");RLCONF={"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_postal_codes_of_Canada:_M","wgTitle":"List of postal codes of Canada: M","wgCurRevisionId":906439794,"wgRevisionId":906439794,"wgArticleId":539066,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["CS1 errors: deprecated parameters","Communications in Ontario","Postal codes in Canada","Toronto","Ontario-related lists"],"wgBreakFrames":!1,"wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","Feb

In [4]:
tables = soup.findAll('table',{'class':'wikitable sortable'})
tables

[<table class="wikitable sortable">
 <tbody><tr>
 <th>Postcode</th>
 <th>Borough</th>
 <th>Neighbourhood
 </th></tr>
 <tr>
 <td>M1A</td>
 <td>Not assigned</td>
 <td>Not assigned
 </td></tr>
 <tr>
 <td>M2A</td>
 <td>Not assigned</td>
 <td>Not assigned
 </td></tr>
 <tr>
 <td>M3A</td>
 <td><a href="/wiki/North_York" title="North York">North York</a></td>
 <td><a href="/wiki/Parkwoods" title="Parkwoods">Parkwoods</a>
 </td></tr>
 <tr>
 <td>M4A</td>
 <td><a href="/wiki/North_York" title="North York">North York</a></td>
 <td><a href="/wiki/Victoria_Village" title="Victoria Village">Victoria Village</a>
 </td></tr>
 <tr>
 <td>M5A</td>
 <td><a href="/wiki/Downtown_Toronto" title="Downtown Toronto">Downtown Toronto</a></td>
 <td><a href="/wiki/Harbourfront_(Toronto)" title="Harbourfront (Toronto)">Harbourfront</a>
 </td></tr>
 <tr>
 <td>M5A</td>
 <td><a href="/wiki/Downtown_Toronto" title="Downtown Toronto">Downtown Toronto</a></td>
 <td><a href="/wiki/Regent_Park" title="Regent Park">Regent Pa

In [5]:
#How many tables are in the HTML text
for table in tables: 
    print("#########")
    print(table.text[:100])

#########


Postcode
Borough
Neighbourhood


M1A
Not assigned
Not assigned


M2A
Not assigned
Not assigned


M


In [6]:
for tn in range(len(tables)):
    table = tables[tn]

In [7]:
#Parse tr/td text and compile into rows
rows = table.findAll("tr")
row_lengths = [len(r.findAll(['th', 'td'])) for r in rows]
ncols = max(row_lengths)
nrows = len(rows)
data = []
for i in range(nrows):
    rowD = []
    for j in range(ncols):
        rowD.append('')
    data.append(rowD)

In [8]:
for i in range(len(rows)):
        row = rows[i]
        rowD = []
        cells = row.findAll(["td", "th"])
        for j in range(len(cells)):
            cell = cells[j]
        #lots of cells span cols and rows so lets deal with that
            cspan = int(cell.get('colspan', 1))
            rspan = int(cell.get('rowspan', 1))
            l = 0
            for k in range(rspan):
                # Shifts to the first empty cell of this row
                while data[i + k][j + l]:
                    l += 1
                for m in range(cspan):
                    data[i + k][j + l + m] += cell.text
                    
data.append(rowD)

In [9]:
import os
import codecs

In [10]:
 # write data out to tab seperated format
page = os.path.split(url)[1]
fname = 'output_{}_t{}.tsv'.format(page, tn)
f = codecs.open(fname, 'w')
for i in range(nrows):
    rowStr = '\t'.join(data[i])
    rowStr = rowStr.replace('\n', '')
    print(rowStr)
    f.write(rowStr + '\n')

Postcode	Borough	Neighbourhood
M1A	Not assigned	Not assigned
M2A	Not assigned	Not assigned
M3A	North York	Parkwoods
M4A	North York	Victoria Village
M5A	Downtown Toronto	Harbourfront
M5A	Downtown Toronto	Regent Park
M6A	North York	Lawrence Heights
M6A	North York	Lawrence Manor
M7A	Queen's Park	Not assigned
M8A	Not assigned	Not assigned
M9A	Etobicoke	Islington Avenue
M1B	Scarborough	Rouge
M1B	Scarborough	Malvern
M2B	Not assigned	Not assigned
M3B	North York	Don Mills North
M4B	East York	Woodbine Gardens
M4B	East York	Parkview Hill
M5B	Downtown Toronto	Ryerson
M5B	Downtown Toronto	Garden District
M6B	North York	Glencairn
M7B	Not assigned	Not assigned
M8B	Not assigned	Not assigned
M9B	Etobicoke	Cloverdale
M9B	Etobicoke	Islington
M9B	Etobicoke	Martin Grove
M9B	Etobicoke	Princess Gardens
M9B	Etobicoke	West Deane Park
M1C	Scarborough	Highland Creek
M1C	Scarborough	Rouge Hill
M1C	Scarborough	Port Union
M2C	Not assigned	Not assigned
M3C	North York	Flemingdon Park
M3C	North York	Don Mills South
M

In [11]:
import pandas as pd

df = pd.read_csv(fname, sep='\t', header=0)

df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [12]:
import numpy as np

In [13]:
# Drop rows with not assigned Borough
df2 = df[~df['Borough'].str.contains('Not assigned')]
df2

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


In [14]:
# Replace unassigned Neighbourhoods with Borough names
for i in range(df2.shape[0]):
    if df2.iloc[i,:]['Neighbourhood']=='Not assigned':
        df2.iloc[i,:]['Neighbourhood'] = df2.iloc[i,:]['Borough']
df2.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


In [15]:
df2.shape

(211, 3)

In [16]:
df3_grouped = df2.groupby(['Postcode','Borough'], as_index=False).agg(lambda x: ','.join(x))

df3_grouped.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [17]:
df3_grouped.shape

(103, 3)

In [18]:
path = 'C:\\Users\\LeaMPalmer\\Documents\\Notes\\Python and Data Science\\CSV Files\\Geospatial_Coordinates.csv'
df_coord = pd.read_csv(path, header='infer')
df_coord.rename(columns={'Postal Code':'Postcode'},inplace='True')
df_coord.head()

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [19]:
df4_lat_lng = df3_grouped.merge(df_coord, on='Postcode', how='inner')
df4_lat_lng.head(30)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff,Cliffside West",43.692657,-79.264848


In [20]:
print('The dataframe has {} boroughs and {} neighbourhoods.'.format(
        len(df4_lat_lng['Borough'].unique()),
        df4_lat_lng.shape[0]
    )
)

The dataframe has 11 boroughs and 103 neighbourhoods.


In [21]:
df4_lat_lng.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [22]:
!conda install -c conda-forge folium=0.5.0 --yes

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.



In [23]:
pip install geopy

Note: you may need to restart the kernel to use updated packages.


In [24]:
from geopy.geocoders import Nominatim
import folium

In [25]:
# Grab the coordinates of Toronto
address="Toronto, Canada"

geolocator = Nominatim(user_agent="exploring_toronto_map")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geographical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The geographical coordinates of Toronto are 43.653963, -79.387207.


In [26]:
toronto_map=folium.Map(location=[latitude,longitude],zoom_start=10)

for lat, lng, label in zip(df4_lat_lng['Latitude'], df4_lat_lng['Longitude'], df4_lat_lng['Borough']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=10,
        popup=label,
        color='coral',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.8,
        parse_html=False).add_to(toronto_map)
    
toronto_map

In [27]:
scarborough_data = df4_lat_lng[df4_lat_lng['Borough'] == 'Scarborough'].reset_index(drop=True)
scarborough_data.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [28]:
address = 'Scarborough, Canada'

geolocator = Nominatim(user_agent="exploring_scarborough")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geographical coordinates of Scarborough are {}, {}.'.format(latitude, longitude))

The geographical coordinates of Scarborough are 43.773077, -79.257774.


In [29]:
scarborough_map=folium.Map(location=[latitude,longitude],zoom_start=11)

for lat, lng, label in zip(scarborough_data['Latitude'], scarborough_data['Longitude'], scarborough_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(scarborough_map)
    
scarborough_map

In [30]:
CLIENT_ID = 'IMHULEGENXQIE2V42EYXQ5GOO0IFTFMKBDZQDMFI0I3DLFFR'
CLIENT_SECRET = 'ODZRXGFQCB22JOCVJLYUVSAANRGSRCF3G40PHFJ5RSXVZTSD'
VERSION = '20180605'


print('Your Credentials')
print('Client_ID: '+ CLIENT_ID)
print('Client Secret: ' + CLIENT_SECRET)

Your Credentials
Client_ID: IMHULEGENXQIE2V42EYXQ5GOO0IFTFMKBDZQDMFI0I3DLFFR
Client Secret: ODZRXGFQCB22JOCVJLYUVSAANRGSRCF3G40PHFJ5RSXVZTSD


In [31]:
scarborough_data.loc[0, 'Neighbourhood']

'Rouge,Malvern'

In [32]:
neighbourhood_latitude = scarborough_data.loc[0, 'Latitude'] # neighborhood latitude value
neighbourhood_longitude = scarborough_data.loc[0, 'Longitude'] # neighborhood longitude value
neighbourhood_name = scarborough_data.loc[0, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighbourhood_name, 
                                                               neighbourhood_latitude, 
                                                               neighbourhood_longitude))

Latitude and longitude values of Rouge,Malvern are 43.806686299999996, -79.19435340000001.


In [33]:
search_query= 'Rouge,Malvern'
radius=50000
LIMIT = 100

url='https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, neighbourhood_latitude, neighbourhood_longitude, VERSION, search_query, radius, LIMIT)
url

'https://api.foursquare.com/v2/venues/search?client_id=IMHULEGENXQIE2V42EYXQ5GOO0IFTFMKBDZQDMFI0I3DLFFR&client_secret=ODZRXGFQCB22JOCVJLYUVSAANRGSRCF3G40PHFJ5RSXVZTSD&ll=43.806686299999996,-79.19435340000001&v=20180605&query=Rouge,Malvern&radius=50000&limit=100'

In [34]:
results = requests.get(url).json()
venues=results['response']['venues']
venues

[{'id': '4d5475dbcc65a1431140425e',
  'name': 'Malvern Rouge Valley Youth Services',
  'location': {'address': '1270 Morningside Ave, Unit 5',
   'lat': 43.797791,
   'lng': -79.198267,
   'labeledLatLngs': [{'label': 'display',
     'lat': 43.797791,
     'lng': -79.198267}],
   'distance': 1038,
   'cc': 'CA',
   'country': 'Canada',
   'formattedAddress': ['1270 Morningside Ave, Unit 5', 'Canada']},
  'categories': [{'id': '52e81612bcbc57f1066b7a34',
    'name': 'Community Center',
    'pluralName': 'Community Centers',
    'shortName': 'Community Center',
    'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/building/default_',
     'suffix': '.png'},
    'primary': True}],
  'referralId': 'v-1568210027',
  'hasPerk': False},
 {'id': '4b828d15f964a520efd730e3',
  'name': 'Rouge Valley Centenary Hospital',
  'location': {'address': '2867 Ellesmere Rd.',
   'crossStreet': 'at Neilson Rd.',
   'lat': 43.78061431275554,
   'lng': -79.20515095619899,
   'labeledLatLngs': [{'lab

In [35]:
# Function that extracts the category of the venue(s) #
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [36]:
import json 
from pandas.io.json import json_normalize

In [37]:
nearby_venues = json_normalize(venues)
nearby_venues.columns

Index(['categories', 'hasPerk', 'id', 'location.address', 'location.cc',
       'location.city', 'location.country', 'location.crossStreet',
       'location.distance', 'location.formattedAddress',
       'location.labeledLatLngs', 'location.lat', 'location.lng',
       'location.neighborhood', 'location.postalCode', 'location.state',
       'name', 'referralId'],
      dtype='object')

In [38]:
# filter columns
filtered_columns = ['name', 'categories', 'location.lat', 'location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Malvern Rouge Valley Youth Services,Community Center,43.797791,-79.198267
1,Rouge Valley Centenary Hospital,Hospital,43.780614,-79.205151
2,Rouge Hill GO Station,Train Station,43.780229,-79.130855
3,Malvern Woods Park,Park,43.802091,-79.203851
4,Toronto Public Library - Malvern Branch,Library,43.808068,-79.216153


In [39]:
#How many venues were returned by Foursquare
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

50 venues were returned by Foursquare.


In [40]:

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [41]:
toronto_venues = getNearbyVenues(names=df4_lat_lng['Neighbourhood'],
                                   latitudes=df4_lat_lng['Latitude'],
                                   longitudes=df4_lat_lng['Longitude']
                                  )

Rouge,Malvern
Highland Creek,Rouge Hill,Port Union
Guildwood,Morningside,West Hill
Woburn
Cedarbrae
Scarborough Village
East Birchmount Park,Ionview,Kennedy Park
Clairlea,Golden Mile,Oakridge
Cliffcrest,Cliffside,Scarborough Village West
Birch Cliff,Cliffside West
Dorset Park,Scarborough Town Centre,Wexford Heights
Maryvale,Wexford
Agincourt
Clarks Corners,Sullivan,Tam O'Shanter
Agincourt North,L'Amoreaux East,Milliken,Steeles East
L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview,Henry Farm,Oriole
Bayview Village
Silver Hills,York Mills
Newtonbrook,Willowdale
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Flemingdon Park,Don Mills South
Bathurst Manor,Downsview North,Wilson Heights
Northwood Park,York University
CFB Toronto,Downsview East
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Woodbine Gardens,Parkview Hill
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto
The Danforth West,Riverdale
The Beaches West,Indi

In [42]:
print (toronto_venues.shape)
toronto_venues.head()
toronto_venues.index

(2255, 7)


RangeIndex(start=0, stop=2255, step=1)

In [43]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide,King,Richmond",100,100,100,100,100,100
Agincourt,5,5,5,5,5,5
"Agincourt North,L'Amoreaux East,Milliken,Steeles East",2,2,2,2,2,2
"Albion Gardens,Beaumond Heights,Humbergate,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown",9,9,9,9,9,9
"Alderwood,Long Branch",9,9,9,9,9,9
"Bathurst Manor,Downsview North,Wilson Heights",20,20,20,20,20,20
Bayview Village,4,4,4,4,4,4
"Bedford Park,Lawrence Manor East",21,21,21,21,21,21
Berczy Park,57,57,57,57,57,57
"Birch Cliff,Cliffside West",4,4,4,4,4,4


In [44]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 274 uniques categories.


In [45]:
#One Hot Encoding ## This is used to group categorical variables
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

toronto_onehot.drop(columns={'Neighborhood'},axis=1,inplace=True)

toronto_venues['Neighborhood']

0                                           Rouge,Malvern
1                    Highland Creek,Rouge Hill,Port Union
2                         Guildwood,Morningside,West Hill
3                         Guildwood,Morningside,West Hill
4                         Guildwood,Morningside,West Hill
5                         Guildwood,Morningside,West Hill
6                         Guildwood,Morningside,West Hill
7                         Guildwood,Morningside,West Hill
8                         Guildwood,Morningside,West Hill
9                                                  Woburn
10                                                 Woburn
11                                                 Woburn
12                                              Cedarbrae
13                                              Cedarbrae
14                                              Cedarbrae
15                                              Cedarbrae
16                                              Cedarbrae
17            

In [46]:
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood']

In [47]:
toronto_onehot.set_index('Neighborhood',inplace=True)

In [48]:
# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot

Unnamed: 0_level_0,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Rouge,Malvern",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
"Highland Creek,Rouge Hill,Port Union",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
"Guildwood,Morningside,West Hill",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
"Guildwood,Morningside,West Hill",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
"Guildwood,Morningside,West Hill",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
"Guildwood,Morningside,West Hill",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
"Guildwood,Morningside,West Hill",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
"Guildwood,Morningside,West Hill",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
"Guildwood,Morningside,West Hill",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Woburn,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [49]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,"Adelaide,King,Richmond",0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.020000,0.000000,0.00,0.000000,0.000000,0.010000,0.0,0.000000,0.0
1,Agincourt,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.0,0.000000,0.0
2,"Agincourt North,L'Amoreaux East,Milliken,Steel...",0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.0,0.000000,0.0
3,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.0,0.000000,0.0
4,"Alderwood,Long Branch",0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.0,0.000000,0.0
5,"Bathurst Manor,Downsview North,Wilson Heights",0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.000000,0.000000,0.05,0.000000,0.000000,0.000000,0.0,0.000000,0.0
6,Bayview Village,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.0,0.000000,0.0
7,"Bedford Park,Lawrence Manor East",0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.0,0.000000,0.0
8,Berczy Park,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.017544,0.000000,0.00,0.000000,0.000000,0.000000,0.0,0.000000,0.0
9,"Birch Cliff,Cliffside West",0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.0,0.000000,0.0


In [50]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide,King,Richmond----
         venue  freq
0  Coffee Shop  0.08
1         Café  0.05
2          Bar  0.04
3   Steakhouse  0.04
4        Hotel  0.03


----Agincourt----
                venue  freq
0        Skating Rink   0.2
1      Breakfast Spot   0.2
2  Chinese Restaurant   0.2
3      Sandwich Place   0.2
4              Lounge   0.2


----Agincourt North,L'Amoreaux East,Milliken,Steeles East----
                             venue  freq
0                       Playground   0.5
1                             Park   0.5
2                    Metro Station   0.0
3  Molecular Gastronomy Restaurant   0.0
4       Modern European Restaurant   0.0


----Albion Gardens,Beaumond Heights,Humbergate,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown----
                 venue  freq
0        Grocery Store  0.22
1             Pharmacy  0.11
2           Beer Store  0.11
3  Fried Chicken Joint  0.11
4       Sandwich Place  0.11


----Alderwood,Long Branch----
            venue  freq
0 

                             venue  freq
0           Furniture / Home Store   0.5
1                   Baseball Field   0.5
2        Middle Eastern Restaurant   0.0
3              Monument / Landmark   0.0
4  Molecular Gastronomy Restaurant   0.0


----Fairview,Henry Farm,Oriole----
                  venue  freq
0        Clothing Store  0.17
1  Fast Food Restaurant  0.08
2           Coffee Shop  0.08
3         Women's Store  0.05
4   Japanese Restaurant  0.03


----First Canadian Place,Underground city----
         venue  freq
0  Coffee Shop  0.09
1         Café  0.08
2        Hotel  0.05
3   Steakhouse  0.04
4   Restaurant  0.04


----Flemingdon Park,Don Mills South----
              venue  freq
0        Beer Store  0.10
1  Asian Restaurant  0.10
2       Coffee Shop  0.10
3               Gym  0.10
4    Discount Store  0.05


----Forest Hill North,Forest Hill West----
              venue  freq
0              Park  0.25
1     Jewelry Store  0.25
2  Sushi Restaurant  0.25
3             Tr

                venue  freq
0         Coffee Shop  0.11
1          Restaurant  0.04
2                Café  0.04
3  Italian Restaurant  0.03
4            Beer Bar  0.03


----Studio District----
                 venue  freq
0                 Café  0.11
1          Coffee Shop  0.08
2   Italian Restaurant  0.05
3               Bakery  0.05
4  American Restaurant  0.05


----The Annex,North Midtown,Yorkville----
            venue  freq
0            Café  0.12
1     Coffee Shop  0.12
2  Sandwich Place  0.12
3     Pizza Place  0.08
4  Cosmetics Shop  0.04


----The Beaches----
                  venue  freq
0     Health Food Store   0.2
1  Other Great Outdoors   0.2
2                 Trail   0.2
3                   Pub   0.2
4        Massage Studio   0.0


----The Beaches West,India Bazaar----
                venue  freq
0                Park  0.09
1      Sandwich Place  0.09
2   Fish & Chips Shop  0.05
3  Italian Restaurant  0.05
4         Pizza Place  0.05


----The Danforth West,Riverdale-

In [51]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [52]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{}_Most_Common_Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th_Most_Common_Venue'.format(ind+1))

neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st_Most_Common_Venue,2nd_Most_Common_Venue,3rd_Most_Common_Venue,4th_Most_Common_Venue,5th_Most_Common_Venue,6th_Most_Common_Venue,7th_Most_Common_Venue,8th_Most_Common_Venue,9th_Most_Common_Venue,10th_Most_Common_Venue
0,"Adelaide,King,Richmond",Coffee Shop,Café,Bar,Steakhouse,Burger Joint,Cosmetics Shop,Hotel,American Restaurant,Restaurant,Thai Restaurant
1,Agincourt,Lounge,Sandwich Place,Breakfast Spot,Skating Rink,Chinese Restaurant,Donut Shop,Diner,Discount Store,Dog Run,Doner Restaurant
2,"Agincourt North,L'Amoreaux East,Milliken,Steel...",Playground,Park,Women's Store,Donut Shop,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant
3,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",Grocery Store,Fried Chicken Joint,Pharmacy,Pizza Place,Fast Food Restaurant,Coffee Shop,Beer Store,Sandwich Place,Women's Store,Dog Run
4,"Alderwood,Long Branch",Pizza Place,Pub,Coffee Shop,Gym,Pharmacy,Sandwich Place,Skating Rink,Pool,Women's Store,Department Store


In [53]:
number_one=neighborhoods_venues_sorted['1st_Most_Common_Venue'].value_counts

In [54]:
number_one

<bound method IndexOpsMixin.value_counts of 0              Coffee Shop
1                   Lounge
2               Playground
3            Grocery Store
4              Pizza Place
5              Coffee Shop
6                     Café
7              Coffee Shop
8              Coffee Shop
9                     Café
10            Liquor Store
11                    Café
12      Light Rail Station
13                    Park
14         Airport Service
15             Coffee Shop
16                    Park
17                   Hotel
18        Hakka Restaurant
19             Coffee Shop
20                    Café
21                    Café
22             Coffee Shop
23                  Bakery
24             Pizza Place
25                   Motel
26                    Bank
27             Coffee Shop
28             Pizza Place
29                     Gym
              ...         
70    Caribbean Restaurant
71          Breakfast Spot
72                    Park
73             Coffee Shop
74         

In [55]:
from sklearn.cluster import KMeans

In [1]:
#set number of kmeans clusters
kclusters = 3

toronto_clustered_groups = toronto_grouped.drop('Neighborhood', 1)

#run kmeans clusters
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_clustered_groups)

#check cluster labels
kmeans.labels_[0:10]

NameError: name 'toronto_grouped' is not defined

In [57]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df4_lat_lng

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood')

toronto_merged.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st_Most_Common_Venue,2nd_Most_Common_Venue,3rd_Most_Common_Venue,4th_Most_Common_Venue,5th_Most_Common_Venue,6th_Most_Common_Venue,7th_Most_Common_Venue,8th_Most_Common_Venue,9th_Most_Common_Venue,10th_Most_Common_Venue
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353,0.0,Fast Food Restaurant,Drugstore,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Harbor / Marina
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497,1.0,Bar,Women's Store,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Fast Food Restaurant
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711,0.0,Medical Center,Electronics Store,Pizza Place,Intersection,Breakfast Spot,Rental Car Location,Mexican Restaurant,Doner Restaurant,Diner,Discount Store
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0.0,Coffee Shop,Korean Restaurant,Women's Store,Dumpling Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0.0,Hakka Restaurant,Bank,Athletics & Sports,Bakery,Thai Restaurant,Caribbean Restaurant,Fried Chicken Joint,Eastern European Restaurant,Dumpling Restaurant,Drugstore


In [58]:
toronto_merged.shape

(103, 16)

In [59]:
toronto_merged.columns = toronto_merged.columns.str.replace(' ', '_')

In [60]:
toronto_merged['Cluster_Labels']=toronto_merged['Cluster_Labels'].fillna(0).astype(int)

In [61]:
toronto_merged

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster_Labels,1st_Most_Common_Venue,2nd_Most_Common_Venue,3rd_Most_Common_Venue,4th_Most_Common_Venue,5th_Most_Common_Venue,6th_Most_Common_Venue,7th_Most_Common_Venue,8th_Most_Common_Venue,9th_Most_Common_Venue,10th_Most_Common_Venue
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353,0,Fast Food Restaurant,Drugstore,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Harbor / Marina
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497,1,Bar,Women's Store,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Fast Food Restaurant
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711,0,Medical Center,Electronics Store,Pizza Place,Intersection,Breakfast Spot,Rental Car Location,Mexican Restaurant,Doner Restaurant,Diner,Discount Store
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0,Coffee Shop,Korean Restaurant,Women's Store,Dumpling Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0,Hakka Restaurant,Bank,Athletics & Sports,Bakery,Thai Restaurant,Caribbean Restaurant,Fried Chicken Joint,Eastern European Restaurant,Dumpling Restaurant,Drugstore
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,0,Playground,Construction & Landscaping,Women's Store,Drugstore,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park",43.727929,-79.262029,0,Department Store,Discount Store,Coffee Shop,Hobby Shop,Convenience Store,Dumpling Restaurant,Diner,Dog Run,Doner Restaurant,Donut Shop
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge",43.711112,-79.284577,0,Bakery,Park,Intersection,Fast Food Restaurant,Metro Station,Bus Line,Bus Station,Soccer Field,Costume Shop,Construction & Landscaping
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West",43.716316,-79.239476,0,Motel,American Restaurant,Department Store,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore
9,M1N,Scarborough,"Birch Cliff,Cliffside West",43.692657,-79.264848,0,Café,General Entertainment,Skating Rink,College Stadium,Concert Hall,Dim Sum Restaurant,Event Space,Ethiopian Restaurant,Empanada Restaurant,Electronics Store


In [62]:
import matplotlib.cm as cm
import matplotlib.colors as colors
print('Imported')

Imported


In [63]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=15)

#color scheme
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

#add markers to the map 
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster_Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
    [lat, lon],
    radius=5,
    popup=label,
    color=rainbow[cluster-1],
    fill=True,
    fill_color=rainbow[cluster-1],
    fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [64]:
toronto_merged.loc[toronto_merged['Cluster_Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster_Labels,1st_Most_Common_Venue,2nd_Most_Common_Venue,3rd_Most_Common_Venue,4th_Most_Common_Venue,5th_Most_Common_Venue,6th_Most_Common_Venue,7th_Most_Common_Venue,8th_Most_Common_Venue,9th_Most_Common_Venue,10th_Most_Common_Venue
0,Scarborough,0,Fast Food Restaurant,Drugstore,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Harbor / Marina
2,Scarborough,0,Medical Center,Electronics Store,Pizza Place,Intersection,Breakfast Spot,Rental Car Location,Mexican Restaurant,Doner Restaurant,Diner,Discount Store
3,Scarborough,0,Coffee Shop,Korean Restaurant,Women's Store,Dumpling Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore
4,Scarborough,0,Hakka Restaurant,Bank,Athletics & Sports,Bakery,Thai Restaurant,Caribbean Restaurant,Fried Chicken Joint,Eastern European Restaurant,Dumpling Restaurant,Drugstore
5,Scarborough,0,Playground,Construction & Landscaping,Women's Store,Drugstore,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop
6,Scarborough,0,Department Store,Discount Store,Coffee Shop,Hobby Shop,Convenience Store,Dumpling Restaurant,Diner,Dog Run,Doner Restaurant,Donut Shop
7,Scarborough,0,Bakery,Park,Intersection,Fast Food Restaurant,Metro Station,Bus Line,Bus Station,Soccer Field,Costume Shop,Construction & Landscaping
8,Scarborough,0,Motel,American Restaurant,Department Store,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore
9,Scarborough,0,Café,General Entertainment,Skating Rink,College Stadium,Concert Hall,Dim Sum Restaurant,Event Space,Ethiopian Restaurant,Empanada Restaurant,Electronics Store
10,Scarborough,0,Indian Restaurant,Pet Store,Latin American Restaurant,Vietnamese Restaurant,Chinese Restaurant,Doner Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store


In [65]:

toronto_merged.loc[toronto_merged['Cluster_Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster_Labels,1st_Most_Common_Venue,2nd_Most_Common_Venue,3rd_Most_Common_Venue,4th_Most_Common_Venue,5th_Most_Common_Venue,6th_Most_Common_Venue,7th_Most_Common_Venue,8th_Most_Common_Venue,9th_Most_Common_Venue,10th_Most_Common_Venue
1,Scarborough,1,Bar,Women's Store,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Fast Food Restaurant


In [66]:
toronto_merged.loc[toronto_merged['Cluster_Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster_Labels,1st_Most_Common_Venue,2nd_Most_Common_Venue,3rd_Most_Common_Venue,4th_Most_Common_Venue,5th_Most_Common_Venue,6th_Most_Common_Venue,7th_Most_Common_Venue,8th_Most_Common_Venue,9th_Most_Common_Venue,10th_Most_Common_Venue
14,Scarborough,2,Playground,Park,Women's Store,Donut Shop,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant
20,North York,2,Park,Women's Store,Dumpling Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant
23,North York,2,Park,Bank,Convenience Store,Women's Store,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore
25,North York,2,Park,Food & Drink Shop,Women's Store,Drugstore,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant
30,North York,2,Park,Airport,Women's Store,Dumpling Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore
40,East York,2,Park,Intersection,Coffee Shop,Convenience Store,Drugstore,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant
44,Central Toronto,2,Park,Bus Line,Swim School,Women's Store,Donut Shop,Diner,Discount Store,Dog Run,Doner Restaurant,Drugstore
50,Downtown Toronto,2,Park,Trail,Playground,Building,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop
74,York,2,Park,Women's Store,Market,Fast Food Restaurant,Comic Shop,Concert Hall,Event Space,Ethiopian Restaurant,Comfort Food Restaurant,Empanada Restaurant
98,York,2,Park,Convenience Store,Women's Store,Dumpling Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore


In [67]:
toronto_merged.loc[toronto_merged['Cluster_Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster_Labels,1st_Most_Common_Venue,2nd_Most_Common_Venue,3rd_Most_Common_Venue,4th_Most_Common_Venue,5th_Most_Common_Venue,6th_Most_Common_Venue,7th_Most_Common_Venue,8th_Most_Common_Venue,9th_Most_Common_Venue,10th_Most_Common_Venue
94,Etobicoke,3,Bank,Women's Store,Drugstore,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Fast Food Restaurant


In [68]:
toronto_merged.loc[toronto_merged['Cluster_Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster_Labels,1st_Most_Common_Venue,2nd_Most_Common_Venue,3rd_Most_Common_Venue,4th_Most_Common_Venue,5th_Most_Common_Venue,6th_Most_Common_Venue,7th_Most_Common_Venue,8th_Most_Common_Venue,9th_Most_Common_Venue,10th_Most_Common_Venue
