# Getting data

Import packages.

In [1]:
import os

from bs4 import BeautifulSoup
import requests
import pandas as pd

Set url.

In [2]:
# Wikipedia page holding the table of postal codes that start with "M".
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

Get the table.

In [3]:
# Download the page; the timeout stops the cell from hanging forever and
# raise_for_status() turns an HTTP error page into an immediate, clear failure.
html = requests.get(url, timeout=30)
html.raise_for_status()
soup = BeautifulSoup(html.text, 'html.parser')

# First <table> whose class attribute is "wikitable sortable".
table = soup.find('table', {'class': 'wikitable sortable'})
if table is None:
    # Fail here instead of with an AttributeError on None in a later cell.
    raise ValueError('No "wikitable sortable" table found at {}'.format(url))

Build a dataframe from the table.

In [4]:
# Column names come from the <th> cells of the first table row.
columns = [cell.text.replace('\n', '') for cell in table.find('tr').find_all('th')]

# Every remaining row becomes a list of cleaned <td> texts
# (newlines and non-breaking spaces removed).
trs = table.find_all('tr')[1:]
rows = [
    [cell.text.replace('\n', '').replace('\xa0', '') for cell in tr.find_all('td')]
    for tr in trs
]
df = pd.DataFrame(rows, columns=columns)

Get rid of the rows with no borough assigned and set neighbourhood to borough if not assigned.

In [5]:
# Keep only the rows that actually have a borough; .copy() gives an independent
# frame so the assignment below never writes into a view of the scraped data.
df = df[df['Borough'] != 'Not assigned'].copy()

# Where the neighbourhood is missing, fall back to the borough name.
# Series.where keeps values where the condition holds and takes the aligned
# 'Borough' value elsewhere. This avoids the chained
# df['Neighbourhood'].replace(..., inplace=True) pattern, which operates on a
# column selection and is not guaranteed to update df itself.
df['Neighbourhood'] = df['Neighbourhood'].where(
    df['Neighbourhood'] != 'Not assigned', df['Borough'])

Combine rows with the same postcode and sort by postcodes.

In [6]:
# Collapse all rows that share a postcode into a single row.
# Every non-key column keeps the value from the first row of its group, while
# the neighbourhood names are joined with ','. The names are joined in reverse
# order of appearance (last row first), matching what the earlier
# duplicated()/zip implementation produced for adjacent duplicates; unlike
# that implementation, grouping is also correct when the duplicate rows are
# not next to each other.
aggregations = {col: 'first' for col in df.columns if col != 'Postcode'}
aggregations['Neighbourhood'] = lambda names: ','.join(reversed([str(name) for name in names]))

# groupby sorts the result by postcode and as_index=False yields a fresh
# 0..n-1 index; the trailing selection restores the original column order.
df = df.groupby('Postcode', as_index=False, sort=True).agg(aggregations)[df.columns.tolist()]

Show the final dataframe.

In [7]:
# Preview the first five rows of the cleaned frame.
df.head(n=5)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern,Rouge"
1,M1C,Scarborough,"Port Union,Rouge Hill,Highland Creek"
2,M1E,Scarborough,"West Hill,Morningside,Guildwood"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [8]:
# (number of rows, number of columns) of the cleaned frame.
df.shape

(103, 3)

# Get geo data

 Get geo info from csv<br>
 Geocode has consistently returned None for me :(

In [9]:
# Load the postal code -> latitude/longitude table from the hosted CSV.
df_geo = pd.read_csv(filepath_or_buffer='https://cocl.us/Geospatial_data')
df_geo.head(n=5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Merge geo info.

In [10]:
# Attach coordinates to every postcode (left join keeps all rows of df), then
# drop the join key that duplicates 'Postcode'.
df_new = (
    df.merge(df_geo, how='left', left_on='Postcode', right_on='Postal Code')
      .drop(columns='Postal Code')
)
df_new.head(n=5)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern,Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Port Union,Rouge Hill,Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"West Hill,Morningside,Guildwood",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


# Clustering

In [11]:
# @hidden_cell
# Foursquare credentials are read from the environment so that no secret is
# stored in the notebook. Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET
# before starting the kernel.
# NOTE(review): the id/secret that used to be hard-coded in this cell were
# committed in plain text and should be treated as compromised and rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')  # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')  # your Foursquare Secret
if not (CLIENT_ID and CLIENT_SECRET):
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
          'Foursquare requests will be rejected.')
VERSION = '20180605'  # Foursquare API version
LIMIT = 500  # value sent as the "limit" query parameter of each request

Use the function from New York example.

In [12]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare "explore" endpoint around each location and tabulate the venues.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences; element i of each describes one location. They are
        consumed with zip(), so the shortest one determines how many
        locations are queried.
    radius : int, optional
        Value sent as the ``radius`` query parameter (default 500).

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame
        has these columns even when no venue is found.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.

    Notes
    -----
    Reads the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT, and
    prints each location name as it is processed.
    """
    column_names = ['Neighborhood',
                    'Neighborhood Latitude',
                    'Neighborhood Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and escape the query string instead of
        # formatting the credentials into the URL by hand.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30,
        )
        # Surface bad credentials / quota errors as an HTTP error rather than
        # as a KeyError while digging through the JSON below.
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            # A venue without any category would otherwise raise IndexError.
            categories = venue.get('categories') or []
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing the column names to the constructor also works for zero rows;
    # assigning .columns afterwards fails on an empty frame.
    return pd.DataFrame(venue_rows, columns=column_names)

Get nearby venues; `%%capture` is used here to suppress the output.

In [13]:
%%capture
# The cell magic above captures everything this cell would display, including
# the location names that getNearbyVenues prints while it runs.
df_venues=getNearbyVenues(names=df_new['Neighbourhood'],latitudes=df_new['Latitude'],longitudes=df_new['Longitude']);

# NOTE(review): if these two lines live in the same cell as %%capture, their
# output is captured as well; move them to a separate cell to see it.
print(df_venues.shape)
df_venues.head()

One-hot encode the venue categories.

In [14]:
# One indicator column per venue category; dtype=int keeps the indicators as
# 0/1 integers regardless of the pandas version's default dummy dtype.
df_venues_onehot = pd.get_dummies(df_venues[['Venue Category']], prefix="", prefix_sep="", dtype=int)

# A venue category that is itself called 'Neighborhood' would clash with the
# label column inserted below, so remove it if present. errors='ignore' keeps
# the cell working when the fetched venues contain no such category.
df_venues_onehot = df_venues_onehot.drop(columns='Neighborhood', errors='ignore')

# add neighborhood column back to the front of dataframe
df_venues_onehot.insert(0, 'Neighborhood', df_venues['Neighborhood'])

print(df_venues_onehot.shape)
df_venues_onehot.head()

(2271, 280)


Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Arcade,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Garage,Auto Workshop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Basketball Stadium,Beach,Beer Bar,Beer Store,Belgian Restaurant,Bike Shop,Bistro,Boat or Ferry,Bookstore,Boutique,...,Snack Place,Soccer Field,Soup Place,South American Restaurant,Southern / Soul Food Restaurant,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Stadium,Stationery Store,Steakhouse,Strip Club,Supermarket,Supplement Shop,Sushi Restaurant,Swim School,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tapas Restaurant,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,"Malvern,Rouge",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Malvern,Rouge",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Port Union,Rouge Hill,Highland Creek",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Port Union,Rouge Hill,Highland Creek",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"West Hill,Morningside,Guildwood",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


Group by Neighborhoods and calculate the frequency of each category.

In [15]:
# Mean of each indicator column per neighborhood, i.e. the share of that
# neighborhood's venues falling in each category; 'Neighborhood' stays a column.
df_grouped = df_venues_onehot.groupby('Neighborhood', as_index=False).mean()
df_grouped.head(n=5)

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Arcade,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Garage,Auto Workshop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Basketball Stadium,Beach,Beer Bar,Beer Store,Belgian Restaurant,Bike Shop,Bistro,Boat or Ferry,Bookstore,Boutique,...,Snack Place,Soccer Field,Soup Place,South American Restaurant,Southern / Soul Food Restaurant,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Stadium,Stationery Store,Steakhouse,Strip Club,Supermarket,Supplement Shop,Sushi Restaurant,Swim School,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tapas Restaurant,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.017544,0.035088,0.0,0.0,0.0,0.0,0.017544,0.017544,0.017544,0.0,0.017544,0.0,0.017544,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035088,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.017544,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Business Reply Mail Processing Centre 969 Eastern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632
4,Caledonia-Fairbanks,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0


Use the function from New York example.

In [16]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first entry of ``row`` is skipped (callers pass rows whose first
    field is the neighborhood name); the remaining entries are sorted by
    value in descending order and the index labels of the first
    ``num_top_venues`` of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

Sort by most common venues

In [17]:
import numpy as np

In [18]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    # The first three ranks use their own suffix, every later rank uses 'th'.
    # An explicit bounds check replaces the bare `except:` that used an
    # IndexError as control flow and would also have hidden any other error.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = df_grouped['Neighborhood']

# Fill each row with that neighborhood's top categories.
for ind in np.arange(df_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(df_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Lounge,Clothing Store,Breakfast Spot,Skating Rink,Yoga Studio,Donut Shop,Diner,Discount Store,Dive Bar,Dog Run
1,Bayview Village,Café,Japanese Restaurant,Chinese Restaurant,Bank,Dessert Shop,Diner,Discount Store,Dive Bar,Dog Run,Doner Restaurant
2,Berczy Park,Coffee Shop,Cocktail Bar,Restaurant,Seafood Restaurant,Bakery,Farmers Market,Pub,Café,Cheese Shop,Steakhouse
3,Business Reply Mail Processing Centre 969 Eastern,Light Rail Station,Yoga Studio,Garden,Smoke Shop,Brewery,Spa,Burrito Place,Farmers Market,Fast Food Restaurant,Restaurant
4,Caledonia-Fairbanks,Park,Pharmacy,Women's Store,Market,Fast Food Restaurant,Dog Run,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store


Use k-means clustering to label neighborhoods.

In [19]:
from sklearn.cluster import KMeans

In [20]:
kclusters = 5

# Feature matrix: the per-neighborhood category frequencies without the label
# column. `columns=` is used because the positional axis argument
# (`drop('Neighborhood', 1)`) is no longer accepted by pandas 2.x.
df_clustering = df_grouped.drop(columns='Neighborhood')

# run k-means clustering; a fixed random_state makes the cluster labels
# reproducible across kernel restarts
kmeans = KMeans(init='k-means++', n_clusters=kclusters, n_init=12, random_state=0).fit(df_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [21]:
# add clustering labels
# Remove a 'Cluster Labels' column left over from an earlier run of this cell;
# DataFrame.insert refuses to add a column that already exists, so without
# this the cell fails when it is executed a second time.
neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels', errors='ignore')
neighborhoods_venues_sorted.insert(1, 'Cluster Labels', kmeans.labels_)
neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,0,Lounge,Clothing Store,Breakfast Spot,Skating Rink,Yoga Studio,Donut Shop,Diner,Discount Store,Dive Bar,Dog Run
1,Bayview Village,0,Café,Japanese Restaurant,Chinese Restaurant,Bank,Dessert Shop,Diner,Discount Store,Dive Bar,Dog Run,Doner Restaurant
2,Berczy Park,0,Coffee Shop,Cocktail Bar,Restaurant,Seafood Restaurant,Bakery,Farmers Market,Pub,Café,Cheese Shop,Steakhouse
3,Business Reply Mail Processing Centre 969 Eastern,0,Light Rail Station,Yoga Studio,Garden,Smoke Shop,Brewery,Spa,Burrito Place,Farmers Market,Fast Food Restaurant,Restaurant
4,Caledonia-Fairbanks,0,Park,Pharmacy,Women's Store,Market,Fast Food Restaurant,Dog Run,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store


Combine with Latitude and Longitude.

In [22]:
# Columns 2-4 of df_new (neighbourhood name plus coordinates), with the name
# column renamed to the spelling used by the venue tables.
df_geo2 = df_new.iloc[:, 2:5].rename(columns={'Neighbourhood': 'Neighborhood'})

# Join the coordinates with the cluster labels / top venues by name.
df_result = pd.merge(left=df_geo2, right=neighborhoods_venues_sorted, on='Neighborhood')
df_result

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Malvern,Rouge",43.806686,-79.194353,0,Print Shop,Fast Food Restaurant,Doner Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Dog Run,Yoga Studio
1,"Port Union,Rouge Hill,Highland Creek",43.784535,-79.160497,0,Bar,Construction & Landscaping,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Yoga Studio
2,"West Hill,Morningside,Guildwood",43.763573,-79.188711,0,Intersection,Mexican Restaurant,Electronics Store,Spa,Pizza Place,Rental Car Location,Breakfast Spot,Medical Center,Dessert Shop,Dim Sum Restaurant
3,Woburn,43.770992,-79.216917,0,Coffee Shop,Korean Restaurant,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Yoga Studio
4,Cedarbrae,43.773136,-79.239476,0,Athletics & Sports,Hakka Restaurant,Thai Restaurant,Caribbean Restaurant,Bakery,Bank,Fried Chicken Joint,Dive Bar,Dim Sum Restaurant,Diner
5,Scarborough Village,43.744734,-79.239476,0,Playground,Jewelry Store,Yoga Studio,Dog Run,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Doner Restaurant
6,"Kennedy Park,Ionview,East Birchmount Park",43.727929,-79.262029,0,Convenience Store,Coffee Shop,Discount Store,Hobby Shop,Department Store,Donut Shop,Dim Sum Restaurant,Diner,Dive Bar,Dog Run
7,"Oakridge,Golden Mile,Clairlea",43.711112,-79.284577,0,Bus Line,Bakery,Intersection,Park,Fast Food Restaurant,Bus Station,Soccer Field,Metro Station,Dive Bar,Diner
8,"Scarborough Village West,Cliffside,Cliffcrest",43.716316,-79.239476,0,Motel,Movie Theater,American Restaurant,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Yoga Studio
9,"Cliffside West,Birch Cliff",43.692657,-79.264848,0,Café,General Entertainment,College Stadium,Skating Rink,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Dog Run


Draw a map of the result.

In [23]:
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors

In [24]:
# create map
map_clusters = folium.Map(location=[43.741667, -79.373333], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per
# cluster (the unused helper arrays of the earlier version are gone; they only
# served to supply the count, which is kclusters)
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(df_result['Latitude'], df_result['Longitude'], df_result['Neighborhood'], df_result['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # cluster - 1 is kept so the colours stay as before: cluster 0 indexes
    # rainbow[-1], i.e. wraps around to the last colour.
    marker_color = rainbow[cluster - 1]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

# Examine the results

Cluster 0

In [25]:
# Neighborhoods whose cluster label is 0.
neighborhoods_venues_sorted[neighborhoods_venues_sorted['Cluster Labels'].eq(0)]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,0,Lounge,Clothing Store,Breakfast Spot,Skating Rink,Yoga Studio,Donut Shop,Diner,Discount Store,Dive Bar,Dog Run
1,Bayview Village,0,Café,Japanese Restaurant,Chinese Restaurant,Bank,Dessert Shop,Diner,Discount Store,Dive Bar,Dog Run,Doner Restaurant
2,Berczy Park,0,Coffee Shop,Cocktail Bar,Restaurant,Seafood Restaurant,Bakery,Farmers Market,Pub,Café,Cheese Shop,Steakhouse
3,Business Reply Mail Processing Centre 969 Eastern,0,Light Rail Station,Yoga Studio,Garden,Smoke Shop,Brewery,Spa,Burrito Place,Farmers Market,Fast Food Restaurant,Restaurant
4,Caledonia-Fairbanks,0,Park,Pharmacy,Women's Store,Market,Fast Food Restaurant,Dog Run,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store
5,Canada Post Gateway Processing Centre,0,Hotel,Coffee Shop,Gym / Fitness Center,Fried Chicken Joint,Middle Eastern Restaurant,Sandwich Place,Burrito Place,American Restaurant,Mediterranean Restaurant,Donut Shop
6,Cedarbrae,0,Athletics & Sports,Hakka Restaurant,Thai Restaurant,Caribbean Restaurant,Bakery,Bank,Fried Chicken Joint,Dive Bar,Dim Sum Restaurant,Diner
7,Central Bay Street,0,Coffee Shop,Italian Restaurant,Bubble Tea Shop,Burger Joint,Ice Cream Shop,Café,Bar,Chinese Restaurant,Japanese Restaurant,Middle Eastern Restaurant
8,Christie,0,Grocery Store,Café,Park,Nightclub,Diner,Italian Restaurant,Baby Store,Restaurant,Athletics & Sports,Convenience Store
9,Church and Wellesley,0,Coffee Shop,Japanese Restaurant,Gay Bar,Sushi Restaurant,Burger Joint,Restaurant,Yoga Studio,Pub,Bubble Tea Shop,Fast Food Restaurant


It looks like these are the areas where people live, with lots of stores for all kinds of stuff.

Cluster 1

In [26]:
# Neighborhoods whose cluster label is 1.
neighborhoods_venues_sorted[neighborhoods_venues_sorted['Cluster Labels'].eq(1)]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
97,"York Mills,Silver Hills",1,Cafeteria,Yoga Studio,Doner Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Dog Run,Donut Shop


Can't really tell.

Cluster 2

In [27]:
# Neighborhoods whose cluster label is 2.
neighborhoods_venues_sorted[neighborhoods_venues_sorted['Cluster Labels'].eq(2)]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
15,Downsview Central,2,Food Truck,Baseball Field,Yoga Studio,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Dog Run,Donut Shop
26,"Humberlea,Emery",2,Baseball Field,Yoga Studio,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Drugstore


Can't really tell.

Cluster 3

In [28]:
# Neighborhoods whose cluster label is 3.
neighborhoods_venues_sorted[neighborhoods_venues_sorted['Cluster Labels'].eq(3)]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
85,"West Deane Park,Princess Gardens,Martin Grove,...",3,Bank,Yoga Studio,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Department Store


Can't really tell.

Cluster 4

In [29]:
# Neighborhoods whose cluster label is 4.
neighborhoods_venues_sorted[neighborhoods_venues_sorted['Cluster Labels'].eq(4)]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
20,East Toronto,4,Park,Convenience Store,Coffee Shop,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Dog Run,Doner Restaurant
54,Rosedale,4,Park,Playground,Trail,Dog Run,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar
65,"Steeles East,Milliken,L'Amoreaux East,Agincour...",4,Park,Playground,Yoga Studio,Doner Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Dog Run
88,Weston,4,Park,Convenience Store,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Yoga Studio
96,York Mills West,4,Bank,Park,Yoga Studio,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Donut Shop


Looks like rural areas for people to hang out and spend time.