# Part 1: Scraping Data from Wikipedia

In [1]:
import pandas as pd
import numpy as np
import urllib.request
from bs4 import BeautifulSoup

In [2]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
page = urllib.request.urlopen(url)

In [3]:
soup = BeautifulSoup(page, "lxml")

In [4]:
print(soup.prettify())

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   List of postal codes of Canada: M - Wikipedia
  </title>
  <script>
   document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"XrRJvgpAEKcAAI@-EI0AAAAQ","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_postal_codes_of_Canada:_M","wgTitle":"List of postal codes of Canada: M","wgCurRevisionId":955414546,"wgRevisionId":955414546,"wgArticleId":539066,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Articles with short description","Communications in Ontario","Postal codes in Canada","Toronto","Ontario

In [5]:
all_tables = soup.find_all('table')
all_tables

[<table class="wikitable sortable">
 <tbody><tr>
 <th>Postal Code
 </th>
 <th>Borough
 </th>
 <th>Neighborhood
 </th></tr>
 <tr>
 <td>M1A
 </td>
 <td>Not assigned
 </td>
 <td>
 </td></tr>
 <tr>
 <td>M2A
 </td>
 <td>Not assigned
 </td>
 <td>
 </td></tr>
 <tr>
 <td>M3A
 </td>
 <td>North York
 </td>
 <td>Parkwoods
 </td></tr>
 <tr>
 <td>M4A
 </td>
 <td>North York
 </td>
 <td>Victoria Village
 </td></tr>
 <tr>
 <td>M5A
 </td>
 <td>Downtown Toronto
 </td>
 <td>Regent Park, Harbourfront
 </td></tr>
 <tr>
 <td>M6A
 </td>
 <td>North York
 </td>
 <td>Lawrence Manor, Lawrence Heights
 </td></tr>
 <tr>
 <td>M7A
 </td>
 <td>Downtown Toronto
 </td>
 <td>Queen's Park, Ontario Provincial Government
 </td></tr>
 <tr>
 <td>M8A
 </td>
 <td>Not assigned
 </td>
 <td>
 </td></tr>
 <tr>
 <td>M9A
 </td>
 <td>Etobicoke
 </td>
 <td>Islington Avenue
 </td></tr>
 <tr>
 <td>M1B
 </td>
 <td>Scarborough
 </td>
 <td>Malvern, Rouge
 </td></tr>
 <tr>
 <td>M2B
 </td>
 <td>Not assigned
 </td>
 <td>
 </td></tr>
 <tr>
 <t

In [6]:
right_table = soup.find('table', class_ = 'wikitable sortable')
right_table

<table class="wikitable sortable">
<tbody><tr>
<th>Postal Code
</th>
<th>Borough
</th>
<th>Neighborhood
</th></tr>
<tr>
<td>M1A
</td>
<td>Not assigned
</td>
<td>
</td></tr>
<tr>
<td>M2A
</td>
<td>Not assigned
</td>
<td>
</td></tr>
<tr>
<td>M3A
</td>
<td>North York
</td>
<td>Parkwoods
</td></tr>
<tr>
<td>M4A
</td>
<td>North York
</td>
<td>Victoria Village
</td></tr>
<tr>
<td>M5A
</td>
<td>Downtown Toronto
</td>
<td>Regent Park, Harbourfront
</td></tr>
<tr>
<td>M6A
</td>
<td>North York
</td>
<td>Lawrence Manor, Lawrence Heights
</td></tr>
<tr>
<td>M7A
</td>
<td>Downtown Toronto
</td>
<td>Queen's Park, Ontario Provincial Government
</td></tr>
<tr>
<td>M8A
</td>
<td>Not assigned
</td>
<td>
</td></tr>
<tr>
<td>M9A
</td>
<td>Etobicoke
</td>
<td>Islington Avenue
</td></tr>
<tr>
<td>M1B
</td>
<td>Scarborough
</td>
<td>Malvern, Rouge
</td></tr>
<tr>
<td>M2B
</td>
<td>Not assigned
</td>
<td>
</td></tr>
<tr>
<td>M3B
</td>
<td>North York
</td>
<td>Don Mills
</td></tr>
<tr>
<td>M4B
</td>
<td>East Y

In [7]:
A = []
B = []
C = []

for row in right_table.findAll('tr'):
    cells = row.findAll('td')
    if len(cells)==3:
        A.append(cells[0].find(text=True))
        B.append(cells[1].find(text=True))
        C.append(cells[2].find(text=True))

In [8]:
df = pd.DataFrame(A,columns=['Postal Code'])
df['Borough'] = B
df['Neighborhood'] = C
df

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A\n,Not assigned\n,\n
1,M2A\n,Not assigned\n,\n
2,M3A\n,North York\n,Parkwoods\n
3,M4A\n,North York\n,Victoria Village\n
4,M5A\n,Downtown Toronto\n,"Regent Park, Harbourfront\n"
...,...,...,...
175,M5Z\n,Not assigned\n,\n
176,M6Z\n,Not assigned\n,\n
177,M7Z\n,Not assigned\n,\n
178,M8Z\n,Etobicoke\n,"Mimico NW, The Queensway West, South of Bloor,..."


In [9]:
df['Postal Code'] = df['Postal Code'].replace('\n', '', regex = True)
df['Borough'] = df['Borough'].replace('\n', '', regex = True)
df['Neighborhood'] = df['Neighborhood'].replace('\n', '', regex = True)
df

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,
176,M6Z,Not assigned,
177,M7Z,Not assigned,
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


In [10]:
df = df.replace('Not assigned', np.nan, regex=True)
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,,
1,M2A,,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [11]:
df.dropna(subset=['Borough'], axis=0, inplace=True)
df = df.reset_index(drop=True)
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [12]:
df.shape

(103, 3)

# Part 2: Incorporating Location Data 

### I could not get the geocoder Python package to work, so I am using the CSV file that was provided.

In [13]:
loc_df = pd.read_csv('http://cocl.us/Geospatial_data')
loc_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [14]:
loc_df.shape

(103, 3)

### I am joining the two dataframes using the inner join method

In [15]:
df_comp = pd.merge(left = df, right = loc_df, left_on = "Postal Code", right_on = "Postal Code")
df_comp.head(15)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


In [16]:
df_comp.shape

(103, 5)

# Part 3: Clustering and Analysis

In [17]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json

!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim

import requests
from pandas.io.json import json_normalize

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

import folium

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.



In [18]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [23]:
map_toronto = folium.Map(location = [latitude, longitude], zoom_start = 10)

for lat, lng, borough, neighborhood in zip(df_comp['Latitude'], df_comp['Longitude'], df_comp['Borough'], df_comp['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Defining Foursquare Credentials and Version

In [24]:
CLIENT_ID = 'PHUNIQKA5WBPZ5G0JZKKISKUYI3FG3MVQOVARJYKM5JMGFWJ' 
CLIENT_SECRET = 'AC5544NIW4ESTTV3F1R5HDZH5MTPPH1D44NZV1VBCIEUPPIF'
VERSION = '20180604'

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: PHUNIQKA5WBPZ5G0JZKKISKUYI3FG3MVQOVARJYKM5JMGFWJ
CLIENT_SECRET:AC5544NIW4ESTTV3F1R5HDZH5MTPPH1D44NZV1VBCIEUPPIF


### I'll be doing all analysis by the postal code, as the unique identifier in this dataset.

In [28]:
df_comp.loc[0, 'Postal Code']

'M3A'

In [43]:
pc_latitude = df_comp.loc[0, 'Latitude']
pc_longitude = df_comp.loc[0, 'Longitude']

pc_number = df_comp.loc[0, 'Postal Code']

print('Latitude and longitude values of postal code {} are {}, {}.'.format(pc_number,
                                                             pc_latitude,
                                                             pc_longitude))

Latitude and longitude values of postal code M3A are 43.7532586, -79.3296565.


### Testing the venue acquisition on one postal code

In [31]:
LIMIT = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    pc_latitude, 
    pc_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=PHUNIQKA5WBPZ5G0JZKKISKUYI3FG3MVQOVARJYKM5JMGFWJ&client_secret=AC5544NIW4ESTTV3F1R5HDZH5MTPPH1D44NZV1VBCIEUPPIF&v=20180604&ll=43.7532586,-79.3296565&radius=500&limit=100'

In [32]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5eb4767498205d001b3fb0cc'},
  'headerLocation': 'Parkwoods - Donalda',
  'headerFullLocation': 'Parkwoods - Donalda, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 3,
  'suggestedBounds': {'ne': {'lat': 43.757758604500005,
    'lng': -79.32343823984928},
   'sw': {'lat': 43.7487585955, 'lng': -79.33587476015072}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4e8d9dcdd5fbbbb6b3003c7b',
       'name': 'Brookbanks Park',
       'location': {'address': 'Toronto',
        'lat': 43.751976046055574,
        'lng': -79.33214044722958,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.751976046055574,
          'lng': -79.33214044722958}],
        'distance': 245,
        'cc': 'CA',
        'c

In [33]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [35]:
venues = results['response']['groups'][0]['items']

nearby_venues = json_normalize(venues)

filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Brookbanks Park,Park,43.751976,-79.33214
1,649 Variety,Convenience Store,43.754513,-79.331942
2,Variety Store,Food & Drink Shop,43.751974,-79.333114


### Replicating the venue acquisition on all postal codes in Toronto

In [44]:
def getNearbyVenues(names, latitudes, longitudes, radius = 500):

    venues_list = []
    for name, lat, long in zip(names, latitudes, longitudes):
        print(name)
        
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Postal Code', 
                  'Postal Code Latitude', 
                  'Postal Code Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues) 

In [45]:
toronto_venues = getNearbyVenues(names = df_comp['Postal Code'],
                                latitudes = df_comp['Latitude'],
                                longitudes = df_comp['Longitude']
                                )

M3A
M4A
M5A
M6A
M7A
M9A
M1B
M3B
M4B
M5B
M6B
M9B
M1C
M3C
M4C
M5C
M6C
M9C
M1E
M4E
M5E
M6E
M1G
M4G
M5G
M6G
M1H
M2H
M3H
M4H
M5H
M6H
M1J
M2J
M3J
M4J
M5J
M6J
M1K
M2K
M3K
M4K
M5K
M6K
M1L
M2L
M3L
M4L
M5L
M6L
M9L
M1M
M2M
M3M
M4M
M5M
M6M
M9M
M1N
M2N
M3N
M4N
M5N
M6N
M9N
M1P
M2P
M4P
M5P
M6P
M9P
M1R
M2R
M4R
M5R
M6R
M7R
M9R
M1S
M4S
M5S
M6S
M1T
M4T
M5T
M1V
M4V
M5V
M8V
M9V
M1W
M4W
M5W
M8W
M9W
M1X
M4X
M5X
M8X
M4Y
M7Y
M8Y
M8Z


### Examining the results

In [47]:
print(toronto_venues.shape)
toronto_venues.head(15)

(746, 7)


Unnamed: 0,Postal Code,Postal Code Latitude,Postal Code Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,M3A,43.753259,-79.520999,Pho Com Viet Nam,43.756631,-79.518336,Vietnamese Restaurant
1,M3A,43.753259,-79.520999,Pizza Hut,43.75634,-79.517818,Pizza Place
2,M3A,43.753259,-79.520999,KFC,43.7566,-79.5181,Fast Food Restaurant
3,M3A,43.753259,-79.520999,Tim Hortons,43.756128,-79.516266,Coffee Shop
4,M3A,43.753259,-79.520999,The Beer Store,43.756094,-79.516239,Beer Store
5,M3A,43.753259,-79.520999,Subway,43.756171,-79.518251,Sandwich Place
6,M3A,43.753259,-79.520999,Tim Hortons,43.754344,-79.527024,Coffee Shop
7,M3A,43.753259,-79.520999,Jian Hing Supermarket,43.756673,-79.518444,Grocery Store
8,M3A,43.753259,-79.520999,Planet Fitness,43.757538,-79.51961,Gym / Fitness Center
9,M3A,43.753259,-79.520999,Hwy 400 at Finch W.,43.754399,-79.526967,Road


In [51]:
toronto_venues.groupby('Postal Code').count()

Unnamed: 0_level_0,Postal Code Latitude,Postal Code Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
M1B,4,4,4,4,4,4
M1C,4,4,4,4,4,4
M1E,4,4,4,4,4,4
M1G,8,8,8,8,8,8
M1H,7,7,7,7,7,7
M1J,2,2,2,2,2,2
M1K,4,4,4,4,4,4
M1L,2,2,2,2,2,2
M1M,4,4,4,4,4,4
M1N,4,4,4,4,4,4


In [52]:
print('There are {} unique categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 89 unique categories.


### Analyzing the neighborhoods.

In [55]:
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix = "", prefix_sep = "")
toronto_onehot['Postal Code'] = toronto_venues['Postal Code']
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]
toronto_onehot.head()

Unnamed: 0,Postal Code,Athletics & Sports,Automotive Shop,BBQ Joint,Bakery,Bank,Baseball Field,Beer Store,Breakfast Spot,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Business Service,Butcher,Café,Camera Store,Caribbean Restaurant,Chinese Restaurant,Coffee Shop,Construction & Landscaping,Convenience Store,Diner,Discount Store,Dog Run,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service,Food & Drink Shop,Fried Chicken Joint,Furniture / Home Store,Gas Station,Gastropub,Gift Shop,Golf Course,Grocery Store,Gym,Gym / Fitness Center,Hardware Store,Historic Site,History Museum,Home Service,Ice Cream Shop,Indian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Latin American Restaurant,Lawyer,Liquor Store,Locksmith,Mediterranean Restaurant,Metro Station,Music School,Other Nightlife,Outdoors & Recreation,Park,Pharmacy,Pizza Place,Playground,Plaza,Pool,Print Shop,Pub,Rental Car Location,Restaurant,Road,Sandwich Place,Shopping Mall,Skating Rink,Snack Place,Soccer Field,Social Club,Spa,Sports Bar,Sports Club,Steakhouse,Supermarket,Supplement Shop,Tea Room,Thai Restaurant,Theme Park,Thrift / Vintage Store,Video Game Store,Vietnamese Restaurant,Wings Joint,Yoga Studio,Zoo
0,M3A,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
1,M3A,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,M3A,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,M3A,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,M3A,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [56]:
toronto_onehot.shape

(746, 90)

In [57]:
toronto_grouped = toronto_onehot.groupby('Postal Code').mean().reset_index()
toronto_grouped

Unnamed: 0,Postal Code,Athletics & Sports,Automotive Shop,BBQ Joint,Bakery,Bank,Baseball Field,Beer Store,Breakfast Spot,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Business Service,Butcher,Café,Camera Store,Caribbean Restaurant,Chinese Restaurant,Coffee Shop,Construction & Landscaping,Convenience Store,Diner,Discount Store,Dog Run,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service,Food & Drink Shop,Fried Chicken Joint,Furniture / Home Store,Gas Station,Gastropub,Gift Shop,Golf Course,Grocery Store,Gym,Gym / Fitness Center,Hardware Store,Historic Site,History Museum,Home Service,Ice Cream Shop,Indian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Latin American Restaurant,Lawyer,Liquor Store,Locksmith,Mediterranean Restaurant,Metro Station,Music School,Other Nightlife,Outdoors & Recreation,Park,Pharmacy,Pizza Place,Playground,Plaza,Pool,Print Shop,Pub,Rental Car Location,Restaurant,Road,Sandwich Place,Shopping Mall,Skating Rink,Snack Place,Soccer Field,Social Club,Spa,Sports Bar,Sports Club,Steakhouse,Supermarket,Supplement Shop,Tea Room,Thai Restaurant,Theme Park,Thrift / Vintage Store,Video Game Store,Vietnamese Restaurant,Wings Joint,Yoga Studio,Zoo
0,M1B,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,M1C,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,M1E,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M1G,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.125,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,M1H,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.142857,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,M1J,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,M1K,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,M1L,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,M1M,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,M1N,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [58]:
toronto_grouped.shape

(103, 90)

#### This matches the original number of rows, so hooray!

In [59]:
num_top_venues = 5
for code in toronto_grouped['Postal Code']:
    print("----"+code+"----")
    temp = toronto_grouped[toronto_grouped['Postal Code'] == code].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----M1B----
                    venue  freq
0  Furniture / Home Store  0.50
1              Print Shop  0.25
2             Gas Station  0.25
3      Athletics & Sports  0.00
4              Restaurant  0.00


----M1C----
               venue  freq
0        Bus Station  0.25
1  Electronics Store  0.25
2     Hardware Store  0.25
3      Metro Station  0.25
4               Park  0.00


----M1E----
                venue  freq
0  Athletics & Sports  0.25
1        Liquor Store  0.25
2            Pharmacy  0.25
3         Gas Station  0.25
4                Park  0.00


----M1G----
           venue  freq
0       Pharmacy  0.12
1  Shopping Mall  0.12
2   Skating Rink  0.12
3  Historic Site  0.12
4    Snack Place  0.12


----M1H----
                       venue  freq
0  Latin American Restaurant  0.14
1              Shopping Mall  0.14
2              Historic Site  0.14
3               Skating Rink  0.14
4                Snack Place  0.14


----M1J----
                venue  freq
0   Food & Drink Sho

4                     Pub  0.00


----M6M----
                  venue  freq
0                  Park  0.50
1  Fast Food Restaurant  0.25
2           Pizza Place  0.25
3    Athletics & Sports  0.00
4            Restaurant  0.00


----M6N----
                 venue  freq
0    Indian Restaurant  0.25
1                 Park  0.25
2            BBQ Joint  0.25
3             Bus Line  0.25
4  Rental Car Location  0.00


----M6P----
            venue  freq
0    Skating Rink  0.06
1     Supermarket  0.06
2    Liquor Store  0.06
3  Ice Cream Shop  0.06
4            Park  0.06


----M6R----
                  venue  freq
0           Coffee Shop  0.15
1                   Pub  0.10
2           Pizza Place  0.10
3  Fast Food Restaurant  0.10
4   Japanese Restaurant  0.05


----M6S----
                   venue  freq
0     Italian Restaurant  0.33
1         History Museum  0.33
2                   Park  0.33
3  Outdoors & Recreation  0.00
4                    Pub  0.00


----M7A----
               venue

In [60]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [61]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Postal Code']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
pc_venues_sorted = pd.DataFrame(columns=columns)
pc_venues_sorted['Postal Code'] = toronto_grouped['Postal Code']

for ind in np.arange(toronto_grouped.shape[0]):
    pc_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

pc_venues_sorted.head()

Unnamed: 0,Postal Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Furniture / Home Store,Gas Station,Print Shop,Diner,Dog Run,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service,Food & Drink Shop
1,M1C,Hardware Store,Electronics Store,Metro Station,Bus Station,Zoo,Gas Station,Dog Run,Dry Cleaner,Fast Food Restaurant,Financial or Legal Service
2,M1E,Athletics & Sports,Liquor Store,Pharmacy,Gas Station,Gym / Fitness Center,Gym,Dog Run,Dry Cleaner,Electronics Store,Fast Food Restaurant
3,M1G,Historic Site,Chinese Restaurant,Pizza Place,Pharmacy,Shopping Mall,Skating Rink,Snack Place,Gas Station,Financial or Legal Service,Dog Run
4,M1H,Historic Site,Chinese Restaurant,Pizza Place,Shopping Mall,Latin American Restaurant,Skating Rink,Snack Place,Food & Drink Shop,Dog Run,Dry Cleaner


### Clustering the neighborhoods

In [63]:
kclusters = 5
toronto_grouped_clustering = toronto_grouped.drop('Postal Code', 1)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)
kmeans.labels_[0:10]

array([1, 4, 3, 3, 3, 0, 4, 0, 1, 3])

In [64]:
pc_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
toronto_merged = df_comp
toronto_merged = toronto_merged.join(pc_venues_sorted.set_index('Postal Code'), on='Postal Code')
toronto_merged.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,4,Coffee Shop,Vietnamese Restaurant,Gym / Fitness Center,Grocery Store,Road,Beer Store,Sandwich Place,Pizza Place,Fast Food Restaurant,Financial or Legal Service
1,M4A,North York,Victoria Village,43.725882,-79.315572,4,Discount Store,Intersection,Bakery,Golf Course,Gas Station,Dog Run,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,2,Italian Restaurant,History Museum,Dog Run,Zoo,Gas Station,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service,Food & Drink Shop
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,1,Coffee Shop,Bakery,Outdoors & Recreation,Furniture / Home Store,Zoo,Gas Station,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,4,Bus Stop,Japanese Restaurant,Convenience Store,Coffee Shop,Restaurant,Camera Store,Shopping Mall,Skating Rink,Liquor Store,Pharmacy


In [68]:
toronto_merged.shape

(103, 16)

In [65]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Postal Code'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Exploring clusters

In [66]:
toronto_merged['Cluster Labels'].value_counts()

4    56
3    16
0    15
2     8
1     8
Name: Cluster Labels, dtype: int64

#### Cluster 1

In [67]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Etobicoke,0,Park,Convenience Store,Bus Stop,Bakery,Skating Rink,Dog Run,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service
7,North York,0,Tea Room,Park,Food & Drink Shop,Zoo,Furniture / Home Store,Discount Store,Dog Run,Dry Cleaner,Electronics Store,Fast Food Restaurant
8,East York,0,Convenience Store,Breakfast Spot,Park,Gas Station,Dog Run,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service,Food & Drink Shop
25,Downtown Toronto,0,Indian Restaurant,Fast Food Restaurant,Park,Bus Stop,Furniture / Home Store,Dog Run,Dry Cleaner,Electronics Store,Financial or Legal Service,Food & Drink Shop
31,West Toronto,0,Convenience Store,Bakery,Park,Bus Stop,Gas Station,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service,Food & Drink Shop
32,Scarborough,0,Food & Drink Shop,Park,Zoo,Diner,Dog Run,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service,Fried Chicken Joint
44,Scarborough,0,Park,Outdoors & Recreation,Zoo,Diner,Dog Run,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service,Food & Drink Shop
47,East Toronto,0,Convenience Store,Bakery,Park,Bus Stop,Gas Station,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service,Food & Drink Shop
56,York,0,Park,Fast Food Restaurant,Pizza Place,Zoo,Diner,Dog Run,Dry Cleaner,Electronics Store,Financial or Legal Service,Food & Drink Shop
62,Central Toronto,0,Park,Outdoors & Recreation,Zoo,Diner,Dog Run,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service,Food & Drink Shop


Cluster 1 seems to be defined primarily by convenience, e.g. convenience stores, fast food, and shopping

#### Cluster 2

In [69]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,North York,1,Coffee Shop,Bakery,Outdoors & Recreation,Furniture / Home Store,Zoo,Gas Station,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service
6,Scarborough,1,Furniture / Home Store,Gas Station,Print Shop,Diner,Dog Run,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service,Food & Drink Shop
10,North York,1,Furniture / Home Store,Sports Club,Park,Business Service,Dog Run,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service,Food & Drink Shop
23,East York,1,Business Service,Furniture / Home Store,Zoo,Diner,Dog Run,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service,Food & Drink Shop
49,North York,1,Furniture / Home Store,Skating Rink,Outdoors & Recreation,Fried Chicken Joint,Discount Store,Dog Run,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service
51,Scarborough,1,Furniture / Home Store,Bakery,Outdoors & Recreation,Zoo,Gas Station,Dog Run,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service
67,Central Toronto,1,Furniture / Home Store,Skating Rink,Park,Outdoors & Recreation,Fried Chicken Joint,Dog Run,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service
73,Central Toronto,1,Construction & Landscaping,Furniture / Home Store,Video Game Store,Locksmith,Discount Store,Dog Run,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service


Cluster 2 seems to be defined by the home, e.g. furniture stores, locksmiths, construction, and electronics.

#### Cluster 3

In [70]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,2,Italian Restaurant,History Museum,Dog Run,Zoo,Gas Station,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service,Food & Drink Shop
11,Etobicoke,2,History Museum,Italian Restaurant,Coffee Shop,Park,Furniture / Home Store,Dog Run,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service
15,Downtown Toronto,2,History Museum,Italian Restaurant,Park,Furniture / Home Store,Dog Run,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service,Food & Drink Shop
30,Downtown Toronto,2,History Museum,Italian Restaurant,Coffee Shop,Park,Furniture / Home Store,Dog Run,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service
81,West Toronto,2,History Museum,Italian Restaurant,Park,Furniture / Home Store,Dog Run,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service,Food & Drink Shop
84,Downtown Toronto,2,Italian Restaurant,History Museum,Dog Run,Zoo,Gas Station,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service,Food & Drink Shop
85,Scarborough,2,Italian Restaurant,Butcher,Zoo,Gas Station,Dog Run,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service,Food & Drink Shop
98,Etobicoke,2,Italian Restaurant,History Museum,Dog Run,Zoo,Gas Station,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service,Food & Drink Shop


Cluster 3 is defined by culture - dining, museums, and zoos. 

#### Cluster 4

In [71]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,East York,3,Pizza Place,Grocery Store,Skating Rink,Soccer Field,Gas Station,Fast Food Restaurant,Zoo,Food & Drink Shop,Discount Store,Dog Run
16,York,3,Fast Food Restaurant,Pizza Place,Park,Gas Station,Zoo,Fried Chicken Joint,Dog Run,Dry Cleaner,Electronics Store,Financial or Legal Service
18,Scarborough,3,Athletics & Sports,Liquor Store,Pharmacy,Gas Station,Gym / Fitness Center,Gym,Dog Run,Dry Cleaner,Electronics Store,Fast Food Restaurant
22,Scarborough,3,Historic Site,Chinese Restaurant,Pizza Place,Pharmacy,Shopping Mall,Skating Rink,Snack Place,Gas Station,Financial or Legal Service,Dog Run
26,Scarborough,3,Historic Site,Chinese Restaurant,Pizza Place,Shopping Mall,Latin American Restaurant,Skating Rink,Snack Place,Food & Drink Shop,Dog Run,Dry Cleaner
27,North York,3,Print Shop,Gas Station,Rental Car Location,Zoo,Fried Chicken Joint,Discount Store,Dog Run,Dry Cleaner,Electronics Store,Fast Food Restaurant
34,North York,3,Athletics & Sports,Chinese Restaurant,Shopping Mall,Skating Rink,Snack Place,Gas Station,Pharmacy,Financial or Legal Service,Discount Store,Dog Run
40,North York,3,Home Service,Coffee Shop,Chinese Restaurant,Dry Cleaner,Golf Course,Furniture / Home Store,Dog Run,Electronics Store,Fast Food Restaurant,Financial or Legal Service
46,North York,3,Coffee Shop,Chinese Restaurant,Golf Course,Zoo,Furniture / Home Store,Dog Run,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service
52,North York,3,Athletics & Sports,Shopping Mall,Skating Rink,Lawyer,Fried Chicken Joint,Discount Store,Dog Run,Dry Cleaner,Electronics Store,Fast Food Restaurant


Cluster 4 is defined by athletics - skating rinks, golf courses, and soccer fields. 

#### Cluster 5

In [72]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,4,Coffee Shop,Vietnamese Restaurant,Gym / Fitness Center,Grocery Store,Road,Beer Store,Sandwich Place,Pizza Place,Fast Food Restaurant,Financial or Legal Service
1,North York,4,Discount Store,Intersection,Bakery,Golf Course,Gas Station,Dog Run,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service
4,Downtown Toronto,4,Bus Stop,Japanese Restaurant,Convenience Store,Coffee Shop,Restaurant,Camera Store,Shopping Mall,Skating Rink,Liquor Store,Pharmacy
9,Downtown Toronto,4,Bank,Camera Store,Supermarket,Spa,Zoo,Furniture / Home Store,Dog Run,Dry Cleaner,Electronics Store,Fast Food Restaurant
12,Scarborough,4,Hardware Store,Electronics Store,Metro Station,Bus Station,Zoo,Gas Station,Dog Run,Dry Cleaner,Fast Food Restaurant,Financial or Legal Service
13,North York,4,Discount Store,Intersection,Bakery,Golf Course,Gas Station,Dog Run,Dry Cleaner,Electronics Store,Fast Food Restaurant,Financial or Legal Service
17,Etobicoke,4,Coffee Shop,Pub,Fast Food Restaurant,Pizza Place,Bakery,Grocery Store,Gym,Park,Restaurant,Caribbean Restaurant
19,East Toronto,4,Indian Restaurant,Playground,Bus Line,Historic Site,Hardware Store,Dog Run,Dry Cleaner,Electronics Store,History Museum,Fast Food Restaurant
20,Downtown Toronto,4,Coffee Shop,Pub,Fast Food Restaurant,Pizza Place,Steakhouse,Park,Restaurant,Caribbean Restaurant,Sandwich Place,Financial or Legal Service
21,York,4,Restaurant,Bank,Pizza Place,Café,Convenience Store,Bakery,Food & Drink Shop,Dog Run,Dry Cleaner,Electronics Store


Cluster 5 is defined by food - many restaurants and coffee shops. 