In [2]:
# import needed libraries
import pandas as pd
import numpy as np
import sys

# Scrape data from Wikipedia page, import into pandas DataFrame and clean the table

# Install library and load html file

In [3]:
# Install Beautiful Soup and parser
!{sys.executable} -m pip install beautifulsoup4
!{sys.executable} -m pip install lxml

#import beautiful soup library
from bs4 import BeautifulSoup
import requests



In [4]:
# Load html
url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')

# construct BeautifulSoup object 
soup = BeautifulSoup(url.text, 'lxml')

print(soup.prettify())

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   List of postal codes of Canada: M - Wikipedia
  </title>
  <script>
   document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"XptG7wpAMNAAAUaw2@sAAAES","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_postal_codes_of_Canada:_M","wgTitle":"List of postal codes of Canada: M","wgCurRevisionId":951325562,"wgRevisionId":951325562,"wgArticleId":539066,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Articles with short description","Communications in Ontario","Postal codes in Canada","Toronto","Ontario

# Extract the table from html file and convert to DataFrame

In [5]:
table = soup.find('table', attrs={'class':'wikitable sortable'})
data = []
header = soup.find_all('th')
table_rows = table.find_all('tr')
for tr in table_rows:
    td = tr.find_all('td')
    row = [tr.text.replace('\n', ' ').strip() for tr in td]
    data.append(row)
can_post = pd.DataFrame(data, columns=["Postal code", "Borough", "Neighborhood"])
can_post

Unnamed: 0,Postal code,Borough,Neighborhood
0,,,
1,M1A,Not assigned,
2,M2A,Not assigned,
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Regent Park / Harbourfront
6,M6A,North York,Lawrence Manor / Lawrence Heights
7,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government
8,M8A,Not assigned,
9,M9A,Etobicoke,Islington Avenue


In [6]:
# remove first row
can_post = can_post.drop(can_post.index[0])
# remove rows whose Borough is not assigned
can_post = can_post[can_post.Borough != 'Not assigned']
# replace slash with comma
can_post = can_post.apply(lambda x: x.str.replace('/',','))
can_post

Unnamed: 0,Postal code,Borough,Neighborhood
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,"Regent Park , Harbourfront"
6,M6A,North York,"Lawrence Manor , Lawrence Heights"
7,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government"
9,M9A,Etobicoke,Islington Avenue
10,M1B,Scarborough,"Malvern , Rouge"
12,M3B,North York,Don Mills
13,M4B,East York,"Parkview Hill , Woodbine Gardens"
14,M5B,Downtown Toronto,"Garden District, Ryerson"


In [7]:
# reindexing the dataframe
can_post.reset_index(drop = True, inplace = True)
can_post

Unnamed: 0,Postal code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park , Harbourfront"
3,M6A,North York,"Lawrence Manor , Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government"
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Malvern , Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill , Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [8]:
can_post.shape

(103, 3)

# Get the latitude and the longitude coordinates of each neighborhood

In [9]:
!{sys.executable} -m pip install geocoder
import geocoder # import geocoder



In [10]:
# Add Latitude and Longitude columns to dataframe
can_post["Latitude"] = ""
can_post["Longitude"] = ""
can_post

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,,
1,M4A,North York,Victoria Village,,
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",,
3,M6A,North York,"Lawrence Manor , Lawrence Heights",,
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",,
5,M9A,Etobicoke,Islington Avenue,,
6,M1B,Scarborough,"Malvern , Rouge",,
7,M3B,North York,Don Mills,,
8,M4B,East York,"Parkview Hill , Woodbine Gardens",,
9,M5B,Downtown Toronto,"Garden District, Ryerson",,


In [11]:
for index, row in can_post.iterrows():
    # initialize variable to None
    lat_lng_coords = None

    # loop until you get the coordinates
    while(lat_lng_coords is None):
      g = geocoder.arcgis('{}, Toronto, Ontario'.format(row['Postal code']))
      lat_lng_coords = g.latlng

    row['Latitude'] = lat_lng_coords[0]
    row['Longitude'] = lat_lng_coords[1]

can_post

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.7529,-79.3356
1,M4A,North York,Victoria Village,43.7281,-79.3119
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.651,-79.353
3,M6A,North York,"Lawrence Manor , Lawrence Heights",43.7233,-79.4512
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.6618,-79.3894
5,M9A,Etobicoke,Islington Avenue,43.6675,-79.529
6,M1B,Scarborough,"Malvern , Rouge",43.8086,-79.1899
7,M3B,North York,Don Mills,43.7489,-79.3572
8,M4B,East York,"Parkview Hill , Woodbine Gardens",43.7072,-79.3115
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.6575,-79.3775


# View different areas associated with different postal codes in Downtown Toronto 

In [16]:
!{sys.executable} -m pip install geopy
from geopy.geocoders import Nominatim

# get the geographical coordinates of Downtown Toronto
address = 'Downtown Toronto, Toronto, Ontario'

geolocator = Nominatim(user_agent='toronto_explore')
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Downtown Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Downtown Toronto are 43.6563221, -79.3809161.


In [18]:
# Select postal codes in the borough of Downton Toronto
downtown_toronto = can_post[can_post.Borough == 'Downtown Toronto']
print(downtown_toronto.shape)
downtown_toronto

(19, 5)


Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.651,-79.353
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.6618,-79.3894
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.6575,-79.3775
15,M5C,Downtown Toronto,St. James Town,43.6517,-79.3756
20,M5E,Downtown Toronto,Berczy Park,43.6452,-79.3739
24,M5G,Downtown Toronto,Central Bay Street,43.6561,-79.3857
25,M6G,Downtown Toronto,Christie,43.6686,-79.4204
30,M5H,Downtown Toronto,"Richmond , Adelaide , King",43.6505,-79.3841
36,M5J,Downtown Toronto,"Harbourfront East , Union Station , Toronto Is...",43.6238,-79.3692
42,M5K,Downtown Toronto,"Toronto Dominion Centre , Design Exchange",43.6469,-79.3816


In [20]:
!{sys.executable} -m pip install folium
import folium
# create map of Downtown Toronto using latitude and longitude values
map_downtown_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(downtown_toronto['Latitude'], downtown_toronto['Longitude'], downtown_toronto['Postal code']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_downtown_toronto)

map_downtown_toronto

Collecting folium
  Using cached folium-0.10.1-py2.py3-none-any.whl (91 kB)
Collecting branca>=0.3.0
  Using cached branca-0.4.0-py3-none-any.whl (25 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.0 folium-0.10.1


In [21]:
import json # library to handle JSON files
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Explore venues at each postal code

In [22]:
# Define Foursquare Credentials and Version
CLIENT_ID = 'PVYSPUQ3F2R4BNWUJDFHGUNRLETYYXELNGZ1LAO00N3HTKS5' # your Foursquare ID
CLIENT_SECRET = 'OEPOEAEDAZVS1ECZYHBTOOHUKKZR3WQTDMIWGFLROQHPT03Y' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: PVYSPUQ3F2R4BNWUJDFHGUNRLETYYXELNGZ1LAO00N3HTKS5
CLIENT_SECRET:OEPOEAEDAZVS1ECZYHBTOOHUKKZR3WQTDMIWGFLROQHPT03Y


In [29]:
# create a function to repeat getting venue information over the postal codes in Toronto

def getNearbyVenues(postal_codes, latitudes, longitudes, LIMIT = 50, radius=500):
    venues_list = []
    for postal_code, lat, lng in zip(postal_codes, latitudes, longitudes):
        print(postal_code)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']

        # return only relevant information for each nearby venue
        venues_list.append([(
            postal_code,
            lat,
            lng,
            v['venue']['name'],
            v['venue']['location']['lat'],
            v['venue']['location']['lng'],
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Postal code',
                             'Postal code Latitude',
                             'Postal code Longitude',
                             'Venue',
                             'Venue Latitude',
                             'Venue Longitude',
                             'Venue Category']

    return (nearby_venues)

In [31]:
# run the above function on each neighborhood and create a new dataframe called manhattan_venues

downtown_toronto_venues = getNearbyVenues(postal_codes=downtown_toronto['Postal code'],
                                   latitudes = downtown_toronto['Latitude'],
                                   longitudes = downtown_toronto['Longitude'],
                                   LIMIT = 50, radius=500
                                  )

# check the size of the resulting dataframe
print(downtown_toronto_venues.shape)
downtown_toronto_venues.head()

M5A
M7A
M5B
M5C
M5E
M5G
M6G
M5H
M5J
M5K
M5L
M5S
M5T
M5V
M4W
M5W
M4X
M5X
M4Y
(760, 7)


Unnamed: 0,Postal code,Postal code Latitude,Postal code Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,M5A,43.650964,-79.353041,Souk Tabule,43.653756,-79.35439,Mediterranean Restaurant
1,M5A,43.650964,-79.353041,Young Centre for the Performing Arts,43.650825,-79.357593,Performing Arts Venue
2,M5A,43.650964,-79.353041,SOMA chocolatemaker,43.650622,-79.358127,Chocolate Shop
3,M5A,43.650964,-79.353041,BATLgrounds,43.647088,-79.351306,Athletics & Sports
4,M5A,43.650964,-79.353041,Cluny Bistro & Boulangerie,43.650565,-79.357843,French Restaurant


In [34]:
# check how many venues were returned for each neighborhood
downtown_toronto_venues.groupby('Postal code').count()

Unnamed: 0_level_0,Postal code Latitude,Postal code Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Postal code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
M4W,4,4,4,4,4,4
M4X,45,45,45,45,45,45
M4Y,50,50,50,50,50,50
M5A,26,26,26,26,26,26
M5B,50,50,50,50,50,50
M5C,50,50,50,50,50,50
M5E,50,50,50,50,50,50
M5G,50,50,50,50,50,50
M5H,50,50,50,50,50,50
M5J,6,6,6,6,6,6


In [35]:
# find out how many unique categories can be curated from all the returned venues
print('There are {} uniques categories.'.format(len(downtown_toronto_venues['Venue Category'].unique())))

There are 164 uniques categories.


In [36]:
# Analyze each postal code

# one hot encoding
downtown_toronto_onehot = pd.get_dummies(downtown_toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add Postal code column back to dataframe
downtown_toronto_onehot['Postal code'] = downtown_toronto_venues['Postal code'] 

# move Postal code column to the first column
fixed_columns = [downtown_toronto_onehot.columns[-1]] + list(downtown_toronto_onehot.columns[:-1])
downtown_toronto_onehot = downtown_toronto_onehot[fixed_columns]

downtown_toronto_onehot.head()

Unnamed: 0,Postal code,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Baby Store,Bagel Shop,...,Theme Park,Theme Restaurant,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M5A,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Group rows by postal code and by taking the mean of the frequency of occurrence of each category

In [37]:
downtown_toronto_grouped = downtown_toronto_onehot.groupby('Postal code').mean().reset_index()
downtown_toronto_grouped

Unnamed: 0,Postal code,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Baby Store,Bagel Shop,...,Theme Park,Theme Restaurant,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,M4W,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,M4X,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,M4Y,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,...,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02
3,M5A,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,...,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,M5B,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,M5C,0.02,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,...,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0
6,M5E,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,...,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0
7,M5G,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0
8,M5H,0.06,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0
9,M5J,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Print each neighborhood along with the top 5 most common venues

In [39]:
num_top_venues = 5

for post_code in downtown_toronto_grouped['Postal code']:
    print("----"+post_code+"----")
    temp = downtown_toronto_grouped[downtown_toronto_grouped['Postal code'] == post_code].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----M4W----
                 venue  freq
0           Playground  0.25
1                 Park  0.25
2        Grocery Store  0.25
3          Candy Store  0.25
4  American Restaurant  0.00


----M4X----
         venue  freq
0  Coffee Shop  0.07
1   Restaurant  0.07
2       Bakery  0.04
3         Café  0.04
4          Pub  0.04


----M4Y----
                 venue  freq
0          Coffee Shop  0.06
1           Restaurant  0.06
2  Japanese Restaurant  0.06
3          Men's Store  0.04
4         Dance Studio  0.04


----M5A----
                     venue  freq
0                      Pub  0.12
1       Athletics & Sports  0.08
2              Coffee Shop  0.08
3                     Café  0.08
4  Health & Beauty Service  0.04


----M5B----
                       venue  freq
0                Coffee Shop  0.06
1                       Café  0.06
2           Ramen Restaurant  0.04
3  Middle Eastern Restaurant  0.04
4                 Restaurant  0.04


----M5C----
            venue  freq
0           

## Function to sort the venues in descending order

In [40]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

## create the new dataframe and display the top 10 venues for each neighborhood

In [50]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Postal code']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
postal_code_venues_sorted = pd.DataFrame(columns=columns)
postal_code_venues_sorted['Postal code'] = downtown_toronto_grouped['Postal code']

for ind in np.arange(downtown_toronto_grouped.shape[0]):
    postal_code_venues_sorted.iloc[ind, 1:] = return_most_common_venues(downtown_toronto_grouped.iloc[ind, :], num_top_venues)

postal_code_venues_sorted

Unnamed: 0,Postal code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4W,Park,Grocery Store,Playground,Candy Store,Distribution Center,Farmers Market,Farm,Ethiopian Restaurant,Electronics Store,Dumpling Restaurant
1,M4X,Restaurant,Coffee Shop,Pizza Place,Café,Pub,Bakery,Italian Restaurant,Jewelry Store,Deli / Bodega,Japanese Restaurant
2,M4Y,Coffee Shop,Restaurant,Japanese Restaurant,Men's Store,Dance Studio,Sushi Restaurant,Gastropub,Burger Joint,Ramen Restaurant,Pub
3,M5A,Pub,Athletics & Sports,Coffee Shop,Café,Music Venue,Bakery,Seafood Restaurant,Chocolate Shop,Distribution Center,Intersection
4,M5B,Café,Coffee Shop,Ramen Restaurant,Japanese Restaurant,Clothing Store,Theater,Tea Room,Restaurant,Middle Eastern Restaurant,American Restaurant
5,M5C,Café,Gastropub,Coffee Shop,Hotel,Seafood Restaurant,Cosmetics Shop,Creperie,Farmers Market,Food Truck,Ice Cream Shop
6,M5E,Coffee Shop,Cocktail Bar,Seafood Restaurant,Farmers Market,Cheese Shop,Beer Bar,Restaurant,Bakery,Café,Molecular Gastronomy Restaurant
7,M5G,Coffee Shop,Japanese Restaurant,Clothing Store,Restaurant,Middle Eastern Restaurant,Bubble Tea Shop,Breakfast Spot,Plaza,Gastropub,Ramen Restaurant
8,M5H,Coffee Shop,Café,American Restaurant,Seafood Restaurant,Steakhouse,Restaurant,Bookstore,New American Restaurant,Opera House,Cosmetics Shop
9,M5J,Theme Park,Harbor / Marina,Park,Fast Food Restaurant,Farm,Distribution Center,Farmers Market,Ethiopian Restaurant,Electronics Store,Dumpling Restaurant


# Cluster Postal Codes

## Run k-means to cluster the neighborhood into 3 clusters

In [51]:
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 3

downtown_toronto_grouped_clustering = downtown_toronto_grouped.drop('Postal code', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(downtown_toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 0, 0, 0, 0, 0, 0, 0, 0, 2])

In [52]:
# add clustering labels
postal_code_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

downtown_toronto_merged = downtown_toronto

# merge downtown_toronto_grouped with downtown_toronto to add latitude/longitude for each postal code
downtown_toronto_merged = downtown_toronto_merged.join(postal_code_venues_sorted.set_index('Postal code'), on='Postal code')

downtown_toronto_merged

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.651,-79.353,0,Pub,Athletics & Sports,Coffee Shop,Café,Music Venue,Bakery,Seafood Restaurant,Chocolate Shop,Distribution Center,Intersection
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.6618,-79.3894,0,Coffee Shop,Sushi Restaurant,Café,Italian Restaurant,Spa,Burger Joint,Sandwich Place,Burrito Place,Juice Bar,Yoga Studio
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.6575,-79.3775,0,Café,Coffee Shop,Ramen Restaurant,Japanese Restaurant,Clothing Store,Theater,Tea Room,Restaurant,Middle Eastern Restaurant,American Restaurant
15,M5C,Downtown Toronto,St. James Town,43.6517,-79.3756,0,Café,Gastropub,Coffee Shop,Hotel,Seafood Restaurant,Cosmetics Shop,Creperie,Farmers Market,Food Truck,Ice Cream Shop
20,M5E,Downtown Toronto,Berczy Park,43.6452,-79.3739,0,Coffee Shop,Cocktail Bar,Seafood Restaurant,Farmers Market,Cheese Shop,Beer Bar,Restaurant,Bakery,Café,Molecular Gastronomy Restaurant
24,M5G,Downtown Toronto,Central Bay Street,43.6561,-79.3857,0,Coffee Shop,Japanese Restaurant,Clothing Store,Restaurant,Middle Eastern Restaurant,Bubble Tea Shop,Breakfast Spot,Plaza,Gastropub,Ramen Restaurant
25,M6G,Downtown Toronto,Christie,43.6686,-79.4204,0,Grocery Store,Café,Park,Coffee Shop,Candy Store,Athletics & Sports,Baby Store,Playground,Farmers Market,Farm
30,M5H,Downtown Toronto,"Richmond , Adelaide , King",43.6505,-79.3841,0,Coffee Shop,Café,American Restaurant,Seafood Restaurant,Steakhouse,Restaurant,Bookstore,New American Restaurant,Opera House,Cosmetics Shop
36,M5J,Downtown Toronto,"Harbourfront East , Union Station , Toronto Is...",43.6238,-79.3692,2,Theme Park,Harbor / Marina,Park,Fast Food Restaurant,Farm,Distribution Center,Farmers Market,Ethiopian Restaurant,Electronics Store,Dumpling Restaurant
42,M5K,Downtown Toronto,"Toronto Dominion Centre , Design Exchange",43.6469,-79.3816,0,Café,Coffee Shop,Hotel,Seafood Restaurant,Restaurant,Japanese Restaurant,American Restaurant,Pizza Place,Sandwich Place,Salad Place


In [55]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(downtown_toronto_merged['Latitude'], downtown_toronto_merged['Longitude'], downtown_toronto_merged['Postal code'], downtown_toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Examine Clusters

### 1st Cluster

In [56]:
downtown_toronto_merged.loc[downtown_toronto_merged['Cluster Labels'] == 0, downtown_toronto_merged.columns[[1] + list(range(5, downtown_toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,0,Pub,Athletics & Sports,Coffee Shop,Café,Music Venue,Bakery,Seafood Restaurant,Chocolate Shop,Distribution Center,Intersection
4,Downtown Toronto,0,Coffee Shop,Sushi Restaurant,Café,Italian Restaurant,Spa,Burger Joint,Sandwich Place,Burrito Place,Juice Bar,Yoga Studio
9,Downtown Toronto,0,Café,Coffee Shop,Ramen Restaurant,Japanese Restaurant,Clothing Store,Theater,Tea Room,Restaurant,Middle Eastern Restaurant,American Restaurant
15,Downtown Toronto,0,Café,Gastropub,Coffee Shop,Hotel,Seafood Restaurant,Cosmetics Shop,Creperie,Farmers Market,Food Truck,Ice Cream Shop
20,Downtown Toronto,0,Coffee Shop,Cocktail Bar,Seafood Restaurant,Farmers Market,Cheese Shop,Beer Bar,Restaurant,Bakery,Café,Molecular Gastronomy Restaurant
24,Downtown Toronto,0,Coffee Shop,Japanese Restaurant,Clothing Store,Restaurant,Middle Eastern Restaurant,Bubble Tea Shop,Breakfast Spot,Plaza,Gastropub,Ramen Restaurant
25,Downtown Toronto,0,Grocery Store,Café,Park,Coffee Shop,Candy Store,Athletics & Sports,Baby Store,Playground,Farmers Market,Farm
30,Downtown Toronto,0,Coffee Shop,Café,American Restaurant,Seafood Restaurant,Steakhouse,Restaurant,Bookstore,New American Restaurant,Opera House,Cosmetics Shop
42,Downtown Toronto,0,Café,Coffee Shop,Hotel,Seafood Restaurant,Restaurant,Japanese Restaurant,American Restaurant,Pizza Place,Sandwich Place,Salad Place
48,Downtown Toronto,0,Café,Hotel,Coffee Shop,Restaurant,Gastropub,Gym,Japanese Restaurant,Deli / Bodega,American Restaurant,New American Restaurant


### 2nd Cluster

In [57]:
downtown_toronto_merged.loc[downtown_toronto_merged['Cluster Labels'] == 1, downtown_toronto_merged.columns[[1] + list(range(5, downtown_toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
91,Downtown Toronto,1,Park,Grocery Store,Playground,Candy Store,Distribution Center,Farmers Market,Farm,Ethiopian Restaurant,Electronics Store,Dumpling Restaurant


### 3rd Cluster

In [59]:
downtown_toronto_merged.loc[downtown_toronto_merged['Cluster Labels'] == 2, downtown_toronto_merged.columns[[1] + list(range(5, downtown_toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
36,Downtown Toronto,2,Theme Park,Harbor / Marina,Park,Fast Food Restaurant,Farm,Distribution Center,Farmers Market,Ethiopian Restaurant,Electronics Store,Dumpling Restaurant
