In [1]:
import json
import numpy as np
import pandas as pd
from pandas.io.json import json_normalize

# ! pip install geocoder
import geocoder

from sklearn.cluster import KMeans

import matplotlib.cm as cm
import matplotlib.colors as colors

from bs4 import BeautifulSoup
from urllib.request import urlopen

# ! pip install folium
import folium

# ! pip install geopy
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

## 1. Scrape and Explore Dataset

In [2]:
postal_codes_url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

#### Let create a soup of the wikipedia page

In [3]:
with urlopen(postal_codes_url) as html_doc:
    soup = BeautifulSoup(html_doc.read(), "lxml")

#### Let scrape for the zip codes table 

In [4]:
tr = soup.table.find('tr')
df_array = {th.text.strip():[] for th in tr.find_all('th') }

for tr in soup.table.find_all('tr'):
    for key, td in zip(df_array.keys(), tr.find_all('td')):
        df_array[key].append(td.text.strip())
df = pd.DataFrame(df_array)

In [5]:
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


#### Replace "Not assigned" with '' (empty string)

In [6]:
df.replace(to_replace=['Not assigned'], value=[''], inplace=True)

#### Ignoring cells with Borough being '' (empty string)

In [7]:
df = df[df.Borough != '']

#### Assigning Null Neighbourhood same as Borough

In [8]:
def fill_neighbourhood(x):
    if x.Neighbourhood == '':
        return x.Borough
    return x.Neighbourhood
df["Neighbourhood"] = df.apply(lambda x: fill_neighbourhood(x), axis=1)

In [9]:
df = df.groupby(['Postcode','Borough'])['Neighbourhood'].apply(lambda x: ", ".join(x)).to_frame().reset_index()

In [10]:
df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv..."
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ..."


In [11]:
df.shape

(103, 3)

#### Load the geospatial coordinates data

In [12]:
csv_file_name = "Geospatial_Coordinates.csv"
coordinates = pd.read_csv(csv_file_name, index_col="Postal Code")
coordinates.at["M1B", "Latitude"]
coordinates.at["M1B", "Longitude"]

43.806686299999996

-79.19435340000001

#### Add latitude and longitude from coordinates dataframe

In [13]:
df["Latitude"] = df["Postcode"].apply(lambda x: coordinates.at[x, "Latitude"])
df["Longitude"] = df["Postcode"].apply(lambda x: coordinates.at[x, "Longitude"])

In [14]:
df

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv...",43.688905,-79.554724
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ...",43.739416,-79.588437


#### Filter data of Toronto Borough

In [15]:
toronto_data = df[df.Borough.str.contains('Toronto')]
toronto_data

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
37,M4E,East Toronto,The Beaches,43.676357,-79.293031
41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
42,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
43,M4M,East Toronto,Studio District,43.659526,-79.340923
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
45,M4P,Central Toronto,Davisville North,43.712751,-79.390197
46,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
47,M4S,Central Toronto,Davisville,43.704324,-79.38879
48,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316
49,M4V,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",43.686412,-79.400049


#### Find coordinates of Toronto

In [16]:
toronto_address = "Toronto, Ontario"

geolocator = Nominatim(user_agent='toronto_explorer')
location = geolocator.geocode(toronto_address)
tor_lat, tor_long = location.latitude, location.longitude
print("The geographical coordinates of Toronto are", tor_lat, tor_long)

The geographical coordinates of Toronto are 43.653963 -79.387207


In [17]:
toronto_map = folium.Map(location=[tor_lat, tor_long], zoom_start=10)

for ind, row in df.iterrows():
    label = "Neighbourhood: {} \n Borough: {}".format(row["Neighbourhood"], row["Borough"])
    label = folium.Popup(label, parse_html=True)
    marker = folium.CircleMarker(
        [row['Latitude'], row['Longitude']],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=True).add_to(toronto_map)
toronto_map

#### Define Foursquare Credentials and Version

In [18]:
CLIENT_ID = 'LZU2SDUI4CPSWU3QS4C52R4RQONSY4G24B0ZWUUTSWMA0OYY' # your Foursquare ID
CLIENT_SECRET = 'GXRV02ZPMQMNJKXTIH0XU3RKCW2CVQAOI3SJ3TKAX2PO0VEF' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: LZU2SDUI4CPSWU3QS4C52R4RQONSY4G24B0ZWUUTSWMA0OYY
CLIENT_SECRET:GXRV02ZPMQMNJKXTIH0XU3RKCW2CVQAOI3SJ3TKAX2PO0VEF


In [19]:
LIMIT = 100

radius = 500

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    tor_lat, 
    tor_long, 
    radius, 
    LIMIT)

In [20]:
with urlopen(url) as req:
    res = req.read()

In [21]:
results = json.loads(res.decode())
results

{'meta': {'code': 200, 'requestId': '5e53636c60ba08001b7c85d0'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Bay Street Corridor',
  'headerFullLocation': 'Bay Street Corridor, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 72,
  'suggestedBounds': {'ne': {'lat': 43.6584630045, 'lng': -79.38099903084075},
   'sw': {'lat': 43.649462995499995, 'lng': -79.39341496915925}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '5227bb01498e17bf485e6202',
       'name': 'Downtown Toronto',
       'location': {'lat': 43.65323167517444,
        'lng': -79.38529600606677,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.65323167517444,
          

### the get_category_type from the foursquare lab

In [22]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [23]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = pd.json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Downtown Toronto,Neighborhood,43.653232,-79.385296
1,Japango,Sushi Restaurant,43.655268,-79.385165
2,Sansotei Ramen 三草亭,Ramen Restaurant,43.655157,-79.386501
3,Poke Guys,Poke Place,43.654895,-79.385052
4,Rolltation,Japanese Restaurant,43.654918,-79.387424


In [24]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

72 venues were returned by Foursquare.


## 2. Explore Neighbourhoods of Toronto

In [25]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        with urlopen(url) as req:
            res = req.read()
        results = json.loads(res.decode())
        results = results["response"]['groups'][0]['items']
        #results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [26]:
toronto_venues = getNearbyVenues(names=toronto_data['Neighbourhood'],
                                   latitudes=toronto_data['Latitude'],
                                   longitudes=toronto_data['Longitude']
                                  )


The Beaches
The Danforth West, Riverdale
The Beaches West, India Bazaar
Studio District
Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park, Summerhill East
Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West
Rosedale
Cabbagetown, St. James Town
Church and Wellesley
Harbourfront
Ryerson, Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide, King, Richmond
Harbourfront East, Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Commerce Court, Victoria Hotel
Roselawn
Forest Hill North, Forest Hill West
The Annex, North Midtown, Yorkville
Harbord, University of Toronto
Chinatown, Grange Park, Kensington Market
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Stn A PO Boxes 25 The Esplanade
First Canadian Place, Underground city
Christie
Dovercourt Village, Dufferin
Little Portugal, Trinity
Brockton, Exhibition Place, Parkdale Village
High Park, The Junction Sout

In [27]:
print(toronto_venues.shape)

(1703, 7)


In [28]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
Berczy Park,57,57,57,57,57,57
"Brockton, Exhibition Place, Parkdale Village",24,24,24,24,24,24
Business Reply Mail Processing Centre 969 Eastern,16,16,16,16,16,16
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",17,17,17,17,17,17
"Cabbagetown, St. James Town",44,44,44,44,44,44
Central Bay Street,80,80,80,80,80,80
"Chinatown, Grange Park, Kensington Market",84,84,84,84,84,84
Christie,19,19,19,19,19,19
Church and Wellesley,83,83,83,83,83,83


#### Let's find out how many unique categories can be curated from all the returned venues

In [29]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 231 uniques categories.


## 3. Analyze Each Neighborhood

In [30]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


And let's examine the new dataframe size.

In [31]:
toronto_onehot.shape

(1703, 231)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [32]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,...,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.01
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0
2,"Brockton, Exhibition Place, Parkdale Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Business Reply Mail Processing Centre 969 Eastern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0,0.0,0.058824,0.058824,0.058824,0.117647,0.176471,0.117647,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Cabbagetown, St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Central Bay Street,0.0125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0125,...,0.0,0.0,0.0,0.0,0.0125,0.0,0.0,0.0125,0.0,0.0
7,"Chinatown, Grange Park, Kensington Market",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.035714,0.0,0.059524,0.011905,0.0,0.0
8,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Church and Wellesley,0.012048,0.012048,0.0,0.0,0.0,0.0,0.0,0.0,0.012048,...,0.0,0.0,0.0,0.0,0.0,0.0,0.012048,0.0,0.012048,0.0


#### Let's confirm the new size

In [33]:
toronto_grouped.shape

(39, 231)

#### Let's print each neighborhood along with the top 5 most common venues

In [34]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
             venue  freq
0      Coffee Shop  0.06
1             Café  0.05
2  Thai Restaurant  0.04
3       Steakhouse  0.04
4              Bar  0.04


----Berczy Park----
                venue  freq
0         Coffee Shop  0.07
1              Bakery  0.04
2        Cocktail Bar  0.04
3  Seafood Restaurant  0.04
4         Cheese Shop  0.04


----Brockton, Exhibition Place, Parkdale Village----
                 venue  freq
0                 Café  0.12
1          Coffee Shop  0.08
2       Breakfast Spot  0.08
3                  Gym  0.04
4  Japanese Restaurant  0.04


----Business Reply Mail Processing Centre 969 Eastern----
                  venue  freq
0    Light Rail Station  0.12
1           Pizza Place  0.06
2      Recording Studio  0.06
3  Fast Food Restaurant  0.06
4               Brewery  0.06


----CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara----
              venue  freq
0   Airport Ser

#### Let's put that into a *pandas* dataframe

First, let's write a function to sort the venues in descending order.

In [35]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Now let's create the new dataframe and display the top 10 venues for each neighborhood.

In [36]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Café,Bar,Steakhouse,Thai Restaurant,Burger Joint,Restaurant,Bakery,Sushi Restaurant,Concert Hall
1,Berczy Park,Coffee Shop,Café,Bakery,Farmers Market,Cocktail Bar,Steakhouse,Beer Bar,Cheese Shop,Seafood Restaurant,Concert Hall
2,"Brockton, Exhibition Place, Parkdale Village",Café,Breakfast Spot,Coffee Shop,Grocery Store,Pet Store,Stadium,Burrito Place,Restaurant,Climbing Gym,Nightclub
3,Business Reply Mail Processing Centre 969 Eastern,Light Rail Station,Brewery,Butcher,Restaurant,Burrito Place,Auto Workshop,Fast Food Restaurant,Farmers Market,Recording Studio,Garden
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",Airport Service,Airport Terminal,Airport Lounge,Coffee Shop,Sculpture Garden,Plane,Harbor / Marina,Boat or Ferry,Airport Gate,Airport Food Court


<a id='item4'></a>

## 4. Cluster Neighborhoods

Run *k*-means to cluster the neighborhood into 5 clusters.

In [37]:
# set number of clusters
kclusters = 3

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2], dtype=int32)

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [38]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
37,M4E,East Toronto,The Beaches,43.676357,-79.293031,2,Health Food Store,Other Great Outdoors,Pub,Trail,Discount Store,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Women's Store
41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,2,Greek Restaurant,Italian Restaurant,Coffee Shop,Bookstore,Ice Cream Shop,Furniture / Home Store,Grocery Store,Fruit & Vegetable Store,Pub,Pizza Place
42,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572,2,Park,Board Shop,Sushi Restaurant,Sandwich Place,Brewery,Liquor Store,Burger Joint,Italian Restaurant,Burrito Place,Fish & Chips Shop
43,M4M,East Toronto,Studio District,43.659526,-79.340923,2,Café,Coffee Shop,Gastropub,American Restaurant,Brewery,Bakery,Italian Restaurant,Latin American Restaurant,Gay Bar,Seafood Restaurant
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,1,Bus Line,Park,Swim School,Women's Store,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run


Finally, let's visualize the resulting clusters

In [39]:
# create map
map_clusters = folium.Map(location=[tor_lat, tor_long], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    marker = folium.CircleMarker(
        [tor_lat, tor_long],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

<a id='item5'></a>

## 5. Examine Clusters

Now, you can examine each cluster and determine the discriminating venue categories that distinguish each cluster. Based on the defining categories, you can then assign a name to each cluster. I will leave this exercise to you.

#### Cluster 1

In [40]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
63,Central Toronto,0,Ice Cream Shop,Garden,Dance Studio,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Discount Store


#### Cluster 2

In [41]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
44,Central Toronto,1,Bus Line,Park,Swim School,Women's Store,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run
48,Central Toronto,1,Park,Women's Store,Deli / Bodega,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Discount Store
50,Downtown Toronto,1,Park,Playground,Trail,Women's Store,Dance Studio,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run


#### Cluster 3

In [42]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
37,East Toronto,2,Health Food Store,Other Great Outdoors,Pub,Trail,Discount Store,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Women's Store
41,East Toronto,2,Greek Restaurant,Italian Restaurant,Coffee Shop,Bookstore,Ice Cream Shop,Furniture / Home Store,Grocery Store,Fruit & Vegetable Store,Pub,Pizza Place
42,East Toronto,2,Park,Board Shop,Sushi Restaurant,Sandwich Place,Brewery,Liquor Store,Burger Joint,Italian Restaurant,Burrito Place,Fish & Chips Shop
43,East Toronto,2,Café,Coffee Shop,Gastropub,American Restaurant,Brewery,Bakery,Italian Restaurant,Latin American Restaurant,Gay Bar,Seafood Restaurant
45,Central Toronto,2,Park,Department Store,Gym,Breakfast Spot,Dog Run,Sandwich Place,Food & Drink Shop,Hotel,Eastern European Restaurant,Dumpling Restaurant
46,Central Toronto,2,Clothing Store,Coffee Shop,Sporting Goods Shop,Gym / Fitness Center,Fast Food Restaurant,Diner,Dessert Shop,Mexican Restaurant,Cosmetics Shop,Park
47,Central Toronto,2,Dessert Shop,Sandwich Place,Italian Restaurant,Café,Pizza Place,Coffee Shop,Sushi Restaurant,Gym,Farmers Market,Indoor Play Area
49,Central Toronto,2,Coffee Shop,Pub,Pizza Place,Sushi Restaurant,Sports Bar,Fried Chicken Joint,Restaurant,American Restaurant,Liquor Store,Vietnamese Restaurant
51,Downtown Toronto,2,Coffee Shop,Italian Restaurant,Pizza Place,Park,Café,Bakery,Restaurant,Pub,Breakfast Spot,Snack Place
52,Downtown Toronto,2,Coffee Shop,Japanese Restaurant,Gay Bar,Sushi Restaurant,Restaurant,Fast Food Restaurant,Pub,Men's Store,Mediterranean Restaurant,Café
