# Coursera_Capstone - Week 3 peer reviewed assignment

# **Part 1 - Create data frame**

In [61]:
#!pip install bs4
from bs4 import BeautifulSoup # this module helps in web scrapping.
!pip install requests
import requests  # this module helps us to download a web page
import pandas as pd



In [62]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

In [63]:
data = requests.get(url).text

In [64]:
soup = BeautifulSoup(data, 'html5lib')

In [65]:

table_contents=[]
table=soup.find('table')
table
for row in table.findAll('td'):
    cell = {}
    if row.span.text == 'Not assigned':
        pass
    else:
        cell['PostalCode'] = row.p.text[:3]
        cell['Borough'] = (row.span.text).split('(')[0]
        cell['Neighbourhood'] = (((((row.span.text).split('(')[1]).strip(')')).replace(' /',',')).replace(')',' ')).strip(' ')
        table_contents.append(cell)

df=pd.DataFrame(table_contents)
df['Borough']=df['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
                                             'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business',
                                             'EtobicokeNorthwest':'Etobicoke Northwest','East YorkEast Toronto':'East York/East Toronto',
                                             'MississaugaCanada Post Gateway Processing Centre':'Mississauga'})
print(df.shape)
df.head()

(103, 3)


Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government


- Data contains some rows where value are not assigned.
- Dropping the row where Borough is Not assigned (only).

In [66]:
df = df[df['Borough'] != 'Not assigned'].reset_index(drop = True)
print('After dropping rows where borough is "Not assigned", Shape is: ',df.shape)
print('Number of rows where Neighbourhood is "Not assigned" but borough has value: ', 
      df[df['Neighbourhood'] == 'Not assigned'].shape[0])

After dropping rows where borough is "Not assigned", Shape is:  (103, 3)
Number of rows where Neighbourhood is "Not assigned" but borough has value:  0


- There is only one row where Neighbourhood is Not assigned but Borough is assigned.
- Where Borough is not 'Not assigned' (means there a borugh) and Neighbourhood is 'Not assigned', borough will be the neighbourhood.

In [67]:
p, b, n = [], [], []
for postcode, borough, neigh in zip(df['PostalCode'], df['Borough'], df['Neighbourhood']):
    p.append(postcode)
    b.append(borough)
    if neigh == 'Not assigned':
        n.append(borough)
    else:
        n.append(neigh)

columns = ['PostalCode', 'Borough', 'Neighbourhood']
        
df = pd.DataFrame({'PostalCode': p, 'Borough': b, 'Neighbourhood':n})[columns]
print(df.shape)
df.head()

(103, 3)


Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government


# **Part 2 - get the latitude and the longitude coordinates of each neighborhood**

We will utilize the Geocoder module to retrieve the coordinate for each postal code. Let's install the package and import the module.

In [73]:
!pip install geocoder
import geocoder

import numpy as np
import matplotlib.pyplot as plt
%config IPCompleter.greedy=True
%config IPCompleter.use_jedi=False



We will use a geocoder method called .arcgis() to retrieve the coordinate.

Wikipedia: ArcGIS is a geographic information system for working with maps and geographic information maintained by the Environmental Systems Research Institute

Let's try one postal code and see the result:

In [70]:
coord = geocoder.arcgis('M3A, Toronto, Ontario')
coord.latlng

[43.75245000000007, -79.32990999999998]

Let's loop over each postal code and create a new coordinate dataframe

In [71]:
coord_df = pd.DataFrame(columns=['postal_code','latitude', 'longitude'])
coord_df.head()

Unnamed: 0,postal_code,latitude,longitude


In [75]:
coordinate = None
for poscode in df.PostalCode.to_list():
    # print(poscode)
    while (coordinate is None):
        g = geocoder.arcgis('{}, Toronto, Ontario'.format(poscode))
        coordinate = g.latlng
        # print(coordinate)
       
    coord_df = coord_df.append({'postal_code': poscode, 
                                'latitude': coordinate[0], 
                                'longitude': coordinate[1]}, ignore_index=True)
    coordinate = None

coord_df.head()

Unnamed: 0,postal_code,latitude,longitude
0,M3A,43.75245,-79.32991
1,M4A,43.73057,-79.31306
2,M5A,43.65512,-79.36264
3,M6A,43.72327,-79.45042
4,M7A,43.66253,-79.39188


In [76]:
coord_df.tail()

Unnamed: 0,postal_code,latitude,longitude
98,M8X,43.65319,-79.51113
99,M4Y,43.66659,-79.38133
100,M7Y,43.64869,-79.38544
101,M8Y,43.63278,-79.48945
102,M8Z,43.62513,-79.52681


We now will combine data frames

In [79]:
toronto_cdf = pd.concat([df, coord_df[['latitude', 'longitude']]], axis=1)

In [80]:
toronto_cdf

Unnamed: 0,PostalCode,Borough,Neighbourhood,latitude,longitude
0,M3A,North York,Parkwoods,43.75245,-79.32991
1,M4A,North York,Victoria Village,43.73057,-79.31306
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72327,-79.45042
4,M7A,Queen's Park,Ontario Provincial Government,43.66253,-79.39188
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.65319,-79.51113
99,M4Y,Downtown Toronto,Church and Wellesley,43.66659,-79.38133
100,M7Y,East Toronto Business,Enclave of M4L,43.64869,-79.38544
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.63278,-79.48945


# **Part 3 - Explore and cluster the neighborhoods in Toronto**

In [134]:

!conda install -c conda-forge geocoder --yes
import geocoder
from geopy.geocoders import Nominatim 

address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

## Package Plan ##

  environment location: C:\Users\hp\anaconda3

  added / updated specs:
    - geocoder


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geocoder-1.38.1            |             py_1          53 KB  conda-forge
    ratelim-0.1.6              |             py_2           6 KB  conda-forge
    ------------------------------------------------------------
                                           Total:          59 KB

The following NEW packages will be INSTALLED:

  geocoder           conda-forge/noarch::geocoder-1.38.1-py_1
  ratelim            conda-forge/noarch::ratelim-0.1.6-py_2



Downloading and Extracting Packages

ratelim-0.1.6        | 6 KB      |            |   0% 
ratelim-0.1.6        | 6 KB      | ########## | 100% 
ratelim-0.1.6        | 6 KB   

Print the map with all the boroughs

In [145]:
import folium

# create map of Toronto using latitude and longitude values
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10)


# add markers to map
for lat, lng, borough, neighborhood in zip(toronto_cdf['latitude'], toronto_cdf['longitude'], toronto_cdf['Borough'], toronto_cdf['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        ).add_to(map_Toronto)  
    
map_Toronto

# **Making Clusters for Neighborhood**

In [146]:
CLIENT_ID = 'EGZOACTZT1IS30QWSNMDXU4TFEZIMD4M300ZMCNGDJR1BV5B' # your Foursquare ID
CLIENT_SECRET = 'VKNA5A3EDU2PSACP11L21QD4MDI5QZI4MD4HWII02B2MGBIU' # your Foursquare Secret
VERSION = '20180604'

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: EGZOACTZT1IS30QWSNMDXU4TFEZIMD4M300ZMCNGDJR1BV5B
CLIENT_SECRET:VKNA5A3EDU2PSACP11L21QD4MDI5QZI4MD4HWII02B2MGBIU


Explore the data, and get the venues in 500 meters range from our first entry

In [149]:
neighborhood_latitude = toronto_cdf.loc[0, 'latitude'] # neighborhood latitude value
neighborhood_longitude = toronto_cdf.loc[0, 'longitude'] # neighborhood longitude value

neighborhood_name = toronto_cdf.loc[0, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Parkwoods are 43.75245000000007, -79.32990999999998.


Create the GET request URL

In [150]:
LIMIT = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=EGZOACTZT1IS30QWSNMDXU4TFEZIMD4M300ZMCNGDJR1BV5B&client_secret=VKNA5A3EDU2PSACP11L21QD4MDI5QZI4MD4HWII02B2MGBIU&v=20180604&ll=43.75245000000007,-79.32990999999998&radius=500&limit=100'

In [151]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '606f2b13dfffa92aad4862e3'},
  'headerLocation': 'Parkwoods - Donalda',
  'headerFullLocation': 'Parkwoods - Donalda, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 3,
  'suggestedBounds': {'ne': {'lat': 43.75695000450007,
    'lng': -79.32369182386579},
   'sw': {'lat': 43.747949995500065, 'lng': -79.33612817613418}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4e8d9dcdd5fbbbb6b3003c7b',
       'name': 'Brookbanks Park',
       'location': {'address': 'Toronto',
        'lat': 43.751976046055574,
        'lng': -79.33214044722958,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.751976046055574,
          'lng': -79.33214044722958}],
        'distance': 186,
        'cc': 'CA',
      

In [153]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [155]:
import json
from pandas.io.json import json_normalize

venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  nearby_venues = json_normalize(venues) # flatten JSON


Unnamed: 0,name,categories,lat,lng
0,Brookbanks Park,Park,43.751976,-79.33214
1,Variety Store,Food & Drink Shop,43.751974,-79.333114
2,Corrosion Service Company Limited,Construction & Landscaping,43.752432,-79.334661


Generalize to obtain the venues from all neighbourhoods in Toronto

In [157]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [161]:
toronto_venues = getNearbyVenues(names=toronto_cdf['Neighbourhood'],
                                   latitudes=toronto_cdf['latitude'],
                                   longitudes=toronto_cdf['longitude']
                                  )

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Ontario Provincial Government
Islington Avenue
Malvern, Rouge
Don Mills North
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills South
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
The Danforth  East
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmount Park
Bayview Village
Downsview East
The Danforth

check the size of the dataframe

In [162]:
print(toronto_venues.shape)
toronto_venues.head()

(2370, 7)


Unnamed: 0,Neighbourhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.75245,-79.32991,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.75245,-79.32991,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,Parkwoods,43.75245,-79.32991,Corrosion Service Company Limited,43.752432,-79.334661,Construction & Landscaping
3,Victoria Village,43.73057,-79.31306,Stellar's Lenonade Stand,43.731507,-79.311712,Food Stand
4,Victoria Village,43.73057,-79.31306,Wigmore Park,43.731023,-79.310771,Park


In [163]:
toronto_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,12,12,12,12,12,12
"Alderwood, Long Branch",4,4,4,4,4,4
Bayview Village,5,5,5,5,5,5
"Bedford Park, Lawrence Manor East",23,23,23,23,23,23
Berczy Park,66,66,66,66,66,66
...,...,...,...,...,...,...
"Willowdale, Newtonbrook",21,21,21,21,21,21
Woburn,4,4,4,4,4,4
Woodbine Heights,20,20,20,20,20,20
York Mills West,4,4,4,4,4,4


In [164]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighbourhood'] = toronto_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighbourhood,ATM,Accessories Store,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,...,Turkish Restaurant,Vegetarian / Vegan Restaurant,Veterinarian,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [165]:
toronto_onehot.shape

(2370, 265)

In [167]:
toronto_grouped = toronto_onehot.groupby('Neighbourhood').mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Neighbourhood,ATM,Accessories Store,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,...,Turkish Restaurant,Vegetarian / Vegan Restaurant,Veterinarian,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Berczy Park,0.0,0.0,0.0,0.015152,0.0,0.015152,0.0,0.0,0.0,...,0.0,0.015152,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015152


In [169]:
toronto_grouped.shape

(99, 265)

Get the top 10 for each neighbourhood

In [170]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [171]:
import numpy as np
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Chinese Restaurant,Bakery,Hong Kong Restaurant,Sushi Restaurant,Supermarket,Badminton Court,Department Store,Newsagent,Shopping Mall,Vietnamese Restaurant
1,"Alderwood, Long Branch",Gym,Performing Arts Venue,Convenience Store,Pub,Yoga Studio,Electronics Store,Doctor's Office,Dog Run,Donut Shop,Dumpling Restaurant
2,Bayview Village,Trail,Construction & Landscaping,Park,Golf Driving Range,Yoga Studio,Doctor's Office,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant
3,"Bedford Park, Lawrence Manor East",Sandwich Place,Coffee Shop,Italian Restaurant,Greek Restaurant,Thai Restaurant,Fast Food Restaurant,Liquor Store,Butcher,Café,Sports Club
4,Berczy Park,Coffee Shop,Seafood Restaurant,Cocktail Bar,Bakery,Breakfast Spot,Beer Bar,Farmers Market,Pharmacy,Restaurant,Cheese Shop


**Custer neighbourhoods**

In [172]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 2, 4, 0, 0, 0, 0, 0, 0, 0])

In [179]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_cdf

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighbourhood,latitude,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.75245,-79.32991,4.0,Construction & Landscaping,Park,Food & Drink Shop,Yoga Studio,Escape Room,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store
1,M4A,North York,Victoria Village,43.73057,-79.31306,0.0,Nail Salon,Food Stand,Grocery Store,Park,Doctor's Office,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264,0.0,Coffee Shop,Breakfast Spot,Pub,Spa,Event Space,Electronics Store,Food Truck,Distribution Center,Restaurant,Italian Restaurant
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72327,-79.45042,0.0,Clothing Store,Furniture / Home Store,Men's Store,Coffee Shop,Bookstore,Cosmetics Shop,Women's Store,Food Court,Mediterranean Restaurant,Juice Bar
4,M7A,Queen's Park,Ontario Provincial Government,43.66253,-79.39188,0.0,Coffee Shop,Italian Restaurant,Gastropub,Burrito Place,Park,Falafel Restaurant,Sandwich Place,Café,Fried Chicken Joint,Bank


In [180]:
toronto_merged[toronto_merged['Cluster Labels'].isnull()]

Unnamed: 0,PostalCode,Borough,Neighbourhood,latitude,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
28,M3H,North York,"Bathurst Manor, Wilson Heights, Downsview North",43.75788,-79.44847,,,,,,,,,,,
62,M5N,Central Toronto,Roselawn,43.71208,-79.41848,,,,,,,,,,,
95,M1X,Scarborough,Upper Rouge,43.83412,-79.21668,,,,,,,,,,,
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.65319,-79.51113,,,,,,,,,,,


**Plot the clusters in the map**

In [182]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

toronto_merged_nonan = toronto_merged.dropna(subset=['Cluster Labels'])

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged_nonan['latitude'], toronto_merged_nonan['longitude'], toronto_merged_nonan['Neighbourhood'], toronto_merged_nonan['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

**Examine clusters**

**Cluster 1**

In [183]:
toronto_merged_nonan.loc[toronto_merged_nonan['Cluster Labels'] == 0, toronto_merged_nonan.columns[[1] + list(range(5, toronto_merged_nonan.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,North York,0.0,Nail Salon,Food Stand,Grocery Store,Park,Doctor's Office,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store
2,Downtown Toronto,0.0,Coffee Shop,Breakfast Spot,Pub,Spa,Event Space,Electronics Store,Food Truck,Distribution Center,Restaurant,Italian Restaurant
3,North York,0.0,Clothing Store,Furniture / Home Store,Men's Store,Coffee Shop,Bookstore,Cosmetics Shop,Women's Store,Food Court,Mediterranean Restaurant,Juice Bar
4,Queen's Park,0.0,Coffee Shop,Italian Restaurant,Gastropub,Burrito Place,Park,Falafel Restaurant,Sandwich Place,Café,Fried Chicken Joint,Bank
5,Etobicoke,0.0,Pharmacy,Skating Rink,Café,Bank,Shopping Mall,Grocery Store,Park,Event Space,Ethiopian Restaurant,Escape Room
...,...,...,...,...,...,...,...,...,...,...,...,...
97,Downtown Toronto,0.0,Coffee Shop,Hotel,Café,Restaurant,Gym,Asian Restaurant,Salad Place,Seafood Restaurant,Japanese Restaurant,Deli / Bodega
99,Downtown Toronto,0.0,Coffee Shop,Japanese Restaurant,Restaurant,Sushi Restaurant,Dance Studio,Men's Store,Mediterranean Restaurant,Café,Pub,Gay Bar
100,East Toronto Business,0.0,Coffee Shop,Hotel,Café,Salon / Barbershop,Bar,Steakhouse,Gym,Seafood Restaurant,Sandwich Place,Salad Place
101,Etobicoke,0.0,Fast Food Restaurant,Flower Shop,Italian Restaurant,Sushi Restaurant,Coffee Shop,Bank,Park,Falafel Restaurant,Farm,Event Space


**Cluster 2**

In [184]:
toronto_merged_nonan.loc[toronto_merged_nonan['Cluster Labels'] == 1, toronto_merged_nonan.columns[[1] + list(range(5, toronto_merged_nonan.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
27,North York,1.0,Park,Residential Building (Apartment / Condo),Yoga Studio,Electronics Store,Doctor's Office,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Escape Room
35,East York/East Toronto,1.0,Intersection,Park,Yoga Studio,Escape Room,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant
53,North York,1.0,Park,Home Service,Yoga Studio,Escape Room,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Event Space
73,Central Toronto,1.0,Gym Pool,Park,Playground,Electronics Store,Doctor's Office,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Ethiopian Restaurant
91,Downtown Toronto,1.0,Park,Playground,Bike Trail,Yoga Studio,Escape Room,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store


**Cluster 3**

In [185]:
toronto_merged_nonan.loc[toronto_merged_nonan['Cluster Labels'] == 2, toronto_merged_nonan.columns[[1] + list(range(5, toronto_merged_nonan.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
66,North York,2.0,Speakeasy,Park,Convenience Store,Korean Restaurant,Escape Room,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store
67,Central Toronto,2.0,Gym / Fitness Center,Gym,Park,Convenience Store,Playground,Breakfast Spot,Hotel,Department Store,Food & Drink Shop,Electronics Store
69,West Toronto,2.0,Sandwich Place,Park,Convenience Store,Residential Building (Apartment / Condo),Yoga Studio,Eastern European Restaurant,Doctor's Office,Dog Run,Donut Shop,Dumpling Restaurant
71,Scarborough,2.0,Convenience Store,Auto Garage,Yoga Studio,Ethiopian Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Escape Room,Event Space
83,Central Toronto,2.0,Gym,Convenience Store,Park,Yoga Studio,Electronics Store,Doctor's Office,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant
93,Etobicoke,2.0,Gym,Performing Arts Venue,Convenience Store,Pub,Yoga Studio,Electronics Store,Doctor's Office,Dog Run,Donut Shop,Dumpling Restaurant


**Cluster 4**

In [186]:
toronto_merged_nonan.loc[toronto_merged_nonan['Cluster Labels'] == 3, toronto_merged_nonan.columns[[1] + list(range(5, toronto_merged_nonan.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
45,North York,3.0,Park,Yoga Studio,Escape Room,Doctor's Office,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant
68,Central Toronto,3.0,Park,Yoga Studio,Escape Room,Doctor's Office,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant


**Cluster 5**

In [188]:
toronto_merged_nonan.loc[toronto_merged_nonan['Cluster Labels'] == 4, toronto_merged_nonan.columns[[1] + list(range(5, toronto_merged_nonan.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,4.0,Construction & Landscaping,Park,Food & Drink Shop,Yoga Studio,Escape Room,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store
6,Scarborough,4.0,Construction & Landscaping,Fast Food Restaurant,Yoga Studio,Escape Room,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Event Space
12,Scarborough,4.0,Construction & Landscaping,Bar,Yoga Studio,Ethiopian Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Escape Room,Event Space
18,Scarborough,4.0,Gym / Fitness Center,Construction & Landscaping,Park,Bus Stop,Yoga Studio,Electronics Store,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant
39,North York,4.0,Trail,Construction & Landscaping,Park,Golf Driving Range,Yoga Studio,Doctor's Office,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant
50,North York,4.0,Sporting Goods Shop,Construction & Landscaping,Ethiopian Restaurant,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Escape Room,Event Space
