# Segmentation and Clustering Neighbourhoods in Toronto

## Import Libraries

In [2]:
import json
import os

import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
#!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans

In [4]:
#!conda install -c conda-forge folium --yes
import folium
import requests
import bs4

In [5]:
# Wikipedia page listing the Canadian postal codes that start with "M".
weblink = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

## Code to scrape the Wikipedia page

In [6]:
# Download the page and parse it. The request itself sits inside the ``try``
# so connection failures are reported the same way as HTTP error statuses.
try:
    response = requests.get(weblink, timeout=30)
    response.raise_for_status()
    soup_obj = bs4.BeautifulSoup(response.text, 'html.parser')
except requests.RequestException as exc:
    print('Error while downloading the webpage.. %s' % exc)
    # Re-raise: without ``soup_obj`` the following cells would only fail later
    # with a NameError that hides the real download problem.
    raise

## Contents of Postal code

In [7]:
# Locate the postal-code table through its class attribute, then display it.
content = soup_obj.find('table', {'class': 'wikitable sortable'})
content

<table class="wikitable sortable">
<tbody><tr>
<th>Postcode</th>
<th>Borough</th>
<th>Neighbourhood
</th></tr>
<tr>
<td>M1A</td>
<td>Not assigned</td>
<td>Not assigned
</td></tr>
<tr>
<td>M2A</td>
<td>Not assigned</td>
<td>Not assigned
</td></tr>
<tr>
<td>M3A</td>
<td><a href="/wiki/North_York" title="North York">North York</a></td>
<td><a href="/wiki/Parkwoods" title="Parkwoods">Parkwoods</a>
</td></tr>
<tr>
<td>M4A</td>
<td><a href="/wiki/North_York" title="North York">North York</a></td>
<td><a href="/wiki/Victoria_Village" title="Victoria Village">Victoria Village</a>
</td></tr>
<tr>
<td>M5A</td>
<td><a href="/wiki/Downtown_Toronto" title="Downtown Toronto">Downtown Toronto</a></td>
<td><a href="/wiki/Regent_Park" title="Regent Park">Harbourfront</a>
</td></tr>
<tr>
<td>M6A</td>
<td><a href="/wiki/North_York" title="North York">North York</a></td>
<td><a href="/wiki/Lawrence_Heights" title="Lawrence Heights">Lawrence Heights</a>
</td></tr>
<tr>
<td>M6A</td>
<td><a href="/wiki/North

## Building dataframe

In [8]:
column_name = ['PostalCode', 'Borough', 'Neighborhood']

# Collect one record per table row and build the frame in a single call.
# Growing a DataFrame with ``DataFrame.append`` inside the loop copies the whole
# frame for every row, and that method no longer exists in pandas 2.x.
records = []
for row in content.find_all('tr'):
    line_item = row.find_all('td')
    # Only rows made of exactly three <td> cells carry data; the header row
    # uses <th> cells and is therefore skipped.
    if len(line_item) == 3:
        records.append({
            'PostalCode': line_item[0].text.strip(),
            'Borough': line_item[1].text.strip(),
            'Neighborhood': line_item[2].text.strip(),
        })

data_frame = pd.DataFrame(records, columns=column_name)

## Removing the rows whose borough is "Not assigned"

In [9]:
# Keep only the postal codes that have a borough assigned.
has_borough = data_frame['Borough'] != 'Not assigned'
df = data_frame[has_borough]
df.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Downtown Toronto,Queen's Park
9,M9A,Etobicoke,Islington Avenue
10,M1B,Scarborough,Rouge
11,M1B,Scarborough,Malvern
13,M3B,North York,Don Mills North


## Replace "Not assigned" neighbourhoods with the borough name

In [10]:
def set_neighborhood_as_borough(row):
    """Fall back to the borough name when a row has no neighbourhood.

    ``row`` is updated in place when its 'Neighborhood' entry equals
    'Not assigned'; the same object is returned in either case.
    """
    needs_fallback = row['Neighborhood'] == 'Not assigned'
    if not needs_fallback:
        return row
    row['Neighborhood'] = row['Borough']
    return row
    
# Run the fallback on every row (axis='columns' hands each row to the function).
df = df.apply(set_neighborhood_as_borough, axis='columns')
df.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Downtown Toronto,Queen's Park
9,M9A,Etobicoke,Islington Avenue
10,M1B,Scarborough,Rouge
11,M1B,Scarborough,Malvern
13,M3B,North York,Don Mills North


## Combine neighbourhoods that share a postal code into one comma-separated row

In [11]:
# One row per (PostalCode, Borough) pair; the neighbourhood names of each pair
# are joined with ', '. sort=False leaves the groups in order of first appearance.
neighborhoods_by_code = df.groupby(['PostalCode', 'Borough'], sort=False)['Neighborhood']
df = neighborhoods_by_code.apply(', '.join).reset_index()
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Downtown Toronto,Queen's Park


## Shape of the data

In [12]:
# (rows, columns) of the grouped postal-code table.
df.shape

(103, 3)

## Read csv and get latitude and longitude data

In [13]:
# Load the per-postal-code coordinates and give the columns the names used in
# this notebook, so the postal-code column is called 'PostalCode' as in `df`.
coordinate_file = pd.read_csv('http://cocl.us/Geospatial_data')
coordinate_file.columns = ['PostalCode', 'Latitude', 'Longitude']
coordinate_file.head(n=10)

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


In [14]:
# Attach latitude/longitude to every postal code by joining on 'PostalCode'.
df_toronto = df.merge(coordinate_file, on='PostalCode')
df_toronto.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937


## Cluster only the boroughs whose name contains "Toronto"

In [15]:
# Select the rows whose borough name contains 'Toronto' and renumber them from 0.
is_toronto = df_toronto['Borough'].str.contains('Toronto')
toronto_borough = df_toronto[is_toronto].reset_index(drop=True)
toronto_borough.head(5)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
1,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
2,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031


In [16]:
# (rows, columns) of the Toronto-only selection.
toronto_borough.shape

(39, 5)

## Foursquare

In [17]:
# Foursquare credentials are read from the environment so that no secret is
# stored in the notebook. Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET
# before starting the kernel. Credentials that were ever committed in a
# notebook should be regenerated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')  # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')  # your Foursquare Secret
VERSION = '20180605'  # Foursquare API version

if not (CLIENT_ID and CLIENT_SECRET):
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
          'Foursquare requests will be rejected.')

radius = 500
limit = 100

In [18]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare ``venues/explore`` endpoint around each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Label and coordinates of each search centre; they are walked in
        parallel with ``zip``.
    radius : int, default 500
        Sent to the API as the ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame
        has these columns even when no venue was returned.

    Raises
    ------
    requests.HTTPError
        When the API answers with an error status.

    Notes
    -----
    Reads the module-level ``CLIENT_ID``, ``CLIENT_SECRET``, ``VERSION`` and
    ``limit`` values. Each location name is printed as it is processed.
    """
    column_names = ['Neighborhood',
                    'Neighborhood Latitude',
                    'Neighborhood Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']
    explore_url = 'https://api.foursquare.com/v2/venues/explore'

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and URL-encode the query string.
        query = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': limit,
        }
        response = requests.get(explore_url, params=query, timeout=30)
        # Surface the HTTP error itself instead of a KeyError on 'groups' below.
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            # A venue may come back without any category; record None rather
            # than failing on the missing first element.
            categories = venue.get('categories') or []
            category = categories[0]['name'] if categories else None
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category))

    # Passing the column names here keeps an empty result well-formed.
    nearby_venues = pd.DataFrame(venue_rows, columns=column_names)

    return nearby_venues

In [19]:
# Fetch the venues around every Toronto neighbourhood (default search radius).
toronto_venues = getNearbyVenues(
    toronto_borough['Neighborhood'],
    toronto_borough['Latitude'],
    toronto_borough['Longitude'],
)

Harbourfront
Queen's Park
Ryerson, Garden District
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Adelaide, King, Richmond
Dovercourt Village, Dufferin
Harbourfront East, Toronto Islands, Union Station
Little Portugal, Trinity
The Danforth West, Riverdale
Design Exchange, Toronto Dominion Centre
Brockton, Exhibition Place, Parkdale Village
The Beaches West, India Bazaar
Commerce Court, Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North, Forest Hill West
High Park, The Junction South
North Toronto West
The Annex, North Midtown, Yorkville
Parkdale, Roncesvalles
Davisville
Harbord, University of Toronto
Runnymede, Swansea
Moore Park, Summerhill East
Chinatown, Grange Park, Kensington Market
Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Rosedale
Stn A PO Boxes 25 The Esplanade
Cabbagetown, St. James Town
Fir

In [20]:
# Print the (rows, columns) of the venue table, then preview its first ten rows.
print(toronto_venues.shape)
toronto_venues.head(n=10)

(1716, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Harbourfront,43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,Harbourfront,43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,Harbourfront,43.65426,-79.360636,Cooper Koo Family YMCA,43.653191,-79.357947,Gym / Fitness Center
3,Harbourfront,43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,Harbourfront,43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot
5,Harbourfront,43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant
6,Harbourfront,43.65426,-79.360636,Corktown Common,43.655618,-79.356211,Park
7,Harbourfront,43.65426,-79.360636,Figs Breakfast & Lunch,43.655675,-79.364503,Breakfast Spot
8,Harbourfront,43.65426,-79.360636,The Distillery Historic District,43.650244,-79.359323,Historic Site
9,Harbourfront,43.65426,-79.360636,Distillery Sunday Market,43.650075,-79.361832,Farmers Market


In [21]:
# The frame is grouped by 'Neighborhood', so the counts are per neighbourhood
# (not per borough, as the message used to say).
print('The number of popular venues in each neighborhood is as follows:')
toronto_venues.groupby('Neighborhood').count()

The number of popular venues in each borough are as follows:


Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
Berczy Park,56,56,56,56,56,56
"Brockton, Exhibition Place, Parkdale Village",25,25,25,25,25,25
Business Reply Mail Processing Centre 969 Eastern,16,16,16,16,16,16
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",16,16,16,16,16,16
"Cabbagetown, St. James Town",44,44,44,44,44,44
Central Bay Street,80,80,80,80,80,80
"Chinatown, Grange Park, Kensington Market",89,89,89,89,89,89
Christie,18,18,18,18,18,18
Church and Wellesley,83,83,83,83,83,83


In [22]:
# Distinct venue categories: report how many there are, then list them.
venue_categories = toronto_venues['Venue Category'].unique()
print('There are %d unique categories of venues in Toronto Boroughs' % len(venue_categories))
venue_categories

There are 235 unique categories of venues in Toronto Boroughs


array(['Bakery', 'Coffee Shop', 'Gym / Fitness Center', 'Spa',
       'Breakfast Spot', 'Restaurant', 'Park', 'Historic Site',
       'Farmers Market', 'Chocolate Shop', 'Pub', 'Performing Arts Venue',
       'Dessert Shop', 'Theater', 'French Restaurant', 'Café',
       'Mexican Restaurant', 'Event Space', 'Yoga Studio',
       'Ice Cream Shop', 'Shoe Store', 'Asian Restaurant', 'Art Gallery',
       'Cosmetics Shop', 'Electronics Store', 'Bank', 'Beer Store',
       'Hotel', 'Health Food Store', 'Antique Shop',
       'Portuguese Restaurant', 'Italian Restaurant', 'Gym', 'Creperie',
       'Sushi Restaurant', 'Beer Bar', 'Burrito Place',
       'Arts & Crafts Store', 'Burger Joint', 'Diner',
       'Fried Chicken Joint', 'Discount Store', 'Japanese Restaurant',
       'Wings Joint', 'Fast Food Restaurant', 'Seafood Restaurant',
       'Chinese Restaurant', 'Juice Bar', 'Sandwich Place',
       'College Auditorium', 'Bar', 'College Cafeteria',
       'Vegetarian / Vegan Restaurant', '

## Analyze each neighbourhood

In [23]:
# One-hot encode the venue categories (no column-name prefix). A category that
# is itself called "Neighborhood" is renamed so that the label column inserted
# next can use that name.
toronto_onehot = (
    pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")
    .rename(columns={"Neighborhood": "Neighborhod"})
)
toronto_onehot.insert(0, 'Neighborhood', toronto_venues['Neighborhood'])
toronto_onehot.head()

Unnamed: 0,Neighborhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Harbourfront,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Harbourfront,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Harbourfront,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Harbourfront,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Harbourfront,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [24]:
# (rows, columns) of the one-hot table.
toronto_onehot.shape

(1716, 236)

In [25]:
# Average the one-hot columns per neighbourhood: each value becomes the share
# of that neighbourhood's venues that fall in the category.
venues_by_hood = toronto_onehot.groupby("Neighborhood")
toronto_grouped = venues_by_hood.mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Neighborhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,...,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.01,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.0
2,"Brockton, Exhibition Place, Parkdale Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Business Reply Mail Processing Centre 969 Eastern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0,0.0625,0.0625,0.0625,0.125,0.1875,0.125,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [26]:
num_top_venues = 5

# Print the most frequent venue categories of every neighbourhood.
for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    # Turn the neighbourhood's single row into a two-column (venue, freq) table.
    hood_row = toronto_grouped.loc[toronto_grouped['Neighborhood'] == hood]
    freq_table = hood_row.T.reset_index()
    freq_table.columns = ['venue', 'freq']
    # The first transposed entry is the 'Neighborhood' label itself, so drop it
    # before converting the frequencies to rounded floats.
    freq_table = (
        freq_table.iloc[1:]
        .assign(freq=lambda t: t['freq'].astype(float))
        .round({'freq': 2})
    )
    ranked = freq_table.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
             venue  freq
0      Coffee Shop  0.06
1  Thai Restaurant  0.04
2       Restaurant  0.04
3              Bar  0.04
4             Café  0.04


----Berczy Park----
                venue  freq
0         Coffee Shop  0.09
1            Beer Bar  0.04
2        Cocktail Bar  0.04
3  Seafood Restaurant  0.04
4         Cheese Shop  0.04


----Brockton, Exhibition Place, Parkdale Village----
                 venue  freq
0            Nightclub  0.12
1                 Café  0.12
2       Breakfast Spot  0.08
3          Coffee Shop  0.08
4  Japanese Restaurant  0.04


----Business Reply Mail Processing Centre 969 Eastern----
           venue  freq
0     Comic Shop  0.06
1  Auto Workshop  0.06
2        Brewery  0.06
3  Burrito Place  0.06
4            Spa  0.06


----CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara----
                 venue  freq
0      Airport Service  0.19
1       Airport Lounge  0

In [27]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest entries of ``row``.

    The first entry of ``row`` is skipped, the remaining entries are sorted in
    descending order, and the index labels of the first ``num_top_venues`` of
    them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [28]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    # Ranks 1-3 take 'st'/'nd'/'rd'; every later rank uses 'th'. An explicit
    # bounds check replaces the former bare ``except:``, which would also have
    # hidden unrelated errors.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

# fill every row with that neighbourhood's top categories, most frequent first
for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Café,Restaurant,Bar,Thai Restaurant,Sushi Restaurant,Steakhouse,Lounge,Cosmetics Shop,Concert Hall
1,Berczy Park,Coffee Shop,Cocktail Bar,Bakery,Cheese Shop,Café,Restaurant,Seafood Restaurant,Beer Bar,Farmers Market,Gourmet Shop
2,"Brockton, Exhibition Place, Parkdale Village",Nightclub,Café,Coffee Shop,Breakfast Spot,Gym,Bakery,Performing Arts Venue,Pet Store,Climbing Gym,Restaurant
3,Business Reply Mail Processing Centre 969 Eastern,Smoke Shop,Auto Workshop,Skate Park,Brewery,Spa,Farmers Market,Fast Food Restaurant,Burrito Place,Butcher,Restaurant
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",Airport Service,Airport Lounge,Airport Terminal,Boutique,Harbor / Marina,Boat or Ferry,Rental Car Location,Coffee Shop,Sculpture Garden,Airport


## Clustering

In [29]:
# set number of clusters
kclusters = 5

# Feature matrix: every column except the neighbourhood label. The keyword form
# is used because the positional ``axis`` argument of DataFrame.drop is not
# accepted by pandas 2.x.
toronto_grouped_clustering = toronto_grouped.drop(columns='Neighborhood')

# run k-means clustering; n_init is pinned to 10 (the long-standing default) so
# the result does not shift with scikit-learn releases that changed the default
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [30]:
# Attach the k-means label to the ranked-venue table. A label column left over
# from an earlier run of this cell is dropped first, because DataFrame.insert
# raises when the column already exists.
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# join the cluster label and ranked venues onto toronto_borough, which carries
# the latitude/longitude of each neighbourhood
toronto_merged = toronto_borough.join(
    neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636,0,Coffee Shop,Bakery,Pub,Park,Breakfast Spot,Theater,Café,Restaurant,Mexican Restaurant,Chocolate Shop
1,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494,0,Coffee Shop,Gym,Park,Burger Joint,Discount Store,Restaurant,Portuguese Restaurant,Music Venue,Mexican Restaurant,Juice Bar
2,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937,0,Coffee Shop,Clothing Store,Bubble Tea Shop,Café,Japanese Restaurant,Theater,Middle Eastern Restaurant,Pizza Place,Electronics Store,Bakery
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,Coffee Shop,Café,Restaurant,Clothing Store,American Restaurant,Beer Bar,Breakfast Spot,Cosmetics Shop,Italian Restaurant,Hotel
4,M4E,East Toronto,The Beaches,43.676357,-79.293031,4,Trail,Neighborhod,Pub,Health Food Store,Doner Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Yoga Studio


In [31]:
# Remove the postal code column. The keyword form works on pandas 2.x (which
# rejects a positional axis), and errors='ignore' makes re-running this cell
# harmless once the column is already gone.
toronto_merged = toronto_merged.drop(columns='PostalCode', errors='ignore')
toronto_merged

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,Harbourfront,43.65426,-79.360636,0,Coffee Shop,Bakery,Pub,Park,Breakfast Spot,Theater,Café,Restaurant,Mexican Restaurant,Chocolate Shop
1,Downtown Toronto,Queen's Park,43.662301,-79.389494,0,Coffee Shop,Gym,Park,Burger Joint,Discount Store,Restaurant,Portuguese Restaurant,Music Venue,Mexican Restaurant,Juice Bar
2,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937,0,Coffee Shop,Clothing Store,Bubble Tea Shop,Café,Japanese Restaurant,Theater,Middle Eastern Restaurant,Pizza Place,Electronics Store,Bakery
3,Downtown Toronto,St. James Town,43.651494,-79.375418,0,Coffee Shop,Café,Restaurant,Clothing Store,American Restaurant,Beer Bar,Breakfast Spot,Cosmetics Shop,Italian Restaurant,Hotel
4,East Toronto,The Beaches,43.676357,-79.293031,4,Trail,Neighborhod,Pub,Health Food Store,Doner Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Yoga Studio
5,Downtown Toronto,Berczy Park,43.644771,-79.373306,0,Coffee Shop,Cocktail Bar,Bakery,Cheese Shop,Café,Restaurant,Seafood Restaurant,Beer Bar,Farmers Market,Gourmet Shop
6,Downtown Toronto,Central Bay Street,43.657952,-79.387383,0,Coffee Shop,Italian Restaurant,Sandwich Place,Juice Bar,Burger Joint,Japanese Restaurant,Department Store,Ice Cream Shop,Bar,Thai Restaurant
7,Downtown Toronto,Christie,43.669542,-79.422564,0,Grocery Store,Café,Park,Baby Store,Gas Station,Restaurant,Italian Restaurant,Athletics & Sports,Diner,Coffee Shop
8,Downtown Toronto,"Adelaide, King, Richmond",43.650571,-79.384568,0,Coffee Shop,Café,Restaurant,Bar,Thai Restaurant,Sushi Restaurant,Steakhouse,Lounge,Cosmetics Shop,Concert Hall
9,West Toronto,"Dovercourt Village, Dufferin",43.669005,-79.442259,0,Bakery,Pharmacy,Park,Art Gallery,Café,Middle Eastern Restaurant,Bar,Supermarket,Bank,Pool


## Create Map

In [32]:
address = 'Toronto, CA'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
# geocode() gives None when nothing matches; stop with a clear message instead
# of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))


The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [34]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    # A neighbourhood without a match in the join has no label (NaN) and cannot
    # be coloured; skip it instead of failing on the list index.
    if pd.isna(cluster):
        continue
    # A NaN elsewhere in the column turns the labels into floats; cast back so
    # the value can index the colour list.
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Labels run from 0 to kclusters-1, so they index the colour list directly
    # (the former ``cluster-1`` only worked through negative-index wraparound).
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)


map_clusters

## Cluster examining

In [35]:
# Cluster 0: column 1 of the frame followed by every column from position 5 on.
cluster_mask = toronto_merged['Cluster Labels'] == 0
shown_columns = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[cluster_mask, shown_columns]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Harbourfront,Coffee Shop,Bakery,Pub,Park,Breakfast Spot,Theater,Café,Restaurant,Mexican Restaurant,Chocolate Shop
1,Queen's Park,Coffee Shop,Gym,Park,Burger Joint,Discount Store,Restaurant,Portuguese Restaurant,Music Venue,Mexican Restaurant,Juice Bar
2,"Ryerson, Garden District",Coffee Shop,Clothing Store,Bubble Tea Shop,Café,Japanese Restaurant,Theater,Middle Eastern Restaurant,Pizza Place,Electronics Store,Bakery
3,St. James Town,Coffee Shop,Café,Restaurant,Clothing Store,American Restaurant,Beer Bar,Breakfast Spot,Cosmetics Shop,Italian Restaurant,Hotel
5,Berczy Park,Coffee Shop,Cocktail Bar,Bakery,Cheese Shop,Café,Restaurant,Seafood Restaurant,Beer Bar,Farmers Market,Gourmet Shop
6,Central Bay Street,Coffee Shop,Italian Restaurant,Sandwich Place,Juice Bar,Burger Joint,Japanese Restaurant,Department Store,Ice Cream Shop,Bar,Thai Restaurant
7,Christie,Grocery Store,Café,Park,Baby Store,Gas Station,Restaurant,Italian Restaurant,Athletics & Sports,Diner,Coffee Shop
8,"Adelaide, King, Richmond",Coffee Shop,Café,Restaurant,Bar,Thai Restaurant,Sushi Restaurant,Steakhouse,Lounge,Cosmetics Shop,Concert Hall
9,"Dovercourt Village, Dufferin",Bakery,Pharmacy,Park,Art Gallery,Café,Middle Eastern Restaurant,Bar,Supermarket,Bank,Pool
10,"Harbourfront East, Toronto Islands, Union Station",Coffee Shop,Aquarium,Café,Hotel,Restaurant,Fried Chicken Joint,Scenic Lookout,Italian Restaurant,Brewery,Sports Bar


In [36]:

# Cluster 1: column 1 of the frame followed by every column from position 5 on.
cluster_mask = toronto_merged['Cluster Labels'] == 1
shown_columns = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[cluster_mask, shown_columns]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,Roselawn,Garden,Yoga Studio,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant


In [37]:
# Cluster 2: column 1 of the frame followed by every column from position 5 on.
cluster_mask = toronto_merged['Cluster Labels'] == 2
shown_columns = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[cluster_mask, shown_columns]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
29,"Moore Park, Summerhill East",Restaurant,Playground,Department Store,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run


In [38]:
# Cluster 3: column 1 of the frame followed by every column from position 5 on.
cluster_mask = toronto_merged['Cluster Labels'] == 3
shown_columns = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[cluster_mask, shown_columns]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,Lawrence Park,Park,Gym / Fitness Center,Bus Line,Swim School,Dim Sum Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
33,Rosedale,Park,Playground,Trail,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop


In [39]:
# Cluster 4: column 1 of the frame followed by every column from position 5 on.
cluster_mask = toronto_merged['Cluster Labels'] == 4
shown_columns = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[cluster_mask, shown_columns]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,The Beaches,Trail,Neighborhod,Pub,Health Food Store,Doner Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Yoga Studio
