# Segmenting and Clustering Neighbourhoods in Toronto

### Creating the Dataframe

In [1]:
import os

import pandas as pd

In [2]:
# Scraping the tables from the Wikipedia page
# (pd.read_html returns a list with one DataFrame per HTML table it finds)
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
dfs = pd.read_html(url)

# Number of tables found on the page
print(len(dfs))

3


In [3]:
# Selecting the first of the scraped tables (each entry of dfs is already a Dataframe)
df = dfs[0]
df

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


In [4]:
# Removing the rows where the Borough is 'Not assigned'
# A filtered copy is built instead of dropping in place, so the scraped table in dfs[0]
# stays untouched and this cell gives the same result every time it is run.
df = df[df["Borough"] != "Not assigned"].reset_index(drop = True)
df

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [5]:
# The shape of the Dataframe as (number of rows, number of columns)
df.shape

(103, 3)

### Adding Latitude and Longitude values

In [6]:
# Reading the CSV file into a Dataframe
# The file location can be overridden with the GEOSPATIAL_CSV environment variable;
# when it is not set, the original hard-coded local path is used.
geo_csv_path = os.environ.get("GEOSPATIAL_CSV", "C:/Stuff/IBM Cloud/Geospatial_Coordinates.csv")
df1 = pd.read_csv(geo_csv_path, index_col = 0)
df1

Unnamed: 0_level_0,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,43.806686,-79.194353
M1C,43.784535,-79.160497
M1E,43.763573,-79.188711
M1G,43.770992,-79.216917
M1H,43.773136,-79.239476
...,...,...
M9N,43.706876,-79.518188
M9P,43.696319,-79.532242
M9R,43.688905,-79.554724
M9V,43.739416,-79.588437


In [7]:
# Merging the Dataframes to add latitude and longitude values
# Columns of df1 that a previous run of this cell already added are dropped first,
# otherwise re-running the cell fails because the joined column names overlap.
df = df.drop(columns = df1.columns, errors = "ignore").join(df1, on = "Postal Code")
df

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


### Clustering the neighbourhoods

In [8]:
from geopy.geocoders import Nominatim
import folium
import requests
import numpy as np

In [9]:
# Getting the latitude and longitude values of Toronto
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode("Toronto, ON")
# geocode() returns None when no match is found; fail with a clear message
# rather than an AttributeError on the attribute access below
if location is None:
    raise RuntimeError("Nominatim returned no result for 'Toronto, ON'")
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Toronto are 43.6534817, -79.3839347.


In [10]:
# Base map centred on the Toronto coordinates found above
map_toronto = folium.Map(location = [latitude, longitude], zoom_start = 10)

# One blue circle per row of df, with a "neighbourhood, borough, postal code" popup
marker_fields = df[["Latitude", "Longitude", "Postal Code", "Borough", "Neighbourhood"]]
for lat, lng, postal, borough, neighbourhood in marker_fields.itertuples(index = False, name = None):
    popup_text = f"{neighbourhood}, {borough}, {postal}"
    marker = folium.CircleMarker(
        [lat, lng],
        radius = 5,
        popup = folium.Popup(popup_text, parse_html = True),
        color = "blue",
        fill = True,
        fill_color = "#3186cc",
        fill_opacity = 0.7,
        parse_html = False,
    )
    marker.add_to(map_toronto)
map_toronto

##### Working with only boroughs containing the word Toronto

In [11]:
# Keeping only the rows whose borough name contains the word Toronto, renumbered from 0
is_toronto_borough = df["Borough"].str.contains("Toronto")
toronto_df = df.loc[is_toronto_borough].reset_index(drop = True)
toronto_df

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031
5,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
6,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
7,M6G,Downtown Toronto,Christie,43.669542,-79.422564
8,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568
9,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259


In [12]:
# Second map, slightly more zoomed in, showing only the rows of toronto_df
map_toronto1 = folium.Map(location = [latitude, longitude], zoom_start = 11)

marker_fields = toronto_df[["Latitude", "Longitude", "Postal Code", "Borough", "Neighbourhood"]]
for lat, lng, postal, borough, neighbourhood in marker_fields.itertuples(index = False, name = None):
    popup_text = f"{neighbourhood}, {borough}, {postal}"
    marker = folium.CircleMarker(
        [lat, lng],
        radius = 5,
        popup = folium.Popup(popup_text, parse_html = True),
        color = "blue",
        fill = True,
        fill_color = "#3186cc",
        fill_opacity = 0.7,
        parse_html = False,
    )
    marker.add_to(map_toronto1)
map_toronto1

In [13]:
# This cell contained the client id and secret for Foursquare.
# NOTE(review): getNearbyVenues reads CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT as globals,
# so they have to be defined (presumably here) before that function is called.

##### Using Foursquare credentials to get information about each of the neighbourhoods

In [14]:
# Creating a function to get info for all of the neighbourhoods
def getNearbyVenues(names, postals, latitudes, longitudes, radius = 500, timeout = 30):
    """Query the Foursquare "explore" endpoint once per neighbourhood and tabulate the venues.

    The four iterables are consumed in parallel (one request per position). The globals
    CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT must be defined before calling.

    Parameters
    ----------
    names, postals, latitudes, longitudes : iterables of equal length
        Neighbourhood name, postal code and coordinates of each search centre.
    radius : number, default 500
        Value sent as the ``radius`` query parameter.
    timeout : number, default 30
        Seconds to wait for each HTTP response before requests raises a timeout error.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns "Postal Code", "Neighbourhood",
        "Neighbourhood Latitude", "Neighbourhood Longitude", "Venue", "Venue Latitude",
        "Venue Longitude" and "Venue Category". The columns are present even when no
        venue is returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    """
    column_names = ["Postal Code",
                    "Neighbourhood",
                    "Neighbourhood Latitude",
                    "Neighbourhood Longitude",
                    "Venue",
                    "Venue Latitude",
                    "Venue Longitude",
                    "Venue Category"]

    venue_rows = []
    for name, postal, lat, lng in zip(names, postals, latitudes, longitudes):
        print(postal, ",", name)

        # Let requests build and escape the query string instead of formatting it by hand
        response = requests.get(
            "https://api.foursquare.com/v2/venues/explore",
            params = {
                "client_id": CLIENT_ID,
                "client_secret": CLIENT_SECRET,
                "v": VERSION,
                "ll": "{},{}".format(lat, lng),
                "radius": radius,
                "limit": LIMIT,
            },
            timeout = timeout)
        # Surface HTTP errors directly rather than as a KeyError on the JSON lookup below
        response.raise_for_status()
        results = response.json()["response"]["groups"][0]["items"]

        for v in results:
            venue = v["venue"]
            categories = venue["categories"]
            venue_rows.append((
                postal,
                name,
                lat,
                lng,
                venue["name"],
                venue["location"]["lat"],
                venue["location"]["lng"],
                # Guard against a venue whose category list is empty
                categories[0]["name"] if categories else None))

    # Passing the column names to the constructor also works when venue_rows is empty
    return pd.DataFrame(venue_rows, columns = column_names)

In [15]:
# Fetching the venues around every row of toronto_df and collecting them in one Dataframe
toronto_venues = getNearbyVenues(
    toronto_df["Neighbourhood"],
    toronto_df["Postal Code"],
    toronto_df["Latitude"],
    toronto_df["Longitude"],
)
toronto_venues

M5A , Regent Park, Harbourfront
M7A , Queen's Park, Ontario Provincial Government
M5B , Garden District, Ryerson
M5C , St. James Town
M4E , The Beaches
M5E , Berczy Park
M5G , Central Bay Street
M6G , Christie
M5H , Richmond, Adelaide, King
M6H , Dufferin, Dovercourt Village
M5J , Harbourfront East, Union Station, Toronto Islands
M6J , Little Portugal, Trinity
M4K , The Danforth West, Riverdale
M5K , Toronto Dominion Centre, Design Exchange
M6K , Brockton, Parkdale Village, Exhibition Place
M4L , India Bazaar, The Beaches West
M5L , Commerce Court, Victoria Hotel
M4M , Studio District
M4N , Lawrence Park
M5N , Roselawn
M4P , Davisville North
M5P , Forest Hill North & West, Forest Hill Road Park
M6P , High Park, The Junction South
M4R , North Toronto West, Lawrence Park
M5R , The Annex, North Midtown, Yorkville
M6R , Parkdale, Roncesvalles
M4S , Davisville
M5S , University of Toronto, Harbord
M6S , Runnymede, Swansea
M4T , Moore Park, Summerhill East
M5T , Kensington Market, Chinatown, 

Unnamed: 0,Postal Code,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,M5A,"Regent Park, Harbourfront",43.654260,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
1,M5A,"Regent Park, Harbourfront",43.654260,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
2,M5A,"Regent Park, Harbourfront",43.654260,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,M5A,"Regent Park, Harbourfront",43.654260,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,M5A,"Regent Park, Harbourfront",43.654260,-79.360636,Impact Kitchen,43.656369,-79.356980,Restaurant
...,...,...,...,...,...,...,...,...
1589,M7Y,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,TTC Russell Division,43.664908,-79.322560,Light Rail Station
1590,M7Y,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,Jonathan Ashbridge Park,43.664702,-79.319898,Park
1591,M7Y,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,The Ten Spot,43.664815,-79.324213,Spa
1592,M7Y,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,ONE Academy,43.662253,-79.326911,Gym / Fitness Center


In [16]:
# How many distinct venue categories were returned
unique_categories = toronto_venues["Venue Category"].unique()
print(f"There are {len(unique_categories)} unique categories")

# Row counts per neighbourhood (every column shows the same count when nothing is missing)
toronto_venues.groupby("Neighbourhood").count()

There are 233 unique categories


Unnamed: 0_level_0,Postal Code,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Berczy Park,55,55,55,55,55,55,55
"Brockton, Parkdale Village, Exhibition Place",22,22,22,22,22,22,22
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",16,16,16,16,16,16,16
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",18,18,18,18,18,18,18
Central Bay Street,61,61,61,61,61,61,61
Christie,16,16,16,16,16,16,16
Church and Wellesley,79,79,79,79,79,79,79
"Commerce Court, Victoria Hotel",100,100,100,100,100,100,100
Davisville,36,36,36,36,36,36,36
Davisville North,8,8,8,8,8,8,8


##### Analysing each neighbourhood

In [17]:
# Creating a function to reorder the columns of the Dataframe
def reorder_columns(columns, first_cols = None):
    """Return a column order with ``first_cols`` in front and the rest sorted.

    Parameters
    ----------
    columns : iterable of column names
    first_cols : iterable of column names, optional
        Names to place first, in the order given. Any iterable is accepted
        (list, tuple, Index); omitting it sorts every column.

    Returns
    -------
    list
        ``first_cols`` followed by the remaining distinct names of ``columns``
        in ascending order.
    """
    # None replaces the mutable [] default; list() lets non-list iterables be concatenated
    leading = [] if first_cols is None else list(first_cols)
    remaining = sorted(set(columns) - set(leading))
    return leading + remaining

In [18]:
# One indicator column per venue category, plus the two identifying columns of each venue row
id_cols = ["Postal Code", "Neighbourhood"]
toronto_onehot = (
    pd.get_dummies(toronto_venues[["Venue Category"]], prefix = "", prefix_sep = "")
    .assign(**{col: toronto_venues[col] for col in id_cols})
)

# Identifying columns first, category columns after them in sorted order
reordered_cols = reorder_columns(toronto_onehot.columns.tolist(), first_cols = id_cols)
toronto_onehot = toronto_onehot[reordered_cols]

toronto_onehot

Unnamed: 0,Postal Code,Neighbourhood,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Theme Restaurant,Tibetan Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,M5A,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M5A,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M5A,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M5A,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M5A,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1589,M7Y,"Business reply mail Processing Centre, South C...",0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1590,M7Y,"Business reply mail Processing Centre, South C...",0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1591,M7Y,"Business reply mail Processing Centre, South C...",0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1592,M7Y,"Business reply mail Processing Centre, South C...",0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [19]:
# Per (Postal Code, Neighbourhood) pair: the share of its venues that falls in each category
toronto_grouped = toronto_onehot.groupby(["Postal Code", "Neighbourhood"], as_index = False).mean()
toronto_grouped

Unnamed: 0,Postal Code,Neighbourhood,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Theme Restaurant,Tibetan Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,M4E,The Beaches,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0
1,M4K,"The Danforth West, Riverdale",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,...,0.0,0.02381,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.02381
2,M4L,"India Bazaar, The Beaches West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M4M,Studio District,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.054054,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027027
4,M4N,Lawrence Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,M4P,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,M4R,"North Toronto West, Lawrence Park",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05
7,M4S,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,M4T,"Moore Park, Summerhill East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,M4V,"Summerhill West, Rathnelly, South Hill, Forest...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0


In [20]:
# Printing each neighbourhood along with the top 5 most common types of venues
# Every row of toronto_grouped is processed on its own. Looking the row up again by
# neighbourhood name would select several rows when two postal codes share a name,
# and the two-column renaming would then fail.
for _, grouped_row in toronto_grouped.iterrows():
    print(grouped_row["Neighbourhood"], ", ", grouped_row["Postal Code"])
    # Positions 0 and 1 hold the postal code and neighbourhood; the rest are category frequencies
    temp = (
        grouped_row.iloc[2:]
        .astype(float)
        .round(2)
        .rename_axis("Venue")
        .reset_index(name = "Freq")
    )
    print(temp.sort_values("Freq", ascending = False).reset_index(drop = True).head(5))
    print("\n")

The Beaches ,  M4E
               Venue  Freq
0  Health Food Store   0.2
1               Park   0.2
2              Trail   0.2
3       Neighborhood   0.2
4                Pub   0.2


The Danforth West, Riverdale ,  M4K
                Venue  Freq
0    Greek Restaurant  0.17
1         Coffee Shop  0.10
2  Italian Restaurant  0.07
3          Restaurant  0.05
4      Ice Cream Shop  0.05


India Bazaar, The Beaches West ,  M4L
                  Venue  Freq
0  Fast Food Restaurant  0.11
1                   Gym  0.05
2      Sushi Restaurant  0.05
3                  Park  0.05
4           Coffee Shop  0.05


Studio District ,  M4M
         Venue  Freq
0  Coffee Shop  0.08
1    Gastropub  0.05
2       Bakery  0.05
3      Brewery  0.05
4         Café  0.05


Lawrence Park ,  M4N
            Venue  Freq
0            Park  0.33
1     Swim School  0.33
2        Bus Line  0.33
3  Adult Boutique  0.00
4          Museum  0.00


Davisville North ,  M4P
                  Venue  Freq
0  Gym / Fitness Ce

In [21]:
# Creating a function to sort the venues in descending order
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the ``num_top_venues`` largest entries of ``row``.

    The first two entries of ``row`` are skipped; the remaining ones are ranked
    from highest to lowest value and their index labels are returned as an array.
    """
    ranked = row.iloc[2:].sort_values(ascending = False)
    return ranked.index.values[:num_top_venues]

In [22]:
# Creating a new Dataframe and including the top 10 most common venues
num_top_venues = 10

indicators = ["st", "nd", "rd"]

# Creating columns to show top 10 most common venues
columns = ["Postal Code", "Neighbourhood"]
for ind in np.arange(num_top_venues):
    # Ranks 1-3 take their suffix from `indicators`, every later rank takes "th".
    # An explicit bounds check replaces the bare `except:`, which would also have hidden unrelated errors.
    suffix = indicators[ind] if ind < len(indicators) else "th"
    columns.append("{}{} Most Common Venue".format(ind + 1, suffix))

# Creating the new Dataframe and matching the Postal Code and Neighbourhood columns
neighbourhoods_venues_sorted = pd.DataFrame(columns = columns)
neighbourhoods_venues_sorted["Postal Code"] = toronto_grouped["Postal Code"]
neighbourhoods_venues_sorted["Neighbourhood"] = toronto_grouped["Neighbourhood"]

# Filling in the Dataframe with most common venue types, one row of toronto_grouped at a time
for ind in np.arange(toronto_grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 2:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted

Unnamed: 0,Postal Code,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,The Beaches,Pub,Park,Neighborhood,Trail,Health Food Store,Yoga Studio,Dessert Shop,Diner,Discount Store,Distribution Center
1,M4K,"The Danforth West, Riverdale",Greek Restaurant,Coffee Shop,Italian Restaurant,Furniture / Home Store,Restaurant,Ice Cream Shop,Cosmetics Shop,Brewery,Bubble Tea Shop,Café
2,M4L,"India Bazaar, The Beaches West",Fast Food Restaurant,Gym,Park,Brewery,Sandwich Place,Restaurant,Pub,Pizza Place,Coffee Shop,Movie Theater
3,M4M,Studio District,Coffee Shop,Brewery,Gastropub,Café,Bakery,American Restaurant,Yoga Studio,Convenience Store,Cheese Shop,Clothing Store
4,M4N,Lawrence Park,Bus Line,Park,Swim School,Yoga Studio,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant
5,M4P,Davisville North,Pizza Place,Food & Drink Shop,Park,Gym / Fitness Center,Breakfast Spot,Hotel,Department Store,Sandwich Place,Diner,Discount Store
6,M4R,"North Toronto West, Lawrence Park",Coffee Shop,Clothing Store,Shoe Store,Seafood Restaurant,Salon / Barbershop,Restaurant,Café,Chinese Restaurant,Yoga Studio,Sporting Goods Shop
7,M4S,Davisville,Pizza Place,Sandwich Place,Dessert Shop,Gym,Italian Restaurant,Café,Sushi Restaurant,Coffee Shop,Toy / Game Store,Greek Restaurant
8,M4T,"Moore Park, Summerhill East",Summer Camp,Restaurant,Yoga Studio,Department Store,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant
9,M4V,"Summerhill West, Rathnelly, South Hill, Forest...",Coffee Shop,Pub,Restaurant,Bank,Supermarket,Sushi Restaurant,Light Rail Station,Athletics & Sports,Fried Chicken Joint,Bagel Shop


##### Clustering the neighbourhoods

In [23]:
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

In [24]:
# Keeping only the category-frequency columns as clustering features.
# `columns=` is used because passing the axis positionally to drop() is no longer
# accepted by pandas 2.x.
toronto_grouped_clustering = toronto_grouped.drop(columns = ["Neighbourhood", "Postal Code"])

# Setting number of clusters for k-means
kclusters = 5

# Running k-means clustering
# n_init is stated explicitly so the result does not depend on the scikit-learn version's default
kmeans = KMeans(n_clusters = kclusters, random_state = 0, n_init = 10).fit(toronto_grouped_clustering)
kmeans.labels_[0 : 10]

array([0, 3, 3, 3, 0, 3, 3, 3, 4, 3])

In [25]:
# Dropping the Neighbourhood column so Dataframes can merge successfully.
# A "Cluster Label" column left by an earlier run of this cell is dropped as well,
# so the insert below does not fail when the cell is re-run.
neighbourhoods_venues_sorted = neighbourhoods_venues_sorted.drop(
    columns = ["Cluster Label", "Neighbourhood"], errors = "ignore")

# Inserting a new column for the cluster labels
# (kmeans.labels_ follows the row order of toronto_grouped, which this Dataframe was built from)
neighbourhoods_venues_sorted.insert(0, "Cluster Label", kmeans.labels_)

# Merging the two Dataframes to add latitude and longitude values for each neighbourhood.
# NOTE(review): a postal code of toronto_df with no venue rows would get NaN in the joined columns.
toronto_merged = toronto_df.join(neighbourhoods_venues_sorted.set_index("Postal Code"), on = "Postal Code")
toronto_merged

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,3,Coffee Shop,Bakery,Pub,Park,Breakfast Spot,Restaurant,Café,Theater,Cosmetics Shop,Shoe Store
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,3,Coffee Shop,Sushi Restaurant,Yoga Studio,Diner,Portuguese Restaurant,Park,Mexican Restaurant,Japanese Restaurant,Italian Restaurant,Fried Chicken Joint
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,3,Clothing Store,Coffee Shop,Café,Cosmetics Shop,Japanese Restaurant,Middle Eastern Restaurant,Italian Restaurant,Bubble Tea Shop,Electronics Store,Ramen Restaurant
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,3,Café,Coffee Shop,Cocktail Bar,American Restaurant,Gastropub,Restaurant,Clothing Store,Moroccan Restaurant,Cosmetics Shop,Creperie
4,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Pub,Park,Neighborhood,Trail,Health Food Store,Yoga Studio,Dessert Shop,Diner,Discount Store,Distribution Center
5,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,3,Coffee Shop,Cocktail Bar,Restaurant,Bakery,Beer Bar,Farmers Market,Seafood Restaurant,Cheese Shop,Park,Beach
6,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,3,Coffee Shop,Italian Restaurant,Sandwich Place,Café,Salad Place,Bubble Tea Shop,Burger Joint,Yoga Studio,Ramen Restaurant,Bike Rental / Bike Share
7,M6G,Downtown Toronto,Christie,43.669542,-79.422564,3,Grocery Store,Café,Park,Restaurant,Baby Store,Candy Store,Coffee Shop,Athletics & Sports,Nightclub,Italian Restaurant
8,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568,3,Coffee Shop,Café,Restaurant,Gym,Clothing Store,Deli / Bodega,Thai Restaurant,Sushi Restaurant,Concert Hall,Cosmetics Shop
9,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259,3,Pharmacy,Bakery,Park,Music Venue,Supermarket,Bar,Middle Eastern Restaurant,Café,Pool,Bank


In [26]:
# Creating a new map to show the clusters
map_clusters = folium.Map(location = [latitude, longitude], zoom_start = 11)

# Setting a colour scheme for the clusters: one hex colour per cluster label,
# evenly spaced along matplotlib's rainbow colormap
colours_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colours_array]

# Creating markers
for lat, lng, postal, neighbourhood, cluster in zip(toronto_merged["Latitude"], toronto_merged["Longitude"], toronto_merged["Postal Code"], toronto_merged["Neighbourhood"], toronto_merged["Cluster Label"]):
    # Rows without a cluster label (NaN after the join) cannot be coloured; skip them
    if pd.isna(cluster):
        continue
    cluster = int(cluster)
    label = "{}, {}, Cluster {}".format(neighbourhood, postal, cluster)
    label = folium.Popup(label, parse_html = True)
    # Labels are 0-based, so they index the colour list directly
    # (cluster - 1 only worked through negative-index wrap-around for label 0)
    folium.CircleMarker([lat, lng],
                       radius = 5,
                       popup = label,
                       color = rainbow[cluster],
                       fill = True,
                       fill_color = rainbow[cluster],
                       fill_opacity = 0.7).add_to(map_clusters)

map_clusters

##### Looking at the neighbourhoods in each cluster and analysing how they are similar.

In [27]:
# Cluster 0: postal code, neighbourhood, and every column from position 5 onwards
cluster_columns = [0, 2, *range(5, toronto_merged.shape[1])]
toronto_merged[toronto_merged["Cluster Label"] == 0].iloc[:, cluster_columns]

Unnamed: 0,Postal Code,Neighbourhood,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,M4E,The Beaches,0,Pub,Park,Neighborhood,Trail,Health Food Store,Yoga Studio,Dessert Shop,Diner,Discount Store,Distribution Center
18,M4N,Lawrence Park,0,Bus Line,Park,Swim School,Yoga Studio,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant
33,M4W,Rosedale,0,Park,Playground,Trail,Yoga Studio,Deli / Bodega,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant


The first cluster contains three neighbourhoods where Parks are very common venues.

In [28]:
# Cluster 1: postal code, neighbourhood, and every column from position 5 onwards
cluster_columns = [0, 2, *range(5, toronto_merged.shape[1])]
toronto_merged[toronto_merged["Cluster Label"] == 1].iloc[:, cluster_columns]

Unnamed: 0,Postal Code,Neighbourhood,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,M5N,Roselawn,1,Home Service,Garden,Yoga Studio,Department Store,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop


The second cluster contains one neighbourhood with a Home Service and a Garden (the `toronto_venues` DataFrame shows that the neighbourhood Roselawn contains only two venues).

In [29]:
# Cluster 2: postal code, neighbourhood, and every column from position 5 onwards
cluster_columns = [0, 2, *range(5, toronto_merged.shape[1])]
toronto_merged[toronto_merged["Cluster Label"] == 2].iloc[:, cluster_columns]

Unnamed: 0,Postal Code,Neighbourhood,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
21,M5P,"Forest Hill North & West, Forest Hill Road Park",2,Trail,Jewelry Store,Mexican Restaurant,Sushi Restaurant,Yoga Studio,Department Store,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop


The third cluster contains one neighbourhood with trails as the most common venue type.

In [30]:
# Cluster 3: postal code, neighbourhood, and every column from position 5 onwards
cluster_columns = [0, 2, *range(5, toronto_merged.shape[1])]
toronto_merged[toronto_merged["Cluster Label"] == 3].iloc[:, cluster_columns]

Unnamed: 0,Postal Code,Neighbourhood,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,"Regent Park, Harbourfront",3,Coffee Shop,Bakery,Pub,Park,Breakfast Spot,Restaurant,Café,Theater,Cosmetics Shop,Shoe Store
1,M7A,"Queen's Park, Ontario Provincial Government",3,Coffee Shop,Sushi Restaurant,Yoga Studio,Diner,Portuguese Restaurant,Park,Mexican Restaurant,Japanese Restaurant,Italian Restaurant,Fried Chicken Joint
2,M5B,"Garden District, Ryerson",3,Clothing Store,Coffee Shop,Café,Cosmetics Shop,Japanese Restaurant,Middle Eastern Restaurant,Italian Restaurant,Bubble Tea Shop,Electronics Store,Ramen Restaurant
3,M5C,St. James Town,3,Café,Coffee Shop,Cocktail Bar,American Restaurant,Gastropub,Restaurant,Clothing Store,Moroccan Restaurant,Cosmetics Shop,Creperie
5,M5E,Berczy Park,3,Coffee Shop,Cocktail Bar,Restaurant,Bakery,Beer Bar,Farmers Market,Seafood Restaurant,Cheese Shop,Park,Beach
6,M5G,Central Bay Street,3,Coffee Shop,Italian Restaurant,Sandwich Place,Café,Salad Place,Bubble Tea Shop,Burger Joint,Yoga Studio,Ramen Restaurant,Bike Rental / Bike Share
7,M6G,Christie,3,Grocery Store,Café,Park,Restaurant,Baby Store,Candy Store,Coffee Shop,Athletics & Sports,Nightclub,Italian Restaurant
8,M5H,"Richmond, Adelaide, King",3,Coffee Shop,Café,Restaurant,Gym,Clothing Store,Deli / Bodega,Thai Restaurant,Sushi Restaurant,Concert Hall,Cosmetics Shop
9,M6H,"Dufferin, Dovercourt Village",3,Pharmacy,Bakery,Park,Music Venue,Supermarket,Bar,Middle Eastern Restaurant,Café,Pool,Bank
10,M5J,"Harbourfront East, Union Station, Toronto Islands",3,Coffee Shop,Aquarium,Hotel,Café,Brewery,Fried Chicken Joint,Scenic Lookout,Restaurant,Italian Restaurant,Bar


The fourth cluster contains neighbourhoods where Coffee Shops/Cafes are fairly common venue types.

In [31]:
# Cluster 4: postal code, neighbourhood, and every column from position 5 onwards
cluster_columns = [0, 2, *range(5, toronto_merged.shape[1])]
toronto_merged[toronto_merged["Cluster Label"] == 4].iloc[:, cluster_columns]

Unnamed: 0,Postal Code,Neighbourhood,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
29,M4T,"Moore Park, Summerhill East",4,Summer Camp,Restaurant,Yoga Studio,Department Store,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant


The fifth cluster contains one neighbourhood with Summer Camps and Restaurants (again, the `toronto_venues` DataFrame shows that this neighbourhood contains only two types of venues).