# Segmenting and Clustering Neighborhoods in Toronto
---

## PART 1

#### Transform the data into a pandas dataframe

In [2]:
##Import basic libraries
import numpy as np 
import pandas as pd
import requests

from bs4 import BeautifulSoup # library to parse HTML and XML documents

print("Libraries imported.")

Libraries imported.


#### Build the code to scrape the following Wikipedia page, <https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M>

In [3]:
# send the GET request
data = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")

# parse data from the html into a beautifulsoup object
soup = BeautifulSoup(data.content, 'html.parser')

#### Creating dataframe consist of three columns: PostalCode, Borough, and Neighborhood

In [4]:
table = soup.find('tbody')
row = table.select('tr')
rec = [i.get_text() for i in row]

df = pd.DataFrame(rec)
df1 = df[0].str.split('\n', expand=True)
df2 = df1.rename(columns=df1.iloc[0])
df3 = df2.drop(df2.index[0])
df3.head()

Unnamed: 0,Unnamed: 1,Postal code,Unnamed: 3,Borough,Unnamed: 5,Neighborhood,Unnamed: 7
1,,M1A,,Not assigned,,,
2,,M2A,,Not assigned,,,
3,,M3A,,North York,,Parkwoods,
4,,M4A,,North York,,Victoria Village,
5,,M5A,,Downtown Toronto,,Regent Park / Harbourfront,


#### Ignoring cells with a borough that is Not assigned

In [5]:
df4 = df3[df3.Borough != 'Not assigned']
df4.head()

Unnamed: 0,Unnamed: 1,Postal code,Unnamed: 3,Borough,Unnamed: 5,Neighborhood,Unnamed: 7
3,,M3A,,North York,,Parkwoods,
4,,M4A,,North York,,Victoria Village,
5,,M5A,,Downtown Toronto,,Regent Park / Harbourfront,
6,,M6A,,North York,,Lawrence Manor / Lawrence Heights,
7,,M7A,,Downtown Toronto,,Queen's Park / Ontario Provincial Government,


#### Combining neighborhood with same postal code area

In [6]:
df5 = df4.groupby(['Postal code', 'Borough'], sort = False).agg(','.join)
df5.reset_index(inplace = True)
df5.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Regent Park / Harbourfront
3,M6A,North York,Lawrence Manor / Lawrence Heights
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


#### Assigning Borough to  "Not Assigned"  Neigborhood 

In [7]:
# For Neighborhood="Not assigned", assign the value the same as Borough
for index, row in df5.iterrows():
    if row["Neighborhood"] == "Not assigned":
        row["Neighborhood"] = row["Borough"]

df5.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Regent Park / Harbourfront
3,M6A,North York,Lawrence Manor / Lawrence Heights
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


In [8]:
df6 = df5.rename(columns={'Postal code': 'PostalCode'}) #Renaming Postal Code
df6.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Regent Park / Harbourfront
3,M6A,North York,Lawrence Manor / Lawrence Heights
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


####  print the number of rows of your dataframe

In [9]:
df6.shape

(103, 3)

## PART 2

#### Importing geographical coordinates of each postal code

In [10]:
url = "http://cocl.us/Geospatial_data"
df7 = pd.read_csv(url)
df7.rename(columns={'Postal Code': 'PostalCode'}, inplace=True) # chane the first column's name to be as the first dataframe
df7.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [11]:
## Combine Dataframe 6 and 7 to get a combined table.
df8 = pd.merge(df6, df7, on='PostalCode')
df8.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Regent Park / Harbourfront,43.65426,-79.360636
3,M6A,North York,Lawrence Manor / Lawrence Heights,43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.662301,-79.389494


In [12]:
## Finding number of Borough & Neighbourhood in the dataframe

print('The dataframe has {} Borough and {} Neighborhood.'.format(
        len(df8['Borough'].unique()),
        df8.shape[0]
    )
)

The dataframe has 10 Borough and 103 Neighborhood.


#### Creating a new dataframe for Borough that contain the word Toronto

In [13]:
df_toronto=df8[df8['Borough'].str.contains('Toronto')]
df_toronto

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
2,M5A,Downtown Toronto,Regent Park / Harbourfront,43.65426,-79.360636
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.662301,-79.389494
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
19,M4E,East Toronto,The Beaches,43.676357,-79.293031
20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
24,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
25,M6G,Downtown Toronto,Christie,43.669542,-79.422564
30,M5H,Downtown Toronto,Richmond / Adelaide / King,43.650571,-79.384568
31,M6H,West Toronto,Dufferin / Dovercourt Village,43.669005,-79.442259


## PART 3

In [14]:
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

print('Folium installed')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2020.4.5.1 |       hecc5488_0         146 KB  conda-forge
    certifi-2020.4.5.1         |   py36h9f0ad1d_0         151 KB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    geopy-1.21.0               |             py_0          58 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    openssl-1.1.1g             |       h516909a_0         2.1 MB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.50-py_0         conda-forge
    geopy:           1

ModuleNotFoundError: No module named 'folium'

In [15]:
!pip install folium
import folium # plotting library

print('Folium installed')

Collecting folium
[?25l  Downloading https://files.pythonhosted.org/packages/fd/a0/ccb3094026649cda4acd55bf2c3822bb8c277eb11446d13d384e5be35257/folium-0.10.1-py2.py3-none-any.whl (91kB)
[K     |████████████████████████████████| 92kB 12.4MB/s eta 0:00:01
[?25hCollecting branca>=0.3.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/81/6d/31c83485189a2521a75b4130f1fee5364f772a0375f81afff619004e5237/branca-0.4.0-py3-none-any.whl
Installing collected packages: branca, folium
Successfully installed branca-0.4.0 folium-0.10.1
Folium installed


In [16]:
address = 'Toronto'
geolocator = Nominatim(user_agent="Toronto")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))


The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [17]:
df_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
2,M5A,Downtown Toronto,Regent Park / Harbourfront,43.65426,-79.360636
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.662301,-79.389494
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
19,M4E,East Toronto,The Beaches,43.676357,-79.293031


In [27]:
# Creating a map where Borough contains only Toronto

Toronto_map = folium.Map(location=[latitude, longitude], zoom_start=10)

for lat, lng, borough, neighbor in zip(df_toronto['Latitude'], df_toronto['Longitude'], 
                                           df_toronto['Borough'], df_toronto['Neighborhood']):
    label = '{}, {}'.format(neighbor, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='green',
        fill=True,
        fill_color='#3140cc',
        fill_opacity=0.7,
        parse_html=False).add_to(Toronto_map)  
    
Toronto_map

#### Use oF Foursquare API to explore the neighborhoods

In [18]:
CLIENT_ID = 'BFNBDSDWET3P03IASXXA0IIWZJ5U20LRKAW2I1PXLW5KOR05'
CLIENT_SECRET = 'P2JYOLTXU5YUISQ5POOINUZGPE5MLLAVOLJPYT1DZMI05QYS'
VERSION = '20180604' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: BFNBDSDWET3P03IASXXA0IIWZJ5U20LRKAW2I1PXLW5KOR05
CLIENT_SECRET:P2JYOLTXU5YUISQ5POOINUZGPE5MLLAVOLJPYT1DZMI05QYS


#### Top 50 venues within a radius of 500 meters.

In [19]:
radius = 500
LIMIT = 50

venues = []

for latitude, longitude, post, borough, neighborhood in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['PostalCode'], df_toronto['Borough'], df_toronto['Neighborhood']):
    url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(
        CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, radius, LIMIT)
  
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    for venue in results:
        venues.append((
            post, 
            borough,
            neighborhood,
            latitude, 
            longitude, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [20]:
## Convert the venues list into a new DataFrame
venues_df = pd.DataFrame(venues)

## Define the column Names
venues_df.columns = ['PostalCode', 'Borough', 'Neighborhood', 'BoroughLatitude', 'BoroughLongitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head()

(1196, 9)


Unnamed: 0,PostalCode,Borough,Neighborhood,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,M5A,Downtown Toronto,Regent Park / Harbourfront,43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,M5A,Downtown Toronto,Regent Park / Harbourfront,43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,M5A,Downtown Toronto,Regent Park / Harbourfront,43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot
3,M5A,Downtown Toronto,Regent Park / Harbourfront,43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
4,M5A,Downtown Toronto,Regent Park / Harbourfront,43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa


In [21]:
## Find Number of venues for each PostalCode
venues_df.groupby(["PostalCode", "Borough", "Neighborhood"]).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
PostalCode,Borough,Neighborhood,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
M4E,East Toronto,The Beaches,5,5,5,5,5,5
M4K,East Toronto,The Danforth West / Riverdale,43,43,43,43,43,43
M4L,East Toronto,India Bazaar / The Beaches West,23,23,23,23,23,23
M4M,East Toronto,Studio District,40,40,40,40,40,40
M4N,Central Toronto,Lawrence Park,3,3,3,3,3,3
M4P,Central Toronto,Davisville North,8,8,8,8,8,8
M4R,Central Toronto,North Toronto West,22,22,22,22,22,22
M4S,Central Toronto,Davisville,35,35,35,35,35,35
M4T,Central Toronto,Moore Park / Summerhill East,1,1,1,1,1,1
M4V,Central Toronto,Summerhill West / Rathnelly / South Hill / Forest Hill SE / Deer Park,16,16,16,16,16,16


In [22]:
## Number of unique Venue Category
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))

There are 209 uniques categories.


In [23]:
## Some Examples
venues_df['VenueCategory'].unique()[:10]

array(['Bakery', 'Coffee Shop', 'Breakfast Spot', 'Distribution Center',
       'Spa', 'Restaurant', 'Park', 'Gym / Fitness Center',
       'Historic Site', 'Farmers Market'], dtype=object)

In [24]:
## Analyzing Area Based on Category

df_cat = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")
df_cat.head()

Unnamed: 0,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,...,Theater,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [25]:
# add postal, borough and neighborhood column back to dataframe
df_cat['PostalCode'] = venues_df['PostalCode'] 
df_cat['Borough'] = venues_df['Borough'] 
df_cat['Neighborhoods'] = venues_df['Neighborhood'] 

# move postal, borough and neighborhood column to the first column
fixed_columns = list(df_cat.columns[-3:]) + list(df_cat.columns[:-3])
df_cat = df_cat[fixed_columns]

print(df_cat.shape)
df_cat.head()

(1196, 212)


Unnamed: 0,PostalCode,Borough,Neighborhoods,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Theater,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,M5A,Downtown Toronto,Regent Park / Harbourfront,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M5A,Downtown Toronto,Regent Park / Harbourfront,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M5A,Downtown Toronto,Regent Park / Harbourfront,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M5A,Downtown Toronto,Regent Park / Harbourfront,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M5A,Downtown Toronto,Regent Park / Harbourfront,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [26]:
## Calculating Mean

df_cat_mean = df_cat.groupby(["PostalCode", "Borough", "Neighborhoods"]).mean().reset_index()

print(df_cat_mean.shape)
df_cat_mean

(39, 212)


Unnamed: 0,PostalCode,Borough,Neighborhoods,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Theater,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,M4E,East Toronto,The Beaches,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0
1,M4K,East Toronto,The Danforth West / Riverdale,0.0,0.0,0.0,0.0,0.0,0.0,0.023256,...,0.0,0.0,0.0,0.023256,0.0,0.0,0.0,0.0,0.0,0.023256
2,M4L,East Toronto,India Bazaar / The Beaches West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M4M,East Toronto,Studio District,0.0,0.0,0.0,0.0,0.0,0.0,0.05,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.025
4,M4N,Central Toronto,Lawrence Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,M4P,Central Toronto,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,M4R,Central Toronto,North Toronto West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.045455
7,M4S,Central Toronto,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,...,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,M4T,Central Toronto,Moore Park / Summerhill East,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,M4V,Central Toronto,Summerhill West / Rathnelly / South Hill / For...,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0


#### Display Top 5 venues for each PostalCode within Toronto

In [29]:
top = 5

indicators = ['st', 'nd', 'rd']

# create columns
areaColumns = ['PostalCode', 'Borough', 'Neighborhoods']
freqColumns = []
for ind in np.arange(top):
    try:
        freqColumns.append('{}{} Preferred Venue'.format(ind+1, indicators[ind]))
    except:
        freqColumns.append('{}th Preferred Venue'.format(ind+1))
columns = areaColumns+freqColumns

# create a new dataframe
sorted_venues = pd.DataFrame(columns=columns)
sorted_venues['PostalCode'] = df_cat_mean['PostalCode']
sorted_venues['Borough'] = df_cat_mean['Borough']
sorted_venues['Neighborhoods'] = df_cat_mean['Neighborhoods']

for ind in np.arange(df_cat_mean.shape[0]):
    row_categories = df_cat_mean.iloc[ind, :].iloc[3:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    sorted_venues.iloc[ind, 3:] = row_categories_sorted.index.values[0:top]

# neighborhoods_venues_sorted.sort_values(freqColumns, inplace=True)
print(sorted_venues.shape)
sorted_venues

(39, 8)


Unnamed: 0,PostalCode,Borough,Neighborhoods,1st Preferred Venue,2nd Preferred Venue,3rd Preferred Venue,4th Preferred Venue,5th Preferred Venue
0,M4E,East Toronto,The Beaches,Pub,Park,Trail,Neighborhood,Health Food Store
1,M4K,East Toronto,The Danforth West / Riverdale,Greek Restaurant,Coffee Shop,Italian Restaurant,Bookstore,Ice Cream Shop
2,M4L,East Toronto,India Bazaar / The Beaches West,Pizza Place,Sandwich Place,Fast Food Restaurant,Board Shop,Fish & Chips Shop
3,M4M,East Toronto,Studio District,Café,Coffee Shop,Gastropub,Brewery,American Restaurant
4,M4N,Central Toronto,Lawrence Park,Park,Bus Line,Swim School,Dumpling Restaurant,Donut Shop
5,M4P,Central Toronto,Davisville North,Dance Studio,Food & Drink Shop,Park,Breakfast Spot,Sandwich Place
6,M4R,Central Toronto,North Toronto West,Clothing Store,Coffee Shop,Yoga Studio,Restaurant,Bagel Shop
7,M4S,Central Toronto,Davisville,Sandwich Place,Dessert Shop,Pizza Place,Sushi Restaurant,Italian Restaurant
8,M4T,Central Toronto,Moore Park / Summerhill East,Playground,Yoga Studio,Dance Studio,Dumpling Restaurant,Donut Shop
9,M4V,Central Toronto,Summerhill West / Rathnelly / South Hill / For...,Coffee Shop,Pub,Supermarket,Bagel Shop,Bank


## Clustering

In [31]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

In [33]:
# set number of clusters
no_of_clusters = 5

toronto_cluster = df_cat_mean.drop(["PostalCode", "Borough", "Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=no_of_clusters, random_state=0).fit(toronto_cluster)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 3, 3, 3, 0, 3, 3, 3, 1, 3], dtype=int32)

In [34]:
# Create a new dataframe that includes the cluster as well as the top 5 venues for each neighborhood.
toronto_merged = df_toronto.copy()

# Add clustering labels
toronto_merged["ClusterLabels"] = kmeans.labels_

# Merge toronto cluster with toronto data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(sorted_venues.drop(["Borough", "Neighborhoods"], 1).set_index("PostalCode"), on="PostalCode")

print(toronto_merged.shape)
toronto_merged.head()

(39, 11)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,ClusterLabels,1st Preferred Venue,2nd Preferred Venue,3rd Preferred Venue,4th Preferred Venue,5th Preferred Venue
2,M5A,Downtown Toronto,Regent Park / Harbourfront,43.65426,-79.360636,0,Coffee Shop,Bakery,Park,Pub,Café
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.662301,-79.389494,3,Coffee Shop,Sushi Restaurant,Diner,Yoga Studio,Beer Bar
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,3,Coffee Shop,Café,Tea Room,Middle Eastern Restaurant,Clothing Store
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,3,Café,Gastropub,Coffee Shop,Cosmetics Shop,Farmers Market
19,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Pub,Park,Trail,Neighborhood,Health Food Store


In [35]:
# sort the results by Cluster Labels
print(toronto_merged.shape)
toronto_merged.sort_values(["ClusterLabels"], inplace=True)
toronto_merged

(39, 11)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,ClusterLabels,1st Preferred Venue,2nd Preferred Venue,3rd Preferred Venue,4th Preferred Venue,5th Preferred Venue
2,M5A,Downtown Toronto,Regent Park / Harbourfront,43.65426,-79.360636,0,Coffee Shop,Bakery,Park,Pub,Café
19,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Pub,Park,Trail,Neighborhood,Health Food Store
36,M5J,Downtown Toronto,Harbourfront East / Union Station / Toronto Is...,43.640816,-79.381752,0,Coffee Shop,Aquarium,Plaza,Brewery,Hotel
30,M5H,Downtown Toronto,Richmond / Adelaide / King,43.650571,-79.384568,1,Coffee Shop,American Restaurant,Café,Pizza Place,Steakhouse
69,M6P,West Toronto,High Park / The Junction South,43.661608,-79.464763,2,Mexican Restaurant,Café,Thai Restaurant,Arts & Crafts Store,Bookstore
74,M5R,Central Toronto,The Annex / North Midtown / Yorkville,43.67271,-79.405678,3,Sandwich Place,Café,Coffee Shop,Middle Eastern Restaurant,Burger Joint
75,M6R,West Toronto,Parkdale / Roncesvalles,43.64896,-79.456325,3,Gift Shop,Bookstore,Italian Restaurant,Dessert Shop,Eastern European Restaurant
79,M4S,Central Toronto,Davisville,43.704324,-79.38879,3,Sandwich Place,Dessert Shop,Pizza Place,Sushi Restaurant,Italian Restaurant
80,M5S,Downtown Toronto,University of Toronto / Harbord,43.662696,-79.400049,3,Café,Bar,Japanese Restaurant,Bakery,Restaurant
81,M6S,West Toronto,Runnymede / Swansea,43.651571,-79.48445,3,Coffee Shop,Pizza Place,Café,Italian Restaurant,Sushi Restaurant


In [37]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

In [39]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(no_of_clusters)
ys = [i+x+(i*x)**2 for i in range(no_of_clusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Add markers to the map
markers_colors = []
for lat, lon, post, bor, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], 
                                             toronto_merged['PostalCode'], toronto_merged['Borough'], 
                                             toronto_merged['Neighborhood'], toronto_merged['ClusterLabels']):
    label = folium.Popup('{} ({}): {} - Cluster {}'.format(bor, post, poi, cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters