# IBM Data Science Capstone Project Week 3 Part 3
AIM: Explore the data and do cluster analysis on neighborhood of Toronto

# Import libraries

In [2]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import folium
from geopy.geocoders import Nominatim
from sklearn.cluster import KMeans

# Get the data from Wikipedia page and save the table in 3 lists


In [3]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
result = requests.get(url).text

In [4]:
#Use BeautifulSoup to get the html format of data
soup = BeautifulSoup(result, 'html.parser')

In [5]:
#Create three lists 
postalcode, borough, neighborhood = [], [], []

In [6]:
#Find the table and get the data 
rows = soup.find('table').find_all('tr')

for row in rows:
    data = row.find_all('td')
    if len(data) > 0:
        postalcode.append(data[0].text)
        borough.append(data[1].text)
        neighborhood.append(data[2].text.rstrip('\n'))

In [7]:
#Create a dataframe and store the data in it
df = pd.DataFrame({'Postal Code' : postalcode,
                   'Borough' : borough,
                   'Neighborhood' : neighborhood})
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


# Drop the rows with Neighborhood as not assigned

In [8]:
df = df[df['Neighborhood'] != 'Not assigned'].reset_index(drop = True)

# Group the neighborhood in the same borough

In [9]:
df = df.groupby(['Postal Code', 'Borough'], as_index = False).agg(lambda x : ','.join(x))
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


# For neighborhood as "Not Assigned", make it same as Borough


In [10]:
for index, row in df.iterrows():
    if row['Neighborhood'] == 'Not assigned':
        row['Neighborhood'] = row['Borough']


# Print the shape of dataframe

In [11]:
df.shape

(103, 3)

# Get the longitude and latitude 


#  Load the geospatial coordinates file and save in Dataframe

In [12]:
coordinates = pd.read_csv('Geospatial_Coordinates.csv')
coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


# Merge the two dataframe to single one

In [13]:
toronto_data = df.merge(coordinates, on = 'Postal Code', how = 'left')
toronto_data.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


# Get the latitude and longitude of Toronto

In [14]:
address = 'Toronto'
geolocator = Nominatim(user_agent = 'my-application')
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
latitude, longitude

(43.653963, -79.387207)

# Create map of Toronto using Folium

In [16]:
toronto_map = folium.Map(location = [latitude, longitude], zoom_start = 12)

#Add the circle marker to the map
for lat, lng, bor, neigh in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Borough'], toronto_data['Neighborhood']):
    label = '{}, {}'.format(neigh, bor)
    label = folium.Popup(label, parse_html = True)
    folium.CircleMarker([lat, lng], radius = 3, popup = label, color = 'red', fill = True, fill_color = 'blue', fill_opacity = 1).add_to(toronto_map)
toronto_map

# Filter the boroughs with word Toronto in it

In [17]:
borough_names = list(toronto_data['Borough'].unique())
toronto_borough = []
for name in borough_names:
    if 'toronto' in name.lower():
        toronto_borough.append(name)
toronto_borough

['East Toronto', 'Central Toronto', 'Downtown Toronto', 'West Toronto']

# Modify the data frame with Borough containing Toronto

In [70]:
toronto_data = toronto_data[toronto_data['Borough'].isin(toronto_borough)].reset_index(drop = True)
toronto_data

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West,Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West,India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
5,M4P,Central Toronto,Davisville North,43.712751,-79.390197
6,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
7,M4S,Central Toronto,Davisville,43.704324,-79.38879
8,M4T,Central Toronto,"Moore Park,Summerhill East",43.689574,-79.38316
9,M4V,Central Toronto,"Deer Park,Forest Hill SE,Rathnelly,South Hill,...",43.686412,-79.400049


# Create the new map with filtered data

In [21]:
toronto_map = folium.Map(location = [latitude, longitude], zoom_start = 12)

#Add the circle marker to the map
for lat, lng, bor, neigh in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Borough'], toronto_data['Neighborhood']):
    label = '{}, {}'.format(neigh, bor)
    label = folium.Popup(label, parse_html = True)
    folium.CircleMarker([lat, lng], radius = 3, popup = label, color = 'red', fill = True, fill_color = 'blue', fill_opacity = 1).add_to(toronto_map)
toronto_map

# Exploring neighbors using FourSquare API and perform clustering

In [22]:
Client_Id = 'Your key'
Client_Secret_Key = 'Your Key'
Version = '20200306'      


# Get top 50 venues with radius of 300 meters

In [30]:
radius = 300
limit = 50
venues = []

for lat, lng, post, bor, neigh in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Postal Code'], toronto_data['Borough'], toronto_data['Neighborhood']):
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        Client_Id, Client_Secret_Key, Version, lat, lng, radius, limit)
    result = requests.get(url).json()
    result = result['response']['groups'][0]['items']
    
    for venue in result:
        venues.append((post, bor, neigh, lat, lng, venue['venue']['name'], venue['venue']['location']['lat'], venue['venue']['location']['lng'],venue['venue']['categories'][0]['name']))

# Create a dataframe and store the venue details

In [31]:
venues_df = pd.DataFrame(venues)

venues_df.columns = ['Postal Code', 'Borough', 'Neighborhood', 'Latitude_Borough', 'Longitude_Borough', 'Venue', 'Latitude_Venue', 'Longitude_Venue', 'Category']

venues_df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude_Borough,Longitude_Borough,Venue,Latitude_Venue,Longitude_Venue,Category
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,M4E,East Toronto,The Beaches,43.676357,-79.293031,Glen Stewart Ravine,43.6763,-79.294784,Other Great Outdoors
2,M4E,East Toronto,The Beaches,43.676357,-79.293031,Glen Stewart Park,43.675278,-79.294647,Park
3,M4K,East Toronto,"The Danforth West,Riverdale",43.679557,-79.352188,Pantheon,43.677621,-79.351434,Greek Restaurant
4,M4K,East Toronto,"The Danforth West,Riverdale",43.679557,-79.352188,MenEssentials,43.67782,-79.351265,Cosmetics Shop


# Group the dataframe to get unique number of venues

In [33]:
venues_df.groupby(['Postal Code', 'Borough', 'Neighborhood']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Latitude_Borough,Longitude_Borough,Venue,Latitude_Venue,Longitude_Venue,Category
Postal Code,Borough,Neighborhood,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
M4E,East Toronto,The Beaches,3,3,3,3,3,3
M4K,East Toronto,"The Danforth West,Riverdale",22,22,22,22,22,22
M4L,East Toronto,"The Beaches West,India Bazaar",15,15,15,15,15,15
M4M,East Toronto,Studio District,28,28,28,28,28,28
M4N,Central Toronto,Lawrence Park,1,1,1,1,1,1
M4P,Central Toronto,Davisville North,4,4,4,4,4,4
M4R,Central Toronto,North Toronto West,1,1,1,1,1,1
M4S,Central Toronto,Davisville,24,24,24,24,24,24
M4T,Central Toronto,"Moore Park,Summerhill East",2,2,2,2,2,2
M4V,Central Toronto,"Deer Park,Forest Hill SE,Rathnelly,South Hill,Summerhill West",6,6,6,6,6,6


# Data Analysis

In [39]:
#One hot Encoding
venues_dummies = pd.get_dummies(venues_df[['Category']], prefix = '', prefix_sep = '')
venues_dummies

Unnamed: 0,Adult Boutique,Airport Food Court,Airport Lounge,Airport Terminal,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Auto Workshop,BBQ Joint,...,Thrift / Vintage Store,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
808,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
809,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
810,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
811,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [42]:
#Add three basic columns
venues_dummies['Postal Code'] = venues_df['Postal Code']
venues_dummies['Borough'] = venues_df['Borough']
venues_dummies['Neighborhood'] = venues_df['Neighborhood']


True

In [49]:
#Move the three columns at the begining
cols = list(venues_dummies.columns[-2:]) + list(venues_dummies.columns[:-2])
venues_dummies = venues_dummies[cols]

venues_dummies[['Postal Code', 'Borough', 'Neighborhood'] + [c for c in venues_dummies if c not in ['Postal Code', 'Borough', 'Neighborhood']]]


Unnamed: 0,Postal Code,Borough,Neighborhood,Wings Joint,Yoga Studio,Adult Boutique,Airport Food Court,Airport Lounge,Airport Terminal,American Restaurant,...,Theater,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop
0,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
1,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M4K,East Toronto,"The Danforth West,Riverdale",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M4K,East Toronto,"The Danforth West,Riverdale",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
808,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
809,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
810,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
811,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Group by neighborhood and take the mean of the frequency

In [52]:
venues_grouped = venues_dummies.groupby(['Postal Code', 'Borough', 'Neighborhood']).mean().reset_index()
venues_grouped

Unnamed: 0,Postal Code,Borough,Neighborhood,Wings Joint,Yoga Studio,Adult Boutique,Airport Food Court,Airport Lounge,Airport Terminal,American Restaurant,...,Theater,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop
0,M4E,East Toronto,The Beaches,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0
1,M4K,East Toronto,"The Danforth West,Riverdale",0.0,0.045455,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,M4L,East Toronto,"The Beaches West,India Bazaar",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M4M,East Toronto,Studio District,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,...,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,M4N,Central Toronto,Lawrence Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,M4P,Central Toronto,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,M4R,Central Toronto,North Toronto West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,M4S,Central Toronto,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0
8,M4T,Central Toronto,"Moore Park,Summerhill East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,M4V,Central Toronto,"Deer Park,Forest Hill SE,Rathnelly,South Hill,...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Display top 5 venues of each Postal Code

In [57]:
lim = 5
import numpy as np

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
areaColumns = ['Postal Code', 'Borough', 'Neighborhood']
freqColumns = []
for ind in np.arange(lim):
    try:
        freqColumns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        freqColumns.append('{}th Most Common Venue'.format(ind+1))
columns = areaColumns+freqColumns

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Postal Code'] = venues_grouped['Postal Code']
neighborhoods_venues_sorted['Borough'] = venues_grouped['Borough']
neighborhoods_venues_sorted['Neighborhood'] = venues_grouped['Neighborhood']

for ind in np.arange(venues_grouped.shape[0]):
    row_categories = venues_grouped.iloc[ind, :].iloc[3:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    neighborhoods_venues_sorted.iloc[ind, 3:] = row_categories_sorted.index.values[0:lim]

# neighborhoods_venues_sorted.sort_values(freqColumns, inplace=True)
print(neighborhoods_venues_sorted.shape)
neighborhoods_venues_sorted


(38, 8)


Unnamed: 0,Postal Code,Borough,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M4E,East Toronto,The Beaches,Other Great Outdoors,Park,Trail,Wine Shop,Department Store
1,M4K,East Toronto,"The Danforth West,Riverdale",Greek Restaurant,Ice Cream Shop,Brewery,Fruit & Vegetable Store,Bubble Tea Shop
2,M4L,East Toronto,"The Beaches West,India Bazaar",Liquor Store,Pet Store,Hotel,Burrito Place,Ice Cream Shop
3,M4M,East Toronto,Studio District,Coffee Shop,Café,Italian Restaurant,Bar,Comfort Food Restaurant
4,M4N,Central Toronto,Lawrence Park,Photography Studio,Wine Shop,Department Store,Ethiopian Restaurant,Electronics Store
5,M4P,Central Toronto,Davisville North,Pool,Gym,Convenience Store,Breakfast Spot,Dessert Shop
6,M4R,Central Toronto,North Toronto West,Sushi Restaurant,Wine Shop,Dessert Shop,Ethiopian Restaurant,Electronics Store
7,M4S,Central Toronto,Davisville,Dessert Shop,Coffee Shop,Pizza Place,Café,Toy / Game Store
8,M4T,Central Toronto,"Moore Park,Summerhill East",Park,Construction & Landscaping,Wine Shop,Dessert Shop,Ethiopian Restaurant
9,M4V,Central Toronto,"Deer Park,Forest Hill SE,Rathnelly,South Hill,...",Coffee Shop,Light Rail Station,Liquor Store,Supermarket,Café


# CLUSTERING for 5 clusters

In [69]:
kclusters = 5

toronto_grouped_clustering = venues_grouped.drop(["Postal Code", "Borough", "Neighborhood"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
len(kmeans.labels_)

38

In [73]:
toronto_merged = toronto_data.copy()

toronto_merged = toronto_merged.drop(toronto_merged.index[34])

# add clustering labels
toronto_merged["Cluster Labels"] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.drop(["Borough", "Neighborhood"], 1).set_index("Postal Code"), on="Postal Code")

print(toronto_merged.shape)
toronto_merged.head() # check the last columns!

(38, 11)


Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Other Great Outdoors,Park,Trail,Wine Shop,Department Store
1,M4K,East Toronto,"The Danforth West,Riverdale",43.679557,-79.352188,0,Greek Restaurant,Ice Cream Shop,Brewery,Fruit & Vegetable Store,Bubble Tea Shop
2,M4L,East Toronto,"The Beaches West,India Bazaar",43.668999,-79.315572,0,Liquor Store,Pet Store,Hotel,Burrito Place,Ice Cream Shop
3,M4M,East Toronto,Studio District,43.659526,-79.340923,0,Coffee Shop,Café,Italian Restaurant,Bar,Comfort Food Restaurant
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,3,Photography Studio,Wine Shop,Department Store,Ethiopian Restaurant,Electronics Store


In [74]:
# sort the results by Cluster Labels
print(toronto_merged.shape)
toronto_merged.sort_values(["Cluster Labels"], inplace=True)
toronto_merged

(38, 11)


Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Other Great Outdoors,Park,Trail,Wine Shop,Department Store
20,M5K,Downtown Toronto,"Design Exchange,Toronto Dominion Centre",43.647177,-79.381576,0,Coffee Shop,Restaurant,Café,Bar,Bakery
21,M5L,Downtown Toronto,"Commerce Court,Victoria Hotel",43.648198,-79.379817,0,Coffee Shop,Café,Gastropub,Restaurant,Japanese Restaurant
24,M5R,Central Toronto,"The Annex,North Midtown,Yorkville",43.67271,-79.405678,0,Sandwich Place,Cheese Shop,Vegetarian / Vegan Restaurant,Burger Joint,Middle Eastern Restaurant
25,M5S,Downtown Toronto,"Harbord,University of Toronto",43.662696,-79.400049,0,Café,Sandwich Place,Coffee Shop,Pizza Place,Chinese Restaurant
26,M5T,Downtown Toronto,"Chinatown,Grange Park,Kensington Market",43.653206,-79.400049,0,Café,Chinese Restaurant,Bar,Vietnamese Restaurant,Bakery
27,M5V,Downtown Toronto,"CN Tower,Bathurst Quay,Island airport,Harbourf...",43.628947,-79.39442,0,Airport Food Court,Airport Lounge,Airport Terminal,Coffee Shop,Wine Shop
19,M5J,Downtown Toronto,"Harbourfront East,Toronto Islands,Union Station",43.640816,-79.381752,0,Coffee Shop,Café,Sporting Goods Shop,Sports Bar,Hotel
28,M5W,Downtown Toronto,Stn A PO Boxes 25 The Esplanade,43.646435,-79.374846,0,Cocktail Bar,Beer Bar,Moroccan Restaurant,Coffee Shop,Café
30,M6G,Downtown Toronto,Christie,43.669542,-79.422564,0,Grocery Store,Gym / Fitness Center,American Restaurant,Coffee Shop,Café


# Visulaize the clusters

In [77]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)
import matplotlib.cm as cm
import matplotlib.colors as colors
# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, post, bor, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Postal Code'], toronto_merged['Borough'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup('{} ({}): {} - Cluster {}'.format(bor, post, poi, cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Examine the clusters

# Cluster 1

In [78]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,East Toronto,0,Other Great Outdoors,Park,Trail,Wine Shop,Department Store
20,Downtown Toronto,0,Coffee Shop,Restaurant,Café,Bar,Bakery
21,Downtown Toronto,0,Coffee Shop,Café,Gastropub,Restaurant,Japanese Restaurant
24,Central Toronto,0,Sandwich Place,Cheese Shop,Vegetarian / Vegan Restaurant,Burger Joint,Middle Eastern Restaurant
25,Downtown Toronto,0,Café,Sandwich Place,Coffee Shop,Pizza Place,Chinese Restaurant
26,Downtown Toronto,0,Café,Chinese Restaurant,Bar,Vietnamese Restaurant,Bakery
27,Downtown Toronto,0,Airport Food Court,Airport Lounge,Airport Terminal,Coffee Shop,Wine Shop
19,Downtown Toronto,0,Coffee Shop,Café,Sporting Goods Shop,Sports Bar,Hotel
28,Downtown Toronto,0,Cocktail Bar,Beer Bar,Moroccan Restaurant,Coffee Shop,Café
30,Downtown Toronto,0,Grocery Store,Gym / Fitness Center,American Restaurant,Coffee Shop,Café


# Cluster 2

In [79]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
22,Central Toronto,1,Health & Beauty Service,Wine Shop,Dessert Shop,Falafel Restaurant,Ethiopian Restaurant


# Cluster 3

In [80]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
6,Central Toronto,2,Sushi Restaurant,Wine Shop,Dessert Shop,Ethiopian Restaurant,Electronics Store


# Cluster 4

In [81]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
4,Central Toronto,3,Photography Studio,Wine Shop,Department Store,Ethiopian Restaurant,Electronics Store


# Cluster 5

In [82]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
23,Central Toronto,4,Locksmith,Wine Shop,Farmers Market,Ethiopian Restaurant,Electronics Store


# Conclusion

In [85]:
print('Most of the neighborhoods fall into Cluster 1 which are mostly business areas with cafe, restaurants, supermarkets etc. Cluster 2 is just a garden, Cluster 3 are playground and park, Cluster 4 park and swim school, and lastly Cluster 5 park and trail.')

Most of the neighborhoods fall into Cluster 1 which are mostly business areas with cafe, restaurants, supermarkets etc. Cluster 2 is just a garden, Cluster 3 are playground and park, Cluster 4 park and swim school, and lastly Cluster 5 park and trail.
