# Segmenting and Clustering the Neighborhoods of Toronto

## Importing Libraries and all dependencies needed

In [88]:
!pip install folium
!pip install geopy

import pandas as pd # Library for data analsysis
import numpy as np # Library to handle data in a vectorized manner

from geopy.geocoders import Nominatim # Convert an address into latitude and longitude values

import requests # Library to handle requests
import json # Library to handle JSON files
from pandas.io.json import json_normalize # Tranform JSON file into a pandas dataframe

import folium # Map rendering library

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# Import k-means from clustering stage
from sklearn.cluster import KMeans



### Scaping data from website

In [89]:
# Scrap data from website and insert into a Pandas dataframe
df = pd.read_html("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M", header=0)[0]

# Make dataframe wider to display longer names
pd.options.display.max_colwidth = 200

# Make dataframe display 3o total rows for previewing purposes
pd.options.display.max_rows = 30

# Display dataframe
df

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
7,M8A,Not assigned,Not assigned
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"


In [90]:
# Print number of rows and columns in the dataframe
print("There are {} rows and {} columns in the Dataframe.".format(df.shape[0], df.shape[1]))

There are 180 rows and 3 columns in the Dataframe.


Cleaning the data

In [91]:
# If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough
df['Neighborhood'] = np.where(df['Neighborhood'] == 'Not assigned', df['Borough'], df['Neighborhood'])

# Ignore cells with a borough that is Not assigned
df = df[df.Borough != 'Not assigned'].reset_index(drop=True)

# Display dataframe
df

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [92]:
print("There are {} rows and {} columns in the Dataframe.".format(df.shape[0], df.shape[1]))

There are 103 rows and 3 columns in the Dataframe.


Reading coordinates for each Postal Code from csv file

In [93]:
# Read CSV of all the coordinates for each Postal Code
postal_data = pd.read_csv('http://cocl.us/Geospatial_data')

# Merge dataframes
df_toronto = pd.merge(df, postal_data, on='Postal Code')

# Display dataframe
df_toronto

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


### Generating the coordinates of Toronto

In [94]:
address = 'Toronto'
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The coordinates of Toronto are {}, {}'.format(latitude, longitude))

The coordinates of Toronto are 43.6534817, -79.3839347


### Using coordinates to create map of Toronto

In [95]:
# Create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# Add markers to map
for lat, lng, borough, neighborhood in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['Borough'], df_toronto['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)  

# Display map
map_toronto

Creating new DataFrame with only boroughs containing the word Etobicoke

In [96]:
# Create a new DataFrame with only boroughs that contain the word Etobicoke
df_toronto_york = df_toronto[df_toronto['Borough'].str.contains('Etobicoke')].reset_index(drop=True)

# Display dataframe
df_toronto_york

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
1,M9B,Etobicoke,"West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale",43.650943,-79.554724
2,M9C,Etobicoke,"Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood",43.643515,-79.577201
3,M9P,Etobicoke,Westmount,43.696319,-79.532242
4,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove Gardens, Richview Gardens",43.688905,-79.554724
5,M8V,Etobicoke,"New Toronto, Mimico South, Humber Bay Shores",43.605647,-79.501321
6,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamestown, Mount Olive, Beaumond Heights, Thistletown, Albion Gardens",43.739416,-79.588437
7,M8W,Etobicoke,"Alderwood, Long Branch",43.602414,-79.543484
8,M9W,Etobicoke,"Northwest, West Humber - Clairville",43.706748,-79.594054
9,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944


### Specifying size of new dataframe

In [97]:
print("There are {} rows and {} columns in the Dataframe.".format(df_toronto_york.shape[0], df_toronto_york.shape[1]))
print('There are {} uniques Boroughs that have the word Etobicoke in it.'.format(len(df_toronto_york['Borough'].unique())))
print('There names of the Boroughs that have the word Etobicoke in it are {}.'.format(df_toronto_york['Borough'].unique()))

There are 12 rows and 5 columns in the Dataframe.
There are 1 uniques Boroughs that have the word Etobicoke in it.
There names of the Boroughs that have the word Etobicoke in it are ['Etobicoke'].


### Using folium to create map of Toronto using latitude and longitude values

In [98]:
# Create map of Toronto using latitude and longitude values
map_toronto_york = folium.Map(location=[latitude, longitude], zoom_start=11)

# Add markers to map
for lat, lng, borough, neighborhood in zip(df_toronto_york['Latitude'], df_toronto_york['Longitude'], df_toronto_york['Borough'], df_toronto_york['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto_york)  

# Display map
map_toronto_york

# Foursquare Credentials

In [99]:
CLIENT_ID = '35L0RXF1IBSS51VOHJB1SB5SGNRNCEH0JIWRRXDUZXIXW0QD'
CLIENT_SECRET = '4N1FDBZWUTRZJOZCFEKREV4HQF11FGFC4ZSNK0ZJUVRYFGYE'
VERSION = '20180605'
print('Your credentials:')
print('CLIENT_ID:'+CLIENT_ID)
print('CLIENT_SECRET:'+CLIENT_SECRET)

Your credentials:
CLIENT_ID:35L0RXF1IBSS51VOHJB1SB5SGNRNCEH0JIWRRXDUZXIXW0QD
CLIENT_SECRET:4N1FDBZWUTRZJOZCFEKREV4HQF11FGFC4ZSNK0ZJUVRYFGYE


# Exploring Nearby Venues

In [100]:
# Create definition to get nearby venues
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT=100):
    
    venues_list=[] # Create empty list to store venues
    
    for name, lat, lng in zip(names, latitudes, longitudes):
            
        # Create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET,
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # Make the GET request
        result = requests.get(url).json()
        
        results = result["response"]['groups'][0]['items']

        # Append only relevant information for each nearby venue to the list
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'], 
            v['venue']['categories'][0]['name']) for v in results])

    # Create Dataframe storing all the nearby venues and any relavant information
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [101]:
# Call function definition to get nearby venues
explore_neighborhoods = getNearbyVenues(names=df_toronto_york['Neighborhood'],
                                   latitudes=df_toronto_york['Latitude'],
                                   longitudes=df_toronto_york['Longitude']
                                  )

# Display dataframe
explore_neighborhoods

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale",43.650943,-79.554724,Haus Of Vine,43.649251,-79.549214,Brewery
1,"Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood",43.643515,-79.577201,LCBO,43.642099,-79.576592,Liquor Store
2,"Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood",43.643515,-79.577201,The Beer Store,43.641313,-79.576925,Beer Store
3,"Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood",43.643515,-79.577201,Starbucks,43.641312,-79.576924,Coffee Shop
4,"Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood",43.643515,-79.577201,Pizza Hut,43.641845,-79.576556,Pizza Place
5,"Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood",43.643515,-79.577201,Shoppers Drug Mart,43.641312,-79.576924,Pharmacy
6,"Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood",43.643515,-79.577201,Cafe Sympatico,43.641820,-79.576721,Café
7,"Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood",43.643515,-79.577201,Pet Valu,43.641667,-79.577050,Pet Store
8,Westmount,43.696319,-79.532242,Mayflower Chinese Food,43.692753,-79.531566,Chinese Restaurant
9,Westmount,43.696319,-79.532242,Starbucks,43.696338,-79.533398,Coffee Shop


In [102]:
print('{} venues were returned by FourSquare.'.format(explore_neighborhoods.shape[0]))

71 venues were returned by FourSquare.


### Grouping Nearby Venues

In [103]:
df_count = explore_neighborhoods.groupby('Neighborhood').count()
df_count = df_count.drop(['Neighborhood Longitude', 'Venue', 'Venue Latitude', 'Venue Longitude', 'Venue Category'], axis=1)
df_count = df_count.rename(columns={'Neighborhood Latitude': 'Number of Venues'})
df_count

Unnamed: 0_level_0,Number of Venues
Neighborhood,Unnamed: 1_level_1
"Alderwood, Long Branch",8
"Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood",7
"Kingsview Village, St. Phillips, Martin Grove Gardens, Richview Gardens",4
"Mimico NW, The Queensway West, South of Bloor, Kingsway Park South West, Royal York South West",13
"New Toronto, Mimico South, Humber Bay Shores",16
"Northwest, West Humber - Clairville",2
"Old Mill South, King's Mill Park, Sunnylea, Humber Bay, Mimico NE, The Queensway East, Royal York South East, Kingsway Park South East",1
"South Steeles, Silverstone, Humbergate, Jamestown, Mount Olive, Beaumond Heights, Thistletown, Albion Gardens",10
"The Kingsway, Montgomery Road, Old Mill North",1
"West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale",1


In [105]:
print('There are {} unique categories.'.format(len(explore_neighborhoods['Venue Category'].unique())))
print('The names of the categories are {}.'.format(explore_neighborhoods['Venue Category'].unique()))

There are 40 unique categories.
The names of the categories are ['Brewery' 'Liquor Store' 'Beer Store' 'Coffee Shop' 'Pizza Place'
 'Pharmacy' 'Café' 'Pet Store' 'Chinese Restaurant' 'Sandwich Place'
 'Middle Eastern Restaurant' 'Intersection' 'Discount Store' 'Park'
 'Bus Line' 'Restaurant' 'Bakery' 'Fried Chicken Joint'
 'American Restaurant' 'Fast Food Restaurant' 'Seafood Restaurant'
 'Hobby Shop' 'Gym' 'Flower Shop' 'Mexican Restaurant' 'Grocery Store'
 'Video Store' 'Pub' 'Athletics & Sports' 'Rental Car Location'
 'Drugstore' 'River' 'Baseball Field' 'Wings Joint' 'Burger Joint'
 'Supplement Shop' 'Convenience Store' 'Hardware Store' 'Tanning Salon'
 'Kids Store'].


In [106]:
# One hot encoding
toronto_onehot = pd.get_dummies(explore_neighborhoods[['Venue Category']], prefix="", prefix_sep="")

# Add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = explore_neighborhoods['Neighborhood'] 

# Move neighborhood column to the first column
fixed_columns = list(toronto_onehot.columns[-1:]) + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

# Display Dataframe
toronto_onehot

Unnamed: 0,Neighborhood,American Restaurant,Athletics & Sports,Bakery,Baseball Field,Beer Store,Brewery,Burger Joint,Bus Line,Café,...,Pub,Rental Car Location,Restaurant,River,Sandwich Place,Seafood Restaurant,Supplement Shop,Tanning Salon,Video Store,Wings Joint
0,"West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale",0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood",0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,"Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,"Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood",0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
7,"Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,Westmount,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,Westmount,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [77]:
# Group and get the mean
toronto_grouped = toronto_onehot.groupby(['Neighborhood']).mean().reset_index()

# Display Dataframe
toronto_grouped

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bagel Shop,Bakery,...,Thai Restaurant,Theater,Toy / Game Store,Trail,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Women's Store,Yoga Studio
0,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bedford Park, Lawrence Manor East",0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,...,0.041667,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Caledonia-Fairbanks,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0
4,"Del Ray, Mount Dennis, Keelsdale and Silverthorn",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Don Mills,0.0,0.0,0.0,0.037037,0.0,0.074074,0.037037,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Downsview,0.0,0.066667,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"East Toronto, Broadview North (Old East York)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Fairview, Henry Farm, Oriole",0.0,0.0,0.015625,0.0,0.0,0.015625,0.0,0.0,0.015625,...,0.0,0.015625,0.015625,0.0,0.015625,0.0,0.0,0.0,0.046875,0.0
9,Glencairn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Identifying most common venues

In [107]:
# Create definition to get most common venues
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Top 10 venues

In [108]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# Create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# Create a new dataframe
top_venues = pd.DataFrame(columns=columns)
top_venues['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    top_venues.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

# Display Dataframe
top_venues

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Bank,Fried Chicken Joint,Supermarket,Deli / Bodega,Sandwich Place,Bridal Shop,Shopping Mall,Middle Eastern Restaurant,Diner
1,Bayview Village,Chinese Restaurant,Café,Bank,Japanese Restaurant,Yoga Studio,Distribution Center,Construction & Landscaping,Convenience Store,Cosmetics Shop,Curling Ice
2,"Bedford Park, Lawrence Manor East",Coffee Shop,Restaurant,Sandwich Place,Sushi Restaurant,Italian Restaurant,Indian Restaurant,Comfort Food Restaurant,Café,Butcher,Pharmacy
3,Caledonia-Fairbanks,Park,Women's Store,Convenience Store,Bar,Distribution Center,Comfort Food Restaurant,Construction & Landscaping,Cosmetics Shop,Curling Ice,Deli / Bodega
4,"Del Ray, Mount Dennis, Keelsdale and Silverthorn",Discount Store,Sandwich Place,Bar,Skating Rink,Yoga Studio,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop
5,Don Mills,Gym,Japanese Restaurant,Coffee Shop,Restaurant,Asian Restaurant,Beer Store,Dim Sum Restaurant,Italian Restaurant,Sandwich Place,Discount Store
6,Downsview,Park,Grocery Store,Bank,Business Service,Liquor Store,Discount Store,Shopping Mall,Baseball Field,Snack Place,Athletics & Sports
7,"East Toronto, Broadview North (Old East York)",Park,Convenience Store,Intersection,Yoga Studio,Discount Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Cosmetics Shop,Curling Ice
8,"Fairview, Henry Farm, Oriole",Clothing Store,Coffee Shop,Fast Food Restaurant,Women's Store,Restaurant,Juice Bar,Food Court,Shoe Store,Bank,Japanese Restaurant
9,Glencairn,Pizza Place,Bakery,Japanese Restaurant,Italian Restaurant,Pub,Dim Sum Restaurant,Dessert Shop,Department Store,Deli / Bodega,Curling Ice


## Clustering venues

In [109]:
# Set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# Run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# Check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 0, 1, 0, 0, 0, 1, 0, 0], dtype=int32)

In [110]:
# Add clustering labels
top_venues.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df_toronto_york

# Merge Dataframes to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(top_venues.set_index('Neighborhood'), on='Neighborhood')

# Display Dataframe
toronto_merged

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242,,,,,,,,,,,
1,M9B,Etobicoke,"West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale",43.650943,-79.554724,,,,,,,,,,,
2,M9C,Etobicoke,"Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood",43.643515,-79.577201,,,,,,,,,,,
3,M9P,Etobicoke,Westmount,43.696319,-79.532242,,,,,,,,,,,
4,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove Gardens, Richview Gardens",43.688905,-79.554724,,,,,,,,,,,
5,M8V,Etobicoke,"New Toronto, Mimico South, Humber Bay Shores",43.605647,-79.501321,,,,,,,,,,,
6,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamestown, Mount Olive, Beaumond Heights, Thistletown, Albion Gardens",43.739416,-79.588437,,,,,,,,,,,
7,M8W,Etobicoke,"Alderwood, Long Branch",43.602414,-79.543484,,,,,,,,,,,
8,M9W,Etobicoke,"Northwest, West Humber - Clairville",43.706748,-79.594054,,,,,,,,,,,
9,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944,,,,,,,,,,,


## Creating map clusters

In [113]:
# Create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# Set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi)+'Cluster'+str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color = rainbow[cluster-1],
        fill=True,
        fill_color = rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)

# Display map
map_clusters

TypeError: list indices must be integers or slices, not float

### Cluster 1

In [114]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue


## Cluster 2

In [115]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue


## Cluster 3

In [116]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue


## Cluster 4

In [117]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue


## Cluster 5

In [118]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue


### Conclusion
Clusters not populated due to error involving NaN values in df