<a href="https://colab.research.google.com/github/rnpcs/IBM-applied-ds/blob/main/Segmenting_and_Clustering_Toronto.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [170]:
#!pip install geocoder
#!pip install folium


In [171]:
import os

import folium
import geocoder
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from sklearn.cluster import KMeans

# Part 1 - Building a DataFrame

In [172]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

In [173]:
# Fetch the page; the timeout stops a stalled connection from hanging the kernel.
request = requests.get(url, timeout=30)
# Fail here with a clear HTTP error instead of parsing an error page further down.
request.raise_for_status()

In [174]:
soup = BeautifulSoup(request.text, 'html.parser')

In [175]:
# Build one record per assigned postal code from the <td> cells of the first table on the page.
table_contents = []
table = soup.find('table')
for row in table.find_all('td'):  # find_all is the current spelling of the legacy findAll alias
    # Cells without the <span>/<p> children carry nothing to parse; skip them
    # instead of raising AttributeError.
    if row.span is None or row.p is None:
        continue
    span_text = row.span.text
    if span_text == 'Not assigned':
        continue
    # Text before the first '(' is used as the borough, text after it as the neighborhood list.
    parts = span_text.split('(')
    # A cell without parentheses yields an empty neighborhood rather than an IndexError.
    raw_neighborhoods = parts[1] if len(parts) > 1 else ''
    cell = {
        'PostalCode': row.p.text[:3],
        'Borough': parts[0],
        'Neighborhood': raw_neighborhoods.strip(')').replace(' /', ',').replace(')', ' ').strip(' '),
    }
    table_contents.append(cell)


In [176]:
# Borough strings that came out of the scrape fused with extra text, mapped to readable names.
borough_fixes = {
    'Downtown TorontoStn A PO Boxes25 The Esplanade': 'Downtown Toronto Stn A',
    'East TorontoBusiness reply mail Processing Centre969 Eastern': 'East Toronto Business',
    'EtobicokeNorthwest': 'Etobicoke Northwest',
    'East YorkEast Toronto': 'East York/East Toronto',
    'MississaugaCanada Post Gateway Processing Centre': 'Mississauga',
}

df = pd.DataFrame(table_contents)
df['Borough'] = df['Borough'].replace(borough_fixes)

In [177]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government


In [178]:
df.shape[0]

103

# Part 2 - Adding Latitude and Longitude

In [179]:
# Load the table that maps each postal code to a latitude/longitude pair.
# NOTE(review): '/content/' looks like the Colab working directory, so the CSV
# presumably has to be uploaded there before this cell runs — confirm.
geo = pd.read_csv('/content/Geospatial_Coordinates.csv')

In [180]:
geo.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [181]:
# Attach coordinates to every postal code, then drop the duplicate key column that came from `geo`.
df = (
    df.merge(geo, how='inner', left_on='PostalCode', right_on='Postal Code')
      .drop(columns='Postal Code')
)

In [182]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494


# Part 3 - Explore any Neighborhood that contains 'park' in the name

Create a new DataFrame containing only the neighborhoods whose name includes 'park'.

In [183]:
# Keep the rows whose neighborhood name mentions "park" in any letter case;
# rows with a missing name count as "no match".
has_park = df['Neighborhood'].str.lower().str.contains('park', na=False)
new_df = df[has_park]

In [184]:
# Foursquare credentials are read from the environment so that secrets are never
# pasted into (and committed with) the notebook. They fall back to empty strings
# when the variables are unset.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')  # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')  # your Foursquare Secret
VERSION = '20180605'  # Foursquare API version
LIMIT = 100  # A default Foursquare API limit value

Function for getting nearby venues

In [185]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Collect the Foursquare venues around each neighborhood into one DataFrame.

    Parameters
    ----------
    names, latitudes, longitudes : iterable
        Parallel sequences; they are zipped into one (name, lat, lng) triple
        per neighborhood.
    radius : int, default 500
        Sent as the ``radius`` query parameter of the explore request.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame is
        empty, but still has these columns, when no venue is found.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and encode the query string instead of formatting it by hand.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }

        # make the GET request; the timeout keeps a stalled call from blocking the notebook
        response = requests.get('https://api.foursquare.com/v2/venues/explore',
                                params=params, timeout=30)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            categories = venue['categories']
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue without any category gets None instead of raising IndexError
                categories[0]['name'] if categories else None))

    # Passing `columns` to the constructor keeps the schema even when `rows` is empty;
    # assigning `.columns` afterwards raises ValueError on an empty frame.
    return pd.DataFrame(rows, columns=columns)

In [186]:
# Fetch the venues around every "park" neighborhood, using the default search radius.
tor_park_venues = getNearbyVenues(
    names=new_df['Neighborhood'],
    latitudes=new_df['Latitude'],
    longitudes=new_df['Longitude'],
)

Parkwoods
Regent Park, Harbourfront
Parkview Hill, Woodbine Gardens
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Berczy Park
Thorncliffe Park
Northwood Park, York University
Kennedy Park, Ionview, East Birchmount Park
Brockton, Parkdale Village, Exhibition Place
North Park, Maple Leaf Park, Upwood Park
Bedford Park, Lawrence Manor East
Lawrence Park
Dorset Park, Wexford Heights, Scarborough Town Centre
High Park, The Junction South
Parkdale, Roncesvalles
Moore Park, Summerhill East
Kensington Market, Chinatown, Grange Park
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
Old Mill South, King's Mill Park, Sunnylea, Humber Bay, Mimico NE, The Queensway East, Royal York South East, Kingsway Park South East
Mimico NW, The Queensway West, South of Bloor, Kingsway Park South West, Royal York South West


In [187]:
tor_park_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,KFC,43.754387,-79.333021,Fast Food Restaurant
1,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
2,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,"Regent Park, Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
4,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery


In [188]:
tor_park_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Bedford Park, Lawrence Manor East",23,23,23,23,23,23
Berczy Park,57,57,57,57,57,57
"Brockton, Parkdale Village, Exhibition Place",24,24,24,24,24,24
"Dorset Park, Wexford Heights, Scarborough Town Centre",6,6,6,6,6,6
"High Park, The Junction South",24,24,24,24,24,24
"Kennedy Park, Ionview, East Birchmount Park",5,5,5,5,5,5
"Kensington Market, Chinatown, Grange Park",66,66,66,66,66,66
Lawrence Park,3,3,3,3,3,3
"Mimico NW, The Queensway West, South of Bloor, Kingsway Park South West, Royal York South West",16,16,16,16,16,16
"Moore Park, Summerhill East",2,2,2,2,2,2


Exploring the neighborhoods

In [189]:
# one hot encoding: one indicator column per venue category
park_oneshot = pd.get_dummies(tor_park_venues[['Venue Category']], prefix="", prefix_sep="")

# A venue category that is itself called "Neighborhood" would collide with the key
# column added below, so it is dropped first (the plain assignment used to overwrite it).
park_oneshot = park_oneshot.drop(columns='Neighborhood', errors='ignore')

# Put the neighborhood name in the first column directly. This does not rely on the
# new column landing last, which is not the case when the name already existed.
park_oneshot.insert(0, 'Neighborhood', tor_park_venues['Neighborhood'])

park_oneshot.head()

Unnamed: 0,Neighborhood,American Restaurant,Antique Shop,Art Gallery,Arts & Crafts Store,Athletics & Sports,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Basketball Court,Basketball Stadium,Beach,Bed & Breakfast,Beer Bar,Beer Store,Belgian Restaurant,Bistro,Bookstore,Breakfast Spot,Brewery,Burger Joint,Burrito Place,Bus Line,Bus Station,Butcher,Café,Cajun / Creole Restaurant,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,...,Office,Organic Grocery,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Poutine Place,Pub,Record Shop,Restaurant,Sandwich Place,Seafood Restaurant,Shoe Store,Smoothie Shop,Social Club,Spa,Speakeasy,Sporting Goods Shop,Stadium,Steakhouse,Summer Camp,Supermarket,Supplement Shop,Sushi Restaurant,Swim School,Tailor Shop,Tanning Salon,Tea Room,Thai Restaurant,Theater,Thrift / Vintage Store,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Regent Park, Harbourfront",0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


Check out details before clustering

In [190]:
# Average the indicator columns per neighborhood: each value becomes the share of
# that neighborhood's venues falling into the category.
park_grouped = park_oneshot.groupby('Neighborhood', as_index=False).mean()
park_grouped

Unnamed: 0,Neighborhood,American Restaurant,Antique Shop,Art Gallery,Arts & Crafts Store,Athletics & Sports,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Basketball Court,Basketball Stadium,Beach,Bed & Breakfast,Beer Bar,Beer Store,Belgian Restaurant,Bistro,Bookstore,Breakfast Spot,Brewery,Burger Joint,Burrito Place,Bus Line,Bus Station,Butcher,Café,Cajun / Creole Restaurant,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,...,Office,Organic Grocery,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Poutine Place,Pub,Record Shop,Restaurant,Sandwich Place,Seafood Restaurant,Shoe Store,Smoothie Shop,Social Club,Spa,Speakeasy,Sporting Goods Shop,Stadium,Steakhouse,Summer Camp,Supermarket,Supplement Shop,Sushi Restaurant,Swim School,Tailor Shop,Tanning Salon,Tea Room,Thai Restaurant,Theater,Thrift / Vintage Store,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Yoga Studio
0,"Bedford Park, Lawrence Manor East",0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.086957,0.043478,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.043478,0.043478,0.0,0.043478,0.0,0.043478,0.086957,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Berczy Park,0.0,0.0,0.017544,0.0,0.0,0.017544,0.052632,0.0,0.0,0.0,0.0,0.017544,0.017544,0.0,0.035088,0.0,0.0,0.017544,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.035088,0.0,0.0,0.0,0.017544,0.052632,0.087719,0.017544,0.017544,0.0,...,0.0,0.0,0.017544,0.0,0.0,0.017544,0.0,0.0,0.017544,0.0,0.035088,0.0,0.035088,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0
2,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.041667,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.083333,0.0,0.0,0.0,...,0.041667,0.0,0.0,0.041667,0.041667,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Dorset Park, Wexford Heights, Scarborough Town...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0
4,"High Park, The Junction South",0.0,0.041667,0.0,0.041667,0.0,0.0,0.041667,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Kennedy Park, Ionview, East Birchmount Park",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"Kensington Market, Chinatown, Grange Park",0.0,0.0,0.0,0.015152,0.0,0.0,0.045455,0.0,0.045455,0.0,0.0,0.0,0.0,0.015152,0.015152,0.0,0.015152,0.0,0.0,0.015152,0.015152,0.030303,0.015152,0.0,0.0,0.0,0.075758,0.0,0.030303,0.015152,0.0,0.0,0.0,0.0,0.015152,0.045455,0.015152,0.0,0.0,...,0.0,0.015152,0.030303,0.0,0.0,0.015152,0.015152,0.015152,0.0,0.015152,0.0,0.0,0.0,0.0,0.015152,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015152,0.0,0.0,0.0,0.0,0.0,0.045455,0.045455,0.0,0.015152,0.0,0.0
7,Lawrence Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Mimico NW, The Queensway West, South of Bloor,...",0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0
9,"Moore Park, Summerhill East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Check the top 5 venue categories for each neighborhood

In [191]:
num_top_venues = 5

# Print, for every neighborhood, its most frequent venue categories.
for hood in park_grouped['Neighborhood']:
    print("----" + hood + "----")
    # Transpose the neighborhood's single row so that each category becomes a row.
    temp = park_grouped[park_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue', 'freq']
    # Row 0 holds the neighborhood name itself, so it is dropped before the numeric conversion.
    temp = temp.iloc[1:].astype({'freq': float}).round({'freq': 2})
    ranked = temp.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

----Bedford Park, Lawrence Manor East----
                 venue  freq
0   Italian Restaurant  0.09
1          Coffee Shop  0.09
2       Sandwich Place  0.09
3  American Restaurant  0.04
4     Sushi Restaurant  0.04


----Berczy Park----
                venue  freq
0         Coffee Shop  0.09
1        Cocktail Bar  0.05
2              Bakery  0.05
3  Seafood Restaurant  0.04
4  Italian Restaurant  0.04


----Brockton, Parkdale Village, Exhibition Place----
            venue  freq
0            Café  0.12
1  Breakfast Spot  0.08
2          Bakery  0.08
3     Coffee Shop  0.08
4      Restaurant  0.04


----Dorset Park, Wexford Heights, Scarborough Town Centre----
                    venue  freq
0       Indian Restaurant  0.33
1               Pet Store  0.17
2   Vietnamese Restaurant  0.17
3      Chinese Restaurant  0.17
4  Thrift / Vintage Store  0.17


----High Park, The Junction South----
                venue  freq
0     Thai Restaurant  0.08
1                Café  0.08
2  Mexican Rest

Clustering

In [192]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first entry of ``row`` is skipped (the caller passes the neighborhood
    name there). The remaining entries are sorted in descending order and the
    index labels of the first ``num_top_venues`` are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [193]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues: "1st", "2nd", "3rd", then "4th", "5th", ...
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    # An explicit bounds check replaces the bare `except:`, which would also have
    # hidden unrelated errors.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# Rank the categories of every neighborhood, then build the frame in one step
# instead of filling an empty frame row by row.
top_venues = [
    return_most_common_venues(park_grouped.iloc[ind, :], num_top_venues)
    for ind in np.arange(park_grouped.shape[0])
]
neighborhoods_venues_sorted = pd.DataFrame(top_venues, columns=columns[1:], index=park_grouped.index)
neighborhoods_venues_sorted.insert(0, 'Neighborhood', park_grouped['Neighborhood'])

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Bedford Park, Lawrence Manor East",Coffee Shop,Sandwich Place,Italian Restaurant,Sushi Restaurant,Comfort Food Restaurant,Liquor Store,Café,Pharmacy,Pizza Place,Butcher
1,Berczy Park,Coffee Shop,Bakery,Cocktail Bar,Cheese Shop,Italian Restaurant,Restaurant,Beer Bar,Seafood Restaurant,Farmers Market,Bagel Shop
2,"Brockton, Parkdale Village, Exhibition Place",Café,Breakfast Spot,Bakery,Coffee Shop,Italian Restaurant,Stadium,Nightclub,Office,Convenience Store,Performing Arts Venue
3,"Dorset Park, Wexford Heights, Scarborough Town...",Indian Restaurant,Pet Store,Vietnamese Restaurant,Chinese Restaurant,Thrift / Vintage Store,Department Store,Dog Run,Distribution Center,Discount Store,Diner
4,"High Park, The Junction South",Mexican Restaurant,Thai Restaurant,Café,Park,Bookstore,Gastropub,Grocery Store,Flea Market,Fast Food Restaurant,Italian Restaurant


In [194]:
# set number of clusters
kclusters = 5

# Keep only the category-frequency columns as features. `columns=` replaces the
# positional axis argument (`drop('Neighborhood', 1)`), which pandas 2.0 no longer accepts.
park_grouped_cluster = park_grouped.drop(columns='Neighborhood')

# run k-means clustering; n_init is pinned so the labels do not shift with
# scikit-learn's changed default for that parameter
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(park_grouped_cluster)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 0, 0, 0, 0, 0, 3, 0, 2], dtype=int32)

In [195]:
# add clustering labels; a column left over from an earlier run of this cell is
# dropped first because DataFrame.insert raises ValueError if the column already exists
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# join the labels and ranked venues onto the "park" neighborhoods, which carry
# the latitude/longitude of each neighborhood
park_merged = new_df.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

park_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,4,Food & Drink Shop,Park,Fast Food Restaurant,Department Store,Dog Run,Distribution Center,Discount Store,Diner,Dessert Shop,Yoga Studio
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,0,Coffee Shop,Park,Pub,Café,Bakery,Theater,Breakfast Spot,Shoe Store,Restaurant,Performing Arts Venue
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937,0,Pizza Place,Flea Market,Gastropub,Breakfast Spot,Athletics & Sports,Pet Store,Pharmacy,Bank,Gym / Fitness Center,Café
11,M9B,Etobicoke,"West Deane Park, Princess Gardens, Martin Grov...",43.650943,-79.554724,1,Bakery,Yoga Studio,Dessert Shop,Donut Shop,Doner Restaurant,Dog Run,Distribution Center,Discount Store,Diner,Department Store
20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,0,Coffee Shop,Bakery,Cocktail Bar,Cheese Shop,Italian Restaurant,Restaurant,Beer Bar,Seafood Restaurant,Farmers Market,Bagel Shop


Clustering visualization

In [196]:
# create map
latitude = 43.6532
longitude = -79.3832
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(park_merged['Latitude'], park_merged['Longitude'], park_merged['Neighborhood'], park_merged['Cluster Labels']):
    # A neighborhood that received no label (NaN after the join) cannot be coloured; skip it.
    if pd.isna(cluster):
        continue
    # The label column turns float as soon as it contains a NaN; list indexing needs an int.
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # cluster - 1 wraps label 0 around to the last palette entry, so every label
    # still gets its own colour.
    marker_color = rainbow[cluster - 1]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

Check the value of each Cluster

In [197]:
park_merged['Cluster Labels'].value_counts()

0    15
3     2
4     1
2     1
1     1
Name: Cluster Labels, dtype: int64

Explore Cluster 0

In [198]:
# Rows of cluster 0: the borough (column 1) plus everything from the cluster label onwards.
park_merged[park_merged['Cluster Labels'] == 0].iloc[:, [1, *range(5, park_merged.shape[1])]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,0,Coffee Shop,Park,Pub,Café,Bakery,Theater,Breakfast Spot,Shoe Store,Restaurant,Performing Arts Venue
8,East York,0,Pizza Place,Flea Market,Gastropub,Breakfast Spot,Athletics & Sports,Pet Store,Pharmacy,Bank,Gym / Fitness Center,Café
20,Downtown Toronto,0,Coffee Shop,Bakery,Cocktail Bar,Cheese Shop,Italian Restaurant,Restaurant,Beer Bar,Seafood Restaurant,Farmers Market,Bagel Shop
29,East York,0,Indian Restaurant,Gym,Park,Pharmacy,Pizza Place,Restaurant,Sandwich Place,Discount Store,Burger Joint,Intersection
34,North York,0,Bar,Caribbean Restaurant,Massage Studio,Metro Station,Coffee Shop,Furniture / Home Store,Convenience Store,Creperie,Cuban Restaurant,Construction & Landscaping
38,Scarborough,0,Hobby Shop,Convenience Store,Bus Station,Department Store,Coffee Shop,Creperie,Cuban Restaurant,Construction & Landscaping,Dessert Shop,Dumpling Restaurant
43,West Toronto,0,Café,Breakfast Spot,Bakery,Coffee Shop,Italian Restaurant,Stadium,Nightclub,Office,Convenience Store,Performing Arts Venue
49,North York,0,Basketball Court,Park,Construction & Landscaping,Trail,Bakery,Yoga Studio,Doner Restaurant,Dog Run,Distribution Center,Discount Store
55,North York,0,Coffee Shop,Sandwich Place,Italian Restaurant,Sushi Restaurant,Comfort Food Restaurant,Liquor Store,Café,Pharmacy,Pizza Place,Butcher
65,Scarborough,0,Indian Restaurant,Pet Store,Vietnamese Restaurant,Chinese Restaurant,Thrift / Vintage Store,Department Store,Dog Run,Distribution Center,Discount Store,Diner


Explore Cluster 1

In [199]:
# Rows of cluster 1: the borough (column 1) plus everything from the cluster label onwards.
park_merged[park_merged['Cluster Labels'] == 1].iloc[:, [1, *range(5, park_merged.shape[1])]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,Etobicoke,1,Bakery,Yoga Studio,Dessert Shop,Donut Shop,Doner Restaurant,Dog Run,Distribution Center,Discount Store,Diner,Department Store


Explore Cluster 2

In [200]:
# Rows of cluster 2: the borough (column 1) plus everything from the cluster label onwards.
park_merged[park_merged['Cluster Labels'] == 2].iloc[:, [1, *range(5, park_merged.shape[1])]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
83,Central Toronto,2,Summer Camp,Restaurant,Department Store,Doner Restaurant,Dog Run,Distribution Center,Discount Store,Diner,Dessert Shop,Cuban Restaurant


Explore Cluster 3

In [201]:
# Rows of cluster 3: the borough (column 1) plus everything from the cluster label onwards.
park_merged[park_merged['Cluster Labels'] == 3].iloc[:, [1, *range(5, park_merged.shape[1])]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
61,Central Toronto,3,Park,Swim School,Bus Line,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner,Dessert Shop,Department Store
101,Etobicoke,3,Park,Baseball Field,Eastern European Restaurant,Donut Shop,Doner Restaurant,Dog Run,Distribution Center,Discount Store,Diner,Dessert Shop


Explore Cluster 4

In [202]:
# Rows of cluster 4: the borough (column 1) plus everything from the cluster label onwards.
park_merged[park_merged['Cluster Labels'] == 4].iloc[:, [1, *range(5, park_merged.shape[1])]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,4,Food & Drink Shop,Park,Fast Food Restaurant,Department Store,Dog Run,Distribution Center,Discount Store,Diner,Dessert Shop,Yoga Studio
