# Applied Data Science Capstone Coursera Week 3 Assignment

Goal: Segmenting and clustering neighborhoods in the city of Toronto, Canada

# Part 1

Import necessary libraries

In [1]:
import pandas as pd
import numpy as np

Read table into dataframe

In [2]:
df = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')[0]
df.rename(columns={'Neighbourhood':'Neighborhood'}, inplace=True)
df.head(10)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
7,M8A,Not assigned,Not assigned
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"


Ignore cells with a borough that is Not assigned.

In [3]:
df = df[df['Borough'] != 'Not assigned']
df.head(10)

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"
11,M3B,North York,Don Mills
12,M4B,East York,"Parkview Hill, Woodbine Gardens"
13,M5B,Downtown Toronto,"Garden District, Ryerson"


Combining rows into one row with the neighborhoods separated with a comma.

In [4]:
df_new = df.groupby('Postal Code', sort=False).agg(', '.join)
df_new.head(10)

Unnamed: 0_level_0,Borough,Neighborhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M3A,North York,Parkwoods
M4A,North York,Victoria Village
M5A,Downtown Toronto,"Regent Park, Harbourfront"
M6A,North York,"Lawrence Manor, Lawrence Heights"
M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
M1B,Scarborough,"Malvern, Rouge"
M3B,North York,Don Mills
M4B,East York,"Parkview Hill, Woodbine Gardens"
M5B,Downtown Toronto,"Garden District, Ryerson"


Replacing cells with a borough but a Not assigned neighborhood with the neighborhood same as the borough.

In [5]:
df_new.loc[df_new['Neighborhood'] =='Not assigned', 'Neighborhood'] = df_new.loc[df_new['Neighborhood'] =='Not assigned', 'Borough']
df_new.reset_index(inplace=True)
df_new.head(10)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


Displaying shape of final dataframe

In [6]:
df_new.shape

(103, 3)

# Part 2

Importing geocoder for getting latitude and logitude

In [7]:
!pip install geocoder
import geocoder
print('Done')

Done


Creating empty columns for latitude and longitude

In [8]:
df_new['Latitude'] = None
df_new['Longitude'] = None
df_new.head(5)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,,
1,M4A,North York,Victoria Village,,
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",,
3,M6A,North York,"Lawrence Manor, Lawrence Heights",,
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",,


Obtaining latitude and longitude from geocoder.arcgis and adding it to df_new

In [9]:
for i, pc in enumerate(df_new['Postal Code']):
    lat_lng_coords = None
    
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Toronto, Ontario'.format(pc))
        lat_lng_coords = g.latlng
    
    if lat_lng_coords:
        latitude = lat_lng_coords[0]
        longitude = lat_lng_coords[1]
    
    df_new.loc[i, 'Latitude'] = latitude
    df_new.loc[i, 'Longitude'] = longitude

df_new.head(10)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.7519,-79.3304
1,M4A,North York,Victoria Village,43.7304,-79.3128
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.6551,-79.3626
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.7232,-79.4514
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.6645,-79.393
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.6628,-79.5283
6,M1B,Scarborough,"Malvern, Rouge",43.8115,-79.1955
7,M3B,North York,Don Mills,43.7493,-79.3617
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.7079,-79.3116
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.6574,-79.3782


# Part 3

Using Geopy to get latitude and longitude of Toronto for visualization

In [11]:
!pip install geopy
from geopy.geocoders import Nominatim

address = 'Toronto, CA'
geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.




Importing folium for creating visualizations

In [12]:
!pip install folium
import folium
print('Done')

Done


Creating the map of Toronto with neighborhoods superimposed on top

In [13]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

for lat, lng, borough, neighborhood in zip(df_new['Latitude'], df_new['Longitude'], df_new['Borough'], df_new['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

Creating Foursquare Credentials

In [1]:
CLIENT_ID = 'Y0HXHNP4OTTXTRGTH1K01HO2XTRUZ13I2LV3PIFL1GLJ2FVM' #Please Input Personalized Credentials here
CLIENT_SECRET = 'VZGSX02NYILB1HQQSFP5RCMRBZRDOHIOHQ2N2WYVGU0ZMNNU'
VERSION = '20200803'

Creating a function to process all neighborhoods and obtai their nearby venues

In [16]:
import requests
LIMIT = 50
radius = 500

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

Running the function getNearbyVenues on each neighborhood and creating a new dataframe called venues

In [17]:
venues = getNearbyVenues(names=df_new['Neighborhood'], latitudes=df_new['Latitude'], longitudes=df_new['Longitude'])

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Queen's Park, Ontario Provincial Government
Islington Avenue, Humber Valley Village
Malvern, Rouge
Don Mills
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto, Broadview North (Old East York)
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmo

In [18]:
print(venues.shape)
venues.head()

(1855, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.75188,-79.33036,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.75188,-79.33036,PetSmart,43.748639,-79.333488,Pet Store
2,Parkwoods,43.75188,-79.33036,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Victoria Village,43.73042,-79.31282,Memories of Africa,43.726602,-79.312427,Grocery Store
4,Victoria Village,43.73042,-79.31282,The Retreat Nail & Beauty Bar,43.726134,-79.312205,Nail Salon


Number of venues returned for each neighborhood:

In [19]:
venues.groupby('Neighborhood', as_index=False).count()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Agincourt,8,8,8,8,8,8
1,"Alderwood, Long Branch",5,5,5,5,5,5
2,"Bathurst Manor, Wilson Heights, Downsview North",3,3,3,3,3,3
3,Bayview Village,5,5,5,5,5,5
4,"Bedford Park, Lawrence Manor East",20,20,20,20,20,20
5,Berczy Park,50,50,50,50,50,50
6,"Birch Cliff, Cliffside West",5,5,5,5,5,5
7,"Brockton, Parkdale Village, Exhibition Place",50,50,50,50,50,50
8,"Business reply mail Processing Centre, South C...",50,50,50,50,50,50
9,"CN Tower, King and Spadina, Railway Lands, Har...",50,50,50,50,50,50


Checking how many unique categories there are:

In [20]:
print('There are {} uniques categories.'.format(len(venues['Venue Category'].unique())))

There are 246 uniques categories.


## Analyzing each neighborhood

In [21]:
# one hot encoding
venues_onehot = pd.get_dummies(venues[['Venue Category']], prefix="", prefix_sep="")

venues_onehot.head()

Unnamed: 0,Airport,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Dealership,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo Exhibit
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Adding the neighborhood column into venues_onehot dataframe

In [22]:
venues_onehot['Neighborhood'] = venues['Neighborhood']
venues_onehot.head()

Unnamed: 0,Airport,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Dealership,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo Exhibit
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Moving the Neighborhood column to the first column

In [23]:
temp = list(venues_onehot.columns)

if 'Neighborhood' in temp:
    temp.remove('Neighborhood')
    
fixed_columns = ['Neighborhood'] + temp
venues_onehot = venues_onehot[fixed_columns]

venues_onehot.head()

Unnamed: 0,Neighborhood,Airport,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo Exhibit
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Groupby neighborhood and take mean for all categories

In [24]:
venues_grouped = venues_onehot.groupby('Neighborhood', sort=False).mean().reset_index()
print(venues_grouped.shape)
venues_grouped.head(10)

(97, 246)


Unnamed: 0,Neighborhood,Airport,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo Exhibit
0,Parkwoods,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Victoria Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Regent Park, Harbourfront",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.045455,0.0
3,"Lawrence Manor, Lawrence Heights",0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0
4,"Queen's Park, Ontario Provincial Government",0.0,0.0,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Islington Avenue, Humber Valley Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"Malvern, Rouge",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5
7,Don Mills,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Parkview Hill, Woodbine Gardens",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Garden District, Ryerson",0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Viewing the top 3 categories in each neighborhood

In [25]:
num_top_venues = 3

for hood in venues_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = venues_grouped[venues_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Parkwoods----
               venue  freq
0          Pet Store  0.33
1               Park  0.33
2  Food & Drink Shop  0.33


----Victoria Village----
           venue  freq
0     Nail Salon  0.33
1  Grocery Store  0.33
2   Intersection  0.33


----Regent Park, Harbourfront----
            venue  freq
0     Coffee Shop  0.23
1  Breakfast Spot  0.09
2             Spa  0.05


----Lawrence Manor, Lawrence Heights----
                    venue  freq
0          Clothing Store  0.22
1  Furniture / Home Store  0.04
2              Restaurant  0.04


----Queen's Park, Ontario Provincial Government----
            venue  freq
0     Coffee Shop  0.16
1  Sandwich Place  0.06
2            Park  0.06


----Islington Avenue, Humber Valley Village----
           venue  freq
0       Pharmacy  0.22
1  Grocery Store  0.11
2           Café  0.11


----Malvern, Rouge----
          venue  freq
0   Zoo Exhibit   0.5
1  Home Service   0.5
2   Yoga Studio   0.0


----Don Mills----
          venue  freq
0  In

### Creating a dataframe with most common venues

First defining a function to sort venues

In [26]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Creating the dataframe and displaying the top 5 venues for each neighborhood

In [27]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = venues_grouped['Neighborhood']

for ind in np.arange(venues_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(venues_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Parkwoods,Pet Store,Park,Food & Drink Shop,Donut Shop,Fish & Chips Shop
1,Victoria Village,Nail Salon,Intersection,Grocery Store,Zoo Exhibit,Event Space
2,"Regent Park, Harbourfront",Coffee Shop,Breakfast Spot,Theater,Event Space,Spa
3,"Lawrence Manor, Lawrence Heights",Clothing Store,Restaurant,Men's Store,Food Court,Toy / Game Store
4,"Queen's Park, Ontario Provincial Government",Coffee Shop,Café,Park,Sandwich Place,Persian Restaurant
5,"Islington Avenue, Humber Valley Village",Pharmacy,Bank,Park,Skating Rink,Home Service
6,"Malvern, Rouge",Zoo Exhibit,Home Service,Dumpling Restaurant,Flea Market,Fish Market
7,Don Mills,Intersection,Coffee Shop,Soccer Field,Park,Gas Station
8,"Parkview Hill, Woodbine Gardens",Pizza Place,Pet Store,Gastropub,Rock Climbing Spot,Bank
9,"Garden District, Ryerson",Coffee Shop,Café,Ramen Restaurant,Clothing Store,Bookstore


## KMeans Clustering to cluster neighborhoods

In [28]:
from sklearn.cluster import KMeans

Running KMeans algorithm with 5 clusters

In [29]:
k = 5
venues_grouped_clustering = venues_grouped.drop('Neighborhood', axis=1)
kmeans = KMeans(n_clusters=k, random_state=67).fit(venues_grouped_clustering)
kmeans.labels_

array([0, 0, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 0, 2, 2, 0, 2, 0, 0, 2, 0, 2,
       2, 2, 2, 0, 0, 3, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2,
       3, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 0, 0, 0, 1, 0, 2,
       4, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 1, 2, 2, 0, 2, 2, 0, 2,
       2, 2, 2, 2, 0, 2, 2, 2, 2], dtype=int32)

Create a new dataframe that includes cluster labels and the top 5 venues

In [30]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
venues_merged = df_new
venues_merged = venues_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
venues_merged.dropna(inplace=True)

Viewing the newly created dataframe

In [31]:
print(venues_merged.shape)
venues_merged['Cluster Labels'] = venues_merged['Cluster Labels'].astype(int)
venues_merged

(101, 11)


Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M3A,North York,Parkwoods,43.7519,-79.3304,0,Pet Store,Park,Food & Drink Shop,Donut Shop,Fish & Chips Shop
1,M4A,North York,Victoria Village,43.7304,-79.3128,0,Nail Salon,Intersection,Grocery Store,Zoo Exhibit,Event Space
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.6551,-79.3626,2,Coffee Shop,Breakfast Spot,Theater,Event Space,Spa
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.7232,-79.4514,2,Clothing Store,Restaurant,Men's Store,Food Court,Toy / Game Store
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.6645,-79.393,2,Coffee Shop,Café,Park,Sandwich Place,Persian Restaurant
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.6628,-79.5283,0,Pharmacy,Bank,Park,Skating Rink,Home Service
6,M1B,Scarborough,"Malvern, Rouge",43.8115,-79.1955,0,Zoo Exhibit,Home Service,Dumpling Restaurant,Flea Market,Fish Market
7,M3B,North York,Don Mills,43.7493,-79.3617,2,Intersection,Coffee Shop,Soccer Field,Park,Gas Station
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.7079,-79.3116,2,Pizza Place,Pet Store,Gastropub,Rock Climbing Spot,Bank
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.6574,-79.3782,2,Coffee Shop,Café,Ramen Restaurant,Clothing Store,Bookstore


Creating a map to visualize clustering

In [32]:
import matplotlib.cm as cm
import matplotlib.colors as colors

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

x = np.arange(k)
ys = [i + x + (i*x)**2 for i in range(k)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, poi, cluster in zip(venues_merged['Latitude'], venues_merged['Longitude'], venues_merged['Neighborhood'], venues_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Viewing members of each cluster

In [33]:
venues_merged.loc[venues_merged['Cluster Labels'] == 0, venues_merged.columns[[1] + list(range(5, venues_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,North York,0,Pet Store,Park,Food & Drink Shop,Donut Shop,Fish & Chips Shop
1,North York,0,Nail Salon,Intersection,Grocery Store,Zoo Exhibit,Event Space
5,Etobicoke,0,Pharmacy,Bank,Park,Skating Rink,Home Service
6,Scarborough,0,Zoo Exhibit,Home Service,Dumpling Restaurant,Flea Market,Fish Market
12,Scarborough,0,Bar,Zoo Exhibit,Eastern European Restaurant,Flea Market,Fish Market
16,York,0,Park,Hockey Arena,Field,Grocery Store,Coffee Shop
18,Scarborough,0,Construction & Landscaping,Park,Gym / Fitness Center,Zoo Exhibit,Falafel Restaurant
19,East Toronto,0,Health Food Store,Trail,Pub,College Gym,Fast Food Restaurant
21,York,0,Park,Gym,Bakery,Beer Store,Mexican Restaurant
26,Scarborough,0,Playground,Trail,Zoo Exhibit,Donut Shop,Fish & Chips Shop


In [34]:
venues_merged.loc[venues_merged['Cluster Labels'] == 1, venues_merged.columns[[1] + list(range(5, venues_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
68,Central Toronto,1,Pharmacy,Park,Donut Shop,Fish & Chips Shop,Field
85,Scarborough,1,Pharmacy,Flower Shop,Fish Market,Fish & Chips Shop,Field


In [35]:
venues_merged.loc[venues_merged['Cluster Labels'] == 2, venues_merged.columns[[1] + list(range(5, venues_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
2,Downtown Toronto,2,Coffee Shop,Breakfast Spot,Theater,Event Space,Spa
3,North York,2,Clothing Store,Restaurant,Men's Store,Food Court,Toy / Game Store
4,Downtown Toronto,2,Coffee Shop,Café,Park,Sandwich Place,Persian Restaurant
7,North York,2,Intersection,Coffee Shop,Soccer Field,Park,Gas Station
8,East York,2,Pizza Place,Pet Store,Gastropub,Rock Climbing Spot,Bank
9,Downtown Toronto,2,Coffee Shop,Café,Ramen Restaurant,Clothing Store,Bookstore
10,North York,2,Pizza Place,Mediterranean Restaurant,Latin American Restaurant,Rental Car Location,Japanese Restaurant
11,Etobicoke,2,Movie Theater,Jewelry Store,Zoo Exhibit,Eastern European Restaurant,Fish Market
13,North York,2,Intersection,Coffee Shop,Soccer Field,Park,Gas Station
14,East York,2,Bus Line,Café,Grocery Store,Doctor's Office,Fast Food Restaurant


In [36]:
venues_merged.loc[venues_merged['Cluster Labels'] == 3, venues_merged.columns[[1] + list(range(5, venues_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
28,North York,3,IT Services,Park,Music Venue,Zoo Exhibit,Eastern European Restaurant
45,North York,3,Music Venue,Zoo Exhibit,Dumpling Restaurant,Fish Market,Fish & Chips Shop


In [37]:
venues_merged.loc[venues_merged['Cluster Labels'] == 4, venues_merged.columns[[1] + list(range(5, venues_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
71,Scarborough,4,Auto Garage,Zoo Exhibit,Dumpling Restaurant,Flea Market,Fish Market
