# Peer-Graded Assignment: Segmenting and Clustering Neighborhoods in Toronto

Import necessary libraries for the assignment.

In [3]:
import pandas as pd # library to process data as dataframes
import urllib.request # library to import data from url
from bs4 import BeautifulSoup # library for working with HTML / XML

print('Libraries imported.')

Libraries imported.


## Assignment 1.1

### Scrape Wikipedia

In [4]:
# Specify which URL/web page we are going to be scraping
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

In [5]:
# Open the url using urllib.request and put the HTML into the page variable
page = urllib.request.urlopen(url)

In [6]:
# Use the BeautifulSoup library so we can parse HTML and XML documents
soup = BeautifulSoup(page, "lxml")

In [7]:
# Parse out everything contained in the table of "wikitable sortable" class
table=soup.find('table', class_='wikitable sortable')

In [8]:
# Check the type of the parsed table to ensure that Beautiful Soup methods can be used
type(table)

bs4.element.Tag

In [9]:
# Loop through rows of the table to parse out the three columns of data
A=[]
B=[]
C=[]

for row in table.findAll('tr'):
    cells=row.findAll('td')
    if len(cells)==3:
        A.append(cells[0].find(text=True))
        B.append(cells[1].find(text=True))
        C.append(cells[2].find(text=True))

# Remove unwanted delimiters from the lists
A = list([a.replace('\n', '') for a in A]) # remove all '\n' 
B = list([b.replace('\n', '') for b in B]) # remove all '\n' 
C = list([c.replace('\n', '') for c in C]) # remove all '\n' 

In [10]:
# Create a dataframe containing the three columns of data
df=pd.DataFrame(A, columns=['PostalCode'])
df['Borough']=B
df['Neighborhood']=C
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [11]:
# Delete rows containing 'Not assigned' boroughs
df.drop(df[df['Borough'] == 'Not assigned' ].index , inplace=True)
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [12]:
# Assign borough name where neighborhood name is 'Not assigned'
df['Neighborhood'][df['Neighborhood'] == 'Not assigned'] = df['Borough']

# Reset the index
df.reset_index(inplace=True, drop=True)

In [13]:
df.head(12)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [14]:
df.shape

(103, 3)

## Assignment 1.2

In [15]:
df.to_csv('df_assignment_1_1.csv')

In [16]:
import geocoder # import geocoder

# initialize your variable to None
lat_lng_coords = None

# loop until you get the coordinates
while(lat_lng_coords is None):
  g = geocoder.google('M5G, Toronto, Ontario')
  lat_lng_coords = g.latlng

latitude = lat_lng_coords[0]
longitude = lat_lng_coords[1]

KeyboardInterrupt: 

In [15]:
# Load dataframe with coordinates as geocoder is not working
df_coords = pd.read_csv('Geospatial_Coordinates.csv')
df_coords.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [18]:
df = pd.merge(df, df_coords, how='inner', left_on='PostalCode', right_on='Postal Code',
                   sort=True, copy=True, indicator=False).drop('Postal Code', axis = 1)
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


## Assignment 1.3

In [40]:
# Use only boroughs containing 'Toronto'
df_toronto = df[df['Borough'].str.contains('Toronto')] # df # Set 'df_toronto = df' to work with all boroughs
print(df_toronto.shape)
df_toronto.head()

(39, 5)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
37,M4E,East Toronto,The Beaches,43.676357,-79.293031
41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
42,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572
43,M4M,East Toronto,Studio District,43.659526,-79.340923
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [41]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(df_toronto['Borough'].unique()),
        df_toronto.shape[0])
     )

The dataframe has 4 boroughs and 39 neighborhoods.


In [66]:
#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import numpy as np

import json # library to handle JSON files

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


#### Use geopy library to get the latitude and longitude values of Toronto
In order to define an instance of the geocoder, it is required to define a user_agent. I will name our agent <em>explorer</em>, as shown below.

In [43]:
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of {} are {}, {}.'.format(address, latitude, longitude))

The geograpical coordinates of Toronto, Canada are 43.6534817, -79.3839347.


#### Create a map of Toronto with neighborhoods superimposed on top.

In [44]:
# create map of New York using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['Borough'], df_toronto['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

#### Define Foursquare Credentials and Version
Use the Foursquare API to get data on venues in each neighborhood.

In [100]:
# Import API Keys
with open('credentials.json') as creds:    
    credentials = json.load(creds)

In [104]:
CLIENT_ID = credentials['CLIENT_ID'] # your Foursquare ID
CLIENT_SECRET = credentials['CLIENT_SECRET'] # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
radius = 500
LIMIT = 100

Define a function to extract data from the JSON response from the API.

In [105]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/search?&client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            lat, 
            lng, 
            VERSION,
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()['response']['venues']

        # return only relevant information for each nearby venue        
        for v in results:   
            df = pd.json_normalize(v['categories'])
            if not df.empty:
                venues_list.append([(
                    name,
                    lat,
                    lng,
                    v['name'], 
                    v['location']['lat'], 
                    v['location']['lng'],  
                    df['name'][0])]) 

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [106]:
# Get data on venues in each neighborhood in Toronto
toronto_venues = getNearbyVenues(names=df_toronto['Neighborhood'],
                                   latitudes=df_toronto['Latitude'],
                                   longitudes=df_toronto['Longitude']
                                  )


The Beaches
The Danforth West, Riverdale
India Bazaar, The Beaches West
Studio District
Lawrence Park
Davisville North
North Toronto West,  Lawrence Park
Davisville
Moore Park, Summerhill East
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
Rosedale
St. James Town, Cabbagetown
Church and Wellesley
Regent Park, Harbourfront
Garden District, Ryerson
St. James Town
Berczy Park
Central Bay Street
Richmond, Adelaide, King
Harbourfront East, Union Station, Toronto Islands
Toronto Dominion Centre, Design Exchange
Commerce Court, Victoria Hotel
Roselawn
Forest Hill North & West, Forest Hill Road Park
The Annex, North Midtown, Yorkville
University of Toronto, Harbord
Kensington Market, Chinatown, Grange Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
Stn A PO Boxes
First Canadian Place, Underground city
Christie
Dufferin, Dovercourt Village


ConnectionError: HTTPSConnectionPool(host='api.foursquare.com', port=443): Max retries exceeded with url: /v2/venues/search?&client_id=YVES0BMLLYC4PFYRXZHJE4N2AZX2PBGWL1THBHCPWQCZLW2S&client_secret=XYK2LJZ2241BK02GHQMBQ3UTSLFN5XCBQC0G3SVEFVYN2CDZ&ll=43.66900510000001,-79.4422593&v=20180605&radius=500&limit=100 (Caused by NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x7fb0ab1b07d0>: Failed to establish a new connection: [Errno 60] Operation timed out'))

In [107]:
print(toronto_venues.shape)
toronto_venues.head()

(3285, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.676357,-79.293031,Kew-Balmy Beach,43.667372,-79.295312,Beach
2,The Beaches,43.676357,-79.293031,Glen Stewart Park,43.675278,-79.294647,Park
3,The Beaches,43.676357,-79.293031,The Georgian,43.674395,-79.298086,Residential Building (Apartment / Condo)
4,The Beaches,43.676357,-79.293031,Groseille's Grotto,43.674311,-79.289162,Playground


Let's check the number of venues returned for each neighborhood

In [108]:
toronto_venues.groupby('Neighborhood').count()['Venue'].sort_values(ascending=False)

Neighborhood
University of Toronto, Harbord                                                                                96
Richmond, Adelaide, King                                                                                      96
First Canadian Place, Underground city                                                                        96
Central Bay Street                                                                                            95
The Danforth West, Riverdale                                                                                  94
Christie                                                                                                      94
Commerce Court, Victoria Hotel                                                                                93
Runnymede, Swansea                                                                                            93
Kensington Market, Chinatown, Grange Park                                          

In [109]:
print('There are {} unique categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 408 unique categories.


### Analyze each neighborhood
Start by one-hot encoding the venue data

In [110]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

print(toronto_onehot.shape)
toronto_onehot.head()

(3285, 409)


Unnamed: 0,Neighborhood,ATM,Accessories Store,Adult Boutique,Advertising Agency,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,...,Warehouse,Watch Shop,Water Park,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store,Yoga Studio,Zoo Exhibit
0,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Group by neighborhood.

In [111]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
print(toronto_grouped.shape)
toronto_grouped

(39, 409)


Unnamed: 0,Neighborhood,ATM,Accessories Store,Adult Boutique,Advertising Agency,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,...,Warehouse,Watch Shop,Water Park,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store,Yoga Studio,Zoo Exhibit
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.032609,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.01087,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.012658
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.0,0.0,0.0,0.0,0.032258,0.016129,0.193548,0.032258,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.010526,0.0,0.0,0.0
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.010638,0.0,0.0,0.021277,0.0
6,Church and Wellesley,0.0,0.0,0.011111,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.010753,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Davisville,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.012658,0.0,0.0
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Print top venues for each neighborhood. 

In [112]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("---- "+ hood +" ----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues), '\n')

---- Berczy Park ----
                                      venue  freq
0                                    Office  0.13
1                                  Building  0.08
2  Residential Building (Apartment / Condo)  0.04
3                              Tech Startup  0.04
4                                   Parking  0.04 

---- Brockton, Parkdale Village, Exhibition Place ----
                                      venue  freq
0                                    Office  0.25
1  Residential Building (Apartment / Condo)  0.17
2                              Tech Startup  0.08
3                           Conference Room  0.05
4                                  Building  0.04 

---- Business reply mail Processing Centre, South Central Letter Processing Plant Toronto ----
                venue  freq
0            Building  0.05
1  Light Rail Station  0.04
2              Office  0.04
3             Butcher  0.03
4        Antique Shop  0.03 

---- CN Tower, King and Spadina, Railway Lands, Harbou

                                      venue  freq
0  Residential Building (Apartment / Condo)  0.17
1                                    Office  0.07
2                           Bed & Breakfast  0.05
3                                  Building  0.04
4                             Metro Station  0.02 

---- The Beaches ----
                                      venue  freq
0                                    School  0.07
1                                      Park  0.05
2  Residential Building (Apartment / Condo)  0.03
3                                Playground  0.03
4                           Laundry Service  0.03 

---- The Danforth West, Riverdale ----
                  venue  freq
0      Greek Restaurant  0.07
1                   Spa  0.06
2    Salon / Barbershop  0.05
3  Gym / Fitness Center  0.04
4     Health Food Store  0.03 

---- Toronto Dominion Centre, Design Exchange ----
         venue  freq
0       Office  0.17
1  Coffee Shop  0.04
2         Park  0.03
3   Restaurant  0.

Put that into a dataframe.

In [113]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [114]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Office,Building,Parking,Residential Building (Apartment / Condo),Tech Startup,Movie Theater,Breakfast Spot,Assisted Living,Laundry Service,Hotel
1,"Brockton, Parkdale Village, Exhibition Place",Office,Residential Building (Apartment / Condo),Tech Startup,Conference Room,Building,Advertising Agency,Coworking Space,Café,Convenience Store,Medical Center
2,"Business reply mail Processing Centre, South C...",Building,Office,Light Rail Station,Convenience Store,Medical Center,Butcher,Fast Food Restaurant,Theater,Restaurant,Gym / Fitness Center
3,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Gate,Airport Service,Moving Target,Airport Terminal,Boat or Ferry,Park,Airport Lounge,General Travel,Airport,Coffee Shop
4,Central Bay Street,Hospital,Hospital Ward,Coffee Shop,Medical Center,Office,Emergency Room,Pharmacy,Sandwich Place,Parking,Fast Food Restaurant


### Cluster the neighborhoods
Run the *k*-means algorithm to cluster the neighborhoods.

In [115]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([4, 1, 0, 2, 0, 0, 3, 4, 0, 3], dtype=int32)

In [116]:
# add clustering labels
try:
    neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
except:
    neighborhoods_venues_sorted['Cluster Labels'] = kmeans.labels_

toronto_merged = df_toronto

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
37,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,School,Park,Building,Playground,Residential Building (Apartment / Condo),Laundry Service,Salon / Barbershop,Martial Arts Dojo,Flower Shop,Coffee Shop
41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,0,Greek Restaurant,Spa,Salon / Barbershop,Gym / Fitness Center,Office,Miscellaneous Shop,Health Food Store,Women's Store,Ice Cream Shop,Shop & Service
42,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572,0,Convenience Store,Park,Office,Church,Pet Store,Residential Building (Apartment / Condo),Indian Restaurant,Fast Food Restaurant,Laundry Service,Bar
43,M4M,East Toronto,Studio District,43.659526,-79.340923,0,Building,Coffee Shop,Pharmacy,Automotive Shop,Nail Salon,Restaurant,Furniture / Home Store,Spa,Doctor's Office,Dentist's Office
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,0,College Classroom,School,Bus Line,Housing Development,General Entertainment,Hospital,Park,Parking,College Auditorium,Pool


Finally, let's visualize the resulting clusters.

In [117]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=.5).add_to(map_clusters)
       
map_clusters

### Examine the clusters

#### Cluster 1

In [118]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, 
                   toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
37,East Toronto,0,School,Park,Building,Playground,Residential Building (Apartment / Condo),Laundry Service,Salon / Barbershop,Martial Arts Dojo,Flower Shop,Coffee Shop
41,East Toronto,0,Greek Restaurant,Spa,Salon / Barbershop,Gym / Fitness Center,Office,Miscellaneous Shop,Health Food Store,Women's Store,Ice Cream Shop,Shop & Service
42,East Toronto,0,Convenience Store,Park,Office,Church,Pet Store,Residential Building (Apartment / Condo),Indian Restaurant,Fast Food Restaurant,Laundry Service,Bar
43,East Toronto,0,Building,Coffee Shop,Pharmacy,Automotive Shop,Nail Salon,Restaurant,Furniture / Home Store,Spa,Doctor's Office,Dentist's Office
44,Central Toronto,0,College Classroom,School,Bus Line,Housing Development,General Entertainment,Hospital,Park,Parking,College Auditorium,Pool
46,Central Toronto,0,Shoe Store,Men's Store,Clothing Store,Health & Beauty Service,Cosmetics Shop,General Entertainment,Arts & Crafts Store,Kids Store,Boutique,Metro Station
47,Central Toronto,0,Coffee Shop,Italian Restaurant,Café,Dessert Shop,Spa,Candy Store,Salon / Barbershop,Bookstore,Flower Shop,Laundry Service
48,Central Toronto,0,Building,Office,Residential Building (Apartment / Condo),Park,Other Great Outdoors,Gym,Tennis Court,Rest Area,School,Gym / Fitness Center
51,Downtown Toronto,0,Coffee Shop,Street Art,Café,Flower Shop,Office,Pharmacy,Pizza Place,Laundry Service,Residential Building (Apartment / Condo),Bakery
54,Downtown Toronto,0,College Lab,College Administrative Building,University,College Arts Building,General College & University,Coffee Shop,Parking,College Classroom,College Academic Building,Student Center


Coffee shops, small businesses, and stores.

#### Cluster 2

In [119]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, 
                   toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
69,Downtown Toronto,1,Office,Building,Tech Startup,Residential Building (Apartment / Condo),Gym,Bar,Hotel,Pub,Grocery Store,Café
78,West Toronto,1,Office,Residential Building (Apartment / Condo),Tech Startup,Conference Room,Building,Advertising Agency,Coworking Space,Café,Convenience Store,Medical Center


Office buildings and startups.

#### Cluster 3

In [120]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, 
                   toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
68,Downtown Toronto,2,Airport Gate,Airport Service,Moving Target,Airport Terminal,Boat or Ferry,Park,Airport Lounge,General Travel,Airport,Coffee Shop


The airport.

#### Cluster 4

In [121]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, 
                   toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
45,Central Toronto,3,Residential Building (Apartment / Condo),Office,Dog Run,Medical Center,Building,Scenic Lookout,Hotel,Park,Playground,Breakfast Spot
49,Central Toronto,3,Residential Building (Apartment / Condo),Office,Building,Doctor's Office,Dentist's Office,Light Rail Station,Medical Center,Spiritual Center,Government Building,Café
50,Downtown Toronto,3,Residential Building (Apartment / Condo),Office,Park,Building,Trail,Other Great Outdoors,Government Building,Conference Room,Salon / Barbershop,Dog Run
52,Downtown Toronto,3,Residential Building (Apartment / Condo),Office,Building,Doctor's Office,Spa,Gym,Pub,Bank,Smoke Shop,Hotel
59,Downtown Toronto,3,Residential Building (Apartment / Condo),Office,Coffee Shop,Building,Light Rail Station,Doctor's Office,Fried Chicken Joint,Parking,Indian Restaurant,Monument / Landmark
65,Central Toronto,3,Residential Building (Apartment / Condo),Office,Bed & Breakfast,Building,General Entertainment,Metro Station,Japanese Restaurant,Medical Center,Gym,Café


#### Cluster 5

In [122]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, 
                   toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
53,Downtown Toronto,4,Office,Automotive Shop,Furniture / Home Store,Italian Restaurant,Auto Dealership,Food Truck,Art Gallery,Park,Building,Coffee Shop
55,Downtown Toronto,4,Office,Residential Building (Apartment / Condo),Event Space,Building,Church,Japanese Restaurant,Furniture / Home Store,Other Nightlife,Nightclub,Spa
56,Downtown Toronto,4,Office,Building,Parking,Residential Building (Apartment / Condo),Tech Startup,Movie Theater,Breakfast Spot,Assisted Living,Laundry Service,Hotel
58,Downtown Toronto,4,Office,Building,Café,Coffee Shop,Food Court,Vegetarian / Vegan Restaurant,Pool,Hotel Bar,Ballroom,Indian Restaurant
60,Downtown Toronto,4,Office,Coffee Shop,Building,Café,Park,Restaurant,Deli / Bodega,Italian Restaurant,Cosmetics Shop,Cocktail Bar
61,Downtown Toronto,4,Office,Financial or Legal Service,Salon / Barbershop,Bank,Food Court,Coffee Shop,Sushi Restaurant,Restaurant,Café,Bus Stop


Office buildings in downtown.