# IBM Data Science Professional Capstone Project

## Step 0: Setting up the environment

In [125]:
import numpy as np
import pandas as pd

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as Colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

import geocoder
from geopy.geocoders import Nominatim

from bs4 import BeautifulSoup
from urllib.request import urlopen

## Step 1: Scraping the neighbrhood data

In [3]:
# Scraping webpage data into an HTML table
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
page = urlopen(url).read().decode('utf-8')
soup = BeautifulSoup(page, 'html.parser')

wiki_table = soup.body.table.tbody

In [4]:
# Extracting table data to data frame

def get_cell(element):
    cells = element.find_all('td')
    row = []
    
    for cell in cells:
        if cell.a:            
            if (cell.a.text):
                row.append(cell.a.text)
                continue
        row.append(cell.string.strip())
        
    return row

def get_row():    
    data = []  
    
    for tr in wiki_table.find_all('tr'):
        row = get_cell(tr)
        if len(row) != 3:
            continue
        data.append(row)        
    
    return data

In [5]:
#Creating the data frame
data = get_row()
columns = ['Postcode', 'Borough', 'Neighbourhood']
df = pd.DataFrame(data, columns=columns)
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [6]:
df.shape

(287, 3)

In [7]:
# Ignore postcodes with 'Borough' not assigned
df1=df[df['Borough']!='Not assigned']
df1.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor


In [8]:
# Assign Borough if Neigborhood is Not assigned
df1.loc[df['Neighbourhood'] == 'Not assigned', 'Neighbourhood'] = df1['Borough']

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [9]:
# More than one neighborhood can exist in one postal code area. 
# These rows will be combined into one row with the neighborhoods separated with a comma
df2=df1.groupby(by=['Postcode','Borough']).agg(','.join)
df2.reset_index(inplace=True)
df2.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [10]:
df2.shape

(103, 3)

## Step 2: Adding location data

Now that we have built a dataframe of the postal code of each neighborhood along with the borough name and neighborhood name, in order to utilize the Foursquare location data, we need to get the latitude and the longitude coordinates of each neighborhood.

We will use the Geocoder Python instead: https://geocoder.readthedocs.io/index.html.

The problem with this Package is that you can make a call to get the latitude and longitude coordinates of a given postal code and the result would be None, and then make the call again and you would get the coordinates. So, in order to make sure that we get the coordinates for all of our neighborhoods, we'll run a while loop for each postal code.

In [11]:
def get_latlng(postal_code):
    # initialize the variable to None
    lat_lng_coords = None
    # loop until we get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Toronto, Ontario'.format(postal_code))
        lat_lng_coords = g.latlng
    return lat_lng_coords

In [12]:
# Make new columns for latitude and longitude
df2['Lat_Long'] = df2.apply(lambda x: get_latlng(x['Postcode']),axis=1)
df2[['Latitude','Longitude']] = pd.DataFrame(df2['Lat_Long'].values.tolist(), index= df2.index)
df2.drop('Lat_Long',axis=1,inplace=True)
df2.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.811525,-79.195517
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.785665,-79.158725
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.765815,-79.175193
3,M1G,Scarborough,Woburn,43.768369,-79.21759
4,M1H,Scarborough,Cedarbrae,43.769688,-79.23944


In [13]:
toronto_map = folium.Map(location=[43.65, -79.4], zoom_start=12)

X = df2['Latitude']
Y = df2['Longitude']
Z = np.stack((X, Y), axis=1)

kmeans = KMeans(n_clusters=4, random_state=0).fit(Z)

clusters = kmeans.labels_
colors = ['red', 'green', 'blue', 'yellow']
df2['Cluster'] = clusters

for latitude, longitude, borough, cluster in zip(df2['Latitude'], df2['Longitude'], df2['Borough'], df2['Cluster']):
    label = folium.Popup(borough, parse_html=True)
    folium.CircleMarker(
        [latitude, longitude],
        radius=5,
        popup=label,
        color='black',
        fill=True,
        fill_color=colors[cluster],
        fill_opacity=0.7).add_to(toronto_map)  

In [14]:
toronto_map

In [15]:
def getNearbyVenues(names, latitudes, longitudes, radius=5000, categoryIds=''):
    try:
        venues_list=[]
        for name, lat, lng in zip(names, latitudes, longitudes):
            #print(name)

            # create the API request URL
            url = 'https://api.foursquare.com/v2/venues/search?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, VERSION, lat, lng, radius, LIMIT)

            if (categoryIds != ''):
                url = url + '&categoryId={}'
                url = url.format(categoryIds)

            # make the GET request
            response = requests.get(url).json()
            results = response["response"]['venues']

            # return only relevant information for each nearby venue
            for v in results:
                success = False
                try:
                    category = v['categories'][0]['name']
                    success = True
                except:
                    pass

                if success:
                    venues_list.append([(
                        name, 
                        lat, 
                        lng, 
                        v['name'], 
                        v['location']['lat'], 
                        v['location']['lng'],
                        v['categories'][0]['name']
                    )])

        nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
        nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude',  
                  'Venue Category']
    
    except:
        print(url)
        print(response)
        print(results)
        print(nearby_venues)

    return(nearby_venues)

In [18]:
df2.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster
0,M1B,Scarborough,"Rouge,Malvern",43.811525,-79.195517,2
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.785665,-79.158725,2
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.765815,-79.175193,2
3,M1G,Scarborough,Woburn,43.768369,-79.21759,2
4,M1H,Scarborough,Cedarbrae,43.769688,-79.23944,2


In [16]:
#College Academic Building
#4bf58dd8d48988d198941735

LIMIT = 500 
radius = 5000 
CLIENT_ID = 'ZMHWBS0SR12Z3YDYVHJVTZPRK3U1ZP3I2TYQAJ5CU3JUHMB5'
CLIENT_SECRET = 'H3TT0XT3P5TIAFCV1Y2UUVLF42N44DICNKLUELK34H2TKLFR'
VERSION = '20181020'

In [20]:
neighborhoods = df2['Neighbourhood']
toronto_unis = getNearbyVenues(names=df2['Neighbourhood'], latitudes=df2['Latitude'], longitudes=df2['Longitude'], radius=1000, categoryIds='4bf58dd8d48988d1ae941735')
toronto_unis.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Fairview,Henry Farm,Oriole",43.78097,-79.347813,Lambton College,43.773343,-79.336044,College Administrative Building
1,"Fairview,Henry Farm,Oriole",43.78097,-79.347813,Tct,43.773436,-79.33675,University
2,York Mills West,43.747895,-79.399919,St. Andrew's Junior High School,43.75388,-79.404208,University
3,"Northwood Park,York University",43.764765,-79.488094,York University Executive Learning Centre,43.77286,-79.497815,University
4,"The Beaches West,India Bazaar",43.667965,-79.314667,The Hamilton Institute For Recreational Studies,43.671802,-79.310166,University


In [22]:
toronto_unis.to_csv('Toronto_Unis.csv')

In [24]:
toronto_unis.shape

(574, 7)

In [29]:
def addToMap(df, color, existingMap):
    for lat, lng, local, venue, venueCat in zip(df['Venue Latitude'], df['Venue Longitude'], df['Neighborhood'], df['Venue'], df['Venue Category']):
        label = '{} ({}) - {}'.format(venue, venueCat, local)
        label = folium.Popup(label, parse_html=True)
        folium.CircleMarker(
            [lat, lng],
            radius=5,
            popup=label,
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7).add_to(existingMap)

In [37]:
toronto_unis_map = folium.Map(location=[latitude, longitude], zoom_start=11)
addToMap(toronto_unis, 'red', toronto_unis_map)

toronto_unis_map

In [38]:
def addColumn(startDf, columnTitle, dataDf):
    grouped = dataDf.groupby('Neighborhood').count()
    
    for n in startDf['Neighborhood']:
        try:
            startDf.loc[startDf['Neighborhood'] == n,columnTitle] = grouped.loc[n, 'Venue']
        except:
            startDf.loc[startDf['Neighborhood'] == n,columnTitle] = 0

In [42]:
toronto_grouped=toronto_unis.groupby('Neighborhood').count()
toronto_grouped

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide,King,Richmond",36,36,36,36,36,36
Berczy Park,7,7,7,7,7,7
"Brockton,Exhibition Place,Parkdale Village",1,1,1,1,1,1
Business Reply Mail Processing Centre 969 Eastern,33,33,33,33,33,33
"CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara",2,2,2,2,2,2
Canada Post Gateway Processing Centre,33,33,33,33,33,33
Central Bay Street,45,45,45,45,45,45
"Chinatown,Grange Park,Kensington Market",46,46,46,46,46,46
Christie,2,2,2,2,2,2
Church and Wellesley,45,45,45,45,45,45


In [43]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_unis[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_unis['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()


Unnamed: 0,Neighborhood,College Academic Building,College Administrative Building,College Arts Building,College Communications Building,College Engineering Building,College Lab,College Library,College Residence Hall,College Science Building,General College & University,High School,Medical School,Student Center,University
0,"Fairview,Henry Farm,Oriole",0,1,0,0,0,0,0,0,0,0,0,0,0,0
1,"Fairview,Henry Farm,Oriole",0,0,0,0,0,0,0,0,0,0,0,0,0,1
2,York Mills West,0,0,0,0,0,0,0,0,0,0,0,0,0,1
3,"Northwood Park,York University",0,0,0,0,0,0,0,0,0,0,0,0,0,1
4,"The Beaches West,India Bazaar",0,0,0,0,0,0,0,0,0,0,0,0,0,1


In [44]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Neighborhood,College Academic Building,College Administrative Building,College Arts Building,College Communications Building,College Engineering Building,College Lab,College Library,College Residence Hall,College Science Building,General College & University,High School,Medical School,Student Center,University
0,"Adelaide,King,Richmond",0.055556,0.0,0.027778,0.027778,0.027778,0.0,0.0,0.0,0.0,0.0,0.027778,0.027778,0.027778,0.777778
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.857143
2,"Brockton,Exhibition Place,Parkdale Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,Business Reply Mail Processing Centre 969 Eastern,0.030303,0.0,0.030303,0.030303,0.030303,0.0,0.0,0.0,0.0,0.0,0.030303,0.030303,0.030303,0.787879
4,"CN Tower,Bathurst Quay,Island airport,Harbourf...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [45]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [146]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)
neighborhoods_venues_sorted.fillna(value=0)
neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide,King,Richmond",University,College Academic Building,Student Center,Medical School,High School,College Engineering Building,College Communications Building,College Arts Building,General College & University,College Science Building
1,Berczy Park,University,High School,Student Center,Medical School,General College & University,College Science Building,College Residence Hall,College Library,College Lab,College Engineering Building
2,"Brockton,Exhibition Place,Parkdale Village",University,Student Center,Medical School,High School,General College & University,College Science Building,College Residence Hall,College Library,College Lab,College Engineering Building
3,Business Reply Mail Processing Centre 969 Eastern,University,Student Center,Medical School,High School,College Engineering Building,College Communications Building,College Arts Building,College Academic Building,General College & University,College Science Building
4,"CN Tower,Bathurst Quay,Island airport,Harbourf...",University,Student Center,Medical School,High School,General College & University,College Science Building,College Residence Hall,College Library,College Lab,College Engineering Building


In [147]:
# set number of clusters
kclusters = 6

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 2, 0, 2, 0, 0, 0, 4, 0])

In [148]:
# add clustering labels

neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df2
toronto_merged.rename(columns={"Neighbourhood": "Neighborhood"},inplace=True)
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge,Malvern",43.811525,-79.195517,2,,,,,,,,,,,
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.785665,-79.158725,2,,,,,,,,,,,
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.765815,-79.175193,2,,,,,,,,,,,
3,M1G,Scarborough,Woburn,43.768369,-79.21759,2,,,,,,,,,,,
4,M1H,Scarborough,Cedarbrae,43.769688,-79.23944,2,,,,,,,,,,,


In [149]:
toronto_merged=toronto_merged.fillna({"Cluster Labels":5})

In [150]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.viridis(np.linspace(0, 1, len(ys)))
rainbow = [Colors.rgb2hex(i) for i in colors_array]
# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        
        #color=[rainbow[cluster-1] if cluster!='nan' else 'black'],
        fill=True,
        #fill_color=[rainbow[cluster-1] if cluster!='nan' else 'black'],
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [107]:
toronto_merged['Cluster Labels'].isna().count()

103

In [96]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
52,Downtown Toronto,0,0.0,University,College Academic Building,College Arts Building,Student Center,Medical School,College Library,College Lab,College Engineering Building,College Communications Building,High School
54,Downtown Toronto,0,0.0,University,College Academic Building,Student Center,Medical School,College Engineering Building,College Communications Building,College Arts Building,High School,General College & University,College Science Building
55,Downtown Toronto,0,0.0,University,College Academic Building,Student Center,Medical School,High School,College Engineering Building,College Communications Building,College Arts Building,General College & University,College Science Building
56,Downtown Toronto,0,0.0,University,High School,Student Center,Medical School,General College & University,College Science Building,College Residence Hall,College Library,College Lab,College Engineering Building
57,Downtown Toronto,0,0.0,University,College Academic Building,Student Center,Medical School,College Engineering Building,College Arts Building,College Library,College Lab,College Communications Building,High School
58,Downtown Toronto,0,0.0,University,College Academic Building,Student Center,Medical School,High School,College Engineering Building,College Communications Building,College Arts Building,General College & University,College Science Building
60,Downtown Toronto,0,0.0,University,Student Center,Medical School,High School,College Engineering Building,College Communications Building,College Arts Building,College Academic Building,General College & University,College Science Building
61,Downtown Toronto,0,0.0,University,Student Center,Medical School,High School,College Engineering Building,College Communications Building,College Arts Building,College Academic Building,General College & University,College Science Building
65,Central Toronto,0,0.0,University,College Residence Hall,College Academic Building,Student Center,Medical School,High School,General College & University,College Science Building,College Library,College Lab
66,Downtown Toronto,0,0.0,University,College Academic Building,College Science Building,Student Center,Medical School,College Residence Hall,College Library,College Lab,College Engineering Building,College Arts Building


In [97]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]


Unnamed: 0,Borough,Cluster,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
46,Central Toronto,1,1.0,General College & University,University,Student Center,Medical School,High School,College Science Building,College Residence Hall,College Library,College Lab,College Engineering Building


In [98]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]


Unnamed: 0,Borough,Cluster,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
23,North York,1,2.0,University,Student Center,Medical School,High School,General College & University,College Science Building,College Residence Hall,College Library,College Lab,College Engineering Building
29,North York,1,2.0,University,Student Center,Medical School,High School,General College & University,College Science Building,College Residence Hall,College Library,College Lab,College Engineering Building
42,East Toronto,0,2.0,University,Student Center,Medical School,High School,General College & University,College Science Building,College Residence Hall,College Library,College Lab,College Engineering Building
44,Central Toronto,1,2.0,University,Student Center,Medical School,High School,General College & University,College Science Building,College Residence Hall,College Library,College Lab,College Engineering Building
45,Central Toronto,1,2.0,University,Student Center,Medical School,High School,General College & University,College Science Building,College Residence Hall,College Library,College Lab,College Engineering Building
47,Central Toronto,0,2.0,University,Student Center,Medical School,High School,General College & University,College Science Building,College Residence Hall,College Library,College Lab,College Engineering Building
48,Central Toronto,0,2.0,University,Student Center,Medical School,High School,General College & University,College Science Building,College Residence Hall,College Library,College Lab,College Engineering Building
49,Central Toronto,0,2.0,University,Student Center,Medical School,High School,General College & University,College Science Building,College Residence Hall,College Library,College Lab,College Engineering Building
50,Downtown Toronto,0,2.0,University,Student Center,Medical School,High School,General College & University,College Science Building,College Residence Hall,College Library,College Lab,College Engineering Building
64,Central Toronto,0,2.0,University,Student Center,Medical School,High School,General College & University,College Science Building,College Residence Hall,College Library,College Lab,College Engineering Building


In [100]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]


Unnamed: 0,Borough,Cluster,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,North York,1,3.0,University,College Administrative Building,Student Center,Medical School,High School,General College & University,College Science Building,College Residence Hall,College Library,College Lab


In [142]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]


Unnamed: 0,Borough,Cluster,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,North York,1,4.0,University,College Administrative Building,Student Center,Medical School,High School,General College & University,College Science Building,College Residence Hall,College Library,College Lab


In [152]:
map_clusters.save('MapClusters.html')