# Coursera Capstone Project

#### This notebook was created in IBM Watson Studio for the final project ('Coursera Capstone') of the IBM Data Science Professional Certificate.

In [1]:
import pandas as pd
import numpy as np
print('Libraries imported!')

Libraries imported!


In [2]:
# Smoke test: confirm that the kernel runs code.
greeting = "Hello Capstone Project Course!"
print(greeting)

Hello Capstone Project Course!


## Segmenting and Clustering Neighborhoods in Toronto

I'll use the BeautifulSoup package to scrape the Wikipedia page https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M and use the resulting data to explore Toronto's neighborhoods.

In [3]:
from bs4 import BeautifulSoup
import requests
import csv
print('Libraries imported!')

Libraries imported!


### Scrape data using BeautifulSoup

In [4]:
# Download the Wikipedia page and fail loudly on an HTTP error instead of
# silently parsing an error page.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()
source = response.text
soup = BeautifulSoup(source, 'lxml')

# Locate the postal-code table; give a readable error if it cannot be found.
wikitable = soup.find('table', class_='wikitable sortable')
if wikitable is None:
    raise ValueError("No table with class 'wikitable sortable' was found on the page.")
wikitable_rows = wikitable.find_all('tr')

# The with-block closes the file even if writing a row fails.
# newline='' is what the csv module requires to avoid extra blank lines on
# some platforms; utf-8 keeps non-ASCII text intact regardless of the locale.
with open('Toronto_zipcodes_wikipedia.csv', 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(['Postcode', 'Borough', 'Neighbourhood'])

    for tr in wikitable_rows:
        td = tr.find_all('td')
        row = [i.text for i in td]
        # Rows without any <td> cell (e.g. a header row) carry no data.
        if row:
            csv_writer.writerow(row)

### Create and clean Dataframe with Pandas

In [5]:
# Load the scraped table back from disk.
df = pd.read_csv('Toronto_zipcodes_wikipedia.csv')

# Discard the rows whose borough is 'Not assigned'.
unassigned_rows = df.index[df['Borough'] == 'Not assigned']
df = df.drop(unassigned_rows)

# One row per (Postcode, Borough); the values of the remaining column are
# joined with ', '.
df = (
    df.groupby(by=['Postcode', 'Borough'])
      .agg(lambda names: ', '.join(names))
      .reset_index(level=['Postcode', 'Borough'])
)

# Strip newline characters from the neighbourhood names.
df['Neighbourhood'] = df['Neighbourhood'].replace('\n', '', regex=True)

print('Dataframe shape:', df.shape)
df.head()

Dataframe shape: (103, 3)


Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


## Get the coordinates of each postal code

In [6]:
!pip install geocoder
import geocoder
print('Geocoder imported!')

Geocoder imported!


### Retrieve the latitude and longitude of each postal code and add them to the dataframe

In [7]:
# Upper bound on lookups per postal code, so a code that never resolves
# cannot hang the notebook in an endless retry loop.
MAX_GEOCODE_ATTEMPTS = 5

code_list = df['Postcode'].tolist()
lat_list = []
lng_list = []

for postcode in code_list:
    latlng = None
    # A lookup may come back without coordinates, so retry a few times.
    for _ in range(MAX_GEOCODE_ATTEMPTS):
        g = geocoder.arcgis('{}, Toronto, ON'.format(postcode))
        latlng = g.latlng
        if latlng is not None:
            break
    if latlng is None:
        raise RuntimeError(
            'No coordinates found for postal code {} after {} attempts'.format(
                postcode, MAX_GEOCODE_ATTEMPTS))
    lat_list.append(latlng[0])
    lng_list.append(latlng[1])

# The lists are in the same order as the dataframe rows.
df['Latitude'] = lat_list
df['Longitude'] = lng_list

df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.811525,-79.195517
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.785665,-79.158725
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.765815,-79.175193
3,M1G,Scarborough,Woburn,43.768369,-79.21759
4,M1H,Scarborough,Cedarbrae,43.769688,-79.23944


## Explore and Cluster Toronto Neighbourhoods

In [8]:
from pandas.io.json import json_normalize
import json
from geopy.geocoders import Nominatim
import matplotlib
import matplotlib.cm as cm
import matplotlib.colors as colors
%matplotlib inline
from sklearn.cluster import KMeans
!pip install folium
import folium
print('Libraries imported!')

Libraries imported!


### Visualize Toronto and its Neighborhoods

In [9]:
# Look up the coordinates of Toronto; they are used to centre the map.
address = 'Toronto, ON'
geolocator = Nominatim(user_agent='ca_explorer')
location = geolocator.geocode(address)
# geocode() yields None when nothing matches; report that explicitly
# instead of failing with an AttributeError on the next line.
if location is None:
    raise ValueError('No geocoding result for address {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('Toronto coordinates are: {},{}'.format(latitude, longitude))

Toronto coordinates are: 43.653963,-79.387207


In [10]:
# Base map centred on the current latitude/longitude.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# One circle marker per dataframe row; the popup reads "<neighbourhood>,<borough>".
marker_rows = zip(df['Latitude'], df['Longitude'], df['Neighbourhood'], df['Borough'])
for lat, lng, neighbourhood, borough in marker_rows:
    popup_text = '{},{}'.format(neighbourhood, borough)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_opacity=0.3,
        parse_html=False,
    )
    marker.add_to(map_toronto)
map_toronto

#### SEE MAP ON GITHUB: https://github.com/toom90034/Coursera_Capstone/blob/master/Toronto_Boroughs.png

### Foursquare Credentials

In [37]:
# @hidden cell
import os

# Read the Foursquare credentials from the environment instead of embedding
# them in the notebook. Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET
# before starting the kernel; a missing variable raises KeyError here.
# NOTE(review): the key pair that used to be hard-coded in this cell has been
# published with the notebook and should be revoked.
CLIENT_ID = os.environ['FOURSQUARE_CLIENT_ID']
CLIENT_SECRET = os.environ['FOURSQUARE_CLIENT_SECRET']
VERSION = '20180604'  # sent as the `v` query parameter of every API request
print('Foursquare Credentials saved!')

Foursquare Credentials saved!


### We will focus our exploration on Downtown Toronto

In [12]:
# Keep only the rows whose borough is 'Downtown Toronto', renumbered from 0.
is_downtown = df['Borough'] == 'Downtown Toronto'
downtown_df = df.loc[is_downtown].reset_index(drop=True)
downtown_df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M4W,Downtown Toronto,Rosedale,43.682205,-79.377945
1,M4X,Downtown Toronto,"Cabbagetown, St. James Town",43.66816,-79.366602
2,M4Y,Downtown Toronto,Church and Wellesley,43.666585,-79.381302
3,M5A,Downtown Toronto,Harbourfront,43.650295,-79.359166
4,M5B,Downtown Toronto,"Ryerson, Garden District",43.657363,-79.37818


### Get the coordinates of Downtown Toronto

In [13]:
# Look up the coordinates of Downtown Toronto; this overwrites the
# latitude/longitude variables that the following map cells read.
address = 'Downtown Toronto, Toronto'

geolocator = Nominatim(user_agent="ca_explorer")
location = geolocator.geocode(address)
# geocode() yields None when nothing matches; report that explicitly
# instead of failing with an AttributeError on the next line.
if location is None:
    raise ValueError('No geocoding result for address {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geographical coordinates of Downtown Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Downtown Toronto are 43.6563221, -79.3809161.


### Visualize Downtown Toronto Neighborhoods

In [14]:
# Base map centred on the current latitude/longitude.
map_downtown = folium.Map(location=[latitude, longitude], zoom_start=12)

# Draw one circle marker per row of downtown_df, with the neighbourhood name
# as its popup.
downtown_points = zip(downtown_df['Latitude'], downtown_df['Longitude'], downtown_df['Neighbourhood'])
for lat, lng, hood_name in downtown_points:
    marker_options = dict(
        radius=5,
        popup=folium.Popup(hood_name, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    folium.CircleMarker([lat, lng], **marker_options).add_to(map_downtown)

map_downtown

#### SEE MAP ON GITHUB: https://github.com/toom90034/Coursera_Capstone/blob/master/Toronto_Downtown_Neighborhoods.png

### Top 100 venues in Downtown Toronto

In [15]:
# type your answer here
LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 5000 # search radius sent to the API

# Build the request URL for the venues/explore endpoint around the current
# latitude/longitude.
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID,
    CLIENT_SECRET,
    VERSION,
    latitude,
    longitude,
    radius,
    LIMIT)

# Display the URL with the credentials masked, so they are not stored in the
# notebook output. `url` itself is unchanged and still usable for the request.
url.replace(CLIENT_ID, '<CLIENT_ID>').replace(CLIENT_SECRET, '<CLIENT_SECRET>')

'https://api.foursquare.com/v2/venues/explore?&client_id=<CLIENT_ID>&client_secret=<CLIENT_SECRET>&v=20180604&ll=43.6563221,-79.3809161&radius=5000&limit=100'

In [16]:
# Send the request; the timeout keeps a stalled connection from hanging the
# notebook, and raise_for_status() surfaces HTTP errors before JSON parsing.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()
results

{'meta': {'code': 200, 'requestId': '5ddbebae0f5968002827c17d'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Toronto',
  'headerFullLocation': 'Toronto',
  'headerLocationGranularity': 'city',
  'totalResults': 240,
  'suggestedBounds': {'ne': {'lat': 43.70132214500004,
    'lng': -79.31883396954885},
   'sw': {'lat': 43.61132205499995, 'lng': -79.44299823045114}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '57eda381498ebe0e6ef40972',
       'name': 'UNIQLO ユニクロ',
       'location': {'address': '220 Yonge St',
        'crossStreet': 'at Dundas St W',
        'lat': 43.65591027779457,
        'lng': -79.38064099181345,
        'labeledLatLngs': [{'label': 'display',
        

### Structure the results in a new Dataframe

In [17]:
# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of a venue's first category, or None if it has none.

    The category list is read from ``row['categories']``; when that key is
    missing it is read from ``row['venue.categories']`` instead.

    Parameters
    ----------
    row : mapping-like (e.g. a dict or a pandas Series)
        Record holding the category list under one of the two keys.

    Returns
    -------
    The ``'name'`` entry of the first category, or ``None`` when the value
    is an empty list or not a list at all (e.g. ``None``).

    Raises
    ------
    KeyError
        If neither key is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; other errors propagate.
        categories_list = row['venue.categories']

    # A missing value (None / NaN) is treated like an empty category list.
    if not isinstance(categories_list, (list, tuple)) or len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [18]:
# Venue entries of the first result group of the explore response.
venues = results['response']['groups'][0]['items']

# Flatten the nested JSON records and keep only the columns of interest.
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
flattened_venues = json_normalize(venues)
nearby_venues = flattened_venues.loc[:, filtered_columns]

# Replace each category list by the value get_category_type returns for the row.
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# Keep only the last dotted component of each column name,
# e.g. 'venue.location.lat' -> 'lat'.
nearby_venues.columns = [label.split(".")[-1] for label in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,UNIQLO ユニクロ,Clothing Store,43.65591,-79.380641
1,Banh Mi Boys,Sandwich Place,43.659292,-79.381949
2,Elgin And Winter Garden Theatres,Theater,43.653394,-79.378507
3,Silver Snail Comics,Comic Shop,43.657031,-79.381403
4,Downtown Toronto,Neighborhood,43.653232,-79.385296


In [19]:
# Report how many venue rows the response produced.
venue_count = nearby_venues.shape[0]
print('{} venues were returned by Foursquare.'.format(venue_count))

100 venues were returned by Foursquare.


### Explore the neighborhoods of Downtown

In [20]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare explore endpoint around each given location.

    Uses the notebook-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT.

    Parameters
    ----------
    names, latitudes, longitudes : parallel iterables
        Label and coordinates of each location to query.
    radius : search radius sent to the API (default 500).

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame has these columns even when no venue was returned.
        'Venue Category' is None for a venue without any category.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    records = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # make the GET request; query parameters are passed separately so
        # that they are encoded by requests
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30,
        )
        response.raise_for_status()
        items = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue['categories']
            records.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue may come without any category
                categories[0]['name'] if categories else None))

    # Passing the column names to the constructor also works for zero rows.
    return pd.DataFrame(records, columns=columns)

In [21]:
# Fetch the venues around every row of downtown_df (default radius).
downtown_venues = getNearbyVenues(
    names=downtown_df['Neighbourhood'],
    latitudes=downtown_df['Latitude'],
    longitudes=downtown_df['Longitude'],
)

Rosedale
Cabbagetown, St. James Town
Church and Wellesley
Harbourfront
Ryerson, Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide, King, Richmond
Harbourfront East, Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Commerce Court, Victoria Hotel
Harbord, University of Toronto
Chinatown, Grange Park, Kensington Market
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Stn A PO Boxes 25 The Esplanade
First Canadian Place, Underground city
Christie


### Analyze each neighborhood

In [22]:
# one hot encoding
downtown_onehot = pd.get_dummies(downtown_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
downtown_onehot['Neighborhood'] = downtown_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [downtown_onehot.columns[-1]] + list(downtown_onehot.columns[:-1])
downtown_onehot = downtown_onehot[fixed_columns]
downtown_onehot.head()

Unnamed: 0,Yoga Studio,Afghan Restaurant,American Restaurant,Antique Shop,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Baby Store,...,Trail,Train Station,Tram Station,Vegetarian / Vegan Restaurant,Veterinarian,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [23]:
# (rows, columns) of the one-hot table; it has one row per row of downtown_venues.
downtown_onehot.shape

(1252, 188)

### Group rows by neighborhood and take the mean frequency of occurrence of each category

In [24]:
# Average the indicator columns within each neighborhood, then turn the
# group key back into a regular column.
per_neighborhood = downtown_onehot.groupby('Neighborhood')
downtown_grouped = per_neighborhood.mean().reset_index()
downtown_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Afghan Restaurant,American Restaurant,Antique Shop,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,...,Trail,Train Station,Tram Station,Vegetarian / Vegan Restaurant,Veterinarian,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint
0,"Adelaide, King, Richmond",0.0,0.0,0.03,0.0,0.01,0.0,0.03,0.0,0.0,...,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.015385,0.0,0.0,0.0,0.015385,...,0.0,0.0,0.0,0.015385,0.0,0.0,0.0,0.0,0.0,0.0
2,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.014085,0.0,0.0,0.0,0.0,0.0,0.014085,0.0,0.0,...,0.0,0.0,0.0,0.0,0.014085,0.0,0.0,0.0,0.0,0.0
3,"Cabbagetown, St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.010417,0.0,0.010417,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.010417,0.010417,0.010417,0.0,0.0
5,"Chinatown, Grange Park, Kensington Market",0.0,0.0,0.0,0.0,0.012048,0.012048,0.0,0.0,0.0,...,0.0,0.0,0.0,0.024096,0.0,0.0,0.048193,0.012048,0.0,0.0
6,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Church and Wellesley,0.0,0.011905,0.011905,0.0,0.0,0.011905,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.011905,0.011905
8,"Commerce Court, Victoria Hotel",0.0,0.0,0.03,0.0,0.01,0.0,0.01,0.0,0.01,...,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0
9,"Design Exchange, Toronto Dominion Centre",0.0,0.0,0.03,0.0,0.01,0.0,0.02,0.0,0.0,...,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0


### Check the new size

In [25]:
# (rows, columns) after grouping; the groupby yields one row per distinct 'Neighborhood' value.
downtown_grouped.shape

(18, 188)

### Take a look at the top 5 most common venues for each neighborhood

In [26]:
num_top_venues = 5

for hood in downtown_grouped['Neighborhood']:
    print("----"+hood+"----")
    # Transpose this neighborhood's row(s) into a (venue, freq) table.
    hood_freqs = downtown_grouped.loc[downtown_grouped['Neighborhood'] == hood].T.reset_index()
    hood_freqs.columns = ['venue','freq']
    # The first entry is the 'Neighborhood' column itself, not a category.
    hood_freqs = hood_freqs.iloc[1:]
    hood_freqs = hood_freqs.assign(freq=hood_freqs['freq'].astype(float)).round({'freq': 2})
    ranked = hood_freqs.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
         venue  freq
0  Coffee Shop  0.07
1         Café  0.06
2        Hotel  0.05
3   Steakhouse  0.04
4       Bakery  0.03


----Berczy Park----
                venue  freq
0         Coffee Shop  0.08
1          Restaurant  0.05
2              Bakery  0.05
3            Beer Bar  0.03
4  Seafood Restaurant  0.03


----CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara----
                  venue  freq
0           Coffee Shop  0.11
1    Italian Restaurant  0.07
2                   Bar  0.04
3  Gym / Fitness Center  0.04
4                   Pub  0.03


----Cabbagetown, St. James Town----
                venue  freq
0         Coffee Shop  0.08
1              Bakery  0.05
2  Italian Restaurant  0.05
3                Café  0.05
4         Pizza Place  0.05


----Central Bay Street----
            venue  freq
0     Coffee Shop  0.11
1  Clothing Store  0.06
2        Tea Room  0.03
3          Bakery  0.03

### Save the results in a new dataframe 

In [27]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest entries of ``row``, largest first.

    The first element of ``row`` is skipped; the remaining entries are sorted
    in descending order and the index labels of the first ``num_top_venues``
    of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index[:num_top_venues].values

In [28]:
num_top_venues = 10


def _ordinal(n):
    """Return n with its English ordinal suffix, e.g. 1st, 2nd, 3rd, 4th, 11th, 21st."""
    if 10 <= n % 100 <= 20:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th')
    return '{}{}'.format(n, suffix)


# create columns according to number of top venues
columns = ['Neighborhood'] + [
    '{} Most Common Venue'.format(_ordinal(rank))
    for rank in range(1, num_top_venues + 1)
]

# Build every row first and create the dataframe in one step: the
# neighborhood name followed by its most common venue categories.
rows = [
    [downtown_grouped['Neighborhood'].iloc[ind]]
    + list(return_most_common_venues(downtown_grouped.iloc[ind, :], num_top_venues))
    for ind in range(downtown_grouped.shape[0])
]
neighborhoods_venues_sorted = pd.DataFrame(rows, columns=columns, index=downtown_grouped.index)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Café,Hotel,Steakhouse,Gastropub,Asian Restaurant,Restaurant,Japanese Restaurant,Bakery,Bar
1,Berczy Park,Coffee Shop,Restaurant,Bakery,Hotel,Seafood Restaurant,Steakhouse,Café,Cheese Shop,Beer Bar,Lounge
2,"CN Tower, Bathurst Quay, Island airport, Harbo...",Coffee Shop,Italian Restaurant,Gym / Fitness Center,Bar,Café,Speakeasy,French Restaurant,Park,Restaurant,Sandwich Place
3,"Cabbagetown, St. James Town",Coffee Shop,Bakery,Café,Italian Restaurant,Restaurant,Pizza Place,Park,Sandwich Place,Chinese Restaurant,Pub
4,Central Bay Street,Coffee Shop,Clothing Store,Bakery,Tea Room,Burger Joint,Sushi Restaurant,Ice Cream Shop,Fast Food Restaurant,Sporting Goods Shop,Hotel


### Now Let's cluster the neighborhoods

In [29]:
# set number of clusters
kclusters = 5

downtown_grouped_clustering = downtown_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(downtown_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 3, 0, 3, 4, 0, 0, 0], dtype=int32)

In [30]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

downtown_merged = downtown_df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
downtown_merged = downtown_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood')

downtown_merged.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4W,Downtown Toronto,Rosedale,43.682205,-79.377945,1,Playground,Park,Candy Store,Grocery Store,Dumpling Restaurant,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant
1,M4X,Downtown Toronto,"Cabbagetown, St. James Town",43.66816,-79.366602,3,Coffee Shop,Bakery,Café,Italian Restaurant,Restaurant,Pizza Place,Park,Sandwich Place,Chinese Restaurant,Pub
2,M4Y,Downtown Toronto,Church and Wellesley,43.666585,-79.381302,0,Coffee Shop,Japanese Restaurant,Restaurant,Sushi Restaurant,Gay Bar,Fast Food Restaurant,Pub,Men's Store,Gastropub,Dance Studio
3,M5A,Downtown Toronto,Harbourfront,43.650295,-79.359166,0,Coffee Shop,Bakery,Theater,Boat or Ferry,Gym / Fitness Center,Shoe Store,Gastropub,Breakfast Spot,Spa,Brewery
4,M5B,Downtown Toronto,"Ryerson, Garden District",43.657363,-79.37818,0,Coffee Shop,Clothing Store,Cosmetics Shop,Café,Fast Food Restaurant,Restaurant,Burger Joint,Hotel,Bakery,Diner


### Visualize the results

In [31]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=13)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(downtown_merged['Latitude'], downtown_merged['Longitude'], downtown_merged['Neighbourhood'], downtown_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

#### SEE MAP ON 

### Examine the clusters

In [32]:
# Rows of cluster 0: column 2 (the neighbourhood) plus everything from column 5 onwards.
shown_columns = [2] + list(range(5, downtown_merged.shape[1]))
downtown_merged[downtown_merged['Cluster Labels'] == 0].iloc[:, shown_columns]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Church and Wellesley,0,Coffee Shop,Japanese Restaurant,Restaurant,Sushi Restaurant,Gay Bar,Fast Food Restaurant,Pub,Men's Store,Gastropub,Dance Studio
3,Harbourfront,0,Coffee Shop,Bakery,Theater,Boat or Ferry,Gym / Fitness Center,Shoe Store,Gastropub,Breakfast Spot,Spa,Brewery
4,"Ryerson, Garden District",0,Coffee Shop,Clothing Store,Cosmetics Shop,Café,Fast Food Restaurant,Restaurant,Burger Joint,Hotel,Bakery,Diner
5,St. James Town,0,Coffee Shop,Café,Hotel,Restaurant,Bakery,Cosmetics Shop,Italian Restaurant,Clothing Store,Gastropub,Seafood Restaurant
6,Berczy Park,0,Coffee Shop,Restaurant,Bakery,Hotel,Seafood Restaurant,Steakhouse,Café,Cheese Shop,Beer Bar,Lounge
7,Central Bay Street,0,Coffee Shop,Clothing Store,Bakery,Tea Room,Burger Joint,Sushi Restaurant,Ice Cream Shop,Fast Food Restaurant,Sporting Goods Shop,Hotel
8,"Adelaide, King, Richmond",0,Coffee Shop,Café,Hotel,Steakhouse,Gastropub,Asian Restaurant,Restaurant,Japanese Restaurant,Bakery,Bar
10,"Design Exchange, Toronto Dominion Centre",0,Coffee Shop,Café,Hotel,Restaurant,Bar,Deli / Bodega,American Restaurant,Seafood Restaurant,Steakhouse,Italian Restaurant
11,"Commerce Court, Victoria Hotel",0,Coffee Shop,Café,Hotel,Restaurant,Japanese Restaurant,Italian Restaurant,Gastropub,Beer Bar,American Restaurant,Seafood Restaurant
14,"CN Tower, Bathurst Quay, Island airport, Harbo...",0,Coffee Shop,Italian Restaurant,Gym / Fitness Center,Bar,Café,Speakeasy,French Restaurant,Park,Restaurant,Sandwich Place


In [33]:
# Rows of cluster 1: column 2 (the neighbourhood) plus everything from column 5 onwards.
shown_columns = [2] + list(range(5, downtown_merged.shape[1]))
downtown_merged[downtown_merged['Cluster Labels'] == 1].iloc[:, shown_columns]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Rosedale,1,Playground,Park,Candy Store,Grocery Store,Dumpling Restaurant,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant


In [34]:
# Rows of cluster 2: column 2 (the neighbourhood) plus everything from column 5 onwards.
shown_columns = [2] + list(range(5, downtown_merged.shape[1]))
downtown_merged[downtown_merged['Cluster Labels'] == 2].iloc[:, shown_columns]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,"Harbourfront East, Toronto Islands, Union Station",2,Pier,Harbor / Marina,Park,Thrift / Vintage Store,Wings Joint,Dumpling Restaurant,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm


In [35]:
# Rows of cluster 3: column 2 (the neighbourhood) plus everything from column 5 onwards.
shown_columns = [2] + list(range(5, downtown_merged.shape[1]))
downtown_merged[downtown_merged['Cluster Labels'] == 3].iloc[:, shown_columns]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,"Cabbagetown, St. James Town",3,Coffee Shop,Bakery,Café,Italian Restaurant,Restaurant,Pizza Place,Park,Sandwich Place,Chinese Restaurant,Pub
12,"Harbord, University of Toronto",3,Café,Coffee Shop,Restaurant,Bakery,Bar,Italian Restaurant,Japanese Restaurant,Bookstore,Gym,Pharmacy
13,"Chinatown, Grange Park, Kensington Market",3,Café,Bar,Chinese Restaurant,Vietnamese Restaurant,Mexican Restaurant,Coffee Shop,Dumpling Restaurant,Comfort Food Restaurant,Cocktail Bar,Bakery


In [36]:
# Rows of cluster 4: column 2 (the neighbourhood) plus everything from column 5 onwards.
shown_columns = [2] + list(range(5, downtown_merged.shape[1]))
downtown_merged[downtown_merged['Cluster Labels'] == 4].iloc[:, shown_columns]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,Christie,4,Café,Grocery Store,Playground,Italian Restaurant,Candy Store,Baby Store,Coffee Shop,Diner,Ethiopian Restaurant,Fish & Chips Shop


## We can clearly see the clustered neighborhoods and also notice that coffee shops are everywhere! 