# Applied Data Science Capstone Project

## Toronto Neighborhoods

### Part 1. Data Loading, Preparation and Wrangling

*Importing all necessary libraries*

In [1]:
from bs4 import BeautifulSoup
import urllib
import pandas as pd
import numpy as np

*Downloading and soup'ifying data*

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

In [3]:
html_doc = urllib.request.urlopen(url)

In [4]:
soup = BeautifulSoup(html_doc, 'html.parser')

*Converting data to a dataframe*

In [5]:
table = soup.find('table')
table_rows = table.find_all('tr')

l = []

for tr in table_rows:
    td = tr.find_all('td')
    row = [tr.text.strip() for tr in td]
    if row: 
        l.append(row)
    
    
neigh = pd.DataFrame(l,columns=['Postcode','Borough','Neighborhood'])
neigh.head(10)

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


*Removing rows not assigned to any borough*

In [6]:
dirty = ['Not assigned']
neigh = neigh[~neigh['Borough'].isin(dirty)]
neigh.head(10)

Unnamed: 0,Postcode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


*Replacing 'Not assigned' neighborhoods with borough names*

In [7]:
neigh['Neighborhood'].where(neigh['Neighborhood'] != 'Not assigned', neigh['Borough'], axis=0, inplace=True)
neigh.shape

(211, 3)

*Grouping rows by borough and postcode, and combining neighborhoods into lists*

In [8]:
neigh['Neighborhood'] = neigh.groupby(['Postcode','Borough'])['Neighborhood'].transform(lambda x: ', '.join(x))
neigh = neigh.drop_duplicates()

#Experimental approach to combine neighborhoods into a list, but it's more trouble than it's worth for the task at hand. 
#neigh = pd.DataFrame(neigh.groupby(['Postcode','Borough'])['Neighborhood'].apply(list)).reset_index()
neigh.head()

Unnamed: 0,Postcode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Harbourfront, Regent Park"
6,M6A,North York,"Lawrence Heights, Lawrence Manor"
8,M7A,Queen's Park,Queen's Park


In [9]:
neigh.shape

(103, 3)

### Part 2: Getting neighborhood geodata

*Importing libraries* 

In [10]:
import geocoder

In [11]:
neigh.insert(len(neigh.columns),'Latitude', 0)
neigh.insert(len(neigh.columns),'Longitude', 0)
neigh.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
2,M3A,North York,Parkwoods,0,0
3,M4A,North York,Victoria Village,0,0
4,M5A,Downtown Toronto,"Harbourfront, Regent Park",0,0
6,M6A,North York,"Lawrence Heights, Lawrence Manor",0,0
8,M7A,Queen's Park,Queen's Park,0,0


*Defining the geocoder function that takes a postal code and returns lat and lon* 

In [12]:
def geolocator(postal_code):
    
    lat_lng_coords = None

    g = geocoder.arcgis('{}, Toronto, Ontario'.format(postal_code))
    lat_lng_coords = g.latlng

    latitude = lat_lng_coords[0]
    longitude = lat_lng_coords[1]
    
    return latitude, longitude

*Going over the dataframe*

In [13]:
nbhood = neigh.copy()
nbhood.head()


Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
2,M3A,North York,Parkwoods,0,0
3,M4A,North York,Victoria Village,0,0
4,M5A,Downtown Toronto,"Harbourfront, Regent Park",0,0
6,M6A,North York,"Lawrence Heights, Lawrence Manor",0,0
8,M7A,Queen's Park,Queen's Park,0,0


In [14]:
nbhood['Coordinates'] = neigh['Postcode'].apply(geolocator)

In [15]:
nbhood.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Coordinates
2,M3A,North York,Parkwoods,0,0,"(43.752440000000036, -79.32927072599995)"
3,M4A,North York,Victoria Village,0,0,"(43.730420577000075, -79.31331999999998)"
4,M5A,Downtown Toronto,"Harbourfront, Regent Park",0,0,"(43.65512000000007, -79.36263979699999)"
6,M6A,North York,"Lawrence Heights, Lawrence Manor",0,0,"(43.72312500000004, -79.45158914699994)"
8,M7A,Queen's Park,Queen's Park,0,0,"(43.66110229800006, -79.39103499999999)"


In [16]:
nbhood[['Latitude','Longitude']] = pd.DataFrame(nbhood['Coordinates'].tolist(), index = nbhood.index)
nbhood.drop(['Coordinates'], axis=1, inplace=True)

In [17]:
nbhood.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
2,M3A,North York,Parkwoods,43.75244,-79.329271
3,M4A,North York,Victoria Village,43.730421,-79.31332
4,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65512,-79.36264
6,M6A,North York,"Lawrence Heights, Lawrence Manor",43.723125,-79.451589
8,M7A,Queen's Park,Queen's Park,43.661102,-79.391035


### Part 3: Visualization

*Importing libraries and setting up the map*

In [18]:
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium
from geopy.geocoders import Nominatim
import json
import requests
from pandas.io.json import json_normalize

In [19]:
address = 'Toronto, Canada'

geoloc = Nominatim(user_agent="ny_explorer")
location = geoloc.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geographical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The geographical coordinates of Toronto are 43.653963, -79.387207.


In [20]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

k = 0

# add markers to map
for lat, lng, borough, neighborhood in zip(nbhood['Latitude'], nbhood['Longitude'], nbhood['Borough'], nbhood['Neighborhood']):
    label = '{}: {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [21]:
# The code was removed by Watson Studio for sharing.

In [22]:
LIMIT = 100 
radius = 500 

In [23]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [24]:
def getNearbyVenues(names, latitudes, longitudes, radius):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        #print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [25]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [26]:
toronto_venues = getNearbyVenues(names=nbhood['Neighborhood'],
                                   latitudes=nbhood['Latitude'],
                                   longitudes=nbhood['Longitude'],
                                   radius=radius)

In [27]:
toronto_venues.groupby('Neighborhood').count()
print('There are {} unique categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 263 unique categories.


### Part 3a. Analysis

In [28]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Adult Boutique,Afghan Restaurant,Airport,American Restaurant,Antique Shop,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [29]:
toronto_onehot.shape

(2479, 263)

In [30]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Adult Boutique,Afghan Restaurant,Airport,American Restaurant,Antique Shop,Art Gallery,Art Museum,Arts & Crafts Store,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store
0,"Adelaide, King, Richmond",0.000000,0.000000,0.000000,0.0,0.030000,0.000000,0.010000,0.00,0.000000,...,0.000000,0.000000,0.000000,0.010000,0.000000,0.000000,0.000000,0.010000,0.000000,0.000000
1,Agincourt,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.00,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.071429,0.000000,0.000000,0.000000
2,"Agincourt North, L'Amoreaux East, Milliken, St...",0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.00,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.00,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.066667,0.000000,0.000000,0.000000,0.000000
4,"Alderwood, Long Branch",0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.00,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
5,"Bathurst Manor, Downsview North, Wilson Heights",0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.00,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
6,Bayview Village,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.00,0.000000,...,0.000000,0.333333,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
7,"Bedford Park, Lawrence Manor East",0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.00,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
8,Berczy Park,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.015625,0.00,0.000000,...,0.000000,0.000000,0.000000,0.015625,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
9,"Birch Cliff, Cliffside West",0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.00,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [31]:
toronto_grouped.shape

(100, 263)

In [32]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
         venue  freq
0         Café  0.08
1  Coffee Shop  0.07
2        Hotel  0.05
3   Steakhouse  0.03
4       Bakery  0.03


----Agincourt----
                venue  freq
0         Supermarket  0.14
1  Chinese Restaurant  0.14
2       Shopping Mall  0.14
3                Pool  0.07
4     Bubble Tea Shop  0.07


----Agincourt North, L'Amoreaux East, Milliken, Steeles East----
                        venue  freq
0                    Pharmacy  0.67
1            Sushi Restaurant  0.33
2                 Yoga Studio  0.00
3  Modern European Restaurant  0.00
4                      Museum  0.00


----Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown----
                 venue  freq
0        Grocery Store  0.13
1  Japanese Restaurant  0.07
2     Sushi Restaurant  0.07
3       Sandwich Place  0.07
4         Liquor Store  0.07


----Alderwood, Long Branch----
                   venue  freq
0           

In [33]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Café,Coffee Shop,Hotel,Asian Restaurant,Bakery,Restaurant,Burger Joint,Gastropub,American Restaurant,Deli / Bodega
1,Agincourt,Shopping Mall,Supermarket,Chinese Restaurant,Hong Kong Restaurant,Pool,Bubble Tea Shop,Skating Rink,Shanghai Restaurant,Sushi Restaurant,Bakery
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Pharmacy,Sushi Restaurant,Women's Store,Falafel Restaurant,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Farm
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Grocery Store,Pizza Place,Park,Coffee Shop,Sandwich Place,Fast Food Restaurant,Liquor Store,Fried Chicken Joint,Beer Store,Sushi Restaurant
4,"Alderwood, Long Branch",Gym,Sandwich Place,Performing Arts Venue,Pub,Candy Store,Women's Store,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store


### Part 4. Clustering

In [34]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 0, 1, 4, 0, 0, 4], dtype=int32)

In [35]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = nbhood.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,M3A,North York,Parkwoods,43.75244,-79.329271,4.0,Food & Drink Shop,Fast Food Restaurant,Park,Dog Run,Fish Market,Fish & Chips Shop,Field,Farmers Market,Farm,Falafel Restaurant
3,M4A,North York,Victoria Village,43.730421,-79.31332,4.0,Grocery Store,French Restaurant,Park,Intersection,Women's Store,Event Space,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant
4,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65512,-79.36264,0.0,Coffee Shop,Restaurant,Breakfast Spot,Yoga Studio,Thai Restaurant,Bakery,Italian Restaurant,Pub,Electronics Store,Mexican Restaurant
6,M6A,North York,"Lawrence Heights, Lawrence Manor",43.723125,-79.451589,0.0,Clothing Store,Dessert Shop,Toy / Game Store,Cosmetics Shop,Men's Store,Food Court,Chocolate Shop,Furniture / Home Store,Pharmacy,Electronics Store
8,M7A,Queen's Park,Queen's Park,43.661102,-79.391035,0.0,Coffee Shop,Café,Sandwich Place,Indian Restaurant,Bubble Tea Shop,Italian Restaurant,Pharmacy,Burrito Place,Juice Bar,Bike Rental / Bike Share


In [36]:
toronto_merged.dropna(inplace=True)
toronto_merged['Cluster Labels'] = toronto_merged['Cluster Labels'].astype(int)

In [37]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [38]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Downtown Toronto,0,Coffee Shop,Restaurant,Breakfast Spot,Yoga Studio,Thai Restaurant,Bakery,Italian Restaurant,Pub,Electronics Store,Mexican Restaurant
6,North York,0,Clothing Store,Dessert Shop,Toy / Game Store,Cosmetics Shop,Men's Store,Food Court,Chocolate Shop,Furniture / Home Store,Pharmacy,Electronics Store
8,Queen's Park,0,Coffee Shop,Café,Sandwich Place,Indian Restaurant,Bubble Tea Shop,Italian Restaurant,Pharmacy,Burrito Place,Juice Bar,Bike Rental / Bike Share
10,Etobicoke,0,Pharmacy,Café,Park,Skating Rink,Shopping Mall,Bank,Grocery Store,Ethiopian Restaurant,Dumpling Restaurant,Eastern European Restaurant
15,East York,0,Pizza Place,Fast Food Restaurant,Athletics & Sports,Pharmacy,Pub,Rock Climbing Spot,Café,Breakfast Spot,Gastropub,Bank
17,Downtown Toronto,0,Coffee Shop,Clothing Store,Cosmetics Shop,Café,Middle Eastern Restaurant,Lingerie Store,Tea Room,Fast Food Restaurant,Japanese Restaurant,Hotel
19,North York,0,Pizza Place,Fast Food Restaurant,Grocery Store,Sushi Restaurant,Japanese Restaurant,Italian Restaurant,Rental Car Location,Latin American Restaurant,Asian Restaurant,Mediterranean Restaurant
22,Etobicoke,0,Pizza Place,Sandwich Place,Tea Room,Chinese Restaurant,Bank,Farmers Market,Farm,Falafel Restaurant,Event Space,Dog Run
27,Scarborough,0,Bar,Women's Store,Donut Shop,Flower Shop,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Farm
31,North York,0,Grocery Store,Coffee Shop,Smoke Shop,Gym,Beer Store,Intersection,Event Space,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant


In [39]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
64,North York,1,Men's Store,Women's Store,Food,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant


In [40]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
110,North York,2,Music Venue,Food,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space


In [41]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,Scarborough,3,Home Service,Food & Drink Shop,Flower Shop,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant


In [42]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,North York,4,Food & Drink Shop,Fast Food Restaurant,Park,Dog Run,Fish Market,Fish & Chips Shop,Field,Farmers Market,Farm,Falafel Restaurant
3,North York,4,Grocery Store,French Restaurant,Park,Intersection,Women's Store,Event Space,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant
14,North York,4,Burger Joint,Soccer Field,Resort,Park,Event Space,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Women's Store
42,Scarborough,4,Gym / Fitness Center,Construction & Landscaping,Tea Room,Park,Women's Store,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space
49,York,4,Park,Women's Store,Gym,Mexican Restaurant,Fast Food Restaurant,Market,Sporting Goods Shop,Beer Store,Bakery,Pharmacy
53,Scarborough,4,Coffee Shop,Korean Restaurant,Park,Business Service,Women's Store,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant
82,East York,4,Bar,Italian Restaurant,Park,Farmers Market,Women's Store,Falafel Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space
83,Downtown Toronto,4,Harbor / Marina,Music Venue,Park,Athletics & Sports,Falafel Restaurant,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space
94,North York,4,Construction & Landscaping,Park,Trail,Women's Store,Falafel Restaurant,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space
97,East Toronto,4,Discount Store,Fast Food Restaurant,Grocery Store,Park,Bus Line,Event Space,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant
