# Question 1: Scraping
Below I have scraped: https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M to complete Q1 

*Please scroll down further if grading Question 2 or 3*

In [108]:
import pandas as pd
import numpy as np

# the below requires lxml to be installed.  I used pip to install it on local machine
#!pip3 install lxml # uncomment this if you need lxml you may have to restart the jupyter notebook after installation
tables = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', header = 0)

tables

[    Postal Code           Borough  \
 0           M1A      Not assigned   
 1           M2A      Not assigned   
 2           M3A        North York   
 3           M4A        North York   
 4           M5A  Downtown Toronto   
 ..          ...               ...   
 175         M5Z      Not assigned   
 176         M6Z      Not assigned   
 177         M7Z      Not assigned   
 178         M8Z         Etobicoke   
 179         M9Z      Not assigned   
 
                                           Neighborhood  
 0                                         Not assigned  
 1                                         Not assigned  
 2                                            Parkwoods  
 3                                     Victoria Village  
 4                            Regent Park, Harbourfront  
 ..                                                 ...  
 175                                       Not assigned  
 176                                       Not assigned  
 177                

**From above we can see that read_html has returned a list of tables as DataFrames and we can see that the first table in the list is the one we need.**

In [109]:
table = tables[0]
table.head()


Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [110]:
# remove Not assigned Boroughs from table
table = table[table.Borough != 'Not assigned'].reset_index()
table = table[['Postal Code', 'Borough', 'Neighborhood']]
table.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


**Once I removed all of the Not assigned Boroughs above, there are no remaining Neighborhoods that have the label Not assigned.  I check this with the code below.**

In [111]:
try:
    print(table.Neighborhood.value_counts()['Not assigned'])
except:
    print("There are no neighborhoods containing Not assigned after the Not assigned Boroughs are removed")


There are no neighborhoods containing Not assigned after the Not assigned Boroughs are removed


**Finally I print out the shape of my table**

In [112]:
table.shape

(103, 3)

# Question 2 
In this question I get the latitude and longitude of each neighborhood

**I could not get geocoder to work so I ended up using the provided csv given on the course page**

In [113]:
coordinates = pd.read_csv('http://cocl.us/Geospatial_data')
coordinates

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


**I then merge the original table with its coordinates to get the expected output**


In [114]:
table = pd.merge(table, coordinates)
table.head(15)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


# Question 3
In this section I explore and cluster the neighborhoods in Toronto

I begin by gathering the coordinates of Toronto

In [115]:
import folium
from geopy.geocoders import Nominatim
address = 'Toronto, Ontario'
geolocator = Nominatim(user_agent='toronto_explorer')
location = geolocator.geocode(address)
lat = location.latitude
long = location.longitude
print(lat, long)

43.6534817 -79.3839347


**Below I create a map of all the neighborhoods in Ontario**

In [116]:
map = folium.Map(location=[lat, long], zoom_start=10)

for lat, lng, borough, neighborhood in zip(table.Latitude, table.Longitude, table.Borough, table.Neighborhood):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map)
map

**Now we are going to look at only those neighborhoods in North York**

In [117]:
address = 'North York, Ontario'
geolocator = Nominatim(user_agent='ny_explorer')
location = geolocator.geocode(address)
lat = location.latitude
long = location.longitude
print(lat, long)

43.7543263 -79.44911696639593


In [118]:
north_york_data = table[table['Borough'] == 'North York'].reset_index(drop=True)
north_york_data.shape

(24, 5)

In [119]:
map = folium.Map(location=[lat, long], zoom_start=12)

for lat, lng, borough, neighborhood in zip(north_york_data.Latitude, north_york_data.Longitude, north_york_data.Borough, north_york_data.Neighborhood):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map)
map

In [120]:
CLIENT_ID = 'RKRACHUNPQLEQHK1NMYXSP3DR2YE3BRYBDRCBBC2LB1SR503'# your Foursquare ID
CLIENT_SECRET = 'DJQGSGGER5JVH4NREDDUJCDVIQ52VPM0IXDVGRXYQ4QGSUNB' # your Foursquare Secret
VERSION = '20200610'
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: RKRACHUNPQLEQHK1NMYXSP3DR2YE3BRYBDRCBBC2LB1SR503
CLIENT_SECRET:DJQGSGGER5JVH4NREDDUJCDVIQ52VPM0IXDVGRXYQ4QGSUNB


**I now use the function from the lab to help us locate venues around each neighborhood above**

In [121]:
def getNearbyVenues(names, latitudes, longitudes, CLIENT_ID, CLIENT_SECRET, radius=500):
    
    LIMIT = 100
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [122]:
import requests
ny_venues = getNearbyVenues(names=north_york_data.Neighborhood,
                            latitudes=north_york_data.Latitude,
                            longitudes=north_york_data.Longitude, 
                           CLIENT_ID=CLIENT_ID,
                           CLIENT_SECRET=CLIENT_SECRET)

Parkwoods
Victoria Village
Lawrence Manor, Lawrence Heights
Don Mills
Glencairn
Don Mills
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Fairview, Henry Farm, Oriole
Northwood Park, York University
Bayview Village
Downsview
York Mills, Silver Hills
Downsview
North Park, Maple Leaf Park, Upwood Park
Humber Summit
Willowdale, Newtonbrook
Downsview
Bedford Park, Lawrence Manor East
Humberlea, Emery
Willowdale, Willowdale East
Downsview
York Mills West
Willowdale, Willowdale West


In [123]:
ny_venues

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.332140,Park
1,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,Parkwoods,43.753259,-79.329656,Corrosion Service Company Limited,43.752432,-79.334661,Construction & Landscaping
3,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop
...,...,...,...,...,...,...,...
238,"Willowdale, Willowdale West",43.782736,-79.442259,Shoppers Drug Mart,43.784847,-79.446028,Pharmacy
239,"Willowdale, Willowdale West",43.782736,-79.442259,RBC Royal Bank,43.783894,-79.446603,Bank
240,"Willowdale, Willowdale West",43.782736,-79.442259,Tim Hortons,43.780940,-79.444231,Coffee Shop
241,"Willowdale, Willowdale West",43.782736,-79.442259,Price Chopper,43.783237,-79.446339,Grocery Store


I'm checking the size of the resulting data fram above and also determining how many venues were returned for each neighborhood

In [124]:
print(ny_venues.shape)
ny_venues.groupby('Neighborhood').count()

(243, 7)


Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Bathurst Manor, Wilson Heights, Downsview North",22,22,22,22,22,22
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",22,22,22,22,22,22
Don Mills,28,28,28,28,28,28
Downsview,15,15,15,15,15,15
"Fairview, Henry Farm, Oriole",65,65,65,65,65,65
Glencairn,4,4,4,4,4,4
Hillcrest Village,5,5,5,5,5,5
Humber Summit,1,1,1,1,1,1
"Humberlea, Emery",2,2,2,2,2,2


In [125]:
print('There are {} unique categories.'.format(len(ny_venues['Venue Category'].unique())))

There are 102 unique categories.


**I'm going to perform one hot encoding on the venues**

In [126]:
ny_onehot = pd.get_dummies(ny_venues[['Venue Category']], prefix="", prefix_sep="")
ny_onehot
ny_onehot['Neighborhood'] = ny_venues['Neighborhood']
fixed_columns = [ny_onehot.columns[-1]] + list(ny_onehot.columns[:-1])
ny_onehot = ny_onehot[fixed_columns]
ny_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,...,Supermarket,Supplement Shop,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Vietnamese Restaurant,Women's Store
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


**I now group rows by neighborhodd and mean of frequency of occurrence**

In [127]:
ny_grouped = ny_onehot.groupby('Neighborhood').mean().reset_index()
ny_grouped.head()

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,...,Supermarket,Supplement Shop,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Vietnamese Restaurant,Women's Store
0,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,...,0.045455,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bedford Park, Lawrence Manor East",0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.045455,0.0,0.045455,0.0,0.0,0.0,0.0,0.0
3,Don Mills,0.0,0.0,0.0,0.035714,0.0,0.071429,0.0,0.0,0.0,...,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Downsview,0.0,0.066667,0.0,0.0,0.0,0.0,0.066667,0.0,0.066667,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [128]:
num_top_venues = 5

for hood in ny_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = ny_grouped[ny_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Bathurst Manor, Wilson Heights, Downsview North----
         venue  freq
0  Coffee Shop  0.09
1         Bank  0.09
2  Pizza Place  0.05
3        Diner  0.05
4   Restaurant  0.05


----Bayview Village----
                 venue  freq
0   Chinese Restaurant  0.25
1                 Café  0.25
2                 Bank  0.25
3  Japanese Restaurant  0.25
4    Accessories Store  0.00


----Bedford Park, Lawrence Manor East----
                     venue  freq
0           Sandwich Place  0.09
1       Italian Restaurant  0.09
2              Coffee Shop  0.09
3               Restaurant  0.09
4  Comfort Food Restaurant  0.05


----Don Mills----
                 venue  freq
0           Restaurant  0.07
1          Coffee Shop  0.07
2     Asian Restaurant  0.07
3                  Gym  0.07
4  Japanese Restaurant  0.07


----Downsview----
           venue  freq
0  Grocery Store  0.20
1           Park  0.13
2           Bank  0.07
3    Snack Place  0.07
4   Liquor Store  0.07


----Fairview, Henry Fa

In [129]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [130]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = ny_grouped['Neighborhood']

for ind in np.arange(ny_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(ny_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head(15)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Bank,Gift Shop,Shopping Mall,Middle Eastern Restaurant,Mobile Phone Shop,Park,Deli / Bodega,Pharmacy,Pizza Place
1,Bayview Village,Chinese Restaurant,Bank,Café,Japanese Restaurant,Food Truck,Food Court,Construction & Landscaping,Fried Chicken Joint,Convenience Store,Cosmetics Shop
2,"Bedford Park, Lawrence Manor East",Restaurant,Coffee Shop,Sandwich Place,Italian Restaurant,Pizza Place,Pharmacy,Grocery Store,Greek Restaurant,Indian Restaurant,Comfort Food Restaurant
3,Don Mills,Café,Asian Restaurant,Coffee Shop,Japanese Restaurant,Beer Store,Gym,Restaurant,Art Gallery,Clothing Store,Bus Line
4,Downsview,Grocery Store,Park,Discount Store,Liquor Store,Airport,Food Truck,Athletics & Sports,Bank,Baseball Field,Gym / Fitness Center
5,"Fairview, Henry Farm, Oriole",Clothing Store,Coffee Shop,Fast Food Restaurant,Restaurant,Japanese Restaurant,Cosmetics Shop,Convenience Store,Bank,Bakery,Mobile Phone Shop
6,Glencairn,Park,Japanese Restaurant,Pizza Place,Pub,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega
7,Hillcrest Village,Golf Course,Mediterranean Restaurant,Athletics & Sports,Pool,Dog Run,Women's Store,Diner,Comfort Food Restaurant,Construction & Landscaping,Convenience Store
8,Humber Summit,Pizza Place,Women's Store,Chinese Restaurant,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store
9,"Humberlea, Emery",Fabric Shop,Baseball Field,Women's Store,Distribution Center,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store


# Now I run k means clustering to cluster the neighborhoods


In [131]:
#!pip3 install sklearn  #uncomment to install sklearn
from sklearn.cluster import KMeans



In [159]:
kclusters = 5

ny_grouped_clusters = ny_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(ny_grouped_clusters)

#check cluster labels genreated for each row in the datafram
kmeans.labels_[0:10]

array([0, 0, 0, 0, 0, 0, 0, 3, 1, 4], dtype=int32)

In [160]:
try:
    neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
except:
    pass
    
ny_merged = north_york_data
ny_merged = ny_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

ny_merged = ny_merged.dropna() 
ny_merged['Cluster Labels'] = ny_merged['Cluster Labels'].astype(int)
ny_merged.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,2,Construction & Landscaping,Food & Drink Shop,Park,Women's Store,Diner,Coffee Shop,Comfort Food Restaurant,Convenience Store,Cosmetics Shop,Deli / Bodega
1,M4A,North York,Victoria Village,43.725882,-79.315572,0,Coffee Shop,French Restaurant,Hockey Arena,Portuguese Restaurant,Discount Store,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega
2,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,0,Clothing Store,Furniture / Home Store,Women's Store,Miscellaneous Shop,Boutique,Coffee Shop,Event Space,Vietnamese Restaurant,Gift Shop,Accessories Store
3,M3B,North York,Don Mills,43.745906,-79.352188,0,Café,Asian Restaurant,Coffee Shop,Japanese Restaurant,Beer Store,Gym,Restaurant,Art Gallery,Clothing Store,Bus Line
4,M6B,North York,Glencairn,43.709577,-79.445073,0,Park,Japanese Restaurant,Pizza Place,Pub,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega


**Now I visualize the clusters**

In [161]:
# create map
import matplotlib.cm as cm
import matplotlib.colors as colors
map_clusters = folium.Map(location=[lat, long], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(ny_merged['Latitude'], ny_merged['Longitude'], ny_merged['Neighborhood'], ny_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Finally I examine the clusters to see what each cluster has in common

In [162]:
ny_merged.loc[ny_merged['Cluster Labels'] == 0, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,North York,0,Coffee Shop,French Restaurant,Hockey Arena,Portuguese Restaurant,Discount Store,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega
2,North York,0,Clothing Store,Furniture / Home Store,Women's Store,Miscellaneous Shop,Boutique,Coffee Shop,Event Space,Vietnamese Restaurant,Gift Shop,Accessories Store
3,North York,0,Café,Asian Restaurant,Coffee Shop,Japanese Restaurant,Beer Store,Gym,Restaurant,Art Gallery,Clothing Store,Bus Line
4,North York,0,Park,Japanese Restaurant,Pizza Place,Pub,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega
5,North York,0,Café,Asian Restaurant,Coffee Shop,Japanese Restaurant,Beer Store,Gym,Restaurant,Art Gallery,Clothing Store,Bus Line
7,North York,0,Coffee Shop,Bank,Gift Shop,Shopping Mall,Middle Eastern Restaurant,Mobile Phone Shop,Park,Deli / Bodega,Pharmacy,Pizza Place
8,North York,0,Clothing Store,Coffee Shop,Fast Food Restaurant,Restaurant,Japanese Restaurant,Cosmetics Shop,Convenience Store,Bank,Bakery,Mobile Phone Shop
9,North York,0,Furniture / Home Store,Miscellaneous Shop,Caribbean Restaurant,Massage Studio,Bar,Coffee Shop,Diner,Construction & Landscaping,Convenience Store,Cosmetics Shop
10,North York,0,Chinese Restaurant,Bank,Café,Japanese Restaurant,Food Truck,Food Court,Construction & Landscaping,Fried Chicken Joint,Convenience Store,Cosmetics Shop
11,North York,0,Grocery Store,Park,Discount Store,Liquor Store,Airport,Food Truck,Athletics & Sports,Bank,Baseball Field,Gym / Fitness Center


In [163]:
ny_merged.loc[ny_merged['Cluster Labels'] == 1, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
15,North York,1,Pizza Place,Women's Store,Chinese Restaurant,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store


In [164]:
ny_merged.loc[ny_merged['Cluster Labels'] == 2, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,2,Construction & Landscaping,Food & Drink Shop,Park,Women's Store,Diner,Coffee Shop,Comfort Food Restaurant,Convenience Store,Cosmetics Shop,Deli / Bodega
14,North York,2,Park,Construction & Landscaping,Bakery,Basketball Court,Discount Store,Coffee Shop,Comfort Food Restaurant,Convenience Store,Cosmetics Shop,Deli / Bodega
22,North York,2,Construction & Landscaping,Convenience Store,Bar,Park,Women's Store,Discount Store,Coffee Shop,Comfort Food Restaurant,Cosmetics Shop,Deli / Bodega


In [165]:
ny_merged.loc[ny_merged['Cluster Labels'] == 3, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,North York,3,Golf Course,Mediterranean Restaurant,Athletics & Sports,Pool,Dog Run,Women's Store,Diner,Comfort Food Restaurant,Construction & Landscaping,Convenience Store


In [166]:
ny_merged.loc[ny_merged['Cluster Labels'] == 4, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,North York,4,Fabric Shop,Baseball Field,Women's Store,Distribution Center,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store
