In [1]:
#%%===== Initialization =====%%#
#===== Import Libs =====#
print('Importing libraries...')

import numpy as np
import pandas as pd
import json as js
import requests
import geocoder # import geocoder
import folium

from bs4 import BeautifulSoup
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.pyplot as plt # plotting library
import matplotlib.colors as colors

from sklearn.cluster import KMeans 
from sklearn.datasets.samples_generator import make_blobs

print('Libraries imported.')
#===========================#
#===== Define Foursquare Credentials and other variables =====#
print('Defining Foursquare credentials...')
CLIENT_ID = 'BIJW5DEUCZQWIW4DIUQHYVJDRNBIFH0PHNPWUX1PFYRVTJJY' # your Foursquare ID
CLIENT_SECRET = 'NM1S4N0GGK2F2FDXRQFL50AR4K4SSY0USJWVOTMCCITL3DQ2' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
searchEverywhere = False # Search the entire Toronto metropolitan area. Caution: Very large, may exceed the Foursquare daily call limit!
searchOnly_Borough = 'Downtown Toronto' # Define the target area/borough
explore_radius = 500 # The radius around a neighborhood to be explored
venue_limit = 100 # The first 100 venues near a neighborhood
num_clusters = 5 # Number of clusters in the K-Mean clustering method
print('Explore radius: {}, venue limit: {}'.format(explore_radius,venue_limit))
if searchEverywhere == False:
    print('Only search the venues in this borough: {}'.format(searchOnly_Borough))
#=====================================================#
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)
print('Foursquare credentials defined successfully.')
print('Search all the neiborhoods in the Toronto metropolitan area: {}'.format(searchEverywhere))
if searchEverywhere == True:
    print('Warning: You are searching the venues in the entire Toronto metropolitan area. May exceed the Foursquare daily call limit. Proceed with caution!')
    print("If 'KeyError: groups' appears, you reach the Foursquare daily call limit. Please search for a smaller area/less venues/less premium contents, or search 24 hours later.")
#=====================================================#
##========================##

Importing libraries...
Libraries imported.
Defining Foursquare credentials...
Explore radius: 500, venue limit: 100
Only search the venues in this borough: Downtown Toronto
Your credentails:
CLIENT_ID: BIJW5DEUCZQWIW4DIUQHYVJDRNBIFH0PHNPWUX1PFYRVTJJY
CLIENT_SECRET:NM1S4N0GGK2F2FDXRQFL50AR4K4SSY0USJWVOTMCCITL3DQ2
Foursquare credentials defined successfully.
Search all the neiborhoods in the Toronto metropolitan area: False


In [2]:
#%%===== Read the postal code table from Wiki =====%%#
print('Reading Canadian zip data from Wiki...')

Url_Wiki = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# Read the zip codes on Wiki #
Wiki_html = requests.get(Url_Wiki).text

zipCodes_Soup = BeautifulSoup(Wiki_html,'lxml')
# print(Can_Zip_Soup.prettify()) #For test use. "soup.prettify is a method that is used to display the html file in a readable way"

zipCodes_table = zipCodes_Soup.find('table', class_='wikitable sortable')
# print(zipCodes_table.prettify()) # For test use. Display the html in a readable way
# print(type(zipCodes_table)) # For test use.

zipCodes_list = [] # Create an empty array that can be used to store the zip codes
borough_list = [] # Create an empty array that can be used to store the boroughs
neighborhood_list = [] # Create an empty array that can be used to store the neighborhoods

for row in zipCodes_table.findAll("tr"):
    cells = row.findAll("td")
    # For each "tr", assign each "td" to a variable.
    if len(cells) == 3: # Number of columns in the table
        zipCodes_txt = cells[0].find(text=True) # Index from the first element
        borough_txt = cells[1].find(text=True)
        neighborhood_txt = cells[2].find(text=True)
#        print('Post code=',postCodes_txt,',', 'Borough=',borough_txt,',', 'Neighborhood=',neighborhood_txt) # For test use
        
        zipCodes_list.append(zipCodes_txt)
        borough_list.append(borough_txt)
        neighborhood_list.append(neighborhood_txt)

df_CanZipCodes = pd.DataFrame()
df_CanZipCodes['Postal Code'] = zipCodes_list
df_CanZipCodes['Borough'] = borough_list
df_CanZipCodes['Neighborhood'] = neighborhood_list
df_CanZipCodes = df_CanZipCodes[df_CanZipCodes.Borough != 'Not assigned']
print(df_CanZipCodes.head())
print('Canadian Postal Code dataframe has been created successfully.')
##==========================##

Reading Canadian zip data from Wiki...
  Postal Code           Borough      Neighborhood
2         M3A        North York         Parkwoods
3         M4A        North York  Victoria Village
4         M5A  Downtown Toronto      Harbourfront
5         M5A  Downtown Toronto       Regent Park
6         M6A        North York  Lawrence Heights
Canadian Postal Code dataframe has been created successfully.


In [3]:
#%%===== Get the Longitudes and Latitudes of rach neighborhood =====%%#
print('Collecting the geological information of each neighborhood...')
# Try to use the Geocoder lib first #
print('Trying to use the Geocoders lib...')
# initialize your variable to None
lat_lng_coords = None
tryCount = 0 # Initialize a counter
geoLibFail = False # Initialize a failer indicator

# loop until you get the coordinates
while(geoLibFail == False):
    while(lat_lng_coords is None):
        if tryCount == 5:
            geoLibFail = True
            print('Geocoder lib failed. Will read the geological information csv file instead.')
            break
        g = geocoder.google('{}, Toronto, Ontario'.format('M2H'))
        lat_lng_coords = g.latlng
        tryCount = tryCount + 1
    if tryCount < 5:
        latitude = lat_lng_coords[0]
        longitude = lat_lng_coords[1]
if geoLibFail == True:
    # Read the csv file for the geological information
    print('Reading the csv file geological information...')
    geoCsvUrl = 'http://cocl.us/Geospatial_data' # The URL of the geo csv file
    df_geoInfo = pd.read_csv(geoCsvUrl) # Read the geo csv file
    print(df_geoInfo.head())
    print('Geological information of each neighborhood has been successfully collected.')
##====================================================##
#%%===== Insert the longitudinal information to the postal code DataFrame =====%%#
    df_geoInfo.set_index('Postal Code', inplace=True)
    df_CanZipCodes.set_index('Postal Code', inplace=True)
    df_CanZipCodes['Latitude'] = ''
    df_CanZipCodes['Longitude'] = ''
    allZipCodes = df_geoInfo.index # Get all of the zip codes
    for postCode in allZipCodes: # Access every row in the geoInfo dataframe
        df_CanZipCodes.loc[postCode, 'Latitude'] = df_geoInfo.loc[postCode,'Latitude']
        df_CanZipCodes.loc[postCode, 'Longitude'] = df_geoInfo.loc[postCode,'Longitude']
#        print('Postal Code: ',postCode, 'Latitude: ',df_CanZipCodes.loc[postCode, 'Latitude'], 'Longitude: ',df_CanZipCodes.loc[postCode, 'Longitude'])
    print(df_CanZipCodes.head())
    # Test if there is any 'Not assigned' in the dataframe
    df_notAssigned = df_CanZipCodes.loc[df_CanZipCodes['Neighborhood'] == 'Not assigned', ['Neighborhood']]
    if df_notAssigned.empty:
        print ("Test passed successfully. No 'Not assigned' value in the 'Neighborhood' column.")
##===========================================================================##

Collecting the geological information of each neighborhood...
Trying to use the Geocoders lib...
Geocoder lib failed. Will read the geological information csv file instead.
Reading the csv file geological information...
  Postal Code   Latitude  Longitude
0         M1B  43.806686 -79.194353
1         M1C  43.784535 -79.160497
2         M1E  43.763573 -79.188711
3         M1G  43.770992 -79.216917
4         M1H  43.773136 -79.239476
Geological information of each neighborhood has been successfully collected.
                      Borough      Neighborhood Latitude Longitude
Postal Code                                                       
M3A                North York         Parkwoods  43.7533  -79.3297
M4A                North York  Victoria Village  43.7259  -79.3156
M5A          Downtown Toronto      Harbourfront  43.6543  -79.3606
M5A          Downtown Toronto       Regent Park  43.6543  -79.3606
M6A                North York  Lawrence Heights  43.7185  -79.4648
Test passed succes

In [4]:
#%%===== Display the map of Toronto, ON =====%%#
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="Coursera_Data Science_Capstone Project")
Toronto_location = geolocator.geocode(address)
print(Toronto_location)
Toronto_latitude = Toronto_location.latitude
Toronto_longitude = Toronto_location.longitude
print('The geograpical coordinate of Toronto, ON, Canada are {}, {}.'.format(Toronto_latitude, Toronto_longitude))

# create map of Toronto, ON using latitude and longitude values
map_Toronto = folium.Map(location=[Toronto_latitude, Toronto_longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_CanZipCodes['Latitude'], df_CanZipCodes['Longitude'], df_CanZipCodes['Borough'], df_CanZipCodes['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label) # , parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,).add_to(map_Toronto)  

map_Toronto
##====================================================================##

Toronto, Ontario, Canada
The geograpical coordinate of Toronto, ON, Canada are 43.653963, -79.387207.


In [5]:
#%%===== Define a function to explore the neighborhoods =====%%#
# The function does not have bugs
def getNearbyVenues(names, latitudes, longitudes, radius, LIMIT):
#                                   names = df_targetBorough['Neighborhood'], 
#                                   latitudes = df_targetBorough['Latitude'], 
#                                   longitudes = df_targetBorough['Longitude'],
#                                   radius = explore_radius,
#                                   LIMIT = venue_limit
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name) # Til here, the neighborhoods that have the same nambe but different zips can be distinguished
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)
##=========================================================##

In [6]:
#%%===== Use the defined function to explore the neighborhoods in Toronto =====%%#
if searchEverywhere == False:
    df_targetBorough = df_CanZipCodes[df_CanZipCodes.Borough == searchOnly_Borough]
    df_targetBorough.drop_duplicates(subset ="Neighborhood", 
                                     keep = 'first', inplace = True) 
    print(df_targetBorough.head())
else:
    df_targetBorough = df_CanZipCodes

TargetBorough_venues = getNearbyVenues(
                                   names = df_targetBorough['Neighborhood'], 
                                   latitudes = df_targetBorough['Latitude'], 
                                   longitudes = df_targetBorough['Longitude'],
                                   radius = explore_radius,
                                   LIMIT = venue_limit
                                   )
print(TargetBorough_venues.head())
print(TargetBorough_venues.groupby('Neighborhood').count()) 
print('There are {} uniques categories.'.format(len(TargetBorough_venues['Venue Category'].unique())))
##========================================================================##

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


                      Borough      Neighborhood Latitude Longitude
Postal Code                                                       
M5A          Downtown Toronto      Harbourfront  43.6543  -79.3606
M5A          Downtown Toronto       Regent Park  43.6543  -79.3606
M5B          Downtown Toronto           Ryerson  43.6572  -79.3789
M5B          Downtown Toronto  Garden District
  43.6572  -79.3789
M5C          Downtown Toronto    St. James Town  43.6515  -79.3754
Harbourfront
Regent Park
Ryerson
Garden District

St. James Town
Berczy Park
Central Bay Street

Christie

Adelaide
King
Richmond

Harbourfront East

Toronto Islands
Union Station
Design Exchange
Toronto Dominion Centre
Commerce Court
Victoria Hotel

Harbord

University of Toronto
Chinatown
Grange Park
Kensington Market
CN Tower
Bathurst Quay

Island airport

Harbourfront West

King and Spadina
Railway Lands
South Niagara
Rosedale
Stn A PO Boxes 25 The Esplanade

Cabbagetown
First Canadian Place
Underground city
Church and We

In [7]:
#%%===== Analyze each neighborhood =====%%#
# one hot encoding
TargetBorough_onehot = pd.get_dummies(TargetBorough_venues[['Venue Category']], prefix="", prefix_sep="") # Convert categorical variable into dummy/indicator variables (True/False)
TargetBorough_onehot.head()
# add neighborhood column back to dataframe
TargetBorough_onehot['Neighborhood'] = TargetBorough_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [TargetBorough_onehot.columns[-1]] + list(TargetBorough_onehot.columns[:-1])
TargetBorough_onehot.head()
TargetBorough_onehot = TargetBorough_onehot[fixed_columns]

print(TargetBorough_onehot.head())

TargetBorough_grouped = TargetBorough_onehot.groupby('Neighborhood').mean().reset_index()
print(TargetBorough_grouped.head())

num_top_venues = 5

for hood in TargetBorough_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = TargetBorough_grouped[TargetBorough_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')
#=========================================#

   Yoga Studio  Adult Boutique  Afghan Restaurant  Airport  \
0            0               0                  0        0   
1            0               0                  0        0   
2            0               0                  0        0   
3            0               0                  0        0   
4            0               0                  0        0   

   Airport Food Court  Airport Gate  Airport Lounge  Airport Service  \
0                   0             0               0                0   
1                   0             0               0                0   
2                   0             0               0                0   
3                   0             0               0                0   
4                   0             0               0                0   

   Airport Terminal  American Restaurant      ...        Toy / Game Store  \
0                 0                    0      ...                       0   
1                 0                    0

4     Boat or Ferry  0.07


----Regent Park----
            venue  freq
0     Coffee Shop  0.15
1          Bakery  0.06
2             Pub  0.06
3            Park  0.06
4  Breakfast Spot  0.04


----Richmond
----
                 venue  freq
0          Coffee Shop  0.07
1                 Café  0.05
2  American Restaurant  0.04
3      Thai Restaurant  0.04
4           Steakhouse  0.04


----Rosedale----
         venue  freq
0         Park  0.50
1   Playground  0.25
2        Trail  0.25
3  Yoga Studio  0.00
4  Music Venue  0.00


----Ryerson----
                 venue  freq
0          Coffee Shop  0.08
1       Clothing Store  0.07
2       Cosmetics Shop  0.04
3                 Café  0.04
4  Japanese Restaurant  0.03


----South Niagara----
              venue  freq
0   Airport Service  0.14
1  Airport Terminal  0.14
2    Airport Lounge  0.14
3             Plane  0.07
4     Boat or Ferry  0.07


----St. James Town----
          venue  freq
0   Coffee Shop  0.07
1          Café  0.06
2    R

In [8]:
#%%===== Define a function to sort the venues in descending order =====%%#
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]
##=================================================================##

In [9]:
#%%===== Display the top 10 venues in the neiborhoods =====%%#
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = TargetBorough_grouped['Neighborhood']

for ind in np.arange(TargetBorough_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(TargetBorough_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted
##=========================================================##

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Adelaide,Coffee Shop,Café,Thai Restaurant,Steakhouse,American Restaurant,Gym,Restaurant,Burger Joint,Hotel,Clothing Store
1,Bathurst Quay,Airport Lounge,Airport Service,Airport Terminal,Boat or Ferry,Plane,Sculpture Garden,Boutique,Airport,Airport Food Court,Airport Gate
2,Berczy Park,Coffee Shop,Restaurant,Cocktail Bar,Steakhouse,Farmers Market,Pub,Bakery,Seafood Restaurant,Beer Bar,Café
3,CN Tower,Airport Lounge,Airport Service,Airport Terminal,Boat or Ferry,Plane,Sculpture Garden,Boutique,Airport,Airport Food Court,Airport Gate
4,Cabbagetown,Coffee Shop,Restaurant,Pub,Bakery,Indian Restaurant,Italian Restaurant,Pizza Place,Café,Pharmacy,Chinese Restaurant
5,Central Bay Street,Coffee Shop,Café,Sandwich Place,Italian Restaurant,Burger Joint,Bar,Ice Cream Shop,Bubble Tea Shop,Spa,Chinese Restaurant
6,Chinatown,Café,Bar,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,Chinese Restaurant,Coffee Shop,Bakery,Mexican Restaurant,Dumpling Restaurant,Gaming Cafe
7,Christie,Grocery Store,Café,Park,Italian Restaurant,Baby Store,Diner,Nightclub,Restaurant,Convenience Store,Coffee Shop
8,Church and Wellesley,Coffee Shop,Japanese Restaurant,Gay Bar,Sushi Restaurant,Burger Joint,Restaurant,Gastropub,Pizza Place,Dance Studio,Café
9,Commerce Court,Coffee Shop,Café,Hotel,Restaurant,American Restaurant,Gym,Gastropub,Deli / Bodega,Seafood Restaurant,Steakhouse


In [10]:
#%%===== Cluster neighborhoods -- Using K-mean clustering =====%%#
# set number of clusters
kclusters = num_clusters

TargetBorough_grouped_clustering = TargetBorough_grouped.drop('Neighborhood', 1) # Drop the neighborhood name column

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(TargetBorough_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

TargetBorough_merged = df_targetBorough
# 2 St. James Towns with different Zips
TargetBorough_merged.drop_duplicates(subset=['Borough', 'Neighborhood'], keep=False)

# add clustering labels
TargetBorough_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
TargetBorough_merged = TargetBorough_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

TargetBorough_merged.head() # check the last columns!
##=======================================================================##

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0_level_0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
M5A,Downtown Toronto,Harbourfront,43.6543,-79.3606,0,Coffee Shop,Park,Pub,Bakery,Theater,Mexican Restaurant,Café,Breakfast Spot,Event Space,Spa
M5A,Downtown Toronto,Regent Park,43.6543,-79.3606,2,Coffee Shop,Park,Pub,Bakery,Theater,Mexican Restaurant,Café,Breakfast Spot,Event Space,Spa
M5B,Downtown Toronto,Ryerson,43.6572,-79.3789,0,Coffee Shop,Clothing Store,Cosmetics Shop,Café,Japanese Restaurant,Middle Eastern Restaurant,Bar,Restaurant,Pizza Place,Sandwich Place
M5B,Downtown Toronto,Garden District,43.6572,-79.3789,2,Coffee Shop,Clothing Store,Cosmetics Shop,Café,Japanese Restaurant,Middle Eastern Restaurant,Bar,Restaurant,Pizza Place,Sandwich Place
M5C,Downtown Toronto,St. James Town,43.6515,-79.3754,0,Coffee Shop,Café,Restaurant,Hotel,Italian Restaurant,Gastropub,Park,Clothing Store,Cosmetics Shop,Cocktail Bar


In [11]:
#%%===== Visualize the result =====%%#
# create map
map_clusters = folium.Map(location=[Toronto_latitude, Toronto_longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(TargetBorough_merged['Latitude'], TargetBorough_merged['Longitude'], TargetBorough_merged['Neighborhood'], TargetBorough_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
map_clusters
##==============================================================##

In [12]:
#%%===== Examine the clustering results =====%%#
# Cluster 1
TargetBorough_merged.loc[TargetBorough_merged['Cluster Labels'] == 0, TargetBorough_merged.columns[[1] + list(range(5, TargetBorough_merged.shape[1]))]]
# Cluster 2
TargetBorough_merged.loc[TargetBorough_merged['Cluster Labels'] == 1, TargetBorough_merged.columns[[1] + list(range(5, TargetBorough_merged.shape[1]))]]
# Cluster 3
TargetBorough_merged.loc[TargetBorough_merged['Cluster Labels'] == 2, TargetBorough_merged.columns[[1] + list(range(5, TargetBorough_merged.shape[1]))]]
# Cluster 4
TargetBorough_merged.loc[TargetBorough_merged['Cluster Labels'] == 3, TargetBorough_merged.columns[[1] + list(range(5, TargetBorough_merged.shape[1]))]]
# Cluster 5
TargetBorough_merged.loc[TargetBorough_merged['Cluster Labels'] == 4, TargetBorough_merged.columns[[1] + list(range(5, TargetBorough_merged.shape[1]))]]

Unnamed: 0_level_0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
M6G,Christie,Grocery Store,Café,Park,Italian Restaurant,Baby Store,Diner,Nightclub,Restaurant,Convenience Store,Coffee Shop
