# QUESTION 1

### DATA COLLECTION

In [207]:
#//////////////////////////////////////////////////////////////////////// IMPORTING LIBRARIES ////////////////////////////////////////////////////////////////////////
   
    from bs4 import BeautifulSoup                                         # Beautiful Soup Library so format the data.
    import requests                                                       # Request Library to retrieve the data from an HTML WebPage.
    import pandas as pd                                                   # Pandas Library in order to work with the retrieved Data.
    import numpy as np                                                    # Numpy Library in order to work with tuples.
    #!pip install pgeocode                                                 # pgeocode to search the coordinates given a postal code
    import pgeocode
    from geopy.geocoders import Nominatim                                 # Nominatim library to work with latitudes and longitudes of a given place
    #!conda install -c conda-forge folium=0.5.0 --yes
    import folium # map rendering library
    print('Libraries installed')
    from pandas.io.json import json_normalize
    from sklearn.cluster import KMeans
    import matplotlib.cm as cm
    import matplotlib.colors as colors

#//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

Libraries installed


In [29]:
#///////////////////////////////////////////////////////////////// RETRIEVING DATA WITH BEAUTIFULSOUP /////////////////////////////////////////////////////////////////

URL = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"   # Setting URL from Wikipedia.
Wiki_Source = requests.get(URL).text                                      # Requesting Text from the source.
XML_Soup = BeautifulSoup(Wiki_Source, 'lxml')                             # Parsing the requested text into XML format.
XML_Table = XML_Soup.find('table')                                        # Finding tha Table in the Wikipedia URL.


XML_tittle = XML_Table.find_all('th')                                     # Finding all the Table headers
Tittle_data=[]                                                            # Defining the temporal Row for the headers
for th_cell in XML_Table.find_all('th'):                                  # Iteratinf through all the table headers (Header = th)
    Tittle_data.append(th_cell.text.strip())                              # Setting the headers into the temporal variable
Neighborhood_df = pd.DataFrame(columns = Tittle_data)                     # Setting the headers into the Neighborhood Dataframe


for tr_cell in XML_Table.find_all('tr'):                                  # Iterating through the table. (Rows = tr)
    row_data=[]                                                           # Setting a temporal variable to store the Row.
    for td_cell in tr_cell.find_all('td'):                                # Iterating through the table. (Column = td)
        row_data.append(td_cell.text.strip())                             # Setting the Row Data.
    if len(row_data)==len(Neighborhood_df.columns):                                    # Stopping the Columns' iteration when it matches with the lenght of columns (3 columns).
        Neighborhood_df.loc[len(Neighborhood_df)] = row_data              # Setting the data from the Temporal Row into the Neighborhood Dataframe

        
Neighborhood_df.head(10)                                                  # Showing the raw Dataframe

#//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
7,M8A,Not assigned,Not assigned
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"


### DATA PREPARATION

In [32]:
#//////////////////////////////////////////////////////////////////////// FORMATTING DATA FRAME ////////////////////////////////////////////////////////////////////////

indexed_Boroughs = Neighborhood_df [Neighborhood_df['Borough'] == 'Not assigned'].index         # Indenxing all Boroughs matching with 'Not assigned'.
Neighborhood_df.drop(indexed_Boroughs, inplace=True)                                            # Dropping all the rows indexed as 'Not assigned'.
Neighborhood_df                                                                                 # Displaying new filtered Neighborhood Dataframe.


# Replacing the borough according to the following condition: If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.
Neighborhood_df['Neighborhood'] = np.where(Neighborhood_df['Borough'] != 'Not assigned', \
                                  Neighborhood_df['Neighborhood'], np.where(Neighborhood_df\
                                  ['Neighborhood'] == 'Not assigned',Neighborhood_df\
                                  ['Borough'], Neighborhood_df['Neighborhood'])) 


Neighborhood_df.groupby(['Postal Code','Borough'], sort = False).agg( ', '.join)                # Grouping all the values by Postal Code and Borough and joining all duplicates neighborhoods 
print('Toronto Neighborhood Dataframe shape: ', Neighborhood_df.shape)                          # Displaying the shape of the Neighborhood Dataframe.
Neighborhood_df.head(10)                                                                        # Displaying new filtered Neighborhood Dataframe.

#///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

Toronto Neighborhood Dataframe shape:  (103, 3)


Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"
11,M3B,North York,Don Mills
12,M4B,East York,"Parkview Hill, Woodbine Gardens"
13,M5B,Downtown Toronto,"Garden District, Ryerson"


# QUESTION 2

In [112]:
# NOTE: For some reason I was having troubles importing the geocode Library, so I'm using the pgeocode library instead

nomi = pgeocode.Nominatim('CA')                                                    # Defining the country code (CA = Canada).
postal_Codes_Array = np.array(Neighborhood_df['Postal Code'])                      # Parsing the list of Postal codes into an array.
zipcodes_df = pd.DataFrame(nomi.query_postal_code(postal_Codes_Array))             # Creating a new Dataframe to store all the information associates to the postal codes .
zipcodes_df.dropna(subset = ["latitude"], inplace=True)                            # Dropping any postal code not found due to changes in the data.
sub_zipcodes_df = zipcodes_df[['postal_code', 'latitude', 'longitude']]            # Creating a new Sub Dataframe containing the latitudes and longitudes found

merged_Neighborhood_df = pd.merge(Neighborhood_df, sub_zipcodes_df, left_on= \
                         'Postal Code', right_on='postal_code', how='outer')       # Merging the Dataframe with the coordinates and the inicial dataframe

merged_Neighborhood_df.dropna(subset = ["latitude"], inplace=True)                 # Dropping any missing value in the merged Dataframe
merged_Neighborhood_df.drop(['postal_code'], axis=1, inplace = True)               # Due to the merging, we have to drop the duplicated Postal Code column 
merged_Neighborhood_df.head(10)                                                    # Showing the final DataFrame

Unnamed: 0,Postal Code,Borough,Neighborhood,latitude,longitude
0,M3A,North York,Parkwoods,43.7545,-79.33
1,M4A,North York,Victoria Village,43.7276,-79.3148
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.6555,-79.3626
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.7223,-79.4504
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.6641,-79.3889
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.6662,-79.5282
6,M1B,Scarborough,"Malvern, Rouge",43.8113,-79.193
7,M3B,North York,Don Mills,43.745,-79.359
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.7063,-79.3094
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.6572,-79.3783


# QUESTION 3

### DATA ANALYSIS

NOTE: I've decided to evaluate the cluster for all the Boroughs in Toronto in order to evaluate what are the differences with the results in the lesson on week 3 

In [352]:
address = 'TORONTO'                                                                        # Defining an initial location to render the map
geolocator = Nominatim(user_agent="ON_EXPLORER")                                           # Initializing geolocator variable as Nominatim
location = geolocator.geocode(address)                                                     # Assigning location variable from geocode
latitude = location.latitude                                                               # Assigning latitude variable
longitude = location.longitude                                                             # Assigning longitude variable

In [355]:
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10, width=500,height=500)    # Creating map of Toronto using latitude and longitude values

for lat, lng, borough, neighborhood in zip(merged_Neighborhood_df['latitude'], merged_Neighborhood_df['longitude'], merged_Neighborhood_df['Borough'], merged_Neighborhood_df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],                                                                              # Setting initial values to render the map
        radius=5,
        popup=label,
        color='green',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)  
    
map_Toronto                                                                                      # Rendering the map

#### FOURSQUARE SETTINGS

In [357]:
CLIENT_ID = 'GST5GRMIRSDY0USVISIBIQ2NKR2XVYTQXVWN3OCFBRYQPXJY'                                      # My Foursquare ID
CLIENT_SECRET = 'UOJEW5ZYRDR3CIRR3IYAPQSWKNEOVZFLLVG1YSTL35MEHH4Z'                                  # My Foursquare Secret
VERSION = '20200614'                                                                                # Foursquare API version
neighborhood_latitude = merged_Neighborhood_df.loc[0, 'latitude']                                   # Neighborhood latitude value
neighborhood_longitude = merged_Neighborhood_df.loc[0, 'longitude']                                 # Neighborhood longitude value
neighborhood_name = merged_Neighborhood_df.loc[0, 'Neighborhood']                                   # Neighborhood name
LIMIT = 100                                                                                         # Limiting the number of venues returned by Foursquare API
radius = 500                                                                                        # Define the radius

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude,                                                                          # Formatting the URL in order to make the API calls  
    neighborhood_longitude, 
    radius, 
    LIMIT)

#### Get Category Type Function

In [180]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']                                                  # function that extracts the category of the venue
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

#### Get Nearby Venues Function

In [359]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        #print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### Processing Data

In [361]:
Toronto_venues = getNearbyVenues(names=merged_Neighborhood_df['Neighborhood'],
                                   latitudes=merged_Neighborhood_df['latitude'],                   # Retrieving the data from Foursquare through the function 
                                   longitudes=merged_Neighborhood_df['longitude']
                                 )
print(Toronto_venues.shape)                                                                        # Checking the new Dataframe shape
Toronto_venues.head()                                                                              # Previewing the Toronto Venues Dataframe

(2165, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.7545,-79.33,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.7545,-79.33,GTA Restoration,43.753396,-79.333477,Fireworks Store
2,Parkwoods,43.7545,-79.33,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Victoria Village,43.7276,-79.3148,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,Victoria Village,43.7276,-79.3148,Tim Hortons,43.725517,-79.313103,Coffee Shop


In [363]:
Toronto_venues.groupby('Neighborhood').count()                                                    # Grouping the data for quick review

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,4,4,4,4,4,4
"Alderwood, Long Branch",9,9,9,9,9,9
"Bathurst Manor, Wilson Heights, Downsview North",8,8,8,8,8,8
Bayview Village,3,3,3,3,3,3
"Bedford Park, Lawrence Manor East",24,24,24,24,24,24
Berczy Park,89,89,89,89,89,89
"Birch Cliff, Cliffside West",4,4,4,4,4,4
"Brockton, Parkdale Village, Exhibition Place",39,39,39,39,39,39
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",14,14,14,14,14,14
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",57,57,57,57,57,57


In [364]:
print('There are {} uniques categories.'.format(len(Toronto_venues['Venue Category'].unique())))                 # Checking the ammount of categories

There are 261 uniques categories.


#### Analazing Data

In [191]:
# one hot encoding
Toronto_onehot = pd.get_dummies(Toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Toronto_onehot['Neighborhood'] = Toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Toronto_onehot.columns[-1]] + list(Toronto_onehot.columns[:-1])
Toronto_onehot = Toronto_onehot[fixed_columns]

Toronto_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [192]:
Toronto_onehot.shape

(2165, 261)

In [194]:
Toronto_grouped = Toronto_onehot.groupby('Neighborhood').mean().reset_index()
Toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,Agincourt,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.0,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000
1,"Alderwood, Long Branch",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.0,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000
2,"Bathurst Manor, Wilson Heights, Downsview North",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.0,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000
3,Bayview Village,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.0,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000
4,"Bedford Park, Lawrence Manor East",0.000000,0.000000,0.000000,0.000000,0.041667,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.0,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000
5,Berczy Park,0.011236,0.000000,0.000000,0.000000,0.011236,0.022472,0.000000,0.000000,0.000000,...,0.011236,0.000000,0.0,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000
6,"Birch Cliff, Cliffside West",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.0,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000
7,"Brockton, Parkdale Village, Exhibition Place",0.000000,0.025641,0.000000,0.000000,0.000000,0.025641,0.000000,0.025641,0.000000,...,0.000000,0.000000,0.0,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000
8,"Business reply mail Processing Centre, South C...",0.071429,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.0,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000
9,"CN Tower, King and Spadina, Railway Lands, Har...",0.017544,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.017544,...,0.000000,0.000000,0.0,0.000000,0.000000,0.00,0.000000,0.000000,0.017544,0.000000


In [196]:
Toronto_grouped.shape

(97, 261)

In [197]:
num_top_venues = 10

for hood in Toronto_grouped['Neighborhood']:
    #print("----"+hood+"----")
    temp = Toronto_grouped[Toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    #print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    #print('\n')

In [198]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [251]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Toronto_grouped['Neighborhood']

for ind in np.arange(Toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Badminton Court,Breakfast Spot,Skating Rink,Latin American Restaurant,Women's Store,Farmers Market,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant
1,"Alderwood, Long Branch",Athletics & Sports,Pharmacy,Convenience Store,Pizza Place,Sandwich Place,Coffee Shop,Dance Studio,Pub,Gym,Ethiopian Restaurant
2,"Bathurst Manor, Wilson Heights, Downsview North",Spa,Pizza Place,Mediterranean Restaurant,Middle Eastern Restaurant,Deli / Bodega,Coffee Shop,Fried Chicken Joint,Grocery Store,Event Space,Eastern European Restaurant
3,Bayview Village,Park,Gas Station,Trail,Women's Store,Event Space,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Falafel Restaurant
4,"Bedford Park, Lawrence Manor East",Coffee Shop,Restaurant,Sandwich Place,Italian Restaurant,Comfort Food Restaurant,Thai Restaurant,Pharmacy,Pizza Place,Pub,Café


#### Cluster Analisys

In [347]:
# set number of clusters
kclusters = 5

Toronto_grouped_clustering = Toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 2, 2, 4, 2, 2, 2, 2, 2, 2], dtype=int32)

In [348]:
Toronto_merged = merged_Neighborhood_df

Toronto_merged = Toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

Toronto_merged.dropna(inplace = True)

#### Creating Map

In [365]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10, width=500,height=500)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Toronto_merged['latitude'], Toronto_merged['longitude'], Toronto_merged['Neighborhood'], Toronto_merged['Cluster Label']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    flag=flag+1
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

['#8000ff', '#00b5eb', '#80ffb4', '#ffb360', '#ff0000']


#### Displaying clusters

In [367]:
Toronto_merged.loc[Toronto_merged['Cluster Label'] == 1, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Label
6,Scarborough,Home Service,Food & Drink Shop,Flea Market,Fish Market,Fish & Chips Shop,Fireworks Store,Field,Fast Food Restaurant,Farmers Market,Falafel Restaurant,1.0
50,North York,Home Service,Furniture / Home Store,Donut Shop,Flea Market,Fish Market,Fish & Chips Shop,Fireworks Store,Field,Fast Food Restaurant,Farmers Market,1.0
62,Central Toronto,Home Service,Food & Drink Shop,Flea Market,Fish Market,Fish & Chips Shop,Fireworks Store,Field,Fast Food Restaurant,Farmers Market,Falafel Restaurant,1.0


In [368]:
Toronto_merged.loc[Toronto_merged['Cluster Label'] == 2, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Label
1,North York,Hockey Arena,French Restaurant,Coffee Shop,Intersection,Portuguese Restaurant,Pizza Place,Park,Field,Fast Food Restaurant,Farmers Market,2.0
2,Downtown Toronto,Coffee Shop,Restaurant,Breakfast Spot,Yoga Studio,Theater,Gym / Fitness Center,Italian Restaurant,Food Truck,Event Space,Electronics Store,2.0
3,North York,Clothing Store,Coffee Shop,Women's Store,Restaurant,Cosmetics Shop,Toy / Game Store,Sushi Restaurant,Bakery,Men's Store,Electronics Store,2.0
4,Downtown Toronto,Sushi Restaurant,Diner,Burger Joint,Ethiopian Restaurant,Ramen Restaurant,Beer Bar,Coffee Shop,Bubble Tea Shop,Distribution Center,Italian Restaurant,2.0
5,Etobicoke,Pharmacy,Grocery Store,Park,Skating Rink,Bank,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,2.0
8,East York,Pizza Place,Pharmacy,Gym / Fitness Center,Pet Store,Fast Food Restaurant,Café,Gastropub,Intersection,Bank,Curling Ice,2.0
9,Downtown Toronto,Coffee Shop,Clothing Store,Middle Eastern Restaurant,Cosmetics Shop,Café,Japanese Restaurant,Italian Restaurant,Hotel,Theater,Lingerie Store,2.0
10,North York,Pizza Place,Grocery Store,Asian Restaurant,Sushi Restaurant,Fish Market,Latin American Restaurant,Japanese Restaurant,Italian Restaurant,Ice Cream Shop,Gas Station,2.0
11,Etobicoke,Pizza Place,Chinese Restaurant,Sandwich Place,Coffee Shop,Tea Room,Electronics Store,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,2.0
12,Scarborough,Bar,Women's Store,Dumpling Restaurant,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Fireworks Store,Field,Fast Food Restaurant,2.0


In [369]:
Toronto_merged.loc[Toronto_merged['Cluster Label'] == 3, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Label
85,Scarborough,Pharmacy,Sushi Restaurant,Doner Restaurant,Fish Market,Fish & Chips Shop,Fireworks Store,Field,Fast Food Restaurant,Farmers Market,Falafel Restaurant,3.0


In [370]:
Toronto_merged.loc[Toronto_merged['Cluster Label'] == 4, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Label
0,North York,Food & Drink Shop,Fireworks Store,Park,Doner Restaurant,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Falafel Restaurant,4.0
7,North York,Home Service,Park,Pool,Construction & Landscaping,Furniture / Home Store,Gym,Yoga Studio,Video Store,Distribution Center,Fireworks Store,4.0
13,North York,Home Service,Park,Pool,Construction & Landscaping,Furniture / Home Store,Gym,Yoga Studio,Video Store,Distribution Center,Fireworks Store,4.0
21,York,Park,Women's Store,Gym,Sporting Goods Shop,Mexican Restaurant,Beer Store,Bakery,Dance Studio,Deli / Bodega,Fish & Chips Shop,4.0
27,North York,Residential Building (Apartment / Condo),Park,Women's Store,Falafel Restaurant,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Farmers Market,4.0
32,Scarborough,Spa,Park,Grocery Store,Falafel Restaurant,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Farmers Market,4.0
36,Downtown Toronto,Park,Music Venue,Harbor / Marina,Café,Women's Store,Event Space,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,4.0
39,North York,Park,Gas Station,Trail,Women's Store,Event Space,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Falafel Restaurant,4.0
61,Central Toronto,Photography Studio,Park,Farmers Market,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Fast Food Restaurant,4.0
64,York,Park,Home Service,Convenience Store,Afghan Restaurant,Donut Shop,Flea Market,Fish Market,Fish & Chips Shop,Fireworks Store,Field,4.0
