### Scrape Wikipedia Page to retrieve neighborhoods in Toronto

This notebook retrieves PostalCode, Borough and Neighborhood from this [Wikipedia page](https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M) and creates a pandas DataFrame.
Coordinates are retrieved and added to the DataFrame.

In [1]:
# Import stuffs
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from geopy.geocoders import Nominatim

Sample DataFrame as below:
![Sample DataFrame](https://github.com/swmk/Coursera_Capstone/raw/master/sample_df_wiki.png)

#### Scrape the content from the Wikipage.

In [3]:
# URL to Wikipedia page
wiki_url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# Sends a request to Wiki page and retrieves html source
site_text = requests.get(wiki_url).text

# Creates BeautifulSoup instance
soup = BeautifulSoup(site_text, 'lxml')

# Retrieves html table containing the postal codes
postal_table = soup.find('table', {'class': 'wikitable sortable'})

# Retrieves all content rows without header row
all_trs = postal_table.find_all('tr')[1:]

#### Transform content into pandas DataFrame
Items with "Not assigned" borough are ignored.

In [4]:
# Variable to holds rows data for DataFrame
df_cols = ['PostalCode', 'Borough', 'Neighborhood']
df = pd.DataFrame(columns=df_cols)

In [5]:
# Extract all html rows.
#  From each row, extract the cell text and convert them to a dictionary.
#   Add to the dataframe if borough cell is not Not assigned. 
for tr in all_trs:
    tds = tr.findAll('td')
    # Extract a row as a dictionary
    df_row = {col: val.text.rstrip() for col, val in zip(df_cols,tds)}
    # Add the row to the DataFrame
    if df_row['Borough'] != 'Not assigned':
        df = df.append(df_row, ignore_index=True)

#### Clean and prepare data.
* Replace "Not assigned" Neighborhood with value from Borough.
* Multiple Neighborhood values of the same PostalCode are merged to one row in the dataframe.

In [6]:
# Clean and prep the data frame

# Replace 'Not assigned' neighborhood with borough value.
df['Neighborhood'].replace(to_replace='Not assigned', value=df['Borough'], inplace=True)

# Merge two cells of the same Postal Code.
df = df.groupby(['PostalCode','Borough'])['Neighborhood'].apply(lambda x: ', '.join(x)).reset_index()

In [7]:
df.shape

(103, 3)

In [8]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


#### Load cooridinates data

In [9]:
# Load Geospatial data
geo_df = pd.read_csv('Geospatial_Coordinates.csv')

In [12]:
# Merge two dfs
merged_df = pd.merge(df, geo_df, how='left', left_on='PostalCode', right_on='Postal Code')

In [13]:
# Drop an extra column
merged_df.drop(['Postal Code'], axis=1, inplace=True)

In [14]:
merged_df.head(5)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [15]:
# Credentials for Foursquare API
CLIENT_ID = 'CWPBIARZ3HQU5GAEHLW4123MNPXWZWVSXRR00QDNTLOT4BLS'
CLIENT_SECRET = 'PEWJLZ2E3QSJZKGPNHCTIGXYAATVZS33DS51I32OZOZOIYRD'
VERSION = '20190505'
LIMIT = 30

In [16]:
# Function to perform nearby venues search
def getNearbyVenues(postalcodes, borough, neighborhood, lats, longs, radius=500):
    """
    Perform venue search for the given latitudes and longitudes through Foursquare API.
    """
    # Variable to hold venues result
    venues_list = []
    for postalcode, borough, neighborhood, lat, long in zip(postalcodes, borough, neighborhood, lats, longs):
        # URL to perform search query
        url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(
        CLIENT_ID, CLIENT_SECRET, lat, long, VERSION, radius, LIMIT)
        
        # Make a call to Foursquare API
        results = requests.get(url).json()['response']['venues']

        # Extract Venue Name, Venue Coordinates and Venue Categories from the results
        # Some venues do not have categories assigned.
        # For such venues, 'Not assigned' is populated.
        venues_list.append([
            (postalcode, 
            borough, 
            neighborhood,
            lat, 
            long, 
            v['name'], 
            v['location']['lat'], 
            v['location']['lng'], 
            v['categories'][0]['name'] if len(v['categories']) > 0 else None) for v in results])

    # Creates a dataframe from the results
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Postal Code', 'Borough', 'Neighborhood', 'Postal Latitude', 'Postal Longitude', 'Venue', 'Venue Latitude', 'Venue Longitude', 'Venue Category']
        
    return nearby_venues

In [17]:
toronto_venues = getNearbyVenues(merged_df['PostalCode'], merged_df['Borough'], merged_df['Neighborhood'], merged_df['Latitude'], merged_df['Longitude'])

In [18]:
toronto_venues.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Postal Latitude,Postal Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,Shell,43.803227,-79.192414,Gas Station
1,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,Alvin Curling Public School,43.808683,-79.190103,Elementary School
2,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,Quick Lane Tire & Auto Centre,43.801101,-79.189315,Auto Garage
3,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,Second Cup,43.802165,-79.196114,Coffee Shop
4,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,Dupont Canada,43.805927,-79.197695,Building


In [19]:
toronto_venues.shape

(3087, 9)

In [20]:
# Saving to a csv so as to not query the data again.
# May be removed
toronto_venues.to_csv('toronto_venues.csv', index=False)

In [21]:
# Load from csv file.
# May be removed
toronto_venues = pd.read_csv('toronto_venues.csv')

In [22]:
toronto_venues.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Postal Latitude,Postal Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,Shell,43.803227,-79.192414,Gas Station
1,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,Alvin Curling Public School,43.808683,-79.190103,Elementary School
2,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,Quick Lane Tire & Auto Centre,43.801101,-79.189315,Auto Garage
3,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,Second Cup,43.802165,-79.196114,Coffee Shop
4,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,Dupont Canada,43.805927,-79.197695,Building


In [23]:
# Drop rows without Venue Category assigned and assigned to the df
toronto_df = toronto_venues.dropna(subset=['Venue Category'], axis=0)

In [24]:
# Drop rows with Venue Category as 'Neighborhood'
toronto_df.drop(toronto_df[toronto_df['Venue Category'] == 'Neighborhood'].index, axis=0, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


In [25]:
print(f"Total unique venue categories: {len(toronto_df['Venue Category'].unique())}")

Total unique venue categories: 362


### Analyze each neighborhood 

In [26]:
# One hot encoding
toronto_onehot = pd.get_dummies(toronto_df[['Venue Category']], prefix='', prefix_sep='')

# Add neighborhood back
toronto_onehot.insert(loc=0, column='Neighborhood', value=toronto_df['Neighborhood'])

In [27]:
toronto_onehot.shape

(2459, 363)

In [28]:
# Group rows by Neighborhood and average(mean) of the frequency of occurrence
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()

In [29]:
toronto_grouped.shape

(103, 363)

In [30]:
# Print out top 5 venues
num_top_venues = 5

for neighborhood in toronto_grouped['Neighborhood']:
    print(f'====={neighborhood}=====')
    f_label = toronto_grouped['Neighborhood'] == neighborhood
    # Get the trasponse of each row (each neighborhood)
    t_neighborhood = toronto_grouped[f_label].T.reset_index()
    # Set columns
    t_neighborhood.columns = ['Venue', 'Frequency']
    # Skip the first row as it is for the Neighborhood
    t_neighborhood = t_neighborhood[1:]
    # Cast datatype to Frequency column
    t_neighborhood['Frequency'] = t_neighborhood['Frequency'].astype(float)
    # Print top values
    print(t_neighborhood.sort_values('Frequency', ascending=False).reset_index(drop=True).head(num_top_venues))

=====Adelaide, King, Richmond=====
                 Venue  Frequency
0                Hotel   0.103448
1   Italian Restaurant   0.068966
2             Tea Room   0.034483
3  Fried Chicken Joint   0.034483
4           Food Court   0.034483
=====Agincourt=====
         Venue  Frequency
0          Spa   0.217391
1     Building   0.130435
2       Office   0.086957
3  Post Office   0.043478
4  Coffee Shop   0.043478
=====Agincourt North, L'Amoreaux East, Milliken, Steeles East=====
            Venue  Frequency
0        Building   0.142857
1  Medical Center   0.142857
2          School   0.095238
3          Bakery   0.095238
4            Park   0.095238
=====Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown=====
              Venue  Frequency
0  Spiritual Center   0.137931
1            Bakery   0.103448
2       Pizza Place   0.103448
3              Farm   0.068966
4     Grocery Store   0.068966
=====Alderwood, Long Branch=====
     

In [31]:
toronto_grouped.head()

Unnamed: 0,Neighborhood,ATM,Accessories Store,Adult Boutique,Advertising Agency,Afghan Restaurant,African Restaurant,Airport,Airport Food Court,Airport Gate,...,Voting Booth,Warehouse,Warehouse Store,Wine Shop,Winery,Wings Joint,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Agincourt,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Agincourt North, L'Amoreaux East, Milliken, St...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [32]:
# Create a pandas DataFrame for top venues
def get_top_venues(row, num_top_venues):
    """
    Return a sorted Series.
    """
    # row -> ['Neighborhood', 'ATM', 'Accessories Stores'....]
    cats = row.iloc[1:]
    sorted_cats = cats.sort_values(ascending=False)
    # sorted_cats ->
    #     Spa                           0.2
    #     Office                       0.08
    #     Storage Facility             0.08
    #     Automotive Shop              0.04
    #     Coffee Shop                  0.04
    #     Bar                          0.04
    #     Rental Car Location          0.04
    # Return top venues
    return sorted_cats.index.values[0: num_top_venues]

In [33]:
# Get top 10 venues
num_top_venues = 10

# First, Second, Third
position_ind = ['st', 'nd', 'rd']

# Prep empty columns
columns = ['Neighborhood']
for i in range(num_top_venues):
    try:
        columns.append(f'{i+1}{position_ind[i]} Most Common Venue')
    except:
        columns.append(f'{i+1}th Most Common Venue')

# Create an empty DataFrame using the columns
sorted_venues_df = pd.DataFrame(columns=columns)

# Assigned the neighborhoods
sorted_venues_df['Neighborhood'] = toronto_grouped['Neighborhood']

# Loop through all rows (all neighborhoods) to get the top venues
for i in range(sorted_venues_df.shape[0]):
    # Fill up the top venues columns for each row
    sorted_venues_df.iloc[i, 1:] = get_top_venues(toronto_grouped.iloc[i, :], num_top_venues)

sorted_venues_df.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Hotel,Italian Restaurant,Train Station,City,Fried Chicken Joint,Office,Chinese Restaurant,Thai Restaurant,Food Court,Tea Room
1,Agincourt,Spa,Building,Office,Print Shop,Factory,Miscellaneous Shop,Bar,Latin American Restaurant,Automotive Shop,Train Station
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Building,Medical Center,School,Bakery,Park,Spa,Tech Startup,Bookstore,Bus Line,Chinese Restaurant
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Spiritual Center,Bakery,Pizza Place,Farm,Grocery Store,Coffee Shop,Caribbean Restaurant,Casino,Bagel Shop,Library
4,"Alderwood, Long Branch",Conference Room,Bank,Dentist's Office,Convenience Store,Asian Restaurant,Office,Dance Studio,Salon / Barbershop,Pharmacy,Hungarian Restaurant


### Cluster Neighborhoods 

In [34]:
# Drop Neighborhood column for clustering
X = toronto_grouped.iloc[:, 1:]
X.shape

(103, 362)

In [45]:
# Determine the optimal number of ks
s_avg = [] # Silhouette averages
K = range(3, 20)
for k in K:
    km = KMeans(n_clusters=k)
    labels = km.fit_predict(X)
    # Get silhouette scores
    silhouette_avg = silhouette_score(X, labels, metric='euclidean')
    s_avg.append(silhouette_avg)
k_value = s_avg.index(max(s_avg)) + 2
print(f'Max silhouette average: {max(s_avg)}. k value: {k_value}.')

Max silhouette average: 0.04509825327062916. k value: 6.


In [48]:
# Create a final KMeans
k_value = 6
km = KMeans(n_clusters=k_value).fit(X)

# Print out the cluster labels
print(f'Cluster Labels: {km.labels_}')

Cluster Labels: [1 2 2 4 1 2 0 1 3 2 1 3 0 2 1 1 3 0 4 1 1 0 3 5 2 2 3 0 1 3 3 3 0 0 4 1 1
 1 3 1 5 5 0 4 4 0 2 2 5 1 3 0 0 0 1 0 3 0 3 5 5 0 1 4 5 1 1 0 5 5 2 0 2 0
 3 0 3 2 0 1 1 1 3 3 3 1 3 2 5 0 1 5 0 4 2 1 1 3 2 0 5 5 3]


In [49]:
# Add the cluster label to the sorted venues (top 10 venues)
if sorted_venues_df.columns.contains('Cluster Labels'):
    sorted_venues_df.drop('Cluster Labels', axis=1, inplace=True)
sorted_venues_df.insert(0, 'Cluster Labels', km.labels_)

In [51]:
sorted_venues_df

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,1,"Adelaide, King, Richmond",Hotel,Italian Restaurant,Train Station,City,Fried Chicken Joint,Office,Chinese Restaurant,Thai Restaurant,Food Court,Tea Room
1,2,Agincourt,Spa,Building,Office,Print Shop,Factory,Miscellaneous Shop,Bar,Latin American Restaurant,Automotive Shop,Train Station
2,2,"Agincourt North, L'Amoreaux East, Milliken, St...",Building,Medical Center,School,Bakery,Park,Spa,Tech Startup,Bookstore,Bus Line,Chinese Restaurant
3,4,"Albion Gardens, Beaumond Heights, Humbergate, ...",Spiritual Center,Bakery,Pizza Place,Farm,Grocery Store,Coffee Shop,Caribbean Restaurant,Casino,Bagel Shop,Library
4,1,"Alderwood, Long Branch",Conference Room,Bank,Dentist's Office,Convenience Store,Asian Restaurant,Office,Dance Studio,Salon / Barbershop,Pharmacy,Hungarian Restaurant
5,2,"Bathurst Manor, Downsview North, Wilson Heights",Spa,Bank,Coffee Shop,Medical Center,Middle Eastern Restaurant,Synagogue,Sushi Restaurant,Supermarket,Doctor's Office,Shopping Mall
6,0,Bayview Village,Office,Grocery Store,Church,Spa,Massage Studio,Dance Studio,Fire Station,Japanese Restaurant,Residential Building (Apartment / Condo),Chinese Restaurant
7,1,"Bedford Park, Lawrence Manor East",Salon / Barbershop,Italian Restaurant,Gas Station,Jewish Restaurant,Other Nightlife,Boutique,Bike Shop,Fast Food Restaurant,Sushi Restaurant,Dentist's Office
8,3,Berczy Park,Residential Building (Apartment / Condo),Office,Parking,Building,Antique Shop,Tech Startup,Dentist's Office,Food Truck,Rental Car Location,Library
9,2,"Birch Cliff, Cliffside West",Pizza Place,Office,Pub,Dessert Shop,Japanese Restaurant,Fast Food Restaurant,Church,Student Center,Gas Station,Bus Line


In [52]:
# Merge top 10 venues with initial data to get the coordinates
# toronto_merged = merged_df[merged_df['Borough'].str.contains('Toronto')]
toronto_merged = merged_df
toronto_merged.drop(['Borough', 'PostalCode'], axis=1, inplace=True)

In [53]:
# Join with top 10 venues
toronto_merged = toronto_merged.join(sorted_venues_df.set_index('Neighborhood'), on='Neighborhood')

print(toronto_merged.shape)
toronto_merged

(103, 14)


Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Rouge, Malvern",43.806686,-79.194353,0,Factory,Coffee Shop,Automotive Shop,Building,Office,Coworking Space,Residential Building (Apartment / Condo),Design Studio,Salon / Barbershop,Shopping Mall
1,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,0,General Entertainment,Office,Assisted Living,Medical Center,Middle Eastern Restaurant,Bus Line,Storage Facility,Salon / Barbershop,Bar,Tanning Salon
2,"Guildwood, Morningside, West Hill",43.763573,-79.188711,5,Electronics Store,Bus Line,Bank,Breakfast Spot,Bakery,Thrift / Vintage Store,Athletics & Sports,Funeral Home,Laundry Service,Intersection
3,Woburn,43.770992,-79.216917,0,Bank,Office,Building,Park,Storage Facility,Music Store,Korean Restaurant,Laundry Service,Mexican Restaurant,Financial or Legal Service
4,Cedarbrae,43.773136,-79.239476,4,Building,Bakery,Caribbean Restaurant,Coworking Space,Bridal Shop,Skating Rink,Burger Joint,Thai Restaurant,Office,Athletics & Sports
5,Scarborough Village,43.744734,-79.239476,1,Convenience Store,Playground,Salon / Barbershop,Residential Building (Apartment / Condo),Electronics Store,Elementary School,Pizza Place,Building,Fast Food Restaurant,Business Service
6,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029,5,Bus Line,Residential Building (Apartment / Condo),Church,Pharmacy,Metro Station,Convenience Store,Sporting Goods Shop,Playground,Fast Food Restaurant,Automotive Shop
7,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577,5,Bus Line,Park,Meeting Room,Storage Facility,Playground,Parking,Community College,Cemetery,Restaurant,Café
8,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476,2,Spa,Pizza Place,Office,Harbor / Marina,Movie Theater,Bar,Filipino Restaurant,Beach,Medical Supply Store,Doctor's Office
9,"Birch Cliff, Cliffside West",43.692657,-79.264848,2,Pizza Place,Office,Pub,Dessert Shop,Japanese Restaurant,Fast Food Restaurant,Church,Student Center,Gas Station,Bus Line


##### Map of clusters

In [54]:
# Get location of Toronto
geolocator = Nominatim(user_agent='Toronto_Finder')
location = geolocator.geocode('Toronto, Canada')
latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)

43.653963 -79.387207


In [55]:
# Create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

xs = np.arange(k_value)
ys = ys = [i + xs + (i*xs)**2 for i in range(k_value)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
i = 1
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)

In [56]:
map_clusters

#### Cluster 1 

In [63]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[0] + list(range(4, toronto_merged.shape[1]))]]
# Mostly looks like an area with offices and industrial buildings

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Rouge, Malvern",Factory,Coffee Shop,Automotive Shop,Building,Office,Coworking Space,Residential Building (Apartment / Condo),Design Studio,Salon / Barbershop,Shopping Mall
1,"Highland Creek, Rouge Hill, Port Union",General Entertainment,Office,Assisted Living,Medical Center,Middle Eastern Restaurant,Bus Line,Storage Facility,Salon / Barbershop,Bar,Tanning Salon
3,Woburn,Bank,Office,Building,Park,Storage Facility,Music Store,Korean Restaurant,Laundry Service,Mexican Restaurant,Financial or Legal Service
11,"Maryvale, Wexford",Intersection,Office,Coffee Shop,Hardware Store,School,Sandwich Place,Food & Drink Shop,Electronics Store,Burger Joint,Caribbean Restaurant
19,Bayview Village,Office,Grocery Store,Church,Spa,Massage Studio,Dance Studio,Fire Station,Japanese Restaurant,Residential Building (Apartment / Condo),Chinese Restaurant
26,Don Mills North,Office,Trade School,Tennis Court,Pool,Veterinarian,Radio Station,Housing Development,Basketball Court,Baseball Field,Building
27,"Flemingdon Park, Don Mills South",Office,Sporting Goods Shop,Automotive Shop,Coffee Shop,Restaurant,Nail Salon,Chinese Restaurant,Japanese Restaurant,Italian Restaurant,Asian Restaurant
39,Thorncliffe Park,Office,Indian Restaurant,Middle Eastern Restaurant,Dentist's Office,Church,Intersection,Sandwich Place,Convenience Store,Dessert Shop,Mosque
41,"The Danforth West, Riverdale",Metro Station,Intersection,Office,Massage Studio,Other Great Outdoors,Spa,Bridge,Medical Center,Miscellaneous Shop,Fast Food Restaurant
53,"Harbourfront, Regent Park",Art Gallery,Moving Target,Auto Dealership,Office,Cocktail Bar,Bakery,Rental Car Location,Café,Automotive Shop,Sandwich Place


#### Cluster 2

In [64]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[0] + list(range(4, toronto_merged.shape[1]))]]
# Cool neighborhoods

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Scarborough Village,Convenience Store,Playground,Salon / Barbershop,Residential Building (Apartment / Condo),Electronics Store,Elementary School,Pizza Place,Building,Fast Food Restaurant,Business Service
15,L'Amoreaux West,Bank,Library,Chinese Restaurant,Grocery Store,Fast Food Restaurant,Pharmacy,Post Office,Cafeteria,Sandwich Place,Thrift / Vintage Store
17,Hillcrest Village,Housing Development,Pool,Assisted Living,Japanese Restaurant,Flower Shop,Library,Lingerie Store,Fast Food Restaurant,Medical School,Mobile Phone Shop
32,Downsview Central,Government Building,Speakeasy,Coffee Shop,Library,Baseball Field,Bus Stop,Gas Station,Dive Bar,Other Nightlife,General Entertainment
33,Downsview Northwest,Mobile Phone Shop,Gas Station,Residential Building (Apartment / Condo),Shopping Mall,Field,Liquor Store,Financial or Legal Service,Basketball Court,Baseball Field,Taco Place
38,Leaside,Bank,Auto Dealership,Electronics Store,Fast Food Restaurant,Sushi Restaurant,Laundry Service,Event Space,Sports Bar,Sporting Goods Shop,Medical School
43,Studio District,Coffee Shop,Sandwich Place,Fast Food Restaurant,School,Dentist's Office,Moving Target,BBQ Joint,Automotive Shop,Bar,Sushi Restaurant
47,Davisville,Candy Store,Flower Shop,Library,Toy / Game Store,Comfort Food Restaurant,Bookstore,Dessert Shop,Nail Salon,Costume Shop,Italian Restaurant
51,"Cabbagetown, St. James Town",Ethiopian Restaurant,Other Nightlife,Spa,Building,Dive Bar,Diner,Market,Design Studio,Café,Restaurant
54,"Ryerson, Garden District",College Academic Building,Parking,Building,Café,University,Church,Farmers Market,Student Center,Indian Restaurant,Other Great Outdoors


#### Cluster 3 

In [65]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[0] + list(range(4, toronto_merged.shape[1]))]]
# All about recreation

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,"Cliffcrest, Cliffside, Scarborough Village West",Spa,Pizza Place,Office,Harbor / Marina,Movie Theater,Bar,Filipino Restaurant,Beach,Medical Supply Store,Doctor's Office
9,"Birch Cliff, Cliffside West",Pizza Place,Office,Pub,Dessert Shop,Japanese Restaurant,Fast Food Restaurant,Church,Student Center,Gas Station,Bus Line
12,Agincourt,Spa,Building,Office,Print Shop,Factory,Miscellaneous Shop,Bar,Latin American Restaurant,Automotive Shop,Train Station
13,"Clarks Corners, Sullivan, Tam O'Shanter",Doctor's Office,Spa,Bus Line,Automotive Shop,Church,Breakfast Spot,Miscellaneous Shop,Building,Shopping Mall,Fast Food Restaurant
14,"Agincourt North, L'Amoreaux East, Milliken, St...",Building,Medical Center,School,Bakery,Park,Spa,Tech Startup,Bookstore,Bus Line,Chinese Restaurant
24,Willowdale West,Park,Cemetery,Medical Center,Synagogue,Coffee Shop,Furniture / Home Store,Church,Gas Station,Residential Building (Apartment / Condo),General College & University
28,"Bathurst Manor, Downsview North, Wilson Heights",Spa,Bank,Coffee Shop,Medical Center,Middle Eastern Restaurant,Synagogue,Sushi Restaurant,Supermarket,Doctor's Office,Shopping Mall
29,"Northwood Park, York University",Spa,Building,Lounge,General Travel,Middle Eastern Restaurant,Medical Center,Storage Facility,Massage Studio,Design Studio,Café
30,"CFB Toronto, Downsview East",Coffee Shop,Building,Office,Spa,Tattoo Parlor,Bar,Medical Center,Snack Place,Electronics Store,Bus Line
34,Victoria Village,Government Building,Bus Stop,Auto Dealership,Pizza Place,Fire Station,Bank,Car Wash,Automotive Shop,Gas Station,Intersection


#### Cluster 4 

In [66]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[0] + list(range(4, toronto_merged.shape[1]))]]
# Definitely a residential area.

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
20,"Silver Hills, York Mills",Park,High School,Cooking School,Building,Synagogue,Bank,School,Tennis Court,Cafeteria,Residential Building (Apartment / Condo)
22,Willowdale South,Residential Building (Apartment / Condo),Restaurant,Park,Hotel,Church,Electronics Store,Shopping Mall,Movie Theater,Dentist's Office,Financial or Legal Service
23,York Mills West,Residential Building (Apartment / Condo),Medical Center,Convenience Store,Veterinarian,Temple,Bank,Chinese Restaurant,Intersection,General Entertainment,Arcade
25,Parkwoods,Residential Building (Apartment / Condo),Caribbean Restaurant,Spiritual Center,School,Fast Food Restaurant,Elementary School,Spa,Miscellaneous Shop,Building,Burger Joint
31,Downsview West,Doctor's Office,Residential Building (Apartment / Condo),Speakeasy,Fireworks Store,Non-Profit,Caribbean Restaurant,Library,Moving Target,Shopping Mall,City Hall
45,Davisville North,Residential Building (Apartment / Condo),Jewelry Store,Voting Booth,Scenic Lookout,Office,Restaurant,Dentist's Office,Lounge,Park,Flower Shop
49,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",Residential Building (Apartment / Condo),Embassy / Consulate,Office,Light Rail Station,Government Building,Elementary School,Supermarket,Diner,Convenience Store,Doctor's Office
50,Rosedale,Residential Building (Apartment / Condo),Park,Event Space,Music Venue,Food Truck,Italian Restaurant,Bus Line,Other Great Outdoors,Bike Trail,Conference Room
52,Church and Wellesley,Residential Building (Apartment / Condo),Spa,Bank,General Entertainment,Korean Restaurant,Bar,Dessert Shop,Salon / Barbershop,Theme Restaurant,Pub
55,St. James Town,Residential Building (Apartment / Condo),Japanese Restaurant,Church,Office,Pharmacy,Hotel,Building,Medical Lab,Dog Run,Mosque


#### Cluster 5

In [61]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[0] + list(range(4, toronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Cedarbrae,Building,Bakery,Caribbean Restaurant,Coworking Space,Bridal Shop,Skating Rink,Burger Joint,Thai Restaurant,Office,Athletics & Sports
10,"Dorset Park, Scarborough Town Centre, Wexford ...",Automotive Shop,Building,Caribbean Restaurant,Bakery,Doctor's Office,Thrift / Vintage Store,Furniture / Home Store,Performing Arts Venue,Auto Garage,Factory
16,Upper Rouge,Temple,Farm,Cosmetics Shop,Zoo Exhibit,Ski Area,Food Court,Bakery,Pizza Place,Men's Store,Dog Run
18,"Fairview, Henry Farm, Oriole",Clothing Store,Shoe Store,Pharmacy,Metro Station,Doctor's Office,Bus Stop,Library,Bank,Laboratory,Kids Store
70,"First Canadian Place, Underground city",Bakery,Deli / Bodega,Juice Bar,Clothing Store,Gluten-free Restaurant,Candy Store,Food Court,Bagel Shop,Cupcake Shop,Café
71,"Lawrence Heights, Lawrence Manor",Clothing Store,Furniture / Home Store,Bridal Shop,Design Studio,Office,Hot Dog Joint,Sports Bar,Event Space,Boutique,Miscellaneous Shop
101,"Albion Gardens, Beaumond Heights, Humbergate, ...",Spiritual Center,Bakery,Pizza Place,Farm,Grocery Store,Coffee Shop,Caribbean Restaurant,Casino,Bagel Shop,Library


#### Cluster 6 

In [67]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 5, toronto_merged.columns[[0] + list(range(4, toronto_merged.shape[1]))]]
# Quite convenient to be in these neighborhoods

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,"Guildwood, Morningside, West Hill",Electronics Store,Bus Line,Bank,Breakfast Spot,Bakery,Thrift / Vintage Store,Athletics & Sports,Funeral Home,Laundry Service,Intersection
6,"East Birchmount Park, Ionview, Kennedy Park",Bus Line,Residential Building (Apartment / Condo),Church,Pharmacy,Metro Station,Convenience Store,Sporting Goods Shop,Playground,Fast Food Restaurant,Automotive Shop
7,"Clairlea, Golden Mile, Oakridge",Bus Line,Park,Meeting Room,Storage Facility,Playground,Parking,Community College,Cemetery,Restaurant,Café
21,"Newtonbrook, Willowdale",Bus Line,Office,College Academic Building,Salon / Barbershop,Laundry Service,Church,Assisted Living,Seafood Restaurant,Optical Shop,Trail
35,"Woodbine Gardens, Parkview Hill",Pet Store,Bus Line,Flower Shop,Residential Building (Apartment / Condo),Automotive Shop,Breakfast Spot,Café,Church,Athletics & Sports,Intersection
36,Woodbine Heights,Office,Café,Fire Station,Park,Bus Stop,Laundry Service,Rental Car Location,Beer Store,Dance Studio,Convenience Store
40,East Toronto,Park,Bus Stop,Playground,Toy / Game Store,Rental Car Location,Bus Line,Cocktail Bar,Salon / Barbershop,Piano Bar,Speakeasy
42,"The Beaches West, India Bazaar",Park,Light Rail Station,College Classroom,Racetrack,Nightlife Spot,Daycare,Laundry Service,Intersection,Rental Car Location,Convenience Store
44,Lawrence Park,College Classroom,Bus Line,School,Sushi Restaurant,Office,University,Lake,Nightlife Spot,Park,Jewelry Store
48,"Moore Park, Summerhill East",Park,Elementary School,School,Tech Startup,Bank,Event Space,Smoke Shop,Bridge,Miscellaneous Shop,Moving Target
