<b><h1>Segmenting and Clustering Neighborhoods in Toronto</h1></b>

<h2>Part 1: Dataset Preparation</h2>

<h3>Install and Import Libraries</h3>

In [1]:
!pip install folium

from geopy.geocoders import Nominatim
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import os
import requests
import folium

  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes


<h3>Extract Toronto Postal Codes Using Web Scraping</h3>

In [2]:
source = requests.get("https://www.zipcodesonline.com/2020/06/postal-code-of-toronto-in-2020.html").text
soup = BeautifulSoup(source, 'lxml')

table1 = soup.find("table", { 'class': "MsoNormalTable" })

In [3]:
def extract_data_from_table(table_soup_html: BeautifulSoup) -> list:
    data_list = []
    
    for rows in table_soup_html.find_all("tr"):
        row_data = []
        data = {}
        
        for row in rows.find_all("td"):
            for div in row.find_all("div"):
                row_data.append(div.text.strip())
        data_list.append(row_data)
    
    # REMOVING LIST MEMBERS WHICH ARE NOT REQUIRED
    data_list.pop(0)
    data_list.pop(0)
    
    return data_list

<h3>Convert the Extracted Data into a Clean Format.</h3>

In [4]:
def process_data(data_list: list) -> list:
    processed_data_list = list()
    
    for data in data_list:
        if (len(data[2]) == 3):
            data = {
                "Postal Code": data[2],
                "Borough": data[3],
                "Neighbourhood": data[1],
            }

            processed_data_list.append(data)
        
    return processed_data_list

<h3>Create Pandas Dataframe</h3>

In [5]:
final_data = extract_data_from_table(table1)
final_data = process_data(final_data)

df = pd.DataFrame(final_data)

df

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M5H,Downtown Toronto,Adelaide
1,M1V,Scarborough,Agincourt North
2,M1S,Scarborough,Agincourt
3,M9V,Etobicoke,Albion Gardens
4,M8W,Etobicoke,Alderwood
...,...,...,...
197,M4B,East York,"Woodbine Gardens,"
198,M4C,East York,Woodbine Heights
199,M2P,North York,York Mills West
200,M2L,North York,York Mills


In [6]:
#df = df.groupby(['Postal Code', 'Borough']).agg(', '.join)
df.shape

(202, 3)

<h2>Part 2: Extract Latitudes and Longitudes</h2>

<h3>Use GeoSpatial Dataset to Create Dataframe</h3>

In [7]:
#Download the csv file
!wget -q -O 'Geospatial_Coordinates.csv' http://cocl.us/Geospatial_data
print('Data downloaded!')

Data downloaded!


In [8]:
lat_long_df = pd.read_csv('Geospatial_Coordinates.csv')
lat_long_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


<h3>Create a New DataFrame By Merging Postal Code Data and Coordinates Data</h3>

In [9]:
final_df = pd.merge(df, lat_long_df, on="Postal Code", how = "inner")
final_df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M5H,Downtown Toronto,Adelaide,43.650571,-79.384568
1,M5H,Downtown Toronto,Richmond,43.650571,-79.384568
2,M1V,Scarborough,Agincourt North,43.815252,-79.284577
3,M1V,Scarborough,L'Amoreaux East,43.815252,-79.284577
4,M1V,Scarborough,Milliken,43.815252,-79.284577


In [10]:
final_df.to_csv("dataset.csv")

In [11]:
final_df.shape

(202, 5)

<h2>Part 3: Explore and Cluster the Toronto Neighborhoods</h2>

<h3>Checking How Many Different Boroughs and Neighborhoods in Toronto</h3>

In [12]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(final_df['Borough'].unique()),
        final_df.shape[0]
    )
)
print('The different buroughs are',final_df['Borough'].unique())

The dataframe has 11 boroughs and 202 neighborhoods.
The different buroughs are ['Downtown Toronto' 'Scarborough' 'Etobicoke' 'North York' 'East York'
 'West Toronto' 'East Toronto' 'York' 'Mississauga' 'Central Toronto'
 'North York,']


<h3>Mapping All Neighborhoods in Toronto</h3>

In [15]:
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="mel_coursera")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('Coordinates of Toronto City are {}, {}.'.format(latitude, longitude))

Coordinates of Toronto City are 43.6534817, -79.3839347.


In [17]:
torontomap = folium.Map(location=[latitude, longitude], zoom_start=11)

for lat, lng, label in zip(final_df['Latitude'], final_df['Longitude'], final_df['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False
    ).add_to(torontomap)  
    
torontomap

<h3>Define Foursquare API Credentials</h3>

In [18]:
CLIENT_ID = 'O31P3SFLWOP0ISJ2GRXPQ12QVPW05LI4PBHO1FFL4N4Q44EP' # your Foursquare ID
CLIENT_SECRET = 'WERWKO20KKZO52BY4MNHTF2Q34QSFBASR1CNPCLDVVTDUXCE' # your Foursquare Secret
ACCESS_TOKEN = 'SWYH20JF32GMT4TNAUXM2Z5RCKECV0FSENSASIZ0GEHPD1DS' # your FourSquare Access Token
VERSION = '20180604' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

<h3>Fetch Nearby Venues For All Neighborhoods With Foursquare API</h3>

In [19]:
def get_nearby_venues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT
        )
        
        result = requests.get(url).json()["response"]['groups'][0]['items']
        for venue in result:
            venues_list.append({
                'Neighbourhood': name,
                'Neighbourhood Latitude': lat, 
                'Neighbourhood Longitude': lng, 
                'Venue': venue['venue']['name'], 
                'Venue Latitude': venue['venue']['location']['lat'], 
                'Venue Longitude': venue['venue']['location']['lng'], 
                'Venue Category': venue['venue']['categories'][0]['name']
            })

    nearby_venues = pd.DataFrame(venues_list)
    return nearby_venues

<h3>Generate Dataframe to Store Nearby Venue Data</h3>

In [23]:
toronto_venues = get_nearby_venues(
    names=final_df['Neighbourhood'],
    latitudes=final_df['Latitude'],
    longitudes=final_df['Longitude']
    )

In [24]:
print(toronto_venues.shape)
toronto_venues.head()

(4061, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Adelaide,43.650571,-79.384568,Nathan Phillips Square,43.65227,-79.383516,Plaza
1,Adelaide,43.650571,-79.384568,Four Seasons Centre for the Performing Arts,43.650592,-79.385806,Concert Hall
2,Adelaide,43.650571,-79.384568,Rosalinda,43.650252,-79.385156,Vegetarian / Vegan Restaurant
3,Adelaide,43.650571,-79.384568,The Keg Steakhouse + Bar - York Street,43.649987,-79.384103,Restaurant
4,Adelaide,43.650571,-79.384568,Hy's Steakhouse,43.649505,-79.382919,Steakhouse


<h3>Check How Many Venues For Each Neighborhood</h3>

In [25]:
toronto_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Adelaide,98,98,98,98,98,98
Agincourt,4,4,4,4,4,4
Agincourt North,3,3,3,3,3,3
Albion Gardens,9,9,9,9,9,9
Alderwood,8,8,8,8,8,8
...,...,...,...,...,...,...
Woburn,4,4,4,4,4,4
"Woodbine Gardens,",11,11,11,11,11,11
Woodbine Heights,6,6,6,6,6,6
York Mills West,3,3,3,3,3,3


<h3>Onehot Encode Each Venue Category To Generate Top Venues</h3>

In [28]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighbourhood'] = toronto_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Adelaide,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Adelaide,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Adelaide,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
3,Adelaide,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Adelaide,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


<h3>Group Neighborhood Rows By Calculating the Mean Frequency of Venue Occurrence</h3>

In [31]:
torontogroup = toronto_onehot.groupby('Neighbourhood').mean().reset_index()
torontogroup

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Adelaide,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,...,0.0,0.0,0.010204,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Agincourt North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Albion Gardens,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Alderwood,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
189,Woburn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
190,"Woodbine Gardens,",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
191,Woodbine Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
192,York Mills West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0


<h3>Top 5 Most Common Venues For Each Neighborhood</h3>

In [32]:
num_top_venues = 5

for hood in torontogroup['Neighbourhood']:
    print(f"For {hood}")
    
    temp = torontogroup[torontogroup['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['Venue','Frequency']
    temp = temp.iloc[1:]
    temp['Frequency'] = temp['Frequency'].astype(float)
    temp = temp.round({'Frequency': 2})
    
    print(temp.sort_values('Frequency', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

For Adelaide
             Venue  Frequency
0      Coffee Shop       0.09
1             Café       0.05
2  Thai Restaurant       0.04
3       Restaurant       0.04
4   Clothing Store       0.03


For Agincourt
                       Venue  Frequency
0  Latin American Restaurant       0.25
1                     Lounge       0.25
2             Breakfast Spot       0.25
3               Skating Rink       0.25
4  Middle Eastern Restaurant       0.00


For Agincourt North
                       Venue  Frequency
0               Intersection       0.33
1                 Playground       0.33
2                       Park       0.33
3  Middle Eastern Restaurant       0.00
4        Moroccan Restaurant       0.00


For Albion Gardens
            Venue  Frequency
0   Grocery Store       0.22
1     Pizza Place       0.11
2  Sandwich Place       0.11
3        Pharmacy       0.11
4      Beer Store       0.11


For Alderwood
            Venue  Frequency
0     Pizza Place       0.25
1        Pharmacy   

<h3>Generate Dataframe to Store Top Most Common Venues</h3>

In [33]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

<h3>Dataframe For Top 10 Most Common Venues For Each Neighborhood</h3>

In [34]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

columns = ['Neighbourhood']

for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))


neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = torontogroup['Neighbourhood']

for ind in np.arange(torontogroup.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(torontogroup.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Adelaide,Coffee Shop,Café,Thai Restaurant,Restaurant,Gym,Deli / Bodega,Clothing Store,Pizza Place,Cosmetics Shop,Steakhouse
1,Agincourt,Lounge,Latin American Restaurant,Skating Rink,Breakfast Spot,Escape Room,Electronics Store,Ethiopian Restaurant,Eastern European Restaurant,Fabric Shop,Dumpling Restaurant
2,Agincourt North,Intersection,Park,Playground,Doner Restaurant,Department Store,Dessert Shop,Diner,Discount Store,Distribution Center,Dog Run
3,Albion Gardens,Grocery Store,Pharmacy,Sandwich Place,Beer Store,Fast Food Restaurant,Discount Store,Fried Chicken Joint,Pizza Place,Comic Shop,Diner
4,Alderwood,Pizza Place,Coffee Shop,Gym,Pharmacy,Sandwich Place,Playground,Pub,Distribution Center,Department Store,Dessert Shop
...,...,...,...,...,...,...,...,...,...,...,...
189,Woburn,Coffee Shop,Indian Restaurant,Korean BBQ Restaurant,Ethiopian Restaurant,Escape Room,Event Space,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Department Store
190,"Woodbine Gardens,",Pizza Place,Gym / Fitness Center,Athletics & Sports,Gastropub,Intersection,Flea Market,Pharmacy,Café,Bank,Pet Store
191,Woodbine Heights,Park,Skating Rink,Athletics & Sports,Curling Ice,Intersection,Beer Store,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Dessert Shop
192,York Mills West,Convenience Store,Park,Flower Shop,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Yoga Studio


<h3>Segmenting Neighborhoods with K-Means Cluster</h3>

In [35]:
torontogroup_clustering = torontogroup.drop('Neighbourhood', 1)

kclusters = 5
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(torontogroup_clustering)
kmeans.labels_[0:10]

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32)

<h3>Merge All Dataframes With Clusters For Further Analysis</h3>

In [36]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
toronto_merged = final_df
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

toronto_merged = toronto_merged.dropna()
toronto_merged["Cluster Labels"] = toronto_merged["Cluster Labels"].astype(int)
toronto_merged.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5H,Downtown Toronto,Adelaide,43.650571,-79.384568,1,Coffee Shop,Café,Thai Restaurant,Restaurant,Gym,Deli / Bodega,Clothing Store,Pizza Place,Cosmetics Shop,Steakhouse
1,M5H,Downtown Toronto,Richmond,43.650571,-79.384568,1,Coffee Shop,Café,Thai Restaurant,Restaurant,Gym,Deli / Bodega,Clothing Store,Pizza Place,Cosmetics Shop,Steakhouse
2,M1V,Scarborough,Agincourt North,43.815252,-79.284577,1,Intersection,Park,Playground,Doner Restaurant,Department Store,Dessert Shop,Diner,Discount Store,Distribution Center,Dog Run
3,M1V,Scarborough,L'Amoreaux East,43.815252,-79.284577,1,Intersection,Park,Playground,Doner Restaurant,Department Store,Dessert Shop,Diner,Discount Store,Distribution Center,Dog Run
4,M1V,Scarborough,Milliken,43.815252,-79.284577,1,Intersection,Park,Playground,Doner Restaurant,Department Store,Dessert Shop,Diner,Discount Store,Distribution Center,Dog Run


<h3>Map the Formed Clusters in Toronto</h3>

In [37]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

<h3>CLUSTER 1</h3>

In [38]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
115,Scarborough,0,Bar,Yoga Studio,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Farmers Market
116,Scarborough,0,Bar,Yoga Studio,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Farmers Market
117,Scarborough,0,Bar,Yoga Studio,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Farmers Market


<h3>CLUSTER 2</h3>

In [39]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,1,Coffee Shop,Café,Thai Restaurant,Restaurant,Gym,Deli / Bodega,Clothing Store,Pizza Place,Cosmetics Shop,Steakhouse
1,Downtown Toronto,1,Coffee Shop,Café,Thai Restaurant,Restaurant,Gym,Deli / Bodega,Clothing Store,Pizza Place,Cosmetics Shop,Steakhouse
2,Scarborough,1,Intersection,Park,Playground,Doner Restaurant,Department Store,Dessert Shop,Diner,Discount Store,Distribution Center,Dog Run
3,Scarborough,1,Intersection,Park,Playground,Doner Restaurant,Department Store,Dessert Shop,Diner,Discount Store,Distribution Center,Dog Run
4,Scarborough,1,Intersection,Park,Playground,Doner Restaurant,Department Store,Dessert Shop,Diner,Discount Store,Distribution Center,Dog Run
...,...,...,...,...,...,...,...,...,...,...,...,...
197,Etobicoke,1,Pizza Place,Chinese Restaurant,Coffee Shop,Playground,Sandwich Place,Discount Store,Intersection,Middle Eastern Restaurant,Yoga Studio,Dessert Shop
198,York,1,Convenience Store,Park,Yoga Studio,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore
199,Scarborough,1,Coffee Shop,Indian Restaurant,Korean BBQ Restaurant,Ethiopian Restaurant,Escape Room,Event Space,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Department Store
200,East York,1,Park,Skating Rink,Athletics & Sports,Curling Ice,Intersection,Beer Store,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Dessert Shop


<h3>CLUSTER 3</h3>

In [40]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
91,North York,2,Baseball Field,Fabric Shop,Yoga Studio,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
92,North York,2,Baseball Field,Fabric Shop,Yoga Studio,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
122,Etobicoke,2,Baseball Field,Yoga Studio,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Farmers Market
123,Etobicoke,2,Baseball Field,Yoga Studio,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Farmers Market
124,Etobicoke,2,Baseball Field,Yoga Studio,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Farmers Market
125,Etobicoke,2,Baseball Field,Yoga Studio,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Farmers Market
126,Etobicoke,2,Baseball Field,Yoga Studio,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Farmers Market
127,Etobicoke,2,Baseball Field,Yoga Studio,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Farmers Market
128,Etobicoke,2,Baseball Field,Yoga Studio,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Farmers Market
129,Etobicoke,2,Baseball Field,Yoga Studio,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Farmers Market


<h3>CLUSTER 4</h3>

In [41]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
63,Etobicoke,3,Bakery,Brewery,Yoga Studio,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant
64,Etobicoke,3,Bakery,Brewery,Yoga Studio,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant
65,Etobicoke,3,Bakery,Brewery,Yoga Studio,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant
66,Etobicoke,3,Bakery,Brewery,Yoga Studio,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant


<h3>CLUSTER 5</h3>

In [42]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
161,Etobicoke,4,River,Yoga Studio,Doner Restaurant,Dessert Shop,Diner,Discount Store,Distribution Center,Dog Run,Donut Shop,Deli / Bodega
162,Etobicoke,4,River,Yoga Studio,Doner Restaurant,Dessert Shop,Diner,Discount Store,Distribution Center,Dog Run,Donut Shop,Deli / Bodega
163,Etobicoke,4,River,Yoga Studio,Doner Restaurant,Dessert Shop,Diner,Discount Store,Distribution Center,Dog Run,Donut Shop,Deli / Bodega


<h2>Toronto Neighborhood Clusters Analysis</h2>

<ul><b>Cluster 1:</b> This cluster is in Scarborough and they share top 10 common venues such as Bar, Yoga Studio, Drug Store, and Discount Store.</ul>
<ul><b>Cluster 2:</b> The cluster is colored purple and is spread out everywhere in Toronto. Top common venues differ depending on the borough.</ul>
<ul><b>Cluster 3:</b> This is within North York and Etobicoke boroughs. The baseball fields in these boroughs have formed this cluster.</ul>
<ul><b>Cluster 4:</b> This is in Etobicoke borough and it shares top common venues such as Bakery, Brewery, and Yoga Studio.</ul>
<ul><b>Cluster 5:</b> This is also in Etobicoke borough. Unlike Cluster 4, it is near the river and they share common venues such as Doner Restaurants and Dessert Shops.</ul>