<h1>Segmenting and Clustering Neighborhoods in Toronto</h1>
<h2 style="margin-top:10px">Applied Data Science Capstone Project (Week 3)</h2>

<h3>Part 1</h3>

In [3]:
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import requests

In [4]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
r = requests.get(url)
soup = BeautifulSoup(r.text)

In [5]:
table_contents = []
table = soup.find('table')

for row in table.findAll('td'):
    my_cell = {}
    if row.span.text=='Not assigned':    #Ignore cells with a borough that is "Not assigned"
        pass
    else:
        my_cell['PostalCode'] = row.p.text[:3]
        my_cell['Borough'] = (row.span.text).split('(')[0]
        #If a cell has a borough but a "Not assigned" neighborhood, then the neighborhood will be the same as the borough:
        my_cell['Neighborhood'] = (((((row.span.text).split('(')[1]).strip(')')).replace(' /',',')).replace(')',' ')).strip(' ')     
        table_contents.append(my_cell)

df=pd.DataFrame(table_contents)
df['Borough']=df['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
                                             'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business',
                                             'EtobicokeNorthwest':'Etobicoke Northwest','East YorkEast Toronto':'East York/East Toronto',
                                             'MississaugaCanada Post Gateway Processing Centre':'Mississauga'})

df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government


In [6]:
#Prints the number of rows of df.
df.shape[0]

103

<h3>Part 2</h3>

In [7]:
!pip install geocoder
import geocoder

  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes
Collecting geocoder
  Downloading geocoder-1.38.1-py2.py3-none-any.whl (98 kB)
[K     |████████████████████████████████| 98 kB 9.8 MB/s  eta 0:00:01
Collecting ratelim
  Downloading ratelim-0.1.6-py2.py3-none-any.whl (4.0 kB)
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6


In [None]:
coords = []
for postal_code in df['PostalCode']:
    print("Searching coordinates for " + postal_code + "...")
    # initialize your variable to None
    lat_lng_coords = None
    attempts = 0

    # loop until you get the coordinates
    while(lat_lng_coords is None):
      g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
      lat_lng_coords = g.latlng
    attempts += 1

    latitude = lat_lng_coords[0]
    longitude = lat_lng_coords[1]
    coords.append(lat_lng_coords)
    print("Done (" + str(attempts) + " attempts).\n")

In [9]:
# Since geocoder library did not work, I used geopy in order to convert an address into latitude and longitude values.
from geopy.geocoders import Nominatim

In [None]:
# Unfortunately, some of the results were repeatedly returned as None, so it did not work as well.

latitudes = []
longitudes = []

for postal_code in df['PostalCode']:
    address = '{}, Toronto, Ontario'.format(postal_code)
    location = geolocator.geocode(address)
    
    while (location is None):
        location = geolocator.geocode(address)
    
    latitude = location.latitude
    print(latitude)
    longitude = location.longitude
    latitudes.append(latitude)
    longitudes.append(longitude)
    
print(latitudes)
print(longitudes)

In [12]:
# Hence, I had to import GeoSpatial Dataset containing geographical coordinates of each postal code (I named it "coordinates").
# The code which imported the dataset is hidden as it contained sensitive data.
coordinates = coordinates.rename(columns={"Postal Code":"PostalCode"})
coordinates.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [13]:
df_merged = df.merge(coordinates, on="PostalCode") #merges coordinates dataframe with df (containing boroughs and neighborhoods).
df_merged.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494


<h3>Part 3</h3>

In [14]:
df_toronto = df_merged[df_merged["Borough"].str.contains("Toronto")].reset_index(drop=True) # Extracts boroughs that contain the word "Toronto" (as instructed)
df_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
3,M4E,East Toronto,The Beaches,43.676357,-79.293031
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306


In [15]:
df_toronto.shape

(39, 5)

In [17]:
#Let's explore the first neighbourhood in the dataframe. 
df_toronto.loc[0, 'Neighborhood']

'Regent Park, Harbourfront'

In [18]:
neighborhood_latitude = df_toronto.loc[0, 'Latitude']
neighborhood_longitude = df_toronto.loc[0, 'Longitude']
neighborhood_name = df_toronto.loc[0, 'Neighborhood']

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Regent Park, Harbourfront are 43.6542599, -79.3606359.


In [19]:
LIMIT = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, VERSION, neighborhood_latitude, neighborhood_longitude, radius, LIMIT)

In [20]:
results = requests.get(url).json()

In [26]:
results

{'meta': {'code': 200, 'requestId': '60edeabba41a746a51aa53d4'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Corktown',
  'headerFullLocation': 'Corktown, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 44,
  'suggestedBounds': {'ne': {'lat': 43.6587599045, 'lng': -79.3544279001486},
   'sw': {'lat': 43.6497598955, 'lng': -79.36684389985142}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '54ea41ad498e9a11e9e13308',
       'name': 'Roselle Desserts',
       'location': {'address': '362 King St E',
        'crossStreet': 'Trinity St',
        'lat': 43.653446723052674,
        'lng': -79.3620167174383,
        'labeledLatLngs': [{'label': 'display',
 

In [33]:
def return_venue_category(row): #Returns a category of a venue in a given row
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [35]:
venues = results['response']['groups'][0]['items'] #Extracts the items from the results
nearby_venues = pd.json_normalize(venues) #Normalizes semi-structured JSON data into a flat table
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns] #Extracts four columns from the nearby_venues table
nearby_venues['venue.categories'] = nearby_venues.apply(return_venue_category, axis=1)
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns] #Deletes "venues." from the names of the columns
print('{} venues were returned.'.format(nearby_venues.shape[0]))

44 venues were returned.


In [36]:
nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Roselle Desserts,Bakery,43.653447,-79.362017
1,Tandem Coffee,Coffee Shop,43.653559,-79.361809
2,Cooper Koo Family YMCA,Distribution Center,43.653249,-79.358008
3,Body Blitz Spa East,Spa,43.654735,-79.359874
4,Impact Kitchen,Restaurant,43.656369,-79.35698


In [37]:
#Let's repeat this process for every neighborhood in df_toronto dataframe.
def getNearbyVenues(names, latitudes, longitudes, radius=500): 
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, VERSION, lat, lng, radius, LIMIT)
        temp_results = requests.get(url).json()["response"]['groups'][0]['items']
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in temp_results])
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [38]:
toronto_venues = getNearbyVenues(names=df_toronto['Neighborhood'],
                                   latitudes=df_toronto['Latitude'],
                                   longitudes=df_toronto['Longitude'])

In [39]:
print(toronto_venues.shape)
toronto_venues.head()

(1599, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Regent Park, Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Regent Park, Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,"Regent Park, Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,"Regent Park, Harbourfront",43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant


In [40]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,57,57,57,57,57,57
"Brockton, Parkdale Village, Exhibition Place",22,22,22,22,22,22
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",18,18,18,18,18,18
Central Bay Street,68,68,68,68,68,68
Christie,16,16,16,16,16,16
Church and Wellesley,79,79,79,79,79,79
"Commerce Court, Victoria Hotel",100,100,100,100,100,100
Davisville,33,33,33,33,33,33
Davisville North,12,12,12,12,12,12
"Dufferin, Dovercourt Village",14,14,14,14,14,14


In [41]:
print('There are {} unique venue categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 230 unique venue categories.


In [94]:
#Now, using one hot encoding we are going to create 230 new columns, one for each venue category.
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")
toronto_onehot["NeighborhoodName"] = toronto_venues["Neighborhood"]
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]
toronto_onehot.head()

Unnamed: 0,NeighborhoodName,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Tibetan Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Yoga Studio
0,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [95]:
toronto_onehot.shape

(1599, 231)

In [97]:
toronto_grouped = toronto_onehot.groupby('NeighborhoodName').mean().reset_index()
toronto_grouped.head()

Unnamed: 0,NeighborhoodName,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Tibetan Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"CN Tower, King and Spadina, Railway Lands, Har...",0.055556,0.055556,0.055556,0.111111,0.166667,0.111111,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.014706,0.0,0.014706
4,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [100]:
#Let's list five first neighborhoods and top 5 of their most common venues
number_of_top_venues = 5

for hood in toronto_grouped['NeighborhoodName'].head():
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['NeighborhoodName'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(number_of_top_venues))
    print('\n')

----Berczy Park----
                venue  freq
0         Coffee Shop  0.05
1        Cocktail Bar  0.05
2              Bakery  0.05
3  Seafood Restaurant  0.04
4            Beer Bar  0.04


----Brockton, Parkdale Village, Exhibition Place----
                venue  freq
0                Café  0.14
1      Breakfast Spot  0.09
2         Coffee Shop  0.09
3  Italian Restaurant  0.05
4        Climbing Gym  0.05


----CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport----
              venue  freq
0   Airport Service  0.17
1    Airport Lounge  0.11
2  Airport Terminal  0.11
3           Airport  0.06
4          Boutique  0.06


----Central Bay Street----
                venue  freq
0         Coffee Shop  0.16
1      Sandwich Place  0.06
2  Italian Restaurant  0.06
3                Café  0.04
4        Burger Joint  0.03


----Christie----
                venue  freq
0       Grocery Store  0.25
1                Café  0.19
2         Coffee

In [102]:
def most_common_venues(row, number_of_top_venues): #Returns most common categories of venues in a given neighborhood
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    return row_categories_sorted.index.values[0:number_of_top_venues]

In [105]:
number_of_top_venues = 10
st_or_nd = ['st', 'nd']

columns_names = ["Neighborhood"]

# Creates a given number of columns 
for num in (np.arange(number_of_top_venues) + 1):
    if num < 3:
        columns_names.append("{}{} Most Common Venue".format(num, st_or_nd[num-1]))
    else:
        columns_names.append("{}rd Most Common Venue".format(num))

columns_names

['Neighborhood',
 '1st Most Common Venue',
 '2nd Most Common Venue',
 '3rd Most Common Venue',
 '4rd Most Common Venue',
 '5rd Most Common Venue',
 '6rd Most Common Venue',
 '7rd Most Common Venue',
 '8rd Most Common Venue',
 '9rd Most Common Venue',
 '10rd Most Common Venue']

In [107]:
venues_sorted = pd.DataFrame(columns=columns_names)
venues_sorted["Neighborhood"] = toronto_grouped["NeighborhoodName"]

for ind in np.arange(toronto_grouped.shape[0]):
    venues_sorted.iloc[ind, 1:] = most_common_venues(toronto_grouped.iloc[ind, :], number_of_top_venues)

venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4rd Most Common Venue,5rd Most Common Venue,6rd Most Common Venue,7rd Most Common Venue,8rd Most Common Venue,9rd Most Common Venue,10rd Most Common Venue
0,Berczy Park,Cocktail Bar,Bakery,Coffee Shop,Pharmacy,Cheese Shop,Pub,Restaurant,Farmers Market,Beer Bar,Seafood Restaurant
1,"Brockton, Parkdale Village, Exhibition Place",Café,Coffee Shop,Breakfast Spot,Pet Store,Stadium,Burrito Place,Restaurant,Climbing Gym,Grocery Store,Bakery
2,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Service,Airport Lounge,Airport Terminal,Harbor / Marina,Sculpture Garden,Airport Food Court,Airport Gate,Bar,Boat or Ferry,Boutique
3,Central Bay Street,Coffee Shop,Sandwich Place,Italian Restaurant,Café,Sculpture Garden,Salad Place,Bubble Tea Shop,Japanese Restaurant,Burger Joint,Yoga Studio
4,Christie,Grocery Store,Café,Coffee Shop,Park,Nightclub,Candy Store,Italian Restaurant,Baby Store,Restaurant,Eastern European Restaurant


In [128]:
#Let's cluster the neighborhoods, using the k-means algorithm.
from sklearn.cluster import KMeans
kclusters = 3
toronto_grouped_clustering = toronto_grouped.drop('NeighborhoodName', 1)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)
kmeans.labels_

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 0], dtype=int32)

In [None]:
#Let's insert the clustering labels to the venues_sorted table:
venues_sorted.insert(0, "Cluster labels", kmeans.labels_)

In [115]:
toronto_merged = df_toronto
toronto_merged = toronto_merged.join(venues_sorted.set_index("Neighborhood"), on="Neighborhood")
toronto_merged.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4rd Most Common Venue,5rd Most Common Venue,6rd Most Common Venue,7rd Most Common Venue,8rd Most Common Venue,9rd Most Common Venue,10rd Most Common Venue
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,0,Coffee Shop,Bakery,Pub,Park,Breakfast Spot,Café,Theater,Yoga Studio,Dessert Shop,Shoe Store
1,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,0,Clothing Store,Coffee Shop,Café,Cosmetics Shop,Italian Restaurant,Japanese Restaurant,Middle Eastern Restaurant,Bubble Tea Shop,Bookstore,Ramen Restaurant
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,Coffee Shop,Café,Restaurant,Beer Bar,Cocktail Bar,Cosmetics Shop,Lingerie Store,Clothing Store,Park,Bakery
3,M4E,East Toronto,The Beaches,43.676357,-79.293031,2,Health Food Store,Neighborhood,Trail,Pub,Yoga Studio,Dumpling Restaurant,Dog Run,Doner Restaurant,Donut Shop,Electronics Store
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,0,Cocktail Bar,Bakery,Coffee Shop,Pharmacy,Cheese Shop,Pub,Restaurant,Farmers Market,Beer Bar,Seafood Restaurant


In [120]:
#Let's visualize our data on a map, using a folium library:
!pip install folium
import folium

  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes
Collecting folium
  Downloading folium-0.12.1-py2.py3-none-any.whl (94 kB)
[K     |████████████████████████████████| 94 kB 5.3 MB/s  eta 0:00:01
Collecting branca>=0.3.0
  Downloading branca-0.4.2-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.2 folium-0.12.1


In [124]:
import matplotlib.cm as cm
import matplotlib.colors as colors

latitude = toronto_merged["Latitude"].mean()
longitude = toronto_merged["Longitude"].mean()
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

<h3>Cluster analysis</h3>

In [125]:
# Now, let's examine each cluster.

In [132]:
toronto_merged.loc[toronto_merged['Cluster labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4rd Most Common Venue,5rd Most Common Venue,6rd Most Common Venue,7rd Most Common Venue,8rd Most Common Venue,9rd Most Common Venue,10rd Most Common Venue
0,Downtown Toronto,0,Coffee Shop,Bakery,Pub,Park,Breakfast Spot,Café,Theater,Yoga Studio,Dessert Shop,Shoe Store
1,Downtown Toronto,0,Clothing Store,Coffee Shop,Café,Cosmetics Shop,Italian Restaurant,Japanese Restaurant,Middle Eastern Restaurant,Bubble Tea Shop,Bookstore,Ramen Restaurant
2,Downtown Toronto,0,Coffee Shop,Café,Restaurant,Beer Bar,Cocktail Bar,Cosmetics Shop,Lingerie Store,Clothing Store,Park,Bakery
4,Downtown Toronto,0,Cocktail Bar,Bakery,Coffee Shop,Pharmacy,Cheese Shop,Pub,Restaurant,Farmers Market,Beer Bar,Seafood Restaurant
5,Downtown Toronto,0,Coffee Shop,Sandwich Place,Italian Restaurant,Café,Sculpture Garden,Salad Place,Bubble Tea Shop,Japanese Restaurant,Burger Joint,Yoga Studio
6,Downtown Toronto,0,Grocery Store,Café,Coffee Shop,Park,Nightclub,Candy Store,Italian Restaurant,Baby Store,Restaurant,Eastern European Restaurant
7,Downtown Toronto,0,Coffee Shop,Café,Thai Restaurant,Restaurant,Clothing Store,Hotel,Gym,Deli / Bodega,Pizza Place,Sushi Restaurant
8,West Toronto,0,Bakery,Pharmacy,Pet Store,Middle Eastern Restaurant,Music Venue,Park,Café,Brewery,Bar,Supermarket
10,Downtown Toronto,0,Coffee Shop,Aquarium,Café,Hotel,Restaurant,Brewery,Fried Chicken Joint,Scenic Lookout,Pizza Place,Plaza
11,West Toronto,0,Bar,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,Café,Restaurant,Men's Store,Coffee Shop,Asian Restaurant,Yoga Studio,New American Restaurant


<p><b>As we can see, the first cluster is characterized primarily by the frequent presence of coffee shops and cafes.</b></p>

In [133]:
toronto_merged.loc[toronto_merged['Cluster labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4rd Most Common Venue,5rd Most Common Venue,6rd Most Common Venue,7rd Most Common Venue,8rd Most Common Venue,9rd Most Common Venue,10rd Most Common Venue
9,East York/East Toronto,1,Park,Convenience Store,Metro Station,Yoga Studio,Dog Run,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Escape Room
18,Central Toronto,1,Park,Bus Line,Swim School,Yoga Studio,Dog Run,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Escape Room
21,Central Toronto,1,Park,Jewelry Store,Trail,Sushi Restaurant,Distribution Center,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Escape Room
29,Central Toronto,1,Park,Trail,Summer Camp,Restaurant,Convenience Store,Distribution Center,Falafel Restaurant,Event Space,Ethiopian Restaurant,Escape Room
33,Downtown Toronto,1,Park,Playground,Trail,Yoga Studio,Discount Store,Falafel Restaurant,Event Space,Ethiopian Restaurant,Escape Room,Electronics Store


<p><b>The second cluster, on the other hand, is characterized by a large number of parks, farmers markets, escape rooms, event spaces, and falafel and Ethiopian restaurants.</b></p>

In [134]:
toronto_merged.loc[toronto_merged['Cluster labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4rd Most Common Venue,5rd Most Common Venue,6rd Most Common Venue,7rd Most Common Venue,8rd Most Common Venue,9rd Most Common Venue,10rd Most Common Venue
3,East Toronto,2,Health Food Store,Neighborhood,Trail,Pub,Yoga Studio,Dumpling Restaurant,Dog Run,Doner Restaurant,Donut Shop,Electronics Store


<p><b>As can be seen, the third cluster is clearly different from the first and second clusters.</b></p>

<h3>Thank you for reviewing my project!</h3>