In [1]:
import numpy as np
import pandas as pd

import json
import requests

from geopy.geocoders import Nominatim

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

import folium

<h2>Part 1: This part creates a dataframe of the boroughs and neighbourhoods in Toronto.</h2>

<h3>Scraping the Wikipedia page</h3>

In [2]:
url = 'http://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
table = pd.read_html(url)[0]

In [3]:
table.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,M1ANot assigned,M2ANot assigned,M3ANorth York(Parkwoods),M4ANorth York(Victoria Village),M5ADowntown Toronto(Regent Park / Harbourfront),M6ANorth York(Lawrence Manor / Lawrence Heights),M7AQueen's Park(Ontario Provincial Government),M8ANot assigned,M9AEtobicoke(Islington Avenue)
1,M1BScarborough(Malvern / Rouge),M2BNot assigned,M3BNorth York(Don Mills)North,M4BEast York(Parkview Hill / Woodbine Gardens),"M5BDowntown Toronto(Garden District, Ryerson)",M6BNorth York(Glencairn),M7BNot assigned,M8BNot assigned,M9BEtobicoke(West Deane Park / Princess Garden...
2,M1CScarborough(Rouge Hill / Port Union / Highl...,M2CNot assigned,M3CNorth York(Don Mills)South(Flemingdon Park),M4CEast York(Woodbine Heights),M5CDowntown Toronto(St. James Town),M6CYork(Humewood-Cedarvale),M7CNot assigned,M8CNot assigned,M9CEtobicoke(Eringate / Bloordale Gardens / Ol...
3,M1EScarborough(Guildwood / Morningside / West ...,M2ENot assigned,M3ENot assigned,M4EEast Toronto(The Beaches),M5EDowntown Toronto(Berczy Park),M6EYork(Caledonia-Fairbanks),M7ENot assigned,M8ENot assigned,M9ENot assigned
4,M1GScarborough(Woburn),M2GNot assigned,M3GNot assigned,M4GEast York(Leaside),M5GDowntown Toronto(Central Bay Street),M6GDowntown Toronto(Christie),M7GNot assigned,M8GNot assigned,M9GNot assigned


In [4]:
rows,columns = table.shape

In [5]:
toronto = pd.DataFrame(columns=['PostalCode','Borough','Neighborhood'])

for column in range(columns):
    for row in range(rows):
        
        # each field in the above table
        field = table[column][row]
        
        # ignoring fields which have "not assigned"
        if "Not assigned" in field:
            continue
        
        # locating the postal code, borough and neighbourhoods
        postal_code = field[:3]
        bracket = field.find('(')
        borough = field[3:bracket]
        neighborhood = field[bracket:].replace('(','').replace(')','').replace(' /',',')
        
        # appending to the dataframe
        toronto = toronto.append({'PostalCode':postal_code,
                                  'Borough':borough,
                                  'Neighborhood':neighborhood},
                                 ignore_index=True)

In [6]:
toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


<h3>Cleaning the dataset</h3>

In [7]:
# checking out what labels require cleaning
toronto['Borough'].values

array(['Scarborough', 'Scarborough', 'Scarborough', 'Scarborough',
       'Scarborough', 'Scarborough', 'Scarborough', 'Scarborough',
       'Scarborough', 'Scarborough', 'Scarborough', 'Scarborough',
       'Scarborough', 'Scarborough', 'Scarborough', 'Scarborough',
       'Scarborough', 'North York', 'North York', 'North York',
       'North York', 'North York', 'North York', 'North York',
       'North York', 'North York', 'North York', 'North York',
       'North York', 'North York', 'North York', 'North York',
       'North York', 'North York', 'North York', 'East York', 'East York',
       'East Toronto', 'East York', 'East York', 'East YorkEast Toronto',
       'East Toronto', 'East Toronto', 'East Toronto', 'Central Toronto',
       'Central Toronto', 'Central Toronto', 'Central Toronto',
       'Central Toronto', 'Central Toronto', 'Downtown Toronto',
       'Downtown Toronto', 'Downtown Toronto', 'Downtown Toronto',
       'Downtown Toronto', 'Downtown Toronto', 'Downtown Tor

In [8]:
# replacing the labels of the boroughs
toronto.loc[toronto['Borough'] == 'East YorkEast Toronto','Borough'] = 'East York / East Toronto'
toronto.loc[toronto['Borough'] == 'MississaugaCanada Post Gateway Processing Centre','Borough'] = 'Mississauga'
toronto.loc[toronto['Borough'] == 'East TorontoBusiness reply mail Processing Centre969 Eastern','Borough'] = 'East Toronto'
toronto.loc[toronto['Borough'] == 'Downtown TorontoStn A PO Boxes25 The Esplanade','Borough'] = 'Downtown Toronto'

In [9]:
toronto

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


In [10]:
toronto.shape

(103, 3)

<h2>Part 2: This part reads in the latitudes and longitudes using the geospatial csv.</h2>

In [11]:
locations = pd.read_csv('Geospatial_Coordinates.csv')

In [12]:
# sort postal codes alphabetically
toronto.sort_values(by=['PostalCode'],inplace=True)

In [13]:
toronto['Latitude'] = locations['Latitude']
toronto['Longitude'] = locations['Longitude']

In [14]:
toronto

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437


<h2>Part 3: Clustering</h2>

In [15]:
df = toronto.copy()

In [16]:
df.reset_index(drop=True,inplace=True)

In [17]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


<h3>I've decided to explore the cuisines available in the various neighbourhoods in all of Toronto. General feeling is that cuisines would track the ethnic demographics in the city (except for Downtown, which would generally be more worky and affluent). </h3>

<strong>Trying out with one neighbourhood - The Beaches</strong>

In [19]:
latitude = df.loc[0,'Latitude']
longitude = df.loc[0,'Longitude']
print(latitude,longitude)

section = 'food'
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&section={}&ll={},{}&v={}&radius{}&limit={}'.format(
    CLIENT_ID,CLIENT_SECRET,section,latitude,longitude,VERSION,radius,LIMIT)

43.806686299999996 -79.19435340000001


In [20]:
results = requests.get(url).json()

In [21]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['shortName']

In [22]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = pd.json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,"Lamanna's Bakery, Cafe & Fine Foods",Bakery,43.797971,-79.148432
1,Fratelli Village Pizzeria,Italian,43.784008,-79.169787
2,The Real McCoy Burgers And Pizza,Burgers,43.774081,-79.230496
3,Mona's Roti,Caribbean,43.791613,-79.251015
4,Paul Wong's Chinese Restaurant,Chinese,43.810856,-79.129157


In [23]:
nearby_venues['categories'].value_counts()

Pizza            11
Sandwiches        9
Fast Food         9
Restaurant        9
Breakfast         7
Chinese           6
Indian            6
Bakery            5
Burgers           4
Fried Chicken     4
Café              3
Greek             3
Caribbean         3
Diner             2
Fish & Chips      2
American          2
Mexican           2
Filipino          2
Sushi             1
African           1
Bistro            1
Sri Lankan        1
Afghan            1
Hakka             1
Japanese          1
Thai              1
Asian             1
Italian           1
Vietnamese        1
Name: categories, dtype: int64

<h3>Getting nearby food venues with all the neighbourhoods in Toronto</h3>

In [24]:
def getNearbyVenues(names, latitudes, longitudes):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
            
        # create the API request URL
        section = 'food'
        radius = 500
        url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&section={}&ll={},{}&v={}&radius{}&limit={}'.format(
            CLIENT_ID,CLIENT_SECRET,section,lat,lng,VERSION,radius,LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['shortName']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [25]:
names = df['Neighborhood']
latitudes = df['Latitude']
longitudes = df['Longitude']

toronto_venues = getNearbyVenues(names,latitudes,longitudes)

In [26]:
toronto_venues.shape

(9658, 7)

In [27]:
toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Malvern, Rouge",43.806686,-79.194353,"Lamanna's Bakery, Cafe & Fine Foods",43.797971,-79.148432,Bakery
1,"Malvern, Rouge",43.806686,-79.194353,Fratelli Village Pizzeria,43.784008,-79.169787,Italian
2,"Malvern, Rouge",43.806686,-79.194353,The Real McCoy Burgers And Pizza,43.774081,-79.230496,Burgers
3,"Malvern, Rouge",43.806686,-79.194353,Mona's Roti,43.791613,-79.251015,Caribbean
4,"Malvern, Rouge",43.806686,-79.194353,Paul Wong's Chinese Restaurant,43.810856,-79.129157,Chinese


In [28]:
len(toronto_venues['Venue Category'].unique())

121

In [29]:
features = toronto_venues[['Neighborhood','Venue Category']]

In [30]:
onehot = pd.get_dummies(data=features,columns=['Venue Category'],drop_first=True,prefix='',prefix_sep='')

In [31]:
onehot.head()

Unnamed: 0,Neighborhood,African,American,Arepas,Argentinian,Asian,BBQ,Bagels,Bakery,Belgian,...,Tex-Mex,Thai,Theme Restaurant,Tibetan,Turkish,Udon,Vegetarian / Vegan,Vietnamese,Wings,Xinjiang
0,"Malvern, Rouge",0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
1,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [32]:
onehot.shape

(9658, 121)

In [33]:
grouped = onehot.groupby('Neighborhood').mean().reset_index()

In [34]:
grouped.shape

(103, 121)

In [35]:
grouped

Unnamed: 0,Neighborhood,African,American,Arepas,Argentinian,Asian,BBQ,Bagels,Bakery,Belgian,...,Tex-Mex,Thai,Theme Restaurant,Tibetan,Turkish,Udon,Vegetarian / Vegan,Vietnamese,Wings,Xinjiang
0,Agincourt,0.0,0.03,0.0,0.0,0.010000,0.010000,0.000000,0.070000,0.0,...,0.0,0.010000,0.0,0.0,0.000000,0.00,0.0,0.020000,0.02,0.00
1,"Alderwood, Long Branch",0.0,0.02,0.0,0.0,0.010000,0.000000,0.000000,0.050000,0.0,...,0.0,0.020000,0.0,0.0,0.000000,0.00,0.0,0.030000,0.02,0.00
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.01,0.0,0.0,0.010000,0.000000,0.020000,0.030000,0.0,...,0.0,0.030000,0.0,0.0,0.000000,0.00,0.0,0.010000,0.00,0.00
3,Bayview Village,0.0,0.00,0.0,0.0,0.010204,0.010204,0.010204,0.020408,0.0,...,0.0,0.020408,0.0,0.0,0.000000,0.00,0.0,0.030612,0.00,0.00
4,"Bedford Park, Lawrence Manor East",0.0,0.03,0.0,0.0,0.030000,0.010000,0.040000,0.080000,0.0,...,0.0,0.030000,0.0,0.0,0.010000,0.00,0.0,0.000000,0.01,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98,WillowdaleWest,0.0,0.00,0.0,0.0,0.010000,0.000000,0.020000,0.000000,0.0,...,0.0,0.010000,0.0,0.0,0.000000,0.01,0.0,0.020000,0.02,0.00
99,Woburn,0.0,0.02,0.0,0.0,0.010000,0.000000,0.000000,0.050000,0.0,...,0.0,0.010000,0.0,0.0,0.000000,0.00,0.0,0.010000,0.02,0.01
100,Woodbine Heights,0.0,0.02,0.0,0.0,0.030000,0.030000,0.000000,0.060000,0.0,...,0.0,0.030000,0.0,0.0,0.030000,0.00,0.0,0.000000,0.00,0.00
101,York Mills West,0.0,0.00,0.0,0.0,0.027397,0.013699,0.000000,0.054795,0.0,...,0.0,0.041096,0.0,0.0,0.013699,0.00,0.0,0.000000,0.00,0.00


In [43]:
columns = ['Neighborhood'] + [x for x in range(1,11)]
all_rankings = pd.DataFrame(columns=columns)

for hood in grouped['Neighborhood']:
    top = {}
    rankings = grouped[grouped['Neighborhood'] == hood]
    rankings.set_index('Neighborhood',inplace=True)
    top_10 = rankings.sort_values(by=hood,axis=1,ascending=False).iloc[0][:10].index.to_list()
    count = 1
    for x in top_10:
        top[count] = x
        count += 1
    top['Neighborhood'] = hood
    all_rankings = all_rankings.append(top,ignore_index=True)

<h3>Ranking by most popular foods</h3>

In [44]:
all_rankings.head()

Unnamed: 0,Neighborhood,1,2,3,4,5,6,7,8,9,10
0,Agincourt,Chinese,Restaurant,Sandwiches,Bakery,Indian,Caribbean,Fast Food,American,Breakfast,Pizza
1,"Alderwood, Long Branch",Restaurant,Sandwiches,Burgers,Fast Food,Breakfast,Pizza,Bakery,Italian,Middle Eastern,Burritos
2,"Bathurst Manor, Wilson Heights, Downsview North",Sandwiches,Restaurant,Pizza,Fast Food,Middle Eastern,Italian,Café,Korean,Sushi,Japanese
3,Bayview Village,Korean,Sandwiches,Pizza,Chinese,Café,Restaurant,Sushi,Fried Chicken,Ramen,Vietnamese
4,"Bedford Park, Lawrence Manor East",Italian,Sandwiches,Bakery,Restaurant,Sushi,Café,Fast Food,Pizza,Bagels,Asian


In [45]:
kclusters = 6

grouped_clustering = grouped.drop('Neighborhood',1)

kmeans = KMeans(n_clusters=kclusters,random_state=0).fit(grouped_clustering)

kmeans.labels_[0:10]

array([4, 1, 1, 2, 5, 0, 1, 0, 3, 5], dtype=int32)

In [46]:
all_rankings.insert(0, 'Cluster Labels', kmeans.labels_)
merged = df
merged = merged.join(all_rankings.set_index('Neighborhood'),on='Neighborhood')
merged.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1,2,3,4,5,6,7,8,9,10
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,1,Pizza,Fast Food,Restaurant,Sandwiches,Breakfast,Chinese,Indian,Bakery,Fried Chicken,Burgers
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,1,Fast Food,Pizza,Sandwiches,Restaurant,Chinese,Fried Chicken,Breakfast,Burgers,Bakery,Mexican
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,1,Sandwiches,Fast Food,Pizza,Restaurant,Fried Chicken,Chinese,Indian,Bakery,Breakfast,Burgers
3,M1G,Scarborough,Woburn,43.770992,-79.216917,1,Restaurant,Pizza,Fast Food,Sandwiches,Indian,Bakery,Fried Chicken,Chinese,Breakfast,Greek
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,4,Chinese,Fast Food,Sandwiches,Restaurant,Bakery,Pizza,Breakfast,Indian,Caribbean,Wings


<h3>Mapping the clusters</h3>

In [47]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(merged['Latitude'], merged['Longitude'], merged['Neighborhood'], merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

<h3>Checking out the various clusters</h3>

<strong>General observations</strong>: <br><br>
    There are six clusters, which can be roughly described as follows:<br>
    <ul>
    <li>Downtown Toronto (Label 0) - full of cafes and restaurants. Italian and Japanese cuisine are quite popular as well.
    <li>Downtown / East / West Toronto but outside the core-Downtown area (Label 3) - full of cafes and restaurants as well but with more variety in cuisine - there are bakeries and pizzerias and Indian food.
    <li>North and East Toronto (Label 1) - sandwichy, fast-foody, quick and dirty. Good selection of Asian food as well - might be because they are more suburban / cheaper.
    <li>Korean cluster (Label 2) - small area in North Toronto.
    <li>Chinese cluster (Label 4) - tracks the Chinese population in Toronto.
    <li>Italian cluster (Label 5) - tracks the Italian popularion in Toronto.
        </ul>

<p><strong>Cluster Label 0</strong>: Mostly Downtown Toronto, where cafes are very popular. There are two outliers - York Mills and Leaside, which are both affluent neighbourhoods and hence may explain its closeness in profile to Downtown Toronto.</p>

In [72]:
merged[merged['Cluster Labels'] == 0][1].value_counts()

Café          10
Restaurant     4
Italian        3
Sandwiches     1
Japanese       1
Name: 1, dtype: int64

In [73]:
merged[merged['Cluster Labels'] == 0][2].value_counts()

Restaurant    8
Italian       4
Japanese      3
Café          3
Sushi         1
Name: 2, dtype: int64

In [54]:
merged[merged['Cluster Labels'] == 0]['Borough'].value_counts()

Downtown Toronto    13
West Toronto         2
Central Toronto      1
East York            1
North York           1
Queen's Park         1
Name: Borough, dtype: int64

<p><strong>Cluster Label 1</strong>: Mostly North York and Scarborough (North and East Toronto). There are many sandwich shops.</p>

In [78]:
merged[merged['Cluster Labels'] == 1][1].value_counts()

Sandwiches        19
Pizza              8
Restaurant         4
Fast Food          4
Middle Eastern     1
Vietnamese         1
Name: 1, dtype: int64

In [79]:
merged[merged['Cluster Labels'] == 1][2].value_counts()

Sandwiches    10
Pizza          9
Restaurant     6
Fast Food      5
Chinese        3
Italian        2
Vietnamese     1
American       1
Name: 2, dtype: int64

In [58]:
merged[merged['Cluster Labels'] == 1]['Borough'].value_counts()

North York            15
Scarborough           12
Etobicoke              6
York                   1
East York              1
Mississauga            1
EtobicokeNorthwest     1
Name: Borough, dtype: int64

<p><strong>Cluster Label 2</strong>: Small cluster of Korean food places. Interesting given that Koreatown is actually in Downtown Toronto, but there is apparently a strong niche Korean cuisine in North York, indicating that it may be a Korean residential area.</p>

In [63]:
merged[merged['Cluster Labels'] == 2]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1,2,3,4,5,6,7,8,9,10
19,M2K,North York,Bayview Village,43.786947,-79.385975,2,Korean,Sandwiches,Pizza,Chinese,Café,Restaurant,Sushi,Fried Chicken,Ramen,Vietnamese
21,M2M,North York,"Willowdale, Newtonbrook",43.789053,-79.408493,2,Korean,Pizza,Sushi,Café,Middle Eastern,Japanese,Ramen,Sandwiches,Restaurant,Fast Food
22,M2N,North York,WillowdaleSouth,43.77012,-79.408493,2,Korean,Japanese,Café,Ramen,Pizza,Sushi,Burgers,Poutine Place,Diner,Seafood
24,M2R,North York,WillowdaleWest,43.782736,-79.442259,2,Korean,Middle Eastern,Sandwiches,Café,Pizza,Restaurant,Fast Food,Sushi,Ramen,Japanese


In [61]:
merged[merged['Cluster Labels'] == 2][1].value_counts()

Korean    4
Name: 1, dtype: int64

In [62]:
merged[merged['Cluster Labels'] == 2]['Borough'].value_counts()

North York    4
Name: Borough, dtype: int64

<p><strong>Cluster Label 3</strong>: Downtown, East and West Toronto but similar to Downtown Toronto in that there are many cafes.</p>

In [74]:
merged[merged['Cluster Labels'] == 3][1].value_counts()

Café     17
Greek     1
Pizza     1
Name: 1, dtype: int64

In [76]:
merged[merged['Cluster Labels'] == 3][2].value_counts()

Bakery                6
Pizza                 3
Italian               3
Indian                2
Restaurant            1
Sandwiches            1
Vegetarian / Vegan    1
Greek                 1
Café                  1
Name: 2, dtype: int64

In [77]:
merged[merged['Cluster Labels'] == 3][3].value_counts()

Bakery        5
Restaurant    3
Italian       2
Sandwiches    2
Pizza         1
Sushi         1
Mexican       1
Vietnamese    1
American      1
Korean        1
Café          1
Name: 3, dtype: int64

In [65]:
merged[merged['Cluster Labels'] == 3]['Borough'].value_counts()

Downtown Toronto            5
East Toronto                5
West Toronto                4
East York                   2
York                        1
Etobicoke                   1
East York / East Toronto    1
Name: Borough, dtype: int64

<p><strong>Cluster Label 4</strong>: Chinese food around Scarborough / North York. This mostly tracks the strong Chinese population in Scarborough / Agincourt.</p>

In [66]:
merged[merged['Cluster Labels'] == 4][1].value_counts()

Chinese    7
Name: 1, dtype: int64

In [68]:
merged[merged['Cluster Labels'] == 4]['Borough'].value_counts()

Scarborough    5
North York     2
Name: Borough, dtype: int64

<p><strong>Cluster Label 5</strong>: Italian cuisine mostly in central and west Toronto. This tracks Little Italy, which is in west-central Toronto, and also reflects the strong Italian popultion in West Toronto (esp. since there are many Italians living in the neighbouring city of Mississauga.</p>

In [67]:
merged[merged['Cluster Labels'] == 5][1].value_counts()

Italian    14
Sushi       3
Name: 1, dtype: int64

In [69]:
merged[merged['Cluster Labels'] == 5]['Borough'].value_counts()

Central Toronto    8
Etobicoke          4
York               3
North York         2
Name: Borough, dtype: int64

<h1>Thanks for watching! :)</h1>