## IBM Data Science Capstone Project

### Topic: Find Optimal Locations for Opening Up a Bubble Tea Shop in Toronto

In [2]:
# import libaries
import os
import numpy as np
import pandas as pd
import requests
!conda install -c conda-forge folium=0.5.0 --yes
import folium
from sklearn.cluster import KMeans
#!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import matplotlib.cm as cm
import matplotlib.colors as colors

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    certifi-2020.6.20          |   py36h9f0ad1d_0         151 KB  conda-forge
    branca-0.4.1               |             py_0          26 KB  conda-forge
    altair-4.1.0               |             py_1         614 KB  conda-forge
    openssl-1.1.1g             |       h516909a_1         2.1 MB  conda-forge
    ca-certificates-2020.6.20  |       hecda079_0         145 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    ------------------------------------------------------------
                       

In [3]:
# scrape the postal-code table from the Wikipedia page into a DataFrame;
# cells reading 'Not assigned' are parsed as missing values (NaN)
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
wiki_tables = pd.read_html(url, na_values=['Not assigned'])
df = wiki_tables[0]  # the first table on the page is used
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,,
1,M2A,,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [4]:
# drop the rows whose Borough is missing, then report how many of the
# remaining rows still have no Neighbourhood
df = df.dropna(subset=['Borough'])
n_empty = int(df['Neighbourhood'].isna().sum())
if n_empty == 0:
    print('Good!')
else:
    # the original referenced an undefined name here and raised NameError
    # exactly when there was something to report
    print('Number of rows on which Neighborhood column is empty: {}'.format(n_empty))

Good!


In [5]:
# combine the neighbourhoods that share a postal code (and borough)
# into a single comma-separated string
trt_df = (
    df.groupby(['Postal Code','Borough'])['Neighbourhood']
      .agg(','.join)
      .reset_index()
)
trt_df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [6]:
# read the postal code -> latitude/longitude lookup table from the course CSV
geo_url = 'http://cocl.us/Geospatial_data'
geo_df = pd.read_csv(geo_url)
geo_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [7]:
# attach coordinates to every postal code; a left join keeps all rows of trt_df
# even if a postal code has no match in geo_df
final_df = pd.merge(trt_df, geo_df, how='left', on='Postal Code')
final_df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [8]:
# number of postal-code rows (with a non-missing Neighbourhood) in each borough
final_df.groupby('Borough')['Neighbourhood'].count()

Borough
Central Toronto      9
Downtown Toronto    19
East Toronto         5
East York            5
Etobicoke           12
Mississauga          1
North York          24
Scarborough         17
West Toronto         6
York                 5
Name: Neighbourhood, dtype: int64

In [9]:
# keep the boroughs of interest: East York, North York, Scarborough and every
# borough whose name contains "Toronto" (the pattern is matched as a regex)
borough_pattern = 'East York|North York|Scarborough|Toronto'
is_selected = final_df['Borough'].str.contains(borough_pattern)
toronto_df = final_df[is_selected].reset_index(drop=True)
toronto_df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [10]:
# number of postal-code rows per borough after filtering
toronto_df.groupby('Borough')['Neighbourhood'].count()

Borough
Central Toronto      9
Downtown Toronto    19
East Toronto         5
East York            5
North York          24
Scarborough         17
West Toronto         6
Name: Neighbourhood, dtype: int64

In [11]:
# get geographical coordinates (latitude and longitude) of Toronto using the Geopy library
address = 'Toronto'
geolocator = Nominatim(user_agent = 'trt_explorer')
loc = geolocator.geocode(address)
# geocode() yields None when no match is found; fail with a clear message
# instead of an AttributeError on the attribute access below
if loc is None:
    raise ValueError('Could not geocode address: {}'.format(address))
lat = loc.latitude
long = loc.longitude
print('The geographical coordinates of Toronto is {},{}.'.format(lat,long))

The geographical coordinates of Toronto is 43.6534817,-79.3839347.


In [12]:
# create map of Toronto using latitude and longitude values
map_trt = folium.Map(location=[lat, long], zoom_start=11)

# add one marker per postal code.
# The loop variables deliberately do NOT reuse the name `lat`: doing so would
# overwrite the city-level latitude that a later cell uses to centre the
# cluster map.
for nb_lat, nb_lng, borough, neighborhood in zip(toronto_df['Latitude'], toronto_df['Longitude'], toronto_df['Borough'], toronto_df['Neighbourhood']):
    label = '{},{}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [nb_lat, nb_lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_trt)

map_trt

In [13]:
# Use the Foursquare API to retrieve venues data
# Credentials are read from the environment so that secrets are never stored in
# (or shared together with) the notebook. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE: any key that was previously committed in this cell should be treated as
# compromised and rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # Foursquare Secret
if not (CLIENT_ID and CLIENT_SECRET):
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; venue requests cannot be authenticated.')
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # search radius passed to the API for every location

In [14]:
def getVenues(df):
    """Collect Foursquare venues around every location listed in ``df``.

    For each row the Foursquare ``venues/explore`` endpoint is queried with the
    row's coordinates and the notebook-level settings ``CLIENT_ID``,
    ``CLIENT_SECRET``, ``VERSION``, ``radius`` and ``LIMIT``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns 'Latitude', 'Longitude', 'Postal Code',
        'Borough' and 'Neighbourhood'.

    Returns
    -------
    list of tuple
        One 9-tuple per venue: (postal code, borough, neighbourhood,
        location latitude, location longitude, venue name, venue latitude,
        venue longitude, venue category). The category is ``None`` when the
        venue carries no category.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status (e.g. bad credentials).
    """
    endpoint = 'https://api.foursquare.com/v2/venues/explore'
    venues = []
    for lat, lng, post, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Postal Code'], df['Borough'], df['Neighbourhood']):
        # let requests build and URL-encode the query string
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }
        # a timeout keeps one stalled request from hanging the whole notebook
        response = requests.get(endpoint, params=params, timeout=30)
        # surface API failures here instead of as a KeyError while parsing
        response.raise_for_status()
        results = response.json()

        # a location may come back without any result group
        groups = results.get('response', {}).get('groups', [])
        venuesList = groups[0]['items'] if groups else []
        for item in venuesList:
            venue = item['venue']
            # the categories list can be empty; indexing [0] blindly would raise
            categories = venue.get('categories') or []
            category = categories[0]['name'] if categories else None
            venues.append((
                post,
                borough,
                neighborhood,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category))
    return venues

In [15]:
# query the venues for every postal code and tabulate them, one row per venue
venue_columns = ['PostalCode', 'Borough', 'Neighbourhood', 'BoroughLatitude', 'BoroughLongitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']
venues = getVenues(toronto_df)
venues_df = pd.DataFrame(venues, columns=venue_columns)
print(venues_df.shape)
venues_df.head(6)

(2046, 9)


Unnamed: 0,PostalCode,Borough,Neighbourhood,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,RBC Royal Bank,43.76679,-79.191151,Bank
3,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store
4,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Sail Sushi,43.765951,-79.191275,Restaurant
5,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Big Bite Burrito,43.766299,-79.19072,Mexican Restaurant


In [16]:
# count the venues returned for each postal code (first few groups only)
venue_counts = venues_df.groupby(['PostalCode','Borough','Neighbourhood']).count()
venue_counts.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
PostalCode,Borough,Neighbourhood,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
M1B,Scarborough,"Malvern, Rouge",1,1,1,1,1,1
M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",1,1,1,1,1,1
M1E,Scarborough,"Guildwood, Morningside, West Hill",8,8,8,8,8,8
M1G,Scarborough,Woburn,4,4,4,4,4,4
M1H,Scarborough,Cedarbrae,8,8,8,8,8,8


In [17]:
# report how many distinct venue categories occur among the returned venues
n_categories = venues_df['VenueCategory'].unique().size
print('There are {} uniques categories.'.format(n_categories))

There are 258 uniques categories.


In [18]:
# show the first 100 distinct venue categories
pd.unique(venues_df['VenueCategory'])[:100]

array(['Fast Food Restaurant', 'Bar', 'Bank', 'Electronics Store',
       'Restaurant', 'Mexican Restaurant', 'Rental Car Location',
       'Medical Center', 'Intersection', 'Breakfast Spot', 'Coffee Shop',
       'Korean Restaurant', 'Soccer Field', 'Caribbean Restaurant',
       'Hakka Restaurant', 'Thai Restaurant', 'Athletics & Sports',
       'Bakery', 'Gas Station', 'Fried Chicken Joint', 'Playground',
       'Pizza Place', 'Department Store', 'Convenience Store',
       'Discount Store', 'Chinese Restaurant', 'Hobby Shop',
       'Ice Cream Shop', 'Bus Line', 'Metro Station', 'Bus Station',
       'Park', 'Motel', 'American Restaurant', 'Café',
       'General Entertainment', 'Skating Rink', 'College Stadium',
       'Indian Restaurant', 'Pet Store', 'Vietnamese Restaurant',
       'Light Rail Station', 'Gaming Cafe', 'Sandwich Place',
       'Middle Eastern Restaurant', 'Auto Garage', 'Lounge',
       'Latin American Restaurant', 'Italian Restaurant', 'Noodle House',
       'Ph

In [19]:
# analyze each neighbourhood

# one-hot encode the venue category: one indicator column per category,
# one row per venue
toronto_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# only the neighbourhood label is needed downstream, so attach just that
# column and move it to the front
toronto_onehot = toronto_onehot.assign(Neighbourhood=venues_df['Neighbourhood'])
category_columns = [col for col in toronto_onehot.columns if col != 'Neighbourhood']
toronto_onehot = toronto_onehot[['Neighbourhood'] + category_columns]

print(toronto_onehot.shape)
toronto_onehot.head()

(2046, 259)


Unnamed: 0,Neighbourhood,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Rouge Hill, Port Union, Highland Creek",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [20]:
# average the indicator columns per neighbourhood, i.e. the share of each
# neighbourhood's venues that falls into each category
category_means = toronto_onehot.groupby("Neighbourhood").mean()
trt_grouped = category_means.reset_index()
trt_grouped.head()

Unnamed: 0,Neighbourhood,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.017241,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [21]:
# number of neighbourhoods with at least one bubble tea shop among their venues
int((trt_grouped["Bubble Tea Shop"] > 0).sum())

6

In [22]:
# number of neighbourhoods with at least one Chinese restaurant among their venues
int((trt_grouped["Chinese Restaurant"] > 0).sum())

14

In [23]:
# keep only the neighbourhood label and its Chinese-restaurant share
to_cn = trt_grouped.loc[:, ["Neighbourhood", "Chinese Restaurant"]]
to_cn.head()

Unnamed: 0,Neighbourhood,Chinese Restaurant
0,Agincourt,0.0
1,"Bathurst Manor, Wilson Heights, Downsview North",0.045455
2,Bayview Village,0.25
3,"Bedford Park, Lawrence Manor East",0.0
4,Berczy Park,0.0


In [71]:
# set number of clusters
kclusters = 3

# features for clustering: every column except the neighbourhood label.
# `columns=` is used because passing the axis positionally (drop([...], 1))
# is no longer accepted by current pandas releases.
to_clustering = to_cn.drop(columns=["Neighbourhood"])

# run k-means clustering (random_state is fixed so the labels are reproducible)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(to_clustering)

# check cluster labels generated for the first ten rows in the dataframe
kmeans.labels_[0:10]

array([0, 2, 1, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [72]:
# attach the k-means label of every neighbourhood as a new column;
# assign() returns a new frame, so to_cn itself is left untouched
to_merged = to_cn.assign(**{"Cluster Labels": kmeans.labels_})
to_merged.head()

Unnamed: 0,Neighbourhood,Chinese Restaurant,Cluster Labels
0,Agincourt,0.0,0
1,"Bathurst Manor, Wilson Heights, Downsview North",0.045455,2
2,Bayview Village,0.25,1
3,"Bedford Park, Lawrence Manor East",0.0,0
4,Berczy Park,0.0,0


In [73]:
# bring back the venue details for every neighbourhood and order the rows by cluster
to_merged = (
    to_merged
    .merge(venues_df, on="Neighbourhood")
    .sort_values(["Cluster Labels"])
)
to_merged.head()

Unnamed: 0,Neighbourhood,Chinese Restaurant,Cluster Labels,PostalCode,Borough,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Agincourt,0.0,0,M1S,Scarborough,43.7942,-79.262029,Panagio's Breakfast & Lunch,43.79237,-79.260203,Breakfast Spot
1317,"Richmond, Adelaide, King",0.0,0,M5H,Downtown Toronto,43.650571,-79.384568,Shangri-La Toronto,43.649129,-79.386557,Hotel
1316,"Richmond, Adelaide, King",0.0,0,M5H,Downtown Toronto,43.650571,-79.384568,Bosk at Shangri-La,43.649023,-79.385826,Asian Restaurant
1315,"Richmond, Adelaide, King",0.0,0,M5H,Downtown Toronto,43.650571,-79.384568,Four Seasons Centre for the Performing Arts,43.650592,-79.385806,Concert Hall
1314,"Richmond, Adelaide, King",0.0,0,M5H,Downtown Toronto,43.650571,-79.384568,The Keg Steakhouse + Bar - York Street,43.649987,-79.384103,Restaurant


In [74]:
# create map centred on Toronto; the geocoder result is used directly because
# the short name `lat` is prone to being overwritten by loop variables
map_clusters = folium.Map(location=[loc.latitude, loc.longitude], zoom_start=11)

# set color scheme for the clusters: one distinct colour per cluster label
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# to_merged holds one row per venue, but all venues of a postal code share the
# same coordinates and cluster, so one marker per postal code is enough
cluster_points = to_merged.drop_duplicates(subset=['PostalCode'])

# add markers to the map
for pt_lat, pt_lng, poi, cluster in zip(cluster_points['BoroughLatitude'], cluster_points['BoroughLongitude'], cluster_points['Neighbourhood'], cluster_points['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # index the palette with the label itself; `cluster-1` made label 0 wrap
    # around to the last colour
    marker_color = rainbow[cluster]
    folium.CircleMarker(
        [pt_lat, pt_lng],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [57]:
# write the cluster map to a standalone HTML file
map_clusters.save(outfile='map_clusters.html')

In [87]:
# examine clusters

# Cluster 0: every venue row whose neighbourhood received label 0
# (filter further on VenueCategory, e.g. 'Chinese Restaurant' or
# 'Bubble Tea Shop', to drill down)
in_cluster0 = to_merged['Cluster Labels'] == 0
cluster0 = to_merged[in_cluster0]
cluster0

Unnamed: 0,Neighbourhood,Chinese Restaurant,Cluster Labels,PostalCode,Borough,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Agincourt,0.0,0,M1S,Scarborough,43.794200,-79.262029,Panagio's Breakfast & Lunch,43.792370,-79.260203,Breakfast Spot
1317,"Richmond, Adelaide, King",0.0,0,M5H,Downtown Toronto,43.650571,-79.384568,Shangri-La Toronto,43.649129,-79.386557,Hotel
1316,"Richmond, Adelaide, King",0.0,0,M5H,Downtown Toronto,43.650571,-79.384568,Bosk at Shangri-La,43.649023,-79.385826,Asian Restaurant
1315,"Richmond, Adelaide, King",0.0,0,M5H,Downtown Toronto,43.650571,-79.384568,Four Seasons Centre for the Performing Arts,43.650592,-79.385806,Concert Hall
1314,"Richmond, Adelaide, King",0.0,0,M5H,Downtown Toronto,43.650571,-79.384568,The Keg Steakhouse + Bar - York Street,43.649987,-79.384103,Restaurant
1313,"Richmond, Adelaide, King",0.0,0,M5H,Downtown Toronto,43.650571,-79.384568,Nathan Phillips Square,43.652270,-79.383516,Plaza
1312,"Regent Park, Harbourfront",0.0,0,M5A,Downtown Toronto,43.654260,-79.360636,GW General,43.650495,-79.357538,Antique Shop
1311,"Regent Park, Harbourfront",0.0,0,M5A,Downtown Toronto,43.654260,-79.360636,Wine Rack,43.656573,-79.356928,Wine Shop
1310,"Regent Park, Harbourfront",0.0,0,M5A,Downtown Toronto,43.654260,-79.360636,The Healthy Road,43.656265,-79.357119,Health Food Store
1309,"Regent Park, Harbourfront",0.0,0,M5A,Downtown Toronto,43.654260,-79.360636,Savoury Grounds,43.656821,-79.358970,Coffee Shop


In [88]:
# Cluster 1: every venue row whose neighbourhood received label 1
# (filter further on VenueCategory, e.g. 'Chinese Restaurant', to drill down)
in_cluster1 = to_merged['Cluster Labels'] == 1
cluster1 = to_merged[in_cluster1]
cluster1

Unnamed: 0,Neighbourhood,Chinese Restaurant,Cluster Labels,PostalCode,Borough,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
29,Bayview Village,0.25,1,M2K,North York,43.786947,-79.385975,Kaga Sushi,43.787758,-79.38109,Japanese Restaurant
999,"Kennedy Park, Ionview, East Birchmount Park",0.142857,1,M1K,Scarborough,43.727929,-79.262029,Tim Hortons,43.726895,-79.266157,Coffee Shop
524,"Dorset Park, Wexford Heights, Scarborough Town...",0.142857,1,M1P,Scarborough,43.75741,-79.273304,Omescape Scarborough,43.754158,-79.27623,Gaming Cafe
523,"Dorset Park, Wexford Heights, Scarborough Town...",0.142857,1,M1P,Scarborough,43.75741,-79.273304,Scarborough LRT,43.756465,-79.272194,Light Rail Station
522,"Dorset Park, Wexford Heights, Scarborough Town...",0.142857,1,M1P,Scarborough,43.75741,-79.273304,Pho Vietnam,43.75777,-79.278572,Vietnamese Restaurant
521,"Dorset Park, Wexford Heights, Scarborough Town...",0.142857,1,M1P,Scarborough,43.75741,-79.273304,Big Al's Pet Supercentre,43.759279,-79.278325,Pet Store
520,"Dorset Park, Wexford Heights, Scarborough Town...",0.142857,1,M1P,Scarborough,43.75741,-79.273304,Karaikudi Chettinad South Indian Restaurant,43.756042,-79.276276,Indian Restaurant
518,"Dorset Park, Wexford Heights, Scarborough Town...",0.142857,1,M1P,Scarborough,43.75741,-79.273304,Kairali,43.754915,-79.276945,Indian Restaurant
28,Bayview Village,0.25,1,M2K,North York,43.786947,-79.385975,Maxim's Cafe and Patisserie,43.787863,-79.380751,Café
1595,"Steeles West, L'Amoreaux West",0.166667,1,M1W,Scarborough,43.799525,-79.318389,KFC,43.798938,-79.318854,Fast Food Restaurant


In [89]:
# Cluster 2: every venue row whose neighbourhood received label 2
# (filter further on VenueCategory, e.g. 'Chinese Restaurant', to drill down)
in_cluster2 = to_merged['Cluster Labels'] == 2
cluster2 = to_merged[in_cluster2]
cluster2

Unnamed: 0,Neighbourhood,Chinese Restaurant,Cluster Labels,PostalCode,Borough,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
1958,"University of Toronto, Harbord",0.028571,2,M5S,Downtown Toronto,43.662696,-79.400049,FLOCK Rotisserie + Greens,43.662637,-79.403798,Comfort Food Restaurant
1977,"University of Toronto, Harbord",0.028571,2,M5S,Downtown Toronto,43.662696,-79.400049,East of Brunswick,43.665609,-79.403324,Pub
1959,"University of Toronto, Harbord",0.028571,2,M5S,Downtown Toronto,43.662696,-79.400049,Harvest Kitchen,43.662714,-79.404330,Restaurant
1979,"University of Toronto, Harbord",0.028571,2,M5S,Downtown Toronto,43.662696,-79.400049,Elchi Chai Shop,43.662695,-79.404652,Coffee Shop
1960,"University of Toronto, Harbord",0.028571,2,M5S,Downtown Toronto,43.662696,-79.400049,Innis Cafe,43.665401,-79.399715,Café
1961,"University of Toronto, Harbord",0.028571,2,M5S,Downtown Toronto,43.662696,-79.400049,Coach House Printing,43.666320,-79.400277,Bookstore
1962,"University of Toronto, Harbord",0.028571,2,M5S,Downtown Toronto,43.662696,-79.400049,Hart House Theatre,43.663571,-79.394616,Theater
1978,"University of Toronto, Harbord",0.028571,2,M5S,Downtown Toronto,43.662696,-79.400049,The Beer Store,43.665385,-79.403477,Beer Store
1963,"University of Toronto, Harbord",0.028571,2,M5S,Downtown Toronto,43.662696,-79.400049,Gyubee,43.667088,-79.400571,Japanese Restaurant
1976,"University of Toronto, Harbord",0.028571,2,M5S,Downtown Toronto,43.662696,-79.400049,Comfort Zone,43.658397,-79.400274,Nightclub


## Observation

Chinese restaurants are most common in cluster 1 and least common in cluster 0. All bubble tea shops are also found in cluster 1, in areas such as Church and Wellesley, Central Bay Street, Garden District and Ryerson in Downtown Toronto. These neighbourhoods are excluded from our recommendation because of the potential competition. However, bubble tea shops and Chinese restaurants usually share a customer base, and Chinese food and bubble tea behave more like complements than substitutes. This project therefore recommends locations where Chinese restaurants are nearby but competition from other bubble tea shops is low. Neighbourhoods with Chinese restaurants, such as Steeles West and L'Amoreaux West in the borough of Scarborough, are therefore highly favoured.