In [2]:
import requests
import pandas as pd
import numpy as np
import folium
from bs4 import BeautifulSoup

In [3]:
url  = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
page = requests.get(url)
page.status_code

200

In [4]:
import lxml
soup = BeautifulSoup(page.content, "lxml")
table = soup.find('table', {"class":"wikitable sortable"})
trs = table.find_all('tr')

postcode = []
borough = []
neighbourhood = []

for tr in trs:
    tds = tr.find_all('td')
    if tds:
        postcode.append(tds[0].text.strip())
        borough.append(tds[1].text.strip())
        neighbourhood.append(tds[2].text.strip())
        
df_html = pd.DataFrame(data=[postcode, borough, neighbourhood]).transpose()
df_html .columns=['PostalCode', 'Borough', 'Neighborhood']
df_html.loc[df_html['Borough'] == "Not assigned", 'Borough'] = np.nan
df_html.loc[df_html['Neighborhood'] == "Not assigned", 'Neighborhood'] = np.nan
df_html.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,,
1,M2A,,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [5]:
df_html.dropna(subset=['Borough'], inplace=True)

In [6]:
not_empty_neighborhood = df_html[df_html['Neighborhood'].isna()].shape[0]
print('Count of Neighborhood column is empty: {}'.format(not_empty_neighborhood))

Count of Neighborhood column is empty: 1


In [7]:
df_html[df_html['Neighborhood'].isna()]

Unnamed: 0,PostalCode,Borough,Neighborhood
9,M9A,Queen's Park,


In [8]:
df_html['Neighborhood'].fillna(df_html['Borough'], inplace=True)
not_empty_neighborhood = df_html[df_html['Neighborhood'].isna()].shape[0]
print('Count of Neighborhood column is empty: {}'.format(not_empty_neighborhood))

Count of Neighborhood column is empty: 0


In [9]:
df_html[df_html['Borough']=="Queen's Park"]

Unnamed: 0,PostalCode,Borough,Neighborhood
9,M9A,Queen's Park,Queen's Park


In [10]:
df_postcodes =pd.DataFrame( df_html.groupby(['PostalCode','Borough']).Neighborhood.agg([('Neighborhood', ', '.join)]))
df_postcodes.reset_index(inplace=True)
df_postcodes.head(5)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [11]:
df_html.dropna(subset=['Borough'], inplace=True)

In [12]:
not_empty_neighborhood = df_html[df_html['Neighborhood'].isna()].shape[0]
print('Count of Neighborhood column is empty: {}'.format(not_empty_neighborhood))

Count of Neighborhood column is empty: 0


In [13]:
print('The shape is:',df_postcodes.shape)

The shape is: (103, 3)


In [14]:
df_postcodes.to_csv('Toronto_Postcodes.csv')

In [15]:
url_csv = 'http://cocl.us/Geospatial_data'
df_coordinates = pd.read_csv(url_csv)
df_coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [16]:
df_neighborhoods = pd.read_csv('Toronto_Postcodes.csv',index_col=[0])
df_neighborhoods.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [17]:
df_coordinates.rename(columns={'Postal Code': 'PostalCode'}, inplace=True)
df_neighborhoods.rename(columns={'PostalCode': 'PostalCode'}, inplace=True)

In [18]:
df_neighborhoods_coordinates = pd.merge(df_neighborhoods, df_coordinates, on='PostalCode')
df_neighborhoods_coordinates.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [19]:
df_neighborhoods_coordinates[(df_neighborhoods_coordinates['PostalCode']=='M5G') |
                             (df_neighborhoods_coordinates['PostalCode']=='M2H') ]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
17,M2H,North York,Hillcrest Village,43.803762,-79.363452
57,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383


In [20]:
df_neighborhoods_coordinates.to_csv('Toronto_Postcodes_2.csv')

In [21]:
df = pd.read_csv('Toronto_Postcodes_2.csv', index_col=0)
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [22]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(df['Borough'].unique()),
        df.shape[0]
    )
)

The dataframe has 11 boroughs and 103 neighborhoods.


In [23]:
df.rename(columns={'Neighborhood': 'Neighborhood'}, inplace=True)
df.groupby('Borough').count()['Neighborhood']

Borough
Central Toronto      9
Downtown Toronto    19
East Toronto         5
East York            5
Etobicoke           11
Mississauga          1
North York          24
Queen's Park         1
Scarborough         17
West Toronto         6
York                 5
Name: Neighborhood, dtype: int64

In [24]:
df_toronto = df[df['Borough'].str.contains('Toronto')]
df_toronto.reset_index(inplace=True)
df_toronto.drop('index', axis=1, inplace=True)
df_toronto.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [25]:
print(df_toronto.groupby('Borough').count()['Neighborhood'])

Borough
Central Toronto      9
Downtown Toronto    19
East Toronto         5
West Toronto         6
Name: Neighborhood, dtype: int64


In [26]:
boroughs = df_toronto['Borough'].unique().tolist()
lat_toronto = df_toronto['Latitude'].mean()
lon_toronto = df_toronto['Longitude'].mean()
print('The geographical coordinates of Toronto are {}, {}'.format(lat_toronto, lon_toronto))

The geographical coordinates of Toronto are 43.66713498717948, -79.38987324871795


In [27]:
borough_color = {}
for borough in boroughs:
    borough_color[borough]= '#%02X%02X%02X' % tuple(np.random.choice(range(256), size=3))
    
map_toronto = folium.Map(location=[lat_toronto, lon_toronto], zoom_start=12)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_toronto['Latitude'], 
                                           df_toronto['Longitude'],
                                           df_toronto['Borough'], 
                                           df_toronto['Neighborhood']):
    label_text = borough + ' - ' + neighborhood
    label = folium.Popup(label_text)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=borough_color[borough],
        fill_color=borough_color[borough],
        fill_opacity=0.7).add_to(map_toronto)  
    
map_toronto

In [28]:
# config of fourquare api
CLIENT_ID = '4JRUE1NXUGCLJKXV2DKGKZAVDGTYAOIBUW5C44JJTIAPCWYN'
CLIENT_SECRET = 'RQE52FLJ1KEZYZJMZS1EOY2Z0ZMLWAWUGETDKQML2T3H1YZB'
VERSION = '20190425'
LIMIT = 100
radius = 500

In [29]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])
   
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [30]:
toronto_venues = getNearbyVenues(names=df_toronto['Neighborhood'],
                                latitudes=df_toronto['Latitude'],
                                longitudes=df_toronto['Longitude'])

The Beaches
The Danforth West, Riverdale
The Beaches West, India Bazaar
Studio District
Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park, Summerhill East
Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West
Rosedale
Cabbagetown, St. James Town
Church and Wellesley
Harbourfront
Ryerson, Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide, King, Richmond
Harbourfront East, Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Commerce Court, Victoria Hotel
Roselawn
Forest Hill North, Forest Hill West
The Annex, North Midtown, Yorkville
Harbord, University of Toronto
Chinatown, Grange Park, Kensington Market
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Stn A PO Boxes 25 The Esplanade
First Canadian Place, Underground city
Christie
Dovercourt Village, Dufferin
Little Portugal, Trinity
Brockton, Exhibition Place, Parkdale Village
High Park, The Junction Sout

In [31]:
toronto_venues.shape

(1709, 7)

In [32]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
Berczy Park,55,55,55,55,55,55
"Brockton, Exhibition Place, Parkdale Village",22,22,22,22,22,22
Business Reply Mail Processing Centre 969 Eastern,15,15,15,15,15,15
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",15,15,15,15,15,15
"Cabbagetown, St. James Town",45,45,45,45,45,45
Central Bay Street,84,84,84,84,84,84
"Chinatown, Grange Park, Kensington Market",90,90,90,90,90,90
Christie,17,17,17,17,17,17
Church and Wellesley,84,84,84,84,84,84


In [33]:
toronto_venues['Venue Category'].unique()[:100]

array(['Trail', 'Health Food Store', 'Pub', 'Other Great Outdoors',
       'Neighborhood', 'Greek Restaurant', 'Cosmetics Shop',
       'Italian Restaurant', 'Ice Cream Shop', 'Brewery', 'Yoga Studio',
       'Fruit & Vegetable Store', 'Pizza Place', 'Restaurant',
       'Dessert Shop', 'Bubble Tea Shop', 'Bookstore', 'Spa', 'Juice Bar',
       'Furniture / Home Store', 'Diner', 'Grocery Store', 'Coffee Shop',
       'Caribbean Restaurant', 'Indian Restaurant', 'Bakery',
       'Sports Bar', 'Liquor Store', 'American Restaurant', 'Gym',
       'Fish & Chips Shop', 'Burger Joint', 'Sushi Restaurant', 'Park',
       'Pet Store', 'Burrito Place', 'Steakhouse', 'Movie Theater',
       'Fast Food Restaurant', 'Sandwich Place', 'Café', 'Cheese Shop',
       'Fish Market', 'Seafood Restaurant', 'Gay Bar',
       'Comfort Food Restaurant', 'Thai Restaurant',
       'Middle Eastern Restaurant', 'Stationery Store', 'Wine Bar',
       'Coworking Space', 'Bar', 'Gastropub', 'Latin American Restaur

In [34]:
"Chinese Restaurant" in toronto_venues['Venue Category'].unique()

True

In [35]:
to_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe & move it to the first column
to_onehot['Neighborhoods'] = toronto_venues['Neighborhood'] 
fixed_columns = [to_onehot.columns[-1]] + list(to_onehot.columns[:-1])
to_onehot = to_onehot[fixed_columns]

print(to_onehot.shape)
to_onehot.head()

(1709, 237)


Unnamed: 0,Neighborhoods,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,The Beaches,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
1,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [36]:
to_grouped = to_onehot.groupby(["Neighborhoods"]).mean().reset_index()

print(to_grouped.shape)
to_grouped

(39, 237)


Unnamed: 0,Neighborhoods,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,...,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.01,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0
2,"Brockton, Exhibition Place, Parkdale Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455
3,Business Reply Mail Processing Centre 969 Eastern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0,0.066667,0.066667,0.066667,0.133333,0.133333,0.133333,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Cabbagetown, St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,...,0.0,0.0,0.0,0.011905,0.0,0.0,0.011905,0.0,0.0,0.011905
7,"Chinatown, Grange Park, Kensington Market",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.055556,0.0,0.055556,0.011111,0.0,0.0,0.0
8,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Church and Wellesley,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,...,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.011905,0.0,0.02381


In [37]:
len(to_grouped[to_grouped["Chinese Restaurant"] > 0])

9

In [38]:
to_chinese = to_grouped[["Neighborhoods","Chinese Restaurant"]]

In [39]:
to_chinese.head(9)

Unnamed: 0,Neighborhoods,Chinese Restaurant
0,"Adelaide, King, Richmond",0.0
1,Berczy Park,0.0
2,"Brockton, Exhibition Place, Parkdale Village",0.0
3,Business Reply Mail Processing Centre 969 Eastern,0.0
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0
5,"Cabbagetown, St. James Town",0.022222
6,Central Bay Street,0.02381
7,"Chinatown, Grange Park, Kensington Market",0.044444
8,Christie,0.0


In [40]:
from sklearn.cluster import KMeans
toclusters = 3

to_clustering = to_chinese.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=toclusters, random_state=1)
kmeans.fit_transform(to_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:20]

array([0, 0, 0, 0, 0, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0])

In [41]:
to_merged = to_chinese.copy()

# add clustering labels
to_merged["Cluster Labels"] = kmeans.labels_

In [42]:
to_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
to_merged.head(9)

Unnamed: 0,Neighborhood,Chinese Restaurant,Cluster Labels
0,"Adelaide, King, Richmond",0.0,0
1,Berczy Park,0.0,0
2,"Brockton, Exhibition Place, Parkdale Village",0.0,0
3,Business Reply Mail Processing Centre 969 Eastern,0.0,0
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0,0
5,"Cabbagetown, St. James Town",0.022222,1
6,Central Bay Street,0.02381,1
7,"Chinatown, Grange Park, Kensington Market",0.044444,2
8,Christie,0.0,0


In [43]:
to_merged = to_merged.join(toronto_venues.set_index("Neighborhood"), on="Neighborhood")
print(to_merged.shape)
to_merged.sort_values(["Cluster Labels"], inplace=True)
to_merged.head(9)

(1709, 9)


Unnamed: 0,Neighborhood,Chinese Restaurant,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Adelaide, King, Richmond",0.0,0,43.650571,-79.384568,Four Seasons Centre for the Performing Arts,43.650592,-79.385806,Concert Hall
23,"Little Portugal, Trinity",0.0,0,43.647927,-79.41975,Bang Bang Ice Cream & Bakery,43.646246,-79.419553,Ice Cream Shop
23,"Little Portugal, Trinity",0.0,0,43.647927,-79.41975,Foxley Bistro,43.648643,-79.420495,Asian Restaurant
23,"Little Portugal, Trinity",0.0,0,43.647927,-79.41975,OddSeoul,43.646192,-79.419601,Korean Restaurant
23,"Little Portugal, Trinity",0.0,0,43.647927,-79.41975,Pizzeria Libretto,43.648979,-79.420604,Pizza Place
23,"Little Portugal, Trinity",0.0,0,43.647927,-79.41975,Bellwoods Brewery,43.647097,-79.419955,Brewery
22,Lawrence Park,0.0,0,43.72802,-79.38879,TTC Bus #162 - Lawrence-Donway,43.728026,-79.382805,Bus Line
22,Lawrence Park,0.0,0,43.72802,-79.38879,Zodiac Swim School,43.728532,-79.38286,Swim School
22,Lawrence Park,0.0,0,43.72802,-79.38879,The Photo School – Toronto,43.730429,-79.388767,Photography Studio


In [49]:
from folium import CircleMarker

In [51]:
map_clusters = folium.Map(location=[lat_toronto, lon_toronto],zoom_start=14)

# add markers to the map
markers_colors={}
markers_colors[0] = 'red' #more Chinese restaurant
markers_colors[1] = 'blue' #no Chinese restaurant
markers_colors[2] = 'green' #less Chinese restaurant
markers_colors[3] = 'yellow'
markers_colors[4] = 'cyan'
markers_colors[5] = 'black'
for lat, lon, cluster in zip(to_merged['Neighborhood Latitude'], to_merged['Neighborhood Longitude'], to_merged['Cluster Labels']):
    
    
    folium.CircleMarker(
        [lat, lon],
        radius=5,
       
        color =markers_colors[cluster],
        fill_color=markers_colors[cluster],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [55]:
#Cluster 0
to_merged.loc[(to_merged['Cluster Labels'] ==0) & (to_merged['Venue Category'] == 'Chinese Restaurant') ]

Unnamed: 0,Neighborhood,Chinese Restaurant,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
20,"Harbourfront East, Toronto Islands, Union Station",0.01,0,43.640816,-79.381752,Pearl Harbourfront,43.638157,-79.380688,Chinese Restaurant
31,"Ryerson, Garden District",0.01,0,43.657162,-79.378937,GB Hand-Pulled Noodles,43.656434,-79.383783,Chinese Restaurant
9,Church and Wellesley,0.011905,0,43.66586,-79.38316,Crown Princess Fine Dining 伯爵名宴,43.666455,-79.387698,Chinese Restaurant


In [56]:
#Cluster 1
to_merged.loc[(to_merged['Cluster Labels'] ==1) & (to_merged['Venue Category'] == 'Chinese Restaurant') ]

Unnamed: 0,Neighborhood,Chinese Restaurant,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
18,"Harbord, University of Toronto",0.028571,1,43.662696,-79.400049,River Tai Restaurant,43.662902,-79.403167,Chinese Restaurant
27,Queen's Park,0.025641,1,43.662301,-79.389494,Crown Princess Fine Dining 伯爵名宴,43.666455,-79.387698,Chinese Restaurant
5,"Cabbagetown, St. James Town",0.022222,1,43.667967,-79.367675,China Gourmet,43.66418,-79.368359,Chinese Restaurant
6,Central Bay Street,0.02381,1,43.657952,-79.387383,Yueh Tung Chinese Restaurant,43.655281,-79.385337,Chinese Restaurant
6,Central Bay Street,0.02381,1,43.657952,-79.387383,GB Hand-Pulled Noodles,43.656434,-79.383783,Chinese Restaurant


In [57]:
#Cluster 2

to_merged.loc[(to_merged['Cluster Labels'] ==2) & (to_merged['Venue Category'] == 'Chinese Restaurant') ]

Unnamed: 0,Neighborhood,Chinese Restaurant,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
7,"Chinatown, Grange Park, Kensington Market",0.044444,2,43.653206,-79.400049,New Sky Restaurant 小沙田食家,43.655337,-79.398897,Chinese Restaurant
7,"Chinatown, Grange Park, Kensington Market",0.044444,2,43.653206,-79.400049,Swatow Restaurant 汕頭小食家,43.653866,-79.398334,Chinese Restaurant
25,North Toronto West,0.043478,2,43.715383,-79.405678,C'est Bon,43.716785,-79.400406,Chinese Restaurant
7,"Chinatown, Grange Park, Kensington Market",0.044444,2,43.653206,-79.400049,Asian Legend 味香村,43.653603,-79.395047,Chinese Restaurant
7,"Chinatown, Grange Park, Kensington Market",0.044444,2,43.653206,-79.400049,Rosewood Chinese Cuisine,43.653171,-79.39671,Chinese Restaurant


<h2>Conclusion</h2>

<p>Cluster 0, there are more Chinese restaurants in downtown Toronto and coastal areas.</p>
<p>Cluster 1 and 2, less or no Chinese restaurants are arround Toronto university and colleges.</p>

<p>I recomment open Chinese restaurant near Toronto Islands, Union Station or Garden District. It might be a good location as there're not a lot of Chinese restaurant in these areas.</p>