In [457]:
import requests
import pandas as pd
import numpy as np
import folium

In [458]:
url  = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
page = requests.get(url)
if page.status_code == 200:
    print('Page download successful')
else:
    print('Page download error. Error code: {}'.format(page.status_code))

Page download successful


In [459]:
import lxml
df_html = pd.read_html(url, header=0, na_values = ['Not assigned'])[0]
df_html.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,,
1,M2A,,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [460]:
df_html.dropna(subset=['Borough'], inplace=True)

In [461]:
n_empty_neighborhood = df_html[df_html['Neighbourhood'].isna()].shape[0]
print('Number of rows on which Neighborhood column is empty: {}'.format(n_empty_neighborhood))

Number of rows on which Neighborhood column is empty: 0


In [462]:
df_html[df_html['Neighbourhood'].isna()]

Unnamed: 0,Postal Code,Borough,Neighbourhood


In [463]:
df_html['Neighbourhood'].fillna(df_html['Borough'], inplace=True)
n_empty_neighborhood = df_html[df_html['Neighbourhood'].isna()].shape[0]
print('Number of rows on which Neighborhood column is empty: {}'.format(n_empty_neighborhood))

Number of rows on which Neighborhood column is empty: 0


In [464]:
df_html[df_html['Borough']=="Queen's Park"]

Unnamed: 0,Postal Code,Borough,Neighbourhood


In [465]:
df_postcodes =pd.DataFrame( df_html.groupby(['Postal Code','Borough']).Neighbourhood.agg([('Neighbourhood', ', '.join)]))
df_postcodes.reset_index(inplace=True)
df_postcodes.head(5)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [466]:
df_html.dropna(subset=['Borough'], inplace=True)

In [467]:
n_empty_neighborhood = df_html[df_html['Neighbourhood'].isna()].shape[0]
print('Number of rows on which Neighborhood column is empty: {}'.format(n_empty_neighborhood))

Number of rows on which Neighborhood column is empty: 0


In [468]:
print('The shape of the dataset is:',df_postcodes.shape)

The shape of the dataset is: (103, 3)


In [469]:
df_postcodes.to_csv('Toronto_Postcodes.csv')

In [470]:
url_csv = 'http://cocl.us/Geospatial_data'
df_coordinates = pd.read_csv(url_csv)
df_coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [471]:
df_neighborhoods = pd.read_csv('Toronto_Postcodes.csv',index_col=[0])
df_neighborhoods.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [472]:
df_neighborhoods.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [473]:
df_neighborhoods_coordinates = pd.merge(df_neighborhoods, df_coordinates, on='Postal Code')
df_neighborhoods_coordinates.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [474]:
df_neighborhoods_coordinates[(df_neighborhoods_coordinates['Postal Code']=='M2K') |
                             (df_neighborhoods_coordinates['Postal Code']=='M2M') ]

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
19,M2K,North York,Bayview Village,43.786947,-79.385975
21,M2M,North York,"Willowdale, Newtonbrook",43.789053,-79.408493


In [475]:
df_neighborhoods_coordinates.to_csv('Toronto_Postcodes_2.csv')

In [476]:
df = pd.read_csv('Toronto_Postcodes_2.csv', index_col=0)
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [477]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(df['Borough'].unique()),
        df.shape[0]
    )
)

The dataframe has 10 boroughs and 103 neighborhoods.


In [478]:
df.rename(columns={'Neighbourhood': 'Neighborhood'}, inplace=True)

In [479]:
df.groupby('Borough').count()['Neighborhood']

Borough
Central Toronto      9
Downtown Toronto    19
East Toronto         5
East York            5
Etobicoke           12
Mississauga          1
North York          24
Scarborough         17
West Toronto         6
York                 5
Name: Neighborhood, dtype: int64

In [480]:
df_toronto = df[df['Borough'].str.contains(' ')]
df_toronto.reset_index(inplace=True)
df_toronto.drop('index', axis=1, inplace=True)
df_toronto.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M2H,North York,Hillcrest Village,43.803762,-79.363452
1,M2J,North York,"Fairview, Henry Farm, Oriole",43.778517,-79.346556
2,M2K,North York,Bayview Village,43.786947,-79.385975
3,M2L,North York,"York Mills, Silver Hills",43.75749,-79.374714
4,M2M,North York,"Willowdale, Newtonbrook",43.789053,-79.408493


In [481]:
print(df_toronto.groupby('Borough').count()['Neighborhood'])

Borough
Central Toronto      9
Downtown Toronto    19
East Toronto         5
East York            5
North York          24
West Toronto         6
Name: Neighborhood, dtype: int64


In [482]:
boroughs = df_toronto['Borough'].unique().tolist()

In [483]:
lat_toronto = df_toronto['Latitude'].mean()
lon_toronto = df_toronto['Longitude'].mean()
print('The geographical coordinates of Toronto are {}, {}'.format(lat_toronto, lon_toronto))

The geographical coordinates of Toronto are 43.699077060294115, -79.39982988676468


In [484]:
borough_color = {}
for borough in boroughs:
    borough_color[borough]= '#%02X%02X%02X' % tuple(np.random.choice(range(256), size=3))

In [485]:
map_toronto = folium.Map(location=[lat_toronto, lon_toronto], zoom_start=12)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_toronto['Latitude'], 
                                           df_toronto['Longitude'],
                                           df_toronto['Borough'], 
                                           df_toronto['Neighborhood']):
    label_text = borough + ' - ' + neighborhood
    label = folium.Popup(label_text)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=borough_color[borough],
        fill_color=borough_color[borough],
        fill_opacity=0.7).add_to(map_toronto)  
    
map_toronto

In [486]:
CLIENT_ID = 'I3WDOJBU0UJRXWO0T0410XQPLG3JDYT2QOTZN0NEROECPXB0' # your Foursquare ID
CLIENT_SECRET = 'GLM4ZJBSDRE31VORNBIQEJD0BDJFS0MI3R3IPI4UHWVCKSXW' # your Foursquare Secret
VERSION = '20190425' # Foursquare API version
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

In [487]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])
   
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [488]:
toronto_venues = getNearbyVenues(names=df_toronto['Neighborhood'],
                                latitudes=df_toronto['Latitude'],
                                longitudes=df_toronto['Longitude'])

Hillcrest Village


KeyError: 'groups'

In [489]:
toronto_venues.shape

(1938, 7)

In [490]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Bathurst Manor, Wilson Heights, Downsview North",20,20,20,20,20,20
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",22,22,22,22,22,22
Berczy Park,57,57,57,57,57,57
"Brockton, Parkdale Village, Exhibition Place",22,22,22,22,22,22
...,...,...,...,...,...,...
"Willowdale, Willowdale East",33,33,33,33,33,33
"Willowdale, Willowdale West",6,6,6,6,6,6
Woodbine Heights,5,5,5,5,5,5
York Mills West,2,2,2,2,2,2


In [491]:
toronto_venues['Venue Category'].unique()[:100]

array(['Golf Course', 'Pool', 'Mediterranean Restaurant', 'Dog Run',
       'Toy / Game Store', 'Movie Theater', 'Shopping Mall', 'Restaurant',
       'American Restaurant', 'Electronics Store', 'Pharmacy',
       'Burger Joint', 'Clothing Store', 'Chocolate Shop', 'Coffee Shop',
       'Bakery', 'Department Store', 'Salon / Barbershop', 'Juice Bar',
       'Theater', 'Bank', 'Food Court', 'Liquor Store', 'Cosmetics Shop',
       'Japanese Restaurant', 'Fast Food Restaurant',
       'Sporting Goods Shop', 'Asian Restaurant', 'Video Game Store',
       'Burrito Place', "Women's Store", 'Mobile Phone Shop', 'Boutique',
       'Miscellaneous Shop', 'Tea Room', 'Supplement Shop',
       'Accessories Store', 'Jewelry Store', 'Shoe Store',
       'Luggage Store', 'Greek Restaurant', 'Chinese Restaurant',
       'Sandwich Place', 'Spa', 'Bus Station', 'Convenience Store',
       'Baseball Field', 'Café', 'Cafeteria', 'Gym', 'Grocery Store',
       'Steakhouse', 'Ramen Restaurant', 'Indonesian

In [492]:
"Coffee Shop" in toronto_venues['Venue Category'].unique()

True

In [493]:
to_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
to_onehot['Neighborhoods'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [to_onehot.columns[-1]] + list(to_onehot.columns[:-1])
to_onehot = to_onehot[fixed_columns]

print(to_onehot.shape)
to_onehot.head()

(1938, 257)


Unnamed: 0,Neighborhoods,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Women's Store,Yoga Studio
0,Hillcrest Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Hillcrest Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Hillcrest Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Hillcrest Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Fairview, Henry Farm, Oriole",0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0


In [494]:
to_grouped = to_onehot.groupby(["Neighborhoods"]).mean().reset_index()

print(to_grouped.shape)
to_grouped

(64, 257)


Unnamed: 0,Neighborhoods,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Women's Store,Yoga Studio
0,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0
1,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0
2,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0
3,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.017544,0.0,0.000000,0.0,0.0,0.0,0.0
4,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,"Willowdale, Willowdale East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.030303,0.0,0.0,0.0,0.0
60,"Willowdale, Willowdale West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0
61,Woodbine Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0
62,York Mills West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0


In [495]:
len(to_grouped[to_grouped["Coffee Shop"] > 0])

40

In [496]:
Coffee_Shop = to_grouped[["Neighborhoods","Coffee Shop"]]

In [497]:
Coffee_Shop

Unnamed: 0,Neighborhoods,Coffee Shop
0,"Bathurst Manor, Wilson Heights, Downsview North",0.100000
1,Bayview Village,0.000000
2,"Bedford Park, Lawrence Manor East",0.090909
3,Berczy Park,0.087719
4,"Brockton, Parkdale Village, Exhibition Place",0.090909
...,...,...
59,"Willowdale, Willowdale East",0.060606
60,"Willowdale, Willowdale West",0.166667
61,Woodbine Heights,0.000000
62,York Mills West,0.000000


In [498]:
Coffee_Shop.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 64 entries, 0 to 63
Data columns (total 2 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Neighborhoods  64 non-null     object 
 1   Coffee Shop    64 non-null     float64
dtypes: float64(1), object(1)
memory usage: 1.1+ KB


In [499]:
Coffee_Shop.describe

<bound method NDFrame.describe of                                       Neighborhoods  Coffee Shop
0   Bathurst Manor, Wilson Heights, Downsview North     0.100000
1                                   Bayview Village     0.000000
2                 Bedford Park, Lawrence Manor East     0.090909
3                                       Berczy Park     0.087719
4      Brockton, Parkdale Village, Exhibition Place     0.090909
..                                              ...          ...
59                      Willowdale, Willowdale East     0.060606
60                      Willowdale, Willowdale West     0.166667
61                                 Woodbine Heights     0.000000
62                                  York Mills West     0.000000
63                         York Mills, Silver Hills     0.000000

[64 rows x 2 columns]>

In [500]:
from sklearn.cluster import KMeans
toclusters = 3

to_clustering = Coffee_Shop.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=toclusters, random_state=1)
kmeans.fit_transform(to_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:20]

array([0, 1, 0, 0, 0, 1, 0, 2, 0, 0, 2, 0, 1, 0, 1, 1, 1, 0, 0, 1])

In [501]:
to_merged = Coffee_Shop.copy()

# add clustering labels
to_merged["Cluster Labels"] = kmeans.labels_

In [502]:
to_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
to_merged.head(5)

Unnamed: 0,Neighborhood,Coffee Shop,Cluster Labels
0,"Bathurst Manor, Wilson Heights, Downsview North",0.1,0
1,Bayview Village,0.0,1
2,"Bedford Park, Lawrence Manor East",0.090909,0
3,Berczy Park,0.087719,0
4,"Brockton, Parkdale Village, Exhibition Place",0.090909,0


In [503]:
to_merged.head(15)

Unnamed: 0,Neighborhood,Coffee Shop,Cluster Labels
0,"Bathurst Manor, Wilson Heights, Downsview North",0.1,0
1,Bayview Village,0.0,1
2,"Bedford Park, Lawrence Manor East",0.090909,0
3,Berczy Park,0.087719,0
4,"Brockton, Parkdale Village, Exhibition Place",0.090909,0
5,"Business reply mail Processing Centre, South C...",0.0,1
6,"CN Tower, King and Spadina, Railway Lands, Har...",0.066667,0
7,Central Bay Street,0.171875,2
8,Christie,0.058824,0
9,Church and Wellesley,0.105263,0


In [504]:
to_merged = to_merged.join(toronto_venues.set_index("Neighborhood"), on="Neighborhood")

print(to_merged.shape)
to_merged.head()

(1938, 9)


Unnamed: 0,Neighborhood,Coffee Shop,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Bathurst Manor, Wilson Heights, Downsview North",0.1,0,43.754328,-79.442259,Wolfie's Deli,43.754875,-79.442438,Deli / Bodega
0,"Bathurst Manor, Wilson Heights, Downsview North",0.1,0,43.754328,-79.442259,Starbucks,43.755797,-79.440471,Coffee Shop
0,"Bathurst Manor, Wilson Heights, Downsview North",0.1,0,43.754328,-79.442259,Best for Bride in Toronto,43.755789,-79.437834,Bridal Shop
0,"Bathurst Manor, Wilson Heights, Downsview North",0.1,0,43.754328,-79.442259,Bagel Plus,43.755395,-79.440686,Restaurant
0,"Bathurst Manor, Wilson Heights, Downsview North",0.1,0,43.754328,-79.442259,Tim Hortons,43.754767,-79.44325,Coffee Shop


In [505]:
to_merged.sort_values(["Cluster Labels"], inplace=True)
to_merged.head()

Unnamed: 0,Neighborhood,Coffee Shop,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Bathurst Manor, Wilson Heights, Downsview North",0.1,0,43.754328,-79.442259,Wolfie's Deli,43.754875,-79.442438,Deli / Bodega
37,"Parkdale, Roncesvalles",0.071429,0,43.64896,-79.456325,Likely General,43.650622,-79.450635,Gift Shop
37,"Parkdale, Roncesvalles",0.071429,0,43.64896,-79.456325,Scout,43.65097,-79.450866,Gift Shop
37,"Parkdale, Roncesvalles",0.071429,0,43.64896,-79.456325,Cider House,43.650688,-79.450685,Restaurant
37,"Parkdale, Roncesvalles",0.071429,0,43.64896,-79.456325,Reunion Island Coffee Bar,43.650463,-79.45061,Coffee Shop


In [506]:
map_clusters = folium.Map(location=[lat_toronto, lon_toronto],zoom_start=11)

# set color scheme for the clusters


# add markers to the map
markers_colors={}
markers_colors[0] = 'red'
markers_colors[1] = 'blue'
markers_colors[2] = 'green'
markers_colors[3] = 'yellow'
markers_colors[4] = 'cyan'
markers_colors[5] = 'black'
for lat, lon, cluster in zip(to_merged['Neighborhood Latitude'], to_merged['Neighborhood Longitude'], to_merged['Cluster Labels']):
    
    
    folium.CircleMarker(
        [lat, lon],
        radius=5,
       
        color =markers_colors[cluster],
        fill_color=markers_colors[cluster],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [507]:
#Cluster 0
to_merged.loc[(to_merged['Cluster Labels'] ==0) & (to_merged['Venue Category'] == 'Coffee Shop') ]

Unnamed: 0,Neighborhood,Coffee Shop,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
37,"Parkdale, Roncesvalles",0.071429,0,43.648960,-79.456325,Reunion Island Coffee Bar,43.650463,-79.450610,Coffee Shop
35,"North Toronto West, Lawrence Park",0.105263,0,43.715383,-79.405678,Tim Hortons,43.714894,-79.399776,Coffee Shop
35,"North Toronto West, Lawrence Park",0.105263,0,43.715383,-79.405678,Starbucks,43.715590,-79.400450,Coffee Shop
42,"Richmond, Adelaide, King",0.070000,0,43.650571,-79.384568,M Square Coffee Co,43.651218,-79.383555,Coffee Shop
32,"Little Portugal, Trinity",0.068182,0,43.647927,-79.419750,Pilot Coffee Roasters,43.646610,-79.419606,Coffee Shop
...,...,...,...,...,...,...,...,...,...
18,"First Canadian Place, Underground city",0.130000,0,43.648429,-79.382280,Balzac's Coffee,43.644373,-79.383065,Coffee Shop
18,"First Canadian Place, Underground city",0.130000,0,43.648429,-79.382280,Tim Hortons,43.646256,-79.379573,Coffee Shop
18,"First Canadian Place, Underground city",0.130000,0,43.648429,-79.382280,Starbucks,43.646799,-79.380690,Coffee Shop
18,"First Canadian Place, Underground city",0.130000,0,43.648429,-79.382280,Pilot Coffee Roasters,43.645018,-79.380415,Coffee Shop


In [508]:
#Cluster 1
to_merged.loc[(to_merged['Cluster Labels'] ==1) & (to_merged['Venue Category'] == 'Coffee Shop') ]

Unnamed: 0,Neighborhood,Coffee Shop,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
56,"University of Toronto, Harbord",0.028571,1,43.662696,-79.400049,Elchi Chai Shop,43.662695,-79.404652,Coffee Shop


In [509]:
#Cluster 2
to_merged.loc[(to_merged['Cluster Labels'] ==2) & (to_merged['Venue Category'] == 'Coffee Shop') ]

Unnamed: 0,Neighborhood,Coffee Shop,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
40,"Queen's Park, Ontario Provincial Government",0.264706,2,43.662301,-79.389494,Starbucks,43.662407,-79.385943,Coffee Shop
40,"Queen's Park, Ontario Provincial Government",0.264706,2,43.662301,-79.389494,Starbucks,43.659456,-79.390411,Coffee Shop
40,"Queen's Park, Ontario Provincial Government",0.264706,2,43.662301,-79.389494,Starbucks,43.661527,-79.383411,Coffee Shop
40,"Queen's Park, Ontario Provincial Government",0.264706,2,43.662301,-79.389494,Starbucks,43.658204,-79.388998,Coffee Shop
40,"Queen's Park, Ontario Provincial Government",0.264706,2,43.662301,-79.389494,Tim Hortons,43.661038,-79.393797,Coffee Shop
55,"Toronto Dominion Centre, Design Exchange",0.14,2,43.647177,-79.381576,Pilot Coffee Roasters,43.648835,-79.380936,Coffee Shop
55,"Toronto Dominion Centre, Design Exchange",0.14,2,43.647177,-79.381576,Pilot Coffee Roasters,43.645018,-79.380415,Coffee Shop
55,"Toronto Dominion Centre, Design Exchange",0.14,2,43.647177,-79.381576,Dineen @CommerceCourt,43.648251,-79.380127,Coffee Shop
55,"Toronto Dominion Centre, Design Exchange",0.14,2,43.647177,-79.381576,Starbucks,43.646731,-79.383951,Coffee Shop
55,"Toronto Dominion Centre, Design Exchange",0.14,2,43.647177,-79.381576,Starbucks,43.650159,-79.377793,Coffee Shop


In [510]:
to_merged.loc[(to_merged['Cluster Labels'] ==0) & (to_merged['Venue Category'] == 'Coffee Shop') ].count()

Neighborhood              121
Coffee Shop               121
Cluster Labels            121
Neighborhood Latitude     121
Neighborhood Longitude    121
Venue                     121
Venue Latitude            121
Venue Longitude           121
Venue Category            121
dtype: int64

In [511]:
to_merged.shape

(1938, 9)

In [512]:
to_merged.loc[(to_merged['Cluster Labels'] ==1) & (to_merged['Venue Category'] == 'Coffee Shop') ].count()

Neighborhood              1
Coffee Shop               1
Cluster Labels            1
Neighborhood Latitude     1
Neighborhood Longitude    1
Venue                     1
Venue Latitude            1
Venue Longitude           1
Venue Category            1
dtype: int64

In [513]:
to_merged.loc[(to_merged['Cluster Labels'] ==2) & (to_merged['Venue Category'] == 'Coffee Shop') ].count()

Neighborhood              58
Coffee Shop               58
Cluster Labels            58
Neighborhood Latitude     58
Neighborhood Longitude    58
Venue                     58
Venue Latitude            58
Venue Longitude           58
Venue Category            58
dtype: int64