# For this assignment, you will be required to explore and cluster the neighborhoods in Toronto.

## Start by creating a new Notebook for this assignment.
### Use the Notebook to build the code to scrape the following Wikipedia page, https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M, in order to obtain the data that is in the table of postal codes and to transform the data into a pandas dataframe like the one shown below:

### 1 - Pre-Processing

In [1]:
# importing necessary libraries
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
import json 

# !conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values


from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

import warnings
warnings.filterwarnings('ignore')

print('Libraries imported.')

Solving environment: done

# All requested packages already installed.

Libraries imported.


In [2]:
# getting data from internet
wikipedia_link='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
raw_wikipedia_page= requests.get(wikipedia_link).text

# using beautiful soup to parse the HTML/XML codes.
soup = BeautifulSoup(raw_wikipedia_page,'xml')
#print(soup.prettify())

### Pre-Processing (Part 1) : Extracting raw table (From Website)

In [3]:
# extracting the raw table inside that webpage
table = soup.find('table')

Postcode      = []
Borough       = []
Neighbourhood = []

# print(table)

# extracting a clean form of the table
for tr_cell in table.find_all('tr'):
    
    counter = 1
    Postcode_var      = -1
    Borough_var       = -1
    Neighbourhood_var = -1
    
    for td_cell in tr_cell.find_all('td'):
        if counter == 1: 
            Postcode_var = td_cell.text
        if counter == 2: 
            Borough_var = td_cell.text
            tag_a_Borough = td_cell.find('a')
            
        if counter == 3: 
            Neighbourhood_var = str(td_cell.text).strip()
            tag_a_Neighbourhood = td_cell.find('a')
            
        counter +=1
        
    if (Postcode_var == 'Not assigned' or Borough_var == 'Not assigned' or Neighbourhood_var == 'Not assigned'): 
        continue
    try:
        if ((tag_a_Borough is None) or (tag_a_Neighbourhood is None)):
            continue
    except:
        pass
    if(Postcode_var == -1 or Borough_var == -1 or Neighbourhood_var == -1):
        continue
        
    Postcode.append(Postcode_var)
    Borough.append(Borough_var)
    Neighbourhood.append(Neighbourhood_var)

### Pre-Processing (Part 2): Integrating Postal codes with more than 1 Neighbours.

In [4]:
unique_p = set(Postcode)
print('num of unique Postal codes:', len(unique_p))
Postcode_u      = []
Borough_u       = []
Neighbourhood_u = []


for postcode_unique_element in unique_p:
    p_var = ''; b_var = ''; n_var = ''; 
    for postcode_idx, postcode_element in enumerate(Postcode):
        if postcode_unique_element == postcode_element:
            p_var = postcode_element;
            b_var = Borough[postcode_idx]
            if n_var == '': 
                n_var = Neighbourhood[postcode_idx]
            else:
                n_var = n_var + ', ' + Neighbourhood[postcode_idx]
    Postcode_u.append(p_var)
    Borough_u.append(b_var)
    Neighbourhood_u.append(n_var)

num of unique Postal codes: 77


### Post-Processing: Creating Pandas Dataframe

In [5]:
toronto_dict = {'Postal Code':Postcode_u, 'Borough':Borough_u, 'Neighbourhood':Neighbourhood_u}
df_toronto = pd.DataFrame.from_dict(toronto_dict)
df_toronto.to_csv('toronto_part1.csv')
df_toronto.head(14)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M9M,North York,"Emery, Humberlea"
1,M8Y,Etobicoke,"Humber Bay, Mimico NE, Old Mill South, The Que..."
2,M1G,Scarborough,Woburn
3,M5K,Downtown Toronto,"Design Exchange, Toronto Dominion Centre"
4,M4J,East York,East Toronto
5,M4G,East York,Leaside
6,M8V,Etobicoke,New Toronto
7,M6M,York,"Keelesdale, Mount Dennis, Silverthorn"
8,M4C,East York,Woodbine Heights
9,M4Y,Downtown Toronto,Church and Wellesley


In [6]:
df_toronto = df_toronto[df_toronto.Borough != "Not assigned"]

In [7]:
df_toronto = df_toronto.groupby(["Postal Code", "Borough"]).agg(lambda x: ', '.join(set(x))).reset_index()

In [8]:
df_toronto[df_toronto.Neighbourhood == 'Not assigned']

Unnamed: 0,Postal Code,Borough,Neighbourhood


In [9]:
df_toronto.loc[8, "Neighbourhood"] = df_toronto.loc[8, "Borough"]
df_toronto[df_toronto.Neighbourhood == 'Not assigned']

Unnamed: 0,Postal Code,Borough,Neighbourhood


In [10]:
df_toronto.shape

(77, 3)

### Part 2: Pre-processing - Installing Geocoder


In [11]:
!pip install geocoder
print('geocoder has been installed before.')
import geocoder
print('geocoder has been successfully imported.')

geocoder has been installed before.
geocoder has been successfully imported.


In [12]:
geo_df=pd.read_csv('http://cocl.us/Geospatial_data')

In [13]:
geo_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [14]:
geospatial_data = geo_df
geospatial_data.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [15]:
geospatial_data.shape

(103, 3)

In [16]:
df_toronto_lat = pd.merge(df_toronto, geospatial_data, left_index=True, right_index=True, how = 'inner', on = ['Postal Code'])
df_toronto_lat

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1J,Scarborough,Scarborough Village,43.773136,-79.239476
5,M1K,Scarborough,"Ionview, Kennedy Park",43.744734,-79.239476
6,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.727929,-79.262029
7,M1M,Scarborough,"Cliffcrest, Cliffside",43.711112,-79.284577
8,M1N,Scarborough,Scarborough,43.716316,-79.239476
9,M1P,Scarborough,"Dorset Park, Scarborough Town Centre, Wexford ...",43.692657,-79.264848


In [17]:
df_toronto_lat = df_toronto_lat.dropna()

In [18]:
df_toronto_lat

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1J,Scarborough,Scarborough Village,43.773136,-79.239476
5,M1K,Scarborough,"Ionview, Kennedy Park",43.744734,-79.239476
6,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.727929,-79.262029
7,M1M,Scarborough,"Cliffcrest, Cliffside",43.711112,-79.284577
8,M1N,Scarborough,Scarborough,43.716316,-79.239476
9,M1P,Scarborough,"Dorset Park, Scarborough Town Centre, Wexford ...",43.692657,-79.264848


In [19]:
df_toronto_lat.head(11)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1J,Scarborough,Scarborough Village,43.773136,-79.239476
5,M1K,Scarborough,"Ionview, Kennedy Park",43.744734,-79.239476
6,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.727929,-79.262029
7,M1M,Scarborough,"Cliffcrest, Cliffside",43.711112,-79.284577
8,M1N,Scarborough,Scarborough,43.716316,-79.239476
9,M1P,Scarborough,"Dorset Park, Scarborough Town Centre, Wexford ...",43.692657,-79.264848


##  PART 3 : Exploring and Clustering Neighbourhoods in Toronto

### I will use Boroughs with "Toronto in Them"

In [20]:
df_toronto_lat_tor = df_toronto_lat[df_toronto_lat.Borough.str.endswith("Toronto")]
df_toronto_lat_tor.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
30,M4E,East Toronto,The Beaches,43.737473,-79.464763
34,M4K,East Toronto,Riverdale,43.725882,-79.315572
35,M4L,East Toronto,India Bazaar,43.706397,-79.309937
36,M4N,Central Toronto,Lawrence Park,43.695344,-79.318389
37,M4T,Central Toronto,Moore Park,43.676357,-79.293031


In [21]:
df_toronto_lat_tor.dtypes

Postal Code       object
Borough           object
Neighbourhood     object
Latitude         float64
Longitude        float64
dtype: object

In [22]:
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [23]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(df_toronto_lat_tor['Latitude'], df_toronto_lat_tor['Longitude'], df_toronto_lat_tor['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

## Foursquare API to explore the Neighborhoods

In [24]:
# define Foursquare Credentials and Version
CLIENT_ID = 'OJ34BXKBAVW3LNLZQBTKV13MWMHIIUBGD1RHF2CYV1RUZLUL' # your Foursquare ID
CLIENT_SECRET = 'EODKR1F4KKREZ1TW2SPYUIHTBUSPFT35EL0LOAKPT3MICXQX' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: OJ34BXKBAVW3LNLZQBTKV13MWMHIIUBGD1RHF2CYV1RUZLUL
CLIENT_SECRET:EODKR1F4KKREZ1TW2SPYUIHTBUSPFT35EL0LOAKPT3MICXQX


In [25]:
# Set Venue Limit and Radius
LIMIT=100
radius=500

In [31]:
# Helper Functions
# Function that extracts the category of the venue
# ================================================
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']
    
# Function to get the Nearby Venues in different Neighborhoods
# =============================================================
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [33]:
toronto_venues = getNearbyVenues(names=df_toronto_lat_tor['Neighbourhood'],
                                   latitudes=df_toronto_lat_tor['Latitude'],
                                   longitudes=df_toronto_lat_tor['Longitude']
                                  )

The Beaches
Riverdale
India Bazaar
Lawrence Park
Moore Park
Deer Park, Rathnelly, South Hill
Rosedale
Cabbagetown, St. James Town
Church and Wellesley
Harbourfront
St. James Town
Berczy Park
Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Commerce Court
Forest Hill North
The Annex, Yorkville
University of Toronto
Chinatown, Grange Park, Kensington Market
CN Tower, King and Spadina, Railway Lands, South Niagara
First Canadian Place, Underground city
Dovercourt Village
Little Portugal, Trinity
Exhibition Place, Parkdale Village
High Park
Parkdale, Roncesvalles
Runnymede, Swansea
Queen's Park


In [34]:
print(toronto_venues.shape)
toronto_venues.head()

(926, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.737473,-79.464763,Toronto Downsview Airport (YZD),43.738883,-79.470111,Airport
1,The Beaches,43.737473,-79.464763,Ancaster Park,43.734706,-79.464777,Park
2,The Beaches,43.737473,-79.464763,Fly By (Bombardier),43.737632,-79.469056,Snack Place
3,Riverdale,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,Riverdale,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop


In [36]:
toronto_venues.groupby("Neighborhood").count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,3,3,3,3,3,3
"CN Tower, King and Spadina, Railway Lands, South Niagara",47,47,47,47,47,47
"Cabbagetown, St. James Town",4,4,4,4,4,4
"Chinatown, Grange Park, Kensington Market",81,81,81,81,81,81
Church and Wellesley,41,41,41,41,41,41
Commerce Court,38,38,38,38,38,38
"Deer Park, Rathnelly, South Hill",34,34,34,34,34,34
"Design Exchange, Toronto Dominion Centre",20,20,20,20,20,20
Dovercourt Village,56,56,56,56,56,56
"Exhibition Place, Parkdale Village",100,100,100,100,100,100


In [37]:
print("Number of Unique Venues in the DataFrame is: ", len(toronto_venues["Venue Category"].unique()))

Number of Unique Venues in the DataFrame is:  185


## Analysis of Each Neighborhood

In [38]:
# One Hot Encode the Categorical Variables
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Afghan Restaurant,Airport,American Restaurant,Antique Shop,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,...,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
0,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [39]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Afghan Restaurant,Airport,American Restaurant,Antique Shop,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,...,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"CN Tower, King and Spadina, Railway Lands, Sou...",0.021277,0.0,0.0,0.0,0.021277,0.021277,0.0,0.021277,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Cabbagetown, St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Chinatown, Grange Park, Kensington Market",0.012346,0.012346,0.0,0.012346,0.0,0.0,0.012346,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.012346,0.0,0.0,0.012346,0.0
4,Church and Wellesley,0.02439,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,...,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Commerce Court,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,...,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"Deer Park, Rathnelly, South Hill",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Design Exchange, Toronto Dominion Centre",0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Dovercourt Village,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,...,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Exhibition Place, Parkdale Village",0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.02,0.0,...,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.01


In [40]:
toronto_grouped.shape

(28, 185)

In [41]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+ hood +"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Berczy Park----
                     venue  freq
0              Swim School  0.33
1                 Bus Line  0.33
2                     Park  0.33
3              Yoga Studio  0.00
4  New American Restaurant  0.00


----CN Tower, King and Spadina, Railway Lands, South Niagara----
            venue  freq
0     Coffee Shop  0.15
1             Pub  0.06
2            Park  0.06
3          Bakery  0.06
4  Breakfast Spot  0.04


----Cabbagetown, St. James Town----
               venue  freq
0               Park  0.50
1  Convenience Store  0.25
2        Coffee Shop  0.25
3        Yoga Studio  0.00
4       Noodle House  0.00


----Chinatown, Grange Park, Kensington Market----
                 venue  freq
0          Coffee Shop  0.07
1  Japanese Restaurant  0.06
2              Gay Bar  0.05
3     Sushi Restaurant  0.05
4           Restaurant  0.04


----Church and Wellesley----
                    venue  freq
0        Greek Restaurant  0.22
1      Italian Restaurant  0.07
2             Coff

In [42]:
## Helper Function - Function to Sort Venues in Descending Order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [43]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Park,Swim School,Bus Line,Women's Store,Dog Run,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store
1,"CN Tower, King and Spadina, Railway Lands, Sou...",Coffee Shop,Pub,Park,Bakery,Restaurant,Mexican Restaurant,Breakfast Spot,Café,Theater,Farmers Market
2,"Cabbagetown, St. James Town",Park,Convenience Store,Coffee Shop,Discount Store,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
3,"Chinatown, Grange Park, Kensington Market",Coffee Shop,Japanese Restaurant,Gay Bar,Sushi Restaurant,Restaurant,Men's Store,Gastropub,Fast Food Restaurant,Pub,Gym
4,Church and Wellesley,Greek Restaurant,Coffee Shop,Italian Restaurant,Furniture / Home Store,Ice Cream Shop,Bookstore,Yoga Studio,Bubble Tea Shop,Spa,Café


In [44]:
# set number of clusters
kclusters = 4

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 0, 1, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [46]:
# Appending Clustered Labels
# neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
toronto_merged = df_toronto_lat_tor
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood')
toronto_merged.head() # check the last columns!

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
30,M4E,East Toronto,The Beaches,43.737473,-79.464763,Airport,Park,Snack Place,Women's Store,Discount Store,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store
34,M4K,East Toronto,Riverdale,43.725882,-79.315572,Pizza Place,Portuguese Restaurant,Hockey Arena,Coffee Shop,Diner,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
35,M4L,East Toronto,India Bazaar,43.706397,-79.309937,Pizza Place,Fast Food Restaurant,Athletics & Sports,Café,Bus Line,Pharmacy,Bank,Intersection,Gym / Fitness Center,Gastropub
36,M4N,Central Toronto,Lawrence Park,43.695344,-79.318389,Skating Rink,Cosmetics Shop,Pharmacy,Park,Video Store,Curling Ice,Dance Studio,Beer Store,Bus Stop,Dog Run
37,M4T,Central Toronto,Moore Park,43.676357,-79.293031,Park,Health Food Store,Other Great Outdoors,Pub,Trail,Asian Restaurant,Women's Store,Diner,Event Space,Ethiopian Restaurant
