#### Segmenting and Clustering Neighborhoods in Toronto¶


In [4]:
# import libraries
import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import json # library to handle JSON files
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

#### Download and Explore Dataset


In [5]:

# specify the url
post_codes = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'


In [6]:
# query the website and return the html to the variable ‘page’
page = requests.get(post_codes, timeout=5)



In [8]:
# parse the html using beautiful soup and store in variable `soup`
soup = BeautifulSoup(page.content, 'html.parser')

In [9]:
code_table = soup.find('table')
code_rows = code_table.findAll('tr')
columns=['Postcode', 'Borough', 'Neighbourhood']
df_codes = pd.DataFrame(columns=columns)
for idx, val in enumerate(code_rows):
    code_cells = val.findAll('td')
    df_list = []
    for idx, val in enumerate(code_cells):
        df_list.append(val.text.rstrip())
    if(int(len(df_list)) > 0):
        if(df_list[1] != "Not assigned"):
            if(df_list[2] == "Not assigned"):
                df_list[2] = df_list[1]

            df_dic={columns[0]: df_list[0], columns[1]: df_list[1], columns[2]: df_list[2]}
            df_codes = df_codes.append(df_dic, ignore_index=True)

df_codes = df_codes.groupby('Postcode', as_index=False).agg(lambda x: ', '.join(set(x.dropna())))                

df_codes.shape

(103, 3)

In [10]:
df_codes

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"West Hill, Guildwood, Morningside"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park"
7,M1L,Scarborough,"Clairlea, Oakridge, Golden Mile"
8,M1M,Scarborough,"Cliffcrest, Scarborough Village West, Cliffside"
9,M1N,Scarborough,"Cliffside West, Birch Cliff"


In [11]:
!wget -O Geospatial_data.csv https://cocl.us/Geospatial_data

--2018-10-21 02:05:26--  https://cocl.us/Geospatial_data
Resolving cocl.us (cocl.us)... 169.48.113.201
Connecting to cocl.us (cocl.us)|169.48.113.201|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://ibm.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2018-10-21 02:05:30--  https://ibm.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv
Resolving ibm.box.com (ibm.box.com)... 185.235.236.197
Connecting to ibm.box.com (ibm.box.com)|185.235.236.197|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://ibm.ent.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2018-10-21 02:05:30--  https://ibm.ent.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv
Resolving ibm.ent.box.com (ibm.ent.box.com)... 185.235.236.211
Connecting to ibm.ent.box.com (ibm.ent.box.com)|185.235.236.211|:443... connected.
HTTP request sent, awaiting response... 302 F

In [12]:
df_geo = pd.read_csv("Geospatial_data.csv")
df_codes = df_codes.join(df_geo)

In [13]:
df_codes

Unnamed: 0,Postcode,Borough,Neighbourhood,Postal Code,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",M1B,43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",M1C,43.784535,-79.160497
2,M1E,Scarborough,"West Hill, Guildwood, Morningside",M1E,43.763573,-79.188711
3,M1G,Scarborough,Woburn,M1G,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,M1H,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,M1J,43.744734,-79.239476
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",M1K,43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Oakridge, Golden Mile",M1L,43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Scarborough Village West, Cliffside",M1M,43.716316,-79.239476
9,M1N,Scarborough,"Cliffside West, Birch Cliff",M1N,43.692657,-79.264848


####  1 :Explore and cluster the neighborhoods in the city of Toronto.

In [14]:
!pip install geopy
!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import folium # map rendering library

Collecting geopy
  Downloading https://files.pythonhosted.org/packages/f3/b4/c8842fc6a07fc95d6887370768850885382b47bf52f5cd8b6a4ae1b6f4d9/geopy-1.17.0-py2.py3-none-any.whl (92kB)
[K    100% |################################| 92kB 1.5MB/s ta 0:00:01
[?25hCollecting geographiclib<2,>=1.49 (from geopy)
  Downloading https://files.pythonhosted.org/packages/5b/ac/4f348828091490d77899bc74e92238e2b55c59392f21948f296e94e50e2b/geographiclib-1.49.tar.gz
Building wheels for collected packages: geographiclib
  Running setup.py bdist_wheel for geographiclib ... [?25ldone
[?25h  Stored in directory: /home/spark/shared/.cache/pip/wheels/99/45/d1/14954797e2a976083182c2e7da9b4e924509e59b6e5c661061
Successfully built geographiclib
Installing collected packages: geographiclib, geopy
Successfully installed geographiclib-1.49 geopy-1.17.0
Solving environment: done

## Package Plan ##

  environment location: /opt/ibm/conda/miniconda3

  added / updated specs: 
    - folium=0.5.0


The following package

In [16]:
address = 'Toronto, Canada'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))



The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [17]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)
folium.TileLayer('openstreetmap').add_to(map_toronto)
# add markers to map
for lat, lng, label in zip(df_codes['Latitude'], df_codes['Longitude'], df_codes['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto


#### Define Foursquare Credentials and Version


In [18]:
CLIENT_ID = '0DXAVIXJUGYNEZNM4E1XVTFKNUMWS1TJ5ZMHGXRP2LZ4O15G' # your Foursquare ID
CLIENT_SECRET = 'M2253MDW32HJB43ZN1P4FKKWAVI5FXLFQYKYYWAWLK0F0IWC' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 0DXAVIXJUGYNEZNM4E1XVTFKNUMWS1TJ5ZMHGXRP2LZ4O15G
CLIENT_SECRET:M2253MDW32HJB43ZN1P4FKKWAVI5FXLFQYKYYWAWLK0F0IWC


In [19]:

df_codes.loc[0, 'Neighbourhood']


'Malvern, Rouge'

In [20]:
neighborhood_latitude = df_codes.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = df_codes.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = df_codes.loc[0, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Malvern, Rouge are 43.806686299999996, -79.19435340000001.


In [21]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=0DXAVIXJUGYNEZNM4E1XVTFKNUMWS1TJ5ZMHGXRP2LZ4O15G&client_secret=M2253MDW32HJB43ZN1P4FKKWAVI5FXLFQYKYYWAWLK0F0IWC&v=20180605&ll=43.806686299999996,-79.19435340000001&radius=500&limit=100'

In [22]:
results = requests.get(url).json()
results


{'meta': {'code': 200, 'requestId': '5bcc190f4c1f67197cccb228'},
 'response': {'groups': [{'items': [{'reasons': {'count': 0,
       'items': [{'reasonName': 'globalInteractionReason',
         'summary': 'This spot is popular',
         'type': 'general'}]},
      'referralId': 'e-0-4bb6b9446edc76b0d771311c-0',
      'venue': {'categories': [{'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/fastfood_',
          'suffix': '.png'},
         'id': '4bf58dd8d48988d16e941735',
         'name': 'Fast Food Restaurant',
         'pluralName': 'Fast Food Restaurants',
         'primary': True,
         'shortName': 'Fast Food'}],
       'id': '4bb6b9446edc76b0d771311c',
       'location': {'cc': 'CA',
        'city': 'Toronto',
        'country': 'Canada',
        'crossStreet': 'Morningside & Sheppard',
        'distance': 387,
        'formattedAddress': ['Toronto ON', 'Canada'],
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.80744841934756,
          'ln

In [25]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']



In [24]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]


In [26]:
nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Wendy's,Fast Food Restaurant,43.807448,-79.199056
1,Interprovincial Group,Print Shop,43.80563,-79.200378


####  2 :Explore Neighborhoods in Toronto

In [29]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [30]:
toronto_venues = getNearbyVenues(names=df_codes['Neighbourhood'],
                                   latitudes=df_codes['Latitude'],
                                   longitudes=df_codes['Longitude']
                                  )

Malvern, Rouge
Highland Creek, Rouge Hill, Port Union
West Hill, Guildwood, Morningside
Woburn
Cedarbrae
Scarborough Village
Kennedy Park, Ionview, East Birchmount Park
Clairlea, Oakridge, Golden Mile
Cliffcrest, Scarborough Village West, Cliffside
Cliffside West, Birch Cliff
Scarborough Town Centre, Wexford Heights, Dorset Park
Maryvale, Wexford
Agincourt
Clarks Corners, Sullivan, Tam O'Shanter
Milliken, L'Amoreaux East, Steeles East, Agincourt North
L'Amoreaux West, Steeles West
Upper Rouge
Hillcrest Village
Oriole, Henry Farm, Fairview
Bayview Village
York Mills, Silver Hills
Newtonbrook, Willowdale
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Don Mills South, Flemingdon Park
Bathurst Manor, Wilson Heights, Downsview North
York University, Northwood Park
Downsview East, CFB Toronto
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Woodbine Gardens, Parkview Hill
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto
The D

In [31]:
print(toronto_venues.shape)
toronto_venues.head()

(2253, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Malvern, Rouge",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,"Malvern, Rouge",43.806686,-79.194353,Interprovincial Group,43.80563,-79.200378,Print Shop
2,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
3,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,Affordable Toronto Movers,43.787919,-79.162977,Moving Target
4,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,Scarborough Historical Society,43.788755,-79.162438,History Museum


In [32]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,4,4,4,4,4,4
"Albion Gardens, Thistletown, Beaumond Heights, Jamestown, Mount Olive, Humbergate, South Steeles, Silverstone",9,9,9,9,9,9
"Bathurst Manor, Wilson Heights, Downsview North",19,19,19,19,19,19
Bayview Village,4,4,4,4,4,4
Berczy Park,53,53,53,53,53,53
"Bloordale Gardens, Markland Wood, Old Burnhamthorpe, Eringate",6,6,6,6,6,6
Business reply mail Processing Centre969 Eastern,17,17,17,17,17,17
"CN Tower, Harbourfront West, Bathurst Quay, Island airport, South Niagara, Railway Lands, King and Spadina",14,14,14,14,14,14
"Cabbagetown, St. James Town",48,48,48,48,48,48
Caledonia-Fairbanks,6,6,6,6,6,6


In [33]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 275 uniques categories.


#### 3. Analyze Each Neighborhood

In [34]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [35]:
toronto_onehot.shape

(2253, 275)

In [36]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped


Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
0,Agincourt,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.00000,0.000000,0.000000,0.000000,0.000000,0.0000,0.000000,0.000000,0.000000
1,"Albion Gardens, Thistletown, Beaumond Heights,...",0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.00000,0.000000,0.000000,0.000000,0.000000,0.0000,0.000000,0.000000,0.000000
2,"Bathurst Manor, Wilson Heights, Downsview North",0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.00000,0.000000,0.000000,0.052632,0.000000,0.0000,0.000000,0.000000,0.000000
3,Bayview Village,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.00000,0.000000,0.000000,0.000000,0.000000,0.0000,0.000000,0.000000,0.000000
4,Berczy Park,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.00000,0.000000,0.000000,0.000000,0.000000,0.0000,0.000000,0.000000,0.000000
5,"Bloordale Gardens, Markland Wood, Old Burnhamt...",0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.00000,0.000000,0.000000,0.000000,0.000000,0.0000,0.000000,0.000000,0.000000
6,Business reply mail Processing Centre969 Eastern,0.058824,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.00000,0.000000,0.000000,0.000000,0.000000,0.0000,0.000000,0.000000,0.000000
7,"CN Tower, Harbourfront West, Bathurst Quay, Is...",0.000000,0.00,0.000000,0.000000,0.071429,0.071429,0.071429,0.142857,0.142857,...,0.00000,0.00000,0.000000,0.000000,0.000000,0.000000,0.0000,0.000000,0.000000,0.000000
8,"Cabbagetown, St. James Town",0.020833,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.00000,0.000000,0.000000,0.000000,0.000000,0.0000,0.000000,0.000000,0.000000
9,Caledonia-Fairbanks,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.00000,0.000000,0.000000,0.000000,0.000000,0.0000,0.000000,0.000000,0.166667


In [37]:
toronto_grouped.shape


(101, 275)

In [38]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [39]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Clothing Store,Lounge,Skating Rink,Breakfast Spot,Women's Store,Dog Run,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store
1,"Albion Gardens, Thistletown, Beaumond Heights,...",Grocery Store,Fried Chicken Joint,Pharmacy,Pizza Place,Sandwich Place,Coffee Shop,Fast Food Restaurant,Beer Store,General Entertainment,Cuban Restaurant
2,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Frozen Yogurt Shop,Pharmacy,Deli / Bodega,Bank,Shopping Mall,Fried Chicken Joint,Sushi Restaurant,Pizza Place,Pet Store
3,Bayview Village,Bank,Japanese Restaurant,Chinese Restaurant,Café,Women's Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run
4,Berczy Park,Coffee Shop,Cocktail Bar,Bakery,Steakhouse,Farmers Market,Beer Bar,Cheese Shop,Café,Seafood Restaurant,Restaurant
5,"Bloordale Gardens, Markland Wood, Old Burnhamt...",Beer Store,Liquor Store,Pharmacy,Pizza Place,Shopping Plaza,Café,Discount Store,Department Store,Dessert Shop,Dim Sum Restaurant
6,Business reply mail Processing Centre969 Eastern,Yoga Studio,Butcher,Light Rail Station,Brewery,Spa,Farmers Market,Fast Food Restaurant,Restaurant,Burrito Place,Recording Studio
7,"CN Tower, Harbourfront West, Bathurst Quay, Is...",Airport Terminal,Airport Service,Airport Lounge,Boat or Ferry,Airport Gate,Plane,Sculpture Garden,Airport Food Court,Harbor / Marina,Airport
8,"Cabbagetown, St. James Town",Coffee Shop,Restaurant,Bakery,Pub,Chinese Restaurant,Indian Restaurant,Park,Italian Restaurant,Pizza Place,Café
9,Caledonia-Fairbanks,Park,Women's Store,Fast Food Restaurant,Market,Pharmacy,German Restaurant,General Travel,Drugstore,Gluten-free Restaurant,Donut Shop


#### 4. Cluster Neighborhoods

In [40]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
#kmeans.labels_[0:10] 
kmeans.labels_

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 2, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       2, 1, 1, 0, 4, 1, 2, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1,
       2, 1, 1, 1, 1, 1, 2, 3, 1], dtype=int32)

In [41]:
df_codes

Unnamed: 0,Postcode,Borough,Neighbourhood,Postal Code,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",M1B,43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",M1C,43.784535,-79.160497
2,M1E,Scarborough,"West Hill, Guildwood, Morningside",M1E,43.763573,-79.188711
3,M1G,Scarborough,Woburn,M1G,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,M1H,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,M1J,43.744734,-79.239476
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",M1K,43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Oakridge, Golden Mile",M1L,43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Scarborough Village West, Cliffside",M1M,43.716316,-79.239476
9,M1N,Scarborough,"Cliffside West, Birch Cliff",M1N,43.692657,-79.264848


In [42]:
df_codes_join = df_codes.rename(columns={'Neighbourhood':'Neighborhood'}) #['Neighbourhood','Latitude','Longitude']
result = toronto_grouped.join(df_codes_join.set_index('Neighborhood'), on='Neighborhood')
#result = pd.concat([toronto_grouped,df_codes_join],axis=1, join='inner', on='Neighborhood')
result

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,...,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Postcode,Borough,Postal Code,Latitude,Longitude
0,Agincourt,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0000,0.000000,0.000000,0.000000,M1S,Scarborough,M1S,43.794200,-79.262029
1,"Albion Gardens, Thistletown, Beaumond Heights,...",0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0000,0.000000,0.000000,0.000000,M9V,Etobicoke,M9V,43.739416,-79.588437
2,"Bathurst Manor, Wilson Heights, Downsview North",0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0000,0.000000,0.000000,0.000000,M3H,North York,M3H,43.754328,-79.442259
3,Bayview Village,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0000,0.000000,0.000000,0.000000,M2K,North York,M2K,43.786947,-79.385975
4,Berczy Park,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0000,0.000000,0.000000,0.000000,M5E,Downtown Toronto,M5E,43.644771,-79.373306
5,"Bloordale Gardens, Markland Wood, Old Burnhamt...",0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0000,0.000000,0.000000,0.000000,M9C,Etobicoke,M9C,43.643515,-79.577201
6,Business reply mail Processing Centre969 Eastern,0.058824,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0000,0.000000,0.000000,0.000000,M7Y,East Toronto,M7Y,43.662744,-79.321558
7,"CN Tower, Harbourfront West, Bathurst Quay, Is...",0.000000,0.00,0.000000,0.000000,0.071429,0.071429,0.071429,0.142857,0.142857,...,0.000000,0.0000,0.000000,0.000000,0.000000,M5V,Downtown Toronto,M5V,43.628947,-79.394420
8,"Cabbagetown, St. James Town",0.020833,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0000,0.000000,0.000000,0.000000,M4X,Downtown Toronto,M4X,43.667967,-79.367675
9,Caledonia-Fairbanks,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0000,0.000000,0.000000,0.166667,M6E,York,M6E,43.689026,-79.453512


In [43]:
toronto_merged = result

# add clustering labels
toronto_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!
toronto_merged.columns.values



array(['Neighborhood', 'Yoga Studio', 'Accessories Store',
       'Adult Boutique', 'Afghan Restaurant', 'Airport',
       'Airport Food Court', 'Airport Gate', 'Airport Lounge',
       'Airport Service', 'Airport Terminal', 'American Restaurant',
       'Antique Shop', 'Aquarium', 'Arcade', 'Arepa Restaurant',
       'Art Gallery', 'Art Museum', 'Arts & Crafts Store',
       'Asian Restaurant', 'Athletics & Sports', 'Auto Garage',
       'Auto Workshop', 'BBQ Joint', 'Baby Store', 'Bagel Shop', 'Bakery',
       'Bank', 'Bar', 'Baseball Field', 'Baseball Stadium',
       'Basketball Court', 'Basketball Stadium', 'Beach', 'Beer Bar',
       'Beer Store', 'Belgian Restaurant', 'Bike Shop', 'Bistro',
       'Board Shop', 'Boat or Ferry', 'Bookstore', 'Boutique',
       'Bowling Alley', 'Brazilian Restaurant', 'Breakfast Spot',
       'Brewery', 'Bridal Shop', 'Bubble Tea Shop', 'Building',
       'Burger Joint', 'Burrito Place', 'Bus Line', 'Bus Station',
       'Bus Stop', 'Business Serv

In [44]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

#### 5. Examine Clusters

In [49]:
## Cluster 1
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[0] + list(range(toronto_merged.shape[1]-10, toronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
49,"Malvern, Rouge",Print Shop,Fast Food Restaurant,Women's Store,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run


In [46]:
## Cluster 2
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[0] + list(range(toronto_merged.shape[1]-10, toronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Clothing Store,Lounge,Skating Rink,Breakfast Spot,Women's Store,Dog Run,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store
1,"Albion Gardens, Thistletown, Beaumond Heights,...",Grocery Store,Fried Chicken Joint,Pharmacy,Pizza Place,Sandwich Place,Coffee Shop,Fast Food Restaurant,Beer Store,General Entertainment,Cuban Restaurant
2,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Frozen Yogurt Shop,Pharmacy,Deli / Bodega,Bank,Shopping Mall,Fried Chicken Joint,Sushi Restaurant,Pizza Place,Pet Store
3,Bayview Village,Bank,Japanese Restaurant,Chinese Restaurant,Café,Women's Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run
4,Berczy Park,Coffee Shop,Cocktail Bar,Bakery,Steakhouse,Farmers Market,Beer Bar,Cheese Shop,Café,Seafood Restaurant,Restaurant
5,"Bloordale Gardens, Markland Wood, Old Burnhamt...",Beer Store,Liquor Store,Pharmacy,Pizza Place,Shopping Plaza,Café,Discount Store,Department Store,Dessert Shop,Dim Sum Restaurant
6,Business reply mail Processing Centre969 Eastern,Yoga Studio,Butcher,Light Rail Station,Brewery,Spa,Farmers Market,Fast Food Restaurant,Restaurant,Burrito Place,Recording Studio
7,"CN Tower, Harbourfront West, Bathurst Quay, Is...",Airport Terminal,Airport Service,Airport Lounge,Boat or Ferry,Airport Gate,Plane,Sculpture Garden,Airport Food Court,Harbor / Marina,Airport
8,"Cabbagetown, St. James Town",Coffee Shop,Restaurant,Bakery,Pub,Chinese Restaurant,Indian Restaurant,Park,Italian Restaurant,Pizza Place,Café
10,Canada Post Gateway Processing Centre,Coffee Shop,Hotel,Fried Chicken Joint,Mediterranean Restaurant,Burrito Place,Middle Eastern Restaurant,Sandwich Place,American Restaurant,Gym / Fitness Center,Dog Run


In [50]:
### Cluster 3
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[0] + list(range(toronto_merged.shape[1]-10, toronto_merged.shape[1]))]]


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,Caledonia-Fairbanks,Park,Women's Store,Fast Food Restaurant,Market,Pharmacy,German Restaurant,General Travel,Drugstore,Gluten-free Restaurant,Donut Shop
26,"Downsview East, CFB Toronto",Park,Bus Stop,Airport,Women's Store,Dog Run,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store
29,East Toronto,Park,Furniture / Home Store,Convenience Store,Women's Store,Discount Store,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Dog Run
31,"Forest Hill North, Forest Hill West",Park,Jewelry Store,Sushi Restaurant,Trail,Dumpling Restaurant,Eastern European Restaurant,Drugstore,Donut Shop,Doner Restaurant,Dance Studio
46,Lawrence Park,Bus Line,Park,Swim School,Dim Sum Restaurant,Discount Store,Deli / Bodega,Department Store,Dessert Shop,Diner,Dog Run
52,"Milliken, L'Amoreaux East, Steeles East, Aginc...",Coffee Shop,Park,Playground,Doner Restaurant,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run
53,"Moore Park, Summerhill East",Park,Tennis Court,Playground,Restaurant,Dim Sum Restaurant,Curling Ice,Dance Studio,Deli / Bodega,Department Store,Dessert Shop
57,"North Park, Maple Leaf Park, Upwood Park",Construction & Landscaping,Park,Basketball Court,Bakery,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant
63,Parkwoods,Park,Fast Food Restaurant,Food & Drink Shop,Women's Store,Dog Run,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store
67,Rosedale,Park,Trail,Playground,Discount Store,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner


In [51]:
#### Cluster 4
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[0] + list(range(toronto_merged.shape[1]-10, toronto_merged.shape[1]))]]



Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
99,"York Mills, Silver Hills",Cafeteria,Women's Store,Dog Run,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Doner Restaurant,Dance Studio


In [52]:
#### Cluster 5
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[0] + list(range(toronto_merged.shape[1]-10, toronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
50,"Martin Grove, Cloverdale, Princess Gardens, Is...",Bank,Donut Shop,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Women's Store,Deli / Bodega
