# Segmenting and Clustering Neighborhoods in Toronto

## Part 1.

In [1]:
import pandas as pd

In [2]:
toronto_wiki_url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

### Load the data into Pandas

In [3]:
df_toronto = pd.read_html(toronto_wiki_url)[0]
df_toronto.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


### Clean the data

Drop the unassigned boroughs


In [4]:
df_na= df_toronto[df_toronto.Borough != 'Not assigned']
df_na.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


If neighborhood is not assigned, make the neighborhood the same as the borough

In [5]:
df_na.loc[df_na.Neighbourhood == 'Not assigned', 'Neighbourhood'] = df_na.Borough
df_na.head(15)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Queen's Park
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


Combine rows so that each borough has all of the neighborhoods listed

In [6]:
df = df_na.groupby(['Postcode','Borough'])['Neighbourhood'].apply(', '.join).reset_index()
df.head(20)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [7]:
df.shape

(103, 3)

## Part 2.

### Example use of geocoder

In [7]:
#!pip install geocoder
import geocoder # import geocoder

'''
# initialize your variable to None
lat_lng_coords = None

# loop until you get the coordinates
while(lat_lng_coords is None):
  g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
  lat_lng_coords = g.latlng

latitude = lat_lng_coords[0]
longitude = lat_lng_coords[1] '''

"\n# initialize your variable to None\nlat_lng_coords = None\n\n# loop until you get the coordinates\nwhile(lat_lng_coords is None):\n  g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))\n  lat_lng_coords = g.latlng\n\nlatitude = lat_lng_coords[0]\nlongitude = lat_lng_coords[1] "

### Create function to get latitude and longiture using ARCGIS

In [8]:
def post_latlong(postal_code):
    coord = None
    while(coord is None):
        g = geocoder.arcgis('{}, Toronto, Ontario'.format(postal_code))
        coord = g.latlng
        return coord

In [9]:
post_latlong('M5G')

[43.65609081300005, -79.38492999999994]

Loop through dataframe to retrieve each latitude and longitude. Add each to a list.

In [10]:
code_list = []
for i in df.index:
    code = df.loc[i,'Postcode']
    arc_ll = post_latlong(code)
    code_list.append(arc_ll)

In [11]:
print(code_list)

[[43.811525000000074, -79.19551721399995], [43.78573000000006, -79.15874999999994], [43.76569000000006, -79.17525603599995], [43.76835912100006, -79.21758999999997], [43.76968799900004, -79.23943999999995], [43.74312500000008, -79.23174973599998], [43.726244585000074, -79.26366999999993], [43.71313321100007, -79.28505499999994], [43.72357500000004, -79.23497617799995], [43.69666500000005, -79.26016331599999], [43.759975000000054, -79.26897402899993], [43.750710464000065, -79.30055999999996], [43.79394000000008, -79.26798280099996], [43.78472500000004, -79.29904659999994], [43.817810000000065, -79.28024362199994], [43.80088094900003, -79.32073999999994], [43.83421500000003, -79.21670085099998], [43.80284500000005, -79.35623615099996], [43.780880000000025, -79.34779577599994], [43.781015000000025, -79.38054242199996], [43.75719200000003, -79.37986499999994], [43.79147500000005, -79.41360487299994], [43.76816500000007, -79.40741984599998], [43.74785500000007, -79.40006223799998], [43.7776

Create dataframe with all of the latitude and longitude values

In [12]:
ll = pd.DataFrame(code_list,columns=['Latitude','Longitude'])
ll.head()

Unnamed: 0,Latitude,Longitude
0,43.811525,-79.195517
1,43.78573,-79.15875
2,43.76569,-79.175256
3,43.768359,-79.21759
4,43.769688,-79.23944


### Add the latitude and longitude dataframe to the original dataframe

In [13]:
df_codes = pd.concat([df, ll], axis=1)
df_codes.head(20)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.811525,-79.195517
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.78573,-79.15875
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.76569,-79.175256
3,M1G,Scarborough,Woburn,43.768359,-79.21759
4,M1H,Scarborough,Cedarbrae,43.769688,-79.23944
5,M1J,Scarborough,Scarborough Village,43.743125,-79.23175
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.726245,-79.26367
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.713133,-79.285055
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.723575,-79.234976
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.696665,-79.260163


In [14]:
df_codes.shape

(103, 5)

## Part 3.

Download dependencies

In [15]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes 
import folium # map rendering library

print('Libraries imported.')

Solving environment: done

# All requested packages already installed.

Libraries imported.


### Create map of Toronto with postal codes pinned

In [16]:
# Get latitude and longitude of Toronto
tor = geocoder.arcgis('Toronto, Ontario')

# Make map
map_toronto = folium.Map(location = [tor.lat,tor.lng], zoom_start=10)

# Add markers
for lat, lng, borough, neighborhood, postcode in zip(df_codes['Latitude'], df_codes['Longitude'], df_codes['Borough'], df_codes['Neighbourhood'], df_codes['Postcode']):
    label = "Postcode: {} | Borough: {} | Neighborhood(s): {}".format(postcode, borough, neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
map_toronto

### Create a map for one borough

In [17]:
df_codes['Borough'].value_counts()

North York          24
Downtown Toronto    18
Scarborough         17
Etobicoke           12
Central Toronto      9
West Toronto         6
York                 5
East York            5
East Toronto         5
Queen's Park         1
Mississauga          1
Name: Borough, dtype: int64

Let's map the borough with the most postal codes, North York

In [18]:
df_ny = df_codes.loc[df_codes['Borough']=='North York'].reset_index(drop=True)
df_ny

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M2H,North York,Hillcrest Village,43.802845,-79.356236
1,M2J,North York,"Fairview, Henry Farm, Oriole",43.78088,-79.347796
2,M2K,North York,Bayview Village,43.781015,-79.380542
3,M2L,North York,"Silver Hills, York Mills",43.757192,-79.379865
4,M2M,North York,"Newtonbrook, Willowdale",43.791475,-79.413605
5,M2N,North York,Willowdale South,43.768165,-79.40742
6,M2P,North York,York Mills West,43.747855,-79.400062
7,M2R,North York,Willowdale West,43.777695,-79.445797
8,M3A,North York,Parkwoods,43.75244,-79.329271
9,M3B,North York,Don Mills North,43.749195,-79.361905


In [58]:
# Get latitude and longitude for North York
nyll = geocoder.arcgis('North York, Ontario')
nyll.latlng

[43.768260000000055, -79.41262999999998]

In [59]:
# Make map
map_ny = folium.Map(location = [nyll.lat,nyll.lng], zoom_start=11)

# Add markers
for lat, lng, borough, neighborhood, postcode in zip(df_ny['Latitude'], df_ny['Longitude'], df_ny['Borough'], df_ny['Neighbourhood'], df_ny['Postcode']):
    label = "Postcode: {} | Neighborhood(s): {}".format(postcode, neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='green',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_ny)  
map_ny

### Create a map for top 3 boroughs with most postal codes

Make dataframe with the top 3 boroughs

In [21]:
df_dt = df_codes.loc[df_codes['Borough']=='Downtown Toronto'].reset_index(drop=True)
df_sb = df_codes.loc[df_codes['Borough']=='Scarborough'].reset_index(drop=True)

In [22]:
# Make map of Toronto
map_3 = folium.Map(location = [tor.lat,tor.lng], zoom_start=10)
map_3

In [23]:
from folium import IFrame

# Add markers
for lat, lng, borough, neighborhood, postcode in zip(df_ny['Latitude'], df_ny['Longitude'], df_ny['Borough'], df_ny['Neighbourhood'], df_ny['Postcode']):
    label = "Postcode: {} | Neighborhood(s): {}".format(postcode, neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_opacity=0.7,
        parse_html=False).add_to(map_3)  

for lat, lng, borough, neighborhood, postcode in zip(df_dt['Latitude'], df_dt['Longitude'], df_dt['Borough'], df_dt['Neighbourhood'], df_dt['Postcode']):
    label = "Postcode: {} | Neighborhood(s): {}".format(postcode, neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='orange',
        fill=True,
        fill_opacity=0.7,
        parse_html=False).add_to(map_3)  
    
for lat, lng, borough, neighborhood, postcode in zip(df_sb['Latitude'], df_sb['Longitude'], df_sb['Borough'], df_sb['Neighbourhood'], df_sb['Postcode']):
    label = "Postcode: {} | Neighborhood(s): {}".format(postcode, neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='yellow',
        fill=True,
        fill_opacity=0.7,
        parse_html=False).add_to(map_3)  
map_3

In [24]:
# Add legend
# Source: https://nbviewer.jupyter.org/gist/talbertc-usgs/18f8901fc98f109f2b71156cf3ac81cd
from branca.element import Template, MacroElement

template = """
{% macro html(this, kwargs) %}

<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>jQuery UI Draggable - Default functionality</title>
  <link rel="stylesheet" href="//code.jquery.com/ui/1.12.1/themes/base/jquery-ui.css">

  <script src="https://code.jquery.com/jquery-1.12.4.js"></script>
  <script src="https://code.jquery.com/ui/1.12.1/jquery-ui.js"></script>
  
  <script>
  $( function() {
    $( "#maplegend" ).draggable({
                    start: function (event, ui) {
                        $(this).css({
                            right: "auto",
                            top: "auto",
                            bottom: "auto"
                        });
                    }
                });
});

  </script>
</head>
<body>

 
<div id='maplegend' class='maplegend' 
    style='position: absolute; z-index:9999; border:2px solid grey; background-color:rgba(255, 255, 255, 0.8);
     border-radius:6px; padding: 10px; font-size:14px; right: 20px; bottom: 20px;'>
     
<div class='legend-title'>Legend</div>
<div class='legend-scale'>
  <ul class='legend-labels'>
    <li><span style='background:red;opacity:0.7;'></span>North York</li>
    <li><span style='background:orange;opacity:0.7;'></span>Downtown Toronto</li>
    <li><span style='background:yellow;opacity:0.7;'></span>Scarborough</li>

  </ul>
</div>
</div>
 
</body>
</html>

<style type='text/css'>
  .maplegend .legend-title {
    text-align: left;
    margin-bottom: 5px;
    font-weight: bold;
    font-size: 90%;
    }
  .maplegend .legend-scale ul {
    margin: 0;
    margin-bottom: 5px;
    padding: 0;
    float: left;
    list-style: none;
    }
  .maplegend .legend-scale ul li {
    font-size: 80%;
    list-style: none;
    margin-left: 0;
    line-height: 18px;
    margin-bottom: 2px;
    }
  .maplegend ul.legend-labels li span {
    display: block;
    float: left;
    height: 16px;
    width: 30px;
    margin-right: 5px;
    margin-left: 0;
    border: 1px solid #999;
    }
  .maplegend .legend-source {
    font-size: 80%;
    color: #777;
    clear: both;
    }
  .maplegend a {
    color: #777;
    }
</style>
{% endmacro %}"""

macro = MacroElement()
macro._template = Template(template)

map_3.get_root().add_child(macro)

map_3

### Use Foursquare data

In [25]:
#Define Foursquare Credentials and Version (hidden in next cell)

In [26]:
# @hidden cell
CLIENT_ID = 'T4UAO5LOHX2ORNBOPBLSVXDIIXYTCBUJD24EZQKEXMVZNDWI' # your Foursquare ID
CLIENT_SECRET = 'YHVWQ3ALAGL3KSUDLAFLWIH3L0UZ2XPDZHC2ZGVTH2INQZ5R' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: T4UAO5LOHX2ORNBOPBLSVXDIIXYTCBUJD24EZQKEXMVZNDWI
CLIENT_SECRET:YHVWQ3ALAGL3KSUDLAFLWIH3L0UZ2XPDZHC2ZGVTH2INQZ5R


In [27]:
ny = 'North York'
ny_lat = df_ny.loc[0, 'Latitude']
ny_long = df_ny.loc[0, 'Longitude']
print('Latitude and longitude values of {} are {}, {}.'.format(ny, 
                                                               ny_lat, 
                                                               ny_long))


Latitude and longitude values of North York are 43.80284500000005, -79.35623615099996.


### Get top venues in North York within 1000m radius

In [28]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 1000 # define radius

# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    ny_lat, 
    ny_long, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=T4UAO5LOHX2ORNBOPBLSVXDIIXYTCBUJD24EZQKEXMVZNDWI&client_secret=YHVWQ3ALAGL3KSUDLAFLWIH3L0UZ2XPDZHC2ZGVTH2INQZ5R&v=20180605&ll=43.80284500000005,-79.35623615099996&radius=1000&limit=100'

In [29]:
# Send GET request and view results
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d77fa55d69ed0002cea18d3'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Toronto',
  'headerFullLocation': 'Toronto',
  'headerLocationGranularity': 'city',
  'totalResults': 22,
  'suggestedBounds': {'ne': {'lat': 43.81184500900006,
    'lng': -79.34378931285491},
   'sw': {'lat': 43.793844991000036, 'lng': -79.36868298914501}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4bd9842be914a593adbd56fa',
       'name': 'Tastee',
       'location': {'address': '3913 Don Mills Rd.',
        'crossStreet': 'at Cliffwood Rd.',
        'lat': 43.80772211146167,
        'lng': -79.35679781099806,
        'labeledLatLngs': [{'label': 'display',
     

The radius was set to a large number as the area does not seem to have very many venues close by.

In [30]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [31]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Tastee,Bakery,43.807722,-79.356798
1,Starbucks,Coffee Shop,43.795522,-79.350193
2,TD Canada Trust,Bank,43.794204,-79.353023
3,Woodbrooke Estate,Residential Building (Apartment / Condo),43.802067,-79.354347
4,Subway,Sandwich Place,43.794171,-79.353576


In [32]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

22 venues were returned by Foursquare.


### Find top venues around all the neighborhoods in North York

In [33]:
# Create function to get venues

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [36]:
# Create new dataframe with venues near North York

ny_venues = getNearbyVenues(names=df_ny['Neighbourhood'],
                                   latitudes=df_ny['Latitude'],
                                   longitudes=df_ny['Longitude']
                                  )

Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
Silver Hills, York Mills
Newtonbrook, Willowdale
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Flemingdon Park, Don Mills South
Bathurst Manor, Downsview North, Wilson Heights
Northwood Park, York University
CFB Toronto, Downsview East
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Bedford Park, Lawrence Manor East
Lawrence Heights, Lawrence Manor
Glencairn
Downsview, North Park, Upwood Park
Humber Summit
Emery, Humberlea


In [37]:
print(ny_venues.shape)
ny_venues.head()

(286, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Hillcrest Village,43.802845,-79.356236,Woodbrooke Estate,43.802067,-79.354347,Residential Building (Apartment / Condo)
1,Hillcrest Village,43.802845,-79.356236,Duncan Creek Park,43.805539,-79.360695,Dog Run
2,"Fairview, Henry Farm, Oriole",43.78088,-79.347796,The LEGO Store,43.778207,-79.343483,Toy / Game Store
3,"Fairview, Henry Farm, Oriole",43.78088,-79.347796,SilverCity Fairview Mall Cinemas,43.778681,-79.344085,Movie Theater
4,"Fairview, Henry Farm, Oriole",43.78088,-79.347796,Purdys Chocolatier,43.77815,-79.344152,Candy Store


In [38]:
# Number of venues per neighborhood
ny_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bayview Village,5,5,5,5,5,5
"Bedford Park, Lawrence Manor East",19,19,19,19,19,19
"CFB Toronto, Downsview East",5,5,5,5,5,5
Don Mills North,5,5,5,5,5,5
Downsview Central,3,3,3,3,3,3
Downsview Northwest,20,20,20,20,20,20
Downsview West,2,2,2,2,2,2
"Downsview, North Park, Upwood Park",3,3,3,3,3,3
"Emery, Humberlea",4,4,4,4,4,4
"Fairview, Henry Farm, Oriole",57,57,57,57,57,57


In [39]:
# Number of unique categories
print('There are {} uniques categories.'.format(len(ny_venues['Venue Category'].unique())))

There are 110 uniques categories.


### Analyze each neighborhood

In [40]:
# one hot encoding
ny_onehot = pd.get_dummies(ny_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
ny_onehot['Neighborhood'] = ny_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [ny_onehot.columns[-1]] + list(ny_onehot.columns[:-1])
ny_onehot = ny_onehot[fixed_columns]

ny_onehot.head()

Unnamed: 0,Neighborhood,Airport,American Restaurant,Arts & Crafts Store,Asian Restaurant,Bakery,Bank,Bar,Basketball Court,Beer Store,Bookstore,Boutique,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Line,Business Service,Butcher,Café,Candy Store,Caribbean Restaurant,Chinese Restaurant,Chocolate Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dessert Shop,Discount Store,Dog Run,Eastern European Restaurant,Electronics Store,Falafel Restaurant,Fast Food Restaurant,Food & Drink Shop,Food Court,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Golf Driving Range,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Health & Beauty Service,Hookah Bar,Hotel,Ice Cream Shop,Indian Restaurant,Indonesian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Kitchen Supply Store,Korean Restaurant,Latin American Restaurant,Leather Goods Store,Lingerie Store,Liquor Store,Lounge,Massage Studio,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Movie Theater,Music Venue,Nail Salon,Nightclub,Park,Pet Store,Pharmacy,Pizza Place,Platform,Plaza,Pub,Ramen Restaurant,Residential Building (Apartment / Condo),Restaurant,Sandwich Place,Shoe Store,Shopping Mall,Skating Rink,Smoothie Shop,Soccer Field,Spa,Speakeasy,Sporting Goods Shop,Sports Club,Steakhouse,Supermarket,Supplement Shop,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Trail,Video Game Store,Vietnamese Restaurant,Wings Joint,Women's Store
0,Hillcrest Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Hillcrest Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Fairview, Henry Farm, Oriole",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
3,"Fairview, Henry Farm, Oriole",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Fairview, Henry Farm, Oriole",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [41]:
ny_onehot.shape

(286, 111)

In [42]:
# Group rows by neighborhood. Take mean of the frequency of the occurence of each category.
ny_grouped = ny_onehot.groupby('Neighborhood').mean().reset_index()
ny_grouped

Unnamed: 0,Neighborhood,Airport,American Restaurant,Arts & Crafts Store,Asian Restaurant,Bakery,Bank,Bar,Basketball Court,Beer Store,Bookstore,Boutique,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Line,Business Service,Butcher,Café,Candy Store,Caribbean Restaurant,Chinese Restaurant,Chocolate Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dessert Shop,Discount Store,Dog Run,Eastern European Restaurant,Electronics Store,Falafel Restaurant,Fast Food Restaurant,Food & Drink Shop,Food Court,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Golf Driving Range,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Health & Beauty Service,Hookah Bar,Hotel,Ice Cream Shop,Indian Restaurant,Indonesian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Kitchen Supply Store,Korean Restaurant,Latin American Restaurant,Leather Goods Store,Lingerie Store,Liquor Store,Lounge,Massage Studio,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Movie Theater,Music Venue,Nail Salon,Nightclub,Park,Pet Store,Pharmacy,Pizza Place,Platform,Plaza,Pub,Ramen Restaurant,Residential Building (Apartment / Condo),Restaurant,Sandwich Place,Shoe Store,Shopping Mall,Skating Rink,Smoothie Shop,Soccer Field,Spa,Speakeasy,Sporting Goods Shop,Sports Club,Steakhouse,Supermarket,Supplement Shop,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Trail,Video Game Store,Vietnamese Restaurant,Wings Joint,Women's Store
0,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0
1,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.052632,0.0,0.0,0.0,0.0,0.0,0.105263,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.105263,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.052632,0.0,0.0,0.052632,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.052632,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"CFB Toronto, Downsview East",0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Don Mills North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Downsview Central,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Downsview Northwest,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.05,0.1,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.1,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0
6,Downsview West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Downsview, North Park, Upwood Park",0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Emery, Humberlea",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Fairview, Henry Farm, Oriole",0.0,0.0,0.0,0.017544,0.017544,0.017544,0.0,0.0,0.0,0.0,0.017544,0.0,0.017544,0.017544,0.0,0.0,0.0,0.0,0.017544,0.0,0.017544,0.0,0.192982,0.052632,0.0,0.0,0.017544,0.017544,0.017544,0.0,0.017544,0.0,0.0,0.0,0.017544,0.0,0.087719,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.017544,0.017544,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.017544,0.0,0.017544,0.017544,0.017544,0.0,0.017544,0.0,0.017544,0.0,0.0,0.0,0.017544,0.0,0.035088,0.0,0.017544,0.017544,0.0,0.017544,0.0,0.017544,0.052632


In [43]:
ny_grouped.shape

(23, 111)

In [44]:
# Print each neighborhood with top 5 most common venues
num_top_venues = 5

for hood in ny_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = ny_grouped[ny_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Bayview Village----
                        venue  freq
0          Golf Driving Range   0.2
1                     Dog Run   0.2
2  Construction & Landscaping   0.2
3                       Trail   0.2
4                        Park   0.2


----Bedford Park, Lawrence Manor East----
                venue  freq
0  Italian Restaurant  0.11
1         Coffee Shop  0.11
2         Sports Club  0.05
3        Liquor Store  0.05
4   Indian Restaurant  0.05


----CFB Toronto, Downsview East----
         venue  freq
0      Airport   0.2
1         Park   0.2
2  Coffee Shop   0.2
3   Shoe Store   0.2
4   Food Court   0.2


----Don Mills North----
          venue  freq
0          Park   0.2
1   Coffee Shop   0.2
2  Soccer Field   0.2
3  Burger Joint   0.2
4           Spa   0.2


----Downsview Central----
                     venue  freq
0         Business Service  0.33
1                     Park  0.33
2  Health & Beauty Service  0.33
3                  Airport  0.00
4              Pizza Place  0.00


In [45]:
# Sort venues into descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [46]:
# New dataframe with top 10 venues for each neighborhood
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = ny_grouped['Neighborhood']

for ind in np.arange(ny_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(ny_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bayview Village,Construction & Landscaping,Trail,Park,Dog Run,Golf Driving Range,Women's Store,Food Court,Department Store,Dessert Shop,Discount Store
1,"Bedford Park, Lawrence Manor East",Italian Restaurant,Coffee Shop,Pharmacy,Juice Bar,Sandwich Place,Café,Butcher,Fast Food Restaurant,Liquor Store,Pub
2,"CFB Toronto, Downsview East",Airport,Food Court,Park,Coffee Shop,Shoe Store,Department Store,Dessert Shop,Discount Store,Dog Run,Eastern European Restaurant
3,Don Mills North,Spa,Park,Coffee Shop,Burger Joint,Soccer Field,Women's Store,Department Store,Dessert Shop,Discount Store,Dog Run
4,Downsview Central,Park,Health & Beauty Service,Business Service,Women's Store,Food Court,Department Store,Dessert Shop,Discount Store,Dog Run,Eastern European Restaurant


### Cluster neighborhoods

k-means clustering

In [47]:
# set number of clusters
kclusters = 5

ny_grouped_clustering = ny_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(ny_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([4, 4, 0, 0, 0, 4, 0, 0, 0, 4], dtype=int32)

In [50]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)



In [74]:
ny_merged = df_ny

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
ny_merged = ny_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood')

ny_merged.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M2H,North York,Hillcrest Village,43.802845,-79.356236,2.0,Residential Building (Apartment / Condo),Dog Run,Women's Store,Convenience Store,Deli / Bodega,Department Store,Dessert Shop,Discount Store,Eastern European Restaurant,Electronics Store
1,M2J,North York,"Fairview, Henry Farm, Oriole",43.78088,-79.347796,4.0,Clothing Store,Fast Food Restaurant,Women's Store,Coffee Shop,Tea Room,Cosmetics Shop,Burrito Place,Candy Store,Greek Restaurant,Chinese Restaurant
2,M2K,North York,Bayview Village,43.781015,-79.380542,4.0,Construction & Landscaping,Trail,Park,Dog Run,Golf Driving Range,Women's Store,Food Court,Department Store,Dessert Shop,Discount Store
3,M2L,North York,"Silver Hills, York Mills",43.757192,-79.379865,1.0,Music Venue,Women's Store,French Restaurant,Department Store,Dessert Shop,Discount Store,Dog Run,Eastern European Restaurant,Electronics Store,Falafel Restaurant
4,M2M,North York,"Newtonbrook, Willowdale",43.791475,-79.413605,4.0,Café,Korean Restaurant,Middle Eastern Restaurant,Pizza Place,Shopping Mall,Fast Food Restaurant,Ramen Restaurant,Fried Chicken Joint,Sporting Goods Shop,Supermarket


In [75]:
ny_merged

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M2H,North York,Hillcrest Village,43.802845,-79.356236,2.0,Residential Building (Apartment / Condo),Dog Run,Women's Store,Convenience Store,Deli / Bodega,Department Store,Dessert Shop,Discount Store,Eastern European Restaurant,Electronics Store
1,M2J,North York,"Fairview, Henry Farm, Oriole",43.78088,-79.347796,4.0,Clothing Store,Fast Food Restaurant,Women's Store,Coffee Shop,Tea Room,Cosmetics Shop,Burrito Place,Candy Store,Greek Restaurant,Chinese Restaurant
2,M2K,North York,Bayview Village,43.781015,-79.380542,4.0,Construction & Landscaping,Trail,Park,Dog Run,Golf Driving Range,Women's Store,Food Court,Department Store,Dessert Shop,Discount Store
3,M2L,North York,"Silver Hills, York Mills",43.757192,-79.379865,1.0,Music Venue,Women's Store,French Restaurant,Department Store,Dessert Shop,Discount Store,Dog Run,Eastern European Restaurant,Electronics Store,Falafel Restaurant
4,M2M,North York,"Newtonbrook, Willowdale",43.791475,-79.413605,4.0,Café,Korean Restaurant,Middle Eastern Restaurant,Pizza Place,Shopping Mall,Fast Food Restaurant,Ramen Restaurant,Fried Chicken Joint,Sporting Goods Shop,Supermarket
5,M2N,North York,Willowdale South,43.768165,-79.40742,4.0,Coffee Shop,Ramen Restaurant,Fast Food Restaurant,Café,Pet Store,Juice Bar,Middle Eastern Restaurant,Discount Store,Pharmacy,Pizza Place
6,M2P,North York,York Mills West,43.747855,-79.400062,4.0,Convenience Store,Speakeasy,Park,Bank,Department Store,Dessert Shop,Discount Store,Dog Run,Eastern European Restaurant,Electronics Store
7,M2R,North York,Willowdale West,43.777695,-79.445797,4.0,Intersection,Bus Line,Eastern European Restaurant,Park,Coffee Shop,Convenience Store,Bakery,Bank,Frozen Yogurt Shop,Discount Store
8,M3A,North York,Parkwoods,43.75244,-79.329271,0.0,Park,Food & Drink Shop,Women's Store,French Restaurant,Department Store,Dessert Shop,Discount Store,Dog Run,Eastern European Restaurant,Electronics Store
9,M3B,North York,Don Mills North,43.749195,-79.361905,0.0,Spa,Park,Coffee Shop,Burger Joint,Soccer Field,Women's Store,Department Store,Dessert Shop,Discount Store,Dog Run


In [78]:
# Remove NaN
ny_merged = ny_merged.dropna(axis=0, how='any').reset_index()
ny_merged['Cluster Labels'] = ny_merged['Cluster Labels'].astype('int64')

In [79]:
ny_merged

Unnamed: 0,index,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0,M2H,North York,Hillcrest Village,43.802845,-79.356236,2,Residential Building (Apartment / Condo),Dog Run,Women's Store,Convenience Store,Deli / Bodega,Department Store,Dessert Shop,Discount Store,Eastern European Restaurant,Electronics Store
1,1,M2J,North York,"Fairview, Henry Farm, Oriole",43.78088,-79.347796,4,Clothing Store,Fast Food Restaurant,Women's Store,Coffee Shop,Tea Room,Cosmetics Shop,Burrito Place,Candy Store,Greek Restaurant,Chinese Restaurant
2,2,M2K,North York,Bayview Village,43.781015,-79.380542,4,Construction & Landscaping,Trail,Park,Dog Run,Golf Driving Range,Women's Store,Food Court,Department Store,Dessert Shop,Discount Store
3,3,M2L,North York,"Silver Hills, York Mills",43.757192,-79.379865,1,Music Venue,Women's Store,French Restaurant,Department Store,Dessert Shop,Discount Store,Dog Run,Eastern European Restaurant,Electronics Store,Falafel Restaurant
4,4,M2M,North York,"Newtonbrook, Willowdale",43.791475,-79.413605,4,Café,Korean Restaurant,Middle Eastern Restaurant,Pizza Place,Shopping Mall,Fast Food Restaurant,Ramen Restaurant,Fried Chicken Joint,Sporting Goods Shop,Supermarket
5,5,M2N,North York,Willowdale South,43.768165,-79.40742,4,Coffee Shop,Ramen Restaurant,Fast Food Restaurant,Café,Pet Store,Juice Bar,Middle Eastern Restaurant,Discount Store,Pharmacy,Pizza Place
6,6,M2P,North York,York Mills West,43.747855,-79.400062,4,Convenience Store,Speakeasy,Park,Bank,Department Store,Dessert Shop,Discount Store,Dog Run,Eastern European Restaurant,Electronics Store
7,7,M2R,North York,Willowdale West,43.777695,-79.445797,4,Intersection,Bus Line,Eastern European Restaurant,Park,Coffee Shop,Convenience Store,Bakery,Bank,Frozen Yogurt Shop,Discount Store
8,8,M3A,North York,Parkwoods,43.75244,-79.329271,0,Park,Food & Drink Shop,Women's Store,French Restaurant,Department Store,Dessert Shop,Discount Store,Dog Run,Eastern European Restaurant,Electronics Store
9,9,M3B,North York,Don Mills North,43.749195,-79.361905,0,Spa,Park,Coffee Shop,Burger Joint,Soccer Field,Women's Store,Department Store,Dessert Shop,Discount Store,Dog Run


Show clusters on map

In [80]:
# create map
map_clusters = folium.Map(location=[nyll.lat, nyll.lng], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(ny_merged['Latitude'], ny_merged['Longitude'], ny_merged['Neighbourhood'], ny_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### All of the clusters have been placed on the map. Each color is a different cluster.

In [81]:
# Cluster 0
ny_merged.loc[ny_merged['Cluster Labels'] == 0, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Postcode,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,M3A,-79.329271,0,Park,Food & Drink Shop,Women's Store,French Restaurant,Department Store,Dessert Shop,Discount Store,Dog Run,Eastern European Restaurant,Electronics Store
9,M3B,-79.361905,0,Spa,Park,Coffee Shop,Burger Joint,Soccer Field,Women's Store,Department Store,Dessert Shop,Discount Store,Dog Run
12,M3K,-79.46732,0,Airport,Food Court,Park,Coffee Shop,Shoe Store,Department Store,Dessert Shop,Discount Store,Dog Run,Eastern European Restaurant
13,M3L,-79.505027,0,Hotel,Park,Women's Store,French Restaurant,Department Store,Dessert Shop,Discount Store,Dog Run,Eastern European Restaurant,Electronics Store
14,M3M,-79.493151,0,Park,Health & Beauty Service,Business Service,Women's Store,Food Court,Department Store,Dessert Shop,Discount Store,Dog Run,Eastern European Restaurant
20,M6L,-79.488305,0,Park,Bakery,Basketball Court,Women's Store,Fried Chicken Joint,Dessert Shop,Discount Store,Dog Run,Eastern European Restaurant,Electronics Store
22,M9M,-79.537522,0,Coffee Shop,Park,Nightclub,Women's Store,Deli / Bodega,Dessert Shop,Discount Store,Dog Run,Eastern European Restaurant,Electronics Store


In [86]:
# Cluster 1
ny_merged.loc[ny_merged['Cluster Labels'] == 1, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Postcode,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,M2L,-79.379865,1,Music Venue,Women's Store,French Restaurant,Department Store,Dessert Shop,Discount Store,Dog Run,Eastern European Restaurant,Electronics Store,Falafel Restaurant


In [87]:
# Cluster 2
ny_merged.loc[ny_merged['Cluster Labels'] == 2, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Postcode,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M2H,-79.356236,2,Residential Building (Apartment / Condo),Dog Run,Women's Store,Convenience Store,Deli / Bodega,Department Store,Dessert Shop,Discount Store,Eastern European Restaurant,Electronics Store


In [88]:
# Cluster 3
ny_merged.loc[ny_merged['Cluster Labels'] == 3, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Postcode,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
21,M9L,-79.556852,3,Sporting Goods Shop,Furniture / Home Store,French Restaurant,Deli / Bodega,Department Store,Dessert Shop,Discount Store,Dog Run,Eastern European Restaurant,Electronics Store


In [89]:
# Cluster 4
ny_merged.loc[ny_merged['Cluster Labels'] == 4, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Postcode,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,M2J,-79.347796,4,Clothing Store,Fast Food Restaurant,Women's Store,Coffee Shop,Tea Room,Cosmetics Shop,Burrito Place,Candy Store,Greek Restaurant,Chinese Restaurant
2,M2K,-79.380542,4,Construction & Landscaping,Trail,Park,Dog Run,Golf Driving Range,Women's Store,Food Court,Department Store,Dessert Shop,Discount Store
4,M2M,-79.413605,4,Café,Korean Restaurant,Middle Eastern Restaurant,Pizza Place,Shopping Mall,Fast Food Restaurant,Ramen Restaurant,Fried Chicken Joint,Sporting Goods Shop,Supermarket
5,M2N,-79.40742,4,Coffee Shop,Ramen Restaurant,Fast Food Restaurant,Café,Pet Store,Juice Bar,Middle Eastern Restaurant,Discount Store,Pharmacy,Pizza Place
6,M2P,-79.400062,4,Convenience Store,Speakeasy,Park,Bank,Department Store,Dessert Shop,Discount Store,Dog Run,Eastern European Restaurant,Electronics Store
7,M2R,-79.445797,4,Intersection,Bus Line,Eastern European Restaurant,Park,Coffee Shop,Convenience Store,Bakery,Bank,Frozen Yogurt Shop,Discount Store
10,M3C,-79.343453,4,Intersection,Gym,Bubble Tea Shop,Supermarket,Beer Store,Grocery Store,Coffee Shop,Eastern European Restaurant,Dog Run,French Restaurant
11,M3J,-79.487183,4,Coffee Shop,Restaurant,Pizza Place,Massage Studio,Japanese Restaurant,Caribbean Restaurant,Bank,Bar,Fast Food Restaurant,Furniture / Home Store
15,M3N,-79.51959,4,Pizza Place,Discount Store,Fast Food Restaurant,Grocery Store,Shopping Mall,Sandwich Place,Liquor Store,Caribbean Restaurant,Fried Chicken Joint,Beer Store
16,M4A,-79.31332,4,Intersection,Grocery Store,Park,Nail Salon,French Restaurant,Health & Beauty Service,Fast Food Restaurant,Department Store,Dessert Shop,Ice Cream Shop


In [90]:
# This borough does not contain many venues, so some of the clusters are individual locations.