# Segmenting and Clustering Neighborhoods in Toronto 
### Applied Data Science Capstone, Week 3 Peer-Graded Assignment

Neville Yoon

In [1]:
import io

import numpy as np
import pandas as pd
import requests
#!conda install -c conda-forge beautifulsoup4 --yes 
from bs4 import BeautifulSoup
#!conda install -c anaconda lxml --yes

# Section 1: Scrape the table from the Wikipedia page into a DataFrame

### Use BeautifulSoup to scrape the web page and convert the table into a Pandas DataFrame

In [2]:
# Download the Wikipedia page and parse it into a BeautifulSoup document
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
response = requests.get(url, timeout=30)
# Stop here on an HTTP error instead of parsing an error page further down
response.raise_for_status()
page = response.text
doc = BeautifulSoup(page, 'lxml')

# Extract the first table element of the page and convert it to a DataFrame
tables = doc.find('table')
if tables is None:
    raise ValueError('No table element found at {}'.format(url))
# read_html is given a file-like object: passing the markup as a plain string
# is deprecated in recent pandas releases
df = pd.read_html(io.StringIO(str(tables)))[0]
df

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,
176,M6Z,Not assigned,
177,M7Z,Not assigned,
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


### Inspect and clean the data

In [3]:
# count the rows whose Borough reads 'Not assigned' (compared case-insensitively)
int((df['Borough'].str.upper() == 'NOT ASSIGNED').sum())

77

In [4]:
# remove rows where Borough is 'Not assigned'
# The comparison is case-insensitive so that it removes exactly the rows that the
# case-insensitive count in the previous cell reports.
df_filtered = df[df['Borough'].str.upper() != 'NOT ASSIGNED'].reset_index(drop=True)
df_filtered

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [5]:
# count the remaining rows whose Neighborhood reads 'Not assigned' (case-insensitive)
int((df_filtered['Neighborhood'].str.upper() == 'NOT ASSIGNED').sum())

0

In [6]:
# count the remaining rows whose Neighborhood is missing
int(df_filtered['Neighborhood'].isna().sum())

0

## Answer #1:

In [7]:
# (rows, columns) of the table after the 'Not assigned' boroughs were removed
df_filtered.shape

(103, 3)

# Section 2: Geocode Postal Codes

### Add latitude and longitude coordinates to the postal code/neighborhood DataFrame.

First let's attempt to get latitude and longitude coordinates for the postal codes using geocoder.

In [8]:
import io
import geocoder # import geocoder

def get_coords(postal_code, max_tries=10, retry_delay=1.0):
    '''Get latitude and longitude coordinates for a Toronto Postal Code.

    Queries geocoder.osm with "<postal_code>, Toronto, Ontario" until it yields
    coordinates or the retry budget is used up. Progress is printed on a single
    line: one dot per attempt, then a failure message if nothing was found.

    Parameters:
        postal_code: postal code (or any address fragment) to look up.
        max_tries: maximum number of lookups to attempt (default 10).
        retry_delay: seconds to wait before each retry (default 1.0), so that
            failed lookups are not re-sent back-to-back; 0 disables the wait.

    Returns:
        The value of the result's latlng attribute for the first lookup where it
        is not None, or None when every attempt failed.
    '''
    import time  # local import keeps this cell self-contained

    print("attempting to get coordinates for {}".format(postal_code),end="")
    lat_lng_coords = None
    for attempt in range(max_tries):
        # pause only between attempts, never before the first one
        if attempt > 0 and retry_delay > 0:
            time.sleep(retry_delay)
        print(".",end="")
        g = geocoder.osm('{}, Toronto, Ontario'.format(postal_code))
        lat_lng_coords = g.latlng
        if lat_lng_coords is not None:
            break
    else:
        # loop ended without a break: no attempt produced coordinates
        print("Failed to get coordinates",end="")
    print("")
    return lat_lng_coords

In [9]:
# Geocode every postal code, keeping the two coordinate lists aligned with
# postal_codes; a failed lookup contributes None to both lists.
postal_codes = df_filtered['Postal Code'].tolist()

latitudes, longitudes = [], []
for pc in postal_codes:
    coords = get_coords(pc)
    latitudes.append(coords[0] if coords else None)
    longitudes.append(coords[1] if coords else None)

attempting to get coordinates for M3A.
attempting to get coordinates for M4A..........Failed to get coordinates
attempting to get coordinates for M5A..........Failed to get coordinates
attempting to get coordinates for M6A..........Failed to get coordinates
attempting to get coordinates for M7A.
attempting to get coordinates for M9A..........Failed to get coordinates
attempting to get coordinates for M1B.
attempting to get coordinates for M3B..........Failed to get coordinates
attempting to get coordinates for M4B..........Failed to get coordinates
attempting to get coordinates for M5B..........Failed to get coordinates
attempting to get coordinates for M6B..........Failed to get coordinates
attempting to get coordinates for M9B.
attempting to get coordinates for M1C.
attempting to get coordinates for M3C.
attempting to get coordinates for M4C..........Failed to get coordinates
attempting to get coordinates for M5C..........Failed to get coordinates
attempting to get coordinates for M6

In [10]:
# Put the geocoded coordinates in a frame keyed by postal code and attach
# them to the cleaned neighborhood table.
df_lat_lon = pd.DataFrame({
    'Postal Code': postal_codes,
    'Latitude': latitudes,
    'Longitude': longitudes,
})
df_combined = df_filtered.merge(df_lat_lon, on='Postal Code')
df_combined

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.653482,-79.383935
1,M4A,North York,Victoria Village,,
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",,
3,M6A,North York,"Lawrence Manor, Lawrence Heights",,
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.653482,-79.383935
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",,
99,M4Y,Downtown Toronto,Church and Wellesley,,
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",,
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",,


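That didn't work well: most lookups failed, and the ones that succeeded returned the same coordinates for different postal codes. So let's load the coordinates from a prepared CSV file on the web instead.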

In [11]:
# Download the postal-code coordinates CSV and attach it to the cleaned table
url = 'https://cocl.us/Geospatial_data'
response = requests.get(url, timeout=30)
# Stop here on an HTTP error instead of parsing an error page as CSV
response.raise_for_status()
csv = response.content
df_lat_lon = pd.read_csv(io.StringIO(csv.decode('utf-8')))
df_combined = pd.merge(df_filtered,df_lat_lon, on='Postal Code')
df_combined = df_combined.rename(columns={'Postal Code':'PostalCode'})

## Answer #2

In [12]:
# Display the neighborhood table merged with the downloaded coordinates
df_combined

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


# Section 3: Cluster and Map Neighborhoods

### Segment and cluster Toronto neighborhoods following the approach from the New York Neighborhood Segmenting and Clustering lab.

In [13]:
#!conda install -c conda-forge folium
import folium # map rendering library
from sklearn.cluster import KMeans
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
# Foursquare credentials will be kept in keyring and accessed via ipython_secrets
from ipython_secrets import *
import matplotlib.cm as cm
import matplotlib.colors as colors

As suggested, we'll just look at boroughs whose names contain 'Toronto'.

In [14]:
# Keep only the rows whose Borough contains 'Toronto' and renumber them from 0
df_neighborhoods = (
    df_combined
    .loc[df_combined['Borough'].str.contains('Toronto')]
    .reset_index(drop=True)
)
df_neighborhoods

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031
5,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
6,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
7,M6G,Downtown Toronto,Christie,43.669542,-79.422564
8,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568
9,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259


#### Map the postal codes/neighborhoods.

In [15]:
# get latitude and longitude for Toronto
address = 'Toronto, Ontario'
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
# geocode() yields None when the address cannot be resolved; fail with a clear
# message instead of an AttributeError on the next line
if location is None:
    raise RuntimeError("Could not geocode '{}'".format(address))
latitude = location.latitude
longitude = location.longitude

# create a map of Toronto
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

# add one circle marker per postal code/neighborhood
marker_columns = ['Latitude', 'Longitude', 'PostalCode', 'Borough', 'Neighborhood']
for lat, lng, postal_code, borough, neighborhood in df_neighborhoods[marker_columns].itertuples(index=False, name=None):
    label = '{}: {} - {}'.format(postal_code, borough, neighborhood)
    # parse_html belongs to the Popup; it is not a CircleMarker option
    popup = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)

map_toronto

#### The map will not be visible when viewed from GitHub. You can see a screenshot of it here:

https://github.com/nevyoon/Coursera_Capstone/blob/master/segmenting_and_clustering_neighborhoods_in_toronto_map_1.png

In [16]:
# get FourSquare credentials out of keyring
# get_secret is not defined in this notebook; presumably it is provided by the
# `from ipython_secrets import *` star-import — verify if that import changes.
FOURSQUARE_CLIENT_ID = get_secret('FOURSQUARE_CLIENT_ID')
FOURSQUARE_CLIENT_SECRET = get_secret('FOURSQUARE_CLIENT_SECRET')

#### Get the venues from FourSquare

In [17]:
#Borrow the function to get venues from FourSquare from the 
#New York Neighborhood Segmenting and Clustering lab
def getNearbyVenues(names, 
                    latitudes, 
                    longitudes, 
                    client_id, 
                    client_secret, 
                    version='20180605', 
                    radius=500, 
                    limit=100):
    '''Collect the venues Foursquare reports around each location.

    Sends one request to the venues/explore endpoint per (name, latitude,
    longitude) triple and prints each name as it is processed.

    Parameters:
        names, latitudes, longitudes: parallel iterables describing the locations.
        client_id, client_secret: Foursquare API credentials.
        version: value sent as the API "v" parameter.
        radius: value sent as the "radius" parameter.
        limit: value sent as the "limit" parameter.

    Returns:
        DataFrame with one row per venue and the columns Neighborhood,
        Neighborhood Latitude, Neighborhood Longitude, Venue, Venue Latitude,
        Venue Longitude and Venue Category. The frame is empty (but keeps these
        columns) when no venue is returned. A venue without categories gets a
        missing Venue Category.

    Raises:
        requests.HTTPError: when the API answers with an error status.
    '''
    columns = ['Neighborhood', 
               'Neighborhood Latitude', 
               'Neighborhood Longitude', 
               'Venue', 
               'Venue Latitude', 
               'Venue Longitude', 
               'Venue Category']
    endpoint = 'https://api.foursquare.com/v2/venues/explore'

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # let requests build and encode the query string
        response = requests.get(
            endpoint,
            params={
                'client_id': client_id,
                'client_secret': client_secret,
                'v': version,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': limit,
            },
            timeout=30)
        # report API errors as HTTP errors rather than as a KeyError below
        response.raise_for_status()

        groups = response.json()['response'].get('groups') or []
        items = groups[0]['items'] if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue may come without any category
                categories[0]['name'] if categories else None))

    # passing columns= keeps the schema even when rows is empty
    return pd.DataFrame(rows, columns=columns)

In [18]:
# Fetch the venues around every selected neighborhood, using the function's
# default version, radius and limit.
toronto_venues = getNearbyVenues(names=df_neighborhoods['Neighborhood'],
                                 latitudes=df_neighborhoods['Latitude'],
                                 longitudes=df_neighborhoods['Longitude'],
                                 client_id=FOURSQUARE_CLIENT_ID,
                                 client_secret=FOURSQUARE_CLIENT_SECRET
                                )

Regent Park, Harbourfront
Queen's Park, Ontario Provincial Government
Garden District, Ryerson
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
The Danforth West, Riverdale
Toronto Dominion Centre, Design Exchange
Brockton, Parkdale Village, Exhibition Place
India Bazaar, The Beaches West
Commerce Court, Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North & West, Forest Hill Road Park
High Park, The Junction South
North Toronto West, Lawrence Park
The Annex, North Midtown, Yorkville
Parkdale, Roncesvalles
Davisville
University of Toronto, Harbord
Runnymede, Swansea
Moore Park, Summerhill East
Kensington Market, Chinatown, Grange Park
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
R

In [19]:
# Print the (rows, columns) of the venue table, then preview its first rows
print(toronto_venues.shape)
toronto_venues.head()

(1605, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Regent Park, Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Regent Park, Harbourfront",43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot
3,"Regent Park, Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
4,"Regent Park, Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa


#### One-hot encode the venue types

In [20]:
# One-hot encode the venue categories, then attach the neighborhood of each venue.
# 'Neighborhood' is itself one of the venue categories returned for Toronto, so
# that indicator column is renamed before the real neighborhood column is added.
toronto_onehot = (
    pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")
    .rename(columns={'Neighborhood': 'Neighborhood Venue'})
    .assign(Neighborhood=toronto_venues['Neighborhood'])
)

# the neighborhood column was appended last; move it to the front
fixed_columns = [toronto_onehot.columns[-1], *toronto_onehot.columns[:-1]]
toronto_onehot = toronto_onehot[fixed_columns]
toronto_onehot

Unnamed: 0,Neighborhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1600,"Business reply mail Processing Centre, South C...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1601,"Business reply mail Processing Centre, South C...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1602,"Business reply mail Processing Centre, South C...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1603,"Business reply mail Processing Centre, South C...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [21]:
# average the indicator columns per neighborhood to get category frequencies
toronto_grouped = toronto_onehot.groupby('Neighborhood', as_index=False).mean()
toronto_grouped.head()

Unnamed: 0,Neighborhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.066667,0.066667,0.066667,0.133333,0.133333,0.133333,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.016129,0.0,0.0,0.016129,0.0,0.0,0.016129


#### Build a dataframe with the top-10 venues for each neighborhood.

In [22]:
# borrowed from New York Neighborhood Segmenting and Clustering lab
def get_n_most_common_venues(row, n):
    '''Return the labels of the n largest entries of a frequency row.

    The first element of row is skipped (it holds the neighborhood name); the
    remaining entries are sorted in descending order and the index labels of
    the first n are returned as an array.
    '''
    return row.iloc[1:].sort_values(ascending=False).index.values[:n]

# derived from New York Neighborhood Segmenting and Clustering lab
def _ordinal_suffix(rank):
    '''Return the English ordinal suffix ('st', 'nd', 'rd' or 'th') for rank.'''
    # 11th, 12th and 13th (and 111th, ...) are the exceptions to the last-digit rule
    if 11 <= rank % 100 <= 13:
        return 'th'
    return {1: 'st', 2: 'nd', 3: 'rd'}.get(rank % 10, 'th')


def init_n_most_common_venues_dataframe(n):
    '''Create an empty DataFrame for the n most common venues per neighborhood.

    Parameters:
        n: number of venue rank columns to create.

    Returns:
        An empty DataFrame whose columns are 'Neighborhood' followed by
        '1st Most Common Venue', '2nd Most Common Venue', ... up to rank n.
    '''
    columns = ['Neighborhood']
    for rank in range(1, n + 1):
        columns.append('{}{} Most Common Venue'.format(rank, _ordinal_suffix(rank)))
    return pd.DataFrame(columns=columns)

In [23]:
# Start from the empty ranking table and copy in the neighborhood names
toronto_top_ten_venues = init_n_most_common_venues_dataframe(10)
toronto_top_ten_venues['Neighborhood'] = toronto_grouped['Neighborhood']

# Fill the ten rank columns of each row from that neighborhood's frequency row
for i in range(len(toronto_grouped)):
    frequency_row = toronto_grouped.iloc[i]
    toronto_top_ten_venues.iloc[i, 1:] = get_n_most_common_venues(frequency_row, 10)

toronto_top_ten_venues

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Cocktail Bar,Bakery,Café,Cheese Shop,Beer Bar,Restaurant,Seafood Restaurant,Pub,Creperie
1,"Brockton, Parkdale Village, Exhibition Place",Café,Coffee Shop,Breakfast Spot,Nightclub,Grocery Store,Stadium,Burrito Place,Restaurant,Climbing Gym,Pet Store
2,"Business reply mail Processing Centre, South C...",Yoga Studio,Pizza Place,Light Rail Station,Smoke Shop,Brewery,Spa,Burrito Place,Farmers Market,Fast Food Restaurant,Restaurant
3,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Lounge,Airport Service,Airport Terminal,Boutique,Sculpture Garden,Coffee Shop,Boat or Ferry,Harbor / Marina,Bar,Airport Gate
4,Central Bay Street,Coffee Shop,Italian Restaurant,Café,Sandwich Place,Burger Joint,Japanese Restaurant,Ice Cream Shop,Bar,Thai Restaurant,Salad Place
5,Christie,Grocery Store,Café,Park,Diner,Baby Store,Restaurant,Athletics & Sports,Italian Restaurant,Candy Store,Coffee Shop
6,Church and Wellesley,Coffee Shop,Japanese Restaurant,Sushi Restaurant,Gay Bar,Restaurant,Men's Store,Hotel,Gastropub,Café,Yoga Studio
7,"Commerce Court, Victoria Hotel",Coffee Shop,Café,Restaurant,Hotel,Gym,American Restaurant,Japanese Restaurant,Seafood Restaurant,Italian Restaurant,Deli / Bodega
8,Davisville,Dessert Shop,Sandwich Place,Pizza Place,Italian Restaurant,Café,Coffee Shop,Gym,Sushi Restaurant,Asian Restaurant,Seafood Restaurant
9,Davisville North,Park,Gym,Breakfast Spot,Hotel,Sandwich Place,Department Store,Food & Drink Shop,Yoga Studio,Diner,Dim Sum Restaurant


#### Segment the neighborhoods by venues into 5 clusters using k-means clustering.

In [24]:
# set number of clusters
kclusters = 5

# run k-means clustering on the venue frequencies (everything except the name column)
# - drop(columns=...) replaces the positional axis argument, which pandas 2.0 removed
# - n_init is pinned to 10, the historical default, because newer scikit-learn
#   releases use a different default and would change the clustering
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(
    toronto_grouped.drop(columns='Neighborhood')
)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([4, 0, 0, 4, 4, 0, 4, 4, 4, 4, 0, 4, 3, 4, 4, 0, 0, 0, 3, 4, 1, 0,
       0, 4, 4, 4, 3, 2, 0, 4, 0, 4, 0, 0, 0, 0, 0, 4, 0], dtype=int32)

#### Add the cluster labels to the top-10 venues DataFrame.

In [25]:
# insert() raises if the column already exists, so drop a previous copy first;
# this keeps the cell safe to re-run.
if 'Cluster Labels' in toronto_top_ten_venues.columns:
    toronto_top_ten_venues = toronto_top_ten_venues.drop(columns='Cluster Labels')
# kmeans.labels_ is in the row order of toronto_grouped, which this table shares
toronto_top_ten_venues.insert(0, 'Cluster Labels', kmeans.labels_)
toronto_top_ten_venues.head()

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,4,Berczy Park,Coffee Shop,Cocktail Bar,Bakery,Café,Cheese Shop,Beer Bar,Restaurant,Seafood Restaurant,Pub,Creperie
1,0,"Brockton, Parkdale Village, Exhibition Place",Café,Coffee Shop,Breakfast Spot,Nightclub,Grocery Store,Stadium,Burrito Place,Restaurant,Climbing Gym,Pet Store
2,0,"Business reply mail Processing Centre, South C...",Yoga Studio,Pizza Place,Light Rail Station,Smoke Shop,Brewery,Spa,Burrito Place,Farmers Market,Fast Food Restaurant,Restaurant
3,4,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Lounge,Airport Service,Airport Terminal,Boutique,Sculpture Garden,Coffee Shop,Boat or Ferry,Harbor / Marina,Bar,Airport Gate
4,4,Central Bay Street,Coffee Shop,Italian Restaurant,Café,Sandwich Place,Burger Joint,Japanese Restaurant,Ice Cream Shop,Bar,Thai Restaurant,Salad Place


#### Add PostalCode, Borough, Latitude and Longitude.

In [26]:
# Left-join the cluster labels and top-10 venues onto the neighborhood table,
# matching rows by neighborhood name.
df_merged = df_neighborhoods.merge(toronto_top_ten_venues, on='Neighborhood', how='left')
df_merged

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,4,Coffee Shop,Bakery,Park,Pub,Theater,Breakfast Spot,Café,Restaurant,Beer Store,Spa
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,4,Coffee Shop,Music Venue,Beer Bar,Smoothie Shop,Sandwich Place,Burrito Place,Café,Park,College Auditorium,College Cafeteria
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,4,Clothing Store,Coffee Shop,Cosmetics Shop,Restaurant,Café,Bubble Tea Shop,Japanese Restaurant,Italian Restaurant,Middle Eastern Restaurant,Hotel
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,4,Café,Coffee Shop,Cocktail Bar,Gastropub,American Restaurant,Italian Restaurant,Restaurant,Beer Bar,Clothing Store,Moroccan Restaurant
4,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Trail,Neighborhood Venue,Pub,Health Food Store,Distribution Center,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Yoga Studio
5,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,4,Coffee Shop,Cocktail Bar,Bakery,Café,Cheese Shop,Beer Bar,Restaurant,Seafood Restaurant,Pub,Creperie
6,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,4,Coffee Shop,Italian Restaurant,Café,Sandwich Place,Burger Joint,Japanese Restaurant,Ice Cream Shop,Bar,Thai Restaurant,Salad Place
7,M6G,Downtown Toronto,Christie,43.669542,-79.422564,0,Grocery Store,Café,Park,Diner,Baby Store,Restaurant,Athletics & Sports,Italian Restaurant,Candy Store,Coffee Shop
8,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568,4,Coffee Shop,Café,Restaurant,Deli / Bodega,Gym,Hotel,Clothing Store,Thai Restaurant,Sushi Restaurant,Concert Hall
9,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259,0,Pharmacy,Bakery,Grocery Store,Pool,Brewery,Café,Bar,Bank,Supermarket,Middle Eastern Restaurant


#### Reset cluster labels to start from 1 instead of 0.

In [27]:
# Derive the 1-based labels from the zero-based labels kept in
# toronto_top_ten_venues instead of incrementing df_merged in place, so that
# re-running this cell does not shift the labels a second time.
zero_based_labels = toronto_top_ten_venues.set_index('Neighborhood')['Cluster Labels']
df_merged['Cluster Labels'] = df_merged['Neighborhood'].map(zero_based_labels) + 1
df_merged

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,5,Coffee Shop,Bakery,Park,Pub,Theater,Breakfast Spot,Café,Restaurant,Beer Store,Spa
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,5,Coffee Shop,Music Venue,Beer Bar,Smoothie Shop,Sandwich Place,Burrito Place,Café,Park,College Auditorium,College Cafeteria
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,5,Clothing Store,Coffee Shop,Cosmetics Shop,Restaurant,Café,Bubble Tea Shop,Japanese Restaurant,Italian Restaurant,Middle Eastern Restaurant,Hotel
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,5,Café,Coffee Shop,Cocktail Bar,Gastropub,American Restaurant,Italian Restaurant,Restaurant,Beer Bar,Clothing Store,Moroccan Restaurant
4,M4E,East Toronto,The Beaches,43.676357,-79.293031,1,Trail,Neighborhood Venue,Pub,Health Food Store,Distribution Center,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Yoga Studio
5,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,5,Coffee Shop,Cocktail Bar,Bakery,Café,Cheese Shop,Beer Bar,Restaurant,Seafood Restaurant,Pub,Creperie
6,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,5,Coffee Shop,Italian Restaurant,Café,Sandwich Place,Burger Joint,Japanese Restaurant,Ice Cream Shop,Bar,Thai Restaurant,Salad Place
7,M6G,Downtown Toronto,Christie,43.669542,-79.422564,1,Grocery Store,Café,Park,Diner,Baby Store,Restaurant,Athletics & Sports,Italian Restaurant,Candy Store,Coffee Shop
8,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568,5,Coffee Shop,Café,Restaurant,Deli / Bodega,Gym,Hotel,Clothing Store,Thai Restaurant,Sushi Restaurant,Concert Hall
9,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259,1,Pharmacy,Bakery,Grocery Store,Pool,Brewery,Café,Bar,Bank,Supermarket,Middle Eastern Restaurant


#### Map the clusters.

In [28]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(df_merged['Latitude'], df_merged['Longitude'], df_merged['Neighborhood'], df_merged['Cluster Labels']):
    # a neighborhood that was not matched to a cluster has a missing label
    # and cannot be coloured
    if pd.isna(cluster):
        continue
    # labels may arrive as floats; list indexing needs a plain int
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        # labels are 1-based, the colour list is 0-based
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

#### The map will not be visible when viewed from GitHub. You can see a screenshot of it here:

https://github.com/nevyoon/Coursera_Capstone/blob/master/segmenting_and_clustering_neighborhoods_in_toronto_map_2.png

#### List the neighborhoods in each cluster

In [29]:
def get_ith_cluster(i, df):
    '''Return the rows of df whose 'Cluster Labels' value equals i.

    Only the column at position 2 and the columns from position 6 onwards
    are kept in the result.
    '''
    kept_columns = df.columns[[2, *range(6, df.shape[1])]]
    in_cluster = df['Cluster Labels'] == i
    return df.loc[in_cluster, kept_columns]

#### Cluster 1

Mixed residential and commercial neighborhoods around the downtown core.

In [30]:
# Neighborhoods labelled cluster 1, with their most common venue columns
get_ith_cluster(1,df_merged)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,The Beaches,Trail,Neighborhood Venue,Pub,Health Food Store,Distribution Center,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Yoga Studio
7,Christie,Grocery Store,Café,Park,Diner,Baby Store,Restaurant,Athletics & Sports,Italian Restaurant,Candy Store,Coffee Shop
9,"Dufferin, Dovercourt Village",Pharmacy,Bakery,Grocery Store,Pool,Brewery,Café,Bar,Bank,Supermarket,Middle Eastern Restaurant
12,"The Danforth West, Riverdale",Greek Restaurant,Italian Restaurant,Coffee Shop,Restaurant,Ice Cream Shop,Furniture / Home Store,Fruit & Vegetable Store,Pub,Pizza Place,Lounge
14,"Brockton, Parkdale Village, Exhibition Place",Café,Coffee Shop,Breakfast Spot,Nightclub,Grocery Store,Stadium,Burrito Place,Restaurant,Climbing Gym,Pet Store
15,"India Bazaar, The Beaches West",Fast Food Restaurant,Steakhouse,Burrito Place,Pub,Intersection,Restaurant,Italian Restaurant,Fish & Chips Shop,Ice Cream Shop,Pizza Place
17,Studio District,Café,Coffee Shop,Gastropub,Bakery,Brewery,American Restaurant,Comfort Food Restaurant,Seafood Restaurant,Sandwich Place,Cheese Shop
22,"High Park, The Junction South",Thai Restaurant,Mexican Restaurant,Bakery,Café,Diner,Flea Market,Cajun / Creole Restaurant,Italian Restaurant,Speakeasy,Fried Chicken Joint
23,"North Toronto West, Lawrence Park",Clothing Store,Coffee Shop,Shoe Store,Salon / Barbershop,Restaurant,Rental Car Location,Café,Chinese Restaurant,Pet Store,Yoga Studio
24,"The Annex, North Midtown, Yorkville",Sandwich Place,Café,Coffee Shop,Park,Cosmetics Shop,Liquor Store,Burger Joint,Indian Restaurant,Flower Shop,Middle Eastern Restaurant


#### Cluster 2

Outer downtown.

In [31]:
# Neighborhoods labelled cluster 2, with their most common venue columns
get_ith_cluster(2,df_merged)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
29,"Moore Park, Summerhill East",Playground,Summer Camp,Restaurant,Yoga Studio,Deli / Bodega,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant,Dog Run


#### Cluster 3

Upscale suburbs.

In [32]:
# Neighborhoods labelled cluster 3, with their most common venue columns
get_ith_cluster(3,df_merged)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,Roselawn,Home Service,Garden,Yoga Studio,Deli / Bodega,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant,Dog Run


#### Cluster 4

Upscale suburbs.

In [33]:
# Neighborhoods labelled cluster 4, with their most common venue columns
get_ith_cluster(4,df_merged)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,Lawrence Park,Park,Dim Sum Restaurant,Bus Line,Swim School,Event Space,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant,Dog Run
21,"Forest Hill North & West, Forest Hill Road Park",Park,Jewelry Store,Trail,Bus Line,Sushi Restaurant,Yoga Studio,Department Store,Eastern European Restaurant,Donut Shop,Doner Restaurant
33,Rosedale,Park,Playground,Trail,Deli / Bodega,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant,Dog Run


#### Cluster 5

Downtown core: coffee shops, cafés and restaurants dominate.

In [34]:
# Neighborhoods labelled cluster 5, with their most common venue columns
get_ith_cluster(5,df_merged)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Regent Park, Harbourfront",Coffee Shop,Bakery,Park,Pub,Theater,Breakfast Spot,Café,Restaurant,Beer Store,Spa
1,"Queen's Park, Ontario Provincial Government",Coffee Shop,Music Venue,Beer Bar,Smoothie Shop,Sandwich Place,Burrito Place,Café,Park,College Auditorium,College Cafeteria
2,"Garden District, Ryerson",Clothing Store,Coffee Shop,Cosmetics Shop,Restaurant,Café,Bubble Tea Shop,Japanese Restaurant,Italian Restaurant,Middle Eastern Restaurant,Hotel
3,St. James Town,Café,Coffee Shop,Cocktail Bar,Gastropub,American Restaurant,Italian Restaurant,Restaurant,Beer Bar,Clothing Store,Moroccan Restaurant
5,Berczy Park,Coffee Shop,Cocktail Bar,Bakery,Café,Cheese Shop,Beer Bar,Restaurant,Seafood Restaurant,Pub,Creperie
6,Central Bay Street,Coffee Shop,Italian Restaurant,Café,Sandwich Place,Burger Joint,Japanese Restaurant,Ice Cream Shop,Bar,Thai Restaurant,Salad Place
8,"Richmond, Adelaide, King",Coffee Shop,Café,Restaurant,Deli / Bodega,Gym,Hotel,Clothing Store,Thai Restaurant,Sushi Restaurant,Concert Hall
10,"Harbourfront East, Union Station, Toronto Islands",Coffee Shop,Aquarium,Café,Hotel,Italian Restaurant,Fried Chicken Joint,Brewery,Restaurant,Scenic Lookout,Sporting Goods Shop
11,"Little Portugal, Trinity",Bar,Restaurant,Asian Restaurant,Men's Store,Café,Vegetarian / Vegan Restaurant,Cuban Restaurant,Brewery,Record Shop,Pizza Place
13,"Toronto Dominion Centre, Design Exchange",Coffee Shop,Café,Hotel,Restaurant,American Restaurant,Seafood Restaurant,Italian Restaurant,Japanese Restaurant,Salad Place,Beer Bar
