## Segmenting and Clustering Neighborhoods in Toronto

# 1. Get the data and transform it into the form needed

In [255]:
import os
from io import StringIO

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [256]:
# Download the Wikipedia page listing the "M" postal codes.
res = requests.get(
    "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    timeout=30,  # do not let a stalled connection block the notebook indefinitely
)
# Fail loudly on an HTTP error instead of parsing an error page.
res.raise_for_status()
soup = BeautifulSoup(res.content,'lxml')

# The first <table> on the page is used as the postal-code table.
tables = soup.find_all('table')
if not tables:
    raise ValueError("No <table> element found on the postal-code page")
table = tables[0]

# read_html expects a path or file-like object; passing literal HTML is
# deprecated in recent pandas, so wrap the markup in StringIO.
df = pd.read_html(StringIO(str(table)))[0]
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


## 1.1 Drop rows with no borough assigned

In [257]:
# Keep only the rows that have a borough, then renumber the rows from 0.
has_borough = df['Borough'] != 'Not assigned'
df = df[has_borough].reset_index(drop=True)
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights


## 1.2 If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.

In [258]:
# Rows that have a borough but no neighbourhood name take the borough name.
# A boolean mask with .loc does this in one vectorised assignment instead of a
# row-by-row iterrows() loop.
needs_name = (df['Borough'] != 'Not assigned') & (df['Neighbourhood'] == 'Not assigned')
df.loc[needs_name, 'Neighbourhood'] = df.loc[needs_name, 'Borough']

## 1.3 More than one neighborhood can exist in one postal code area. Rows that share a postal code will be combined into one row, with the neighborhoods separated by commas.

In [259]:
# Collapse to one row per (Postcode, Borough); the neighbourhood names of each
# group are collected into a Python list, in their original row order.
# apply(list) avoids the tuple -> list round trip and DataFrame.applymap,
# which is deprecated in recent pandas.
df = df.groupby(['Postcode', 'Borough'])['Neighbourhood'].apply(list)
df = df.reset_index()
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"[Rouge, Malvern]"
1,M1C,Scarborough,"[Highland Creek, Rouge Hill, Port Union]"
2,M1E,Scarborough,"[Guildwood, Morningside, West Hill]"
3,M1G,Scarborough,[Woburn]
4,M1H,Scarborough,[Cedarbrae]


In [260]:
# Turn each list of neighbourhood names into a single ', '-separated string.
# str.join gives the same text as concatenating "name, " and trimming the
# trailing separator (an empty list becomes ''), without a temporary column.
# Values that are already strings are left untouched so the cell can be re-run.
df['Neighbourhood'] = df['Neighbourhood'].map(
    lambda names: ', '.join(names) if isinstance(names, (list, tuple)) else names
)
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [261]:
# (rows, columns) of the table after combining rows per postal code.
df.shape

(103, 3)

# 2. Get the latitude and longitude of all the postal codes

In [262]:
!pip install geocoder



In [263]:
import geocoder

# Upper bound on lookups per postal code, so a code that never resolves cannot
# hang the notebook in an endless retry loop.
MAX_GEOCODE_ATTEMPTS = 10

for index, row in df.iterrows():
    lat_lng_coords = None
    # g.latlng is retried while it is None, up to the attempt limit.
    for _attempt in range(MAX_GEOCODE_ATTEMPTS):
        g = geocoder.arcgis('{}, Toronto, Ontario'.format(row.Postcode))
        lat_lng_coords = g.latlng
        if lat_lng_coords is not None:
            break
    if lat_lng_coords is None:
        raise RuntimeError(
            'Could not geocode postal code {} after {} attempts'.format(
                row.Postcode, MAX_GEOCODE_ATTEMPTS))
    # latlng is indexed as [latitude, longitude].
    df.at[index,'Latitude'] = lat_lng_coords[0]
    df.at[index,'Longitude'] = lat_lng_coords[1]

df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.811525,-79.195517
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.78573,-79.15875
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.76569,-79.175256
3,M1G,Scarborough,Woburn,43.768359,-79.21759
4,M1H,Scarborough,Cedarbrae,43.769688,-79.23944


# 3. Visualize the neighborhoods in boroughs whose names contain "Toronto" on a map

## 3.1 Get the data for boroughs with names containing "Toronto"

In [264]:
# Keep the rows whose borough name contains the substring 'Toronto'.
in_toronto = df['Borough'].map(lambda borough_name: 'Toronto' in borough_name)
df_Toronto = df[in_toronto].reset_index(drop=True)
df_Toronto.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676845,-79.295225
1,M4K,East Toronto,"The Danforth West, Riverdale",43.683262,-79.35512
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.667965,-79.314673
3,M4M,East Toronto,Studio District,43.662766,-79.33483
4,M4N,Central Toronto,Lawrence Park,43.72816,-79.387085


In [265]:
# (rows, columns) of the subset restricted to boroughs containing 'Toronto'.
df_Toronto.shape

(38, 5)

## 3.2 Get the geographical coordinates of Toronto and Create a map with neighborhoods shown.

In [266]:
!pip install geopy
from geopy.geocoders import Nominatim

import requests

import matplotlib.cm as cm
import matplotlib.colors as colors

!pip install folium
import folium

from sklearn.cluster import KMeans
print('Libraries imported!')

Libraries imported!


### 3.2.1 Get the geographical coordinates of Toronto

In [267]:
address = "Toronto, Canada"
geolocator = Nominatim(user_agent = "toronto_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the address cannot be resolved; stop with a
# clear message instead of an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geographical coordinate of Toronto is {}, {}.'.format(latitude, longitude))

The geographical coordinate of Toronto is 43.653963, -79.387207.


### 3.2.2 Create a map with neighborhoods of Toronto shown on the map.
### Check this link for the generated map: https://github.com/minshenli/Coursera_Capstone/blob/master/Toronto_Neighbourhoods_Map.png

In [270]:
# Base map centred on the coordinates looked up for the city.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

# One circle marker per row of df_Toronto, with a "(neighbourhood), borough" popup.
marker_fields = zip(
    df_Toronto['Latitude'],
    df_Toronto['Longitude'],
    df_Toronto['Borough'],
    df_Toronto['Neighbourhood'],
)
for lat, lng, borough, neighbourhood in marker_fields:
    label = folium.Popup("({}), {}".format(neighbourhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='red',
        fill_opacity=0.5,
        parse_html=True,
    )
    marker.add_to(map_toronto)
map_toronto

# 4. Get top venues of each neighbourhood 

## 4.1 Define Foursquare Credentials and Version

In [271]:
# Credentials are read from the environment so real keys never need to be typed
# into (or saved with) the notebook. The 'XXXX' placeholders are used when the
# variables are not set.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', 'XXXX') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', 'XXXX') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
# Cell output is stored in the notebook file, so only a mask of the secret is shown.
print('CLIENT_SECRET:' + '*' * len(CLIENT_SECRET))

Your credentails:
CLIENT_ID: XXXX
CLIENT_SECRET:XXXX


## 4.2 Get top 100 venues in each neighborhood within a radius of 1000 meters

### 4.2.1 Define the function as needed

In [272]:
def getNearbyVenues(Borough_Name, Neighbourhood_Name, Lat, Lng, radius = 1000, limit = 100):
    """Query the Foursquare ``venues/explore`` endpoint around each location.

    Parameters
    ----------
    Borough_Name, Neighbourhood_Name, Lat, Lng : iterable
        Parallel iterables describing the locations. They are zipped together,
        so iteration stops at the shortest one.
    radius : int, optional
        Value sent as the ``radius`` query parameter (default 1000).
    limit : int, optional
        Value sent as the ``limit`` query parameter (default 100).

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Borough', 'Neighbourhood',
        'Latitude', 'Longitude', 'Venue', 'Venue Latitude', 'Venue Longitude'
        and 'Venue Category'. The frame is empty, but keeps these columns,
        when no venue is returned.

    Raises
    ------
    requests.HTTPError
        If a request comes back with an HTTP error status.

    Notes
    -----
    Reads the module-level CLIENT_ID, CLIENT_SECRET and VERSION, and prints one
    progress line per location.
    """
    column_names = [
        'Borough',
        'Neighbourhood',
        'Latitude',
        'Longitude',
        'Venue',
        'Venue Latitude',
        'Venue Longitude',
        'Venue Category'
    ]
    url = "https://api.foursquare.com/v2/venues/explore"

    venue_rows = []
    for nn, bn, lat, lng in zip(Neighbourhood_Name, Borough_Name, Lat, Lng):
        print('({})'.format(nn), bn)

        # Let requests build and escape the query string instead of formatting
        # the URL by hand.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': limit,
        }

        # make the GET request; surface HTTP errors (bad credentials, quota)
        # here rather than as a KeyError while reading the JSON body
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()

        # A response without any group contributes no venues.
        groups = response.json()['response'].get('groups') or []
        results = groups[0]['items'] if groups else []

        # keep only the relevant information
        for result in results:
            venue = result['venue']
            # A venue with an empty category list gets None instead of raising.
            categories = venue.get('categories') or []
            category_name = categories[0]['name'] if categories else None
            venue_rows.append((bn,
                               nn,
                               lat,
                               lng,
                               venue['name'],
                               venue['location']['lat'],
                               venue['location']['lng'],
                               category_name))

    # Passing the columns to the constructor also works when no rows were collected.
    nearby_venues = pd.DataFrame(venue_rows, columns=column_names)

    return(nearby_venues)

### 4.2.2 Run the function: Get the top 100 venues in each neighborhood within a radius of 1000 meters.

In [273]:
# Fetch venues for every row of df_Toronto using the function's default radius and limit.
Toronto_venues = getNearbyVenues(df_Toronto['Borough'], df_Toronto['Neighbourhood'], df_Toronto['Latitude'], df_Toronto['Longitude'])

(The Beaches) East Toronto
(The Danforth West, Riverdale) East Toronto
(The Beaches West, India Bazaar) East Toronto
(Studio District) East Toronto
(Lawrence Park) Central Toronto
(Davisville North) Central Toronto
(North Toronto West) Central Toronto
(Davisville) Central Toronto
(Moore Park, Summerhill East) Central Toronto
(Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West) Central Toronto
(Rosedale) Downtown Toronto
(Cabbagetown, St. James Town) Downtown Toronto
(Church and Wellesley) Downtown Toronto
(Harbourfront, Regent Park) Downtown Toronto
(Ryerson, Garden District) Downtown Toronto
(St. James Town) Downtown Toronto
(Berczy Park) Downtown Toronto
(Central Bay Street) Downtown Toronto
(Adelaide, King, Richmond) Downtown Toronto
(Harbourfront East, Toronto Islands, Union Station) Downtown Toronto
(Design Exchange, Toronto Dominion Centre) Downtown Toronto
(Commerce Court, Victoria Hotel) Downtown Toronto
(Roselawn) Central Toronto
(Forest Hill North, Forest Hill 

In [274]:
# (rows, columns): one row per venue returned across all queried locations.
Toronto_venues.shape

(3174, 8)

In [275]:
# Preview the first venue rows.
Toronto_venues.head()

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,East Toronto,The Beaches,43.676845,-79.295225,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
1,East Toronto,The Beaches,43.676845,-79.295225,Glen Manor Ravine,43.676821,-79.293942,Trail
2,East Toronto,The Beaches,43.676845,-79.295225,The Beech Tree,43.680493,-79.288846,Gastropub
3,East Toronto,The Beaches,43.676845,-79.295225,Tori's Bakeshop,43.672114,-79.290331,Vegetarian / Vegan Restaurant
4,East Toronto,The Beaches,43.676845,-79.295225,Beaches Bake Shop,43.680363,-79.289692,Bakery


In [276]:
# Count of non-null values per column for each (Borough, Neighbourhood) pair.
Toronto_venues.groupby(['Borough','Neighbourhood']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Borough,Neighbourhood,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Central Toronto,Davisville,100,100,100,100,100,100
Central Toronto,Davisville North,100,100,100,100,100,100
Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West",83,83,83,83,83,83
Central Toronto,"Forest Hill North, Forest Hill West",36,36,36,36,36,36
Central Toronto,Lawrence Park,12,12,12,12,12,12
Central Toronto,"Moore Park, Summerhill East",66,66,66,66,66,66
Central Toronto,North Toronto West,49,49,49,49,49,49
Central Toronto,Roselawn,6,6,6,6,6,6
Central Toronto,"The Annex, North Midtown, Yorkville",100,100,100,100,100,100
Downtown Toronto,"Adelaide, King, Richmond",100,100,100,100,100,100


### 4.2.3 Check how many unique categories can be curated from all the returned venues

In [277]:
# Number of distinct values in the 'Venue Category' column.
n_unique_categories = len(Toronto_venues['Venue Category'].unique())
print("There are {} unique categories.".format(n_unique_categories))

There are 282 unique categories.


## 4.3 Analyze the neighborhoods

### 4.3.1 Transform "Venue Category" variable into dummy variables indicating categories

In [278]:
# One indicator column per venue category.
Toronto_onehot = pd.get_dummies(Toronto_venues['Venue Category'])

# Attach the two key columns (they land at the end of the frame) ...
for key_column in ['Borough', 'Neighbourhood']:
    Toronto_onehot[key_column] = Toronto_venues[key_column]

# ... then move the last two columns to the front.
columns = list(Toronto_onehot.columns[-2:]) + list(Toronto_onehot.columns[:-2])
Toronto_onehot = Toronto_onehot[columns]
Toronto_onehot.head()

Unnamed: 0,Borough,Neighbourhood,Afghan Restaurant,African Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,University,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,East Toronto,The Beaches,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,East Toronto,The Beaches,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,East Toronto,The Beaches,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,East Toronto,The Beaches,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
4,East Toronto,The Beaches,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### 4.3.2 Group by (Borough, Neighbourhood) and analyze the frequency of occurrence of each category within each (Borough, Neighbourhood)

In [279]:
# The mean of the indicator columns is the share of each category per group.
venues_by_neighbourhood = Toronto_onehot.groupby(['Borough', 'Neighbourhood'])
Toronto_grouped = venues_by_neighbourhood.mean().reset_index()
Toronto_grouped.head()

Unnamed: 0,Borough,Neighbourhood,Afghan Restaurant,African Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,University,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Central Toronto,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0
1,Central Toronto,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.01,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.02
2,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.024096,0.0,0.0,0.012048,0.0,0.0,0.0,0.0,0.012048
3,Central Toronto,"Forest Hill North, Forest Hill West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Toronto,Lawrence Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [280]:
# (rows, columns): one row per (Borough, Neighbourhood) group; two key columns plus one per category.
Toronto_grouped.shape

(38, 284)

### 4.3.3 Print each neighbourhood along with its top 5 categories

In [281]:
num_top_categories = 5

# For every (Borough, Neighbourhood) row, print its most frequent categories.
for bn, nn in zip(Toronto_grouped['Borough'],Toronto_grouped['Neighbourhood']):
    print("----{}----".format(bn))
    print("----{}----".format(nn))
    is_current = (Toronto_grouped['Borough']==bn) & (Toronto_grouped['Neighbourhood']==nn)
    # Transpose so that each category becomes a row.
    temp = Toronto_grouped[is_current].T.reset_index()
    temp.columns = ['categories','freq']
    # Skip the two key rows (Borough, Neighbourhood). The .copy() makes the
    # assignment below act on an independent frame rather than on a slice,
    # which avoids pandas' SettingWithCopyWarning.
    temp = temp.iloc[2:].copy()
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_categories))
    print("\n")

----Central Toronto----
----Davisville----
           categories  freq
0    Sushi Restaurant  0.06
1  Italian Restaurant  0.06
2   Indian Restaurant  0.06
3                Café  0.05
4         Coffee Shop  0.05


----Central Toronto----
----Davisville North----
           categories  freq
0         Coffee Shop  0.08
1  Italian Restaurant  0.05
2        Dessert Shop  0.05
3         Pizza Place  0.04
4                Café  0.04


----Central Toronto----
----Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West----
           categories  freq
0         Coffee Shop  0.10
1                Café  0.06
2    Sushi Restaurant  0.06
3  Italian Restaurant  0.05
4         Pizza Place  0.04


----Central Toronto----
----Forest Hill North, Forest Hill West----
           categories  freq
0                Café  0.11
1                Park  0.11
2  Italian Restaurant  0.08
3    Sushi Restaurant  0.06
4         Coffee Shop  0.06


----Central Toronto----
----Lawrence Park----
     categories 

In [143]:
# @temp = Toronto_grouped[(Toronto_grouped['Borough']=='Central Toronto') & (Toronto_grouped['Neighbourhood']=='Davisville')].T.reset_index()
# @temp = temp.iloc[2:]
# @temp.columns=['categories','freq']
# @temp.head()

### 4.3.4 Get a dataframe where each neighbourhood's top 10 venues are listed

#### 4.3.4.1 Write a function that returns the most common venue categories for one neighbourhood row.

In [292]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest values in ``row``, largest first.

    The first two entries of ``row`` are skipped; the remaining entries are
    sorted in descending order and the index labels of the first
    ``num_top_venues`` of them are returned as an array.
    """
    ranked = row.iloc[2:].sort_values(ascending=False)
    top_labels = ranked.index.values
    return top_labels[:num_top_venues]

#### 4.3.4.2 Run the function and write the info into a dataframe.

In [293]:
num_top_venues = 10
indicators = ['st','nd','rd']

# Column labels: the two key columns followed by '1st', '2nd', '3rd', '4th', ...
columns = ['Borough','Neighbourhood']
for ind in np.arange(num_top_venues):
    # Explicit bounds check instead of a bare `except:` around the list lookup,
    # which would also have hidden unrelated errors.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append(str(ind+1) + suffix + ' Most Common Category')

Toronto_venues_sorted = pd.DataFrame(columns=columns)
Toronto_venues_sorted[['Borough','Neighbourhood']] = Toronto_grouped[['Borough','Neighbourhood']]

# Fill the rank columns of each row with that row's most common categories.
for ind in np.arange(Toronto_grouped.shape[0]):
    Toronto_venues_sorted.iloc[ind,2:] = return_most_common_venues(Toronto_grouped.iloc[ind], num_top_venues)

In [294]:
Toronto_venues_sorted.head()

Unnamed: 0,Borough,Neighbourhood,1st Most Common Category,2nd Most Common Category,3rd Most Common Category,4th Most Common Category,5th Most Common Category,6th Most Common Category,7th Most Common Category,8th Most Common Category,9th Most Common Category,10th Most Common Category
0,Central Toronto,Davisville,Italian Restaurant,Sushi Restaurant,Indian Restaurant,Coffee Shop,Café,Restaurant,Pizza Place,Dessert Shop,Bakery,Ice Cream Shop
1,Central Toronto,Davisville North,Coffee Shop,Dessert Shop,Italian Restaurant,Café,Pizza Place,Fast Food Restaurant,Japanese Restaurant,Sushi Restaurant,Pharmacy,Dog Run
2,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",Coffee Shop,Café,Sushi Restaurant,Italian Restaurant,Pizza Place,Thai Restaurant,Park,Gym,Sandwich Place,Spa
3,Central Toronto,"Forest Hill North, Forest Hill West",Park,Café,Italian Restaurant,Pharmacy,Coffee Shop,Sushi Restaurant,Jewelry Store,Bagel Shop,Juice Bar,Bank
4,Central Toronto,Lawrence Park,Café,Pharmacy,Bookstore,Park,Bus Line,Restaurant,Trail,College Gym,Gym / Fitness Center,Coffee Shop


# 5. Cluster Neighborhoods

## 5.1 Run k-means clustering

In [295]:
kclusters = 5

# Features for clustering: every column except the two key columns.
Toronto_grouped_clustering = Toronto_grouped.drop(columns=['Borough', 'Neighbourhood'])

# run k-means clustering (fixed random_state for repeatable labels)
kmeans = KMeans(n_clusters=kclusters, random_state=0)
kmeans.fit(Toronto_grouped_clustering)
kmeans.labels_

array([0, 0, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,
       0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

## 5.2 Create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [296]:
# DataFrame.insert raises if the column already exists, so drop a column left
# over from a previous run of this cell before inserting the labels.
if 'Cluster Labels' in Toronto_venues_sorted.columns:
    Toronto_venues_sorted = Toronto_venues_sorted.drop(columns='Cluster Labels')
Toronto_venues_sorted.insert(0,'Cluster Labels',kmeans.labels_)
Toronto_venues_sorted.head()

Unnamed: 0,Cluster Labels,Borough,Neighbourhood,1st Most Common Category,2nd Most Common Category,3rd Most Common Category,4th Most Common Category,5th Most Common Category,6th Most Common Category,7th Most Common Category,8th Most Common Category,9th Most Common Category,10th Most Common Category
0,0,Central Toronto,Davisville,Italian Restaurant,Sushi Restaurant,Indian Restaurant,Coffee Shop,Café,Restaurant,Pizza Place,Dessert Shop,Bakery,Ice Cream Shop
1,0,Central Toronto,Davisville North,Coffee Shop,Dessert Shop,Italian Restaurant,Café,Pizza Place,Fast Food Restaurant,Japanese Restaurant,Sushi Restaurant,Pharmacy,Dog Run
2,0,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",Coffee Shop,Café,Sushi Restaurant,Italian Restaurant,Pizza Place,Thai Restaurant,Park,Gym,Sandwich Place,Spa
3,0,Central Toronto,"Forest Hill North, Forest Hill West",Park,Café,Italian Restaurant,Pharmacy,Coffee Shop,Sushi Restaurant,Jewelry Store,Bagel Shop,Juice Bar,Bank
4,2,Central Toronto,Lawrence Park,Café,Pharmacy,Bookstore,Park,Bus Line,Restaurant,Trail,College Gym,Gym / Fitness Center,Coffee Shop


In [297]:
# Look-up table keyed by (Borough, Neighbourhood): cluster label plus ranked categories.
ranked_by_neighbourhood = Toronto_venues_sorted.set_index(['Borough', 'Neighbourhood'])

# Drop the first column of df_Toronto and attach the look-up columns to each row.
Toronto_merged = df_Toronto.iloc[:, 1:].join(
    ranked_by_neighbourhood,
    on=['Borough', 'Neighbourhood'],
)
Toronto_merged

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Category,2nd Most Common Category,3rd Most Common Category,4th Most Common Category,5th Most Common Category,6th Most Common Category,7th Most Common Category,8th Most Common Category,9th Most Common Category,10th Most Common Category
0,East Toronto,The Beaches,43.676845,-79.295225,0,Pub,Coffee Shop,Pizza Place,Breakfast Spot,Bar,Thai Restaurant,Japanese Restaurant,Park,Caribbean Restaurant,Burger Joint
1,East Toronto,"The Danforth West, Riverdale",43.683262,-79.35512,0,Greek Restaurant,Coffee Shop,Café,Pub,Italian Restaurant,Sandwich Place,Bakery,Fast Food Restaurant,Pizza Place,Yoga Studio
2,East Toronto,"The Beaches West, India Bazaar",43.667965,-79.314673,0,Indian Restaurant,Coffee Shop,Pizza Place,Beach,Pub,Burrito Place,Discount Store,Bakery,Fast Food Restaurant,Burger Joint
3,East Toronto,Studio District,43.662766,-79.33483,0,Coffee Shop,Pizza Place,Café,Italian Restaurant,American Restaurant,Bakery,Bar,Park,Thai Restaurant,Sushi Restaurant
4,Central Toronto,Lawrence Park,43.72816,-79.387085,2,Café,Pharmacy,Bookstore,Park,Bus Line,Restaurant,Trail,College Gym,Gym / Fitness Center,Coffee Shop
5,Central Toronto,Davisville North,43.712815,-79.388526,0,Coffee Shop,Dessert Shop,Italian Restaurant,Café,Pizza Place,Fast Food Restaurant,Japanese Restaurant,Sushi Restaurant,Pharmacy,Dog Run
6,Central Toronto,North Toronto West,43.714523,-79.40696,0,Coffee Shop,Sporting Goods Shop,Italian Restaurant,Skating Rink,Café,Diner,Park,Mexican Restaurant,Pizza Place,Pharmacy
7,Central Toronto,Davisville,43.703395,-79.385964,0,Italian Restaurant,Sushi Restaurant,Indian Restaurant,Coffee Shop,Café,Restaurant,Pizza Place,Dessert Shop,Bakery,Ice Cream Shop
8,Central Toronto,"Moore Park, Summerhill East",43.690655,-79.383561,0,Italian Restaurant,Park,Coffee Shop,Grocery Store,Gym,Thai Restaurant,Gastropub,Pizza Place,Pub,Restaurant
9,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",43.686083,-79.402335,0,Coffee Shop,Café,Sushi Restaurant,Italian Restaurant,Pizza Place,Thai Restaurant,Park,Gym,Sandwich Place,Spa


## 5.3 Show clusters on the map
## Check this link for the generated map: https://github.com/minshenli/Coursera_Capstone/blob/master/Toronto_Clustered_Neighbourhoods_Map.png

In [299]:
map_toronto_clusters = folium.Map(location=[latitude,longitude], zoom_start = 11)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Rows that found no match in the join have no cluster label and cannot be
# coloured, so they are left off the map.
labelled = Toronto_merged.dropna(subset=['Cluster Labels'])

# add markers to the map
for lat, lng, bn, nn, cluster in zip(labelled['Latitude'], labelled['Longitude'], labelled['Borough'], labelled['Neighbourhood'], labelled['Cluster Labels']):
    # The label is used as a list index below, so make sure it is an int.
    cluster = int(cluster)
    # Build the popup from this row's own names (nn, bn). Names left over from
    # an earlier loop would give every marker the same popup text.
    label = folium.Popup( "({}), {}: ".format(nn, bn) + "Cluster " + str(cluster), parse_html = True)
    folium.CircleMarker(
        [lat, lng],
        radius = 5,
        popup = label,
        color = rainbow[cluster],
        fill = True,
        fill_color = rainbow[cluster],
        fill_opacity = 0.5,
        parse_html = True).add_to(map_toronto_clusters)
map_toronto_clusters

# 6. Examine Clusters 

In [245]:
# Rows of Toronto_merged whose 'Cluster Labels' value is 0.
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 0]

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Category,2nd Most Common Category,3rd Most Common Category,4th Most Common Category,5th Most Common Category,6th Most Common Category,7th Most Common Category,8th Most Common Category,9th Most Common Category,10th Most Common Category
0,East Toronto,The Beaches,43.676845,-79.295225,0,Pub,Coffee Shop,Pizza Place,Breakfast Spot,Bar,Thai Restaurant,Japanese Restaurant,Park,Caribbean Restaurant,Burger Joint
1,East Toronto,"The Danforth West, Riverdale",43.683262,-79.35512,0,Greek Restaurant,Coffee Shop,Café,Pub,Italian Restaurant,Sandwich Place,Bakery,Fast Food Restaurant,Pizza Place,Yoga Studio
2,East Toronto,"The Beaches West, India Bazaar",43.667965,-79.314673,0,Indian Restaurant,Coffee Shop,Pizza Place,Beach,Pub,Burrito Place,Discount Store,Bakery,Fast Food Restaurant,Burger Joint
3,East Toronto,Studio District,43.662766,-79.33483,0,Coffee Shop,Pizza Place,Café,Italian Restaurant,American Restaurant,Bakery,Bar,Park,Thai Restaurant,Sushi Restaurant
5,Central Toronto,Davisville North,43.712815,-79.388526,0,Coffee Shop,Dessert Shop,Italian Restaurant,Café,Pizza Place,Fast Food Restaurant,Japanese Restaurant,Sushi Restaurant,Pharmacy,Dog Run
6,Central Toronto,North Toronto West,43.714523,-79.40696,0,Coffee Shop,Sporting Goods Shop,Italian Restaurant,Skating Rink,Café,Diner,Park,Mexican Restaurant,Pizza Place,Pharmacy
7,Central Toronto,Davisville,43.703395,-79.385964,0,Italian Restaurant,Sushi Restaurant,Indian Restaurant,Coffee Shop,Café,Restaurant,Pizza Place,Dessert Shop,Bakery,Ice Cream Shop
8,Central Toronto,"Moore Park, Summerhill East",43.690655,-79.383561,0,Italian Restaurant,Park,Coffee Shop,Grocery Store,Gym,Thai Restaurant,Gastropub,Pizza Place,Pub,Restaurant
9,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",43.686083,-79.402335,0,Coffee Shop,Café,Sushi Restaurant,Italian Restaurant,Pizza Place,Thai Restaurant,Park,Gym,Sandwich Place,Spa
11,Downtown Toronto,"Cabbagetown, St. James Town",43.66816,-79.366602,0,Park,Gastropub,Pool,Café,Diner,Japanese Restaurant,Pub,Farm,Taiwanese Restaurant,Jewelry Store


In [246]:
# Rows of Toronto_merged whose 'Cluster Labels' value is 1.
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 1]

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Category,2nd Most Common Category,3rd Most Common Category,4th Most Common Category,5th Most Common Category,6th Most Common Category,7th Most Common Category,8th Most Common Category,9th Most Common Category,10th Most Common Category
22,Central Toronto,Roselawn,43.711941,-79.41912,1,Pharmacy,Skating Rink,Trail,Bank,Café,Yoga Studio,Discount Store,Dog Run,Doner Restaurant,Donut Shop


In [247]:
# Rows of Toronto_merged whose 'Cluster Labels' value is 2.
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 2]

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Category,2nd Most Common Category,3rd Most Common Category,4th Most Common Category,5th Most Common Category,6th Most Common Category,7th Most Common Category,8th Most Common Category,9th Most Common Category,10th Most Common Category
4,Central Toronto,Lawrence Park,43.72816,-79.387085,2,Café,Pharmacy,Bookstore,Park,Bus Line,Restaurant,Trail,College Gym,Gym / Fitness Center,Coffee Shop


In [248]:
# Rows of Toronto_merged whose 'Cluster Labels' value is 3.
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 3]

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Category,2nd Most Common Category,3rd Most Common Category,4th Most Common Category,5th Most Common Category,6th Most Common Category,7th Most Common Category,8th Most Common Category,9th Most Common Category,10th Most Common Category
19,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",43.62347,-79.391507,3,Harbor / Marina,Bar,Airport Lounge,Airport Service,Airport Terminal,Burger Joint,Boutique,Coffee Shop,Nudist Beach,Music Venue


In [249]:
# Rows of Toronto_merged whose 'Cluster Labels' value is 4.
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 4]

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Category,2nd Most Common Category,3rd Most Common Category,4th Most Common Category,5th Most Common Category,6th Most Common Category,7th Most Common Category,8th Most Common Category,9th Most Common Category,10th Most Common Category
10,Downtown Toronto,Rosedale,43.68194,-79.378474,4,Park,Trail,Italian Restaurant,Grocery Store,Bank,Athletics & Sports,Playground,Building,Sporting Goods Shop,Beer Store
