# Week 3 Part 3 - Toronto Neighbourhoods

In [47]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Libraries imported.


## Import additional web scraping libraries

In [48]:
import urllib.request
import time
from bs4 import BeautifulSoup

print('Libraries imported.')

Libraries imported.


## Load and explore the data

In [49]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
response = requests.get(url)

response

<Response [200]>

### Source the response html code:

In [50]:
soup = BeautifulSoup(response.text, 'html.parser')

# print(soup.prettify())

### Find postcode table in html code:

In [51]:
table_tag = soup.findAll('table')[0]

postal_code_table = soup.find('table',{'class':'wikitable sortable'})

print(postal_code_table)

<table class="wikitable sortable">
<tbody><tr>
<th>Postal Code
</th>
<th>Borough
</th>
<th>Neighbourhood
</th></tr>
<tr>
<td>M1A
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M2A
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M3A
</td>
<td>North York
</td>
<td>Parkwoods
</td></tr>
<tr>
<td>M4A
</td>
<td>North York
</td>
<td>Victoria Village
</td></tr>
<tr>
<td>M5A
</td>
<td>Downtown Toronto
</td>
<td>Regent Park, Harbourfront
</td></tr>
<tr>
<td>M6A
</td>
<td>North York
</td>
<td>Lawrence Manor, Lawrence Heights
</td></tr>
<tr>
<td>M7A
</td>
<td>Downtown Toronto
</td>
<td>Queen's Park, Ontario Provincial Government
</td></tr>
<tr>
<td>M8A
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M9A
</td>
<td>Etobicoke
</td>
<td>Islington Avenue, Humber Valley Village
</td></tr>
<tr>
<td>M1B
</td>
<td>Scarborough
</td>
<td>Malvern, Rouge
</td></tr>
<tr>
<td>M2B
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M3B
</td>
<td

### Extract rows from table

In [52]:
rows = postal_code_table.find_all('tr')

postal_codes = []
boroughs = []
neighbourhoods = []

for row in rows:
#     print(row)
#     print(" ")
    cells = row.find_all('td')

    if len(cells) > 1:
            postal_code = cells[0]
            borough = cells[1]
            neighbourhood = cells[2]

            # Skip 'Not assigned' Boroughs
            if borough.text.strip()  != 'Not assigned':  
                postal_codes.append(postal_code.text.strip())
                boroughs.append(borough.text.strip())
                
                if neighbourhood.text.strip()  == 'Not assigned':  
                    neighbourhoods.append(borough.text.strip())
                else:
                    neighbourhoods.append(neighbourhood.text.strip())

### Create Pandas Dataframe from lists

In [61]:
postal_code_df = pd.DataFrame(zip(postal_codes, boroughs, neighbourhoods), columns =['Postal Code', 'Borough', 'Neighbourhood'])

# postal_code_df = postal_code_df.sort_values(by = ['Postal Code'], ascending = True)

print(postal_code_df)

    Postal Code           Borough  \
0           M3A        North York   
1           M4A        North York   
2           M5A  Downtown Toronto   
3           M6A        North York   
4           M7A  Downtown Toronto   
5           M9A         Etobicoke   
6           M1B       Scarborough   
7           M3B        North York   
8           M4B         East York   
9           M5B  Downtown Toronto   
10          M6B        North York   
11          M9B         Etobicoke   
12          M1C       Scarborough   
13          M3C        North York   
14          M4C         East York   
15          M5C  Downtown Toronto   
16          M6C              York   
17          M9C         Etobicoke   
18          M1E       Scarborough   
19          M4E      East Toronto   
20          M5E  Downtown Toronto   
21          M6E              York   
22          M1G       Scarborough   
23          M4G         East York   
24          M5G  Downtown Toronto   
25          M6G  Downtown Toronto   
2

### Check dataframe shape

In [62]:
postal_code_df.shape

(103, 3)

## Part 2 - Load geolocation & append dataframe

In [63]:
# import geocoder # import geocoder

# # initialize your variable to None
# lat_lng_coords = None

# latitudes  = []
# longitudes = []
# for pc in postal_codes:
#     # loop until you get the coordinates
#     while(lat_lng_coords is None):
#       g = geocoder.google('{}, Toronto, Ontario'.format(pc))
#       lat_lng_coords = g.latlng

#     latitude = lat_lng_coords[0]
#     longitude = lat_lng_coords[1]
    
#     latitudes.append(latitude)
#     longitudes.append(longitude)


# Read Geospatial_Coordinates.csv
geolocation = pd.read_csv('Geospatial_Coordinates.csv')
print(geolocation)

    Postal Code   Latitude  Longitude
0           M1B  43.806686 -79.194353
1           M1C  43.784535 -79.160497
2           M1E  43.763573 -79.188711
3           M1G  43.770992 -79.216917
4           M1H  43.773136 -79.239476
5           M1J  43.744734 -79.239476
6           M1K  43.727929 -79.262029
7           M1L  43.711112 -79.284577
8           M1M  43.716316 -79.239476
9           M1N  43.692657 -79.264848
10          M1P  43.757410 -79.273304
11          M1R  43.750072 -79.295849
12          M1S  43.794200 -79.262029
13          M1T  43.781638 -79.304302
14          M1V  43.815252 -79.284577
15          M1W  43.799525 -79.318389
16          M1X  43.836125 -79.205636
17          M2H  43.803762 -79.363452
18          M2J  43.778517 -79.346556
19          M2K  43.786947 -79.385975
20          M2L  43.757490 -79.374714
21          M2M  43.789053 -79.408493
22          M2N  43.770120 -79.408493
23          M2P  43.752758 -79.400049
24          M2R  43.782736 -79.442259
25          

### Append postcode dataframe with geolocation

In [64]:
postal_code_wGeocode = pd.merge(postal_code_df, geolocation, on = 'Postal Code', how = 'left')

print(postal_code_wGeocode)

    Postal Code           Borough  \
0           M3A        North York   
1           M4A        North York   
2           M5A  Downtown Toronto   
3           M6A        North York   
4           M7A  Downtown Toronto   
5           M9A         Etobicoke   
6           M1B       Scarborough   
7           M3B        North York   
8           M4B         East York   
9           M5B  Downtown Toronto   
10          M6B        North York   
11          M9B         Etobicoke   
12          M1C       Scarborough   
13          M3C        North York   
14          M4C         East York   
15          M5C  Downtown Toronto   
16          M6C              York   
17          M9C         Etobicoke   
18          M1E       Scarborough   
19          M4E      East Toronto   
20          M5E  Downtown Toronto   
21          M6E              York   
22          M1G       Scarborough   
23          M4G         East York   
24          M5G  Downtown Toronto   
25          M6G  Downtown Toronto   
2

Check if any rows were dropped

In [65]:
print(postal_code_wGeocode.shape)

(103, 5)


In [66]:
len(postal_code_wGeocode) == len(postal_code_df)

True

## Part 3 - Cluster Analysis

#### Use geopy library to get the latitude and longitude values of Toronto

In [67]:
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


#### Create a map of Toronto with Neighbourhoods superimposed on top

In [74]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

Neighbourhoods = postal_code_wGeocode

# add markers to map
for lat, lng, borough, Neighbourhood in zip(Neighbourhoods['Latitude'], Neighbourhoods['Longitude'], Neighbourhoods['Borough'], Neighbourhoods['Neighbourhood']):
    label = '{}, {}'.format(Neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Slice data to contain only Boroughs with "Toronto" in name

In [221]:
sliced_neighbourhoods = Neighbourhoods[Neighbourhoods['Borough'].str.contains('Toronto', regex=False)].reset_index(drop=True)
sliced_neighbourhoods.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031


In [84]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

Neighbourhoods = sliced_neighbourhoods

# add markers to map
for lat, lng, borough, Neighbourhood in zip(Neighbourhoods['Latitude'], Neighbourhoods['Longitude'], Neighbourhoods['Borough'], Neighbourhoods['Neighbourhood']):
    label = '{}, {}'.format(Neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

#### Define Foursquare Credentials and Version

In [85]:
CLIENT_ID     = 'UZ0UOSK2PVSKAMSNJY1XAQ45BIRLSQKLM4EFBHSNW2KLPQRZ' # your Foursquare ID
CLIENT_SECRET = 'ATRIPSGLVSNEU21RQMDZ1JJZED2VWKXKEPFXCMJANZPDTGZA' # your Foursquare Secret
VERSION       = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: UZ0UOSK2PVSKAMSNJY1XAQ45BIRLSQKLM4EFBHSNW2KLPQRZ
CLIENT_SECRET:ATRIPSGLVSNEU21RQMDZ1JJZED2VWKXKEPFXCMJANZPDTGZA


From the Foursquare lab in the previous module, we know that all the information is in the *items* key. Before we proceed, let's borrow the **get_category_type** function from the Foursquare lab.

In [86]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

### Explore Neighborhoods in Toronto

In [88]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

### Query Venues

In [93]:
# Set limit to 10 venues
LIMIT = 10
nearby_venues    = getNearbyVenues(names      = Neighbourhoods['Neighbourhood'],
                                   latitudes  = Neighbourhoods['Latitude'],
                                   longitudes = Neighbourhoods['Longitude']
                                  )

Regent Park, Harbourfront
Queen's Park, Ontario Provincial Government
Garden District, Ryerson
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
The Danforth West, Riverdale
Toronto Dominion Centre, Design Exchange
Brockton, Parkdale Village, Exhibition Place
India Bazaar, The Beaches West
Commerce Court, Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North & West, Forest Hill Road Park
High Park, The Junction South
North Toronto West,  Lawrence Park
The Annex, North Midtown, Yorkville
Parkdale, Roncesvalles
Davisville
University of Toronto, Harbord
Runnymede, Swansea
Moore Park, Summerhill East
Kensington Market, Chinatown, Grange Park
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport


In [94]:
nearby_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Regent Park, Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Regent Park, Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,"Regent Park, Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,"Regent Park, Harbourfront",43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant


#### Let's find out how many unique categories can be curated from all the returned venues

In [95]:
print('There are {} uniques categories.'.format(len(nearby_venues['Venue Category'].unique())))

There are 127 uniques categories.


### Analyze Neighborhoods

In [211]:
# one hot encoding
toronto_onehot = pd.get_dummies(nearby_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = nearby_venues['Neighborhood']

In [212]:
# move neighborhood column to the first column
cols = list(toronto_onehot.columns)
cols.remove('Neighborhood')
cols_ = ['Neighborhood'] + cols

toronto_onehot = toronto_onehot[cols_]

toronto_onehot.head()

Unnamed: 0,Neighborhood,Airport,Airport Food Court,Airport Lounge,Airport Terminal,American Restaurant,Antique Shop,Arts & Crafts Store,Asian Restaurant,Auto Workshop,BBQ Joint,Bakery,Bank,Bar,Beer Bar,Beer Store,Bookstore,Boutique,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Line,Café,Candy Store,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,College Gym,Comic Shop,Concert Hall,Cosmetics Shop,Creperie,Cuban Restaurant,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Distribution Center,Dog Run,Donut Shop,Eastern European Restaurant,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Food & Drink Shop,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Garden,Garden Center,Gastropub,Gay Bar,General Entertainment,Gift Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Health Food Store,Historic Site,Home Service,Hotel,Ice Cream Shop,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Korean Restaurant,Lake,Liquor Store,Mexican Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Movie Theater,Museum,Music Venue,New American Restaurant,Organic Grocery,Park,Performing Arts Venue,Persian Restaurant,Pet Store,Pizza Place,Plane,Playground,Plaza,Portuguese Restaurant,Pub,Ramen Restaurant,Rental Car Location,Restaurant,Salad Place,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Skate Park,Skating Rink,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Steakhouse,Supermarket,Sushi Restaurant,Swim School,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Trail,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


#### Find the 6 most common categories

In [213]:
toronto_onehot_most_common = toronto_onehot[cols].sum(axis=0)

# Transform series into dataframe
toronto_onehot_most_common = toronto_onehot_most_common.to_frame()
toronto_onehot_most_common.columns = ['Count']

# Sort category count
toronto_onehot_most_common.sort_values(by = 'Count', ascending = False, inplace = True)

toronto_onehot_most_common = toronto_onehot_most_common.head(6)

print(toronto_onehot_most_common)

                    Count
Coffee Shop            26
Café                   19
Restaurant             17
Park                   15
Italian Restaurant     13
Bakery                 10


In [214]:
toronto_onehot = toronto_onehot[['Neighborhood'] + list(toronto_onehot_most_common.index)]

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [215]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Coffee Shop,Café,Restaurant,Park,Italian Restaurant,Bakery
0,Berczy Park,0.0,0.0,0.1,0.1,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.2,0.1,0.0,0.0,0.1,0.1
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.1,0.0,0.0,0.0
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.1,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.4,0.0,0.0,0.0,0.0,0.0
5,Christie,0.1,0.3,0.1,0.0,0.1,0.0
6,Church and Wellesley,0.0,0.0,0.1,0.1,0.0,0.0
7,"Commerce Court, Victoria Hotel",0.1,0.2,0.1,0.0,0.0,0.1
8,Davisville,0.1,0.1,0.0,0.1,0.1,0.0
9,Davisville North,0.0,0.0,0.0,0.1,0.0,0.0


Print each neighborhood along with the top 6 most common venues

In [216]:
num_top_venues = 6

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Berczy Park----
                venue  freq
0          Restaurant   0.1
1                Park   0.1
2         Coffee Shop   0.0
3                Café   0.0
4  Italian Restaurant   0.0
5              Bakery   0.0


----Brockton, Parkdale Village, Exhibition Place----
                venue  freq
0         Coffee Shop   0.2
1                Café   0.1
2  Italian Restaurant   0.1
3              Bakery   0.1
4          Restaurant   0.0
5                Park   0.0


----Business reply mail Processing Centre, South Central Letter Processing Plant Toronto----
                venue  freq
0          Restaurant   0.1
1         Coffee Shop   0.0
2                Café   0.0
3                Park   0.0
4  Italian Restaurant   0.0
5              Bakery   0.0


----CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport----
                venue  freq
0         Coffee Shop   0.1
1                Café   0.0
2          Restaurant   0.0
3            

Function to sort the venues in descending order:

In [217]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

### Cluster Neighborhoods

Run k-means to cluster the neighborhood into 3 clusters.

In [219]:
# set number of clusters
kclusters = 3

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

In [222]:
# add clustering labels
neighbourhood_df = sliced_neighbourhoods
neighbourhood_df.insert(0, 'Cluster Labels', kmeans.labels_)

print(neighbourhood_df.head())

   Cluster Labels Postal Code           Borough  \
0               0         M5A  Downtown Toronto   
1               2         M7A  Downtown Toronto   
2               0         M5B  Downtown Toronto   
3               0         M5C  Downtown Toronto   
4               2         M4E      East Toronto   

                                 Neighbourhood   Latitude  Longitude  
0                    Regent Park, Harbourfront  43.654260 -79.360636  
1  Queen's Park, Ontario Provincial Government  43.662301 -79.389494  
2                     Garden District, Ryerson  43.657162 -79.378937  
3                               St. James Town  43.651494 -79.375418  
4                                  The Beaches  43.676357 -79.293031  


Finally, let's visualize the resulting clusters

In [225]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(neighbourhood_df['Latitude'], neighbourhood_df['Longitude'], neighbourhood_df['Neighbourhood'], neighbourhood_df['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters