#### Scraping the Wikipedia page to obtain the data in the table of postal codes

In [2]:
import requests

In [3]:
# Download the raw HTML of the Wikipedia page that lists the "M" postal codes.
# NOTE: despite its name, `website_url` holds the page's HTML text, not a URL.
wiki_response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page in the next cells.
wiki_response.raise_for_status()
website_url = wiki_response.text

In [4]:
from bs4 import BeautifulSoup

# Parse the downloaded HTML with the lxml parser.
soup = BeautifulSoup(website_url, 'lxml')
# Preview only the beginning of the pretty-printed document: printing the whole
# page floods the notebook output without adding information.
print(soup.prettify()[:1000])

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   List of postal codes of Canada: M - Wikipedia
  </title>
  <script>
   document.documentElement.className = document.documentElement.className.replace( /(^|\s)client-nojs(\s|$)/, "$1client-js$2" );
  </script>
  <script>
   (window.RLQ=window.RLQ||[]).push(function(){mw.config.set({"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":0,"wgPageName":"List_of_postal_codes_of_Canada:_M","wgTitle":"List of postal codes of Canada: M","wgCurRevisionId":890001695,"wgRevisionId":890001695,"wgArticleId":539066,"wgIsArticle":true,"wgIsRedirect":false,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Communications in Ontario","Postal codes in Canada","Toronto","Ontario-related lists"],"wgBreakFrames":false,"wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wg

In [5]:
# Grab the first <table> whose class attribute is "wikitable sortable";
# this is the table holding the Postcode / Borough / Neighbourhood rows.
My_table = soup.find('table', class_='wikitable sortable')

In [6]:
# Show the matched <table> element to inspect its row/cell layout before parsing it.
My_table

<table class="wikitable sortable">
<tbody><tr>
<th>Postcode</th>
<th>Borough</th>
<th>Neighbourhood
</th></tr>
<tr>
<td>M1A</td>
<td>Not assigned</td>
<td>Not assigned
</td></tr>
<tr>
<td>M2A</td>
<td>Not assigned</td>
<td>Not assigned
</td></tr>
<tr>
<td>M3A</td>
<td><a href="/wiki/North_York" title="North York">North York</a></td>
<td><a href="/wiki/Parkwoods" title="Parkwoods">Parkwoods</a>
</td></tr>
<tr>
<td>M4A</td>
<td><a href="/wiki/North_York" title="North York">North York</a></td>
<td><a href="/wiki/Victoria_Village" title="Victoria Village">Victoria Village</a>
</td></tr>
<tr>
<td>M5A</td>
<td><a href="/wiki/Downtown_Toronto" title="Downtown Toronto">Downtown Toronto</a></td>
<td><a href="/wiki/Harbourfront_(Toronto)" title="Harbourfront (Toronto)">Harbourfront</a>
</td></tr>
<tr>
<td>M5A</td>
<td><a href="/wiki/Downtown_Toronto" title="Downtown Toronto">Downtown Toronto</a></td>
<td><a href="/wiki/Regent_Park" title="Regent Park">Regent Park</a>
</td></tr>
<tr>
<td>M6A</td>

In [7]:
# Collect every <tr> row of the table (the header row included) and display them.
links = My_table.find_all('tr')
links

[<tr>
 <th>Postcode</th>
 <th>Borough</th>
 <th>Neighbourhood
 </th></tr>, <tr>
 <td>M1A</td>
 <td>Not assigned</td>
 <td>Not assigned
 </td></tr>, <tr>
 <td>M2A</td>
 <td>Not assigned</td>
 <td>Not assigned
 </td></tr>, <tr>
 <td>M3A</td>
 <td><a href="/wiki/North_York" title="North York">North York</a></td>
 <td><a href="/wiki/Parkwoods" title="Parkwoods">Parkwoods</a>
 </td></tr>, <tr>
 <td>M4A</td>
 <td><a href="/wiki/North_York" title="North York">North York</a></td>
 <td><a href="/wiki/Victoria_Village" title="Victoria Village">Victoria Village</a>
 </td></tr>, <tr>
 <td>M5A</td>
 <td><a href="/wiki/Downtown_Toronto" title="Downtown Toronto">Downtown Toronto</a></td>
 <td><a href="/wiki/Harbourfront_(Toronto)" title="Harbourfront (Toronto)">Harbourfront</a>
 </td></tr>, <tr>
 <td>M5A</td>
 <td><a href="/wiki/Downtown_Toronto" title="Downtown Toronto">Downtown Toronto</a></td>
 <td><a href="/wiki/Regent_Park" title="Regent Park">Regent Park</a>
 </td></tr>, <tr>
 <td>M6A</td>
 <td

In [8]:
# Build one list per column of the postal-code table.
A = []  # postcodes
B = []  # boroughs
C = []  # neighbourhoods

for row in My_table.find_all('tr'):
    cells = row.find_all('td')
    # The header row contains only <th> cells, so it yields no <td> and is skipped here.
    if len(cells) == 3:
        # get_text() gathers the whole text of the cell (plain text or text inside a link);
        # strip() removes the newline that ends the last cell of every row and that would
        # otherwise leak into the neighbourhood names (e.g. "Not assigned\n").
        postcode, borough, neighbourhood = (cell.get_text().strip() for cell in cells)
        A.append(postcode)
        B.append(borough)
        C.append(neighbourhood)

#### Import pandas to convert the lists into a DataFrame

In [9]:
import pandas as pd

# Zip the three parallel lists into rows and build the frame in a single step.
df = pd.DataFrame(list(zip(A, B, C)), columns=['Postcode', 'Borough', 'Neighbourhood'])
df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


#### Keep only the rows that have an assigned borough; rows whose borough is `Not assigned` are dropped.

In [10]:
import numpy as np  # not needed by this cell itself, but later cells rely on `np`

# Keep only rows whose borough is known (neither missing nor the 'Not assigned' placeholder).
# Filtering into a new frame replaces `df['Borough'].replace(..., inplace=True)`, which mutated
# a column selection in place: newer pandas versions warn about that pattern and, with
# copy-on-write enabled, it no longer modifies `df` at all. The original row labels are kept.
has_borough = df['Borough'].notna() & (df['Borough'] != 'Not assigned')
df = df[has_borough].copy()
df

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


#### Using the `.shape` attribute to show the number of rows and columns of the dataframe

In [53]:
# (number of rows, number of columns) of `df`.
df.shape

(211, 3)

#### Getting the geographical coordinates of the neighborhoods using the csv file

In [13]:
# Download the postal-code coordinates file and save it as geospatial_data.csv
# (-q: no progress output, -O: name of the output file).
!wget -q -O 'geospatial_data.csv' http://cocl.us/Geospatial_data

In [14]:
import pandas as pd

In [15]:
# Load the downloaded file; it provides a Latitude/Longitude pair per 'Postal Code'.
geospatialdata_df = pd.read_csv('geospatial_data.csv')
geospatialdata_df

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


In [54]:
# Attach coordinates to every neighbourhood row by matching postal codes.
# The inner join keeps only the codes that appear in both frames.
torontolatlong_df = df.merge(
    geospatialdata_df,
    how='inner',
    left_on='Postcode',
    right_on='Postal Code',
)
torontolatlong_df

Unnamed: 0,Postcode,Borough,Neighbourhood,Postal Code,Latitude,Longitude
0,M3A,North York,Parkwoods,M3A,43.753259,-79.329656
1,M4A,North York,Victoria Village,M4A,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,M5A,43.654260,-79.360636
3,M5A,Downtown Toronto,Regent Park,M5A,43.654260,-79.360636
4,M6A,North York,Lawrence Heights,M6A,43.718518,-79.464763
5,M6A,North York,Lawrence Manor,M6A,43.718518,-79.464763
6,M7A,Queen's Park,Not assigned,M7A,43.662301,-79.389494
7,M9A,Etobicoke,Islington Avenue,M9A,43.667856,-79.532242
8,M1B,Scarborough,Rouge,M1B,43.806686,-79.194353
9,M1B,Scarborough,Malvern,M1B,43.806686,-79.194353


In [55]:
# Keep the analysis columns only; this leaves out the duplicated 'Postal Code' join key.
kept_columns = ['Postcode', 'Borough', 'Neighbourhood', 'Latitude', 'Longitude']
toronto_neighborhoods = torontolatlong_df.loc[:, kept_columns]
toronto_neighborhoods

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.654260,-79.360636
3,M5A,Downtown Toronto,Regent Park,43.654260,-79.360636
4,M6A,North York,Lawrence Heights,43.718518,-79.464763
5,M6A,North York,Lawrence Manor,43.718518,-79.464763
6,M7A,Queen's Park,Not assigned,43.662301,-79.389494
7,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
8,M1B,Scarborough,Rouge,43.806686,-79.194353
9,M1B,Scarborough,Malvern,43.806686,-79.194353


#### Exploring Neighborhoods

In [19]:
import json # library to handle JSON files

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans



##### Considering only the borough of Scarborough

In [56]:
# Restrict the data to Scarborough and renumber the remaining rows from zero.
is_scarborough = toronto_neighborhoods['Borough'].eq('Scarborough')
toronto_data = toronto_neighborhoods.loc[is_scarborough].reset_index(drop=True)
toronto_data.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,Rouge,43.806686,-79.194353
1,M1B,Scarborough,Malvern,43.806686,-79.194353
2,M1C,Scarborough,Highland Creek,43.784535,-79.160497
3,M1C,Scarborough,Rouge Hill,43.784535,-79.160497
4,M1C,Scarborough,Port Union,43.784535,-79.160497


In [57]:
import os
import warnings

# Foursquare credentials are read from the environment so that secrets are never stored
# in the notebook. Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before starting
# the kernel.
# NOTE(review): the key pair that used to be hard-coded here has been exposed in the
# notebook's history and should be rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')  # Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')  # Foursquare Secret
if not (CLIENT_ID and CLIENT_SECRET):
    # Warn now rather than failing later with an obscure API error.
    warnings.warn(
        'Foursquare credentials are not set: define the FOURSQUARE_CLIENT_ID and '
        'FOURSQUARE_CLIENT_SECRET environment variables before querying the API.'
    )

VERSION = '20180605'  # Foursquare API version
LIMIT = 100  # limit of number of venues returned by Foursquare API
radius = 500  # define radius

In [58]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare "explore" endpoint around each location and tabulate the venues.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Iterated in parallel (``zip``); each triple describes one location. The name is
        printed as a progress indicator before its request is sent.
    radius : int, default 500
        Forwarded as the ``radius`` query parameter of the request.

    Returns
    -------
    pandas.DataFrame
        One row per (location, venue) pair with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue', 'Venue Latitude',
        'Venue Longitude' and 'Venue Category'. The frame is empty, but still has these
        columns, when there is no location or no venue at all.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.

    Notes
    -----
    Reads the module-level settings CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT.
    """
    column_names = ['Neighborhood',
                    'Neighborhood Latitude',
                    'Neighborhood Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and escape the query string instead of formatting the URL by hand.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30,
        )
        # Surface HTTP failures as such rather than as a KeyError while indexing the payload.
        response.raise_for_status()

        groups = response.json()['response'].get('groups') or []
        items = groups[0]['items'] if groups else []

        # Keep only the relevant information for each nearby venue.
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # A venue without any category gets None instead of raising IndexError.
                categories[0]['name'] if categories else None,
            ))

    # Passing the column names to the constructor keeps the schema even with zero rows;
    # assigning `.columns` afterwards fails on an empty (zero-column) frame.
    return pd.DataFrame(venue_rows, columns=column_names)

In [59]:
# Fetch the venues around every Scarborough neighbourhood, using the default search radius.
scarborough_venues = getNearbyVenues(
    toronto_data['Neighbourhood'],
    toronto_data['Latitude'],
    toronto_data['Longitude'],
)

Rouge
Malvern
Highland Creek
Rouge Hill
Port Union
Guildwood

Morningside
West Hill
Woburn
Cedarbrae

Scarborough Village
East Birchmount Park

Ionview
Kennedy Park
Clairlea
Golden Mile
Oakridge
Cliffcrest
Cliffside
Scarborough Village West

Birch Cliff
Cliffside West

Dorset Park
Scarborough Town Centre
Wexford Heights
Maryvale
Wexford
Agincourt
Clarks Corners

Sullivan

Tam O'Shanter
Agincourt North
L'Amoreaux East

Milliken
Steeles East

L'Amoreaux West

Upper Rouge


In [60]:
# Report (rows, columns) of the venue table, then preview its first rows.
print(scarborough_venues.shape)
scarborough_venues.head()

(197, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Rouge,43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,Malvern,43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
2,Highland Creek,43.784535,-79.160497,RIGHT WAY TO GOLF,43.785177,-79.161108,Golf Course
3,Highland Creek,43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
4,Rouge Hill,43.784535,-79.160497,RIGHT WAY TO GOLF,43.785177,-79.161108,Golf Course


In [61]:
# one hot encoding
scarborough_onehot = pd.get_dummies(scarborough_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
scarborough_onehot['Neighborhood'] = scarborough_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [scarborough_onehot.columns[-1]] + list(scarborough_onehot.columns[:-1])
scarborough_onehot = scarborough_onehot[fixed_columns]

scarborough_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,Athletics & Sports,Bakery,Bank,Bar,Breakfast Spot,Bus Line,Bus Station,Café,...,Playground,Rental Car Location,Sandwich Place,Shopping Mall,Skating Rink,Smoke Shop,Soccer Field,Spa,Thai Restaurant,Vietnamese Restaurant
0,Rouge,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Malvern,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Highland Creek,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Highland Creek,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Rouge Hill,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [62]:
# Average the indicator columns per neighbourhood: each value becomes the share of that
# neighbourhood's venues that fall into the category.
scarborough_grouped = scarborough_onehot.groupby('Neighborhood', as_index=False).mean()
scarborough_grouped

Unnamed: 0,Neighborhood,American Restaurant,Athletics & Sports,Bakery,Bank,Bar,Breakfast Spot,Bus Line,Bus Station,Café,...,Playground,Rental Car Location,Sandwich Place,Shopping Mall,Skating Rink,Smoke Shop,Soccer Field,Spa,Thai Restaurant,Vietnamese Restaurant
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,...,0.0,0.0,0.2,0.0,0.2,0.0,0.0,0.0,0.0,0.0
1,Agincourt North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Birch Cliff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,...,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0
3,Cedarbrae,0.0,0.142857,0.142857,0.142857,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0
4,Clairlea,0.0,0.0,0.25,0.0,0.0,0.0,0.25,0.125,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0
5,Clarks Corners,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.090909,0.0,0.090909,0.0,0.0,0.0,0.0,0.090909,0.0
6,Cliffcrest,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Cliffside,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Cliffside West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,...,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0
9,Dorset Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857


In [65]:
num_top_venues = 5

# Print, for every neighbourhood, its most frequent venue categories.
for hood in scarborough_grouped['Neighborhood']:
    print("----"+hood+"----")
    hood_row = scarborough_grouped[scarborough_grouped['Neighborhood'] == hood]
    # Transpose so that every category becomes a row of (venue, freq).
    freq_table = hood_row.T.reset_index()
    freq_table.columns = ['venue','freq']
    # The first row holds the neighbourhood name rather than a frequency, so skip it.
    freq_table = freq_table.iloc[1:]
    freq_table = freq_table.assign(freq=freq_table['freq'].astype(float)).round({'freq': 2})
    ranked = freq_table.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

----Agincourt----
                venue  freq
0              Lounge   0.2
1      Breakfast Spot   0.2
2        Skating Rink   0.2
3      Sandwich Place   0.2
4  Chinese Restaurant   0.2


----Agincourt North----
                       venue  freq
0                       Park   0.5
1                 Playground   0.5
2        American Restaurant   0.0
3  Latin American Restaurant   0.0
4         Light Rail Station   0.0


----Birch Cliff----
                   venue  freq
0           Skating Rink  0.25
1                   Café  0.25
2  General Entertainment  0.25
3        College Stadium  0.25
4    American Restaurant  0.00


----Cedarbrae
----
                  venue  freq
0                Bakery  0.14
1                  Bank  0.14
2       Thai Restaurant  0.14
3    Athletics & Sports  0.14
4  Caribbean Restaurant  0.14


----Clairlea----
          venue  freq
0        Bakery  0.25
1      Bus Line  0.25
2  Intersection  0.12
3  Soccer Field  0.12
4   Bus Station  0.12


----Clarks Corne

In [66]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the `num_top_venues` largest entries of `row`.

    The first element of `row` is skipped (callers pass rows whose first entry is the
    neighbourhood name); the remaining values are ranked in descending order and the
    index labels of the leading ones are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [69]:
num_top_venues = 10


def _ordinal(position):
    """Return `position` with its English ordinal suffix (1st, 2nd, 3rd, 4th, 11th, 21st, ...)."""
    # 10-20 always take "th" (11th, 12th, 13th); otherwise the last digit decides.
    if 10 <= position % 100 <= 20:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(position % 10, 'th')
    return '{}{}'.format(position, suffix)


# Create the column names according to the number of top venues. An explicit ordinal
# helper replaces the former bare `except:` fallback, which also mislabelled ranks
# beyond 20 (e.g. "21th").
columns = ['Neighborhood'] + [
    '{} Most Common Venue'.format(_ordinal(rank)) for rank in range(1, num_top_venues + 1)
]

# Create a new dataframe with one row per neighbourhood.
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = scarborough_grouped['Neighborhood']

# Fill every row with that neighbourhood's most common venue categories, best first.
for ind in range(scarborough_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(scarborough_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Chinese Restaurant,Skating Rink,Sandwich Place,Breakfast Spot,Lounge,College Stadium,Grocery Store,Golf Course,General Entertainment,Fried Chicken Joint
1,Agincourt North,Park,Playground,Coffee Shop,Hakka Restaurant,Grocery Store,Golf Course,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store
2,Birch Cliff,College Stadium,Skating Rink,General Entertainment,Café,Vietnamese Restaurant,Hakka Restaurant,Grocery Store,Golf Course,Fried Chicken Joint,Fast Food Restaurant
3,Cedarbrae,Athletics & Sports,Bakery,Bank,Hakka Restaurant,Caribbean Restaurant,Fried Chicken Joint,Thai Restaurant,Vietnamese Restaurant,Discount Store,Hobby Shop
4,Clairlea,Bakery,Bus Line,Intersection,Soccer Field,Bus Station,Fast Food Restaurant,Convenience Store,Hakka Restaurant,Grocery Store,Golf Course


#### Clustering Neighborhoods

In [70]:
# set number of clusters
kclusters = 5

# Feature matrix: every column except the neighbourhood label.
# The axis is passed by keyword because pandas 2.x rejects the former positional form
# `drop('Neighborhood', 1)`.
scarborough_grouped_clustering = scarborough_grouped.drop('Neighborhood', axis=1)

# run k-means clustering (fixed random_state so the labels are reproducible)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(scarborough_grouped_clustering)

# check cluster labels generated for the first rows of the dataframe
kmeans.labels_[0:10]

array([1, 2, 1, 1, 1, 1, 4, 4, 1, 1], dtype=int32)

In [82]:
# Add the clustering labels to the top-venue summary. `kmeans.labels_` follows the row order
# of `scarborough_grouped`, which is also the row order of `neighborhoods_venues_sorted`.
# The labels are inserted into a fresh frame (dropping any label column left over from an
# earlier run), so this cell can be re-run: DataFrame.insert raises when the column already
# exists, and without the labels the map cell fails on the missing 'Cluster Labels' column.
venues_with_clusters = neighborhoods_venues_sorted.drop('Cluster Labels', axis=1, errors='ignore')
venues_with_clusters.insert(0, 'Cluster Labels', kmeans.labels_)

# Merge the labelled summary into toronto_data so that every neighbourhood carries its
# coordinates, its cluster label and its most common venues.
toronto_merged = pd.merge(
    toronto_data,
    venues_with_clusters,
    how='inner',
    left_on='Neighbourhood',
    right_on='Neighborhood',
)

toronto_merged.head() # check the last columns!



Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,Rouge,43.806686,-79.194353,0,Rouge,Fast Food Restaurant,Vietnamese Restaurant,Thai Restaurant,Hobby Shop,Hakka Restaurant,Grocery Store,Golf Course,General Entertainment,Fried Chicken Joint,Electronics Store
1,M1B,Scarborough,Malvern,43.806686,-79.194353,0,Malvern,Fast Food Restaurant,Vietnamese Restaurant,Thai Restaurant,Hobby Shop,Hakka Restaurant,Grocery Store,Golf Course,General Entertainment,Fried Chicken Joint,Electronics Store
2,M1C,Scarborough,Highland Creek,43.784535,-79.160497,3,Highland Creek,Bar,Golf Course,Vietnamese Restaurant,Convenience Store,Hobby Shop,Hakka Restaurant,Grocery Store,General Entertainment,Fried Chicken Joint,Fast Food Restaurant
3,M1C,Scarborough,Rouge Hill,43.784535,-79.160497,3,Rouge Hill,Bar,Golf Course,Vietnamese Restaurant,Convenience Store,Hobby Shop,Hakka Restaurant,Grocery Store,General Entertainment,Fried Chicken Joint,Fast Food Restaurant
4,M1C,Scarborough,Port Union,43.784535,-79.160497,3,Port Union,Bar,Golf Course,Vietnamese Restaurant,Convenience Store,Hobby Shop,Hakka Restaurant,Grocery Store,General Entertainment,Fried Chicken Joint,Fast Food Restaurant


In [80]:
!conda install -c conda-forge folium=0.5.0 --yes 
import folium # map rendering library

Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following NEW packages will be INSTALLED:

    altair:  2.2.2-py35_1 conda-forge
    branca:  0.3.1-py_0   conda-forge
    folium:  0.5.0-py_0   conda-forge
    vincent: 0.4.4-py_1   conda-forge

altair-2.2.2-p 100% |################################| Time: 0:00:00  44.13 MB/s
branca-0.3.1-p 100% |################################| Time: 0:00:00  26.57 MB/s
vincent-0.4.4- 100% |################################| Time: 0:00:00  40.79 MB/s
folium-0.5.0-p 100% |################################| Time: 0:00:00  49.38 MB/s


In [84]:
# create map
map_clusters = folium.Map(location=[43.70011, -79.4163], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster, as hex strings
rainbow = [colors.rgb2hex(rgba) for rgba in cm.rainbow(np.linspace(0, 1, kclusters))]

# add markers to the map
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Cluster labels run from 0 to kclusters - 1, so they index the palette directly
    # (the former `cluster-1` relied on negative-index wraparound for cluster 0).
    marker_color = rainbow[int(cluster)]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters