In [1]:
#!conda install -c conda-forge beautifulsoup4
from bs4 import BeautifulSoup
#!conda install -c conda-forge lxml
import requests
from geopy.geocoders import Nominatim
!conda install -c conda-forge folium
import folium
import pandas as pd
from pandas.io.json import json_normalize

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/DSX-Python35

  added / updated specs: 
    - folium


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    altair-2.2.2               |           py35_1         462 KB  conda-forge
    openssl-1.0.2r             |       h14c3975_0         3.1 MB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    folium-0.9.1               |             py_0          59 KB  conda-forge
    ca-certificates-2019.3.9   |       hecc5488_0         146 KB  conda-forge
    certifi-2018.8.24          |        py35_1001         139 KB  conda-forge
    branca-0.3.1               |             py_0          25 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         4.0 MB

The following NEW packages will be IN

In [62]:
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

### 1. Scrape the Wikipedia page to get data

In [3]:
# Download the Wikipedia page that lists the postal codes starting with "M".
WIKI_URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
response = requests.get(WIKI_URL, timeout=30)
# Stop here on an HTTP error instead of silently parsing an error page below.
response.raise_for_status()
source = response.text

# Parse the HTML with the lxml parser so the table can be located in the next cell.
soup = BeautifulSoup(source, 'lxml')
# Uncomment to inspect the parsed document:
#print(soup.prettify())

In [4]:
# Locate the postal-code table and collect every data cell in document order.
table = soup.find('table', class_='wikitable sortable')
if table is None:
    # Without this guard the failure would be an opaque AttributeError on the next line.
    raise ValueError("No table with class 'wikitable sortable' was found in the page")
data = table.find_all('td')


def _link_text(cell):
    """Return the text of the first <a> element inside ``cell``, or None if it has none."""
    link = cell.a
    return link.text if link is not None else None


# Cells are consumed in groups of three: postcode, borough, neighborhood.
# The postcode is the raw cell text; borough and neighborhood come from the cell's link,
# and are recorded as None when the cell contains no link.
Postcode = [item.text for item in data[0::3]]
Borough = [_link_text(item) for item in data[1::3]]
Neighborhood = [_link_text(item) for item in data[2::3]]

### 2. Create the dataframe

In [5]:
# Assemble the three scraped lists into one table, fixing the column order explicitly.
df_data = dict(Postcode=Postcode, Borough=Borough, Neighborhood=Neighborhood)
df = pd.DataFrame(df_data, columns=['Postcode', 'Borough', 'Neighborhood'])
df.shape

(288, 3)

#### Remove cells with a borough that is Not assigned

In [6]:
# Keep only the rows that have a borough, then renumber the rows from zero.
df = df.dropna(subset=['Borough']).reset_index(drop=True)
df.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights


#### Assign neighborhoods that are Not assigned to their borough

In [7]:
# Rows without a neighborhood take the name of their borough instead.
missing_index = df.index[df['Neighborhood'].isna()]
df['Neighborhood'] = df['Neighborhood'].fillna(df['Borough'])
df.head(12)

Unnamed: 0,Postcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights
5,M6A,North York,Lawrence Manor
6,M7A,Queen's Park,Queen's Park
7,M9A,Etobicoke,Islington Avenue
8,M1B,Scarborough,Rouge
9,M1B,Scarborough,Malvern


#### Combine multiple neighborhoods with the same postcode together, separated with ', '

In [8]:
# Remove exact duplicate rows, then collapse every (Postcode, Borough) pair into a
# single row whose neighborhood names are concatenated with ', '.
df = df.drop_duplicates()
neighborhoods_by_postcode = df.groupby(['Postcode', 'Borough'])['Neighborhood']
Toronto_Neighborhoods = neighborhoods_by_postcode.apply(', '.join).to_frame().reset_index()
Toronto_Neighborhoods.head(20)

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Scarborough, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Scarborough
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Scarborough, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough"
9,M1N,Scarborough,"Birch Cliff, Scarborough"


In [9]:
# Display the (rows, columns) shape of the grouped table.
Toronto_Neighborhoods.shape

(100, 3)

#### Get the location information (latitude, longitude)

In [10]:
# Read the CSV served at this URL into a DataFrame and preview its first rows.
df_geo=pd.read_csv('http://cocl.us/Geospatial_data')
df_geo.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [11]:
# Attach the coordinates to each postcode (inner join on the postcode), then discard
# the right-hand key column, which duplicates 'Postcode'.
df_Toronto_Neighborhoods = Toronto_Neighborhoods.merge(
    df_geo, left_on='Postcode', right_on='Postal Code'
).drop(columns=['Postal Code'])
df_Toronto_Neighborhoods.head(12)

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Scarborough, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Scarborough,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Scarborough, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Scarborough",43.692657,-79.264848


### 3. Visualize Toronto neighborhoods on map

In [12]:
# Look up the coordinates of Toronto; they are used as the centre of the maps below.
address = 'Toronto, ON'
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; fail with a clear message
# rather than with an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


#### Select boroughs that have 'Toronto' in their names

In [13]:
# Restrict the table to boroughs whose name contains 'Toronto' and renumber the rows.
is_toronto_borough = df_Toronto_Neighborhoods['Borough'].str.contains('Toronto')
Toronto = df_Toronto_Neighborhoods[is_toronto_borough].reset_index(drop=True)
Toronto.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"East Toronto, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"East Toronto, India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,East Toronto,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [14]:
# Base map centred on the geocoded Toronto coordinates.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# Every neighborhood is drawn with the same marker settings.
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# One circle per neighborhood; its popup shows the neighborhood name.
for lat, lng, neighborhood_name in zip(Toronto['Latitude'], Toronto['Longitude'], Toronto['Neighborhood']):
    popup = folium.Popup(neighborhood_name, parse_html=True)
    marker = folium.CircleMarker([lat, lng], popup=popup, **marker_style)
    marker.add_to(map_toronto)

map_toronto

### 4.  Find venues in Toronto neighborhoods from Foursquare

In [None]:
# The code was removed by Watson Studio for sharing.

In [None]:
# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category of a venue row, or None if it has none.

    ``row`` must support ``row['venue.categories']`` and yield a sized sequence whose
    elements carry a ``'name'`` key.
    """
    venue_categories = row['venue.categories']
    if len(venue_categories) > 0:
        return venue_categories[0]['name']
    return None

In [17]:
# function that gets the nearby venues of a given location
def getNearbyVenues(names, latitudes, longitudes, radius=500,LIMIT=100):
    """Query the Foursquare "explore" endpoint around each location and tabulate the venues.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Label and coordinates of each location; consumed in parallel with ``zip``.
    radius : value sent to the API as the ``radius`` query parameter (default 500).
    LIMIT : value sent to the API as the ``limit`` query parameter (default 100).

    Returns
    -------
    pandas.DataFrame
        One row per returned venue, with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue', 'Venue Latitude',
        'Venue Longitude' and 'Venue Category'. When no venue is collected, an empty
        frame with these same columns is returned.

    Notes
    -----
    Reads the module-level ``CLIENT_ID``, ``CLIENT_SECRET`` and ``VERSION``, which must
    be defined before the first request is made. Prints each name as it is processed.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in results:
            venue = item['venue']
            categories = venue.get('categories') or []
            # A venue may carry no category at all; record None instead of raising
            # IndexError, mirroring what get_category_type does for an empty list.
            category = categories[0]['name'] if categories else None
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category))

    # Passing the column names to the constructor keeps the schema even when no venue
    # was collected; assigning them afterwards fails on an empty frame.
    nearby_venues = pd.DataFrame(venue_rows, columns=columns)

    return(nearby_venues)

In [18]:
# Several postcodes share one neighborhood label (e.g. "Downtown Toronto"). Every repeat
# after the first occurrence gets '_<Postcode>' appended so that the labels are unique.
duplicated = Toronto[Toronto.duplicated(subset='Neighborhood')]
repeated_rows = duplicated.index
Toronto.loc[repeated_rows, 'Neighborhood'] = (
    duplicated['Neighborhood'] + '_' + duplicated['Postcode']
)
# Number of labels that are still duplicated after the renaming.
Toronto.duplicated(subset='Neighborhood').sum()

0

In [20]:
# Get nearby venues for each neighborhood
# (radius and LIMIT are not passed, so the function's defaults apply)
toronto_venues = getNearbyVenues(names=Toronto['Neighborhood'],
                                 latitudes=Toronto['Latitude'],
                                 longitudes=Toronto['Longitude']
                                )
toronto_venues.head()

The Beaches
East Toronto, Riverdale
East Toronto, India Bazaar
East Toronto
Lawrence Park
Central Toronto
Central Toronto_M4R
Central Toronto_M4S
Moore Park, Central Toronto
Deer Park, Central Toronto, Rathnelly, South Hill
Rosedale
Cabbagetown, St. James Town
Church and Wellesley
Harbourfront, Regent Park
Downtown Toronto
St. James Town
Berczy Park
Downtown Toronto_M5G
Downtown Toronto_M5H
Downtown Toronto, Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Commerce Court, Downtown Toronto
Central Toronto_M5N
Forest Hill North, Central Toronto
The Annex, Central Toronto, Yorkville
Downtown Toronto, University of Toronto
Chinatown, Grange Park, Kensington Market
CN Tower, Downtown Toronto, King and Spadina, Railway Lands, South Niagara
Downtown Toronto_M5W
First Canadian Place, Underground city
Downtown Toronto_M6G
Dovercourt Village, West Toronto
Little Portugal, Trinity
West Toronto, Exhibition Place, Parkdale Village
High Park, West Toronto
Parkdale, Roncesvalle

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
4,"East Toronto, Riverdale",43.679557,-79.352188,Pantheon,43.677621,-79.351434,Greek Restaurant


### 5. Cluster Toronto neighborhoods by the venues

In [21]:
# Count the non-null 'Venue' entries per neighborhood and list them in ascending order.
toronto_venues.groupby('Neighborhood')['Venue'].count().sort_values()

Neighborhood
Central Toronto_M5N                                                             2
Moore Park, Central Toronto                                                     2
Lawrence Park                                                                   3
Forest Hill North, Central Toronto                                              4
The Beaches                                                                     4
Rosedale                                                                        4
Central Toronto                                                                 9
CN Tower, Downtown Toronto, King and Spadina, Railway Lands, South Niagara     14
Deer Park, Central Toronto, Rathnelly, South Hill                              14
Downtown Toronto_M6G                                                           15
Parkdale, Roncesvalles                                                         15
Central Toronto_M4R                                                            16
Eas

#### Cluster neighborhoods that have more than 10 venues, and put neighborhoods that have 10 or fewer venues into one cluster

In [22]:
# Venue count per neighborhood, computed once and reused below.
venue_counts = toronto_venues.groupby('Neighborhood')['Venue'].count()
# x = how many neighborhoods have 10 venues or fewer.
x = (venue_counts <= 10).sum()
# The x smallest counts are exactly those neighborhoods.
low_venue_neighborhood = venue_counts.sort_values().head(x)
# Keep only the venues that belong to the remaining neighborhoods.
in_low_venue_group = toronto_venues['Neighborhood'].isin(low_venue_neighborhood.index)
toronto_venues_new = toronto_venues[~in_low_venue_group].reset_index(drop=True)
toronto_venues_new.groupby('Neighborhood')['Venue'].count().sort_values()

Neighborhood
Deer Park, Central Toronto, Rathnelly, South Hill                              14
CN Tower, Downtown Toronto, King and Spadina, Railway Lands, South Niagara     14
Parkdale, Roncesvalles                                                         15
Downtown Toronto_M6G                                                           15
Central Toronto_M4R                                                            16
East Toronto, India Bazaar                                                     18
East Toronto_M7Y                                                               19
Dovercourt Village, West Toronto                                               20
West Toronto, Exhibition Place, Parkdale Village                               21
High Park, West Toronto                                                        23
The Annex, Central Toronto, Yorkville                                          23
Downtown Toronto, University of Toronto                                        34
Cen

In [23]:
# One indicator column per venue category, with no prefix added to the column names.
toronto_onehot = pd.get_dummies(toronto_venues_new[['Venue Category']], prefix="", prefix_sep="")

# Append the owning neighborhood. The column is named 'Neighborhoods' (plural) because
# 'Neighborhood' is itself one of the venue categories.
toronto_onehot['Neighborhoods'] = toronto_venues_new['Neighborhood']

# Rotate the column order so the column just appended comes first.
column_names = toronto_onehot.columns.tolist()
fixed_columns = column_names[-1:] + column_names[:-1]
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot

Unnamed: 0,Neighborhoods,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"East Toronto, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"East Toronto, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"East Toronto, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"East Toronto, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"East Toronto, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,"East Toronto, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,"East Toronto, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,"East Toronto, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,"East Toronto, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
9,"East Toronto, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [24]:
# Average the indicator columns per neighborhood, which gives the share of each venue
# category, then turn the group key back into an ordinary column.
neighborhood_means = toronto_onehot.groupby('Neighborhoods').mean()
toronto_grouped = neighborhood_means.reset_index()
toronto_grouped

Unnamed: 0,Neighborhoods,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"CN Tower, Downtown Toronto, King and Spadina, ...",0.0,0.0,0.0,0.071429,0.071429,0.071429,0.142857,0.142857,0.142857,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Cabbagetown, St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Central Toronto_M4R,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625
4,Central Toronto_M4S,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Chinatown, Grange Park, Kensington Market",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.06,0.0,0.0,0.03,0.01,0.0,0.0,0.0
6,Church and Wellesley,0.0,0.011494,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.011494,0.0,0.011494,0.0,0.011494,0.0,0.011494
7,"Commerce Court, Downtown Toronto",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0
8,"Deer Park, Central Toronto, Rathnelly, South Hill",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0
9,"Design Exchange, Toronto Dominion Centre",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0


#### Let's print each neighborhood along with the top 5 most common venues

In [25]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhoods']:
    print("----"+hood+"----")
    # Transpose this neighborhood's single row into a two-column (venue, freq) table.
    hood_row = toronto_grouped[toronto_grouped['Neighborhoods'] == hood]
    freq_table = hood_row.T.reset_index()
    freq_table.columns = ['venue', 'freq']
    top_venues = (
        freq_table.iloc[1:]  # the first row holds the neighborhood name, not a frequency
        .assign(freq=lambda table: table['freq'].astype(float))
        .round({'freq': 2})
        .sort_values('freq', ascending=False)
        .reset_index(drop=True)
        .head(num_top_venues)
    )
    print(top_venues)
    print('\n')

----Berczy Park----
                venue  freq
0         Coffee Shop  0.09
1        Cocktail Bar  0.05
2  Seafood Restaurant  0.04
3              Bakery  0.04
4          Steakhouse  0.04


----CN Tower, Downtown Toronto, King and Spadina, Railway Lands, South Niagara----
              venue  freq
0   Airport Service  0.14
1  Airport Terminal  0.14
2    Airport Lounge  0.14
3          Boutique  0.07
4   Harbor / Marina  0.07


----Cabbagetown, St. James Town----
         venue  freq
0  Coffee Shop  0.09
1   Restaurant  0.07
2         Park  0.04
3       Bakery  0.04
4         Café  0.04


----Central Toronto_M4R----
          venue  freq
0   Coffee Shop  0.12
1   Yoga Studio  0.06
2    Bagel Shop  0.06
3  Dessert Shop  0.06
4         Diner  0.06


----Central Toronto_M4S----
             venue  freq
0      Pizza Place  0.11
1     Dessert Shop  0.08
2   Sandwich Place  0.08
3      Coffee Shop  0.05
4  Thai Restaurant  0.05


----Chinatown, Grange Park, Kensington Market----
             

In [26]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the ``num_top_venues`` largest entries of ``row``.

    The first element of ``row`` is skipped (callers in this notebook pass rows whose
    first field is the neighborhood name). The remaining values are ranked in
    descending order and the index labels of the leading entries are returned as an
    array; fewer labels come back when the row has fewer entries.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    top_labels = ranked.index.values[:num_top_venues]
    return top_labels

#### Find top 10 most common venues for each neighborhood

In [27]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhoods']
for ind in range(num_top_venues):
    # Ranks 1-3 take their own ordinal suffix; every later rank uses 'th'.
    # An explicit bounds check replaces the bare except that previously hid any error.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe with one row per neighborhood
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhoods'] = toronto_grouped['Neighborhoods']

# fill each row with that neighborhood's most common venue categories, best first
for ind in range(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhoods,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Cocktail Bar,Farmers Market,Bakery,Italian Restaurant,Steakhouse,Beer Bar,Seafood Restaurant,Café,Cheese Shop
1,"CN Tower, Downtown Toronto, King and Spadina, ...",Airport Terminal,Airport Service,Airport Lounge,Boat or Ferry,Airport Gate,Sculpture Garden,Plane,Airport Food Court,Airport,Boutique
2,"Cabbagetown, St. James Town",Coffee Shop,Restaurant,Park,Café,Italian Restaurant,Bakery,Pub,Pizza Place,Pharmacy,Breakfast Spot
3,Central Toronto_M4R,Coffee Shop,Yoga Studio,Bagel Shop,Fast Food Restaurant,Mexican Restaurant,Diner,Dessert Shop,Park,Clothing Store,Chinese Restaurant
4,Central Toronto_M4S,Pizza Place,Sandwich Place,Dessert Shop,Thai Restaurant,Sushi Restaurant,Italian Restaurant,Restaurant,Café,Coffee Shop,Gourmet Shop


#### Run k-means to cluster the neighborhoods

In [28]:
# set number of clusters
kclusters = 5

# Feature matrix for clustering: every column except the neighborhood label.
# The keyword form is used because the positional axis argument, drop(label, 1),
# is rejected by pandas 2.0 and later.
toronto_grouped_clustering = toronto_grouped.drop(columns=['Neighborhoods'])

# run k-means clustering (fixed random_state so the labels are reproducible)
kmeans = KMeans(init='k-means++',n_clusters=kclusters, n_init=20,random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([1, 2, 1, 1, 1, 4, 1, 1, 0, 1, 4, 1, 1, 4, 1, 1, 1, 3, 1, 4, 1, 4, 1,
       1, 4, 4, 1, 1, 1, 1, 1], dtype=int32)

In [38]:
# add clustering labels
# The labels go into a fresh frame rather than into neighborhoods_venues_sorted itself:
# an in-place insert works only once per kernel session (the column then already
# exists), and leaving it out makes the 'Cluster Labels' column disappear on a fresh run.
neighborhoods_venues_labeled = neighborhoods_venues_sorted.drop(columns=['Cluster Labels'], errors='ignore')
neighborhoods_venues_labeled.insert(0, 'Cluster Labels', kmeans.labels_)

# merge two dataframes to add latitude/longitude for each labeled neighborhood
toronto_merged = pd.merge(Toronto, neighborhoods_venues_labeled, left_on='Neighborhood', right_on='Neighborhoods')
# 'Neighborhoods' duplicates 'Neighborhood' after the merge, so it is dropped
toronto_merged = toronto_merged.drop(columns=['Neighborhoods'])
toronto_merged.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4K,East Toronto,"East Toronto, Riverdale",43.679557,-79.352188,1,Greek Restaurant,Coffee Shop,Ice Cream Shop,Italian Restaurant,Furniture / Home Store,Yoga Studio,Bookstore,Brewery,Bubble Tea Shop,Café
1,M4L,East Toronto,"East Toronto, India Bazaar",43.668999,-79.315572,4,Park,Pizza Place,Sushi Restaurant,Ice Cream Shop,Italian Restaurant,Fish & Chips Shop,Liquor Store,Fast Food Restaurant,Movie Theater,Pet Store
2,M4M,East Toronto,East Toronto,43.659526,-79.340923,1,Café,Coffee Shop,Gastropub,Italian Restaurant,Bakery,American Restaurant,Yoga Studio,Convenience Store,Brewery,Seafood Restaurant
3,M4R,Central Toronto,Central Toronto_M4R,43.715383,-79.405678,1,Coffee Shop,Yoga Studio,Bagel Shop,Fast Food Restaurant,Mexican Restaurant,Diner,Dessert Shop,Park,Clothing Store,Chinese Restaurant
4,M4S,Central Toronto,Central Toronto_M4S,43.704324,-79.38879,1,Pizza Place,Sandwich Place,Dessert Shop,Thai Restaurant,Sushi Restaurant,Italian Restaurant,Restaurant,Café,Coffee Shop,Gourmet Shop


In [72]:
# Neighborhoods with 10 venues or fewer were left out of k-means. They get one extra
# cluster label of their own, numbered right after the k-means labels (0 .. n_clusters-1).
low_venue_label = kmeans.n_clusters
# Total number of clusters including the extra one. It is derived from the fitted model
# instead of being incremented in place, so re-running this cell does not keep growing it.
kclusters = kmeans.n_clusters + 1

toronto_low_venues = Toronto[Toronto['Neighborhood'].isin(low_venue_neighborhood.index)].reset_index(drop=True)
# Size the label column from the frame it is inserted into so the lengths always agree.
toronto_low_venues.insert(5, 'Cluster Labels', np.full(len(toronto_low_venues), low_venue_label))

# The outer merge stacks both groups; the venue columns are empty for the low-venue rows.
toronto_neighborhoods = pd.merge(toronto_merged, toronto_low_venues, how='outer', on=['Postcode','Borough','Neighborhood','Latitude','Longitude','Cluster Labels'])
toronto_neighborhoods

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4K,East Toronto,"East Toronto, Riverdale",43.679557,-79.352188,1,Greek Restaurant,Coffee Shop,Ice Cream Shop,Italian Restaurant,Furniture / Home Store,Yoga Studio,Bookstore,Brewery,Bubble Tea Shop,Café
1,M4L,East Toronto,"East Toronto, India Bazaar",43.668999,-79.315572,4,Park,Pizza Place,Sushi Restaurant,Ice Cream Shop,Italian Restaurant,Fish & Chips Shop,Liquor Store,Fast Food Restaurant,Movie Theater,Pet Store
2,M4M,East Toronto,East Toronto,43.659526,-79.340923,1,Café,Coffee Shop,Gastropub,Italian Restaurant,Bakery,American Restaurant,Yoga Studio,Convenience Store,Brewery,Seafood Restaurant
3,M4R,Central Toronto,Central Toronto_M4R,43.715383,-79.405678,1,Coffee Shop,Yoga Studio,Bagel Shop,Fast Food Restaurant,Mexican Restaurant,Diner,Dessert Shop,Park,Clothing Store,Chinese Restaurant
4,M4S,Central Toronto,Central Toronto_M4S,43.704324,-79.38879,1,Pizza Place,Sandwich Place,Dessert Shop,Thai Restaurant,Sushi Restaurant,Italian Restaurant,Restaurant,Café,Coffee Shop,Gourmet Shop
5,M4V,Central Toronto,"Deer Park, Central Toronto, Rathnelly, South Hill",43.686412,-79.400049,0,Coffee Shop,Pub,Light Rail Station,Fried Chicken Joint,Bagel Shop,Sports Bar,American Restaurant,Sushi Restaurant,Supermarket,Pizza Place
6,M4X,Downtown Toronto,"Cabbagetown, St. James Town",43.667967,-79.367675,1,Coffee Shop,Restaurant,Park,Café,Italian Restaurant,Bakery,Pub,Pizza Place,Pharmacy,Breakfast Spot
7,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316,1,Coffee Shop,Japanese Restaurant,Sushi Restaurant,Restaurant,Gay Bar,Burger Joint,Pub,Café,Gym,Bubble Tea Shop
8,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636,1,Coffee Shop,Pub,Bakery,Park,Mexican Restaurant,Restaurant,Breakfast Spot,Café,Theater,Hotel
9,M5B,Downtown Toronto,Downtown Toronto,43.657162,-79.378937,1,Coffee Shop,Clothing Store,Café,Cosmetics Shop,Middle Eastern Restaurant,Bakery,Tea Room,Bubble Tea Shop,Pizza Place,Diner


#### Visualize the clustered Toronto neighborhoods on map

In [73]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster label
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(toronto_neighborhoods['Latitude'], toronto_neighborhoods['Longitude'], toronto_neighborhoods['Neighborhood'],
                                  toronto_neighborhoods['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Labels run from 0 to kclusters-1, so the label itself indexes the palette.
    # The previous cluster-1 offset reached cluster 0 only through negative indexing.
    cluster_color = rainbow[int(cluster)]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters