# Segmenting and Clustering Neighborhoods in Toronto

In [None]:
import pandas as pd
import requests
#!conda install -c conda-forge lxml --yes
import lxml

# Part 1: Create the DataFrame

In [2]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

In [3]:
r = requests.get(url)

## 1. Scraping Toronto data with the help of Pandas library

In [4]:
dataframes=pd.read_html(r.text, header=0)

## 2. Get the DataFrame with the Toronto neighborhood data

In [5]:
toronto = dataframes[0]

In [6]:
toronto.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


## 3. Drop all rows with not assigned boroughs & Not assigned neighborhoods will renamed after their corresponding Borough

In [7]:
if len(toronto.loc[toronto.Borough == 'Not assigned']) == 0:
    print('None neighboorhood listed as non assigned after filtering')
else:
    print('Let\'s Drop {} non assigned Boroughs in Toronto'.format(len(toronto.loc[toronto.Borough == 'Not assigned'])))
    toronto.drop(toronto.loc[toronto.Borough == 'Not assigned'].index, inplace = True)
    toronto.reset_index(inplace = True, drop = True)
    

Let's Drop 77 non assigned Boroughs in Toronto


In [8]:
if len(toronto.loc[toronto.Neighbourhood == 'Not assigned']) == 0:
    print('None neighborhood listed as non assigned after filtering')
    



None neighborhood listed as non assigned after filtering


In [9]:
toronto.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [10]:
toronto.shape

(103, 3)

## 4. Combine Neighborhoods with the same Postal Code

### This is already fixed on the DataFrame 

## 5. Print  the shape of the DataFrame

In [11]:
toronto.shape

(103, 3)

# Part 2: Toronto Neighborhoods Location 

In [12]:
#!conda install -c conda-forge geopy --yes
#!conda install -c conda-forge folium=0.5.0 --yes 
import folium # map rendering library
from geopy.geocoders import Nominatim 


In [13]:
#!wget 'https://cocl.us/Geospatial_data' -O 'toronto_coordinates.csv' 
print('Done')

Done


In [14]:
coord = pd.read_csv('toronto_coordinates.csv')

## Merge toronto data with the coordinates by doing an inner join on the dataframes

In [15]:
toronto_df = toronto.merge(coord, how = 'inner', left_on = 'Postal Code', right_on = 'Postal Code')

In [16]:
toronto_df

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


# Part 3: Explore and cluster the neighborhoods in Toronto

In [17]:
## Explore Toronto's Neighborhoods

address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent = 'Toronto')
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [18]:
# create a Map of Toronto with the surrounding neighborhoods

map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, borough, neighborhood in zip(toronto_df['Latitude'], toronto_df['Longitude'], toronto_df['Borough'], toronto_df['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#E52B50',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

Foursquare credentials

In [19]:
CLIENT_ID = '0WC5F0U5FBMNZZTRU2JNK2DPMXAMCZIALHOKO5D3GLPBW3Q3' # your Foursquare ID
CLIENT_SECRET = 'TPBRN5MV3PU5M3TFKE40KGHFBSDLZB5ZABQZLVLABPX3MRQ1' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 0WC5F0U5FBMNZZTRU2JNK2DPMXAMCZIALHOKO5D3GLPBW3Q3
CLIENT_SECRET:TPBRN5MV3PU5M3TFKE40KGHFBSDLZB5ZABQZLVLABPX3MRQ1


In [20]:
### Pick all boroughs that contain the word Toronto

tor_data = toronto_df.loc[toronto_df['Borough'].str.contains('Toronto')].reset_index(drop = True)

In [21]:
tor_data.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031


In [22]:
tor_data.Borough.unique()

array(['Downtown Toronto', 'East Toronto', 'West Toronto',
       'Central Toronto'], dtype=object)

4 Boroughs were selected by this filtering

In [23]:
radius = 500
limit = 100

In [73]:
def getNearbyVenues(names, neigh, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, n, lat, lng in zip(names, neigh, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            limit)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            n,
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Borough', 
                  'Neighbourhood',
                  'Latitude', 
                  'Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [75]:
toronto_venues = getNearbyVenues(names=tor_data['Borough'],
                                   neigh = tor_data['Neighbourhood'],
                                   latitudes=tor_data['Latitude'],
                                   longitudes=tor_data['Longitude']
                                  )

Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
East Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
West Toronto
Downtown Toronto
West Toronto
East Toronto
Downtown Toronto
West Toronto
East Toronto
Downtown Toronto
East Toronto
Central Toronto
Central Toronto
Central Toronto
Central Toronto
West Toronto
Central Toronto
Central Toronto
West Toronto
Central Toronto
Downtown Toronto
West Toronto
Central Toronto
Downtown Toronto
Central Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
East Toronto


In [76]:
toronto_venues.shape

(1644, 8)

In [77]:
toronto_venues.head()

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant


In [79]:
#Venues per Borough
toronto_venues.groupby('Neighbourhood').count().Venue

Neighbourhood
Berczy Park                                                                                                    59
Brockton, Parkdale Village, Exhibition Place                                                                   23
Business reply mail Processing Centre, South Central Letter Processing Plant Toronto                           19
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport     17
Central Bay Street                                                                                             65
Christie                                                                                                       17
Church and Wellesley                                                                                           76
Commerce Court, Victoria Hotel                                                                                100
Davisville                                                                

In [80]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 233 uniques categories.


### Let's create one hot encodings for the venues

In [86]:

toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")


toronto_onehot['Borough'] = toronto_venues['Borough'] 
toronto_onehot['Neighbourhood'] = toronto_venues['Neighbourhood'] 


fixed_columns = [toronto_onehot.columns[-2]] + [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Borough,Neighbourhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio,Borough.1
0,Downtown Toronto,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Downtown Toronto
1,Downtown Toronto,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Downtown Toronto
2,Downtown Toronto,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Downtown Toronto
3,Downtown Toronto,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Downtown Toronto
4,Downtown Toronto,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Downtown Toronto


In [87]:
toronto_freq = toronto_onehot.groupby('Neighbourhood').mean().reset_index()
toronto_freq

Unnamed: 0,Neighbourhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.016949,0.0,0.0,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.058824,0.058824,0.058824,0.117647,0.117647,0.117647,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.015385,0.0,0.0,0.015385,0.0,0.0,0.015385
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Church and Wellesley,0.013158,0.0,0.0,0.0,0.0,0.0,0.0,0.013158,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316
7,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,...,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0
8,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Find the top 5 venues per borough

In [88]:
tor_transposed = toronto_freq.iloc[:, 1:].T

In [92]:
tor_transposed.shape

(233, 39)

In [93]:
toronto.Neighbourhood.shape

(103,)

In [94]:
for i, bor in enumerate(toronto_freq.Neighbourhood, 0):
    print('For neighborhood {} the 5 most common venues and their frequencies are:'.format(bor))
    print('------------')
    print(tor_transposed[i].nlargest(5))
    print('------------')
    
    

For neighborhood Berczy Park the 5 most common venues and their frequencies are:
------------
Coffee Shop    0.084746
Bakery         0.033898
Beer Bar       0.033898
Café           0.033898
Cheese Shop    0.033898
Name: 0, dtype: float64
------------
For neighborhood Brockton, Parkdale Village, Exhibition Place the 5 most common venues and their frequencies are:
------------
Café              0.130435
Breakfast Spot    0.086957
Coffee Shop       0.086957
Bakery            0.043478
Bar               0.043478
Name: 1, dtype: float64
------------
For neighborhood Business reply mail Processing Centre, South Central Letter Processing Plant Toronto the 5 most common venues and their frequencies are:
------------
Light Rail Station    0.105263
Auto Workshop         0.052632
Brewery               0.052632
Burrito Place         0.052632
Butcher               0.052632
Name: 2, dtype: float64
------------
For neighborhood CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Qua

In [108]:
def common_venues(data, num_venues):
    cols = ['Neighbourhood']
    
    for i in range(num_venues):
        cols.append('CommonVenue' + str(i + 1))
        
    df = pd.DataFrame(columns = cols)
    row = []
    
    mydict = {}
    for i, bor in enumerate(toronto_freq.Neighbourhood, 0):
        row.append(bor)
        row.extend(data[i].nlargest(num_venues).index.to_list())
        
        for col, r in zip(cols, row):
            mydict[col] = r
        
        df = df.append(mydict, ignore_index = True)
        row = [] 
    
    
    return df
    

In [120]:
commondf = common_venues(tor_transposed, 15)

In [121]:
commondf

Unnamed: 0,Neighbourhood,CommonVenue1,CommonVenue2,CommonVenue3,CommonVenue4,CommonVenue5,CommonVenue6,CommonVenue7,CommonVenue8,CommonVenue9,CommonVenue10,CommonVenue11,CommonVenue12,CommonVenue13,CommonVenue14,CommonVenue15
0,Berczy Park,Coffee Shop,Bakery,Beer Bar,Café,Cheese Shop,Cocktail Bar,Farmers Market,Restaurant,Seafood Restaurant,Art Gallery,BBQ Joint,Bagel Shop,Basketball Stadium,Beach,Bistro
1,"Brockton, Parkdale Village, Exhibition Place",Café,Breakfast Spot,Coffee Shop,Bakery,Bar,Burrito Place,Climbing Gym,Convenience Store,Furniture / Home Store,Grocery Store,Gym,Intersection,Italian Restaurant,Nightclub,Performing Arts Venue
2,"Business reply mail Processing Centre, South C...",Light Rail Station,Auto Workshop,Brewery,Burrito Place,Butcher,Comic Shop,Farmers Market,Fast Food Restaurant,Garden,Garden Center,Gym / Fitness Center,Park,Pizza Place,Recording Studio,Restaurant
3,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Lounge,Airport Service,Airport Terminal,Airport,Airport Food Court,Airport Gate,Bar,Boat or Ferry,Boutique,Coffee Shop,Harbor / Marina,Plane,Rental Car Location,Sculpture Garden,Afghan Restaurant
4,Central Bay Street,Coffee Shop,Italian Restaurant,Café,Japanese Restaurant,Sandwich Place,Bar,Bubble Tea Shop,Burger Joint,Department Store,Salad Place,Thai Restaurant,Art Museum,Comic Shop,Dessert Shop,Diner
5,Christie,Grocery Store,Café,Park,Athletics & Sports,Baby Store,Candy Store,Coffee Shop,Diner,Italian Restaurant,Nightclub,Restaurant,Afghan Restaurant,Airport,Airport Food Court,Airport Gate
6,Church and Wellesley,Coffee Shop,Japanese Restaurant,Sushi Restaurant,Gay Bar,Restaurant,Bubble Tea Shop,Hotel,Mediterranean Restaurant,Men's Store,Pub,Yoga Studio,Afghan Restaurant,American Restaurant,Arts & Crafts Store,Beer Bar
7,"Commerce Court, Victoria Hotel",Coffee Shop,Café,Restaurant,Hotel,American Restaurant,Gym,Deli / Bodega,Italian Restaurant,Japanese Restaurant,Seafood Restaurant,Beer Bar,Cocktail Bar,Gastropub,Tea Room,Thai Restaurant
8,Davisville,Dessert Shop,Pizza Place,Sandwich Place,Café,Coffee Shop,Gym,Italian Restaurant,Sushi Restaurant,Brewery,Diner,Farmers Market,Gas Station,Gourmet Shop,Greek Restaurant,Indian Restaurant
9,Davisville North,Breakfast Spot,Department Store,Food & Drink Shop,Gym,Gym / Fitness Center,Hotel,Park,Pizza Place,Sandwich Place,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service


# Perform Clustering

In [122]:
from sklearn.cluster import KMeans

In [123]:
toronto_freq

Unnamed: 0,Neighbourhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.016949,0.0,0.0,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.058824,0.058824,0.058824,0.117647,0.117647,0.117647,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.015385,0.0,0.0,0.015385,0.0,0.0,0.015385
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Church and Wellesley,0.013158,0.0,0.0,0.0,0.0,0.0,0.0,0.013158,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316
7,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,...,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0
8,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [124]:
# set number of clusters
clusters = 5

toronto_clust = toronto_freq.drop('Neighbourhood', axis = 1)

# run k-means clustering
kmeans = KMeans(n_clusters = clusters, random_state=0).fit(toronto_clust)


kmeans.labels_[0:10] 

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [125]:
commondf['clusters'] = kmeans.labels_

In [126]:
commondf

Unnamed: 0,Neighbourhood,CommonVenue1,CommonVenue2,CommonVenue3,CommonVenue4,CommonVenue5,CommonVenue6,CommonVenue7,CommonVenue8,CommonVenue9,CommonVenue10,CommonVenue11,CommonVenue12,CommonVenue13,CommonVenue14,CommonVenue15,clusters
0,Berczy Park,Coffee Shop,Bakery,Beer Bar,Café,Cheese Shop,Cocktail Bar,Farmers Market,Restaurant,Seafood Restaurant,Art Gallery,BBQ Joint,Bagel Shop,Basketball Stadium,Beach,Bistro,0
1,"Brockton, Parkdale Village, Exhibition Place",Café,Breakfast Spot,Coffee Shop,Bakery,Bar,Burrito Place,Climbing Gym,Convenience Store,Furniture / Home Store,Grocery Store,Gym,Intersection,Italian Restaurant,Nightclub,Performing Arts Venue,0
2,"Business reply mail Processing Centre, South C...",Light Rail Station,Auto Workshop,Brewery,Burrito Place,Butcher,Comic Shop,Farmers Market,Fast Food Restaurant,Garden,Garden Center,Gym / Fitness Center,Park,Pizza Place,Recording Studio,Restaurant,0
3,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Lounge,Airport Service,Airport Terminal,Airport,Airport Food Court,Airport Gate,Bar,Boat or Ferry,Boutique,Coffee Shop,Harbor / Marina,Plane,Rental Car Location,Sculpture Garden,Afghan Restaurant,0
4,Central Bay Street,Coffee Shop,Italian Restaurant,Café,Japanese Restaurant,Sandwich Place,Bar,Bubble Tea Shop,Burger Joint,Department Store,Salad Place,Thai Restaurant,Art Museum,Comic Shop,Dessert Shop,Diner,0
5,Christie,Grocery Store,Café,Park,Athletics & Sports,Baby Store,Candy Store,Coffee Shop,Diner,Italian Restaurant,Nightclub,Restaurant,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,0
6,Church and Wellesley,Coffee Shop,Japanese Restaurant,Sushi Restaurant,Gay Bar,Restaurant,Bubble Tea Shop,Hotel,Mediterranean Restaurant,Men's Store,Pub,Yoga Studio,Afghan Restaurant,American Restaurant,Arts & Crafts Store,Beer Bar,0
7,"Commerce Court, Victoria Hotel",Coffee Shop,Café,Restaurant,Hotel,American Restaurant,Gym,Deli / Bodega,Italian Restaurant,Japanese Restaurant,Seafood Restaurant,Beer Bar,Cocktail Bar,Gastropub,Tea Room,Thai Restaurant,0
8,Davisville,Dessert Shop,Pizza Place,Sandwich Place,Café,Coffee Shop,Gym,Italian Restaurant,Sushi Restaurant,Brewery,Diner,Farmers Market,Gas Station,Gourmet Shop,Greek Restaurant,Indian Restaurant,0
9,Davisville North,Breakfast Spot,Department Store,Food & Drink Shop,Gym,Gym / Fitness Center,Hotel,Park,Pizza Place,Sandwich Place,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,0


In [127]:
toronto_final = tor_data

toronto_final = toronto_final.join(commondf.set_index('Neighbourhood'), on='Neighbourhood')
toronto_final.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,CommonVenue1,CommonVenue2,CommonVenue3,CommonVenue4,CommonVenue5,...,CommonVenue7,CommonVenue8,CommonVenue9,CommonVenue10,CommonVenue11,CommonVenue12,CommonVenue13,CommonVenue14,CommonVenue15,clusters
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,Coffee Shop,Bakery,Park,Pub,Breakfast Spot,...,Theater,Antique Shop,Art Gallery,Asian Restaurant,Bank,Beer Store,Chocolate Shop,Dessert Shop,Distribution Center,0
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,Coffee Shop,Diner,Arts & Crafts Store,Bank,Bar,...,Burrito Place,Café,College Auditorium,College Cafeteria,Creperie,Distribution Center,Fried Chicken Joint,General Entertainment,Gym,0
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,Clothing Store,Coffee Shop,Bubble Tea Shop,Café,Cosmetics Shop,...,Italian Restaurant,Japanese Restaurant,Bookstore,Electronics Store,Fast Food Restaurant,Lingerie Store,Middle Eastern Restaurant,Pizza Place,Plaza,0
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,Café,Coffee Shop,Clothing Store,American Restaurant,Cocktail Bar,...,Restaurant,Beer Bar,Breakfast Spot,Creperie,Department Store,Farmers Market,Gastropub,Gym,Hotel,0
4,M4E,East Toronto,The Beaches,43.676357,-79.293031,Coffee Shop,Health Food Store,Neighborhood,Pub,Trail,...,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,0


In [128]:
toronto_final.shape

(39, 21)

In [129]:
import numpy as np
import matplotlib.cm as cm
import matplotlib.colors as colors

In [130]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters
x = np.arange(clusters)
ys = [i + x + (i*x)**2 for i in range(clusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_final['Latitude'], toronto_final['Longitude'], toronto_final['Borough'], toronto_final['clusters']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examine the clusters

In [131]:
toronto_final

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,CommonVenue1,CommonVenue2,CommonVenue3,CommonVenue4,CommonVenue5,...,CommonVenue7,CommonVenue8,CommonVenue9,CommonVenue10,CommonVenue11,CommonVenue12,CommonVenue13,CommonVenue14,CommonVenue15,clusters
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,Coffee Shop,Bakery,Park,Pub,Breakfast Spot,...,Theater,Antique Shop,Art Gallery,Asian Restaurant,Bank,Beer Store,Chocolate Shop,Dessert Shop,Distribution Center,0
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,Coffee Shop,Diner,Arts & Crafts Store,Bank,Bar,...,Burrito Place,Café,College Auditorium,College Cafeteria,Creperie,Distribution Center,Fried Chicken Joint,General Entertainment,Gym,0
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,Clothing Store,Coffee Shop,Bubble Tea Shop,Café,Cosmetics Shop,...,Italian Restaurant,Japanese Restaurant,Bookstore,Electronics Store,Fast Food Restaurant,Lingerie Store,Middle Eastern Restaurant,Pizza Place,Plaza,0
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,Café,Coffee Shop,Clothing Store,American Restaurant,Cocktail Bar,...,Restaurant,Beer Bar,Breakfast Spot,Creperie,Department Store,Farmers Market,Gastropub,Gym,Hotel,0
4,M4E,East Toronto,The Beaches,43.676357,-79.293031,Coffee Shop,Health Food Store,Neighborhood,Pub,Trail,...,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,0
5,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,Coffee Shop,Bakery,Beer Bar,Café,Cheese Shop,...,Farmers Market,Restaurant,Seafood Restaurant,Art Gallery,BBQ Joint,Bagel Shop,Basketball Stadium,Beach,Bistro,0
6,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,Coffee Shop,Italian Restaurant,Café,Japanese Restaurant,Sandwich Place,...,Bubble Tea Shop,Burger Joint,Department Store,Salad Place,Thai Restaurant,Art Museum,Comic Shop,Dessert Shop,Diner,0
7,M6G,Downtown Toronto,Christie,43.669542,-79.422564,Grocery Store,Café,Park,Athletics & Sports,Baby Store,...,Coffee Shop,Diner,Italian Restaurant,Nightclub,Restaurant,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,0
8,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568,Coffee Shop,Café,Clothing Store,Hotel,Restaurant,...,Gym,Thai Restaurant,American Restaurant,Bakery,Bar,Bookstore,Concert Hall,Cosmetics Shop,Pizza Place,0
9,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259,Bakery,Pharmacy,Bank,Bar,Brewery,...,Grocery Store,Middle Eastern Restaurant,Music Venue,Park,Supermarket,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,0


In [132]:
# Cluster 1
toronto_final.loc[toronto_final['clusters'] == 0, toronto_final.columns[[1] + [2] + list(range(5, toronto_final.shape[1] - 1))]]

Unnamed: 0,Borough,Neighbourhood,CommonVenue1,CommonVenue2,CommonVenue3,CommonVenue4,CommonVenue5,CommonVenue6,CommonVenue7,CommonVenue8,CommonVenue9,CommonVenue10,CommonVenue11,CommonVenue12,CommonVenue13,CommonVenue14,CommonVenue15
0,Downtown Toronto,"Regent Park, Harbourfront",Coffee Shop,Bakery,Park,Pub,Breakfast Spot,Café,Theater,Antique Shop,Art Gallery,Asian Restaurant,Bank,Beer Store,Chocolate Shop,Dessert Shop,Distribution Center
1,Downtown Toronto,"Queen's Park, Ontario Provincial Government",Coffee Shop,Diner,Arts & Crafts Store,Bank,Bar,Beer Bar,Burrito Place,Café,College Auditorium,College Cafeteria,Creperie,Distribution Center,Fried Chicken Joint,General Entertainment,Gym
2,Downtown Toronto,"Garden District, Ryerson",Clothing Store,Coffee Shop,Bubble Tea Shop,Café,Cosmetics Shop,Hotel,Italian Restaurant,Japanese Restaurant,Bookstore,Electronics Store,Fast Food Restaurant,Lingerie Store,Middle Eastern Restaurant,Pizza Place,Plaza
3,Downtown Toronto,St. James Town,Café,Coffee Shop,Clothing Store,American Restaurant,Cocktail Bar,Cosmetics Shop,Restaurant,Beer Bar,Breakfast Spot,Creperie,Department Store,Farmers Market,Gastropub,Gym,Hotel
4,East Toronto,The Beaches,Coffee Shop,Health Food Store,Neighborhood,Pub,Trail,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium
5,Downtown Toronto,Berczy Park,Coffee Shop,Bakery,Beer Bar,Café,Cheese Shop,Cocktail Bar,Farmers Market,Restaurant,Seafood Restaurant,Art Gallery,BBQ Joint,Bagel Shop,Basketball Stadium,Beach,Bistro
6,Downtown Toronto,Central Bay Street,Coffee Shop,Italian Restaurant,Café,Japanese Restaurant,Sandwich Place,Bar,Bubble Tea Shop,Burger Joint,Department Store,Salad Place,Thai Restaurant,Art Museum,Comic Shop,Dessert Shop,Diner
7,Downtown Toronto,Christie,Grocery Store,Café,Park,Athletics & Sports,Baby Store,Candy Store,Coffee Shop,Diner,Italian Restaurant,Nightclub,Restaurant,Afghan Restaurant,Airport,Airport Food Court,Airport Gate
8,Downtown Toronto,"Richmond, Adelaide, King",Coffee Shop,Café,Clothing Store,Hotel,Restaurant,Deli / Bodega,Gym,Thai Restaurant,American Restaurant,Bakery,Bar,Bookstore,Concert Hall,Cosmetics Shop,Pizza Place
9,West Toronto,"Dufferin, Dovercourt Village",Bakery,Pharmacy,Bank,Bar,Brewery,Café,Grocery Store,Middle Eastern Restaurant,Music Venue,Park,Supermarket,Afghan Restaurant,Airport,Airport Food Court,Airport Gate


In [133]:
# Cluster 2
toronto_final.loc[toronto_final['clusters'] == 1, toronto_final.columns[[1] + [2] + list(range(5, toronto_final.shape[1] - 1))]]

Unnamed: 0,Borough,Neighbourhood,CommonVenue1,CommonVenue2,CommonVenue3,CommonVenue4,CommonVenue5,CommonVenue6,CommonVenue7,CommonVenue8,CommonVenue9,CommonVenue10,CommonVenue11,CommonVenue12,CommonVenue13,CommonVenue14,CommonVenue15
29,Central Toronto,"Moore Park, Summerhill East",Trail,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant


In [134]:
# Cluster 3
toronto_final.loc[toronto_final['clusters'] == 2, toronto_final.columns[[1] + [2] + list(range(5, toronto_final.shape[1] - 1))]]

Unnamed: 0,Borough,Neighbourhood,CommonVenue1,CommonVenue2,CommonVenue3,CommonVenue4,CommonVenue5,CommonVenue6,CommonVenue7,CommonVenue8,CommonVenue9,CommonVenue10,CommonVenue11,CommonVenue12,CommonVenue13,CommonVenue14,CommonVenue15
19,Central Toronto,Roselawn,Garden,Home Service,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store


In [135]:
# Cluster 4
toronto_final.loc[toronto_final['clusters'] == 3, toronto_final.columns[[1] + [2] +  list(range(5, toronto_final.shape[1] - 1))]]

Unnamed: 0,Borough,Neighbourhood,CommonVenue1,CommonVenue2,CommonVenue3,CommonVenue4,CommonVenue5,CommonVenue6,CommonVenue7,CommonVenue8,CommonVenue9,CommonVenue10,CommonVenue11,CommonVenue12,CommonVenue13,CommonVenue14,CommonVenue15
21,Central Toronto,"Forest Hill North & West, Forest Hill Road Park",Bus Line,Jewelry Store,Park,Sushi Restaurant,Trail,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium


In [136]:
# Cluster 5
toronto_final.loc[toronto_final['clusters'] == 4, toronto_final.columns[[1] + [2] +  list(range(5, toronto_final.shape[1] - 1))]]

Unnamed: 0,Borough,Neighbourhood,CommonVenue1,CommonVenue2,CommonVenue3,CommonVenue4,CommonVenue5,CommonVenue6,CommonVenue7,CommonVenue8,CommonVenue9,CommonVenue10,CommonVenue11,CommonVenue12,CommonVenue13,CommonVenue14,CommonVenue15
18,Central Toronto,Lawrence Park,Park,Bus Line,Swim School,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum
33,Downtown Toronto,Rosedale,Park,Playground,Trail,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum


As it is shown in cluster 1 most of bakeries, restaurants and coffe shops are clustered together. While the rest of the clusters are less populated and since most of them include venues as airport they could be clustered together into one