<h1 align='center' style="color:#e3453a">Segmenting and Clustering Neighborhoods in Toronto</h1>

### PART 1:

#### Importing libraries, packages, modules

In [1]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
from geopy.geocoders import Nominatim
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
import folium
from sklearn.cluster import KMeans

#### Web-Scraping

Extracting required data from the Wikipedia Page and converting it into a Dataframe<br>
<a href='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'>List of Postal Codes of Canada</a>

In [2]:
# Download the Wikipedia page that lists the 'M' postal codes.
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
response = requests.get(url)
# Stop on an HTTP error rather than trying to parse an error page.
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')
table = soup.find('table', attrs={'class':'wikitable sortable'})
if table is None:
    # Without this guard, str(None) would be handed to read_html below.
    raise ValueError("No 'wikitable sortable' table found at {}".format(url))
temp_df = pd.read_html(str(table))
# Keep the three columns used by the rest of the notebook; .copy() gives an
# independent frame so later assignments do not touch the parsed table.
df = temp_df[0][['Postal Code', 'Borough', 'Neighborhood']].copy()
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


#### Data Cleaning

Drop the rows whose 'Borough' is <i><b>Not assigned</b></i>

In [3]:
# Keep only the rows with an assigned borough, then renumber the index from 0.
has_borough = df['Borough'].ne('Not assigned')
df = df.loc[has_borough].reset_index(drop=True)
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


Replace a <b>Not assigned</b> 'Neighborhood' with the Borough name

In [4]:
# Where the neighborhood is 'Not assigned', fall back to the borough name.
# A boolean mask does this in one vectorised assignment and, unlike the
# positional loop it replaces, does not depend on the index being 0..n-1.
unassigned = df['Neighborhood'] == 'Not assigned'
df.loc[unassigned, 'Neighborhood'] = df.loc[unassigned, 'Borough']
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


Checking the shape of the cleaned dataframe

In [5]:
df.shape

(103, 3)

### Part 2:

Load the Coordinates of each Postal Code from the csv file

In [6]:
# Path is relative to the directory the notebook is run from.
coords_path = "Downloads/Geospatial_Coordinates.csv"
coords = pd.read_csv(coords_path)
coords.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Merge 'df' and 'coords' to get the coordinates

In [7]:
# Left join keeps every postal code from df, whether or not it has coordinates.
df = pd.merge(df, coords, how='left', on='Postal Code')
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


### Part 3:

Get location of Toronto, Canada

In [8]:
# Look up the coordinates used to centre the maps below.
geo_locator = Nominatim(user_agent='user-agent')
location = geo_locator.geocode('Toronto')
if location is None:
    # geocode() returns None when nothing matches; raise a clear error here
    # instead of an AttributeError on the attribute access below.
    raise ValueError("Geocoder returned no result for 'Toronto'")
latitude = location.latitude
longitude = location.longitude
print(f"Coordinates of Toronto: {latitude}, {longitude}")

Coordinates of Toronto: 43.6534817, -79.3839347


Plotting the map of Toronto and adding a marker for each postal code

In [9]:
# Base map centred on the geocoded Toronto coordinates.
toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# One red circle per postal code; the popup reads "<neighborhood>: <borough>".
marker_rows = zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood'])
for row_lat, row_lng, borough, hood in marker_rows:
    popup = folium.Popup('{}: {}'.format(hood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [row_lat, row_lng],
        radius=4,
        color='red',
        fill=True,
        fill_color='red',
        fill_opacity=0.5,
        popup=popup,
    )
    marker.add_to(toronto)

In [10]:
toronto

All Boroughs in Toronto

In [11]:
df.Borough.unique()

array(['North York', 'Downtown Toronto', 'Etobicoke', 'Scarborough',
       'East York', 'York', 'East Toronto', 'West Toronto',
       'Central Toronto', 'Mississauga'], dtype=object)

Selecting the boroughs whose names contain the word <b>Toronto</b>

In [12]:
# Keep the boroughs whose name contains "Toronto" (Downtown, East, West and
# Central Toronto in the scraped table). Chaining reset_index produces a new
# frame rather than mutating a filtered slice of df in place.
is_toronto_borough = df['Borough'].str.contains('Toronto', na=False)
toronto_data = df[is_toronto_borough].reset_index(drop=True)
toronto_data.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031


Display the map of Toronto Boroughs

In [13]:
toronto_map = folium.Map(location=[latitude, longitude])

# Different marker color for each borough. A lookup table replaces the chain
# of independent `if`s, which left `col` undefined (or carried over from the
# previous row) whenever a borough was not one of the four listed.
borough_colors = {
    'Downtown Toronto': 'red',
    'East Toronto': 'blue',
    'West Toronto': 'yellow',
    'Central Toronto': 'green',
}

for lat, long, b, n in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Borough'], toronto_data['Neighborhood']):
    # Unlisted boroughs get a neutral gray marker.
    col = borough_colors.get(b, 'gray')
    folium.CircleMarker(
        [lat, long],
        radius=4,
        popup=folium.Popup('{}: {}'.format(n, b), parse_html=True),
        color=col,
        fill=True,
        fill_color=col,
        fill_opacity=0.5
    ).add_to(toronto_map)
toronto_map

Foursquare API to explore the neighborhoods

In [40]:
# @hidden_cell
#API Credentials
# Read from a local CSV so the values are not embedded in the notebook.
login = pd.read_csv("Downloads/login.csv")
# First row of each column: Foursquare ID, Foursquare secret, API version.
CLIENT_ID, CLIENT_SECRET, VERSION = (
    login[field].values[0] for field in ('CLIENT_ID', 'CLIENT_SECRET', 'VERSION')
)

First Neighborhood in the 'toronto_data' DataFrame

In [15]:
# Pull the name and coordinates of the row labelled 0 in toronto_data.
first_label = 0
n = toronto_data.at[first_label, 'Neighborhood']
lat = toronto_data.at[first_label, 'Latitude']
lng = toronto_data.at[first_label, 'Longitude']
print("Neighborhood : {}\nLatitude : {}\nLongitude: {}".format(n, lat, lng))

Neighborhood : Regent Park, Harbourfront
Latitude : 43.6542599
Longitude: -79.3606359


Get the top 100 venues that are within a radius of 500 meters from the first neighborhood

In [16]:
# Query parameters for the Foursquare "explore" endpoint.
LIMIT = 100
radius = 500
# Same URL as before, assembled from adjacent f-string pieces.
url = (
    'https://api.foursquare.com/v2/venues/explore'
    f'?&client_id={CLIENT_ID}&client_secret={CLIENT_SECRET}&v={VERSION}'
    f'&ll={lat},{lng}&radius={radius}&limit={LIMIT}'
)

In [17]:
# requests has no default timeout, so set one to keep the cell from hanging.
response = requests.get(url, timeout=30)
# Surface HTTP errors here instead of as a KeyError when the payload is parsed.
response.raise_for_status()
res = response.json()
res

{'meta': {'code': 200, 'requestId': '5efc9910f5be0232e51e24d0'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Corktown',
  'headerFullLocation': 'Corktown, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 46,
  'suggestedBounds': {'ne': {'lat': 43.6587599045, 'lng': -79.3544279001486},
   'sw': {'lat': 43.6497598955, 'lng': -79.36684389985142}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '54ea41ad498e9a11e9e13308',
       'name': 'Roselle Desserts',
       'location': {'address': '362 King St E',
        'crossStreet': 'Trinity St',
        'lat': 43.653446723052674,
        'lng': -79.3620167174383,
        'labeledLatLngs': [{'label': 'display',
 

In [18]:
#Get category of each Venue
def get_category(row):
    """Return the name of the first category listed for a venue.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must expose the category list under 'categories' or, failing that,
        under 'venue.categories'. Each list entry is a dict with a 'name' key.

    Returns
    -------
    str or None
        The 'name' of the first category, or None when the value is empty or
        is not a list/tuple (for example NaN for a venue with no categories).

    Raises
    ------
    KeyError
        If neither 'categories' nor 'venue.categories' is present.
    """
    try:
        category = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; other errors propagate.
        category = row['venue.categories']

    # A missing value such as NaN has no len(); treat it like an empty list.
    if not isinstance(category, (list, tuple)) or len(category) == 0:
        return None
    return category[0]['name']

Get top 100 nearby venues

In [19]:
# Flatten the recommended items of the response into a table.
venues = res['response']['groups'][0]['items']
cols = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = json_normalize(venues).loc[:, cols]
# Replace each category list with the name of its first category.
nearby_venues['venue.categories'] = nearby_venues.apply(get_category, axis=1)
friendly_names = dict(zip(cols, ['Name', 'Category', 'Latitude', 'Longitude']))
nearby_venues = nearby_venues.rename(columns=friendly_names)
nearby_venues.head()

Unnamed: 0,Name,Category,Latitude,Longitude
0,Roselle Desserts,Bakery,43.653447,-79.362017
1,Tandem Coffee,Coffee Shop,43.653559,-79.361809
2,Cooper Koo Family YMCA,Distribution Center,43.653249,-79.358008
3,Body Blitz Spa East,Spa,43.654735,-79.359874
4,Dominion Pub and Kitchen,Pub,43.656919,-79.358967


Get the top 100 venues that are within a radius of 500 meters(All Neighborhoods)

In [20]:
radius = 500
LIMIT = 100
cols = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
venue_columns = ['Name', 'Category', 'Venue_Lat', 'Venue_Long', 'Borough', 'Neighborhood']
renamed = {'venue.name': 'Name', 'venue.categories': 'Category', 'venue.location.lat': 'Venue_Lat', 'venue.location.lng': 'Venue_Long'}

# Collect one frame per neighborhood and concatenate once at the end.
# Appending inside the loop copies the accumulated frame on every iteration,
# and DataFrame.append no longer exists in pandas 2.x.
venue_frames = []
for lat, lng, n, b in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Neighborhood'], toronto_data['Borough']):
    url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        lng,
        radius,
        LIMIT)
    response = requests.get(url)
    # Report HTTP failures directly rather than as a KeyError on the payload.
    response.raise_for_status()
    res = response.json()
    venues = res['response']['groups'][0]['items']
    if not venues:
        # An empty item list normalises to a frame without the expected
        # columns, so skip it instead of failing on the column selection.
        continue
    nearby_venues = json_normalize(venues).loc[:, cols]
    nearby_venues['venue.categories'] = nearby_venues.apply(get_category, axis=1)
    nearby_venues = nearby_venues.rename(columns=renamed)
    nearby_venues['Borough'] = b
    nearby_venues['Neighborhood'] = n
    venue_frames.append(nearby_venues)

if venue_frames:
    venue_data = pd.concat(venue_frames, ignore_index=True)
else:
    # pd.concat rejects an empty list; keep the expected schema instead.
    venue_data = pd.DataFrame(columns=venue_columns)

In [21]:
venue_data.head()

Unnamed: 0,Name,Category,Venue_Lat,Venue_Long,Borough,Neighborhood
0,Roselle Desserts,Bakery,43.653447,-79.362017,Downtown Toronto,"Regent Park, Harbourfront"
1,Tandem Coffee,Coffee Shop,43.653559,-79.361809,Downtown Toronto,"Regent Park, Harbourfront"
2,Cooper Koo Family YMCA,Distribution Center,43.653249,-79.358008,Downtown Toronto,"Regent Park, Harbourfront"
3,Body Blitz Spa East,Spa,43.654735,-79.359874,Downtown Toronto,"Regent Park, Harbourfront"
4,Dominion Pub and Kitchen,Pub,43.656919,-79.358967,Downtown Toronto,"Regent Park, Harbourfront"


In [22]:
venue_data.shape

(1617, 6)

Number of venues for each Borough

In [23]:
venue_data.Borough.value_counts()

Downtown Toronto    1223
West Toronto         161
East Toronto         123
Central Toronto      110
Name: Borough, dtype: int64

Number of venues returned for each neighborhood

In [24]:
# Count the non-null 'Category' entries recorded for each neighborhood.
countdf = (
    venue_data[['Neighborhood', 'Category']]
    .groupby('Neighborhood', as_index=False)
    .count()
)
countdf

Unnamed: 0,Neighborhood,Category
0,Berczy Park,58
1,"Brockton, Parkdale Village, Exhibition Place",22
2,"Business reply mail Processing Centre, South C...",15
3,"CN Tower, King and Spadina, Railway Lands, Har...",15
4,Central Bay Street,66
5,Christie,18
6,Church and Wellesley,74
7,"Commerce Court, Victoria Hotel",100
8,Davisville,33
9,Davisville North,9


Number of unique categories returned

In [25]:
print(f"Unique Categories: {len(venue_data['Category'].unique())}")

Unique Categories: 234


Analyze Each Neighborhood

In [26]:
# One-hot encode the venue categories: one indicator column per category.
dummy = pd.get_dummies(venue_data[['Category']], prefix="", prefix_sep="")
category_names = dummy.columns.tolist()
# Attach the neighborhood under the label '0Neighborhood' and move it to the front.
dummy['0Neighborhood'] = venue_data['Neighborhood']
cols = ['0Neighborhood', *category_names]
dummy = dummy[cols]
dummy.head()

Unnamed: 0,0Neighborhood,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Group each neighborhood by taking the mean of the frequency of occurrence of each category

In [27]:
# The mean of the 0/1 indicators is the share of a neighborhood's venues in each category.
dummy_group = dummy.groupby('0Neighborhood', as_index=False).mean()
dummy_group

Unnamed: 0,0Neighborhood,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.017241,0.0,0.0,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.066667,0.066667,0.133333,0.133333,0.133333,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.015152,0.0,0.0,0.015152,0.0,0.0,0.015152
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Church and Wellesley,0.013514,0.0,0.0,0.0,0.0,0.0,0.013514,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.0,0.027027
7,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,...,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0
8,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [28]:
dummy_group.shape

(39, 235)

Top 5 most common venue categories in each Neighborhood

In [29]:
for hood in dummy_group['0Neighborhood']:
    print(f"Neighborhood: {hood}\n")
    # Transpose the neighborhood's single row so each category becomes a row.
    freq_table = dummy_group[dummy_group['0Neighborhood'] == hood].T.reset_index()
    freq_table.columns = ['venue', 'frequency']
    freq_table = (
        freq_table.iloc[1:]  # the first row holds the neighborhood name itself
        .assign(frequency=lambda t: t['frequency'].astype(float))
        .round({'frequency': 2})
        .sort_values('frequency', ascending=False)
        .reset_index(drop=True)
    )
    print(freq_table.head())

Neighborhood: Berczy Park

          venue  frequency
0   Coffee Shop       0.09
1  Cocktail Bar       0.05
2      Beer Bar       0.03
3   Cheese Shop       0.03
4          Café       0.03
Neighborhood: Brockton, Parkdale Village, Exhibition Place

                venue  frequency
0                Café       0.14
1      Breakfast Spot       0.09
2         Coffee Shop       0.09
3  Italian Restaurant       0.05
4             Stadium       0.05
Neighborhood: Business reply mail Processing Centre, South Central Letter Processing Plant Toronto

                  venue  frequency
0           Yoga Studio       0.07
1         Auto Workshop       0.07
2  Gym / Fitness Center       0.07
3    Light Rail Station       0.07
4         Garden Center       0.07
Neighborhood: CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport

              venue  frequency
0    Airport Lounge       0.13
1   Airport Service       0.13
2  Airport Terminal       0.

Display the top 10 venues for each neighborhood

In [30]:
# Returns the n most common venue categories in a neighborhood
def get_common_venues(row, n):
    """Return the labels of the `n` largest values in `row`, largest first.

    The first entry of `row` is skipped (it holds the neighborhood name where
    this is called); the remaining entries are sorted in descending order and
    the first `n` index labels are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:n]


# Column layout: the neighborhood name followed by 'Venue 1' .. 'Venue 10'.
columns = ['0Neighborhood'] + ['Venue {}'.format(rank) for rank in range(1, 11)]
top_10 = pd.DataFrame(columns=columns)
top_10['0Neighborhood'] = dummy_group['0Neighborhood']
# Fill each row with that neighborhood's ten highest-frequency categories.
for row_pos in range(len(dummy_group)):
    top_10.iloc[row_pos, 1:] = get_common_venues(dummy_group.iloc[row_pos, :], 10)

In [31]:
top_10

Unnamed: 0,0Neighborhood,Venue 1,Venue 2,Venue 3,Venue 4,Venue 5,Venue 6,Venue 7,Venue 8,Venue 9,Venue 10
0,Berczy Park,Coffee Shop,Cocktail Bar,Cheese Shop,Seafood Restaurant,Bakery,Beer Bar,Café,Restaurant,Hotel,Diner
1,"Brockton, Parkdale Village, Exhibition Place",Café,Breakfast Spot,Coffee Shop,Gym,Climbing Gym,Convenience Store,Burrito Place,Stadium,Restaurant,Italian Restaurant
2,"Business reply mail Processing Centre, South C...",Yoga Studio,Auto Workshop,Garden Center,Garden,Light Rail Station,Fast Food Restaurant,Farmers Market,Comic Shop,Park,Pizza Place
3,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Lounge,Airport Service,Airport Terminal,Harbor / Marina,Boat or Ferry,Rental Car Location,Coffee Shop,Plane,Bar,Sculpture Garden
4,Central Bay Street,Coffee Shop,Japanese Restaurant,Sandwich Place,Italian Restaurant,Café,Burger Joint,Salad Place,Bar,Department Store,Thai Restaurant
5,Christie,Grocery Store,Café,Park,Coffee Shop,Nightclub,Italian Restaurant,Restaurant,Baby Store,Candy Store,Athletics & Sports
6,Church and Wellesley,Coffee Shop,Sushi Restaurant,Japanese Restaurant,Restaurant,Gay Bar,Yoga Studio,Men's Store,Hotel,Mediterranean Restaurant,Bubble Tea Shop
7,"Commerce Court, Victoria Hotel",Coffee Shop,Restaurant,Café,Hotel,American Restaurant,Gym,Seafood Restaurant,Japanese Restaurant,Italian Restaurant,Deli / Bodega
8,Davisville,Dessert Shop,Sandwich Place,Pizza Place,Italian Restaurant,Café,Sushi Restaurant,Coffee Shop,Gym,Farmers Market,Diner
9,Davisville North,Gym,Hotel,Gym / Fitness Center,Pizza Place,Department Store,Sandwich Place,Breakfast Spot,Food & Drink Shop,Park,General Entertainment


Clustering the neighborhoods into 10 clusters using k-means

In [32]:
# Feature matrix: the per-category frequencies without the name column.
# `columns=` replaces the positional axis argument, which pandas 2.x rejects.
cluster = dummy_group.drop(columns='0Neighborhood')
# random_state is fixed so the cluster labels are reproducible between runs.
kmeans = KMeans(n_clusters=10, random_state=0)
kmeans.fit(cluster)
kmeans.labels_[0:10]

array([3, 1, 8, 3, 3, 1, 3, 3, 3, 8])

Creating a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood

In [33]:
# insert() raises if the column already exists, so remove any 'Cluster'
# column left by an earlier run of this cell before adding it again.
if 'Cluster' in top_10.columns:
    top_10 = top_10.drop(columns='Cluster')
top_10.insert(0, 'Cluster', kmeans.labels_)

# Attach the cluster label and top venues to each Toronto neighborhood.
# The default inner join leaves out neighborhoods that have no row in top_10.
new_df = toronto_data.merge(top_10, left_on='Neighborhood', right_on='0Neighborhood')
new_df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster,0Neighborhood,Venue 1,Venue 2,Venue 3,Venue 4,Venue 5,Venue 6,Venue 7,Venue 8,Venue 9,Venue 10
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,3,"Regent Park, Harbourfront",Coffee Shop,Park,Pub,Bakery,Café,Breakfast Spot,Theater,Spa,Dessert Shop,Ice Cream Shop
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,3,"Queen's Park, Ontario Provincial Government",Coffee Shop,Sushi Restaurant,Diner,Park,Bar,Beer Bar,Smoothie Shop,Sandwich Place,Burrito Place,Café
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,3,"Garden District, Ryerson",Clothing Store,Coffee Shop,Cosmetics Shop,Bubble Tea Shop,Middle Eastern Restaurant,Café,Japanese Restaurant,Italian Restaurant,Bookstore,Ramen Restaurant
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,3,St. James Town,Coffee Shop,Café,Cocktail Bar,American Restaurant,Restaurant,Gastropub,Lingerie Store,Department Store,Hotel,Gym
4,M4E,East Toronto,The Beaches,43.676357,-79.293031,7,The Beaches,Neighborhood,Health Food Store,Pub,Trail,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Department Store,Donut Shop


In [34]:
# Drop the duplicate join key. Rebinding the name with errors='ignore' lets
# the cell be re-run without a KeyError once the column is gone.
new_df = new_df.drop(columns='0Neighborhood', errors='ignore')

In [35]:
new_df.tail()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster,Venue 1,Venue 2,Venue 3,Venue 4,Venue 5,Venue 6,Venue 7,Venue 8,Venue 9,Venue 10
34,M5W,Downtown Toronto,Stn A PO Boxes,43.646435,-79.374846,3,Coffee Shop,Café,Seafood Restaurant,Italian Restaurant,Cocktail Bar,Beer Bar,Hotel,Restaurant,Japanese Restaurant,Lounge
35,M4X,Downtown Toronto,"St. James Town, Cabbagetown",43.667967,-79.367675,3,Café,Coffee Shop,Italian Restaurant,Pizza Place,Restaurant,Market,Pub,Pet Store,Bakery,Chinese Restaurant
36,M5X,Downtown Toronto,"First Canadian Place, Underground city",43.648429,-79.38228,3,Coffee Shop,Café,Hotel,Restaurant,Gym,American Restaurant,Salad Place,Steakhouse,Seafood Restaurant,Japanese Restaurant
37,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316,3,Coffee Shop,Sushi Restaurant,Japanese Restaurant,Restaurant,Gay Bar,Yoga Studio,Men's Store,Hotel,Mediterranean Restaurant,Bubble Tea Shop
38,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,8,Yoga Studio,Auto Workshop,Garden Center,Garden,Light Rail Station,Fast Food Restaurant,Farmers Market,Comic Shop,Park,Pizza Place


Visualize the clusters

In [37]:
cluster_map = folium.Map(location=[latitude, longitude], zoom_start=13)

# One color per cluster, sampled evenly from the rainbow colormap.
colors_array = cm.rainbow(np.linspace(0, 1, kmeans.n_clusters))
colors_ = [colors.rgb2hex(i) for i in colors_array]
for lat, long, n, c in zip(new_df['Latitude'], new_df['Longitude'], new_df['Neighborhood'], new_df['Cluster']):
    # Label with this row's cluster id `c`. The previous code stringified
    # `cluster`, the whole feature DataFrame, into every popup.
    label = folium.Popup(n + ' Cluster ' + str(c), parse_html=True)
    # The c-1 offset is kept from the original: label 0 wraps to the last
    # palette entry via negative indexing, so each label keeps its own color.
    folium.CircleMarker(
        [lat, long],
        radius=5,
        popup=label,
        color=colors_[c-1],
        fill=True,
        fill_color=colors_[c-1],
        fill_opacity=0.5
    ).add_to(cluster_map)
cluster_map

Examine a few clusters

In [38]:
#Cluster 1
new_df[new_df['Cluster'] == 1].head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster,Venue 1,Venue 2,Venue 3,Venue 4,Venue 5,Venue 6,Venue 7,Venue 8,Venue 9,Venue 10
7,M6G,Downtown Toronto,Christie,43.669542,-79.422564,1,Grocery Store,Café,Park,Coffee Shop,Nightclub,Italian Restaurant,Restaurant,Baby Store,Candy Store,Athletics & Sports
14,M6K,West Toronto,"Brockton, Parkdale Village, Exhibition Place",43.636847,-79.428191,1,Café,Breakfast Spot,Coffee Shop,Gym,Climbing Gym,Convenience Store,Burrito Place,Stadium,Restaurant,Italian Restaurant
17,M4M,East Toronto,Studio District,43.659526,-79.340923,1,Café,Coffee Shop,Brewery,Gastropub,Bakery,American Restaurant,Yoga Studio,Convenience Store,Sandwich Place,Cheese Shop
22,M6P,West Toronto,"High Park, The Junction South",43.661608,-79.464763,1,Café,Thai Restaurant,Mexican Restaurant,Arts & Crafts Store,Music Venue,Italian Restaurant,Diner,Bar,Fried Chicken Joint,Bakery
24,M5R,Central Toronto,"The Annex, North Midtown, Yorkville",43.67271,-79.405678,1,Sandwich Place,Café,Coffee Shop,Liquor Store,Pizza Place,Donut Shop,Burger Joint,Middle Eastern Restaurant,Indian Restaurant,Pub


In [39]:
#Cluster 5
new_df[new_df['Cluster']==5].head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster,Venue 1,Venue 2,Venue 3,Venue 4,Venue 5,Venue 6,Venue 7,Venue 8,Venue 9,Venue 10
33,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529,5,Park,Playground,Trail,Yoga Studio,Department Store,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop
