In [1]:
# The code was removed by Watson Studio for sharing.

# Part 1: Scraping and Preping Data

In [2]:
#importing pandas library
import pandas as pd


In [3]:
# Use Pandas to read the Wikipedia source page and convert its table to a dataframe
tables = pd.read_html("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
tables[0].columns = tables[0].iloc[0]
df = tables[0]

# Dataframe will consist of 3 columns: PostalCode, Borough, and Neighborhood
df.columns = ['PostalCode', 'Borough', 'Neighborhood']
df = df.iloc[1:]

# Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.
df = df[df.Borough != "Not assigned"].reset_index(drop=True)
df


Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


The table on the wikipedia page has be changed to have neighborhoods under the same postal code grouped together. This removed the need to combine like postal codes. Addtionally, the table lacks any borough without an nieghborhood. 

In [4]:
# Use the .shape method to print the number of rows of your dataframe.
df.shape


(103, 3)

# Part 2: Adding Latitude and Longitude Coordinates

In [5]:
# Import Coordinates from CSV provided
coord = pd.read_csv('http://cocl.us/Geospatial_data')

# Merge tables using 'PostalCode'
coord.rename(columns={'Postal Code': 'PostalCode'}, inplace=True)
df_wCoord = pd.merge(df, coord, on = 'PostalCode')
df_wCoord


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


# Part 3: Explore the Data

### Import useful libraries

In [6]:
#Import useful libraries 
import numpy as np # library to handle data in a vectorized manner

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

### Addtional Code to install Folium


In [7]:
Folium = project.get_file('folium-0.11.0.tar.gz')

 # Install and import the library
!pip install Folium 
import folium # map rendering library




### Create a map of Toronto with markers for Neighborhoods

In [8]:
address = 'Toronto'

location = Nominatim().geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

  app.launch_new_instance()


The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [9]:
for col in df.columns: 
    print(col) 

PostalCode
Borough
Neighborhood


In [10]:
# create map of Toronto, with markers, using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

for lat, lng, borough, neighborhood in zip(df_wCoord['Latitude'], df_wCoord['Longitude'], df_wCoord['Borough'], df_wCoord['Neighborhood']):
    label = '{}, {}'.format('Neighborhood', 'Borough')
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Filter Boroughs containing 'Toronto'

In [11]:
# filter borough names that contain the word Toronto
borough_names = list(df_wCoord.Borough.unique())
borough_wToronto = []

for x in borough_names:
    if "toronto" in x.lower():
        borough_wToronto.append(x)
        
borough_wToronto

['Downtown Toronto', 'East Toronto', 'West Toronto', 'Central Toronto']

In [12]:
# table with Boroughs containing 'York'
borough_wToronto = df_wCoord[df_wCoord['Borough'].isin(borough_wToronto)].reset_index(drop=True)
borough_wToronto


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031
5,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
6,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
7,M6G,Downtown Toronto,Christie,43.669542,-79.422564
8,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568
9,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259


In [13]:
map_toronto2 = folium.Map(location=[latitude, longitude], zoom_start=9)

for lat, lng, label in zip(borough_wToronto['Latitude'], borough_wToronto['Longitude'], borough_wToronto['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto2)  
    
    
map_toronto2

### Foursquare Connection

In [14]:
CLIENT_ID = '4CUDCLWWIYCCU44Y5ZGMTN0EXZKRJD4GN03LAO4LSJBSTTKI' 
CLIENT_SECRET = 'MNCFQVZANUEKUY51VVTLAGFO2RREGXLZQB1XJTWVQV5ISUQU'
VERSION = '20180605' 

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 4CUDCLWWIYCCU44Y5ZGMTN0EXZKRJD4GN03LAO4LSJBSTTKI
CLIENT_SECRET:MNCFQVZANUEKUY51VVTLAGFO2RREGXLZQB1XJTWVQV5ISUQU


### Creating a dataframe From Fousquare data

In [15]:
#Limit to Top 100 places within 500 meters
radius = 500
LIMIT = 100

venues = []

for lat, long, post, borough, neighborhood in zip(borough_wToronto['Latitude'], borough_wToronto['Longitude'], borough_wToronto['PostalCode'], borough_wToronto['Borough'], borough_wToronto['Neighborhood']):
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    for venue in results:
        venues.append((
            post, 
            borough,
            neighborhood, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [16]:
# Converting retrived information into a dataframe
venues_toronto = pd.DataFrame(venues)

# column names
venues_toronto.columns = ['PostalCode', 'Borough', 'Neighborhood','Name', 'Latitude', 'Longitude', 'Category']

print(venues_toronto.shape)
venues_toronto.head()

(1617, 7)


Unnamed: 0,PostalCode,Borough,Neighborhood,Name,Latitude,Longitude,Category
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",Roselle Desserts,43.653447,-79.362017,Bakery
1,M5A,Downtown Toronto,"Regent Park, Harbourfront",Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,M5A,Downtown Toronto,"Regent Park, Harbourfront",Body Blitz Spa East,43.654735,-79.359874,Spa
4,M5A,Downtown Toronto,"Regent Park, Harbourfront",Dominion Pub and Kitchen,43.656919,-79.358967,Pub


In [17]:
# Number of different category by 'Neighborhood'
print('There are {} uniques categories.'.format(len(venues_toronto['Category'].unique())))
venues_toronto['Category'].unique()

There are 234 uniques categories.


array(['Bakery', 'Coffee Shop', 'Distribution Center', 'Spa', 'Pub',
       'Park', 'Restaurant', 'Breakfast Spot', 'Gym / Fitness Center',
       'Historic Site', 'Farmers Market', 'Performing Arts Venue',
       'Chocolate Shop', 'Dessert Shop', 'French Restaurant', 'Café',
       'Yoga Studio', 'Theater', 'Event Space', 'Shoe Store',
       'Ice Cream Shop', 'Art Gallery', 'Cosmetics Shop',
       'Asian Restaurant', 'Electronics Store', 'Bank', 'Beer Store',
       'Health Food Store', 'Wine Shop', 'Antique Shop',
       'Italian Restaurant', 'Sushi Restaurant', 'Creperie', 'Beer Bar',
       'Arts & Crafts Store', 'Burrito Place', 'Mexican Restaurant',
       'Hobby Shop', 'Diner', 'Fried Chicken Joint', 'Discount Store',
       'Smoothie Shop', 'Sandwich Place', 'Gym', 'Bar',
       'College Auditorium', 'Comic Shop', 'Clothing Store', 'Tea Room',
       'Plaza', 'Music Venue', 'Pizza Place', 'Thai Restaurant',
       'Ramen Restaurant', 'College Rec Center', 'Sporting Goods Shop

In [18]:
print('There are {} uniques categories.'.format(len(venues_toronto['PostalCode'].unique())))
venues_toronto['PostalCode'].unique()

print(venues_toronto.shape)

There are 39 uniques categories.
(1617, 7)


### Analyze each area

In [19]:
# one hot encoding
toronto_onehot = pd.get_dummies(venues_toronto[['Category']], prefix="", prefix_sep="")

# add postal, borough and neighborhood column 
toronto_onehot['PostalCode'] = venues_toronto['PostalCode'] 
toronto_onehot['Borough'] = venues_toronto['Borough'] 
toronto_onehot['Neighborhood'] = venues_toronto['Neighborhood'] 

# move postal, borough and neighborhood column
columns = list(toronto_onehot.columns[-2:]) + list(toronto_onehot.columns[:-2])
toronto_onehot = toronto_onehot[columns]

toronto_onehot


Unnamed: 0,PostalCode,Borough,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,M5A,Downtown Toronto,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M5A,Downtown Toronto,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M5A,Downtown Toronto,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M5A,Downtown Toronto,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M5A,Downtown Toronto,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,M5A,Downtown Toronto,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,M5A,Downtown Toronto,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,M5A,Downtown Toronto,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,M5A,Downtown Toronto,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,M5A,Downtown Toronto,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [20]:
# grouped by the following and take the mean of the frequency of occurrence of each category
toronto_grouped = toronto_onehot.groupby(["PostalCode", "Borough", "Neighborhood"]).mean().reset_index()

print(toronto_grouped.shape)
toronto_grouped


(39, 236)


Unnamed: 0,PostalCode,Borough,Neighborhood,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,M4E,East Toronto,The Beaches,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,M4K,East Toronto,"The Danforth West, Riverdale",0.0,0.0,0.0,0.0,0.0,0.0,0.023256,...,0.0,0.023256,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023256
2,M4L,East Toronto,"India Bazaar, The Beaches West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M4M,East Toronto,Studio District,0.0,0.0,0.0,0.0,0.0,0.0,0.05,...,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.0,0.0,0.025
4,M4N,Central Toronto,Lawrence Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,M4P,Central Toronto,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,M4R,Central Toronto,"North Toronto West, Lawrence Park",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556
7,M4S,Central Toronto,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,M4T,Central Toronto,"Moore Park, Summerhill East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",0.0,0.0,0.0,0.0,0.0,0.0,0.055556,...,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0


### Finding the top 10 kinds of venues and putting it into a dataframe

In [21]:
#each neighborhood along with the top 5 most common venues
    
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[4:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')
    

----The Beaches----
                      venue  freq
0         Health Food Store  0.25
1                       Pub  0.25
2                     Trail  0.25
3   New American Restaurant  0.00
4  Mediterranean Restaurant  0.00


----The Danforth West, Riverdale----
                    venue  freq
0        Greek Restaurant  0.19
1             Coffee Shop  0.07
2      Italian Restaurant  0.07
3          Ice Cream Shop  0.05
4  Furniture / Home Store  0.05


----India Bazaar, The Beaches West----
                  venue  freq
0  Fast Food Restaurant  0.10
1        Sandwich Place  0.10
2             Pet Store  0.05
3            Restaurant  0.05
4     Food & Drink Shop  0.05


----Studio District----
                 venue  freq
0                 Café  0.10
1          Coffee Shop  0.08
2              Brewery  0.05
3  American Restaurant  0.05
4            Gastropub  0.05


----Lawrence Park----
                venue  freq
0  Photography Studio  0.25
1                Park  0.25
2            Bus

In [24]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[4:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]


In [28]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

print(neighborhoods_venues_sorted.shape)


(39, 11)


In [29]:
neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,The Beaches,Health Food Store,Trail,Pub,Yoga Studio,Dessert Shop,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
1,"The Danforth West, Riverdale",Greek Restaurant,Italian Restaurant,Coffee Shop,Restaurant,Furniture / Home Store,Ice Cream Shop,Bookstore,Yoga Studio,Brewery,Japanese Restaurant
2,"India Bazaar, The Beaches West",Sandwich Place,Fast Food Restaurant,Pet Store,Pub,Brewery,Liquor Store,Board Shop,Fish & Chips Shop,Restaurant,Italian Restaurant
3,Studio District,Café,Coffee Shop,Brewery,Gastropub,Bakery,American Restaurant,Convenience Store,Seafood Restaurant,Sandwich Place,Cheese Shop
4,Lawrence Park,Photography Studio,Park,Bus Line,Swim School,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop


### Cluster

In [55]:
# number of clusters
kclusters = 6

toronto_clustering = toronto_grouped.drop(['Neighborhood', 'Borough', 'PostalCode'], 1)

# k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_clustering)

# check cluster labels 
kmeans.labels_

array([1, 1, 1, 1, 3, 0, 1, 1, 5, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32)

In [62]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
toronto_merged = borough_wToronto.copy()

# add clustering labels
toronto_merged["Cluster Labels"] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index("Neighborhood"), on="Neighborhood")

print(toronto_merged.shape)
toronto_merged


(39, 16)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,1,Coffee Shop,Park,Pub,Bakery,Café,Breakfast Spot,Theater,Beer Store,Bank,Dessert Shop
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,1,Coffee Shop,Sushi Restaurant,Diner,Park,Bar,Beer Bar,Smoothie Shop,Burrito Place,Sandwich Place,Café
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,1,Clothing Store,Coffee Shop,Bubble Tea Shop,Middle Eastern Restaurant,Cosmetics Shop,Café,Japanese Restaurant,Lingerie Store,Fast Food Restaurant,Bakery
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,1,Café,Coffee Shop,Gastropub,Restaurant,Cocktail Bar,American Restaurant,Italian Restaurant,Clothing Store,Moroccan Restaurant,Cosmetics Shop
4,M4E,East Toronto,The Beaches,43.676357,-79.293031,3,Health Food Store,Trail,Pub,Yoga Studio,Dessert Shop,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
5,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,0,Coffee Shop,Cocktail Bar,Bakery,Restaurant,Beer Bar,Cheese Shop,Seafood Restaurant,Café,Farmers Market,Juice Bar
6,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,1,Coffee Shop,Sandwich Place,Italian Restaurant,Japanese Restaurant,Café,Salad Place,Burger Joint,Bubble Tea Shop,Bar,Department Store
7,M6G,Downtown Toronto,Christie,43.669542,-79.422564,1,Grocery Store,Café,Park,Coffee Shop,Nightclub,Candy Store,Italian Restaurant,Diner,Restaurant,Athletics & Sports
8,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568,5,Coffee Shop,Restaurant,Café,Gym,Deli / Bodega,Hotel,Thai Restaurant,Bookstore,Pizza Place,Concert Hall
9,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259,1,Bakery,Pharmacy,Grocery Store,Pet Store,Music Venue,Middle Eastern Restaurant,Café,Brewery,Brazilian Restaurant,Supermarket


In [61]:
# new map
map_c = folium.Map(location=[latitude, longitude], zoom_start=12)

#color scheme
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_c)
       
map_c



### Cluster Examination

#### Cluster 1

In [66]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Downtown Toronto,0,Coffee Shop,Cocktail Bar,Bakery,Restaurant,Beer Bar,Cheese Shop,Seafood Restaurant,Café,Farmers Market,Juice Bar


#### Cluster 2

In [67]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,1,Coffee Shop,Park,Pub,Bakery,Café,Breakfast Spot,Theater,Beer Store,Bank,Dessert Shop
1,Downtown Toronto,1,Coffee Shop,Sushi Restaurant,Diner,Park,Bar,Beer Bar,Smoothie Shop,Burrito Place,Sandwich Place,Café
2,Downtown Toronto,1,Clothing Store,Coffee Shop,Bubble Tea Shop,Middle Eastern Restaurant,Cosmetics Shop,Café,Japanese Restaurant,Lingerie Store,Fast Food Restaurant,Bakery
3,Downtown Toronto,1,Café,Coffee Shop,Gastropub,Restaurant,Cocktail Bar,American Restaurant,Italian Restaurant,Clothing Store,Moroccan Restaurant,Cosmetics Shop
6,Downtown Toronto,1,Coffee Shop,Sandwich Place,Italian Restaurant,Japanese Restaurant,Café,Salad Place,Burger Joint,Bubble Tea Shop,Bar,Department Store
7,Downtown Toronto,1,Grocery Store,Café,Park,Coffee Shop,Nightclub,Candy Store,Italian Restaurant,Diner,Restaurant,Athletics & Sports
9,West Toronto,1,Bakery,Pharmacy,Grocery Store,Pet Store,Music Venue,Middle Eastern Restaurant,Café,Brewery,Brazilian Restaurant,Supermarket
11,West Toronto,1,Bar,Asian Restaurant,Café,Restaurant,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,Men's Store,Yoga Studio,Record Shop,Pizza Place
12,East Toronto,1,Greek Restaurant,Italian Restaurant,Coffee Shop,Restaurant,Furniture / Home Store,Ice Cream Shop,Bookstore,Yoga Studio,Brewery,Japanese Restaurant
13,Downtown Toronto,1,Coffee Shop,Hotel,Café,Restaurant,Italian Restaurant,Seafood Restaurant,Salad Place,Japanese Restaurant,American Restaurant,Sporting Goods Shop


#### Cluster 3


In [69]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,Downtown Toronto,2,Coffee Shop,Aquarium,Café,Hotel,Scenic Lookout,Fried Chicken Joint,Restaurant,Brewery,Sporting Goods Shop,Italian Restaurant
23,Central Toronto,2,Coffee Shop,Clothing Store,Sporting Goods Shop,Fast Food Restaurant,Mexican Restaurant,Diner,Park,Chinese Restaurant,Rental Car Location,Restaurant


#### Cluster 4


In [70]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,East Toronto,3,Health Food Store,Trail,Pub,Yoga Studio,Dessert Shop,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant


#### Cluster 5


In [72]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,West Toronto,4,Café,Mexican Restaurant,Thai Restaurant,Bakery,Cajun / Creole Restaurant,Italian Restaurant,Discount Store,Fried Chicken Joint,Bar,Diner


#### Cluster 6


In [73]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 5, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Downtown Toronto,5,Coffee Shop,Restaurant,Café,Gym,Deli / Bodega,Hotel,Thai Restaurant,Bookstore,Pizza Place,Concert Hall
