In [1]:
import pandas as pd
import numpy as np
import json
import requests
from sklearn.cluster import KMeans
import folium

## Read in data from wikipedia using Pandas.read_html

In [2]:
# Import table using pandas

wiki_link = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
wiki_import = pd.read_html(wiki_link)
wiki_import

[    Postal Code           Borough  \
 0           M1A      Not assigned   
 1           M2A      Not assigned   
 2           M3A        North York   
 3           M4A        North York   
 4           M5A  Downtown Toronto   
 ..          ...               ...   
 175         M5Z      Not assigned   
 176         M6Z      Not assigned   
 177         M7Z      Not assigned   
 178         M8Z         Etobicoke   
 179         M9Z      Not assigned   
 
                                           Neighborhood  
 0                                         Not assigned  
 1                                         Not assigned  
 2                                            Parkwoods  
 3                                     Victoria Village  
 4                            Regent Park, Harbourfront  
 ..                                                 ...  
 175                                       Not assigned  
 176                                       Not assigned  
 177                

## Begin cleaning data

In [3]:
#select the correct table

df = wiki_import[0].iloc[1:]
df.head(), df.shape

(  Postal Code           Borough                      Neighborhood
 1         M2A      Not assigned                      Not assigned
 2         M3A        North York                         Parkwoods
 3         M4A        North York                  Victoria Village
 4         M5A  Downtown Toronto         Regent Park, Harbourfront
 5         M6A        North York  Lawrence Manor, Lawrence Heights,
 (179, 3))

In [4]:
#If a cell has a borough but neighborhood is 'Not assigned', reassign neighborhood to the borough (per assignment instructions0.

df['Neighborhood'] = df.apply(lambda x: x[1] if x[2] == 'Not assigned' else x[2], axis = 1)

# Mask dataframe to only select rows that have assigned boroughs
mask = df['Borough'] != 'Not assigned'
selected = df[mask].reset_index(drop = True)
selected.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [5]:
#print shape for verification

print(selected.shape)

(103, 3)


## Begin exploring neighborhoods

In [6]:
# Import latitude and longitude data
latlng = pd.read_csv('https://cocl.us/Geospatial_data') # could not get geocoder library to operate
latlng.shape

(103, 3)

In [7]:
#merge neighborhoods with lat/long data
full_df = selected.merge(latlng, left_on = 'Postal Code', right_on = 'Postal Code')
full_df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [8]:
# used my Foursquare info to request data, deleted after the fact for privacy/security

client_id = 'xyz'
client_secret = 'xyz'
version = '20180605'

In [9]:
#make request to foursquare api and store results
results = []
for i, neighborhood in enumerate(full_df['Neighborhood']):
    nla = full_df.loc[i, 'Latitude']
    nlo = full_df.loc[i, 'Longitude']
    radius = 500
    limit = 100
    url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(
    client_id,
    client_secret,
    nla,
    nlo,
    version,
    radius,
    limit,)
    results.append(requests.get(url).json())

results[0:5]

[{'meta': {'code': 200, 'requestId': '5f04eb4c6da43a2157f05e2d'},
   'headerLocation': 'Parkwoods - Donalda',
   'headerFullLocation': 'Parkwoods - Donalda, Toronto',
   'headerLocationGranularity': 'neighborhood',
   'totalResults': 3,
   'suggestedBounds': {'ne': {'lat': 43.757758604500005,
     'lng': -79.32343823984928},
    'sw': {'lat': 43.7487585955, 'lng': -79.33587476015072}},
   'groups': [{'type': 'Recommended Places',
     'name': 'recommended',
     'items': [{'reasons': {'count': 0,
        'items': [{'summary': 'This spot is popular',
          'type': 'general',
          'reasonName': 'globalInteractionReason'}]},
       'venue': {'id': '4e8d9dcdd5fbbbb6b3003c7b',
        'name': 'Brookbanks Park',
        'location': {'address': 'Toronto',
         'lat': 43.751976046055574,
         'lng': -79.33214044722958,
         'labeledLatLngs': [{'label': 'display',
           'lat': 43.751976046055574,
           'lng': -79.33214044722958}],
         'distance': 245,
       

In [10]:
#read json data into a dataframe by iterating through results
venues = pd.DataFrame()
for result in results:
    venues_info = result['response']['groups'][0]['items']
    venues_info = pd.json_normalize(venues_info)
    venues = venues.append(venues_info)

#drop venues with no postal code
venues = venues.dropna(subset = ['venue.location.postalCode'])

#select features needed from venues
venues = venues[['venue.name', 'venue.categories', 'venue.location.postalCode']]
venues.columns = ['Name', 'Categories', 'Postal_Code']

#clean postal code and venue categories
venues['Postal_Code'] = venues['Postal_Code'].apply(lambda x: x.split()[0])
venues['Categories'] = venues['Categories'].apply(lambda x: x[0]['name'])
venues.reset_index(drop = True, inplace = True)
venues.head()

Unnamed: 0,Name,Categories,Postal_Code
0,Corrosion Service Company Limited,Construction & Landscaping,L6C
1,Tim Hortons,Coffee Shop,M4A
2,Eglinton Ave E & Sloane Ave/Bermondsey Rd,Intersection,M4A
3,Cash Money,Financial or Legal Service,M4A
4,Roselle Desserts,Bakery,M5A


In [11]:
#merge venues with neighborhood information
new_df = full_df.merge(venues, left_on = 'Postal Code', right_on = 'Postal_Code').drop(['Postal_Code'], axis = 1)
new_df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Name,Categories
0,M4A,North York,Victoria Village,43.725882,-79.315572,Tim Hortons,Coffee Shop
1,M4A,North York,Victoria Village,43.725882,-79.315572,Eglinton Ave E & Sloane Ave/Bermondsey Rd,Intersection
2,M4A,North York,Victoria Village,43.725882,-79.315572,Cash Money,Financial or Legal Service
3,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,Bakery
4,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,Distribution Center


## Begin standardizing data for KMeans Clustering

In [12]:
#one hot encode for venue category
toronto_onehot = pd.get_dummies(new_df[['Categories']], prefix = '', prefix_sep = '')
toronto_onehot['Neighborhood'] = new_df['Neighborhood']
toronto_onehot

Unnamed: 0,Accessories Store,Afghan Restaurant,Airport,Airport Lounge,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,...,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Neighborhood
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Victoria Village
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Victoria Village
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Victoria Village
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"Regent Park, Harbourfront"
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"Regent Park, Harbourfront"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1606,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"Mimico NW, The Queensway West, South of Bloor,..."
1607,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"Mimico NW, The Queensway West, South of Bloor,..."
1608,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"Mimico NW, The Queensway West, South of Bloor,..."
1609,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"Mimico NW, The Queensway West, South of Bloor,..."


In [13]:
#group by neighborhood and get average (percentage) of venues by category
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,Airport,Airport Lounge,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Agincourt,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
3,Bayview Village,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.000000,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72,"Wexford, Maryvale",0.166667,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0
73,"Willowdale, Willowdale East",0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
74,"Willowdale, Willowdale West",0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
75,Woodbine Heights,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.2,0.000000,0.0,0.0,0.0,0.0,0.0,0.0


## Cluster by KMeans and view neighborhoods grouped by cluster label

In [14]:
kclusters = 5
toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

#fit algorithm
kmeans = KMeans(n_clusters = kclusters, random_state = 0).fit(toronto_grouped_clustering)
kmeans.labels_

array([3, 3, 2, 2, 2, 2, 2, 3, 2, 1, 3, 2, 2, 2, 3, 2, 2, 3, 3, 2, 3, 3,
       3, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 3, 3, 2, 2, 4, 2, 3, 2, 2, 3,
       3, 0, 3, 3, 2, 2, 3, 2, 2, 3, 3, 2, 3, 1, 3, 2, 2, 2, 2, 2, 2, 3,
       2, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3])

In [15]:
#function to view most common venues in a neighborhood
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [16]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

#add cluster labels
neighborhoods_venues_sorted.insert(0 ,'Label', kmeans.labels_)
neighborhoods_venues_sorted.head()

Unnamed: 0,Label,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,3,Agincourt,Latin American Restaurant,Breakfast Spot,Clothing Store,Yoga Studio,Department Store,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop
1,3,"Alderwood, Long Branch",Pizza Place,Coffee Shop,Pharmacy,Pub,Sandwich Place,Cupcake Shop,Eastern European Restaurant,Donut Shop,Doner Restaurant,Distribution Center
2,2,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Bank,Ice Cream Shop,Deli / Bodega,Supermarket,Mobile Phone Shop,Pizza Place,Pharmacy,Gas Station,Sandwich Place
3,2,Bayview Village,Bank,Chinese Restaurant,Café,Yoga Studio,Department Store,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop
4,2,"Bedford Park, Lawrence Manor East",Coffee Shop,Restaurant,Greek Restaurant,Thai Restaurant,Pharmacy,Pub,Café,Butcher,Sandwich Place,Liquor Store


In [17]:
#finally join all information
toronto_final = full_df.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on = 'Neighborhood').reset_index(drop = True)
toronto_final.dropna(inplace = True)
toronto_final.reset_index(drop=True, inplace = True)

toronto_final

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4A,North York,Victoria Village,43.725882,-79.315572,2.0,Financial or Legal Service,Coffee Shop,Intersection,Yoga Studio,Department Store,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop
1,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636,2.0,Coffee Shop,Bakery,Pub,Café,Breakfast Spot,Bistro,Restaurant,Spa,Shoe Store,Event Space
2,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,2.0,Clothing Store,Boutique,Event Space,Coffee Shop,Department Store,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop
3,M3B,North York,Don Mills,43.745906,-79.352188,2.0,Gym,Restaurant,Beer Store,Japanese Restaurant,Grocery Store,Discount Store,Supermarket,Chinese Restaurant,Sporting Goods Shop,Art Gallery
4,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937,3.0,Gym / Fitness Center,Pizza Place,Bank,Athletics & Sports,Gastropub,Fast Food Restaurant,Pharmacy,Comfort Food Restaurant,Event Space,Ethiopian Restaurant
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76,M8W,Etobicoke,"Alderwood, Long Branch",43.602414,-79.543484,3.0,Pizza Place,Coffee Shop,Pharmacy,Pub,Sandwich Place,Cupcake Shop,Eastern European Restaurant,Donut Shop,Doner Restaurant,Distribution Center
77,M4X,Downtown Toronto,"St. James Town, Cabbagetown",43.667967,-79.367675,2.0,Coffee Shop,Restaurant,Pizza Place,Bakery,Caribbean Restaurant,Bank,Liquor Store,Café,Beer Store,Hot Dog Joint
78,M5X,Downtown Toronto,"First Canadian Place, Underground city",43.648429,-79.382280,2.0,Coffee Shop,Café,Bookstore,Gym / Fitness Center,American Restaurant,Burger Joint,Bakery,Gluten-free Restaurant,Salad Place,Cupcake Shop
79,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160,2.0,Coffee Shop,Sushi Restaurant,Restaurant,Gay Bar,Burrito Place,Distribution Center,Beer Bar,Japanese Restaurant,Creperie,Arts & Crafts Store


## View neighborhoods grouped by cluster label

## Cluster 0

In [18]:
toronto_final.loc[toronto_final['Label'] == 0, toronto_final.columns[[2] + list(range(6, toronto_final.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
67,"Moore Park, Summerhill East",Fried Chicken Joint,Yoga Studio,Farmers Market,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant,Distribution Center


## Cluster 1

In [19]:
toronto_final.loc[toronto_final['Label'] == 1, toronto_final.columns[[2] + list(range(6, toronto_final.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,Caledonia-Fairbanks,Women's Store,Fast Food Restaurant,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant,Distribution Center
24,Scarborough Village,Women's Store,Fast Food Restaurant,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant,Distribution Center


## Cluster 2

In [20]:
toronto_final.loc[toronto_final['Label'] == 2, toronto_final.columns[[2] + list(range(6, toronto_final.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Victoria Village,Financial or Legal Service,Coffee Shop,Intersection,Yoga Studio,Department Store,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop
1,"Regent Park, Harbourfront",Coffee Shop,Bakery,Pub,Café,Breakfast Spot,Bistro,Restaurant,Spa,Shoe Store,Event Space
2,"Lawrence Manor, Lawrence Heights",Clothing Store,Boutique,Event Space,Coffee Shop,Department Store,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop
3,Don Mills,Gym,Restaurant,Beer Store,Japanese Restaurant,Grocery Store,Discount Store,Supermarket,Chinese Restaurant,Sporting Goods Shop,Art Gallery
5,"Garden District, Ryerson",Clothing Store,Coffee Shop,Cosmetics Shop,Furniture / Home Store,Café,Bookstore,Burger Joint,Department Store,Fast Food Restaurant,Italian Restaurant
7,Don Mills,Gym,Restaurant,Beer Store,Japanese Restaurant,Grocery Store,Discount Store,Supermarket,Chinese Restaurant,Sporting Goods Shop,Art Gallery
9,St. James Town,Italian Restaurant,Restaurant,Gastropub,Café,Gym,Salon / Barbershop,Japanese Restaurant,Department Store,Beer Bar,Coffee Shop
10,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",Café,Liquor Store,Coffee Shop,Pet Store,Pharmacy,Pizza Place,Home Service,Beer Store,Distribution Center,Dessert Shop
13,Berczy Park,Coffee Shop,Seafood Restaurant,Café,Cocktail Bar,Breakfast Spot,French Restaurant,Hotel,Restaurant,Irish Pub,Beer Bar
15,Leaside,Coffee Shop,Burger Joint,Pet Store,Bank,Restaurant,Liquor Store,Bike Shop,Beer Store,Bagel Shop,Sporting Goods Shop


## Cluster 3

In [21]:
toronto_final.loc[toronto_final['Label'] == 3, toronto_final.columns[[2] + list(range(6, toronto_final.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,"Parkview Hill, Woodbine Gardens",Gym / Fitness Center,Pizza Place,Bank,Athletics & Sports,Gastropub,Fast Food Restaurant,Pharmacy,Comfort Food Restaurant,Event Space,Ethiopian Restaurant
6,Glencairn,Pizza Place,Metro Station,Pub,Yoga Studio,Deli / Bodega,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant
8,Woodbine Heights,Park,Athletics & Sports,Pharmacy,Beer Store,Video Store,Doner Restaurant,Dessert Shop,Diner,Discount Store,Distribution Center
11,"Guildwood, Morningside, West Hill",Rental Car Location,Restaurant,Electronics Store,Seafood Restaurant,Intersection,Bank,Breakfast Spot,Convenience Store,Convention Center,Ethiopian Restaurant
12,The Beaches,Yoga Studio,Health Food Store,Pub,Deli / Bodega,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant
18,Cedarbrae,Hakka Restaurant,Gas Station,Bank,Fried Chicken Joint,Athletics & Sports,Thai Restaurant,Convenience Store,Dessert Shop,Event Space,Ethiopian Restaurant
19,Hillcrest Village,Fast Food Restaurant,Restaurant,Colombian Restaurant,Comfort Food Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant
21,Thorncliffe Park,Yoga Studio,Bank,Gym,Gym / Fitness Center,Gas Station,Fast Food Restaurant,Discount Store,Middle Eastern Restaurant,Pharmacy,Pizza Place
23,"Dufferin, Dovercourt Village",Pharmacy,Bakery,Grocery Store,Supermarket,Bar,Bank,Brewery,Fried Chicken Joint,Pizza Place,Middle Eastern Restaurant
31,Downsview,Grocery Store,Business Service,Bank,Athletics & Sports,Discount Store,Liquor Store,Yoga Studio,Diner,Falafel Restaurant,Event Space


## Cluster 4

In [22]:
toronto_final.loc[toronto_final['Label'] == 4, toronto_final.columns[[2] + list(range(6, toronto_final.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
61,"Kingsview Village, St. Phillips, Martin Grove ...",Sandwich Place,Yoga Studio,Farmers Market,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant,Distribution Center


## Map neighborhoods, color code by cluster

In [25]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[43.654260,-79.360636], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_final['Latitude'], toronto_final['Longitude'], toronto_final['Neighborhood'], toronto_final['Label'].astype('int')):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters