# Segmentation and Clustering assignment

In [349]:
import pandas
import bs4
import urllib.request
import numpy as np
import pandas as pd
import requests
import json
import matplotlib.cm as cm
import matplotlib.colors as colors
from pandas.io.json import json_normalize

Used beautifulSoup package for achieving a table from web

In [33]:
def scrape_table(url):
    pg = urllib.request.urlopen(url).read()
    Beautiful = bs4.BeautifulSoup(pg)
    table = Beautiful.find("table")
    header = [head.find_all(text=True)[0].strip() for head in table.find_all("th")]
    data = [[datas2.find_all(text=True)[0].strip() for datas2 in datas1.find_all("td")] for datas1 in table.find_all("tr")]
    dataframe = pandas.DataFrame(data, columns = header)
    return dataframe

In [34]:
Toronto_Postal = scrape_table('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')
Toronto_Postal.drop(0,axis=0, inplace = True)
Toronto_Postal.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront


Drop rows with 'Not assigned' value

In [35]:
Toronto_Postal_Code = Toronto_Postal[Toronto_Postal['Borough'] != 'Not assigned']
Toronto_Postal_Code.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Downtown Toronto,Queen's Park
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern
14,M3B,North York,Don Mills North


Concatenate values with same Postcode, Borough and rearranged them.

In [36]:
Toronto = Toronto_Postal_Code.groupby(['Postcode','Borough'])['Neighbourhood'].apply(', ' .join).reset_index()
Toronto.sort_values(['Postcode'])

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv..."
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ..."


In [37]:
Toronto.shape

(103, 3)

Data preprocessing is done

### Getting latitude, longitude merged into the original dataframe

In [42]:
import geocoder
lat_lng_coords = None
latitude = []
longitude = []
for i in range(Toronto.shape[0]):
    address = '{}, Toronto, Ontario'.format(Toronto.at[i, 'Postcode'])
    g = geocoder.arcgis(address)
    latitude.append(g.latlng[0])
    longitude.append(g.latlng[1])

In [43]:
Latitude = pd.DataFrame(latitude, columns = ['Latitude'])
Longitude = pd.DataFrame(longitude, columns = ['Longitude'])

In [44]:
Toronto.shape

(103, 3)

In [376]:
Toronto_merged = Toronto.join(Latitude).join(Longitude)
Toronto_merged

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.811525,-79.195517
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.785665,-79.158725
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.765815,-79.175193
3,M1G,Scarborough,Woburn,43.768369,-79.217590
4,M1H,Scarborough,Cedarbrae,43.769688,-79.239440
...,...,...,...,...,...
98,M9N,York,Weston,43.704845,-79.517546
99,M9P,Etobicoke,Westmount,43.696505,-79.530252
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv...",43.686810,-79.557284
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ...",43.743145,-79.584664


### Excludes rows not including 'Toronto' in 'Borough' column

In [123]:
num = np.zeros(Toronto_merged.shape[0])
Template = pandas.Series(Template)

In [125]:
for i in range(Toronto_merged['Borough'].shape[0]):
    if "Toronto" in Toronto_merged['Borough'][i]:
        Template[i] = True
    else:
        Template[i] = False


In [129]:
Toronto_merged = Toronto_merged[Template]

### Data mapping

In [130]:
from geopy.geocoders import Nominatim
address = 'Toronto'
geolocator = Nominatim(user_agent = 'To_explorer')
location = geolocator.geocode(address)

In [131]:
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto is {}, {}'.format(latitude, longitude))

The geograpical coordinate of Toronto is 43.653963, -79.387207


In [132]:
import folium
map_Toronto = folium.Map(location = [latitude, longitude], zoom_start=12)

In [133]:
for lat, lng, borough, neighborhood in zip(Toronto_merged['Latitude'], Toronto_merged['Longitude'], Toronto_merged['Borough'], Toronto_merged['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        color='red',
        fill=False,
        parse_html=False
    ).add_to(map_Toronto)

In [135]:
map_Toronto

## Explore venues in Toronto using Foursquare API

I will search top 100 venues that are in the first neighborhood in my dataframe

In [138]:
CLIENT_ID = '2OFQNQUE1KNXITHBRLENLT0JNBBZUSWISREFXMRWYTS1Q5AW' # your Foursquare ID
CLIENT_SECRET = '1G5XGH3HF0C2U4KLKBFAEEH0IZUAVBYOLPENKIKQ0IDOQA2P' # your Foursquare Secret
VERSION = '20200228' # Foursquare API version

In [149]:
Toronto_merged.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
37,M4E,East Toronto,The Beaches,43.676531,-79.295425
41,M4K,East Toronto,"The Danforth West, Riverdale",43.683178,-79.355105
42,M4L,East Toronto,"The Beaches West, India Bazaar",43.667965,-79.314667
43,M4M,East Toronto,Studio District,43.660629,-79.334855
44,M4N,Central Toronto,Lawrence Park,43.72842,-79.387133


In [157]:
neighborhood_latitude = Toronto_merged.iloc[0, 3]
neighborhood_longitude = Toronto_merged.iloc[0, 4]
neighborhood_name = Toronto_merged.iloc[0, 2]
print('Latitude and longitude values of {} are {}, {}'.format(neighborhood_name,
                                                             neighborhood_latitude,
                                                             neighborhood_longitude))

Latitude and longitude values of The Beaches are 43.67653121600006, -79.29542499999997


In [190]:
LIMIT = 10
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    Limit)

In [194]:
requests.get(url).json()['response']['groups'][0]['items']

[{'reasons': {'count': 0,
   'items': [{'summary': 'This spot is popular',
     'type': 'general',
     'reasonName': 'globalInteractionReason'}]},
  'venue': {'id': '4bd461bc77b29c74a07d9282',
   'name': 'Glen Manor Ravine',
   'location': {'address': 'Glen Manor',
    'crossStreet': 'Queen St.',
    'lat': 43.67682094413784,
    'lng': -79.29394208780985,
    'labeledLatLngs': [{'label': 'display',
      'lat': 43.67682094413784,
      'lng': -79.29394208780985}],
    'distance': 123,
    'cc': 'CA',
    'city': 'Toronto',
    'state': 'ON',
    'country': 'Canada',
    'formattedAddress': ['Glen Manor (Queen St.)', 'Toronto ON', 'Canada']},
   'categories': [{'id': '4bf58dd8d48988d159941735',
     'name': 'Trail',
     'pluralName': 'Trails',
     'shortName': 'Trail',
     'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/parks_outdoors/hikingtrail_',
      'suffix': '.png'},
     'primary': True}],
   'photos': {'count': 0, 'groups': []}},
  'referralId': 'e-0-4bd461bc77b

In [195]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [192]:
# type your answer here

Toronto_venues = getNearbyVenues(names=Toronto_merged['Neighbourhood'],
                                   latitudes=Toronto_merged['Latitude'],
                                   longitudes=Toronto_merged['Longitude']
                                  )



The Beaches
The Danforth West, Riverdale
The Beaches West, India Bazaar
Studio District
Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park, Summerhill East
Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West
Rosedale
Cabbagetown, St. James Town
Church and Wellesley
Harbourfront
Ryerson, Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide, King, Richmond
Harbourfront East, Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Commerce Court, Victoria Hotel
Roselawn
Forest Hill North, Forest Hill West
The Annex, North Midtown, Yorkville
Harbord, University of Toronto
Chinatown, Grange Park, Kensington Market
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Stn A PO Boxes 25 The Esplanade
First Canadian Place, Underground city
Christie
Dovercourt Village, Dufferin
Little Portugal, Trinity
Brockton, Exhibition Place, Parkdale Village
High Park, The Junction Sout

## Analyzing Each Neighborhood

In [202]:
Toronto_venues[['Venue Category']]

Unnamed: 0,Venue Category
0,Trail
1,Health Food Store
2,Pub
3,Pizza Place
4,Neighborhood
...,...
313,Pizza Place
314,Café
315,Burrito Place
316,Brazilian Restaurant


In [203]:
#One hot encoding
Toronto_onehot = pd.get_dummies(Toronto_venues[['Venue Category']])
Toronto_onehot['Neighborhood'] = Toronto_venues['Neighborhood']
Toronto_onehot

Unnamed: 0,Venue Category_American Restaurant,Venue Category_Art Gallery,Venue Category_Asian Restaurant,Venue Category_BBQ Joint,Venue Category_Baby Store,Venue Category_Bakery,Venue Category_Bank,Venue Category_Bar,Venue Category_Beer Bar,Venue Category_Beer Store,...,Venue Category_Tech Startup,Venue Category_Tennis Court,Venue Category_Thai Restaurant,Venue Category_Theater,Venue Category_Theme Restaurant,Venue Category_Trail,Venue Category_Vegetarian / Vegan Restaurant,Venue Category_Vietnamese Restaurant,Venue Category_Yoga Studio,Neighborhood
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,The Beaches
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,The Beaches
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,The Beaches
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,The Beaches
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,The Beaches
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
313,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Business Reply Mail Processing Centre 969 Eastern
314,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Business Reply Mail Processing Centre 969 Eastern
315,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Business Reply Mail Processing Centre 969 Eastern
316,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Business Reply Mail Processing Centre 969 Eastern


In [217]:
#Move 'Neighborhood' column to the first column
fixed_columns = [Toronto_onehot.columns[-1]] + list(Toronto_onehot.columns[:-1])
Toronto_onehot = Toronto_onehot[fixed_columns]

In [306]:
Toronto_grouped = Toronto_onehot.groupby('Neighborhood').mean().reset_index()

In [281]:
#Print each neighborhood along with the top 5 most common values
num_top_venues = 5
for hood in Toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = Toronto_grouped[Toronto_grouped['Neighborhood']==hood].T.reset_index()
    temp.columns = ['Neigh', 'freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq' : 2})
    temp = temp.sort_values('freq', ascending = False).reset_index(drop=True)
    print(temp.head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
                                 Neigh  freq
0   Venue Category_American Restaurant   0.1
1  Venue Category_Gym / Fitness Center   0.1
2    Venue Category_Seafood Restaurant   0.1
3            Venue Category_Restaurant   0.1
4            Venue Category_Steakhouse   0.1


----Berczy Park----
                                          Neigh  freq
0              Venue Category_French Restaurant   0.1
1                   Venue Category_Liquor Store   0.1
2                     Venue Category_Restaurant   0.1
3  Venue Category_Vegetarian / Vegan Restaurant   0.1
4                           Venue Category_Park   0.1


----Brockton, Exhibition Place, Parkdale Village----
                                          Neigh  freq
0         Venue Category_Furniture / Home Store   0.2
1                    Venue Category_Coffee Shop   0.2
2                 Venue Category_Breakfast Spot   0.1
3  Venue Category_Vegetarian / Vegan Restaurant   0.1
4             Venue Catego

### Put the datas into pandas dataframe

In [282]:
#Function returning sorted datas
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    return row_categories_sorted.index.values[0:num_top_venues]

In [294]:
Toronto_grouped.iloc[1,:]

Neighborhood                                    Berczy Park
Venue Category_American Restaurant                        0
Venue Category_Art Gallery                                0
Venue Category_Asian Restaurant                           0
Venue Category_BBQ Joint                                  0
                                                   ...     
Venue Category_Theme Restaurant                           0
Venue Category_Trail                                      0
Venue Category_Vegetarian / Vegan Restaurant            0.1
Venue Category_Vietnamese Restaurant                      0
Venue Category_Yoga Studio                                0
Name: 1, Length: 119, dtype: object

In [295]:
return_most_common_venues(Toronto_grouped.iloc[1,:], num_top_venues).shape

(10,)

In [297]:
Toronto_grouped.shape

(38, 119)

In [360]:
#Create new dataframe displaying top 10 venues
num_top_venues = 10
indicators = ['st', 'nd', 'rd']
columns = ['Neighborhood']

for i in np.arange(num_top_venues):
    try:
        columns.append('{}{} most frequent venue'.format((i+1), indicators[i]))
    except:
        columns.append('{}th most frequent venue'.format(i))

top_10 = pd.DataFrame(index = range(Toronto_grouped.shape[0]), columns = columns)
top_10.Neighborhood = Toronto_grouped.Neighborhood

for i in np.arange(Toronto_grouped.shape[0]):
    top_10.iloc[i,1:] = return_most_common_venues(Toronto_grouped.iloc[i,:], num_top_venues)

top_10

Unnamed: 0,Neighborhood,1st most frequent venue,2nd most frequent venue,3rd most frequent venue,3th most frequent venue,4th most frequent venue,5th most frequent venue,6th most frequent venue,7th most frequent venue,8th most frequent venue,9th most frequent venue
0,"Adelaide, King, Richmond",Venue Category_American Restaurant,Venue Category_Gym / Fitness Center,Venue Category_Seafood Restaurant,Venue Category_Steakhouse,Venue Category_Café,Venue Category_Bakery,Venue Category_Restaurant,Venue Category_Coffee Shop,Venue Category_Greek Restaurant,Venue Category_Vegetarian / Vegan Restaurant
1,Berczy Park,Venue Category_Park,Venue Category_Museum,Venue Category_Concert Hall,Venue Category_French Restaurant,Venue Category_Liquor Store,Venue Category_Tea Room,Venue Category_Restaurant,Venue Category_Thai Restaurant,Venue Category_Cocktail Bar,Venue Category_Vegetarian / Vegan Restaurant
2,"Brockton, Exhibition Place, Parkdale Village",Venue Category_Coffee Shop,Venue Category_Furniture / Home Store,Venue Category_Gym,Venue Category_Bakery,Venue Category_Breakfast Spot,Venue Category_Italian Restaurant,Venue Category_Seafood Restaurant,Venue Category_Vegetarian / Vegan Restaurant,Venue Category_Gastropub,Venue Category_Fast Food Restaurant
3,Business Reply Mail Processing Centre 969 Eastern,Venue Category_Restaurant,Venue Category_Vegetarian / Vegan Restaurant,Venue Category_Concert Hall,Venue Category_Burrito Place,Venue Category_Mediterranean Restaurant,Venue Category_Brazilian Restaurant,Venue Category_Hotel,Venue Category_Pizza Place,Venue Category_Speakeasy,Venue Category_Café
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",Venue Category_Yoga Studio,Venue Category_Speakeasy,Venue Category_Caribbean Restaurant,Venue Category_Café,Venue Category_Ramen Restaurant,Venue Category_Men's Store,Venue Category_Restaurant,Venue Category_Market,Venue Category_Park,Venue Category_Hotel
5,"Cabbagetown, St. James Town",Venue Category_Café,Venue Category_Indian Restaurant,Venue Category_Japanese Restaurant,Venue Category_Farm,Venue Category_Diner,Venue Category_Bakery,Venue Category_Jewelry Store,Venue Category_Restaurant,Venue Category_Italian Restaurant,Venue Category_Farmers Market
6,Central Bay Street,Venue Category_Coffee Shop,Venue Category_Sushi Restaurant,Venue Category_Neighborhood,Venue Category_Modern European Restaurant,Venue Category_Miscellaneous Shop,Venue Category_Poke Place,Venue Category_Ramen Restaurant,Venue Category_Bubble Tea Shop,Venue Category_Seafood Restaurant,Venue Category_Gastropub
7,"Chinatown, Grange Park, Kensington Market",Venue Category_Vietnamese Restaurant,Venue Category_Café,Venue Category_Organic Grocery,Venue Category_Noodle House,Venue Category_Mexican Restaurant,Venue Category_Cocktail Bar,Venue Category_Gaming Cafe,Venue Category_Record Shop,Venue Category_Yoga Studio,Venue Category_Discount Store
8,Christie,Venue Category_Café,Venue Category_Grocery Store,Venue Category_Baby Store,Venue Category_Coffee Shop,Venue Category_Playground,Venue Category_Candy Store,Venue Category_Italian Restaurant,Venue Category_Yoga Studio,Venue Category_Dog Run,Venue Category_Farm
9,Church and Wellesley,Venue Category_Juice Bar,Venue Category_Pizza Place,Venue Category_Salon / Barbershop,Venue Category_Breakfast Spot,Venue Category_Steakhouse,Venue Category_Bookstore,Venue Category_Burger Joint,Venue Category_Restaurant,Venue Category_Theme Restaurant,Venue Category_Park


## Cluster Neighborhoods

In [327]:
from sklearn.cluster import KMeans
kclusters = 5
Toronto_grouped_clustering = Toronto_grouped.drop('Neighborhood', axis=1)
kmeans = KMeans(n_clusters = kclusters, random_state = 0)
kmeans.fit(Toronto_grouped_clustering)
kmeans.labels_
top_10.insert(0, 'Cluster labels', kmeans.labels_)

array([3, 3, 1, 3, 3, 3, 1, 3, 3, 3, 3, 0, 0, 1, 1, 0, 3, 4, 3, 1, 0, 0,
       2, 0, 0, 0, 1, 1, 0, 3, 3, 1, 3, 1, 3, 0, 0, 0], dtype=int32)

In [391]:
Toronto_merged_final = Toronto_merged.rename(columns = {'Neighbourhood':'Neighborhood'}).join(top_10.set_index('Neighborhood'), on='Neighborhood')

In [409]:
Toronto_merged_final.dropna(axis=0, inplace=True)

In [413]:
Toronto_merged_final.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster labels,1st most frequent venue,2nd most frequent venue,3rd most frequent venue,3th most frequent venue,4th most frequent venue,5th most frequent venue,6th most frequent venue,7th most frequent venue,8th most frequent venue,9th most frequent venue
37,M4E,East Toronto,The Beaches,43.676531,-79.295425,0.0,Venue Category_Pub,Venue Category_Trail,Venue Category_Health Food Store,Venue Category_Pizza Place,Venue Category_Neighborhood,Venue Category_Yoga Studio,Venue Category_Eastern European Restaurant,Venue Category_Dog Run,Venue Category_Discount Store,Venue Category_Diner
41,M4K,East Toronto,"The Danforth West, Riverdale",43.683178,-79.355105,0.0,Venue Category_Park,Venue Category_Grocery Store,Venue Category_Ice Cream Shop,Venue Category_Discount Store,Venue Category_Bus Line,Venue Category_Yoga Studio,Venue Category_Fish & Chips Shop,Venue Category_Creperie,Venue Category_Department Store,Venue Category_Dessert Shop
42,M4L,East Toronto,"The Beaches West, India Bazaar",43.667965,-79.314667,0.0,Venue Category_Pet Store,Venue Category_Sushi Restaurant,Venue Category_Ice Cream Shop,Venue Category_Italian Restaurant,Venue Category_Fish & Chips Shop,Venue Category_Fast Food Restaurant,Venue Category_Liquor Store,Venue Category_Park,Venue Category_Pub,Venue Category_Gym
43,M4M,East Toronto,Studio District,43.660629,-79.334855,1.0,Venue Category_Coffee Shop,Venue Category_Boutique,Venue Category_Neighborhood,Venue Category_Vietnamese Restaurant,Venue Category_Latin American Restaurant,Venue Category_Bakery,Venue Category_Pizza Place,Venue Category_Sushi Restaurant,Venue Category_Italian Restaurant,Venue Category_Yoga Studio
44,M4N,Central Toronto,Lawrence Park,43.72842,-79.387133,2.0,Venue Category_Swim School,Venue Category_Bus Line,Venue Category_Yoga Studio,Venue Category_Fish & Chips Shop,Venue Category_Cosmetics Shop,Venue Category_Creperie,Venue Category_Department Store,Venue Category_Dessert Shop,Venue Category_Diner,Venue Category_Discount Store


## Visualize it

In [415]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Toronto_merged_final['Latitude'], Toronto_merged_final['Longitude'], Toronto_merged_final['Neighborhood'], Toronto_merged_final['Cluster labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color = rainbow[int(cluster)],
        fill=True,
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters