### Import Libraries for the Project.

In [2]:
import numpy as np
import pandas as pd
import json
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!pip install folium
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


### Steps to retrieve and clean data:
1. Copy and paste table from Wiki page into a spreadsheet.
2. Used -Find/Replace- to clean the data...
3. Find and Replace "Not assigned" with " "
4. Find and Replace " / " with ", "
5. Save file as toronto.csv

### Make data available on Google Drive
1. Upload toronto.csv to Google Drive
2. Open file.
3. File->Publish to Web option creates a link to access the file.
4. Copy link address for retrieveal in the next step.

### Retrieve toronto.csv from Google Drive

In [3]:
toronto= 'https://docs.google.com/spreadsheets/d/e/2PACX-1vRPizJaGpeJBdVVkNqFBsS8QhfNWMuMU1FbICFZ12PHyvZ7qp4IxS3xB4E9f3M4fs92MuaC_87d1LF3/pub?gid=681690534&single=true&output=csv'

### Read toronto.csv as a pandas dataframe.

In [4]:
toronto_data = pd.read_csv(toronto)
toronto_data.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,,
1,M2A,,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


### Clean up NaN rows.

In [5]:
toronto_data = toronto_data.dropna()
toronto_data.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


### Print the number of rows in the dataframe.

In [6]:
toronto_data.shape

(103, 3)

### Retrieve PostalCode data.

In [7]:
!wget -q -O 'toronto_codes.csv' https://cocl.us/Geospatial_data
print('Data downloaded!')

Data downloaded!


### Read the postal code data into a dataframe.

In [8]:
toronto_codes = 'toronto_codes.csv'
toronto_codes = pd.read_csv(toronto_codes)
toronto_codes.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### Merge data from the two tables.

In [9]:
toronto_merge=pd.merge(toronto_data,toronto_codes,on='Postal Code')
toronto_merge.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


### Follow the steps from the New York City lab.

#### Use geopy library to get the latitude and longitude values of Toronto.

In [10]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


#### Create a map of Toronto with neighborhoods superimposed on top.

In [11]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for Latitude, Longitude, Borough, Neighborhood in zip(toronto_merge['Latitude'], toronto_merge['Longitude'], toronto_merge['Borough'], toronto_merge['Neighborhood']):
    label = '{}, {}'.format(Neighborhood, Borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [Latitude, Longitude],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [12]:
CLIENT_ID = '' # your Foursquare ID
CLIENT_SECRET = '' # your Foursquare Secret
VERSION = '20200416' # Foursquare API version

### Explore the neighborhoods in Toronto.

#### First test it out on one neighborhood.

#### Get the latitude and longitude.

In [13]:
toronto_merge.loc[0, 'Neighborhood']
neighborhood_latitude = toronto_merge.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = toronto_merge.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = toronto_merge.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Parkwoods are 43.7532586, -79.3296565.


#### Create the foursquare API request URL

In [None]:
LIMIT = 300 # limit of number of venues returned by Foursquare API
radius = 1000 # define radius
# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)

#### Set "result" variable to the information returned from foursquare.

In [15]:
results = requests.get(url).json()

####  Creates a function that extracts the category of the venue.

In [16]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

#### Create the venues list.

In [17]:
venues = results['response']['groups'][0]['items']

#### Filter the data in the venues list.

In [18]:
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Allwyn's Bakery,Caribbean Restaurant,43.75984,-79.324719
1,Brookbanks Park,Park,43.751976,-79.33214
2,Tim Hortons,Café,43.760668,-79.326368
3,A&W,Fast Food Restaurant,43.760643,-79.326865
4,Bruno's valu-mart,Grocery Store,43.746143,-79.32463


### Now run the search on all of Toronto.

#### Create the function that searches all the neighborhoods.

In [19]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### Run the function

In [20]:
toronto_venues = getNearbyVenues(names=toronto_merge['Neighborhood'],
                                   latitudes=toronto_merge['Latitude'],
                                   longitudes=toronto_merge['Longitude']
                                  )

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Queen's Park, Ontario Provincial Government
Islington Avenue
Malvern, Rouge
Don Mills
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmount Park
Bayview Village
Downsview
The Danforth West, Ri

#### Initial display to verify data.

In [21]:
print(toronto_venues.shape)
toronto_venues.head()

(2154, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
3,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop
4,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant


#### Total venues returned for each neighborhood sorted from most to fewest venues.

In [22]:
venue_count=toronto_venues.groupby('Neighborhood').count()
venue_count=venue_count[['Venue']]
venue_count=venue_count.sort_values(by=['Venue'],ascending=False)
venue_count=venue_count.rename(columns={'Venue':'Total Venues'})
venue_count.head()

Unnamed: 0_level_0,Total Venues
Neighborhood,Unnamed: 1_level_1
"Toronto Dominion Centre, Design Exchange",100
"Harbourfront East, Union Station, Toronto Islands",100
"First Canadian Place, Underground city",100
"Commerce Court, Victoria Hotel",100
"Garden District, Ryerson",100


In [23]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue'].unique())))

There are 1411 uniques categories.


#### The top 20 neighborhoods by total venues.

In [24]:
top_20=venue_count[0:20]
top_20

Unnamed: 0_level_0,Total Venues
Neighborhood,Unnamed: 1_level_1
"Toronto Dominion Centre, Design Exchange",100
"Harbourfront East, Union Station, Toronto Islands",100
"First Canadian Place, Underground city",100
"Commerce Court, Victoria Hotel",100
"Garden District, Ryerson",100
"Richmond, Adelaide, King",97
Stn A PO Boxes,95
St. James Town,85
Church and Wellesley,77
"Fairview, Henry Farm, Oriole",66


### Analyze the top 20 neighborhoods.

In [25]:
top_20=pd.merge(top_20,toronto_venues,on='Neighborhood')
top_20=top_20[['Neighborhood','Neighborhood Latitude','Neighborhood Longitude','Venue','Venue Latitude','Venue Longitude','Venue Category']]
top_20.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Toronto Dominion Centre, Design Exchange",43.647177,-79.381576,Equinox Bay Street,43.6481,-79.379989,Gym
1,"Toronto Dominion Centre, Design Exchange",43.647177,-79.381576,Canoe,43.647452,-79.38132,Restaurant
2,"Toronto Dominion Centre, Design Exchange",43.647177,-79.381576,Walrus Pub & Beer Hall,43.647375,-79.379515,Pub
3,"Toronto Dominion Centre, Design Exchange",43.647177,-79.381576,Pilot Coffee Roasters,43.648835,-79.380936,Coffee Shop
4,"Toronto Dominion Centre, Design Exchange",43.647177,-79.381576,The Fairmont Royal York,43.645449,-79.381508,Hotel


In [26]:
top_20.shape

(1405, 7)

### Prepare the data for K-Means Testing

In [28]:
# one hot encoding
toronto_onehot = pd.get_dummies(top_20[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot.insert(0,'Neighborhoods',top_20['Neighborhood'])
# move neighborhood column to the first column
#fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
#toronto_onehot = toronto_onehot[fixed_columns]

In [29]:
toronto_onehot.shape

(1405, 205)

In [30]:
toronto_grouped = toronto_onehot.groupby('Neighborhoods').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhoods,American Restaurant,Antique Shop,Aquarium,Art Gallery,Arts & Crafts Store,Asian Restaurant,BBQ Joint,Bagel Shop,Bakery,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.017857,0.0,0.0,0.017857,0.017857,0.035714,...,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.0
1,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.015625,0.0,0.0,0.015625,0.0,0.0,0.015625
2,Church and Wellesley,0.012987,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025974
3,"Commerce Court, Victoria Hotel",0.04,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.01,...,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0
4,"Fairview, Henry Farm, Oriole",0.015152,0.0,0.0,0.0,0.0,0.015152,0.0,0.0,0.030303,...,0.015152,0.0,0.0,0.0,0.015152,0.0,0.0,0.0,0.015152,0.0
5,"First Canadian Place, Underground city",0.03,0.0,0.0,0.01,0.0,0.03,0.0,0.0,0.01,...,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.0
6,"Garden District, Ryerson",0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,...,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.0
7,"Harbourfront East, Union Station, Toronto Islands",0.0,0.0,0.05,0.01,0.0,0.0,0.0,0.0,0.01,...,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.0
8,"Kensington Market, Chinatown, Grange Park",0.0,0.0,0.0,0.0,0.015385,0.0,0.0,0.0,0.015385,...,0.0,0.0,0.0,0.046154,0.0,0.046154,0.015385,0.0,0.0,0.0
9,"Little Portugal, Trinity",0.0,0.0,0.0,0.023256,0.0,0.046512,0.0,0.0,0.0,...,0.0,0.0,0.0,0.046512,0.0,0.023256,0.023256,0.0,0.0,0.023256


In [31]:
toronto_grouped.shape

(20, 205)

In [32]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhoods']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhoods'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Berczy Park----
         venue  freq
0  Coffee Shop  0.05
1   Restaurant  0.04
2       Bakery  0.04
3         Café  0.04
4  Cheese Shop  0.04


----Central Bay Street----
                 venue  freq
0          Coffee Shop  0.19
1   Italian Restaurant  0.06
2       Sandwich Place  0.06
3                 Café  0.06
4  Japanese Restaurant  0.03


----Church and Wellesley----
                 venue  freq
0          Coffee Shop  0.06
1              Gay Bar  0.05
2  Japanese Restaurant  0.05
3     Sushi Restaurant  0.04
4           Restaurant  0.04


----Commerce Court, Victoria Hotel----
         venue  freq
0  Coffee Shop  0.10
1         Café  0.07
2   Restaurant  0.07
3        Hotel  0.06
4          Gym  0.04


----Fairview, Henry Farm, Oriole----
                  venue  freq
0        Clothing Store  0.15
1           Coffee Shop  0.08
2  Fast Food Restaurant  0.06
3        Cosmetics Shop  0.05
4   Japanese Restaurant  0.03


----First Canadian Place, Underground city----
         ve

In [34]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

### Return a table of the Top 10 Venues in each Neighborhood.

In [35]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhoods']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhoods'] = toronto_grouped['Neighborhoods']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhoods,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Seafood Restaurant,Café,Cheese Shop,Restaurant,Cocktail Bar,Italian Restaurant,Beer Bar,Bakery,Farmers Market
1,Central Bay Street,Coffee Shop,Café,Sandwich Place,Italian Restaurant,Bubble Tea Shop,Japanese Restaurant,Ice Cream Shop,Sushi Restaurant,Burger Joint,Salad Place
2,Church and Wellesley,Coffee Shop,Gay Bar,Japanese Restaurant,Restaurant,Sushi Restaurant,Men's Store,Gastropub,Dance Studio,Mediterranean Restaurant,Yoga Studio
3,"Commerce Court, Victoria Hotel",Coffee Shop,Café,Restaurant,Hotel,American Restaurant,Gym,Seafood Restaurant,Deli / Bodega,Japanese Restaurant,Italian Restaurant
4,"Fairview, Henry Farm, Oriole",Clothing Store,Coffee Shop,Fast Food Restaurant,Cosmetics Shop,Bakery,Food Court,Japanese Restaurant,Bus Station,Tea Room,Restaurant
5,"First Canadian Place, Underground city",Coffee Shop,Café,Restaurant,Hotel,American Restaurant,Bar,Gym,Japanese Restaurant,Seafood Restaurant,Steakhouse
6,"Garden District, Ryerson",Coffee Shop,Clothing Store,Café,Bubble Tea Shop,Cosmetics Shop,Italian Restaurant,Japanese Restaurant,Middle Eastern Restaurant,Tea Room,Ramen Restaurant
7,"Harbourfront East, Union Station, Toronto Islands",Coffee Shop,Aquarium,Café,Italian Restaurant,Restaurant,Hotel,Brewery,Sporting Goods Shop,Scenic Lookout,Fried Chicken Joint
8,"Kensington Market, Chinatown, Grange Park",Café,Coffee Shop,Mexican Restaurant,Bar,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,Burger Joint,Chinese Restaurant,Dessert Shop,Dumpling Restaurant
9,"Little Portugal, Trinity",Bar,Restaurant,Coffee Shop,Vegetarian / Vegan Restaurant,Asian Restaurant,Men's Store,Café,Brewery,Italian Restaurant,Japanese Restaurant


### Cluster the Top 20 Neighborhods by venue similarity.

In [36]:
# set number of clusters
kclusters = 5

toronto_clusters = toronto_grouped.drop('Neighborhoods', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_clusters)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 1, 2, 1, 2, 1, 2, 1, 4, 4], dtype=int32)

In [37]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [44]:
toronto_merged = toronto_merge

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhoods'), on='Neighborhood')
toronto_merged=toronto_merged.dropna()
toronto_merged=toronto_merged.astype({'Cluster Labels':'int32'})
toronto_merged # check the last columns!

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,2,Coffee Shop,Park,Pub,Bakery,Café,Mexican Restaurant,Theater,Breakfast Spot,Yoga Studio,Shoe Store
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,2,Coffee Shop,Clothing Store,Café,Bubble Tea Shop,Cosmetics Shop,Italian Restaurant,Japanese Restaurant,Middle Eastern Restaurant,Tea Room,Ramen Restaurant
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,2,Café,Coffee Shop,Cocktail Bar,American Restaurant,Hotel,Restaurant,Beer Bar,Department Store,Seafood Restaurant,Park
20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,2,Coffee Shop,Seafood Restaurant,Café,Cheese Shop,Restaurant,Cocktail Bar,Italian Restaurant,Beer Bar,Bakery,Farmers Market
24,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,1,Coffee Shop,Café,Sandwich Place,Italian Restaurant,Bubble Tea Shop,Japanese Restaurant,Ice Cream Shop,Sushi Restaurant,Burger Joint,Salad Place
30,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568,1,Coffee Shop,Café,Restaurant,Gym,Deli / Bodega,Thai Restaurant,Hotel,Sushi Restaurant,Salad Place,Clothing Store
33,M2J,North York,"Fairview, Henry Farm, Oriole",43.778517,-79.346556,2,Clothing Store,Coffee Shop,Fast Food Restaurant,Cosmetics Shop,Bakery,Food Court,Japanese Restaurant,Bus Station,Tea Room,Restaurant
36,M5J,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",43.640816,-79.381752,1,Coffee Shop,Aquarium,Café,Italian Restaurant,Restaurant,Hotel,Brewery,Sporting Goods Shop,Scenic Lookout,Fried Chicken Joint
37,M6J,West Toronto,"Little Portugal, Trinity",43.647927,-79.41975,4,Bar,Restaurant,Coffee Shop,Vegetarian / Vegan Restaurant,Asian Restaurant,Men's Store,Café,Brewery,Italian Restaurant,Japanese Restaurant
41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,0,Greek Restaurant,Coffee Shop,Italian Restaurant,Restaurant,Furniture / Home Store,Bookstore,Ice Cream Shop,Pub,Pizza Place,Lounge


In [45]:
toronto_merged.shape

(21, 16)

In [51]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)                             
                             
# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.vector_layers.CircleMarker(
        [lat, lon],
        radius=8,
        popup=label,
        color='green',
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters