# This is the Notebook for the capstone project of the IBM Professional Data Science Certificate

## Part 1

In [1]:
import pandas as pd
import numpy as np


In [2]:
print("Hello Capstone Project Course!")

Hello Capstone Project Course!


## Part 2

In [3]:
from geopy.geocoders import Nominatim
import folium
import requests
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors
import os

#### Scraping postal codes data from Wikipedia 

In [4]:
# Load postal code data into a dataframe
URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
codes = pd.read_html(URL, attrs={'class':'wikitable'})
codes = codes[0]
codes.columns = ['Postcode', 'Borough', 'Neighborhood']
codes.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


#### Load the longitude and latitude data

In [5]:
lat_long = pd.read_csv('http://cocl.us/Geospatial_data')
lat_long.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


#### Merge the postal codes dataframe and the latitude and longitude dataframes

In [6]:
Toronto = codes.merge(lat_long, how='inner', left_on='Postcode',right_on='Postal Code')
Toronto.drop(columns=['Postal Code'], inplace=True)
Toronto.head()


Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M6A,North York,Lawrence Heights,43.718518,-79.464763
4,M6A,North York,Lawrence Manor,43.718518,-79.464763


#### Explore and cluster the neighborhoods in Toronto.

##### Selected Toronto Neighborhood 

In [7]:
mask = Toronto.Borough.str.contains('Toronto', case=False)

TOR = Toronto.loc[mask,:].reset_index(drop=True)
TOR.head()


Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
1,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
2,M5B,Downtown Toronto,Ryerson,43.657162,-79.378937
3,M5B,Downtown Toronto,Garden District,43.657162,-79.378937
4,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418


In [8]:
boroughs = TOR.Borough.unique().tolist()
boroughs

['Downtown Toronto', 'East Toronto', 'West Toronto', 'Central Toronto']

In [9]:
address = 'Toronto, ON'
geolocator = Nominatim(user_agent="TO_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [10]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(TOR['Latitude'], TOR['Longitude'], TOR['Borough'], TOR['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto
   

<folium.features.CircleMarker at 0x2470e19f390>

<folium.features.CircleMarker at 0x2470df5e278>

<folium.features.CircleMarker at 0x2470dc6bb70>

<folium.features.CircleMarker at 0x2470e54fc88>

<folium.features.CircleMarker at 0x2470df5e3c8>

<folium.features.CircleMarker at 0x2470e55a6a0>

<folium.features.CircleMarker at 0x2470e54fef0>

<folium.features.CircleMarker at 0x2470e54f978>

<folium.features.CircleMarker at 0x2470e54dd68>

<folium.features.CircleMarker at 0x2470e55aac8>

<folium.features.CircleMarker at 0x2470e54db00>

<folium.features.CircleMarker at 0x2470e54dd30>

<folium.features.CircleMarker at 0x2470e57c908>

<folium.features.CircleMarker at 0x2470e54d860>

<folium.features.CircleMarker at 0x2470e55a940>

<folium.features.CircleMarker at 0x2470e54d198>

<folium.features.CircleMarker at 0x2470e54f7f0>

<folium.features.CircleMarker at 0x2470e55af60>

<folium.features.CircleMarker at 0x2470e55ac18>

<folium.features.CircleMarker at 0x2470e57ca90>

<folium.features.CircleMarker at 0x2470e597eb8>

<folium.features.CircleMarker at 0x2470e57c6d8>

<folium.features.CircleMarker at 0x2470e5972e8>

<folium.features.CircleMarker at 0x2470e5b7e48>

<folium.features.CircleMarker at 0x2470e57ca20>

<folium.features.CircleMarker at 0x2470e5b7470>

<folium.features.CircleMarker at 0x2470e597a20>

<folium.features.CircleMarker at 0x2470e57ccf8>

<folium.features.CircleMarker at 0x2470e57c320>

<folium.features.CircleMarker at 0x2470e5b7128>

<folium.features.CircleMarker at 0x2470e5b71d0>

<folium.features.CircleMarker at 0x2470e57c7f0>

<folium.features.CircleMarker at 0x2470e597f60>

<folium.features.CircleMarker at 0x2470e5b7400>

<folium.features.CircleMarker at 0x2470e5ac5f8>

<folium.features.CircleMarker at 0x2470e5b7898>

<folium.features.CircleMarker at 0x2470e5b7c50>

<folium.features.CircleMarker at 0x2470e5b70f0>

<folium.features.CircleMarker at 0x2470e57cc18>

<folium.features.CircleMarker at 0x2470e5ac780>

<folium.features.CircleMarker at 0x2470e5b0eb8>

<folium.features.CircleMarker at 0x2470e607a58>

<folium.features.CircleMarker at 0x2470e5b7e10>

<folium.features.CircleMarker at 0x2470e607240>

<folium.features.CircleMarker at 0x2470e5b00b8>

<folium.features.CircleMarker at 0x2470e5ac320>

<folium.features.CircleMarker at 0x2470e607e48>

<folium.features.CircleMarker at 0x2470e5b0f28>

<folium.features.CircleMarker at 0x2470e5ac828>

<folium.features.CircleMarker at 0x2470e5b0ac8>

<folium.features.CircleMarker at 0x2470e5b01d0>

<folium.features.CircleMarker at 0x2470e607ef0>

<folium.features.CircleMarker at 0x2470e615e80>

<folium.features.CircleMarker at 0x2470e5b0400>

<folium.features.CircleMarker at 0x2470e597b00>

<folium.features.CircleMarker at 0x2470e6075c0>

<folium.features.CircleMarker at 0x2470e607eb8>

<folium.features.CircleMarker at 0x2470e615908>

<folium.features.CircleMarker at 0x2470e615240>

<folium.features.CircleMarker at 0x2470e607748>

<folium.features.CircleMarker at 0x2470e6155f8>

<folium.features.CircleMarker at 0x2470e5e9ac8>

<folium.features.CircleMarker at 0x2470e615860>

<folium.features.CircleMarker at 0x2470e6429e8>

<folium.features.CircleMarker at 0x2470e5e9390>

<folium.features.CircleMarker at 0x2470e6157f0>

<folium.features.CircleMarker at 0x2470e5e9240>

<folium.features.CircleMarker at 0x2470e642080>

<folium.features.CircleMarker at 0x2470e5e9550>

<folium.features.CircleMarker at 0x2470e5e9d68>

<folium.features.CircleMarker at 0x2470e642c88>

<folium.features.CircleMarker at 0x2470e65b978>

<folium.features.CircleMarker at 0x2470e642ac8>

<folium.features.CircleMarker at 0x2470e65beb8>

Next, we are going to start utilizing the Foursquare API to explore the neighborhoods and segment them.

##### Define Foursquare credentials


In [14]:
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET')  # your Foursquare Secret
VERSION = os.environ.get('FOURSQUARE_VERSION') # Foursquare API version

##### Let's explore the first neighborhood in our dataframe.¶

In [12]:
TOR.loc[0,'Neighborhood']

'Harbourfront'

##### Now, let's get the top 100 venues that are in Marble Hill within a radius of 500 meters.¶

In [33]:
# type your answer here
search_query = TOR.loc[0,'Neighborhood']
radius = 500
LIMIT = 100
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, search_query, radius, LIMIT)



###### Send the get request and exaine the results

In [34]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5e528bd802a172001b82b0e1'},
 'response': {'venues': [{'id': '4dfe6d6462848aca58a2c71d',
    'name': 'Harbourfront International Marketplace',
    'location': {'address': 'Queens Quay',
     'lat': 43.65457941726759,
     'lng': -79.38929312785676,
     'labeledLatLngs': [{'label': 'display',
       'lat': 43.65457941726759,
       'lng': -79.38929312785676}],
     'distance': 181,
     'cc': 'CA',
     'city': 'Toronto',
     'state': 'ON',
     'country': 'Canada',
     'formattedAddress': ['Queens Quay', 'Toronto ON', 'Canada']},
    'categories': [{'id': '50be8ee891d4fa8dcc7199a7',
      'name': 'Market',
      'pluralName': 'Markets',
      'shortName': 'Market',
      'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/shops/market_',
       'suffix': '.png'},
      'primary': True}],
    'referralId': 'v-1582468455',
    'hasPerk': False}]}}

In [17]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

##### Now write the code to run the above function on each neighborhood and create a new dataframe called *toronto_venues*.

In [18]:
toronto_venues = getNearbyVenues(names=TOR['Neighborhood'],
                                   latitudes=TOR['Latitude'],
                                   longitudes=TOR['Longitude']
                                  )

Harbourfront
Queen's Park
Ryerson
Garden District
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Adelaide
King
Richmond
Dovercourt Village
Dufferin
Harbourfront East
Toronto Islands
Union Station
Little Portugal
Trinity
The Danforth West
Riverdale
Design Exchange
Toronto Dominion Centre
Brockton
Exhibition Place
Parkdale Village
The Beaches West
India Bazaar
Commerce Court
Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North
Forest Hill West
High Park
The Junction South
North Toronto West
The Annex
North Midtown
Yorkville
Parkdale
Roncesvalles
Davisville
Harbord
University of Toronto
Runnymede
Swansea
Moore Park
Summerhill East
Chinatown
Grange Park
Kensington Market
Deer Park
Forest Hill SE
Rathnelly
South Hill
Summerhill West
CN Tower
Bathurst Quay
Island airport
Harbourfront West
King and Spadina
Railway Lands
South Niagara
Rosedale
Stn A PO Boxes 25 The Esplanade
Cabbagetown
St. James Town
First Canadian Place
Underground city

In [19]:
print(toronto_venues.shape)

(3237, 7)


In [20]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Adelaide,100,100,100,100,100,100
Bathurst Quay,17,17,17,17,17,17
Berczy Park,56,56,56,56,56,56
Brockton,22,22,22,22,22,22
Business Reply Mail Processing Centre 969 Eastern,15,15,15,15,15,15
...,...,...,...,...,...,...
Underground city,100,100,100,100,100,100
Union Station,100,100,100,100,100,100
University of Toronto,36,36,36,36,36,36
Victoria Hotel,100,100,100,100,100,100


In [21]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 240 uniques categories.


#### Let's analyze each neighbourhood

In [22]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,...,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


##### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category¶

In [23]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,Arts & Crafts Store,Asian Restaurant,...,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,Adelaide,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.020000,0.0,0.00,0.000000,0.0,0.02,...,0.0,0.040000,0.010000,0.0,0.0,0.0,0.0,0.00,0.020000,0.000000,0.0,0.01,0.0,0.0,0.01
1,Bathurst Quay,0.000000,0.0,0.058824,0.058824,0.058824,0.117647,0.176471,0.117647,0.000000,0.0,0.00,0.000000,0.0,0.00,...,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.00,0.000000,0.000000,0.0,0.00,0.0,0.0,0.00
2,Berczy Park,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.00,0.017857,0.0,0.00,...,0.0,0.017857,0.000000,0.0,0.0,0.0,0.0,0.00,0.017857,0.000000,0.0,0.00,0.0,0.0,0.00
3,Brockton,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.00,0.000000,0.0,0.00,...,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.00,0.000000,0.000000,0.0,0.00,0.0,0.0,0.00
4,Business Reply Mail Processing Centre 969 Eastern,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.00,0.000000,0.0,0.00,...,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.00,0.000000,0.000000,0.0,0.00,0.0,0.0,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68,Underground city,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.030000,0.0,0.00,0.010000,0.0,0.03,...,0.0,0.020000,0.010000,0.0,0.0,0.0,0.0,0.01,0.010000,0.000000,0.0,0.01,0.0,0.0,0.00
69,Union Station,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.05,0.010000,0.0,0.00,...,0.0,0.000000,0.010000,0.0,0.0,0.0,0.0,0.01,0.010000,0.000000,0.0,0.01,0.0,0.0,0.00
70,University of Toronto,0.027778,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.00,0.000000,0.0,0.00,...,0.0,0.000000,0.027778,0.0,0.0,0.0,0.0,0.00,0.000000,0.027778,0.0,0.00,0.0,0.0,0.00
71,Victoria Hotel,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.030000,0.0,0.00,0.010000,0.0,0.01,...,0.0,0.020000,0.000000,0.0,0.0,0.0,0.0,0.00,0.020000,0.000000,0.0,0.01,0.0,0.0,0.00


##### Let's put that into a pandas dataframe¶

Now let's create the new dataframe and display the top 10 venues for each neighborhood.

In [24]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [25]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Adelaide,Coffee Shop,Café,Thai Restaurant,Bar,Burger Joint,Steakhouse,Bakery,Sushi Restaurant,Cosmetics Shop,Restaurant
1,Bathurst Quay,Airport Service,Airport Lounge,Airport Terminal,Harbor / Marina,Boat or Ferry,Coffee Shop,Rental Car Location,Sculpture Garden,Boutique,Plane
2,Berczy Park,Coffee Shop,Farmers Market,Cocktail Bar,Café,French Restaurant,Cheese Shop,Seafood Restaurant,Bakery,Steakhouse,Beer Bar
3,Brockton,Café,Coffee Shop,Breakfast Spot,Furniture / Home Store,Italian Restaurant,Stadium,Bar,Intersection,Bakery,Climbing Gym
4,Business Reply Mail Processing Centre 969 Eastern,Pizza Place,Auto Workshop,Comic Shop,Restaurant,Butcher,Burrito Place,Brewery,Skate Park,Spa,Farmers Market


### Cluster Neighborhoods

In [26]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 1, 2, 2, 2, 1, 2, 2, 2, 2])

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [27]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = TOR

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636,2,Coffee Shop,Pub,Park,Bakery,Theater,Café,Breakfast Spot,Restaurant,Mexican Restaurant,Ice Cream Shop
1,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494,2,Coffee Shop,Park,Gym,Yoga Studio,Burrito Place,Italian Restaurant,Beer Bar,Japanese Restaurant,Juice Bar,Seafood Restaurant
2,M5B,Downtown Toronto,Ryerson,43.657162,-79.378937,2,Coffee Shop,Clothing Store,Café,Japanese Restaurant,Bakery,Cosmetics Shop,Thai Restaurant,Bubble Tea Shop,Restaurant,Diner
3,M5B,Downtown Toronto,Garden District,43.657162,-79.378937,2,Coffee Shop,Clothing Store,Café,Japanese Restaurant,Bakery,Cosmetics Shop,Thai Restaurant,Bubble Tea Shop,Restaurant,Diner
4,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,2,Coffee Shop,Café,Restaurant,Park,Bakery,Diner,Breakfast Spot,Italian Restaurant,Clothing Store,American Restaurant


Let's visualize the resulting clusters

In [43]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       

map_clusters


<folium.features.CircleMarker at 0x24710c9a400>

<folium.features.CircleMarker at 0x24710d9fc18>

<folium.features.CircleMarker at 0x24710da29e8>

<folium.features.CircleMarker at 0x24710d9fba8>

<folium.features.CircleMarker at 0x24710d7fe48>

<folium.features.CircleMarker at 0x24710da2ba8>

<folium.features.CircleMarker at 0x2470e8c1b00>

<folium.features.CircleMarker at 0x24710d7f208>

<folium.features.CircleMarker at 0x24710d9fcc0>

<folium.features.CircleMarker at 0x24710da28d0>

<folium.features.CircleMarker at 0x24710d7f978>

<folium.features.CircleMarker at 0x24710da2668>

<folium.features.CircleMarker at 0x2470facb668>

<folium.features.CircleMarker at 0x24710dc6358>

<folium.features.CircleMarker at 0x24710d9ff98>

<folium.features.CircleMarker at 0x24710dc6e48>

<folium.features.CircleMarker at 0x24710dca0f0>

<folium.features.CircleMarker at 0x24710d9f5c0>

<folium.features.CircleMarker at 0x24710da2550>

<folium.features.CircleMarker at 0x24710dcadd8>

<folium.features.CircleMarker at 0x24710dc6588>

<folium.features.CircleMarker at 0x24710dcac50>

<folium.features.CircleMarker at 0x24710dca908>

<folium.features.CircleMarker at 0x24710de5f98>

<folium.features.CircleMarker at 0x24710dc6860>

<folium.features.CircleMarker at 0x24710dc6a90>

<folium.features.CircleMarker at 0x24710dc6470>

<folium.features.CircleMarker at 0x24710de5668>

<folium.features.CircleMarker at 0x24710de5a58>

<folium.features.CircleMarker at 0x24710dc6f28>

<folium.features.CircleMarker at 0x24710de58d0>

<folium.features.CircleMarker at 0x24710dcacc0>

<folium.features.CircleMarker at 0x24710de50f0>

<folium.features.CircleMarker at 0x24710de7470>

<folium.features.CircleMarker at 0x24710de5828>

<folium.features.CircleMarker at 0x24710de5a90>

<folium.features.CircleMarker at 0x24710de77f0>

<folium.features.CircleMarker at 0x24710e08748>

<folium.features.CircleMarker at 0x24710de7da0>

<folium.features.CircleMarker at 0x24710e089b0>

<folium.features.CircleMarker at 0x24710de7c18>

<folium.features.CircleMarker at 0x24710de78d0>

<folium.features.CircleMarker at 0x24710de5b38>

<folium.features.CircleMarker at 0x24710e08550>

<folium.features.CircleMarker at 0x24710de5208>

<folium.features.CircleMarker at 0x24710e08198>

<folium.features.CircleMarker at 0x24710dff518>

<folium.features.CircleMarker at 0x24710e08d30>

<folium.features.CircleMarker at 0x24710de5080>

<folium.features.CircleMarker at 0x24710dffeb8>

<folium.features.CircleMarker at 0x24710e1dbe0>

<folium.features.CircleMarker at 0x24710dff208>

<folium.features.CircleMarker at 0x24710e1d278>

<folium.features.CircleMarker at 0x24710e08860>

<folium.features.CircleMarker at 0x24710e1d5c0>

<folium.features.CircleMarker at 0x24710dffc18>

<folium.features.CircleMarker at 0x24710e1d780>

<folium.features.CircleMarker at 0x24710dffa58>

<folium.features.CircleMarker at 0x24710e1dba8>

<folium.features.CircleMarker at 0x24710dff860>

<folium.features.CircleMarker at 0x24710e1d9e8>

<folium.features.CircleMarker at 0x24710e460f0>

<folium.features.CircleMarker at 0x24710e2df98>

<folium.features.CircleMarker at 0x24710e46d30>

<folium.features.CircleMarker at 0x24710e2d588>

<folium.features.CircleMarker at 0x24710dff3c8>

<folium.features.CircleMarker at 0x24710e2d128>

<folium.features.CircleMarker at 0x24710e46a20>

<folium.features.CircleMarker at 0x24710e2d400>

<folium.features.CircleMarker at 0x24710e467f0>

<folium.features.CircleMarker at 0x24710e2dd68>

<folium.features.CircleMarker at 0x24710e46c18>

<folium.features.CircleMarker at 0x24710e3a6a0>

<folium.features.CircleMarker at 0x24710e2d4a8>

##### Let's examine cluster 2, the one with the most neighborhoods

In [29]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,2,Coffee Shop,Pub,Park,Bakery,Theater,Café,Breakfast Spot,Restaurant,Mexican Restaurant,Ice Cream Shop
1,Downtown Toronto,2,Coffee Shop,Park,Gym,Yoga Studio,Burrito Place,Italian Restaurant,Beer Bar,Japanese Restaurant,Juice Bar,Seafood Restaurant
2,Downtown Toronto,2,Coffee Shop,Clothing Store,Café,Japanese Restaurant,Bakery,Cosmetics Shop,Thai Restaurant,Bubble Tea Shop,Restaurant,Diner
3,Downtown Toronto,2,Coffee Shop,Clothing Store,Café,Japanese Restaurant,Bakery,Cosmetics Shop,Thai Restaurant,Bubble Tea Shop,Restaurant,Diner
4,Downtown Toronto,2,Coffee Shop,Café,Restaurant,Park,Bakery,Diner,Breakfast Spot,Italian Restaurant,Clothing Store,American Restaurant
...,...,...,...,...,...,...,...,...,...,...,...,...
69,Downtown Toronto,2,Coffee Shop,Café,Restaurant,Park,Bakery,Diner,Breakfast Spot,Italian Restaurant,Clothing Store,American Restaurant
70,Downtown Toronto,2,Coffee Shop,Café,Restaurant,Steakhouse,Gastropub,Asian Restaurant,American Restaurant,Deli / Bodega,Burger Joint,Gym
71,Downtown Toronto,2,Coffee Shop,Café,Restaurant,Steakhouse,Gastropub,Asian Restaurant,American Restaurant,Deli / Bodega,Burger Joint,Gym
72,Downtown Toronto,2,Coffee Shop,Japanese Restaurant,Gay Bar,Sushi Restaurant,Restaurant,Café,Fast Food Restaurant,Pub,Gym,Hotel
