# Applied Data Science Capstone - Segment & Cluster

## Question 1

### Scraping Data From Wikipedia

In [89]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
website_text = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(website_text,'xml')

table = soup.find('table',{'class':'wikitable sortable'})
table_rows = table.find_all('tr')
#table_rows

### Converting Data Into a Pandas Dataframe

In [27]:
data = []
for row in table_rows:
    data.append([t.text.strip() for t in row.find_all('td')])

df = pandas.DataFrame(data, columns=['PostalCode', 'Borough', 'Neighbourhood'])
df = df[~df['PostalCode'].isnull()]  # to filter out bad rows

In [28]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront


### Removing Boroughs That Are Not Assigned

In [29]:
df = df[df.Borough != 'Not assigned']
df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront
6,M5A,Downtown Toronto,Regent Park
7,M6A,North York,Lawrence Heights


In [30]:
df[df.Borough == 'Not assigned']

Unnamed: 0,PostalCode,Borough,Neighbourhood


### Replacing Neighbourhoods That Are Not Assigned To Equal Borough

In [31]:
df[df.Neighbourhood == 'Not assigned']

Unnamed: 0,PostalCode,Borough,Neighbourhood
9,M7A,Queen's Park,Not assigned


In [32]:
df = df.replace(to_replace ="Not assigned", 
                 value ="Queen\'s Park")

### Check That All Boroughs and Neighbourhoods Are Assigned

In [33]:
df[df.Borough == 'Not assigned']

Unnamed: 0,PostalCode,Borough,Neighbourhood


In [34]:
df[df.Neighbourhood == 'Not assigned']

Unnamed: 0,PostalCode,Borough,Neighbourhood


### Merging Neighbourhoods With The Same PostalCode

In [35]:
df[df.PostalCode == 'M5A']

Unnamed: 0,PostalCode,Borough,Neighbourhood
5,M5A,Downtown Toronto,Harbourfront
6,M5A,Downtown Toronto,Regent Park


In [36]:
df[df.Neighbourhood == 'Harbourfront']

Unnamed: 0,PostalCode,Borough,Neighbourhood
5,M5A,Downtown Toronto,Harbourfront


In [37]:
df[df.Neighbourhood == 'Regent Park']

Unnamed: 0,PostalCode,Borough,Neighbourhood
6,M5A,Downtown Toronto,Regent Park


In [38]:
df = df[df.Neighbourhood != 'Harbourfront']

In [39]:
df = df.replace(to_replace ="Regent Park", 
                 value ="Regent Park, Harbourfront")

In [40]:
df[df.PostalCode == 'M5A']

Unnamed: 0,PostalCode,Borough,Neighbourhood
6,M5A,Downtown Toronto,"Regent Park, Harbourfront"


### Displaying The Shape and Final Dataframe

In [41]:
df.shape

(210, 3)

In [42]:
df

Unnamed: 0,PostalCode,Borough,Neighbourhood
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
6,M5A,Downtown Toronto,"Regent Park, Harbourfront"
7,M6A,North York,Lawrence Heights
8,M6A,North York,Lawrence Manor
9,M7A,Queen's Park,Queen's Park
11,M9A,Etobicoke,Islington Avenue
12,M1B,Scarborough,Rouge
13,M1B,Scarborough,Malvern
15,M3B,North York,Don Mills North


## Question 2

### Adding Latitudes and Longitudes To Dataframe

In [43]:
!wget -O Geospatial_Coordinates.csv https://cocl.us/Geospatial_data

--2019-06-22 07:43:35--  https://cocl.us/Geospatial_data
Resolving cocl.us (cocl.us)... 159.8.72.228
Connecting to cocl.us (cocl.us)|159.8.72.228|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://ibm.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2019-06-22 07:43:36--  https://ibm.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv
Resolving ibm.box.com (ibm.box.com)... 185.235.236.197
Connecting to ibm.box.com (ibm.box.com)|185.235.236.197|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: /public/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2019-06-22 07:43:36--  https://ibm.box.com/public/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv
Reusing existing connection to ibm.box.com:443.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://ibm.ent.box.com/public/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2019-06-22 07

In [44]:
location = pd.read_csv('Geospatial_Coordinates.csv')
location.rename(columns={'Postal Code': 'PostalCode'}, inplace=True)
location.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [45]:
df_location = pd.merge(df,location,on='PostalCode')

In [46]:
df_location

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,Lawrence Heights,43.718518,-79.464763
4,M6A,North York,Lawrence Manor,43.718518,-79.464763
5,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
6,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
7,M1B,Scarborough,Rouge,43.806686,-79.194353
8,M1B,Scarborough,Malvern,43.806686,-79.194353
9,M3B,North York,Don Mills North,43.745906,-79.352188


## Question 3

### Install Libraries

In [52]:
import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/DSX-Python35

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    branca-0.3.1               |             py_0          25 KB  conda-forge
    certifi-2018.8.24          |        py35_1001         139 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    ca-certificates-2019.6.16  |       hecc5488_0         145 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    altair-2.2.2               |           py35_1         462 KB  conda-forge
    openssl-1.0.2r             |       h14c3975_0         3.1 MB  conda-forge
    ------------------------------------------------------------
                                           Total:         4.0 MB

The following NEW packages will

### Filter Downtown Toronto Only

In [145]:
#df_location_filtered = df_location
df_location_filtered = df_location[df_location.Borough == 'Downtown Toronto']

In [146]:
df_location_filtered.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
12,M5B,Downtown Toronto,Ryerson,43.657162,-79.378937
13,M5B,Downtown Toronto,Garden District,43.657162,-79.378937
26,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
36,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306


### Plotting A Map of Toronto

In [147]:
latitude = 43.6529
longitude = -79.3849
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6529, -79.3849.


In [148]:
# create map of New York using latitude and longitude values
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighbourhood in zip(df_location_filtered['Latitude'], df_location_filtered['Longitude'], df_location_filtered['Borough'], df_location_filtered['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)  
    
map_Toronto

### Define Foursquare Credentials and Version

In [149]:
CLIENT_ID = 'KJGU4QFJY0VTPJD0J34C3BVOLQAP4YDWA54IWG2R1MO3WAMQ' # your Foursquare ID
CLIENT_SECRET = 'KTABSOHL0GFSVLX5FFZORSQ4MYQSQP3SNQVFNMQ4T0RN0QZO' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: KJGU4QFJY0VTPJD0J34C3BVOLQAP4YDWA54IWG2R1MO3WAMQ
CLIENT_SECRET:KTABSOHL0GFSVLX5FFZORSQ4MYQSQP3SNQVFNMQ4T0RN0QZO


### Exploring The Neighbourhoods

In [150]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT = 100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [151]:
venues = getNearbyVenues(names=df_location_filtered['Neighbourhood'],
                                   latitudes=df_location_filtered['Latitude'],
                                   longitudes=df_location_filtered['Longitude']
                                  )

Regent Park, Harbourfront
Ryerson
Garden District
St. James Town
Berczy Park
Central Bay Street
Christie
Adelaide
King
Richmond
Harbourfront East
Toronto Islands
Union Station
Design Exchange
Toronto Dominion Centre
Commerce Court
Victoria Hotel
Harbord
University of Toronto
Chinatown
Grange Park
Kensington Market
CN Tower
Bathurst Quay
Island airport
Harbourfront West
King and Spadina
Railway Lands
South Niagara
Rosedale
Stn A PO Boxes 25 The Esplanade
Cabbagetown
St. James Town
First Canadian Place
Underground city
Church and Wellesley


In [152]:
print(venues.shape)
venues.head()

(2463, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Regent Park, Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Regent Park, Harbourfront",43.65426,-79.360636,Toronto Cooper Koo Family Cherry St YMCA Centre,43.653191,-79.357947,Gym / Fitness Center
3,"Regent Park, Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,"Regent Park, Harbourfront",43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot


In [153]:
venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Adelaide,100,100,100,100,100,100
Bathurst Quay,16,16,16,16,16,16
Berczy Park,55,55,55,55,55,55
CN Tower,16,16,16,16,16,16
Cabbagetown,46,46,46,46,46,46
Central Bay Street,88,88,88,88,88,88
Chinatown,100,100,100,100,100,100
Christie,15,15,15,15,15,15
Church and Wellesley,87,87,87,87,87,87
Commerce Court,100,100,100,100,100,100


In [154]:
print('There are {} uniques categories.'.format(len(venues['Venue Category'].unique())))

There are 207 uniques categories.


### Analyse Each Neighbourhoods

In [155]:
# one hot encoding
onehot = pd.get_dummies(venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
onehot['Neighbourhood'] = venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [onehot.columns[-1]] + list(onehot.columns[:-1])
onehot = onehot[fixed_columns]

onehot.head()

Unnamed: 0,Neighbourhood,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [156]:
onehot.shape

(2463, 208)

In [157]:
grouped = onehot.groupby('Neighbourhood').mean().reset_index()
grouped

Unnamed: 0,Neighbourhood,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Adelaide,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,...,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0
1,Bathurst Quay,0.0,0.0,0.0625,0.0625,0.0625,0.125,0.125,0.125,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,CN Tower,0.0,0.0,0.0625,0.0625,0.0625,0.125,0.125,0.125,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Cabbagetown,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011364,...,0.0,0.0,0.011364,0.0,0.011364,0.0,0.011364,0.0,0.0,0.011364
6,Chinatown,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.06,0.0,0.0,0.03,0.01,0.0,0.0,0.0
7,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Church and Wellesley,0.011494,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,...,0.0,0.0,0.0,0.011494,0.0,0.011494,0.0,0.011494,0.0,0.011494
9,Commerce Court,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,...,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0


In [158]:
grouped.shape

(35, 208)

In [159]:
num_top_venues = 5

for hood in grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = grouped[grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide----
                 venue  freq
0          Coffee Shop  0.06
1                 Café  0.05
2           Steakhouse  0.04
3  American Restaurant  0.04
4                  Bar  0.04


----Bathurst Quay----
              venue  freq
0    Airport Lounge  0.12
1   Airport Service  0.12
2  Airport Terminal  0.12
3          Boutique  0.06
4     Boat or Ferry  0.06


----Berczy Park----
                venue  freq
0         Coffee Shop  0.09
1        Cocktail Bar  0.05
2      Farmers Market  0.04
3  Seafood Restaurant  0.04
4            Beer Bar  0.04


----CN Tower----
              venue  freq
0    Airport Lounge  0.12
1   Airport Service  0.12
2  Airport Terminal  0.12
3          Boutique  0.06
4     Boat or Ferry  0.06


----Cabbagetown----
         venue  freq
0  Coffee Shop  0.09
1   Restaurant  0.07
2          Pub  0.04
3       Bakery  0.04
4         Park  0.04


----Central Bay Street----
                venue  freq
0         Coffee Shop  0.16
1  Italian Restaurant  0.05
2  

### Converting Data Into A Pandas Dataframe

In [160]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [161]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = grouped['Neighbourhood']

for ind in np.arange(grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(grouped.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Adelaide,Coffee Shop,Café,Bar,Steakhouse,American Restaurant,Burger Joint,Bakery,Cosmetics Shop,Gym,Hotel
1,Bathurst Quay,Airport Lounge,Airport Service,Airport Terminal,Harbor / Marina,Plane,Boat or Ferry,Sculpture Garden,Bar,Coffee Shop,Boutique
2,Berczy Park,Coffee Shop,Cocktail Bar,Farmers Market,Seafood Restaurant,Café,Bakery,Steakhouse,Cheese Shop,Beer Bar,Italian Restaurant
3,CN Tower,Airport Lounge,Airport Service,Airport Terminal,Harbor / Marina,Plane,Boat or Ferry,Sculpture Garden,Bar,Coffee Shop,Boutique
4,Cabbagetown,Coffee Shop,Restaurant,Pub,Pizza Place,Italian Restaurant,Park,Bakery,Café,Breakfast Spot,Caribbean Restaurant


### Cluster Neighbourhood

In [162]:
# set number of clusters
kclusters = 5

grouped_clustering = grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 1, 0, 1, 0, 0, 4, 3, 0, 0], dtype=int32)

In [163]:
# add clustering labels
neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

merged = df_location_filtered

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
merged = merged.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,0,Coffee Shop,Pub,Bakery,Park,Restaurant,Mexican Restaurant,Breakfast Spot,Café,Theater,Farmers Market
12,M5B,Downtown Toronto,Ryerson,43.657162,-79.378937,0,Coffee Shop,Clothing Store,Cosmetics Shop,Café,Middle Eastern Restaurant,Fast Food Restaurant,Ramen Restaurant,Diner,Pizza Place,Bubble Tea Shop
13,M5B,Downtown Toronto,Garden District,43.657162,-79.378937,0,Coffee Shop,Clothing Store,Cosmetics Shop,Café,Middle Eastern Restaurant,Fast Food Restaurant,Ramen Restaurant,Diner,Pizza Place,Bubble Tea Shop
26,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,Coffee Shop,Café,Restaurant,Hotel,Bakery,Italian Restaurant,Pizza Place,Breakfast Spot,Gastropub,Park
36,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,0,Coffee Shop,Cocktail Bar,Farmers Market,Seafood Restaurant,Café,Bakery,Steakhouse,Cheese Shop,Beer Bar,Italian Restaurant


In [174]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=13)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(merged['Latitude'], merged['Longitude'], merged['Neighbourhood'], merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)

In [175]:
map_clusters

### Examining Clusters

### Cluster 1

In [169]:
merged.loc[merged['Cluster Labels'] == 0, merged.columns[[1] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,0,Coffee Shop,Pub,Bakery,Park,Restaurant,Mexican Restaurant,Breakfast Spot,Café,Theater,Farmers Market
12,Downtown Toronto,0,Coffee Shop,Clothing Store,Cosmetics Shop,Café,Middle Eastern Restaurant,Fast Food Restaurant,Ramen Restaurant,Diner,Pizza Place,Bubble Tea Shop
13,Downtown Toronto,0,Coffee Shop,Clothing Store,Cosmetics Shop,Café,Middle Eastern Restaurant,Fast Food Restaurant,Ramen Restaurant,Diner,Pizza Place,Bubble Tea Shop
26,Downtown Toronto,0,Coffee Shop,Café,Restaurant,Hotel,Bakery,Italian Restaurant,Pizza Place,Breakfast Spot,Gastropub,Park
36,Downtown Toronto,0,Coffee Shop,Cocktail Bar,Farmers Market,Seafood Restaurant,Café,Bakery,Steakhouse,Cheese Shop,Beer Bar,Italian Restaurant
40,Downtown Toronto,0,Coffee Shop,Italian Restaurant,Café,Sandwich Place,Burger Joint,Middle Eastern Restaurant,Salad Place,Ice Cream Shop,Restaurant,Indian Restaurant
48,Downtown Toronto,0,Coffee Shop,Café,Bar,Steakhouse,American Restaurant,Burger Joint,Bakery,Cosmetics Shop,Gym,Hotel
49,Downtown Toronto,0,Coffee Shop,Café,Bar,Steakhouse,American Restaurant,Burger Joint,Bakery,Cosmetics Shop,Gym,Hotel
50,Downtown Toronto,0,Coffee Shop,Café,Bar,Steakhouse,American Restaurant,Burger Joint,Bakery,Cosmetics Shop,Gym,Hotel
60,Downtown Toronto,0,Coffee Shop,Hotel,Aquarium,Café,Restaurant,Italian Restaurant,Fried Chicken Joint,Bakery,Pizza Place,Sporting Goods Shop


### Cluster 2

In [170]:
merged.loc[merged['Cluster Labels'] == 1, merged.columns[[1] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
163,Downtown Toronto,1,Airport Lounge,Airport Service,Airport Terminal,Harbor / Marina,Plane,Boat or Ferry,Sculpture Garden,Bar,Coffee Shop,Boutique
164,Downtown Toronto,1,Airport Lounge,Airport Service,Airport Terminal,Harbor / Marina,Plane,Boat or Ferry,Sculpture Garden,Bar,Coffee Shop,Boutique
165,Downtown Toronto,1,Airport Lounge,Airport Service,Airport Terminal,Harbor / Marina,Plane,Boat or Ferry,Sculpture Garden,Bar,Coffee Shop,Boutique
166,Downtown Toronto,1,Airport Lounge,Airport Service,Airport Terminal,Harbor / Marina,Plane,Boat or Ferry,Sculpture Garden,Bar,Coffee Shop,Boutique
167,Downtown Toronto,1,Airport Lounge,Airport Service,Airport Terminal,Harbor / Marina,Plane,Boat or Ferry,Sculpture Garden,Bar,Coffee Shop,Boutique
168,Downtown Toronto,1,Airport Lounge,Airport Service,Airport Terminal,Harbor / Marina,Plane,Boat or Ferry,Sculpture Garden,Bar,Coffee Shop,Boutique
169,Downtown Toronto,1,Airport Lounge,Airport Service,Airport Terminal,Harbor / Marina,Plane,Boat or Ferry,Sculpture Garden,Bar,Coffee Shop,Boutique


### Cluster 3

In [171]:
merged.loc[merged['Cluster Labels'] == 2, merged.columns[[1] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
182,Downtown Toronto,2,Park,Playground,Trail,Yoga Studio,Diner,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant


### Cluster 4

In [172]:
merged.loc[merged['Cluster Labels'] == 3, merged.columns[[1] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
41,Downtown Toronto,3,Grocery Store,Café,Park,Italian Restaurant,Diner,Restaurant,Nightclub,Baby Store,Convenience Store,Coffee Shop


### Cluster 5

In [173]:
merged.loc[merged['Cluster Labels'] == 4, merged.columns[[1] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
142,Downtown Toronto,4,Café,Bakery,Restaurant,Japanese Restaurant,Bar,Bookstore,Italian Restaurant,Beer Bar,Beer Store,Nightclub
143,Downtown Toronto,4,Café,Bakery,Restaurant,Japanese Restaurant,Bar,Bookstore,Italian Restaurant,Beer Bar,Beer Store,Nightclub
151,Downtown Toronto,4,Café,Vegetarian / Vegan Restaurant,Coffee Shop,Mexican Restaurant,Bar,Bakery,Dumpling Restaurant,Vietnamese Restaurant,Chinese Restaurant,Noodle House
152,Downtown Toronto,4,Café,Vegetarian / Vegan Restaurant,Coffee Shop,Mexican Restaurant,Bar,Bakery,Dumpling Restaurant,Vietnamese Restaurant,Chinese Restaurant,Noodle House
153,Downtown Toronto,4,Café,Vegetarian / Vegan Restaurant,Coffee Shop,Mexican Restaurant,Bar,Bakery,Dumpling Restaurant,Vietnamese Restaurant,Chinese Restaurant,Noodle House
