## Part 1 - Creating Initial Dataframe

In [329]:
#!conda install -c conda-forge folium=0.5.0 --yes
import folium

In [330]:
# Scrape for Canadian Postal Codes
import requests
wiki_url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text

#Pull raw html data from wikipedia page
from bs4 import BeautifulSoup
soup = BeautifulSoup(wiki_url, 'lxml')
print(soup.prettify())

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   List of postal codes of Canada: M - Wikipedia
  </title>
  <script>
   document.documentElement.className="client-js";RLCONF={"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_postal_codes_of_Canada:_M","wgTitle":"List of postal codes of Canada: M","wgCurRevisionId":920980179,"wgRevisionId":920980179,"wgArticleId":539066,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Communications in Ontario","Postal codes in Canada","Toronto","Ontario-related lists"],"wgBreakFrames":!1,"wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgMonthNames

In [331]:
#find table in wikipedia page html
canada_table = soup.find('table',{'class': 'wikitable'})
canada_table

<table class="wikitable sortable">
<tbody><tr>
<th>Postcode</th>
<th>Borough</th>
<th>Neighbourhood
</th></tr>
<tr>
<td>M1A</td>
<td>Not assigned</td>
<td>Not assigned
</td></tr>
<tr>
<td>M2A</td>
<td>Not assigned</td>
<td>Not assigned
</td></tr>
<tr>
<td>M3A</td>
<td><a href="/wiki/North_York" title="North York">North York</a></td>
<td><a href="/wiki/Parkwoods" title="Parkwoods">Parkwoods</a>
</td></tr>
<tr>
<td>M4A</td>
<td><a href="/wiki/North_York" title="North York">North York</a></td>
<td><a href="/wiki/Victoria_Village" title="Victoria Village">Victoria Village</a>
</td></tr>
<tr>
<td>M5A</td>
<td><a href="/wiki/Downtown_Toronto" title="Downtown Toronto">Downtown Toronto</a></td>
<td><a href="/wiki/Harbourfront_(Toronto)" title="Harbourfront (Toronto)">Harbourfront</a>
</td></tr>
<tr>
<td>M5A</td>
<td><a href="/wiki/Downtown_Toronto" title="Downtown Toronto">Downtown Toronto</a></td>
<td><a href="/wiki/Regent_Park" title="Regent Park">Regent Park</a>
</td></tr>
<tr>
<td>M6A</td>

In [332]:
#build empty lists to append data from wikipedia to
postal_codes = []
boroughs = []
neighborhoods = []

#append data by looking in html for specific rows and cells
for row in canada_table.findAll('tr')[1:]:
    postal_cells = row.findAll('td')[0]
    boroughs_cells = row.findAll('td')[1]
    neighborhoods_cells = row.findAll('td')[2]
    postal_codes.append(postal_cells.text)
    boroughs.append(boroughs_cells.text)
    neighborhoods.append(neighborhoods_cells.text[:-1])
    

print(neighborhoods[0:10])

['Not assigned', 'Not assigned', 'Parkwoods', 'Victoria Village', 'Harbourfront', 'Regent Park', 'Lawrence Heights', 'Lawrence Manor', 'Not assigned', 'Not assigned']


In [333]:
import pandas as pd

#build dataframe out of appended lists
canada_df = pd.DataFrame()
canada_df['PostalCode'] = postal_codes
canada_df['Borough'] = boroughs
canada_df['Neighborhood'] = neighborhoods

canada_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [334]:
import numpy as np

#remove rows of non-assigned boroughs
canada_df = canada_df[canada_df['Borough'] != 'Not assigned']
#group by borough and aggregate neighborhood by csv
canada_df = canada_df.groupby('Borough').agg({'PostalCode' : 'first','Neighborhood' : ','.join}).reset_index().reindex(columns = canada_df.columns)
#replace non-assigned neighborhoods by borough name
canada_df['Neighborhood'] = canada_df['Neighborhood'].replace('Not assigned', canada_df['Borough'])

canada_df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M4N,Central Toronto,"Lawrence Park,Roselawn,Davisville North,Forest..."
1,M5A,Downtown Toronto,"Harbourfront,Regent Park,Ryerson,Garden Distri..."
2,M4E,East Toronto,"The Beaches,The Danforth West,Riverdale,The Be..."
3,M4B,East York,"Woodbine Gardens,Parkview Hill,Woodbine Height..."
4,M9A,Etobicoke,"Islington Avenue,Cloverdale,Islington,Martin G..."
5,M7R,Mississauga,Canada Post Gateway Processing Centre
6,M3A,North York,"Parkwoods,Victoria Village,Lawrence Heights,La..."
7,M7A,Queen's Park,Queen's Park
8,M1B,Scarborough,"Rouge,Malvern,Highland Creek,Rouge Hill,Port U..."
9,M6H,West Toronto,"Dovercourt Village,Dufferin,Little Portugal,Tr..."


In [335]:
canada_df.shape

(11, 3)

## Part 2 - Importing Geocoder and Appending Coordinates

In [336]:
# running the while loop timed out, so importing csv and appending instead
geospacial_data = pd.read_csv('/Users/jonathan/Desktop/ibmdatascience/ibmdatascienceproject/Geospatial_Coordinates_Canada.csv')
geospacial_data = geospacial_data.rename(columns = {'Postal Code' : 'PostalCode'})
canada_df = canada_df.merge(geospacial_data, how = 'inner')

canada_df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4N,Central Toronto,"Lawrence Park,Roselawn,Davisville North,Forest...",43.72802,-79.38879
1,M5A,Downtown Toronto,"Harbourfront,Regent Park,Ryerson,Garden Distri...",43.65426,-79.360636
2,M4E,East Toronto,"The Beaches,The Danforth West,Riverdale,The Be...",43.676357,-79.293031
3,M4B,East York,"Woodbine Gardens,Parkview Hill,Woodbine Height...",43.706397,-79.309937
4,M9A,Etobicoke,"Islington Avenue,Cloverdale,Islington,Martin G...",43.667856,-79.532242
5,M7R,Mississauga,Canada Post Gateway Processing Centre,43.636966,-79.615819
6,M3A,North York,"Parkwoods,Victoria Village,Lawrence Heights,La...",43.753259,-79.329656
7,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
8,M1B,Scarborough,"Rouge,Malvern,Highland Creek,Rouge Hill,Port U...",43.806686,-79.194353
9,M6H,West Toronto,"Dovercourt Village,Dufferin,Little Portugal,Tr...",43.669005,-79.442259


## Part 3 - Exploring and Clustering the Neighborhoods in Toronto

In [337]:
#create toronto df
toronto_df = pd.DataFrame()
toronto_df['PostalCode'] = postal_codes
toronto_df['Borough'] = boroughs
toronto_df['Neighborhood'] = neighborhoods

#remove rows of non-assigned boroughs
toronto_df = toronto_df[toronto_df['Borough'] != 'Not assigned']
#make dataframe into just toronto values
toronto_df = toronto_df[toronto_df['Borough'].str.contains('Toronto')].reset_index(drop = True)

#appending csv of spacial data
toronto_df = toronto_df.merge(geospacial_data, how = 'inner')

toronto_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
1,M5A,Downtown Toronto,Regent Park,43.65426,-79.360636
2,M5B,Downtown Toronto,Ryerson,43.657162,-79.378937
3,M5B,Downtown Toronto,Garden District,43.657162,-79.378937
4,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418


In [338]:
#initial latitude and longitude arbirtrary so chose the first row of toronto_df
latitude_initial = toronto_df.loc[0]['Latitude']
longitude_initial = toronto_df.loc[0]['Longitude']

#create map of toronto using latitude_initial and longitude_initial
map_toronto = folium.Map(location = [latitude_initial, longitude_initial], zoom_start = 11)
#add markers to map
for lat, lng, borough, neighborhood in zip(toronto_df['Latitude'], toronto_df['Longitude'], toronto_df['Borough'], toronto_df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html = True)
    folium.CircleMarker(
    [lat, lng],
    radius = 5,
    popup = label,
    color = 'green',
    fill = True,
    fill_opacity = 0.8,
    parse_html = False).add_to(map_toronto)


map_toronto

In [339]:
#Define Foursquare Credentials and Version
CLIENT_ID = '5JPKEAE0LLNYHMOY25GDEMO4VTGRTF3ENNT5UDADBFJ1Z3RK'
CLIENT_SECRET = 'XJEWQH4DL5YVH0V0PES2G1FQ0A0II3CWHJ3QPOW5TKEWJ2SE'
VERSION = '20180604'
LIMIT = 100
print('Your credentials:')
print('CLIENT_ID: '+ CLIENT_ID)
print('CLIENT_SECRET: '+CLIENT_SECRET)

Your credentials:
CLIENT_ID: 5JPKEAE0LLNYHMOY25GDEMO4VTGRTF3ENNT5UDADBFJ1Z3RK
CLIENT_SECRET: XJEWQH4DL5YVH0V0PES2G1FQ0A0II3CWHJ3QPOW5TKEWJ2SE


In [340]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [341]:
LIMIT = 100
toronto_venues = getNearbyVenues(names=toronto_df['Neighborhood'],
                                 latitudes=toronto_df['Latitude'],
                                 longitudes=toronto_df['Longitude']
                                )

Harbourfront
Regent Park
Ryerson
Garden District
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Adelaide
King
Richmond
Dovercourt Village
Dufferin
Harbourfront East
Toronto Islands
Union Station
Little Portugal
Trinity
The Danforth West
Riverdale
Design Exchange
Toronto Dominion Centre
Brockton
Exhibition Place
Parkdale Village
The Beaches West
India Bazaar
Commerce Court
Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North
Forest Hill West
High Park
The Junction South
North Toronto West
The Annex
North Midtown
Yorkville
Parkdale
Roncesvalles
Davisville
Harbord
University of Toronto
Runnymede
Swansea
Moore Park
Summerhill East
Chinatown
Grange Park
Kensington Market
Deer Park
Forest Hill SE
Rathnelly
South Hill
Summerhill West
CN Tower
Bathurst Quay
Island airport
Harbourfront West
King and Spadina
Railway Lands
South Niagara
Rosedale
Stn A PO Boxes 25 The Esplanade
Cabbagetown
St. James Town
First Canadian Place
Underground city


In [342]:
print(toronto_venues.shape)
toronto_venues.head()

(3319, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Harbourfront,43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,Harbourfront,43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,Harbourfront,43.65426,-79.360636,Toronto Cooper Koo Family Cherry St YMCA Centre,43.653191,-79.357947,Gym / Fitness Center
3,Harbourfront,43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,Harbourfront,43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant


In [343]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Adelaide,100,100,100,100,100,100
Bathurst Quay,16,16,16,16,16,16
Berczy Park,57,57,57,57,57,57
Brockton,23,23,23,23,23,23
Business Reply Mail Processing Centre 969 Eastern,15,15,15,15,15,15
CN Tower,16,16,16,16,16,16
Cabbagetown,45,45,45,45,45,45
Central Bay Street,87,87,87,87,87,87
Chinatown,100,100,100,100,100,100
Christie,17,17,17,17,17,17


In [344]:
#nubmer of unique categories
print('There are {} unique categories'.format(len(toronto_venues['Venue Category'].unique())))

There are 237 unique categories


In [345]:
#Analyze each neighborhood

#one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

#add neighborhood col back to df
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood']

#move neighborhood col to first col
fixed_cols = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_cols]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [346]:
toronto_onehot.shape

(3319, 237)

In [347]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store
0,Adelaide,0.000000,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.000,0.030000,...,0.000,0.000000,0.00000,0.00,0.010000,0.000000,0.000000,0.010000,0.000000,0.01
1,Bathurst Quay,0.000000,0.000000,0.0625,0.0625,0.0625,0.125,0.1875,0.125,0.000000,...,0.000,0.000000,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.00
2,Berczy Park,0.000000,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.000,0.000000,...,0.000,0.000000,0.00000,0.00,0.017544,0.000000,0.000000,0.000000,0.000000,0.00
3,Brockton,0.000000,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.000,0.000000,...,0.000,0.000000,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.00
4,Business Reply Mail Processing Centre 969 Eastern,0.066667,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.000,0.000000,...,0.000,0.000000,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.00
5,CN Tower,0.000000,0.000000,0.0625,0.0625,0.0625,0.125,0.1875,0.125,0.000000,...,0.000,0.000000,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.00
6,Cabbagetown,0.000000,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.000,0.000000,...,0.000,0.000000,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.00
7,Central Bay Street,0.011494,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.000,0.011494,...,0.000,0.000000,0.00000,0.00,0.011494,0.000000,0.000000,0.011494,0.000000,0.00
8,Chinatown,0.000000,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.000,0.000000,...,0.000,0.010000,0.00000,0.00,0.060000,0.000000,0.040000,0.010000,0.000000,0.00
9,Christie,0.000000,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.000,0.000000,...,0.000,0.000000,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.00


In [348]:
toronto_grouped.shape

(73, 237)

In [349]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide----
              venue  freq
0       Coffee Shop  0.07
1              Café  0.05
2               Bar  0.04
3   Thai Restaurant  0.04
4  Asian Restaurant  0.03


----Bathurst Quay----
              venue  freq
0   Airport Service  0.19
1    Airport Lounge  0.12
2  Airport Terminal  0.12
3             Plane  0.06
4          Boutique  0.06


----Berczy Park----
            venue  freq
0     Coffee Shop  0.07
1    Cocktail Bar  0.05
2      Steakhouse  0.04
3  Farmers Market  0.04
4            Café  0.04


----Brockton----
            venue  freq
0            Café  0.09
1  Breakfast Spot  0.09
2     Coffee Shop  0.09
3    Climbing Gym  0.04
4  Sandwich Place  0.04


----Business Reply Mail Processing Centre 969 Eastern----
           venue  freq
0    Yoga Studio  0.07
1  Auto Workshop  0.07
2     Comic Shop  0.07
3           Park  0.07
4     Restaurant  0.07


----CN Tower----
              venue  freq
0   Airport Service  0.19
1    Airport Lounge  0.12
2  Airport Terminal  0.

         venue  freq
0  Coffee Shop  0.15
1         Park  0.08
2          Pub  0.06
3         Café  0.06
4       Bakery  0.06


----Richmond----
              venue  freq
0       Coffee Shop  0.07
1              Café  0.05
2               Bar  0.04
3   Thai Restaurant  0.04
4  Asian Restaurant  0.03


----Riverdale----
                    venue  freq
0        Greek Restaurant  0.19
1             Coffee Shop  0.10
2      Italian Restaurant  0.07
3          Ice Cream Shop  0.07
4  Furniture / Home Store  0.05


----Roncesvalles----
            venue  freq
0  Breakfast Spot  0.13
1       Gift Shop  0.13
2       Bookstore  0.07
3            Bank  0.07
4             Bar  0.07


----Rosedale----
           venue  freq
0           Park  0.50
1     Playground  0.25
2          Trail  0.25
3    Yoga Studio  0.00
4  Movie Theater  0.00


----Roselawn----
               venue  freq
0             Garden   1.0
1             Lounge   0.0
2   Malay Restaurant   0.0
3             Market   0.0
4  Martia

In [350]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Adelaide,Coffee Shop,Café,Bar,Thai Restaurant,Steakhouse,Hotel,Sushi Restaurant,Asian Restaurant,American Restaurant,Restaurant
1,Bathurst Quay,Airport Service,Airport Terminal,Airport Lounge,Harbor / Marina,Plane,Sculpture Garden,Boutique,Boat or Ferry,Coffee Shop,Airport Food Court
2,Berczy Park,Coffee Shop,Cocktail Bar,Bakery,Farmers Market,Steakhouse,Cheese Shop,Seafood Restaurant,Beer Bar,Café,Sporting Goods Shop
3,Brockton,Café,Coffee Shop,Breakfast Spot,Pet Store,Furniture / Home Store,Sandwich Place,Burrito Place,Restaurant,Stadium,Italian Restaurant
4,Business Reply Mail Processing Centre 969 Eastern,Yoga Studio,Auto Workshop,Comic Shop,Pizza Place,Restaurant,Burrito Place,Brewery,Skate Park,Light Rail Station,Spa


In [351]:
from sklearn.cluster import KMeans

#cluseter
kclusters=10 #because that's fun

toronto_grouped_clustered = toronto_grouped.drop('Neighborhood',1)

#kmeans
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustered)

#check cluster labels generated for each row in the df
kmeans.labels_[0:11]

array([7, 6, 7, 7, 1, 6, 7, 7, 7, 7, 7], dtype=int32)

In [352]:
# add cluster labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on = 'Neighborhood')

toronto_merged.tail()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
69,M4X,Downtown Toronto,St. James Town,43.667967,-79.367675,7,Coffee Shop,Restaurant,Café,Breakfast Spot,Italian Restaurant,Bakery,Hotel,Gastropub,Clothing Store,Pizza Place
70,M5X,Downtown Toronto,First Canadian Place,43.648429,-79.38228,7,Coffee Shop,Café,Hotel,Restaurant,Steakhouse,Bar,Gym,Gastropub,Deli / Bodega,Asian Restaurant
71,M5X,Downtown Toronto,Underground city,43.648429,-79.38228,7,Coffee Shop,Café,Hotel,Restaurant,Steakhouse,Bar,Gym,Gastropub,Deli / Bodega,Asian Restaurant
72,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316,7,Coffee Shop,Japanese Restaurant,Sushi Restaurant,Gay Bar,Burger Joint,Restaurant,Yoga Studio,Mediterranean Restaurant,Men's Store,Café
73,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558,1,Yoga Studio,Auto Workshop,Comic Shop,Pizza Place,Restaurant,Burrito Place,Brewery,Skate Park,Light Rail Station,Spa


In [353]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

#initial latitude and longitude arbirtrary so chose the first row of toronto_df
latitude_initial = toronto_df.loc[0]['Latitude']
longitude_initial = toronto_df.loc[0]['Longitude']

# create map
map_clusters = folium.Map(location=[latitude_initial, longitude_initial], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [354]:
#define function to get cluster information
from IPython.display import display, HTML
def get_cluster(cluster_number):
    display(toronto_merged.loc[toronto_merged['Cluster Labels'] == cluster_number, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]])
    

In [355]:
#get cluster information for all clusters
for i in range(10):
    print('Cluster ',i+1,':')
    get_cluster(i)

Cluster  1 :


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,West Toronto,0,Pharmacy,Bakery,Supermarket,Art Gallery,Music Venue,Middle Eastern Restaurant,Café,Brewery,Bar,Bank
13,West Toronto,0,Pharmacy,Bakery,Supermarket,Art Gallery,Music Venue,Middle Eastern Restaurant,Café,Brewery,Bar,Bank


Cluster  2 :


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,East Toronto,1,Greek Restaurant,Coffee Shop,Ice Cream Shop,Italian Restaurant,Furniture / Home Store,Liquor Store,Sports Bar,Spa,Bookstore,Brewery
20,East Toronto,1,Greek Restaurant,Coffee Shop,Ice Cream Shop,Italian Restaurant,Furniture / Home Store,Liquor Store,Sports Bar,Spa,Bookstore,Brewery
26,East Toronto,1,Park,Pizza Place,Ice Cream Shop,Pub,Brewery,Sandwich Place,Burger Joint,Fast Food Restaurant,Burrito Place,Fish & Chips Shop
27,East Toronto,1,Park,Pizza Place,Ice Cream Shop,Pub,Brewery,Sandwich Place,Burger Joint,Fast Food Restaurant,Burrito Place,Fish & Chips Shop
54,Central Toronto,1,Coffee Shop,Pub,Light Rail Station,American Restaurant,Sushi Restaurant,Bagel Shop,Restaurant,Fried Chicken Joint,Sports Bar,Supermarket
55,Central Toronto,1,Coffee Shop,Pub,Light Rail Station,American Restaurant,Sushi Restaurant,Bagel Shop,Restaurant,Fried Chicken Joint,Sports Bar,Supermarket
56,Central Toronto,1,Coffee Shop,Pub,Light Rail Station,American Restaurant,Sushi Restaurant,Bagel Shop,Restaurant,Fried Chicken Joint,Sports Bar,Supermarket
57,Central Toronto,1,Coffee Shop,Pub,Light Rail Station,American Restaurant,Sushi Restaurant,Bagel Shop,Restaurant,Fried Chicken Joint,Sports Bar,Supermarket
58,Central Toronto,1,Coffee Shop,Pub,Light Rail Station,American Restaurant,Sushi Restaurant,Bagel Shop,Restaurant,Fried Chicken Joint,Sports Bar,Supermarket
73,East Toronto,1,Yoga Studio,Auto Workshop,Comic Shop,Pizza Place,Restaurant,Burrito Place,Brewery,Skate Park,Light Rail Station,Spa


Cluster  3 :


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
32,Central Toronto,2,Garden,Women's Store,Dessert Shop,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop


Cluster  4 :


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
34,Central Toronto,3,Trail,Jewelry Store,Mexican Restaurant,Sushi Restaurant,Women's Store,Dim Sum Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
35,Central Toronto,3,Trail,Jewelry Store,Mexican Restaurant,Sushi Restaurant,Women's Store,Dim Sum Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant


Cluster  5 :


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
49,Central Toronto,4,Gym,Restaurant,Trail,Summer Camp,Women's Store,Dog Run,Dim Sum Restaurant,Diner,Discount Store,Donut Shop
50,Central Toronto,4,Gym,Restaurant,Trail,Summer Camp,Women's Store,Dog Run,Dim Sum Restaurant,Diner,Discount Store,Donut Shop


Cluster  6 :


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
66,Downtown Toronto,5,Park,Playground,Trail,Women's Store,Department Store,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop


Cluster  7 :


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
59,Downtown Toronto,6,Airport Service,Airport Terminal,Airport Lounge,Harbor / Marina,Plane,Sculpture Garden,Boutique,Boat or Ferry,Coffee Shop,Airport Food Court
60,Downtown Toronto,6,Airport Service,Airport Terminal,Airport Lounge,Harbor / Marina,Plane,Sculpture Garden,Boutique,Boat or Ferry,Coffee Shop,Airport Food Court
61,Downtown Toronto,6,Airport Service,Airport Terminal,Airport Lounge,Harbor / Marina,Plane,Sculpture Garden,Boutique,Boat or Ferry,Coffee Shop,Airport Food Court
62,Downtown Toronto,6,Airport Service,Airport Terminal,Airport Lounge,Harbor / Marina,Plane,Sculpture Garden,Boutique,Boat or Ferry,Coffee Shop,Airport Food Court
63,Downtown Toronto,6,Airport Service,Airport Terminal,Airport Lounge,Harbor / Marina,Plane,Sculpture Garden,Boutique,Boat or Ferry,Coffee Shop,Airport Food Court
64,Downtown Toronto,6,Airport Service,Airport Terminal,Airport Lounge,Harbor / Marina,Plane,Sculpture Garden,Boutique,Boat or Ferry,Coffee Shop,Airport Food Court
65,Downtown Toronto,6,Airport Service,Airport Terminal,Airport Lounge,Harbor / Marina,Plane,Sculpture Garden,Boutique,Boat or Ferry,Coffee Shop,Airport Food Court


Cluster  8 :


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,7,Coffee Shop,Park,Bakery,Café,Pub,Mexican Restaurant,Breakfast Spot,Restaurant,Theater,Yoga Studio
1,Downtown Toronto,7,Coffee Shop,Park,Bakery,Café,Pub,Mexican Restaurant,Breakfast Spot,Restaurant,Theater,Yoga Studio
2,Downtown Toronto,7,Clothing Store,Coffee Shop,Cosmetics Shop,Café,Bakery,Middle Eastern Restaurant,Japanese Restaurant,Ramen Restaurant,Fast Food Restaurant,Lingerie Store
3,Downtown Toronto,7,Clothing Store,Coffee Shop,Cosmetics Shop,Café,Bakery,Middle Eastern Restaurant,Japanese Restaurant,Ramen Restaurant,Fast Food Restaurant,Lingerie Store
4,Downtown Toronto,7,Coffee Shop,Restaurant,Café,Breakfast Spot,Italian Restaurant,Bakery,Hotel,Gastropub,Clothing Store,Pizza Place
6,Downtown Toronto,7,Coffee Shop,Cocktail Bar,Bakery,Farmers Market,Steakhouse,Cheese Shop,Seafood Restaurant,Beer Bar,Café,Sporting Goods Shop
7,Downtown Toronto,7,Coffee Shop,Sandwich Place,Italian Restaurant,Café,Ice Cream Shop,Burger Joint,Salad Place,Bubble Tea Shop,Bar,Spa
8,Downtown Toronto,7,Grocery Store,Café,Park,Candy Store,Diner,Italian Restaurant,Baby Store,Restaurant,Athletics & Sports,Convenience Store
9,Downtown Toronto,7,Coffee Shop,Café,Bar,Thai Restaurant,Steakhouse,Hotel,Sushi Restaurant,Asian Restaurant,American Restaurant,Restaurant
10,Downtown Toronto,7,Coffee Shop,Café,Bar,Thai Restaurant,Steakhouse,Hotel,Sushi Restaurant,Asian Restaurant,American Restaurant,Restaurant


Cluster  9 :


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,East Toronto,8,Pub,Health Food Store,Trail,Women's Store,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Dumpling Restaurant


Cluster  10 :


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
31,Central Toronto,9,Bus Line,Park,Lawyer,Swim School,Women's Store,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
