In [1]:
import pandas as pd
import numpy as np
import requests
from lxml import html, etree
from bs4 import BeautifulSoup

In [2]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

In [3]:
r = requests.get(url)

In [4]:
soup = BeautifulSoup(r.text, "html.parser")

### Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.

In [5]:
# Use Beautiful soup to find the table
post_table = soup.find("table",class_="wikitable sortable")
# Iterate through soup object and extract text from table
table_list = []
for rows in post_table.find_all('tr'):
    if rows.find_all('th'):
        postal_code = rows.find_all('th')[0].get_text()
        borough = rows.find_all('th')[1].get_text()
        neighborhood = rows.find_all('th')[2].get_text()[:-1]
    else:
        if rows.find_all('td')[1].a:
            postal_code = rows.find_all('td')[0].get_text()
            borough = rows.find_all('td')[1].get_text()
            neighborhood = rows.find_all('td')[2].get_text()[:-1]
        else:
            continue
    table_list.append([postal_code, borough, neighborhood])

In [6]:
# Create df from list
df = pd.DataFrame(table_list)
# Assign first row as header
df.columns = df.iloc[0]
# Create dataframe without first row
df = df[1:]
df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
1,M3A,North York,Parkwoods
2,M4A,North York,Victoria Village
3,M5A,Downtown Toronto,Harbourfront
4,M5A,Downtown Toronto,Regent Park
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Queen's Park,Not assigned
8,M9A,Etobicoke,Islington Avenue
9,M1B,Scarborough,Rouge
10,M1B,Scarborough,Malvern


In [7]:
# Rename column headers
df = df.rename(columns={'Postcode':'PostalCode', 'Borough':'Borough', 'Neighbourhood':'Neighborhood'})

### More than one neighborhood can exist in one postal code area. For example, in the table on the Wikipedia page, you will notice that M5A is listed twice and has two neighborhoods: Harbourfront and Regent Park. These two rows will be combined into one row with the neighborhoods separated with a comma as shown in row 11 in the above table.

In [8]:
# Groupby PostalCode and apply lambda function to combine neighborhoods together
grouped_df = df.groupby(['PostalCode', 'Borough'])['Neighborhood'].apply(lambda x: "%s" % ', '.join(x)).reset_index()
grouped_df.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


###### If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.

In [9]:
# Find out how many rows have 'Neighborhood' = 'Not assigned'
grouped_df[grouped_df['Neighborhood'] == 'Not assigned']

Unnamed: 0,PostalCode,Borough,Neighborhood
83,M7A,Queen's Park,Not assigned


In [10]:
# Iterate through DF to assign Borough to Neighborhood
for i, row in grouped_df.iterrows():
    if grouped_df['Neighborhood'][i] == 'Not assigned':
        grouped_df['Neighborhood'][i] = grouped_df['Borough'][i]

In [11]:
grouped_df.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


### Add longitude and latitude coordinates using geocoder

In [12]:
import geocoder

In [13]:
geocoder.google('Mountain View, CA')

<[OVER_QUERY_LIMIT] Google - Geocode [empty]>

###### Geocoder is not working. Using CSV file

In [14]:
file = '/Users/scott/Projects/Coursera/Geospatial_Coordinates.csv'

In [15]:
coord_df = pd.read_csv(file)

In [16]:
# Rename Postal Code column to match other df
coord_df = coord_df.rename(columns={'Postal Code': 'PostalCode'})

In [17]:
neighborhoods = grouped_df.merge(coord_df, on='PostalCode', how='left')

In [18]:
neighborhoods.tail(10)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
90,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
91,M9B,Etobicoke,"Cloverdale, Islington, Martin Grove, Princess ...",43.650943,-79.554724
92,M9C,Etobicoke,"Bloordale Gardens, Eringate, Markland Wood, Ol...",43.643515,-79.577201
93,M9L,North York,Humber Summit,43.756303,-79.565963
94,M9M,North York,"Emery, Humberlea",43.724766,-79.532242
95,M9N,York,Weston,43.706876,-79.518188
96,M9P,Etobicoke,Westmount,43.696319,-79.532242
97,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv...",43.688905,-79.554724
98,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ...",43.739416,-79.588437
99,M9W,Etobicoke,Northwest,43.706748,-79.594054


In [19]:
len(neighborhoods['Neighborhood'].unique())

100

### Cluster Analysis

In [20]:
#pd.set_option('display.max_columns', None)
#pd.set_option('display.max_rows', None)

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

In [21]:
address = 'Toronto, Canada'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))



The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [22]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)  
    
map_toronto

###### Explore venues

In [23]:
CLIENT_ID = 'LI4K1OOUAFPJJSTNNZ23CZ05ROFAGZ5LLOOM1WNWWE0PN0P0' # your Foursquare ID
CLIENT_SECRET = 'P1N0RHNW1IMWEVOT205NDGHN1MIGCGAKN52HCH3JVTZ4AVRF' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: LI4K1OOUAFPJJSTNNZ23CZ05ROFAGZ5LLOOM1WNWWE0PN0P0
CLIENT_SECRET:P1N0RHNW1IMWEVOT205NDGHN1MIGCGAKN52HCH3JVTZ4AVRF


In [24]:
neighborhoods.loc[0, 'Neighborhood']

'Rouge, Malvern'

In [25]:
neighborhood_latitude = neighborhoods.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = neighborhoods.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = neighborhoods.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Rouge, Malvern are 43.806686299999996, -79.19435340000001.


In [26]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    count = 1
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        
        print(name, count)
        count += 1
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [27]:
toronto_venues = getNearbyVenues(names=neighborhoods['Neighborhood'],
                                   latitudes=neighborhoods['Latitude'],
                                   longitudes=neighborhoods['Longitude']
                                  )

Rouge, Malvern 1
Highland Creek, Rouge Hill, Port Union 2
Guildwood, Morningside, West Hill 3
Woburn 4
Cedarbrae 5
Scarborough Village 6
East Birchmount Park, Ionview, Kennedy Park 7
Clairlea, Golden Mile, Oakridge 8
Cliffcrest, Cliffside, Scarborough Village West 9
Birch Cliff, Cliffside West 10
Dorset Park, Scarborough Town Centre, Wexford Heights 11
Maryvale, Wexford 12
Agincourt 13
Clarks Corners, Sullivan, Tam O'Shanter 14
Agincourt North, L'Amoreaux East, Milliken, Steeles East 15
L'Amoreaux West, Steeles West 16
Upper Rouge 17
Hillcrest Village 18
Fairview, Henry Farm, Oriole 19
Bayview Village 20
Silver Hills, York Mills 21
Newtonbrook, Willowdale 22
Willowdale South 23
York Mills West 24
Willowdale West 25
Parkwoods 26
Don Mills North 27
Flemingdon Park, Don Mills South 28
Bathurst Manor, Downsview North, Wilson Heights 29
Northwood Park, York University 30
CFB Toronto, Downsview East 31
Downsview West 32
Downsview Central 33
Downsview Northwest 34
Victoria Village 35
Woodbine

In [28]:
toronto_venues

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge, Malvern",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
2,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place
3,"Guildwood, Morningside, West Hill",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store
4,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Marina Spa,43.766000,-79.191000,Spa
5,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Big Bite Burrito,43.766299,-79.190720,Mexican Restaurant
6,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Enterprise Rent-A-Car,43.764042,-79.193371,Rental Car Location
7,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Woburn Medical Centre,43.766631,-79.192286,Medical Center
8,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Eggsmart,43.767800,-79.190466,Breakfast Spot
9,Woburn,43.770992,-79.216917,Starbucks,43.770037,-79.221156,Coffee Shop


###  Analyze Each Neighborhood

In [29]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighbodrhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [30]:
toronto_onehot.shape

(2246, 280)

In [31]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,"Adelaide, King, Richmond",0.000000,0.01,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.010000,0.000000,0.000000,0.000000,0.0000,0.0,0.010000,0.0,0.0,0.01
1,Agincourt,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.0000,0.0,0.000000,0.0,0.0,0.00
2,"Agincourt North, L'Amoreaux East, Milliken, St...",0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.0000,0.0,0.000000,0.0,0.0,0.00
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.090909,0.000000,0.0000,0.0,0.000000,0.0,0.0,0.00
4,"Alderwood, Long Branch",0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.0000,0.0,0.000000,0.0,0.0,0.00
5,"Bathurst Manor, Downsview North, Wilson Heights",0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.058824,0.000000,0.0000,0.0,0.000000,0.0,0.0,0.00
6,Bayview Village,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.0000,0.0,0.000000,0.0,0.0,0.00
7,"Bedford Park, Lawrence Manor East",0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.0000,0.0,0.000000,0.0,0.0,0.00
8,Berczy Park,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.0000,0.0,0.000000,0.0,0.0,0.00
9,"Birch Cliff, Cliffside West",0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.0000,0.0,0.000000,0.0,0.0,0.00


In [32]:
toronto_grouped.shape

(97, 280)

In [33]:
# find missing Neighborhoods
missing_list = []
for x in neighborhoods['Neighborhood']:
    toronto_grouped_list = toronto_grouped['Neighborhood'].tolist()
    if x in toronto_grouped_list:
        continue
    else:
        missing_list.append(x)
missing_list

['Upper Rouge', 'Newtonbrook, Willowdale', 'Islington Avenue']

In [34]:
neighborhoods = neighborhoods[~neighborhoods['Neighborhood'].isin(missing_list)]

In [35]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
                 venue  freq
0          Coffee Shop  0.07
1                 Café  0.05
2           Steakhouse  0.04
3  American Restaurant  0.04
4                Hotel  0.03


----Agincourt----
            venue  freq
0          Lounge  0.25
1  Clothing Store  0.25
2    Skating Rink  0.25
3  Breakfast Spot  0.25
4     Yoga Studio  0.00


----Agincourt North, L'Amoreaux East, Milliken, Steeles East----
                             venue  freq
0                       Playground   0.5
1                             Park   0.5
2                      Yoga Studio   0.0
3                      Men's Store   0.0
4  Molecular Gastronomy Restaurant   0.0


----Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown----
                  venue  freq
0         Grocery Store  0.18
1            Beer Store  0.09
2              Pharmacy  0.09
3  Fast Food Restaurant  0.09
4           Pizza Place  0.09


----Alderwood,

                             venue  freq
0           Furniture / Home Store   0.5
1                   Baseball Field   0.5
2                    Metro Station   0.0
3              Monument / Landmark   0.0
4  Molecular Gastronomy Restaurant   0.0


----Fairview, Henry Farm, Oriole----
                  venue  freq
0        Clothing Store  0.15
1  Fast Food Restaurant  0.07
2           Coffee Shop  0.06
3            Shoe Store  0.04
4            Restaurant  0.04


----First Canadian Place, Underground city----
         venue  freq
0  Coffee Shop  0.10
1         Café  0.08
2        Hotel  0.06
3   Restaurant  0.05
4   Steakhouse  0.04


----Flemingdon Park, Don Mills South----
                 venue  freq
0           Beer Store  0.10
1     Asian Restaurant  0.10
2                  Gym  0.10
3          Coffee Shop  0.10
4  Japanese Restaurant  0.05


----Forest Hill North, Forest Hill West----
              venue  freq
0              Park  0.25
1             Trail  0.25
2  Sushi Restaurant

          venue  freq
0   Coffee Shop  0.11
1    Restaurant  0.04
2          Café  0.04
3           Pub  0.03
4  Cocktail Bar  0.03


----Studio District----
                venue  freq
0                Café  0.10
1         Coffee Shop  0.08
2              Bakery  0.05
3           Gastropub  0.05
4  Italian Restaurant  0.05


----The Annex, North Midtown, Yorkville----
            venue  freq
0            Café  0.13
1  Sandwich Place  0.13
2     Coffee Shop  0.13
3     Pizza Place  0.09
4  History Museum  0.04


----The Beaches----
                 venue  freq
0          Coffee Shop  0.33
1                  Pub  0.33
2          Yoga Studio  0.00
3   Mexican Restaurant  0.00
4  Monument / Landmark  0.00


----The Beaches West, India Bazaar----
                venue  freq
0                Park  0.13
1      Sandwich Place  0.09
2          Board Shop  0.04
3  Italian Restaurant  0.04
4         Pizza Place  0.04


----The Danforth West, Riverdale----
                venue  freq
0    Greek R

In [36]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [37]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Café,Steakhouse,American Restaurant,Thai Restaurant,Restaurant,Breakfast Spot,Gym,Hotel,Cosmetics Shop
1,Agincourt,Lounge,Clothing Store,Skating Rink,Breakfast Spot,Dog Run,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Park,Playground,Women's Store,Dog Run,Department Store,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Grocery Store,Fried Chicken Joint,Pharmacy,Pizza Place,Sandwich Place,Liquor Store,Beer Store,Fast Food Restaurant,Coffee Shop,Video Store
4,"Alderwood, Long Branch",Pizza Place,Gym,Athletics & Sports,Pharmacy,Pool,Pub,Sandwich Place,Skating Rink,Bank,Coffee Shop
5,"Bathurst Manor, Downsview North, Wilson Heights",Coffee Shop,Pizza Place,Middle Eastern Restaurant,Ice Cream Shop,Restaurant,Deli / Bodega,Fried Chicken Joint,Frozen Yogurt Shop,Bank,Sushi Restaurant
6,Bayview Village,Bank,Japanese Restaurant,Café,Chinese Restaurant,Women's Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run
7,"Bedford Park, Lawrence Manor East",Italian Restaurant,Coffee Shop,Fast Food Restaurant,Pharmacy,Thai Restaurant,Pub,Indian Restaurant,Sushi Restaurant,Butcher,Café
8,Berczy Park,Coffee Shop,Cocktail Bar,Restaurant,Pub,Cheese Shop,Farmers Market,Seafood Restaurant,Bakery,Café,Steakhouse
9,"Birch Cliff, Cliffside West",General Entertainment,College Stadium,Skating Rink,Café,Women's Store,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store


### Cluster Neighborhoods

In [38]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 1, 0, 0, 0, 4, 0, 0, 0], dtype=int32)

In [39]:
toronto_merged = neighborhoods

# add clustering labels
toronto_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,0,Fast Food Restaurant,Women's Store,Doner Restaurant,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Donut Shop
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,0,Bar,Women's Store,Doner Restaurant,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Donut Shop
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,1,Breakfast Spot,Electronics Store,Spa,Rental Car Location,Mexican Restaurant,Pizza Place,Medical Center,Women's Store,Design Studio,Dessert Shop
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0,Coffee Shop,Korean Restaurant,Doner Restaurant,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Donut Shop
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0,Fried Chicken Joint,Hakka Restaurant,Thai Restaurant,Bakery,Bank,Athletics & Sports,Caribbean Restaurant,Discount Store,Design Studio,Dessert Shop


In [40]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examine Clusters

###### Cluster 1


In [41]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,0,Fast Food Restaurant,Women's Store,Doner Restaurant,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Donut Shop
1,Scarborough,0,Bar,Women's Store,Doner Restaurant,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Donut Shop
3,Scarborough,0,Coffee Shop,Korean Restaurant,Doner Restaurant,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Donut Shop
4,Scarborough,0,Fried Chicken Joint,Hakka Restaurant,Thai Restaurant,Bakery,Bank,Athletics & Sports,Caribbean Restaurant,Discount Store,Design Studio,Dessert Shop
5,Scarborough,0,Playground,Convenience Store,Women's Store,Dog Run,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Doner Restaurant
7,Scarborough,0,Bakery,Soccer Field,Ice Cream Shop,Business Service,Bus Station,Bus Line,Metro Station,Intersection,Park,General Entertainment
8,Scarborough,0,Motel,American Restaurant,Dance Studio,Department Store,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run
9,Scarborough,0,General Entertainment,College Stadium,Skating Rink,Café,Women's Store,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store
10,Scarborough,0,Indian Restaurant,Latin American Restaurant,Chinese Restaurant,Vietnamese Restaurant,Pet Store,Dumpling Restaurant,Eastern European Restaurant,Drugstore,Electronics Store,Donut Shop
11,Scarborough,0,Bakery,Auto Garage,Shopping Mall,Sandwich Place,Breakfast Spot,Middle Eastern Restaurant,Women's Store,Dessert Shop,Dim Sum Restaurant,Diner


###### Cluster 2

In [42]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Scarborough,1,Breakfast Spot,Electronics Store,Spa,Rental Car Location,Mexican Restaurant,Pizza Place,Medical Center,Women's Store,Design Studio,Dessert Shop
13,Scarborough,1,Pizza Place,Fried Chicken Joint,Italian Restaurant,Thai Restaurant,Chinese Restaurant,Noodle House,Pharmacy,Donut Shop,Doner Restaurant,Dog Run
40,East York,1,Park,Coffee Shop,Convenience Store,Dog Run,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Doner Restaurant
45,Central Toronto,1,Breakfast Spot,Park,Burger Joint,Clothing Store,Food & Drink Shop,Sandwich Place,Playground,Hotel,Dim Sum Restaurant,Design Studio
57,Downtown Toronto,1,Coffee Shop,Café,Italian Restaurant,Middle Eastern Restaurant,Bar,Bubble Tea Shop,Ice Cream Shop,Sandwich Place,Japanese Restaurant,Burger Joint
73,Downtown Toronto,1,Grocery Store,Café,Park,Baby Store,Italian Restaurant,Diner,Convenience Store,Nightclub,Restaurant,Coffee Shop
79,York,1,Pizza Place,Bakery,Bus Line,Convenience Store,Women's Store,Dog Run,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner
88,Etobicoke,1,Deli / Bodega,Baseball Field,Locksmith,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Ethiopian Restaurant,Dumpling Restaurant,Drugstore,Department Store
93,North York,1,Empanada Restaurant,Restaurant,Pizza Place,Women's Store,Diner,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Dim Sum Restaurant
99,Etobicoke,1,Rental Car Location,Drugstore,Bar,Women's Store,Dog Run,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store


###### Cluster 3

In [43]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
31,North York,2,Grocery Store,Bank,Shopping Mall,Women's Store,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run
71,North York,2,Clothing Store,Furniture / Home Store,Coffee Shop,Gift Shop,Event Space,Boutique,Miscellaneous Shop,Arts & Crafts Store,Accessories Store,Vietnamese Restaurant
75,West Toronto,2,Bar,Café,Restaurant,Coffee Shop,Asian Restaurant,Bakery,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,Cocktail Bar,French Restaurant


###### Cluster 4

In [44]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
84,East Toronto,3,Light Rail Station,Garden,Garden Center,Auto Workshop,Skate Park,Fast Food Restaurant,Farmers Market,Brewery,Spa,Burrito Place
91,Etobicoke,3,Bank,Doner Restaurant,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Women's Store,Deli / Bodega
92,Etobicoke,3,Liquor Store,Beer Store,Pharmacy,Pizza Place,Convenience Store,Café,Shopping Plaza,Park,General Travel,Creperie
96,Etobicoke,3,Pizza Place,Coffee Shop,Intersection,Middle Eastern Restaurant,Sandwich Place,Chinese Restaurant,Dim Sum Restaurant,Department Store,Design Studio,Dessert Shop


###### Cluster 5

In [45]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Scarborough,4,Coffee Shop,Hobby Shop,Department Store,Train Station,Chinese Restaurant,Bus Station,Donut Shop,Drugstore,Dumpling Restaurant,Doner Restaurant
26,North York,4,Gym / Fitness Center,Caribbean Restaurant,Japanese Restaurant,Café,Baseball Field,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run
