## Segmenting and Clustering Neighborhoods in Toronto

### IBM Data Science Specialization - Week 3 Assignment

#### Prepared By Scott Brown on 5/5/2019

In [1]:
#Import libraries for scrapping exercise

import numpy as np 
import pandas as pd 
from bs4 import BeautifulSoup
import requests

#Import date and then convert from text to xml format
source = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
xml_page_data = BeautifulSoup(source, 'lxml')

In [2]:
#Create a class that will scrape the Wikipedia page to gather the target information 
#The dataframe will consist of three columns: PostalCode, Borough, and Neighborhood

class webpage_scrapp:
       
        def parse_url(self, url):
            response = requests.get(url)
            xml_page_data = BeautifulSoup(response.text, 'lxml')
            return [(self.parse_html_table(table))\
                    for table in xml_page_data.find_all('table', class_="wikitable sortable")]  
    
        def parse_html_table(self, table):
            n_columns = 0
            n_rows=0
            column_names = []
            for row in table.find_all('tr'):
                td_tags = row.find_all('td')
                if len(td_tags) > 0:
                    n_rows+=1
                    if n_columns == 0:
                        n_columns = len(td_tags)
                        
                th_tags = row.find_all('th') 
                if len(th_tags) > 0 and len(column_names) == 0:
                    for th in th_tags:
                        column_names.append(th.get_text())
    
            if len(column_names) > 0 and len(column_names) != n_columns:
                raise Exception("Column titles do not match the number of columns")
    
            columns = column_names if len(column_names) > 0 else range(0,n_columns)
            df = pd.DataFrame(columns = columns,
                              index= range(0,n_rows))
            row_marker = 0
            for row in table.find_all('tr'):
                column_marker = 0
                columns = row.find_all('td')
                for column in columns:
                    df.iat[row_marker,column_marker] = column.get_text()
                    column_marker += 1
                if len(columns) > 0:
                    row_marker += 1
                    
            for col in df:
                try:
                    df[col] = df[col].astype(float)
                except ValueError:
                    pass
            
            return df

In [3]:
table = webpage_scrapp().parse_url('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')[0] 
table.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned\n
1,M2A,Not assigned,Not assigned\n
2,M3A,North York,Parkwoods\n
3,M4A,North York,Victoria Village\n
4,M5A,Downtown Toronto,Harbourfront\n
5,M5A,Downtown Toronto,Regent Park\n
6,M6A,North York,Lawrence Heights\n
7,M6A,North York,Lawrence Manor\n
8,M7A,Queen's Park,Not assigned\n
9,M8A,Not assigned,Not assigned\n


In [4]:
# Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.

table = table[table.Borough != 'Not assigned']
table.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods\n
3,M4A,North York,Victoria Village\n
4,M5A,Downtown Toronto,Harbourfront\n
5,M5A,Downtown Toronto,Regent Park\n
6,M6A,North York,Lawrence Heights\n
7,M6A,North York,Lawrence Manor\n
8,M7A,Queen's Park,Not assigned\n
10,M9A,Etobicoke,Islington Avenue\n
11,M1B,Scarborough,Rouge\n
12,M1B,Scarborough,Malvern\n


In [5]:
#Remove \n from the data in table
table = table.replace('\n',' ', regex=True)
table.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


In [6]:
#More than one neighborhood can exist in one postal code area. 
#For example, in the table on the Wikipedia page, you will notice that M5A is 
#listed twice and has two neighborhoods: Harbourfront and Regent Park. 
#These two rows will be combined into one row with the neighborhoods separated 
#with a comma as shown in row 11 in the above table.

neighborhood_frame = table.groupby(['Postcode','Borough'])['Neighbourhood\n'].apply(lambda x: ", ".join(x.astype(str))).reset_index()
neighborhood_frame = neighborhood_frame.sample(frac=1).reset_index(drop=True)
neighborhood_frame.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M5C,Downtown Toronto,St. James Town
1,M4P,Central Toronto,Davisville North
2,M4S,Central Toronto,Davisville
3,M2M,North York,"Newtonbrook , Willowdale"
4,M4K,East Toronto,"The Danforth West , Riverdale"
5,M2J,North York,"Fairview , Henry Farm , Oriole"
6,M6E,York,Caledonia-Fairbanks
7,M5B,Downtown Toronto,"Ryerson , Garden District"
8,M7Y,East Toronto,Business Reply Mail Processing Centre 969 East...
9,M6N,York,"The Junction North , Runnymede"


In [7]:
#In the last cell of your notebook, use the .shape method to print the number of rows of your dataframe

print(neighborhood_frame.shape)

(103, 3)


## Geospacial Data

In [8]:
url_geo="http://cocl.us/Geospatial_data"
geo_info=pd.read_csv(url_geo)
geo_info.head(10)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


In [9]:
print(list(neighborhood_frame))
print(list(geo_info))

full_table = neighborhood_frame.set_index('Postcode').join(geo_info.set_index('Postal Code'))
full_table = full_table.sample(frac=1).reset_index(drop=True)
full_table.head(20)

['Postcode', 'Borough', 'Neighbourhood\n']
['Postal Code', 'Latitude', 'Longitude']


Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude
0,Downtown Toronto,"Harbord , University of Toronto",43.662696,-79.400049
1,Scarborough,Upper Rouge,43.836125,-79.205636
2,North York,Don Mills North,43.745906,-79.352188
3,Scarborough,"Birch Cliff , Cliffside West",43.692657,-79.264848
4,North York,Bayview Village,43.786947,-79.385975
5,Etobicoke,"Alderwood , Long Branch",43.602414,-79.543484
6,Scarborough,Cedarbrae,43.773136,-79.239476
7,East Toronto,The Beaches,43.676357,-79.293031
8,Scarborough,"Maryvale , Wexford",43.750072,-79.295849
9,Central Toronto,"Forest Hill North , Forest Hill West",43.696948,-79.411307


## Map of Toronto 

In [10]:
#!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim 
import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library

In [11]:
address = 'Toronto'

geolocator = Nominatim(user_agent="capstone_coursera_SB")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [12]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(full_table['Latitude'], full_table['Longitude'], full_table['Neighbourhood\n']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto



#### Explore Venues in Neighborhoods

In [16]:
CLIENT_ID = 'D01SZLAIMCZZP4KLE5EHP4YIDRDGOJKKXCUCNTWUFP4E5VSE' # your Foursquare ID
CLIENT_SECRET = 'L3NB1OIQTKU4GKVRXASD4E5RAENKS2UIRVQBR4NP4GMRFUJ0' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

LIMIT = 100

Your credentails:
CLIENT_ID: D01SZLAIMCZZP4KLE5EHP4YIDRDGOJKKXCUCNTWUFP4E5VSE
CLIENT_SECRET:L3NB1OIQTKU4GKVRXASD4E5RAENKS2UIRVQBR4NP4GMRFUJ0


In [17]:
#Create a function to pull venues for all the neighborhoods

import requests # library to handle requests

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [18]:
toronto_venues = getNearbyVenues(names=full_table['Neighbourhood\n'],
                                   latitudes=full_table['Latitude'],
                                   longitudes=full_table['Longitude']
                                  )

print(toronto_venues.shape)
toronto_venues.head()

Harbord , University of Toronto 
Upper Rouge 
Don Mills North 
Birch Cliff , Cliffside West 
Bayview Village 
Alderwood , Long Branch 
Cedarbrae 
The Beaches 
Maryvale , Wexford 
Forest Hill North , Forest Hill West 
Rosedale 
Parkdale , Roncesvalles 
The Danforth West , Riverdale 
L'Amoreaux West 
Christie 
Little Portugal , Trinity 
Cloverdale , Islington , Martin Grove , Princess Gardens , West Deane Park 
Bloordale Gardens , Eringate , Markland Wood , Old Burnhamthorpe 
Roselawn 
Willowdale South 
Adelaide , King , Richmond 
Runnymede , Swansea 
Clarks Corners , Sullivan , Tam O'Shanter 
Davisville 
St. James Town 
Church and Wellesley 
Humber Bay , King's Mill Park , Kingsway Park South East , Mimico NE , Old Mill South , The Queensway East , Royal York South East , Sunnylea 
Cabbagetown , St. James Town 
Willowdale West 
Chinatown , Grange Park , Kensington Market 
Hillcrest Village 
Emery , Humberlea 
Moore Park , Summerhill East 
High Park , The Junction South 
Parkwoods 
Canad

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Harbord , University of Toronto",43.662696,-79.400049,Yasu,43.662837,-79.403217,Japanese Restaurant
1,"Harbord , University of Toronto",43.662696,-79.400049,Piano Piano,43.662949,-79.402898,Italian Restaurant
2,"Harbord , University of Toronto",43.662696,-79.400049,Rasa,43.662757,-79.403988,Restaurant
3,"Harbord , University of Toronto",43.662696,-79.400049,The Dessert Kitchen,43.662823,-79.402746,Dessert Shop
4,"Harbord , University of Toronto",43.662696,-79.400049,Almond Butterfly,43.662836,-79.403365,Bakery


In [19]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide , King , Richmond",100,100,100,100,100,100
Agincourt,4,4,4,4,4,4
"Agincourt North , L'Amoreaux East , Milliken , Steeles East",3,3,3,3,3,3
"Albion Gardens , Beaumond Heights , Humbergate , Jamestown , Mount Olive , Silverstone , South Steeles , Thistletown",9,9,9,9,9,9
"Alderwood , Long Branch",10,10,10,10,10,10
"Bathurst Manor , Downsview North , Wilson Heights",19,19,19,19,19,19
Bayview Village,4,4,4,4,4,4
"Bedford Park , Lawrence Manor East",23,23,23,23,23,23
Berczy Park,57,57,57,57,57,57
"Birch Cliff , Cliffside West",4,4,4,4,4,4


In [20]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 274 uniques categories.


#### Analyze each neighborhood

In [21]:

# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [22]:
toronto_onehot.shape

(2243, 274)

In [23]:
#Group rows by neighborhood and by taking the mean of the frequency of occurrence of each category
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
0,"Adelaide , King , Richmond",0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.000000,0.0,0.010000,0.000000,0.000000,0.000000,0.000000,0.010000,0.000000,0.000000
1,Agincourt,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,"Agincourt North , L'Amoreaux East , Milliken ,...",0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,"Albion Gardens , Beaumond Heights , Humbergate...",0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,"Alderwood , Long Branch",0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
5,"Bathurst Manor , Downsview North , Wilson Heig...",0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.000000,0.0,0.000000,0.000000,0.052632,0.000000,0.000000,0.000000,0.000000,0.000000
6,Bayview Village,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
7,"Bedford Park , Lawrence Manor East",0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
8,Berczy Park,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.000000,0.0,0.017544,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
9,"Birch Cliff , Cliffside West",0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [24]:
toronto_grouped.shape

(100, 274)

In [25]:
#Print each neighborhood and top 5 venues

num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide , King , Richmond ----
                 venue  freq
0          Coffee Shop  0.06
1                 Café  0.05
2           Steakhouse  0.04
3  American Restaurant  0.04
4      Thai Restaurant  0.04


----Agincourt ----
                venue  freq
0        Skating Rink  0.25
1      Sandwich Place  0.25
2              Lounge  0.25
3      Breakfast Spot  0.25
4  Mexican Restaurant  0.00


----Agincourt North , L'Amoreaux East , Milliken , Steeles East ----
                venue  freq
0          Playground  0.33
1         Coffee Shop  0.33
2                Park  0.33
3         Yoga Studio  0.00
4  Mexican Restaurant  0.00


----Albion Gardens , Beaumond Heights , Humbergate , Jamestown , Mount Olive , Silverstone , South Steeles , Thistletown ----
                 venue  freq
0        Grocery Store  0.22
1             Pharmacy  0.11
2           Beer Store  0.11
3       Sandwich Place  0.11
4  Fried Chicken Joint  0.11


----Alderwood , Long Branch ----
                venue  fr

               venue  freq
0  Korean Restaurant  0.25
1   Business Service  0.25
2         Food Truck  0.25
3     Baseball Field  0.25
4        Yoga Studio  0.00


----Downsview Northwest ----
                  venue  freq
0  Gym / Fitness Center   0.2
1         Grocery Store   0.2
2          Liquor Store   0.2
3        Discount Store   0.2
4    Athletics & Sports   0.2


----Downsview West ----
           venue  freq
0           Bank  0.25
1  Moving Target  0.25
2  Grocery Store  0.25
3  Shopping Mall  0.25
4    Yoga Studio  0.00


----East Birchmount Park , Ionview , Kennedy Park ----
               venue  freq
0   Department Store  0.25
1        Bus Station  0.25
2     Discount Store  0.25
3        Coffee Shop  0.25
4  Mobile Phone Shop  0.00


----East Toronto ----
                             venue  freq
0                             Park  0.50
1                    Metro Station  0.25
2                Convenience Store  0.25
3                      Yoga Studio  0.00
4  Molecular Ga

              venue  freq
0              Café  0.10
1       Pizza Place  0.08
2       Coffee Shop  0.08
3  Sushi Restaurant  0.05
4               Gym  0.05


----Ryerson , Garden District ----
                       venue  freq
0                Coffee Shop  0.08
1             Clothing Store  0.07
2             Cosmetics Shop  0.04
3                       Café  0.04
4  Middle Eastern Restaurant  0.03


----Scarborough Village ----
                             venue  freq
0                Convenience Store   0.5
1                       Playground   0.5
2               Mexican Restaurant   0.0
3              Monument / Landmark   0.0
4  Molecular Gastronomy Restaurant   0.0


----Silver Hills , York Mills ----
                             venue  freq
0                        Cafeteria   0.5
1                             Park   0.5
2               Mexican Restaurant   0.0
3              Monument / Landmark   0.0
4  Molecular Gastronomy Restaurant   0.0


----St. James Town ----
           

In [26]:
#Sort venurs in descending order

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [27]:
#Top 10 Venues for Each Neighborhood

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide , King , Richmond",Coffee Shop,Café,Thai Restaurant,American Restaurant,Steakhouse,Gym,Bakery,Bar,Hotel,Burger Joint
1,Agincourt,Lounge,Breakfast Spot,Sandwich Place,Skating Rink,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop
2,"Agincourt North , L'Amoreaux East , Milliken ,...",Park,Coffee Shop,Playground,Dog Run,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store
3,"Albion Gardens , Beaumond Heights , Humbergate...",Grocery Store,Beer Store,Fried Chicken Joint,Fast Food Restaurant,Pharmacy,Pizza Place,Sandwich Place,Coffee Shop,Airport Lounge,Falafel Restaurant
4,"Alderwood , Long Branch",Pizza Place,Gym,Coffee Shop,Athletics & Sports,Skating Rink,Pharmacy,Pub,Sandwich Place,Pool,Women's Store


#### Cluster Neighborhoods

In [28]:
from sklearn.preprocessing import StandardScaler

# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 


array([0, 0, 4, 0, 0, 0, 0, 0, 0, 0])

In [29]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = full_table

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood\n')

toronto_merged.head() # check the last columns!

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,"Harbord , University of Toronto",43.662696,-79.400049,0.0,Café,Bookstore,Restaurant,Japanese Restaurant,Bar,Bakery,College Gym,Chinese Restaurant,Sandwich Place,Beer Store
1,Scarborough,Upper Rouge,43.836125,-79.205636,,,,,,,,,,,
2,North York,Don Mills North,43.745906,-79.352188,0.0,Gym / Fitness Center,Café,Japanese Restaurant,Caribbean Restaurant,Women's Store,Dog Run,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store
3,Scarborough,"Birch Cliff , Cliffside West",43.692657,-79.264848,0.0,College Stadium,Café,Skating Rink,General Entertainment,Women's Store,Doner Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store
4,North York,Bayview Village,43.786947,-79.385975,0.0,Bank,Chinese Restaurant,Café,Japanese Restaurant,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Women's Store


In [33]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood\n'], toronto_merged['Cluster Labels']):
    cluster = np.nan_to_num(cluster)
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [35]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]


Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Harbord , University of Toronto",Café,Bookstore,Restaurant,Japanese Restaurant,Bar,Bakery,College Gym,Chinese Restaurant,Sandwich Place,Beer Store
2,Don Mills North,Gym / Fitness Center,Café,Japanese Restaurant,Caribbean Restaurant,Women's Store,Dog Run,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store
3,"Birch Cliff , Cliffside West",College Stadium,Café,Skating Rink,General Entertainment,Women's Store,Doner Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store
4,Bayview Village,Bank,Chinese Restaurant,Café,Japanese Restaurant,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Women's Store
5,"Alderwood , Long Branch",Pizza Place,Gym,Coffee Shop,Athletics & Sports,Skating Rink,Pharmacy,Pub,Sandwich Place,Pool,Women's Store
6,Cedarbrae,Hakka Restaurant,Fried Chicken Joint,Thai Restaurant,Caribbean Restaurant,Bakery,Bank,Athletics & Sports,Dumpling Restaurant,Drugstore,Donut Shop
7,The Beaches,Health Food Store,Music Venue,Pub,Women's Store,Dog Run,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store
8,"Maryvale , Wexford",Bakery,Auto Garage,Smoke Shop,Shopping Mall,Breakfast Spot,Middle Eastern Restaurant,Sandwich Place,Women's Store,Dim Sum Restaurant,Diner
9,"Forest Hill North , Forest Hill West",Trail,Jewelry Store,Sushi Restaurant,Bus Line,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Donut Shop,Electronics Store,Department Store
11,"Parkdale , Roncesvalles",Gift Shop,Breakfast Spot,Restaurant,Bank,Bookstore,Italian Restaurant,Eastern European Restaurant,Coffee Shop,Dessert Shop,Dog Run


In [36]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]


Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
91,"Rouge , Malvern",Fast Food Restaurant,Women's Store,Doner Restaurant,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Donut Shop


In [37]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]


Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
26,"Humber Bay , King's Mill Park , Kingsway Park ...",Baseball Field,Women's Store,Donut Shop,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Drugstore
31,"Emery , Humberlea",Construction & Landscaping,Baseball Field,Drugstore,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Women's Store


In [38]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]


Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,Roselawn,Garden,Women's Store,Dog Run,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Doner Restaurant,Dance Studio


In [39]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,Rosedale,Park,Playground,Trail,Eastern European Restaurant,Dumpling Restaurant,Electronics Store,Drugstore,Donut Shop,Doner Restaurant,Curling Ice
34,Parkwoods,Park,Fast Food Restaurant,Food & Drink Shop,Pool,Women's Store,Discount Store,Department Store,Dessert Shop,Dim Sum Restaurant,Diner
36,East Toronto,Park,Convenience Store,Metro Station,Women's Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant
38,"Silver Hills , York Mills",Park,Cafeteria,Women's Store,Dog Run,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Doner Restaurant
43,York Mills West,Bank,Park,Electronics Store,Donut Shop,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant
44,Caledonia-Fairbanks,Park,Women's Store,Fast Food Restaurant,Market,Pharmacy,Gluten-free Restaurant,Gift Shop,Gourmet Shop,Dumpling Restaurant,Drugstore
46,"CFB Toronto , Downsview East",Park,Snack Place,Airport,Doner Restaurant,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run
63,Lawrence Park,Park,Swim School,Construction & Landscaping,Bus Line,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Donut Shop,Doner Restaurant
68,"The Kingsway , Montgomery Road , Old Mill North",Park,River,Women's Store,Discount Store,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Dog Run
73,"Downsview , North Park , Upwood Park",Construction & Landscaping,Park,Massage Studio,Bakery,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant
