# Segmenting and Clustering Neighborhoods in Toronto Part 3

## First of all, it is neccesary run the part 1 in order to obtain the wikipedia data 

### Import libraries

In [2]:
import pandas as pd
import numpy as np
import requests

### Scrapping the wikipedia data

In [3]:
wiki_url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
wiki_page = requests.get(wiki_url)
wiki_data = pd.read_html(wiki_page.text)
wiki_data

[    Postal Code           Borough  \
 0           M1A      Not assigned   
 1           M2A      Not assigned   
 2           M3A        North York   
 3           M4A        North York   
 4           M5A  Downtown Toronto   
 ..          ...               ...   
 175         M5Z      Not assigned   
 176         M6Z      Not assigned   
 177         M7Z      Not assigned   
 178         M8Z         Etobicoke   
 179         M9Z      Not assigned   
 
                                          Neighbourhood  
 0                                         Not assigned  
 1                                         Not assigned  
 2                                            Parkwoods  
 3                                     Victoria Village  
 4                            Regent Park, Harbourfront  
 ..                                                 ...  
 175                                       Not assigned  
 176                                       Not assigned  
 177                

In [4]:
len(wiki_data)

3

In [5]:
type(wiki_data)

list

### Obtain dataframe with three columns

In [6]:
#only it is neccesary the first table so we can drop the other tables

wiki_df = wiki_data[0]
wiki_df.head()
wiki_df

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


In [7]:
list(wiki_df.columns.values)

['Postal Code', 'Borough', 'Neighbourhood']

### Drop the Borough and Neighbourhood which are not assigned

In [8]:
df = wiki_df[wiki_df['Neighbourhood'] != 'Not assigned']
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [9]:
df.Neighbourhood.str.count("Not assigned").sum()

0

### Group by postal codes, reset the index values and drop the old index

In [10]:
# Group by Postal Code

df = df.sort_values(['Postal Code'])
df



Unnamed: 0,Postal Code,Borough,Neighbourhood
9,M1B,Scarborough,"Malvern, Rouge"
18,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
27,M1E,Scarborough,"Guildwood, Morningside, West Hill"
36,M1G,Scarborough,Woburn
45,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
107,M9P,Etobicoke,Westmount
116,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
143,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


In [11]:
#Reset the index values and drop the old index
df.reset_index(inplace = True)
df.drop(['index'], axis = 'columns', inplace = True)
df

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


### Obtain the shape of dataframe

In [12]:
df.shape

(103, 3)

## Then I have ran the Part 2 in order to obtain the coordinates

### Install geocoder and obtain the coords

In [13]:
!pip install geocoder

Defaulting to user installation because normal site-packages is not writeable


In [14]:
import geocoder

# initialize your variable to None
#lat_lng_coords = None

#postal_code = 'M3A'

# loop until you get the coordinates
#while(lat_lng_coords is None):
  #g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
  #lat_lng_coords = g.latlng

#latitude = lat_lng_coords[0]
#longitude = lat_lng_coords[1]


### After 30' the code does not obtain the coordinates, so I decide obtain the coordinates with the alternative way offer in the course, importing csv file from the URL

In [15]:
geodata = pd.read_csv("https://cocl.us/Geospatial_data")
geodata

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


### Check the shapes and types of the two dataframes 

In [16]:
print("The wiki data shape is ", df.shape)
print("The geodata shape is ", geodata.shape)

The wiki data shape is  (103, 3)
The geodata shape is  (103, 3)


In [17]:
print("The wiki data types are \n", df.dtypes, "\n")

print("The geodata types are \n", geodata.dtypes)

The wiki data types are 
 Postal Code      object
Borough          object
Neighbourhood    object
dtype: object 

The geodata types are 
 Postal Code     object
Latitude       float64
Longitude      float64
dtype: object


### Join the two data in one dataframe using pandas

In [18]:
merge_data = df.join(geodata.set_index('Postal Code'), on = 'Postal Code', how = 'inner')
merge_data

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437


In [19]:
merge_data.shape

(103, 5)

## Part 3

### Such as in the previous lab, it is important to  cluster Toronto based on the similarities of the venues categories using K-means clustering and Foursquare API.

In [22]:
from geopy.geocoders import Nominatim

In [23]:
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The coordinates of Toronto are 43.6534817, -79.3839347.


### Create and visualize Toronto's map

In [26]:
import folium

In [39]:
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

### Adding markers neighbourhoods to map

In [44]:
for latitude, longitude, borough, neighbourhood in zip(merge_data['Latitude'], merge_data['Longitude'], merge_data['Borough'], merge_data['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [latitude, longitude],
        radius=4,
        popup=label,
        color='red',
        fill=True
        ).add_to(map_Toronto)  
    
map_Toronto

### Foursquare API credentials

In [40]:
CLIENT_ID = 'NRGQEDUFGDGDUVYO3MEW3QY1E1P51QHT3QBP0MTNJL12ZZ0F' 
CLIENT_SECRET = '02GVG22AKZW33XAPG3ZUPVX0BZNCH0DKZJUCPMFH54BC3RTB'
VERSION = '20180605' 


### Getting all venues categories in Toronto

In [42]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, long in zip(names, latitudes, longitudes):
        print(name)
            
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            long, 
            radius
            )
            
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        venues_list.append([(
            name, 
            lat, 
            long, 
            v['venue']['name'], 
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([element for venue in venues_list for element in venue])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Category']
    
    return(nearby_venues)

### Venues in Toronto for each Neighbourhood with function created

In [43]:
venues_in_toronto = getNearbyVenues(merge_data['Neighbourhood'], merge_data['Latitude'], merge_data['Longitude'])

Malvern, Rouge
Rouge Hill, Port Union, Highland Creek
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
Kennedy Park, Ionview, East Birchmount Park
Golden Mile, Clairlea, Oakridge
Cliffside, Cliffcrest, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Wexford Heights, Scarborough Town Centre
Wexford, Maryvale
Agincourt
Clarks Corners, Tam O'Shanter, Sullivan
Milliken, Agincourt North, Steeles East, L'Amoreaux East
Steeles West, L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
York Mills, Silver Hills
Willowdale, Newtonbrook
Willowdale, Willowdale East
York Mills West
Willowdale, Willowdale West
Parkwoods
Don Mills
Don Mills
Bathurst Manor, Wilson Heights, Downsview North
Northwood Park, York University
Downsview
Downsview
Downsview
Downsview
Victoria Village
Parkview Hill, Woodbine Gardens
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto, Broadview North (Old East York)
The Danforth West, 

In [45]:
venues_in_toronto.shape

(1326, 5)

In [46]:
venues_in_toronto.head()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Category
0,"Malvern, Rouge",43.806686,-79.194353,Wendy’s,Fast Food Restaurant
1,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,Royal Canadian Legion,Bar
2,"Guildwood, Morningside, West Hill",43.763573,-79.188711,RBC Royal Bank,Bank
3,"Guildwood, Morningside, West Hill",43.763573,-79.188711,G & G Electronics,Electronics Store
4,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Sail Sushi,Restaurant


### Grouping data by categories

In [53]:
Gb_venues_category = venues_in_toronto.groupby('Venue Category').max()
Gb_venues_category


Unnamed: 0_level_0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue
Venue Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Accessories Store,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,Ardene Shoes Outlet
Airport,Downsview,43.737473,-79.394420,Toronto Downsview Airport (YZD)
Airport Food Court,"CN Tower, King and Spadina, Railway Lands, Har...",43.628947,-79.394420,Billy Bishop Café
Airport Gate,"CN Tower, King and Spadina, Railway Lands, Har...",43.628947,-79.394420,Gate 8
Airport Lounge,"CN Tower, King and Spadina, Railway Lands, Har...",43.628947,-79.394420,Porter Lounge
...,...,...,...,...
Warehouse Store,Thorncliffe Park,43.705369,-79.349372,Costco
Wine Bar,"Toronto Dominion Centre, Design Exchange",43.653206,-79.379817,The National Club
Wings Joint,"Mimico NW, The Queensway West, South of Bloor,...",43.628841,-79.520999,Wingporium
Women's Store,"Lawrence Manor, Lawrence Heights",43.718518,-79.453512,Maximum Woman


In [56]:
venues_in_toronto['Venue Category'].value_counts()


Coffee Shop                    93
Café                           71
Park                           46
Restaurant                     40
Pizza Place                    39
                               ..
Sculpture Garden                1
Gaming Cafe                     1
Eastern European Restaurant     1
Field                           1
Monument / Landmark             1
Name: Venue Category, Length: 239, dtype: int64

In [59]:
Gb_neighbourhood = venues_in_toronto.groupby('Neighbourhood').head()
Gb_neighbourhood


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Category
0,"Malvern, Rouge",43.806686,-79.194353,Wendy’s,Fast Food Restaurant
1,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,Royal Canadian Legion,Bar
2,"Guildwood, Morningside, West Hill",43.763573,-79.188711,RBC Royal Bank,Bank
3,"Guildwood, Morningside, West Hill",43.763573,-79.188711,G & G Electronics,Electronics Store
4,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Sail Sushi,Restaurant
...,...,...,...,...,...
1321,"Northwest, West Humber - Clairville",43.706748,-79.594054,Economy Rent A Car,Rental Car Location
1322,"Northwest, West Humber - Clairville",43.706748,-79.594054,Logistics Distribution,Bar
1323,"Northwest, West Humber - Clairville",43.706748,-79.594054,Saand Rexdale,Drugstore
1324,"Northwest, West Humber - Clairville",43.706748,-79.594054,PC Garden,Garden Center


In [60]:
venues_in_toronto['Neighbourhood'].value_counts()

Runnymede, Swansea                                                        30
St. James Town, Cabbagetown                                               30
Regent Park, Harbourfront                                                 30
Studio District                                                           30
Church and Wellesley                                                      30
                                                                          ..
Rouge Hill, Port Union, Highland Creek                                     1
Willowdale, Newtonbrook                                                    1
Humberlea, Emery                                                           1
Malvern, Rouge                                                             1
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale     1
Name: Neighbourhood, Length: 96, dtype: int64

### One Hot encoding

In [61]:
toronto_venue_category = pd.get_dummies(venues_in_toronto[['Venue Category']], prefix = "", prefix_sep = "")
toronto_venue_category

Unnamed: 0,Accessories Store,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1321,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1322,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1323,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1324,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Adding the neighbourhood to last dataframe and group by neihgbourhood

In [62]:
toronto_venue_category['Neighbourhood'] = venues_in_toronto['Neighbourhood'] 

fixed_columns = [toronto_venue_category.columns[-1]] + list(toronto_venue_category.columns[:-1])
toronto_venue_category = toronto_venue_category[fixed_columns]

toronto_venue_category.head()

Unnamed: 0,Neighbourhood,Accessories Store,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Rouge Hill, Port Union, Highland Creek",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [65]:
Gb_neighbourhood_toronto = toronto_venue_category.groupby('Neighbourhood').mean().reset_index().round(2)
Gb_neighbourhood_toronto.head()

Unnamed: 0,Neighbourhood,Accessories Store,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Finding the most common venues 

In [66]:
def most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [67]:
import numpy as np

### Decision of number of common venues due to there are many venues in order to evaluate

In [72]:
number_common_venues = 12

ord_number = ['st', 'nd', 'rd']

columns = ['Neighbourhood']
for ordinal in np.arange(number_common_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ordinal+1, ord_number[ordinal]))
    except:
        columns.append('{}th Most Common Venue'.format(ordinal+1))


neighborhoods_venues_sorted = pd.DataFrame(columns = columns)
neighborhoods_venues_sorted['Neighbourhood'] = Gb_neighbourhood_toronto['Neighbourhood']

for i in np.arange(Gb_neighbourhood_toronto.shape[0]):
    neighborhoods_venues_sorted.iloc[i, 1:] = most_common_venues(Gb_neighbourhood_toronto.iloc[i, :], number_common_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue
0,Agincourt,Lounge,Breakfast Spot,Clothing Store,Latin American Restaurant,Skating Rink,Accessories Store,Movie Theater,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop
1,"Alderwood, Long Branch",Pizza Place,Gym,Coffee Shop,Pub,Dance Studio,Sandwich Place,Skating Rink,Pharmacy,Home Service,Historic Site,Hobby Shop,Middle Eastern Restaurant
2,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Bank,Health Food Store,Middle Eastern Restaurant,Fried Chicken Joint,Sushi Restaurant,Supermarket,Bridal Shop,Shopping Mall,Restaurant,Sandwich Place,Chinese Restaurant
3,Bayview Village,Japanese Restaurant,Café,Chinese Restaurant,Bank,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant
4,"Bedford Park, Lawrence Manor East",Sandwich Place,Coffee Shop,Italian Restaurant,Pizza Place,Pub,Pharmacy,Butcher,Café,Liquor Store,Sushi Restaurant,Restaurant,Juice Bar
...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,"Willowdale, Willowdale East",Ramen Restaurant,Pizza Place,Café,Sandwich Place,Coffee Shop,Pet Store,Fast Food Restaurant,Shopping Mall,Electronics Store,Lounge,Steakhouse,Sushi Restaurant
92,"Willowdale, Willowdale West",Pizza Place,Pharmacy,Grocery Store,Coffee Shop,Butcher,Miscellaneous Shop,Motel,Monument / Landmark,Modern European Restaurant,Mobile Phone Shop,Metro Station,Middle Eastern Restaurant
93,Woburn,Coffee Shop,Korean BBQ Restaurant,Accessories Store,Movie Theater,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop
94,Woodbine Heights,Skating Rink,Video Store,Athletics & Sports,Curling Ice,Beer Store,Intersection,Park,Metro Station,Men's Store,Motel,Mexican Restaurant,Mediterranean Restaurant


#### Clustering Neighbourhoods k-means

In [73]:
from sklearn.cluster import KMeans

In [92]:

k_num = 5

toronto_clustering = Gb_neighbourhood_toronto.drop('Neighbourhood', 1)

k_means = KMeans(n_clusters = k_num, random_state=0).fit(toronto_clustering)
k_means

KMeans(n_clusters=5, random_state=0)

In [93]:
k_means.labels_[0:100]

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1,
       1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 2,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 0, 1,
       2, 1, 2, 1, 1, 1, 1, 2], dtype=int32)

In [77]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', k_means.labels_)

### Preparing the dataframe in order to plot it

In [85]:
toronto_combined = merge_data.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on = 'Neighbourhood')
toronto_combined_nonan = toronto_combined.dropna(subset = ['Cluster Labels'])
toronto_combined_nonan

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,3.0,Fast Food Restaurant,Market,Martial Arts School,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,1.0,Bar,Accessories Store,Market,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,1.0,Medical Center,Rental Car Location,Breakfast Spot,Intersection,Electronics Store,Bank,Mexican Restaurant,Restaurant,Modern European Restaurant,Mobile Phone Shop,Middle Eastern Restaurant,Miscellaneous Shop
3,M1G,Scarborough,Woburn,43.770992,-79.216917,1.0,Coffee Shop,Korean BBQ Restaurant,Accessories Store,Movie Theater,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,1.0,Caribbean Restaurant,Thai Restaurant,Bank,Gas Station,Athletics & Sports,Fried Chicken Joint,Bakery,Hakka Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mexican Restaurant,Modern European Restaurant
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188,2.0,Park,Convenience Store,Accessories Store,Movie Theater,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop
99,M9P,Etobicoke,Westmount,43.696319,-79.532242,1.0,Pizza Place,Chinese Restaurant,Sandwich Place,Intersection,Coffee Shop,Middle Eastern Restaurant,Discount Store,Motel,Monument / Landmark,Mobile Phone Shop,Modern European Restaurant,Museum
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724,1.0,Sandwich Place,Park,Mobile Phone Shop,Bus Line,Accessories Store,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437,1.0,Grocery Store,Pharmacy,Fast Food Restaurant,Pizza Place,Fried Chicken Joint,Beer Store,Sandwich Place,Middle Eastern Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Men's Store


### Plot the clusters

In [82]:
import matplotlib.cm as cm
import matplotlib.colors as colors

In [99]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)


x = np.arange(k_num)
ys = [i + x + (i*x)**2 for i in range(k_num)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]


markers_colors = []
for lat, lon, poi, cluster in zip(toronto_combined_nonan['Latitude'], toronto_combined_nonan['Longitude'], toronto_combined_nonan['Neighbourhood'], toronto_combined_nonan['Cluster Labels']):
    label = folium.Popup('Cluster ' + str(int(cluster) +1) + '\n' + str(poi) , parse_html = True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)]
        ).add_to(map_clusters)
        
map_clusters

### Cluster 1 (Red)

In [89]:
toronto_cluster_0  = toronto_combined_nonan.loc[toronto_combined_nonan['Cluster Labels'] == 0, toronto_combined_nonan.columns[[1] + list(range(5, toronto_combined_nonan.shape[1]))]]
toronto_cluster_0

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue
94,Etobicoke,0.0,Bakery,Accessories Store,Market,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop


### Cluster 2 (Purple)

In [94]:
toronto_cluster_1  = toronto_combined_nonan.loc[toronto_combined_nonan['Cluster Labels'] == 1, toronto_combined_nonan.columns[[1] + list(range(5, toronto_combined_nonan.shape[1]))]]
toronto_cluster_1

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue
1,Scarborough,1.0,Bar,Accessories Store,Market,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop
2,Scarborough,1.0,Medical Center,Rental Car Location,Breakfast Spot,Intersection,Electronics Store,Bank,Mexican Restaurant,Restaurant,Modern European Restaurant,Mobile Phone Shop,Middle Eastern Restaurant,Miscellaneous Shop
3,Scarborough,1.0,Coffee Shop,Korean BBQ Restaurant,Accessories Store,Movie Theater,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop
4,Scarborough,1.0,Caribbean Restaurant,Thai Restaurant,Bank,Gas Station,Athletics & Sports,Fried Chicken Joint,Bakery,Hakka Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mexican Restaurant,Modern European Restaurant
5,Scarborough,1.0,Playground,Spa,Plane,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,North York,1.0,Pizza Place,Restaurant,Movie Theater,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop
99,Etobicoke,1.0,Pizza Place,Chinese Restaurant,Sandwich Place,Intersection,Coffee Shop,Middle Eastern Restaurant,Discount Store,Motel,Monument / Landmark,Mobile Phone Shop,Modern European Restaurant,Museum
100,Etobicoke,1.0,Sandwich Place,Park,Mobile Phone Shop,Bus Line,Accessories Store,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop
101,Etobicoke,1.0,Grocery Store,Pharmacy,Fast Food Restaurant,Pizza Place,Fried Chicken Joint,Beer Store,Sandwich Place,Middle Eastern Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Men's Store


### Cluster 3 (Blue)

In [95]:
toronto_cluster_2  = toronto_combined_nonan.loc[toronto_combined_nonan['Cluster Labels'] == 2, toronto_combined_nonan.columns[[1] + list(range(5, toronto_combined_nonan.shape[1]))]]
toronto_cluster_2

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue
14,Scarborough,2.0,Playground,Park,Intersection,Pet Store,Motel,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant
21,North York,2.0,Park,Accessories Store,Movie Theater,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop
23,North York,2.0,Park,Convenience Store,Accessories Store,Movie Theater,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop
40,East York,2.0,Intersection,Park,Convenience Store,Museum,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop
44,Central Toronto,2.0,Park,Swim School,Bus Line,Accessories Store,Motel,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant
50,Downtown Toronto,2.0,Park,Playground,Trail,Movie Theater,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop
74,York,2.0,Park,Women's Store,Pool,Accessories Store,Motel,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant
90,Etobicoke,2.0,Park,River,Accessories Store,Movie Theater,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop
98,York,2.0,Park,Convenience Store,Accessories Store,Movie Theater,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop


### Cluster 4 (Green)

In [96]:
toronto_cluster_3  = toronto_combined_nonan.loc[toronto_combined_nonan['Cluster Labels'] == 3, toronto_combined_nonan.columns[[1] + list(range(5, toronto_combined_nonan.shape[1]))]]
toronto_cluster_3

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue
0,Scarborough,3.0,Fast Food Restaurant,Market,Martial Arts School,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop


### Cluster 5 (Orange)

In [97]:
toronto_cluster_4  = toronto_combined_nonan.loc[toronto_combined_nonan['Cluster Labels'] == 4, toronto_combined_nonan.columns[[1] + list(range(5, toronto_combined_nonan.shape[1]))]]
toronto_cluster_4

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue
91,Etobicoke,4.0,Baseball Field,Business Service,Accessories Store,Movie Theater,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop
97,North York,4.0,Baseball Field,Accessories Store,Market,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop
