# Segmenting and Clustering Neighborhoods Pt3

### Importing libraries

In [3]:
#Numpy to handle data in vectorized manner
import numpy as np

import pandas as pd

import json

from geopy.geocoders import Nominatim #convert address into latitude and longitude

import requests

#to transform json into pandas dataframe
from pandas.io.json import json_normalize

import matplotlib.cm as cm
import matplotlib.colors as colors
%matplotlib inline

from sklearn.cluster import KMeans

import folium

### Reading Toronto csv made previously

In [5]:
df=pd.read_csv('df_toronto.csv')
columns=df.columns
df.head()

Unnamed: 0.1,Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,0,M1B,Scarborough,"Malvern , Rouge",43.806686,-79.194353
1,1,M1C,Scarborough,"Rouge Hill , Port Union , Highland Creek",43.784535,-79.160497
2,2,M1E,Scarborough,"Guildwood , Morningside , West Hill",43.763573,-79.188711
3,3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


### Dropping unneeded columns

In [6]:
df=df.drop(columns[0],axis=1)
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern , Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill , Port Union , Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood , Morningside , West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [14]:
df.shape

(103, 5)

### Number of Boroughs and Neighborhoods 

In [15]:
print('There are ' + str(len(df['Borough'].unique())) +' Boroughs')

There are 10 Boroughs


In [16]:
print('There are ' + str(len(df['Neighborhood'].unique())) +' Neighborhoods')

There are 98 Neighborhoods


### Finding the Latitude and Longitude of Canada

In [17]:
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="can_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


### Creating Maps Depicting the different Neighborhoods

In [27]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[(latitude+0.075), longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='green',
        fill=True,
        fill_color='#98FB98',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Retrieving Venues from FourSquare

In [28]:
# Define Foursquare Credentials and Version
LIMIT = 100

CLIENT_ID = 'LPFYDO0GVMD4PSHPJATGKYZQXBZ2VDHHQVHTLZ2ZCEEMPY5D' # your Foursquare ID
CLIENT_SECRET = 'JEPOMUYWQAAZBKK2DUO0WXGUVSIOMIBQIYCKBKRWJZZSTFUO' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

In [29]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [32]:
toronto_venues = getNearbyVenues(names=df['Neighborhood'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude'],
                                  )

Malvern , Rouge 
Rouge Hill , Port Union , Highland Creek 
Guildwood , Morningside , West Hill 
Woburn 
Cedarbrae 
Scarborough Village 
Kennedy Park , Ionview , East Birchmount Park 
Golden Mile , Clairlea , Oakridge 
Cliffside , Cliffcrest , Scarborough Village West 
Birch Cliff , Cliffside West 
Dorset Park , Wexford Heights , Scarborough Town Centre 
Wexford , Maryvale 
Agincourt 
Clarks Corners , Tam O'Shanter , Sullivan 
Milliken , Agincourt North , Steeles East , L'Amoreaux East 
Steeles West , L'Amoreaux West 
Upper Rouge 
Hillcrest Village 
Fairview , Henry Farm , Oriole 
Bayview Village 
York Mills , Silver Hills 
Willowdale , Newtonbrook 
Willowdale 
York Mills West 
Willowdale 
Parkwoods 
Don Mills 
Don Mills 
Bathurst Manor , Wilson Heights , Downsview North 
Northwood Park , York University 
Downsview 
Downsview 
Downsview 
Downsview 
Victoria Village 
Parkview Hill , Woodbine Gardens 
Woodbine Heights 
The Beaches 
Leaside 
Thorncliffe Park 
East Toronto 
The Danforth Wes

In [33]:
toronto_venues.shape

(2125, 7)

### Seeing the venues dataset 

In [34]:
toronto_venues.head(15)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Malvern , Rouge",43.806686,-79.194353,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
1,"Rouge Hill , Port Union , Highland Creek",43.784535,-79.160497,RIGHT WAY TO GOLF,43.785177,-79.161108,Golf Course
2,"Rouge Hill , Port Union , Highland Creek",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
3,"Guildwood , Morningside , West Hill",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store
4,"Guildwood , Morningside , West Hill",43.763573,-79.188711,Big Bite Burrito,43.766299,-79.19072,Mexican Restaurant
5,"Guildwood , Morningside , West Hill",43.763573,-79.188711,Enterprise Rent-A-Car,43.764076,-79.193406,Rental Car Location
6,"Guildwood , Morningside , West Hill",43.763573,-79.188711,RBC Royal Bank,43.76679,-79.191151,Bank
7,"Guildwood , Morningside , West Hill",43.763573,-79.188711,Woburn Medical Centre,43.766631,-79.192286,Medical Center
8,"Guildwood , Morningside , West Hill",43.763573,-79.188711,Lawrence Ave E & Kingston Rd,43.767704,-79.18949,Intersection
9,"Guildwood , Morningside , West Hill",43.763573,-79.188711,Eggsmart,43.7678,-79.190466,Breakfast Spot


### Creating a Map finding locations of the Coffee Shops

In [39]:
coffee_shop=toronto_venues[toronto_venues['Venue Category']=='Coffee Shop']
coffee_shop.head(10)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
10,Woburn,43.770992,-79.216917,Starbucks,43.770037,-79.221156,Coffee Shop
11,Woburn,43.770992,-79.216917,Tim Hortons,43.770827,-79.223078,Coffee Shop
24,"Kennedy Park , Ionview , East Birchmount Park",43.727929,-79.262029,Tim Hortons,43.726895,-79.266157,Coffee Shop
84,"Steeles West , L'Amoreaux West",43.799525,-79.318389,Tim Hortons,43.799102,-79.318715,Coffee Shop
111,"Fairview , Henry Farm , Oriole",43.778517,-79.346556,Aroma Espresso Bar,43.7777,-79.344652,Coffee Shop
113,"Fairview , Henry Farm , Oriole",43.778517,-79.346556,Starbucks,43.77799,-79.344091,Coffee Shop
123,"Fairview , Henry Farm , Oriole",43.778517,-79.346556,Tim Hortons,43.777964,-79.344715,Coffee Shop
127,"Fairview , Henry Farm , Oriole",43.778517,-79.346556,Tim Hortons,43.774993,-79.346303,Coffee Shop
130,"Fairview , Henry Farm , Oriole",43.778517,-79.346556,Tim Hortons,43.775249,-79.34774,Coffee Shop
173,Willowdale,43.77012,-79.408493,Starbucks,43.768353,-79.413046,Coffee Shop


In [55]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[(latitude+0.075), longitude], zoom_start=11)

# add markers to map
for lat, lng, borough, neighborhood in zip(coffee_shop['Venue Latitude'], coffee_shop['Venue Longitude'], coffee_shop['Venue'], coffee_shop['Neighborhood']):
    venue=venue.encode('ascii', 'ignore').decode('ascii')
    label = '{},{}'.format(venue,neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='green',
        fill=True,
        fill_color='#98FB98',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Counting Number of Venues from each Neighborhood

In [56]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,4,4,4,4,4,4
"Alderwood , Long Branch",10,10,10,10,10,10
"Bathurst Manor , Wilson Heights , Downsview North",19,19,19,19,19,19
Bayview Village,4,4,4,4,4,4
"Bedford Park , Lawrence Manor East",26,26,26,26,26,26
Berczy Park,56,56,56,56,56,56
"Birch Cliff , Cliffside West",4,4,4,4,4,4
"Brockton , Parkdale Village , Exhibition Place",23,23,23,23,23,23
Business reply mail Processing CentrE,14,14,14,14,14,14
"CN Tower , King and Spadina , Railway Lands , Harbourfront West , Bathurst",18,18,18,18,18,18


### Seeing the unique categories

In [57]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 267 uniques categories.


### One hot encoding

In [60]:
toronto=pd.get_dummies(toronto_venues[['Venue Category']], prefix='', prefix_sep="")
toronto['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto.columns[-1]] + list(toronto.columns[:-1])
toronto= toronto[fixed_columns]

toronto.head()

Unnamed: 0,Yoga Studio,Accessories Store,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Group By Neighborhood

In [61]:
toronto_grouped = toronto.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
0,Agincourt,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00,0.000000,0.000000,0.000000,0.0000,0.00,0.000000,0.0,0.000000
1,"Alderwood , Long Branch",0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00,0.000000,0.000000,0.000000,0.0000,0.00,0.000000,0.0,0.000000
2,"Bathurst Manor , Wilson Heights , Downsview No...",0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00,0.000000,0.000000,0.052632,0.0000,0.00,0.000000,0.0,0.000000
3,Bayview Village,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00,0.000000,0.000000,0.000000,0.0000,0.00,0.000000,0.0,0.000000
4,"Bedford Park , Lawrence Manor East",0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.038462,...,0.000000,0.00,0.000000,0.000000,0.000000,0.0000,0.00,0.000000,0.0,0.000000
5,Berczy Park,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00,0.017857,0.000000,0.000000,0.0000,0.00,0.000000,0.0,0.000000
6,"Birch Cliff , Cliffside West",0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00,0.000000,0.000000,0.000000,0.0000,0.00,0.000000,0.0,0.000000
7,"Brockton , Parkdale Village , Exhibition Place",0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00,0.000000,0.000000,0.000000,0.0000,0.00,0.000000,0.0,0.000000
8,Business reply mail Processing CentrE,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00,0.000000,0.000000,0.000000,0.0000,0.00,0.000000,0.0,0.000000
9,"CN Tower , King and Spadina , Railway Lands , ...",0.000000,0.0,0.055556,0.055556,0.055556,0.111111,0.166667,0.111111,0.000000,...,0.000000,0.00,0.000000,0.000000,0.000000,0.0000,0.00,0.000000,0.0,0.000000


### Seeing which Neighborhood has the most number of hospitals

In [75]:
toronto_grouped['Neighborhood'][toronto_grouped['Hospital']==toronto_grouped['Hospital'].max()]

43    Kensington Market , Chinatown , Grange Park 
Name: Neighborhood, dtype: object

### Printing list of top 5 venues from each Neighborhood

In [76]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt ----
                       venue  freq
0             Breakfast Spot  0.25
1  Latin American Restaurant  0.25
2               Skating Rink  0.25
3                     Lounge  0.25
4                Yoga Studio  0.00


----Alderwood , Long Branch ----
            venue  freq
0     Pizza Place   0.2
1        Pharmacy   0.1
2            Pool   0.1
3  Sandwich Place   0.1
4             Pub   0.1


----Bathurst Manor , Wilson Heights , Downsview North ----
            venue  freq
0            Bank  0.11
1     Coffee Shop  0.11
2        Pharmacy  0.05
3  Ice Cream Shop  0.05
4   Shopping Mall  0.05


----Bayview Village ----
                 venue  freq
0                 Café  0.25
1                 Bank  0.25
2  Japanese Restaurant  0.25
3   Chinese Restaurant  0.25
4          Yoga Studio  0.00


----Bedford Park , Lawrence Manor East ----
                venue  freq
0      Sandwich Place  0.08
1         Pizza Place  0.08
2          Restaurant  0.08
3    Sushi Restaurant  0.08


                venue  freq
0        Food Service   0.5
1      Baseball Field   0.5
2         Men's Store   0.0
3   Mobile Phone Shop   0.0
4  Miscellaneous Shop   0.0


----Humewood-Cedarvale ----
                venue  freq
0               Field  0.33
1               Trail  0.33
2        Hockey Arena  0.33
3   Mobile Phone Shop  0.00
4  Miscellaneous Shop  0.00


----India Bazaar , The Beaches West ----
                  venue  freq
0  Fast Food Restaurant  0.10
1        Ice Cream Shop  0.05
2         Burrito Place  0.05
3               Brewery  0.05
4     Fish & Chips Shop  0.05


----Kennedy Park , Ionview , East Birchmount Park ----
              venue  freq
0    Discount Store  0.33
1     Train Station  0.17
2  Department Store  0.17
3        Hobby Shop  0.17
4       Coffee Shop  0.17


----Kensington Market , Chinatown , Grange Park ----
                   venue  freq
0                   Café  0.09
1            Coffee Shop  0.07
2           Dessert Shop  0.05
3  Vietnamese Resta

                 venue  freq
0                 Café  0.10
1          Coffee Shop  0.08
2              Brewery  0.05
3               Bakery  0.05
4  American Restaurant  0.05


----Summerhill West , Rathnelly , South Hill , Forest Hill SE , Deer Park ----
                venue  freq
0                 Pub  0.12
1         Coffee Shop  0.12
2  Light Rail Station  0.06
3    Sushi Restaurant  0.06
4          Restaurant  0.06


----The Annex , North Midtown , Yorkville ----
               venue  freq
0     Sandwich Place  0.13
1               Café  0.13
2        Coffee Shop  0.09
3     Cosmetics Shop  0.04
4  Indian Restaurant  0.04


----The Beaches ----
               venue  freq
0              Trail  0.17
1  Health Food Store  0.17
2                Pub  0.17
3   Asian Restaurant  0.17
4        Coffee Shop  0.17


----The Danforth West , Riverdale ----
                    venue  freq
0        Greek Restaurant  0.19
1      Italian Restaurant  0.07
2             Coffee Shop  0.07
3  Furniture

### Creating a dataframe of above items

In [140]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [168]:
um_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Agincourt,Latin American Restaurant,Skating Rink,Lounge,Breakfast Spot,Women's Store
1,"Alderwood , Long Branch",Pizza Place,Gym,Athletics & Sports,Pharmacy,Pool
2,"Bathurst Manor , Wilson Heights , Downsview No...",Bank,Coffee Shop,Pharmacy,Supermarket,Deli / Bodega
3,Bayview Village,Café,Bank,Chinese Restaurant,Japanese Restaurant,Dim Sum Restaurant
4,"Bedford Park , Lawrence Manor East",Sushi Restaurant,Sandwich Place,Italian Restaurant,Coffee Shop,Restaurant


### Clustering Neighborhoods

In [169]:
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 3, 1, 3, 1, 1, 1, 1, 3, 1])

In [170]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df


# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

### Merged Dataset of Cluster, Common Venues and Neighborhood

In [172]:
toronto_merged['Cluster Labels'].isnull().count()
toronto_merged.head(20)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M1B,Scarborough,"Malvern , Rouge",43.806686,-79.194353,2.0,Fast Food Restaurant,Dim Sum Restaurant,Falafel Restaurant,Event Space,Ethiopian Restaurant
1,M1C,Scarborough,"Rouge Hill , Port Union , Highland Creek",43.784535,-79.160497,1.0,Golf Course,Bar,Women's Store,Drugstore,Discount Store
2,M1E,Scarborough,"Guildwood , Morningside , West Hill",43.763573,-79.188711,3.0,Rental Car Location,Breakfast Spot,Mexican Restaurant,Electronics Store,Bank
3,M1G,Scarborough,Woburn,43.770992,-79.216917,1.0,Coffee Shop,Korean Restaurant,Convenience Store,Women's Store,Drugstore
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,3.0,Fried Chicken Joint,Gas Station,Bank,Hakka Restaurant,Caribbean Restaurant
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,0.0,Playground,Women's Store,Donut Shop,Dim Sum Restaurant,Diner
6,M1K,Scarborough,"Kennedy Park , Ionview , East Birchmount Park",43.727929,-79.262029,1.0,Discount Store,Department Store,Hobby Shop,Coffee Shop,Train Station
7,M1L,Scarborough,"Golden Mile , Clairlea , Oakridge",43.711112,-79.284577,1.0,Bakery,Park,Ice Cream Shop,Metro Station,Bus Station
8,M1M,Scarborough,"Cliffside , Cliffcrest , Scarborough Village W...",43.716316,-79.239476,1.0,American Restaurant,Motel,Dessert Shop,Diner,Discount Store
9,M1N,Scarborough,"Birch Cliff , Cliffside West",43.692657,-79.264848,1.0,College Stadium,Café,Skating Rink,General Entertainment,Donut Shop


In [173]:
toronto_merged=toronto_merged.dropna()

### Visualising the Clusters

In [160]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Cluster 1

In [161]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
5,Scarborough,0.0,Playground,Women's Store,Donut Shop,Dim Sum Restaurant,Diner
48,Central Toronto,0.0,Restaurant,Playground,Donut Shop,Dim Sum Restaurant,Diner
72,North York,0.0,Japanese Restaurant,Bakery,Pub,Playground,Women's Store


### Cluster 2

In [162]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
1,Scarborough,1.0,Golf Course,Bar,Women's Store,Drugstore,Discount Store
3,Scarborough,1.0,Coffee Shop,Korean Restaurant,Convenience Store,Women's Store,Drugstore
6,Scarborough,1.0,Discount Store,Department Store,Hobby Shop,Coffee Shop,Train Station
7,Scarborough,1.0,Bakery,Park,Ice Cream Shop,Metro Station,Bus Station
8,Scarborough,1.0,American Restaurant,Motel,Dessert Shop,Diner,Discount Store
9,Scarborough,1.0,College Stadium,Café,Skating Rink,General Entertainment,Donut Shop
11,Scarborough,1.0,Middle Eastern Restaurant,Sandwich Place,Breakfast Spot,Shopping Mall,Auto Garage
12,Scarborough,1.0,Latin American Restaurant,Skating Rink,Lounge,Breakfast Spot,Women's Store
17,North York,1.0,Pool,Golf Course,Dog Run,Mediterranean Restaurant,Women's Store
18,North York,1.0,Clothing Store,Coffee Shop,Fast Food Restaurant,Japanese Restaurant,Tea Room


### Cluster 3

In [163]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Scarborough,2.0,Fast Food Restaurant,Dim Sum Restaurant,Falafel Restaurant,Event Space,Ethiopian Restaurant
80,York,2.0,Fast Food Restaurant,Discount Store,Sandwich Place,Women's Store,Donut Shop


### Cluster 4

In [164]:

toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
2,Scarborough,3.0,Rental Car Location,Breakfast Spot,Mexican Restaurant,Electronics Store,Bank
4,Scarborough,3.0,Fried Chicken Joint,Gas Station,Bank,Hakka Restaurant,Caribbean Restaurant
10,Scarborough,3.0,Indian Restaurant,Pet Store,Vietnamese Restaurant,Chinese Restaurant,Doner Restaurant
13,Scarborough,3.0,Pharmacy,Pizza Place,Bank,Fried Chicken Joint,Noodle House
15,Scarborough,3.0,Chinese Restaurant,Fast Food Restaurant,Coffee Shop,Pharmacy,Shopping Mall
19,North York,3.0,Café,Bank,Chinese Restaurant,Japanese Restaurant,Dim Sum Restaurant
30,North York,3.0,Grocery Store,Park,Athletics & Sports,Airport,Gym / Fitness Center
31,North York,3.0,Grocery Store,Park,Athletics & Sports,Airport,Gym / Fitness Center
32,North York,3.0,Grocery Store,Park,Athletics & Sports,Airport,Gym / Fitness Center
33,North York,3.0,Grocery Store,Park,Athletics & Sports,Airport,Gym / Fitness Center


In [None]:
### Cluster 5

In [165]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
14,Scarborough,4.0,Park,Playground,Donut Shop,Dim Sum Restaurant,Diner
20,North York,4.0,Cafeteria,Park,Colombian Restaurant,Comfort Food Restaurant,Event Space
23,North York,4.0,Park,Bank,Bar,Convenience Store,Donut Shop
25,North York,4.0,Park,Fireworks Store,Food & Drink Shop,Donut Shop,Diner
40,East York,4.0,Park,Metro Station,Convenience Store,Donut Shop,Diner
44,Central Toronto,4.0,Park,Swim School,Bus Line,Doner Restaurant,Diner
50,Downtown Toronto,4.0,Park,Playground,Trail,Electronics Store,Eastern European Restaurant
74,York,4.0,Park,Pool,Women's Store,Airport,Farmers Market
79,North York,4.0,Park,Bakery,Construction & Landscaping,Drugstore,Discount Store
90,Etobicoke,4.0,Park,Pool,River,Dog Run,Dessert Shop


In [167]:
dr.head(30)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
