# Toronto Neighborhoods

In [5]:
import pandas as pd
import numpy as np

# Copied data from Wikipedia and then pasted into an Excel workbook. Removed boroughs that were not assigned a neighborhood and manually combined duplicate postal code neighborhoods into one respective borough. Saved as a csv file and then read into Pandas dataframe

In [6]:
df  = pd.read_csv("toronto.csv")
df

Unnamed: 0,Postalcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront, Regent Park"
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Queen's Park,Queen's Park
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Rouge, Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens, Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson, Garden District"


In [8]:
df.shape

(103, 3)

# Downloaded the CSV file with Toronto LAT/LONG values

In [9]:
df1 = pd.read_csv("geo.csv")
df1

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


# Sorted the original toronto dataset by postal code to line up with geo csv/dataframe

In [10]:
dft = df.sort_values(by='Postalcode')

In [11]:
dft

Unnamed: 0,Postalcode,Borough,Neighborhood
6,M1B,Scarborough,"Rouge, Malvern"
12,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
18,M1E,Scarborough,"Guildwood, Morningside, West Hill"
22,M1G,Scarborough,Woburn
26,M1H,Scarborough,Cedarbrae
32,M1J,Scarborough,Scarborough Village
38,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
44,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
51,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
58,M1N,Scarborough,"Birch Cliff, Cliffside West"


# Elimnated the postal code from dataframe

In [12]:
dfONE = df1.drop(columns=["Postal Code"])
dfONE

Unnamed: 0,Latitude,Longitude
0,43.806686,-79.194353
1,43.784535,-79.160497
2,43.763573,-79.188711
3,43.770992,-79.216917
4,43.773136,-79.239476
5,43.744734,-79.239476
6,43.727929,-79.262029
7,43.711112,-79.284577
8,43.716316,-79.239476
9,43.692657,-79.264848


# Joined the two dataframes together to combine Lat/long geo data with original Toronto neighborhoods

In [13]:
toronto = dft.join(dfONE)
toronto

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
6,M1B,Scarborough,"Rouge, Malvern",43.727929,-79.262029
12,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.794200,-79.262029
18,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.778517,-79.346556
22,M1G,Scarborough,Woburn,43.770120,-79.408493
26,M1H,Scarborough,Cedarbrae,43.745906,-79.352188
32,M1J,Scarborough,Scarborough Village,43.728496,-79.495697
38,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.709060,-79.363452
44,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.728020,-79.388790
51,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.667967,-79.367675
58,M1N,Scarborough,"Birch Cliff, Cliffside West",43.650571,-79.384568


In [61]:
import json 
from geopy.geocoders import Nominatim 

import requests 
from pandas.io.json import json_normalize 

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

import folium 

print('Libraries imported.')

Libraries imported.


# Getting coordinates of Toronto

In [15]:
address = 'Toronto, CA'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


# Creating initial map

In [17]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(toronto['Latitude'], toronto['Longitude'], toronto['Neighborhood']):
    label = folium.Popup(label)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)  
    
map_toronto

In [18]:
CLIENT_ID = 'FZ5PC41XREUOZVSTK4PDUTWECH0AEDIOXORFUC51PKQWBNAP' # your Foursquare ID
CLIENT_SECRET = 'UYRHL3MMXRAIWBMBVXD0HXB3X2LJSMGVBTZY1XYNENWY2PD2' # your Foursquare Secret
VERSION = '20190410' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: FZ5PC41XREUOZVSTK4PDUTWECH0AEDIOXORFUC51PKQWBNAP
CLIENT_SECRET:UYRHL3MMXRAIWBMBVXD0HXB3X2LJSMGVBTZY1XYNENWY2PD2


In [26]:
toronto.loc[44, 'Neighborhood']

'Clairlea, Golden Mile, Oakridge'

In [27]:
neighborhood_latitude = toronto.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = toronto.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = toronto.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Parkwoods are 43.806686299999996, -79.19435340000001.


In [28]:
radius = 500
LIMIT = 100

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)

In [22]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5cae5bc49fb6b7131249d3d6'},
 'response': {'groups': [{'items': [{'reasons': {'count': 0,
       'items': [{'reasonName': 'globalInteractionReason',
         'summary': 'This spot is popular',
         'type': 'general'}]},
      'referralId': 'e-0-4bb6b9446edc76b0d771311c-0',
      'venue': {'categories': [{'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/fastfood_',
          'suffix': '.png'},
         'id': '4bf58dd8d48988d16e941735',
         'name': 'Fast Food Restaurant',
         'pluralName': 'Fast Food Restaurants',
         'primary': True,
         'shortName': 'Fast Food'}],
       'id': '4bb6b9446edc76b0d771311c',
       'location': {'cc': 'CA',
        'city': 'Toronto',
        'country': 'Canada',
        'crossStreet': 'Morningside & Sheppard',
        'distance': 387,
        'formattedAddress': ['Toronto ON', 'Canada'],
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.80744841934756,
          'ln

# Getting nearby venues for Toronto Neighborhoods

In [29]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [30]:
toronto_venues = getNearbyVenues(names=toronto['Neighborhood'],
                                   latitudes=toronto['Latitude'],
                                   longitudes=toronto['Longitude']
                                  )

Rouge, Malvern
Highland Creek, Rouge Hill, Port Union
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
East Birchmount Park, Ionview, Kennedy Park
Clairlea, Golden Mile, Oakridge
Cliffcrest, Cliffside, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Scarborough Town Centre, Wexford Heights
Maryvale, Wexford
Agincourt
Clarks Corners, Sullivan, Tam O'Shanter
Agincourt North, L'Amoreaux East, Milliken, Steeles East
L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
Silver Hills, York Mills
Newtonbrook, Willowdale
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Flemingdon Park, Don Mills South
Bathurst Manor, Downsview North, Wilson Heights
Northwood Park, York University
CFB Toronto, Downsview East
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Woodbine Gardens, Parkview Hill
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto
The Danforth West, 

In [31]:
print(toronto_venues.shape)
toronto_venues.head()

(2244, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge, Malvern",43.727929,-79.262029,Giant Tiger,43.727447,-79.26624,Department Store
1,"Rouge, Malvern",43.727929,-79.262029,Tim Hortons,43.726895,-79.266157,Coffee Shop
2,"Rouge, Malvern",43.727929,-79.262029,Hakka No.1,43.727688,-79.266057,Chinese Restaurant
3,"Rouge, Malvern",43.727929,-79.262029,Dollarama,43.726904,-79.265886,Discount Store
4,"Rouge, Malvern",43.727929,-79.262029,Tandy Leather,43.726974,-79.266513,Hobby Shop


In [32]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",3,3,3,3,3,3
Agincourt,20,20,20,20,20,20
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",39,39,39,39,39,39
"Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown",10,10,10,10,10,10
"Bathurst Manor, Downsview North, Wilson Heights",18,18,18,18,18,18
Bayview Village,17,17,17,17,17,17
"Bedford Park, Lawrence Manor East",100,100,100,100,100,100
Berczy Park,2,2,2,2,2,2
"Birch Cliff, Cliffside West",100,100,100,100,100,100
"Bloordale Gardens, Eringate, Markland Wood, Old Burnhamthorpe",5,5,5,5,5,5


In [33]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 275 uniques categories.


In [34]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [35]:
toronto_onehot.shape

(2244, 275)

In [36]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
0,"Adelaide, King, Richmond",0.000000,0.00,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,...,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,0.000000,0.000000
1,Agincourt,0.000000,0.00,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,0.000000,0.000000
2,"Agincourt North, L'Amoreaux East, Milliken, St...",0.025641,0.00,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,0.025641,0.000000
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.000000,0.00,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,0.000000,0.000000
4,"Bathurst Manor, Downsview North, Wilson Heights",0.000000,0.00,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.00000,0.00,0.000000,0.000000,0.055556,0.000000,0.000000,0.00,0.000000,0.000000
5,Bayview Village,0.058824,0.00,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.058824,0.00,0.000000,0.000000
6,"Bedford Park, Lawrence Manor East",0.000000,0.01,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.00000,0.00,0.010000,0.000000,0.000000,0.000000,0.000000,0.00,0.000000,0.000000
7,Berczy Park,0.000000,0.00,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,0.000000,0.000000
8,"Birch Cliff, Cliffside West",0.000000,0.01,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.00000,0.00,0.010000,0.000000,0.000000,0.000000,0.000000,0.01,0.000000,0.000000
9,"Bloordale Gardens, Eringate, Markland Wood, Ol...",0.000000,0.00,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,0.000000,0.000000


In [37]:
toronto_grouped.shape

(100, 275)

# Top 5 Venues per Neighborhood

In [39]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
           venue  freq
0       Bus Stop  0.33
1        Airport  0.33
2           Park  0.33
3    Yoga Studio  0.00
4  Metro Station  0.00


----Agincourt----
                    venue  freq
0                    Café  0.10
1             Coffee Shop  0.10
2          Breakfast Spot  0.10
3  Furniture / Home Store  0.05
4    Caribbean Restaurant  0.05


----Agincourt North, L'Amoreaux East, Milliken, Steeles East----
                 venue  freq
0          Coffee Shop  0.23
1  Japanese Restaurant  0.05
2         Burger Joint  0.05
3                Diner  0.05
4                  Gym  0.05


----Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown----
            venue  freq
0     Pizza Place   0.2
1     Coffee Shop   0.1
2    Skating Rink   0.1
3  Sandwich Place   0.1
4             Pub   0.1


----Bathurst Manor, Downsview North, Wilson Heights----
                 venue  freq
0          Coffee Shop  0

                venue  freq
0         Coffee Shop  0.15
1  Italian Restaurant  0.05
2        Burger Joint  0.04
3                Café  0.04
4      Sandwich Place  0.04


----Fairview, Henry Farm, Oriole----
                  venue  freq
0    Athletics & Sports  0.25
1  Gym / Fitness Center  0.25
2           Coffee Shop  0.25
3         Grocery Store  0.25
4         Metro Station  0.00


----First Canadian Place, Underground city----
                        venue  freq
0              Baseball Field   0.5
1      Furniture / Home Store   0.5
2                 Men's Store   0.0
3  Modern European Restaurant   0.0
4           Mobile Phone Shop   0.0


----Flemingdon Park, Don Mills South----
                 venue  freq
0          Pizza Place   0.2
1      Thai Restaurant   0.1
2         Noodle House   0.1
3   Chinese Restaurant   0.1
4  Rental Car Location   0.1


----Forest Hill North, Forest Hill West----
              venue  freq
0  Airport Terminal  0.15
1   Airport Service  0.15
2    Ai

               venue  freq
0     Baseball Field  0.33
1         Food Truck  0.33
2   Business Service  0.33
3  Martial Arts Dojo  0.00
4     Massage Studio  0.00


----Silver Hills, York Mills----
              venue  freq
0       Pizza Place  0.09
1    Clothing Store  0.09
2    Sandwich Place  0.09
3  Asian Restaurant  0.09
4      Dance Studio  0.09


----St. James Town----
                    venue  freq
0    Fast Food Restaurant  0.17
1      Chinese Restaurant  0.17
2  Thrift / Vintage Store  0.08
3             Pizza Place  0.08
4             Coffee Shop  0.08


----Stn A PO Boxes 25 The Esplanade----
                    venue  freq
0          Discount Store  0.07
1  Thrift / Vintage Store  0.07
2             Social Club  0.07
3            Burger Joint  0.07
4             Flower Shop  0.07


----Studio District----
                       venue  freq
0                Coffee Shop  0.08
1             Clothing Store  0.07
2                       Café  0.04
3             Cosmetics Shop  

In [40]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [43]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Park,Airport,Bus Stop,Electronics Store,Eastern European Restaurant,Empanada Restaurant,Dumpling Restaurant,Drugstore,Donut Shop,Dance Studio
1,Agincourt,Coffee Shop,Café,Breakfast Spot,Performing Arts Venue,Bar,Gym,Furniture / Home Store,Italian Restaurant,Falafel Restaurant,Convenience Store
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Coffee Shop,Diner,Gym,Burger Joint,Japanese Restaurant,Yoga Studio,Fast Food Restaurant,Creperie,Portuguese Restaurant,Café
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Pizza Place,Coffee Shop,Gym,Dance Studio,Skating Rink,Pharmacy,Sandwich Place,Pub,Pool,Dessert Shop
4,"Bathurst Manor, Downsview North, Wilson Heights",Coffee Shop,Frozen Yogurt Shop,Diner,Sandwich Place,Bridal Shop,Fast Food Restaurant,Deli / Bodega,Bank,Restaurant,Supermarket


In [45]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Park,Airport,Bus Stop,Electronics Store,Eastern European Restaurant,Empanada Restaurant,Dumpling Restaurant,Drugstore,Donut Shop,Dance Studio
1,Agincourt,Coffee Shop,Café,Breakfast Spot,Performing Arts Venue,Bar,Gym,Furniture / Home Store,Italian Restaurant,Falafel Restaurant,Convenience Store
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Coffee Shop,Diner,Gym,Burger Joint,Japanese Restaurant,Yoga Studio,Fast Food Restaurant,Creperie,Portuguese Restaurant,Café
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Pizza Place,Coffee Shop,Gym,Dance Studio,Skating Rink,Pharmacy,Sandwich Place,Pub,Pool,Dessert Shop
4,"Bathurst Manor, Downsview North, Wilson Heights",Coffee Shop,Frozen Yogurt Shop,Diner,Sandwich Place,Bridal Shop,Fast Food Restaurant,Deli / Bodega,Bank,Restaurant,Supermarket


# K Means clustering

In [70]:
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
KMeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
KMeans.labels_[0:10] 

array([1, 3, 3, 3, 3, 3, 3, 3, 3, 3], dtype=int32)

In [72]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'ClusterLabels', KMeans.labels_)

toronto_merged = toronto

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude,ClusterLabels,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,M1B,Scarborough,"Rouge, Malvern",43.727929,-79.262029,3.0,<function BaseEstimator.get_params at 0x114eab...,Discount Store,Coffee Shop,Chinese Restaurant,Department Store,Hobby Shop,Bus Station,Dumpling Restaurant,Drugstore,Eastern European Restaurant,Donut Shop
12,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.7942,-79.262029,3.0,<function BaseEstimator.get_params at 0x114eab...,Lounge,Clothing Store,Sandwich Place,Breakfast Spot,Doner Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run
18,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.778517,-79.346556,3.0,<function BaseEstimator.get_params at 0x114eab...,Clothing Store,Fast Food Restaurant,Coffee Shop,Bus Station,Toy / Game Store,Restaurant,Food Court,Bakery,Sandwich Place,Salon / Barbershop
22,M1G,Scarborough,Woburn,43.77012,-79.408493,3.0,<function BaseEstimator.get_params at 0x114eab...,Coffee Shop,Ramen Restaurant,Restaurant,Sandwich Place,Café,Pizza Place,Hotel,Steakhouse,Fast Food Restaurant,Ice Cream Shop
26,M1H,Scarborough,Cedarbrae,43.745906,-79.352188,3.0,<function BaseEstimator.get_params at 0x114eab...,Gym / Fitness Center,Caribbean Restaurant,Café,Japanese Restaurant,Basketball Court,Baseball Field,Dim Sum Restaurant,Diner,Discount Store,Dog Run


In [74]:
toronto_merged = toronto_merged.drop(columns = ["Cluster Labels"])

In [75]:
toronto_merged

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude,ClusterLabels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,M1B,Scarborough,"Rouge, Malvern",43.727929,-79.262029,3.0,Discount Store,Coffee Shop,Chinese Restaurant,Department Store,Hobby Shop,Bus Station,Dumpling Restaurant,Drugstore,Eastern European Restaurant,Donut Shop
12,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.794200,-79.262029,3.0,Lounge,Clothing Store,Sandwich Place,Breakfast Spot,Doner Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run
18,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.778517,-79.346556,3.0,Clothing Store,Fast Food Restaurant,Coffee Shop,Bus Station,Toy / Game Store,Restaurant,Food Court,Bakery,Sandwich Place,Salon / Barbershop
22,M1G,Scarborough,Woburn,43.770120,-79.408493,3.0,Coffee Shop,Ramen Restaurant,Restaurant,Sandwich Place,Café,Pizza Place,Hotel,Steakhouse,Fast Food Restaurant,Ice Cream Shop
26,M1H,Scarborough,Cedarbrae,43.745906,-79.352188,3.0,Gym / Fitness Center,Caribbean Restaurant,Café,Japanese Restaurant,Basketball Court,Baseball Field,Dim Sum Restaurant,Diner,Discount Store,Dog Run
32,M1J,Scarborough,Scarborough Village,43.728496,-79.495697,0.0,Business Service,Food Truck,Baseball Field,Doner Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Donut Shop
38,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.709060,-79.363452,3.0,Coffee Shop,Sporting Goods Shop,Burger Joint,Sushi Restaurant,Restaurant,Bagel Shop,Fish & Chips Shop,Bank,Dessert Shop,Sports Bar
44,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.728020,-79.388790,1.0,Park,Bus Line,Swim School,Discount Store,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Dog Run,Dance Studio
51,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.667967,-79.367675,3.0,Coffee Shop,Restaurant,Bakery,Italian Restaurant,Pub,Café,Pizza Place,Chinese Restaurant,Pharmacy,Bookstore
58,M1N,Scarborough,"Birch Cliff, Cliffside West",43.650571,-79.384568,3.0,Coffee Shop,Bar,Café,Thai Restaurant,Steakhouse,Hotel,Sushi Restaurant,Bakery,Burger Joint,American Restaurant


# Mapping out the clusters

In [90]:
# map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['ClusterLabels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        fill=True,
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters