# Toronto Neighborhood Clustering

### Importing dependencies

In [1]:
import pandas as pd
import numpy as np
! pip install geocoder
import geocoder

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library

print('Libraries imported.')

Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.

Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.

Libraries imported.


### Reading the URL

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'  
df = pd.read_html(url) 
df[0].head(12)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
7,M8A,Not assigned,Not assigned
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"


### Dropping the Not Assigned Boroughs

In [3]:
toronto=df[0] 
toronto=toronto[toronto.Borough != 'Not assigned']
toronto.head(12)

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"
11,M3B,North York,Don Mills
12,M4B,East York,"Parkview Hill, Woodbine Gardens"
13,M5B,Downtown Toronto,"Garden District, Ryerson"


### Grouping by postal codes and adding commas for neighborhoods

In [4]:
toronto_grp=toronto.groupby('Postal Code').agg({'Borough':'first','Neighborhood':', '.join}).reset_index()
toronto_grp.head(12)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park"
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge"
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


### copying boroughs to neighborhood if Neighborhood is not assigned


In [5]:
toronto_grp.loc[toronto_grp.Neighborhood == 'Not Assigned' , ['Neighborhood'] ] = toronto_grp['Borough'] 
toronto_grp.head(12)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park"
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge"
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


### Printing the dimensions as required


In [6]:
toronto_grp.shape

(103, 3)

### Giving the Geocoder API a try (It times out quite a lot...)

In [None]:
# Danger do not run
for index,row in toronto_grp.iterrows():
    lat_long_coords=None
    postal_code=toronto_grp['Postal Code']
    while (lat_long_coords is None):
        g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
        lat_lng_coords = g.latlng
    toronto_grp["Latitude"] = lat_lng_coods[0]
    toronto_grp["Longitude"] = lat_lng_coods[1]
toronto_grp

### Geocoder does not work so, going to CSV to get the data and merging it with the borough and neighborhood info.

In [7]:
toronto_latlong=pd.read_csv("http://cocl.us/Geospatial_data")
tor_fin= pd.merge(toronto_grp,toronto_latlong,on="Postal Code")
tor_fin.head(12)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


### Extracting data for boroughs which contain Toronto

In [177]:
tor_neigh = tor_fin[(tor_fin['Borough'].str.contains("Toronto"))].reset_index(drop=True)
tor_neigh

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
5,M4P,Central Toronto,Davisville North,43.712751,-79.390197
6,M4R,Central Toronto,"North Toronto West, Lawrence Park",43.715383,-79.405678
7,M4S,Central Toronto,Davisville,43.704324,-79.38879
8,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316
9,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049


### Getting coordinates for Toronto

In [178]:
address = 'Toronto,ON'
geolocator=Nominatim(user_agent="Toronto Explorer")
location=geolocator.geocode(address) 
to_latitude=location.latitude
to_longitude=location.longitude
print('The geograpical coordinates of Toronto City are {}, {}.'.format(to_latitude,to_longitude))

The geograpical coordinates of Toronto City are 43.6534817, -79.3839347.


## Clustering the neighborhoods around Toronto now

### Printing the neighborhood on the Map

In [179]:
map_toronto=folium.Map(location=[to_latitude,to_longitude],zoom_start=12)

for lat,long,borough,neighborhood in zip(tor_neigh['Latitude'],tor_neigh['Longitude'],tor_neigh['Borough'],tor_neigh['Neighborhood']):
    label='{},{}'.format(neighborhood,borough)
    label=folium.Popup(label,parse_html=True)
    folium.CircleMarker(
    [lat,long],
    radius=5,
    popup=label,
    color='green',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False).add_to(map_toronto)
    
map_toronto

### Define Foursquare Credentials

In [180]:
CLIENT_ID = 'CED4RQMWZBS0WYLEZ2O5W14SF3X4ZMUHF2B3Z25DRTWTU4VC' # your Foursquare ID
CLIENT_SECRET = 'YKFZYNQF43KRE1UFZY4QYVNKIKBBR2WODPI2U1PLCDDW31IO' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: CED4RQMWZBS0WYLEZ2O5W14SF3X4ZMUHF2B3Z25DRTWTU4VC
CLIENT_SECRET:YKFZYNQF43KRE1UFZY4QYVNKIKBBR2WODPI2U1PLCDDW31IO


### Since there more than one neighborhoods in each record, Splitting them for ease of processing

In [181]:
from itertools import chain
def chainer(str1):
    return list(chain.from_iterable(str1.str.split(',')))

lens=tor_neigh['Neighborhood'].str.split(',').map(len)
tor_nei_exp=pd.DataFrame({'Postal Code':np.repeat(tor_neigh['Postal Code'],lens),
                        'Borough':np.repeat(tor_neigh['Borough'],lens),
                        'Neighborhood':chainer(tor_neigh['Neighborhood']),
                        'Latitude':np.repeat(tor_neigh['Latitude'],lens),
                        'Longitude':np.repeat(tor_neigh['Longitude'],lens)})
tor_nei_exp.reset_index(drop=True,inplace=True)
tor_nei_exp

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,The Danforth West,43.679557,-79.352188
2,M4K,East Toronto,Riverdale,43.679557,-79.352188
3,M4L,East Toronto,India Bazaar,43.668999,-79.315572
4,M4L,East Toronto,The Beaches West,43.668999,-79.315572
...,...,...,...,...,...
73,M6S,West Toronto,Swansea,43.651571,-79.484450
74,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
75,M7A,Downtown Toronto,Ontario Provincial Government,43.662301,-79.389494
76,M7Y,East Toronto,Business reply mail Processing Centre,43.662744,-79.321558


### Reading one Neighborhood data from Harbourfront West and calling four square and its longitude and latitude

In [182]:

nei=tor_nei_exp.loc[53,'Neighborhood']
nei_lat=tor_nei_exp.loc[53,'Latitude']
nei_long=tor_nei_exp.loc[53,'Longitude']
print('The neighborhood of {}, has latitude {}, Longitude {}'.format(nei,nei_lat,nei_long))

The neighborhood of  Harbourfront West, has latitude 43.6289467, Longitude -79.3944199


### Calling Four Square to get top 100 venues near Harbourfront

In [183]:
radius=500
LIMIT=100
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, nei_lat, nei_long, VERSION, radius, LIMIT)
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5edd0aa9b4b684001be24601'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Toronto',
  'headerFullLocation': 'Toronto',
  'headerLocationGranularity': 'city',
  'totalResults': 17,
  'suggestedBounds': {'ne': {'lat': 43.6334467045, 'lng': -79.3882145152226},
   'sw': {'lat': 43.6244466955, 'lng': -79.4006252847774}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4ad8df54f964a520881521e3',
       'name': 'Billy Bishop Toronto City Airport (YTZ) (Billy Bishop Toronto City Airport)',
       'location': {'address': 'Toronto Island',
        'lat': 43.63168259661481,
        'lng': -79.3960334124689,
        'distance': 331,
        'postalCode': 

### Defining a function to get the category of these venues 

In [184]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [185]:
venues=results['response']['groups'][0]['items']
nearby_venues=pd.json_normalize(venues) #flatten json
filtered_columns=['venue.name','venue.categories','venue.location.lat','venue.location.lng']
nearby_venues=nearby_venues.loc[:,filtered_columns]
nearby_venues['venue.categories']=nearby_venues.apply(get_category_type,axis=1)
nearby_venues.columns=[col.split(".")[-1] for col in nearby_venues.columns]
nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Billy Bishop Toronto City Airport (YTZ) (Billy...,Airport,43.631683,-79.396033
1,Porter Lounge,Airport Lounge,43.63068,-79.395756
2,Toronto Harbour,Harbor / Marina,43.633045,-79.396484
3,Billy Bishop Café,Airport Food Court,43.631132,-79.396139
4,Air Canada Check-In Counter,Airport Terminal,43.631226,-79.395987


In [186]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

17 venues were returned by Foursquare.


### Writing a function for getting all venues from the neighborhood table

In [187]:
def getNearbyVenues(names,latitudes,longitudes,radius=500):
    venues_list=[]

    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
         # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])
        
        nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
        nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    return(nearby_venues)

In [189]:
tor_venues= getNearbyVenues(names=tor_nei_exp['Neighborhood'],latitudes=tor_nei_exp['Latitude'],
                           longitudes=tor_nei_exp['Longitude'])

The Beaches
The Danforth West
 Riverdale
India Bazaar
 The Beaches West
Studio District
Lawrence Park
Davisville North
North Toronto West
 Lawrence Park
Davisville
Moore Park
 Summerhill East
Summerhill West
 Rathnelly
 South Hill
 Forest Hill SE
 Deer Park
Rosedale
St. James Town
 Cabbagetown
Church and Wellesley
Regent Park
 Harbourfront
Garden District
 Ryerson
St. James Town
Berczy Park
Central Bay Street
Richmond
 Adelaide
 King
Harbourfront East
 Union Station
 Toronto Islands
Toronto Dominion Centre
 Design Exchange
Commerce Court
 Victoria Hotel
Roselawn
Forest Hill North & West
 Forest Hill Road Park
The Annex
 North Midtown
 Yorkville
University of Toronto
 Harbord
Kensington Market
 Chinatown
 Grange Park
CN Tower
 King and Spadina
 Railway Lands
 Harbourfront West
 Bathurst Quay
 South Niagara
 Island airport
Stn A PO Boxes
First Canadian Place
 Underground city
Christie
Dufferin
 Dovercourt Village
Little Portugal
 Trinity
Brockton
 Parkdale Village
 Exhibition Place
High 

In [190]:
print(tor_venues.shape)
tor_venues.head()

(3180, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
4,The Danforth West,43.679557,-79.352188,MenEssentials,43.67782,-79.351265,Cosmetics Shop


In [191]:
tor_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Adelaide,94,94,94,94,94,94
Bathurst Quay,17,17,17,17,17,17
Cabbagetown,45,45,45,45,45,45
Chinatown,59,59,59,59,59,59
Deer Park,17,17,17,17,17,17
...,...,...,...,...,...,...
The Annex,21,21,21,21,21,21
The Beaches,4,4,4,4,4,4
The Danforth West,43,43,43,43,43,43
Toronto Dominion Centre,100,100,100,100,100,100


In [192]:
print ('The unique venue cateogories are: {}'.format(len(tor_venues['Venue Category'].unique())))

The unique venue cateogories are: 236


## Analyzing Neighborhoods

### One hot encoding the venue categories

In [193]:
tor_onehot=pd.get_dummies(tor_venues[['Venue Category']],prefix="",prefix_sep="")
neigh = tor_venues['Neighborhood']
tor_onehot.drop(labels='Neighborhood',axis=1,inplace=True)
tor_onehot.insert(0,'Neighborhood',neigh)
tor_onehot.head()

Unnamed: 0,Neighborhood,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,The Beaches,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,The Danforth West,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [194]:
tor_onehot.shape

(3180, 236)

In [195]:
tor_grouped=tor_onehot.groupby('Neighborhood').mean().reset_index()
tor_grouped

Unnamed: 0,Neighborhood,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Adelaide,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.021277,0.0,0.0,...,0.000000,0.00,0.010638,0.000000,0.000000,0.000000,0.0,0.0,0.010638,0.000000
1,Bathurst Quay,0.0,0.058824,0.058824,0.117647,0.176471,0.117647,0.000000,0.0,0.0,...,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000
2,Cabbagetown,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,...,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000
3,Chinatown,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,...,0.000000,0.00,0.050847,0.000000,0.050847,0.016949,0.0,0.0,0.000000,0.000000
4,Deer Park,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.058824,0.0,0.0,...,0.000000,0.00,0.000000,0.000000,0.058824,0.000000,0.0,0.0,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72,The Annex,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,...,0.000000,0.00,0.047619,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000
73,The Beaches,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,...,0.250000,0.00,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000
74,The Danforth West,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.023256,0.0,0.0,...,0.023256,0.00,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.023256
75,Toronto Dominion Centre,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.030000,0.0,0.0,...,0.000000,0.01,0.010000,0.000000,0.000000,0.010000,0.0,0.0,0.000000,0.000000


In [196]:
tor_grouped.shape

(77, 236)

### Printing top 5 venues from each neighborhood

In [197]:
num_top_venues = 5

for hood in tor_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = tor_grouped[tor_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

---- Adelaide----
         venue  freq
0  Coffee Shop  0.10
1         Café  0.05
2   Restaurant  0.04
3          Gym  0.03
4        Hotel  0.03


---- Bathurst Quay----
                 venue  freq
0      Airport Service  0.18
1       Airport Lounge  0.12
2     Airport Terminal  0.12
3      Harbor / Marina  0.06
4  Rental Car Location  0.06


---- Cabbagetown----
         venue  freq
0  Coffee Shop  0.07
1    Pet Store  0.04
2     Pharmacy  0.04
3  Pizza Place  0.04
4          Pub  0.04


---- Chinatown----
                           venue  freq
0                           Café  0.08
1                    Coffee Shop  0.07
2             Mexican Restaurant  0.05
3          Vietnamese Restaurant  0.05
4  Vegetarian / Vegan Restaurant  0.05


---- Deer Park----
                venue  freq
0         Coffee Shop  0.12
1                 Pub  0.12
2                Bank  0.06
3  Light Rail Station  0.06
4          Restaurant  0.06


---- Design Exchange----
                venue  freq
0        

4   Italian Restaurant  0.06




### Writing a function to sort venue categories in desecending order 

In [198]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

### Fetching top 10 venues for neighborhood


In [250]:
num_top_venues=10

indicators = ['st', 'nd', 'rd']

columns=['Neighborhood']

for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most common venue'.format(ind+1,indicators[ind]))
    except:
         columns.append('{}th Most common venue'.format(ind+1))
                        
nei_venues_sorted=pd.DataFrame(columns=columns)
nei_venues_sorted['Neighborhood'] = tor_grouped['Neighborhood']

for ind in np.arange(tor_grouped.shape[0]):
    nei_venues_sorted.iloc[ind,1:]=return_most_common_venues(tor_grouped.iloc[ind,:],num_top_venues)

nei_venues_sorted.head()

        

Unnamed: 0,Neighborhood,1st Most common venue,2nd Most common venue,3rd Most common venue,4th Most common venue,5th Most common venue,6th Most common venue,7th Most common venue,8th Most common venue,9th Most common venue,10th Most common venue
0,Adelaide,Coffee Shop,Café,Restaurant,Deli / Bodega,Clothing Store,Hotel,Gym,Thai Restaurant,Salad Place,Sushi Restaurant
1,Bathurst Quay,Airport Service,Airport Lounge,Airport Terminal,Harbor / Marina,Bar,Plane,Coffee Shop,Rental Car Location,Sculpture Garden,Boat or Ferry
2,Cabbagetown,Coffee Shop,Restaurant,Italian Restaurant,Pharmacy,Pet Store,Chinese Restaurant,Pizza Place,Bakery,Pub,Café
3,Chinatown,Café,Coffee Shop,Mexican Restaurant,Bakery,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,Bar,Park,Gaming Cafe,Dessert Shop
4,Deer Park,Pub,Coffee Shop,Bank,Supermarket,Sushi Restaurant,Bagel Shop,Fried Chicken Joint,Sports Bar,Restaurant,Light Rail Station


### Clustering Neighborhoods

In [251]:
kclusters=10

tor_grouped_clustering=tor_grouped.drop('Neighborhood',1)

kmeans=KMeans(n_clusters=kclusters,random_state=0).fit(tor_grouped_clustering)

nei_venues_sorted.insert(0,'Cluster Labels',kmeans.labels_)


In [252]:

tor_merged = tor_nei_exp

tor_merged = tor_merged.join(nei_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

tor_merged

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most common venue,2nd Most common venue,3rd Most common venue,4th Most common venue,5th Most common venue,6th Most common venue,7th Most common venue,8th Most common venue,9th Most common venue,10th Most common venue
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Trail,Health Food Store,Pub,Yoga Studio,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop
1,M4K,East Toronto,The Danforth West,43.679557,-79.352188,7,Greek Restaurant,Italian Restaurant,Coffee Shop,Furniture / Home Store,Bookstore,Restaurant,Ice Cream Shop,Yoga Studio,Spa,Japanese Restaurant
2,M4K,East Toronto,Riverdale,43.679557,-79.352188,7,Greek Restaurant,Italian Restaurant,Coffee Shop,Furniture / Home Store,Bookstore,Restaurant,Ice Cream Shop,Yoga Studio,Spa,Japanese Restaurant
3,M4L,East Toronto,India Bazaar,43.668999,-79.315572,2,Park,Fast Food Restaurant,Pub,Light Rail Station,Liquor Store,Sandwich Place,Burrito Place,Italian Restaurant,Restaurant,Fish & Chips Shop
4,M4L,East Toronto,The Beaches West,43.668999,-79.315572,2,Park,Fast Food Restaurant,Pub,Light Rail Station,Liquor Store,Sandwich Place,Burrito Place,Italian Restaurant,Restaurant,Fish & Chips Shop
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73,M6S,West Toronto,Swansea,43.651571,-79.484450,7,Coffee Shop,Sushi Restaurant,Café,Pizza Place,Pub,Italian Restaurant,Diner,Sandwich Place,Burrito Place,Restaurant
74,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494,7,Coffee Shop,Sushi Restaurant,Yoga Studio,Bank,Beer Bar,Smoothie Shop,Sandwich Place,Burger Joint,Burrito Place,Café
75,M7A,Downtown Toronto,Ontario Provincial Government,43.662301,-79.389494,7,Coffee Shop,Sushi Restaurant,Yoga Studio,Bank,Beer Bar,Smoothie Shop,Sandwich Place,Burger Joint,Burrito Place,Café
76,M7Y,East Toronto,Business reply mail Processing Centre,43.662744,-79.321558,2,Yoga Studio,Auto Workshop,Skate Park,Light Rail Station,Smoke Shop,Spa,Farmers Market,Fast Food Restaurant,Burrito Place,Restaurant


### Visualizing the clusters

In [253]:
# create map
map_clusters=folium.Map(location=[to_latitude,to_longitude],zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(tor_merged['Latitude'], tor_merged['Longitude'], tor_merged['Neighborhood'], tor_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters


### Examining Clusters

#### Cluster 1

In [254]:
tor_merged.loc[tor_merged['Cluster Labels'] == 0, tor_merged.columns[[1] + list(range(5, tor_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most common venue,2nd Most common venue,3rd Most common venue,4th Most common venue,5th Most common venue,6th Most common venue,7th Most common venue,8th Most common venue,9th Most common venue,10th Most common venue
0,East Toronto,0,Trail,Health Food Store,Pub,Yoga Studio,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop
13,Central Toronto,0,Pub,Coffee Shop,Bank,Supermarket,Sushi Restaurant,Bagel Shop,Fried Chicken Joint,Sports Bar,Restaurant,Light Rail Station
14,Central Toronto,0,Pub,Coffee Shop,Bank,Supermarket,Sushi Restaurant,Bagel Shop,Fried Chicken Joint,Sports Bar,Restaurant,Light Rail Station
15,Central Toronto,0,Pub,Coffee Shop,Bank,Supermarket,Sushi Restaurant,Bagel Shop,Fried Chicken Joint,Sports Bar,Restaurant,Light Rail Station
16,Central Toronto,0,Pub,Coffee Shop,Bank,Supermarket,Sushi Restaurant,Bagel Shop,Fried Chicken Joint,Sports Bar,Restaurant,Light Rail Station
17,Central Toronto,0,Pub,Coffee Shop,Bank,Supermarket,Sushi Restaurant,Bagel Shop,Fried Chicken Joint,Sports Bar,Restaurant,Light Rail Station


#### Cluster2

In [255]:
tor_merged.loc[tor_merged['Cluster Labels'] == 1, tor_merged.columns[[1] + list(range(5, tor_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most common venue,2nd Most common venue,3rd Most common venue,4th Most common venue,5th Most common venue,6th Most common venue,7th Most common venue,8th Most common venue,9th Most common venue,10th Most common venue
42,Central Toronto,1,Sandwich Place,Café,Coffee Shop,BBQ Joint,Burger Joint,Flower Shop,Middle Eastern Restaurant,Indian Restaurant,Pub,Liquor Store
43,Central Toronto,1,Sandwich Place,Café,Coffee Shop,BBQ Joint,Burger Joint,Flower Shop,Middle Eastern Restaurant,Indian Restaurant,Pub,Liquor Store
44,Central Toronto,1,Sandwich Place,Café,Coffee Shop,BBQ Joint,Burger Joint,Flower Shop,Middle Eastern Restaurant,Indian Restaurant,Pub,Liquor Store
47,Downtown Toronto,1,Café,Coffee Shop,Mexican Restaurant,Bakery,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,Bar,Park,Gaming Cafe,Dessert Shop
48,Downtown Toronto,1,Café,Coffee Shop,Mexican Restaurant,Bakery,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,Bar,Park,Gaming Cafe,Dessert Shop
49,Downtown Toronto,1,Café,Coffee Shop,Mexican Restaurant,Bakery,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,Bar,Park,Gaming Cafe,Dessert Shop
60,Downtown Toronto,1,Grocery Store,Café,Park,Restaurant,Baby Store,Diner,Italian Restaurant,Athletics & Sports,Candy Store,Coffee Shop
61,West Toronto,1,Pharmacy,Bakery,Grocery Store,Furniture / Home Store,Park,Music Venue,Portuguese Restaurant,Middle Eastern Restaurant,Café,Brewery
62,West Toronto,1,Pharmacy,Bakery,Grocery Store,Furniture / Home Store,Park,Music Venue,Portuguese Restaurant,Middle Eastern Restaurant,Café,Brewery
68,West Toronto,1,Café,Mexican Restaurant,Thai Restaurant,Grocery Store,Furniture / Home Store,Fast Food Restaurant,Bookstore,Flea Market,Cajun / Creole Restaurant,Speakeasy


#### Cluster 3

In [256]:
tor_merged.loc[tor_merged['Cluster Labels'] == 2, tor_merged.columns[[1] + list(range(5, tor_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most common venue,2nd Most common venue,3rd Most common venue,4th Most common venue,5th Most common venue,6th Most common venue,7th Most common venue,8th Most common venue,9th Most common venue,10th Most common venue
3,East Toronto,2,Park,Fast Food Restaurant,Pub,Light Rail Station,Liquor Store,Sandwich Place,Burrito Place,Italian Restaurant,Restaurant,Fish & Chips Shop
4,East Toronto,2,Park,Fast Food Restaurant,Pub,Light Rail Station,Liquor Store,Sandwich Place,Burrito Place,Italian Restaurant,Restaurant,Fish & Chips Shop
7,Central Toronto,2,Gym / Fitness Center,Hotel,Dance Studio,Department Store,Sandwich Place,Breakfast Spot,Food & Drink Shop,Park,General Entertainment,Gay Bar
76,East Toronto,2,Yoga Studio,Auto Workshop,Skate Park,Light Rail Station,Smoke Shop,Spa,Farmers Market,Fast Food Restaurant,Burrito Place,Restaurant
77,East Toronto,2,Yoga Studio,Auto Workshop,Skate Park,Light Rail Station,Smoke Shop,Spa,Farmers Market,Fast Food Restaurant,Burrito Place,Restaurant


#### Cluster 4

In [257]:
tor_merged.loc[tor_merged['Cluster Labels'] == 3, tor_merged.columns[[1] + list(range(5, tor_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most common venue,2nd Most common venue,3rd Most common venue,4th Most common venue,5th Most common venue,6th Most common venue,7th Most common venue,8th Most common venue,9th Most common venue,10th Most common venue
50,Downtown Toronto,3,Airport Service,Airport Lounge,Airport Terminal,Harbor / Marina,Bar,Plane,Coffee Shop,Rental Car Location,Sculpture Garden,Boat or Ferry
51,Downtown Toronto,3,Airport Service,Airport Lounge,Airport Terminal,Harbor / Marina,Bar,Plane,Coffee Shop,Rental Car Location,Sculpture Garden,Boat or Ferry
52,Downtown Toronto,3,Airport Service,Airport Lounge,Airport Terminal,Harbor / Marina,Bar,Plane,Coffee Shop,Rental Car Location,Sculpture Garden,Boat or Ferry
53,Downtown Toronto,3,Airport Service,Airport Lounge,Airport Terminal,Harbor / Marina,Bar,Plane,Coffee Shop,Rental Car Location,Sculpture Garden,Boat or Ferry
54,Downtown Toronto,3,Airport Service,Airport Lounge,Airport Terminal,Harbor / Marina,Bar,Plane,Coffee Shop,Rental Car Location,Sculpture Garden,Boat or Ferry
55,Downtown Toronto,3,Airport Service,Airport Lounge,Airport Terminal,Harbor / Marina,Bar,Plane,Coffee Shop,Rental Car Location,Sculpture Garden,Boat or Ferry
56,Downtown Toronto,3,Airport Service,Airport Lounge,Airport Terminal,Harbor / Marina,Bar,Plane,Coffee Shop,Rental Car Location,Sculpture Garden,Boat or Ferry


#### Cluster 5

In [258]:
tor_merged.loc[tor_merged['Cluster Labels'] == 4, tor_merged.columns[[1] + list(range(5, tor_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most common venue,2nd Most common venue,3rd Most common venue,4th Most common venue,5th Most common venue,6th Most common venue,7th Most common venue,8th Most common venue,9th Most common venue,10th Most common venue
11,Central Toronto,4,Park,Restaurant,Yoga Studio,Department Store,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant
12,Central Toronto,4,Park,Restaurant,Yoga Studio,Department Store,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant


#### Cluster 6

In [259]:
tor_merged.loc[tor_merged['Cluster Labels'] == 5, tor_merged.columns[[1] + list(range(5, tor_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most common venue,2nd Most common venue,3rd Most common venue,4th Most common venue,5th Most common venue,6th Most common venue,7th Most common venue,8th Most common venue,9th Most common venue,10th Most common venue
40,Central Toronto,5,Jewelry Store,Trail,Mexican Restaurant,Sushi Restaurant,Yoga Studio,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
41,Central Toronto,5,Jewelry Store,Trail,Mexican Restaurant,Sushi Restaurant,Yoga Studio,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant


#### Cluster 7

In [262]:
tor_merged.loc[tor_merged['Cluster Labels'] == 6, tor_merged.columns[[1] + list(range(5, tor_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most common venue,2nd Most common venue,3rd Most common venue,4th Most common venue,5th Most common venue,6th Most common venue,7th Most common venue,8th Most common venue,9th Most common venue,10th Most common venue
6,Central Toronto,6,Park,Bus Line,Swim School,Yoga Studio,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop


# Thats it for now my friend!!!