In [651]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import json
from geopy.geocoders import Nominatim
import wikipedia
from pandas.io.json import json_normalize
import requests
import folium

## Obtain information from Wikipedia page to construct the dataframe

In [652]:
url='https://en.wikipedia.org/wiki/List_of_neighbourhoods_in_Toronto'
results=requests.get(url)

In [653]:
from bs4 import BeautifulSoup
soup=BeautifulSoup(results.text, 'html.parser')

print(soup.prettify()) #Organize the code to be understandable

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   List of neighbourhoods in Toronto - Wikipedia
  </title>
  <script>
   document.documentElement.className = document.documentElement.className.replace( /(^|\s)client-nojs(\s|$)/, "$1client-js$2" );
  </script>
  <script>
   (window.RLQ=window.RLQ||[]).push(function(){mw.config.set({"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":0,"wgPageName":"List_of_neighbourhoods_in_Toronto","wgTitle":"List of neighbourhoods in Toronto","wgCurRevisionId":894467670,"wgRevisionId":894467670,"wgArticleId":1150939,"wgIsArticle":true,"wgIsRedirect":false,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Webarchive template wayback links","Neighbourhoods in Toronto","Lists of populated places in Ontario","Toronto-related lists","Lists of neighbourhoods in Canadian cities"],"wgBreakFrames":false,"wgPageContentLanguage":"en","wgPageContentMod

In [27]:
soup.body.table

<table class="multicol" role="presentation" style="border-collapse: collapse; padding: 0; border: 0; background:transparent; width:100%;"><tbody><tr>
<td style="text-align: left; vertical-align: top;">
<h4><span id="Downtown_Core_.28Central.29"></span><span class="mw-headline" id="Downtown_Core_(Central)"><a href="/wiki/Downtown_Toronto" title="Downtown Toronto">Downtown Core (Central)</a></span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=List_of_neighbourhoods_in_Toronto&amp;action=edit&amp;section=3" title="Edit section: Downtown Core (Central)">edit</a><span class="mw-editsection-bracket">]</span></span></h4>
<ul><li><a href="/wiki/Alexandra_Park,_Toronto" title="Alexandra Park, Toronto">Alexandra Park</a></li>
<li><a href="/wiki/The_Annex" title="The Annex">The Annex</a></li>
<li><a href="/wiki/Baldwin_Village" title="Baldwin Village">Baldwin Village</a></li>
<li><a href="/wiki/Cabbagetown,_Toronto" title="Cabbagetown, Toron

In [113]:
soup.find_all('tbody')

[<tbody><tr>
 <td style="text-align: left; vertical-align: top;">
 <h4><span id="Downtown_Core_.28Central.29"></span><span class="mw-headline" id="Downtown_Core_(Central)"><a href="/wiki/Downtown_Toronto" title="Downtown Toronto">Downtown Core (Central)</a></span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=List_of_neighbourhoods_in_Toronto&amp;action=edit&amp;section=3" title="Edit section: Downtown Core (Central)">edit</a><span class="mw-editsection-bracket">]</span></span></h4>
 <ul><li><a href="/wiki/Alexandra_Park,_Toronto" title="Alexandra Park, Toronto">Alexandra Park</a></li>
 <li><a href="/wiki/The_Annex" title="The Annex">The Annex</a></li>
 <li><a href="/wiki/Baldwin_Village" title="Baldwin Village">Baldwin Village</a></li>
 <li><a href="/wiki/Cabbagetown,_Toronto" title="Cabbagetown, Toronto">Cabbagetown</a></li>
 <li><a href="/wiki/CityPlace,_Toronto" title="CityPlace, Toronto">CityPlace</a></li>
 <li><a href="/wiki/

In [655]:
        
soup.ul.li.ul.ul.decompose()


In [656]:
#Obtain the names of the Districts in Toronto
title=soup.ul.li.ul.find_all(class_='toctext')

title_list=[]
for i in title:
    title_list.append(i.string)
    print(i.string)
title_list

Old Toronto
East York
Etobicoke
North York
Scarborough
York


['Old Toronto', 'East York', 'Etobicoke', 'North York', 'Scarborough', 'York']

In [665]:
df_toronto=pd.DataFrame(columns=['District','Neighborhood','Latitude','Longitude'])
df_toronto

Unnamed: 0,District,Neighborhood,Latitude,Longitude


In [666]:
for i, district in enumerate(title_list):

    locality=soup.find_all('tbody')[i].find_all('ul')
    for j in locality:
        neighborhood=j.find_all('a')
        for n, k in enumerate(neighborhood):
            url='https://en.wikipedia.org'+k['href']
            result=requests.get(url)
            soup_n=BeautifulSoup(result.text, 'html.parser')
            try:
                #obtain coordinates from links of each city
                coordinates=soup_n.find(class_='geo').string
                lat=coordinates.split(';')[0]
                lon=coordinates.split(';')[1]
            except AttributeError:
                #if we cannot obtain coordinates in wikipedia, we can try to look for them with Nominatim
                geolocator=Nominatim(user_agent='explorer')
                coordinates=geolocator.geocode(k.string+' Toronto ', country_codes='CA')
                
                if not coordinates:
                    coordinates= '0 ; 0'
                    lat=coordinates.split(';')[0]
                    lon=coordinates.split(';')[1]
                else:
                    lat=coordinates.latitude
                    lon=coordinates.longitude     
                   
            df_toronto=df_toronto.append({'District': district,
                              'Neighborhood': k.string,
                              'Longitude': float(lon),
                              'Latitude': float(lat)}, ignore_index=True)
    
    

In [667]:
df_toronto.head(10)

Unnamed: 0,District,Neighborhood,Latitude,Longitude
0,Old Toronto,Alexandra Park,43.65,-79.4
1,Old Toronto,The Annex,43.67,-79.404
2,Old Toronto,Baldwin Village,43.656,-79.3934
3,Old Toronto,Cabbagetown,43.6664,-79.3629
4,Old Toronto,CityPlace,43.640044,-79.395179
5,Old Toronto,Chinatown,43.6529,-79.398
6,Old Toronto,Church and Wellesley,43.665694,-79.380956
7,Old Toronto,Corktown,43.655518,-79.359712
8,Old Toronto,Discovery District,43.658,-79.388
9,Old Toronto,Distillery District,43.650295,-79.35954


In [668]:
#Delete the neighborhoods which have coordinate values = 0,0
cond=df_toronto['Latitude']==0
df_toronto.drop(df_toronto[cond].index, axis=0, inplace=True)
df_toronto.head(10)

Unnamed: 0,District,Neighborhood,Latitude,Longitude
0,Old Toronto,Alexandra Park,43.65,-79.4
1,Old Toronto,The Annex,43.67,-79.404
2,Old Toronto,Baldwin Village,43.656,-79.3934
3,Old Toronto,Cabbagetown,43.6664,-79.3629
4,Old Toronto,CityPlace,43.640044,-79.395179
5,Old Toronto,Chinatown,43.6529,-79.398
6,Old Toronto,Church and Wellesley,43.665694,-79.380956
7,Old Toronto,Corktown,43.655518,-79.359712
8,Old Toronto,Discovery District,43.658,-79.388
9,Old Toronto,Distillery District,43.650295,-79.35954


In [669]:
#Now obtain the location of Toronto
toronto='Toronto'

geolocator=Nominatim(user_agent='explorer')
location=geolocator.geocode(toronto)
lat=location.latitude
lon=location.longitude
print(lat,lon)

43.653963 -79.387207


In [671]:
#print map with all points
map_toronto=folium.Map(location=[lat,lon], zoom_start=11)

for lat, lon, name, district in zip(df_toronto.Latitude, df_toronto.Longitude, df_toronto.Neighborhood, df_toronto.District):
    label= name +' '+ district
    
    folium.CircleMarker([lat, lon],
                        color='blue',
                        fill=True,
                        fill_color='yellow',
                        popup=label,
                        radius=5,
                       fill_opacity=1).add_to(map_toronto)
map_toronto

In [674]:
#Now we will take only one of the districts
oldtoronto_data = df_toronto[df_toronto['District'] == 'Old Toronto'].reset_index(drop=True)
oldtoronto_data.head()

Unnamed: 0,District,Neighborhood,Latitude,Longitude
0,Old Toronto,Alexandra Park,43.65,-79.4
1,Old Toronto,The Annex,43.67,-79.404
2,Old Toronto,Baldwin Village,43.656,-79.3934
3,Old Toronto,Cabbagetown,43.6664,-79.3629
4,Old Toronto,CityPlace,43.640044,-79.395179


In [675]:
address = 'Old Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

In [677]:
map_oldtoronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(oldtoronto_data['Latitude'], oldtoronto_data['Longitude'], oldtoronto_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='yellow',
        fill_opacity=0.7,
        parse_html=False).add_to(map_oldtoronto)  
    
map_oldtoronto

In [678]:
#Start utilizing FourSquare API to explore neighborhoods
CLIENT_ID = '4VQQ1CNFOUMYC1NI3HXTZJSGFMJ2ZC2VUDIYO0KTO2Z2O3O3' # your Foursquare ID
CLIENT_SECRET = 'AGIWGD3W1CVTO4AA3SPMAR14CKMWNI3F4NMOOGNZVBYGP0GC' # your Foursquare Secret
VERSION = '20180604'

In [679]:
#Name of the first neighborhood
name=oldtoronto_data.loc[0,'Neighborhood']
lon=oldtoronto_data.loc[0,'Longitude']
lat=oldtoronto_data.iloc[0]['Latitude']
print(name, lat, lon)

Alexandra Park 43.65 -79.4


Let's get the top 100 venues in Alexandra Park, within a radius of 500 meters

In [682]:

url='http://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, lat, lon, VERSION, 500, 100)
results=requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5cca42faf594df21bb051011'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Alexandra Park',
  'headerFullLocation': 'Alexandra Park, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 160,
  'suggestedBounds': {'ne': {'lat': 43.6545000045, 'lng': -79.39379244047241},
   'sw': {'lat': 43.645499995499996, 'lng': -79.4062075595276}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '5644dbaa498e7f7534154326',
       'name': 'Maker Pizza',
       'location': {'address': '59 Cameron St',
        'lat': 43.6504011331197,
        'lng': -79.39804047841302,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.650401

In [684]:
df_venues=json_normalize(results['response']['groups'][0]['items'])
df_venues.head()

Unnamed: 0,reasons.count,reasons.items,referralId,venue.categories,venue.id,venue.location.address,venue.location.cc,venue.location.city,venue.location.country,venue.location.crossStreet,...,venue.location.labeledLatLngs,venue.location.lat,venue.location.lng,venue.location.neighborhood,venue.location.postalCode,venue.location.state,venue.name,venue.photos.count,venue.photos.groups,venue.venuePage.id
0,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-5644dbaa498e7f7534154326-0,"[{'id': '4bf58dd8d48988d1ca941735', 'name': 'P...",5644dbaa498e7f7534154326,59 Cameron St,CA,Toronto,Canada,,...,"[{'label': 'display', 'lat': 43.6504011331197,...",43.650401,-79.39804,,M5T 2H1,ON,Maker Pizza,0,[],
1,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-4adf8284f964a520477b21e3-1,"[{'id': '4bf58dd8d48988d102941735', 'name': 'Y...",4adf8284f964a520477b21e3,553 Queen St. W,CA,Toronto,Canada,at Augusta Ave.,...,"[{'label': 'display', 'lat': 43.64791967343419...",43.64792,-79.400196,,M5V 2B6,ON,Core Studio Yoga & Pilates,0,[],
2,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-56464481498e460c7247dc56-2,"[{'id': '4bf58dd8d48988d1d2941735', 'name': 'S...",56464481498e460c7247dc56,478 Queen St West,CA,Toronto,Canada,Augusta,...,"[{'label': 'display', 'lat': 43.64803826205330...",43.648038,-79.400268,,,ON,Saku Sushi,0,[],
3,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-54282a5b498ef4e6e79707ac-3,"[{'id': '4bf58dd8d48988d10d951735', 'name': 'R...",54282a5b498ef4e6e79707ac,215 Spadina,CA,Toronto,Canada,Sullivan,...,"[{'label': 'display', 'lat': 43.65085888252597...",43.650859,-79.396985,,,ON,Sonic Boom,0,[],
4,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-5b233c5595d986002c9e8788-4,"[{'id': '4bf58dd8d48988d116941735', 'name': 'B...",5b233c5595d986002c9e8788,458 Queen Street W,CA,Toronto,Canada,,...,"[{'label': 'display', 'lat': 43.64813402984747...",43.648134,-79.399675,,M5V 2A8,ON,Drom Taberna,0,[],


Use the function get_category_type() to obtain the category list for each venue

In [685]:
def get_category_type(df):
    try:
        categories_list=df['categories']
    except:
        categories_list=df['venue.categories']
        
    if len(categories_list)==0:
        return None
    else:
        return categories_list[0]['name']

cat_type=df_venues.apply(get_category_type, axis=1)#(df_venues)
cat_type

0                       Pizza Place
1                       Yoga Studio
2                  Sushi Restaurant
3                       Record Shop
4                               Bar
5                      Dance Studio
6                       Coffee Shop
7                               Bar
8                    Sandwich Place
9                               Bar
10                     Dessert Shop
11                French Restaurant
12                     Cocktail Bar
13                      Gaming Cafe
14                        Gift Shop
15                         Creperie
16                   Ice Cream Shop
17                French Restaurant
18                              Bar
19                      Coffee Shop
20                 Arepa Restaurant
21                       Street Art
22                  Udon Restaurant
23             Fast Food Restaurant
24                             Café
25                   Cosmetics Shop
26                       Shoe Store
27                      Musi

Choose the columns we will use and organize the dataset

In [686]:
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
df_venues =df_venues.loc[:, filtered_columns]

# filter the category for each row
df_venues['venue.categories'] = df_venues.apply(get_category_type, axis=1)


In [687]:
df_venues.columns=[col.split('.')[-1] for col in df_venues.columns]
df_venues.head(10)

Unnamed: 0,name,categories,lat,lng
0,Maker Pizza,Pizza Place,43.650401,-79.39804
1,Core Studio Yoga & Pilates,Yoga Studio,43.64792,-79.400196
2,Saku Sushi,Sushi Restaurant,43.648038,-79.400268
3,Sonic Boom,Record Shop,43.650859,-79.396985
4,Drom Taberna,Bar,43.648134,-79.399675
5,City Dance Corps,Dance Studio,43.648144,-79.398111
6,Dark Horse Espresso Bar,Coffee Shop,43.650564,-79.397018
7,BarChef,Bar,43.648038,-79.400161
8,Banh Mi Boys,Sandwich Place,43.64865,-79.396859
9,Tequila Bookworm,Bar,43.647697,-79.401549


## Explore venues in each neighborhood

In [690]:
#Function to obtain all venues in each neighborhood
def getNearbyVenues(names, latitudes, longitudes, radius=500, limit=100):
    
    venues_list=[]
    
    for name, lat, lon in zip (names, latitudes, longitudes):
        
        url='http://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, lat, lon, VERSION, radius, limit)
        
        results=requests.get(url).json()['response']['groups'][0]['items']
        
        venues_list.append([(name, lat, lon, 
                            v['venue']['name'],
                            v['venue']['location']['lat'],
                            v['venue']['location']['lng'],
                            v['venue']['categories'][0]['name']) for v in results])
        
    nearby_venues=pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns=['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    return nearby_venues


In [691]:
oldtoronto_venues=getNearbyVenues(names=oldtoronto_data['Neighborhood'], latitudes=oldtoronto_data['Latitude'],
                              longitudes=oldtoronto_data['Longitude'])

In [692]:
#Check size of the resulting df
print(oldtoronto_venues.shape)
oldtoronto_venues.head()

(4168, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Alexandra Park,43.65,-79.4,Maker Pizza,43.650401,-79.39804,Pizza Place
1,Alexandra Park,43.65,-79.4,Core Studio Yoga & Pilates,43.64792,-79.400196,Yoga Studio
2,Alexandra Park,43.65,-79.4,Saku Sushi,43.648038,-79.400268,Sushi Restaurant
3,Alexandra Park,43.65,-79.4,Sonic Boom,43.650859,-79.396985,Record Shop
4,Alexandra Park,43.65,-79.4,Drom Taberna,43.648134,-79.399675,Bar


In [693]:
#Count how many venues we have for each neighborhood

oldtoronto_venues.groupby('Neighborhood').count().head(10)

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Alexandra Park,100,100,100,100,100,100
Baldwin Village,67,67,67,67,67,67
Beaconsfield Village,42,42,42,42,42,42
Bedford Park,3,3,3,3,3,3
Bloor West Village,47,47,47,47,47,47
Bracondale Hill,4,4,4,4,4,4
Brockton Village,42,42,42,42,42,42
Cabbagetown,33,33,33,33,33,33
Carleton Village,11,11,11,11,11,11
Casa Loma,32,32,32,32,32,32


In [694]:
#Number of unique categories

print('there are {} unique categories'.format(len(oldtoronto_venues['Venue Category'].unique())))

#It appeared 'Neighborhood as category, so I will delete the rows that have Neighborhood as category'

cond=oldtoronto_venues['Venue Category']=='Neighborhood'
oldtoronto_venues.drop(oldtoronto_venues[cond].index, axis=0, inplace=True)


there are 305 unique categories


## Analyze Each Neighborhood

In [695]:
#one hot encoding, to assign to each category ones or zeros according to the venue

old_toronto_onehot = pd.get_dummies(oldtoronto_venues[['Venue Category']], prefix="", prefix_sep="")
old_toronto_onehot.head()

Unnamed: 0,ATM,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,American Restaurant,Animal Shelter,Antique Shop,Aquarium,Arcade,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [696]:
#add neighborhood column back to dataframe
old_toronto_onehot['Neighborhood'] = oldtoronto_venues['Neighborhood'] 
#oldtoronto_onehot.insert(0, 'Neighborhood', oldtoronto_venues['Neighborhood'])
old_toronto_onehot

Unnamed: 0,ATM,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,American Restaurant,Animal Shelter,Antique Shop,Aquarium,Arcade,...,Video Game Store,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Neighborhood
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Alexandra Park
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,Alexandra Park
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Alexandra Park
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Alexandra Park
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Alexandra Park
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Alexandra Park
6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Alexandra Park
7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Alexandra Park
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Alexandra Park
9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Alexandra Park


In [697]:
## move neighborhood column to the first column
fixed_columns = [old_toronto_onehot.columns[-1]] + list(old_toronto_onehot.columns[:-1])
old_toronto_onehot = oldtoronto_onehot[fixed_columns]

old_toronto_onehot.head()

Unnamed: 0,Neighborhood,ATM,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,American Restaurant,Animal Shelter,Antique Shop,Aquarium,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Alexandra Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Alexandra Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,Alexandra Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Alexandra Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Alexandra Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [698]:
old_toronto_onehot.shape

(5428, 305)

In [700]:
##Group rows by neighborhood and take the mean of the frequency per category

oldtoronto_grouped=old_toronto_onehot.groupby('Neighborhood').mean().reset_index()
oldtoronto_grouped.head(10)

Unnamed: 0,Neighborhood,ATM,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,American Restaurant,Animal Shelter,Antique Shop,Aquarium,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Birch Cliff Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Alderwood,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Alexandra Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.06,0.01,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.02
4,Amesbury,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Armadale,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0
6,Armour Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Baby Point,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Baldwin Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.014925,0.0,0.0,0.029851,0.0,0.0,0.0,0.0,0.0,0.0
9,Bathurst Manor,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [701]:
oldtoronto_grouped.shape

(206, 305)

In [702]:
#print each neighborhood along with the top 5 most common venues
num_top_venues=5

for hood in oldtoronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    cond=oldtoronto_grouped['Neighborhood']==hood
    temp=oldtoronto_grouped[cond].T.reset_index() # T transpose index and columns
    temp.columns=['venue','freq']
    temp=temp.iloc[1:]
    temp['freq']=temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

---- Birch Cliff Heights----
             venue  freq
0             Bank  0.25
1  Thai Restaurant  0.25
2            Diner  0.25
3   Discount Store  0.25
4  Other Nightlife  0.00


----Agincourt----
                           venue  freq
0                           Park   1.0
1                            ATM   0.0
2                Organic Grocery   0.0
3                    Pastry Shop   0.0
4  Paper / Office Supplies Store   0.0


----Alderwood----
                venue  freq
0          Playground  0.25
1  Athletics & Sports  0.25
2                Park  0.25
3              Market  0.25
4           Nightclub  0.00


----Alexandra Park----
                           venue  freq
0  Vegetarian / Vegan Restaurant  0.06
1                            Bar  0.06
2                           Café  0.05
3                    Coffee Shop  0.04
4                   Dessert Shop  0.03


----Amesbury----
                       venue  freq
0                Supermarket  0.33
1              Grocery Store  0

                venue  freq
0  Italian Restaurant  0.08
1      Sandwich Place  0.08
2         Coffee Shop  0.08
3        Dessert Shop  0.08
4                Café  0.08


----Deer Park----
                venue  freq
0         Coffee Shop  0.13
1  Italian Restaurant  0.07
2                Café  0.05
3                 Pub  0.04
4      Sandwich Place  0.04


----Discovery District----
                venue  freq
0         Coffee Shop  0.17
1                Café  0.07
2  Italian Restaurant  0.05
3     Bubble Tea Shop  0.04
4                 Bar  0.04


----Distillery District----
           venue  freq
0    Coffee Shop  0.11
1         Bakery  0.07
2           Café  0.05
3        Theater  0.05
4  Boat or Ferry  0.05


----Don Mills----
                 venue  freq
0           Restaurant  0.06
1  American Restaurant  0.06
2          Coffee Shop  0.06
3       Sandwich Place  0.03
4         Burger Joint  0.03


----Don Valley Village----
                  venue  freq
0  Fast Food Restaurant  0

             venue  freq
0     Hockey Arena  0.25
1       Restaurant  0.25
2         Tea Room  0.25
3  Organic Grocery  0.00
4             Park  0.00


----Humberwood----
                           venue  freq
0               Business Service  0.25
1                Organic Grocery  0.00
2                    Pastry Shop  0.00
3                           Park  0.00
4  Paper / Office Supplies Store  0.00


----Humewood–Cedarvale----
               venue  freq
0       Hockey Arena  0.25
1  Convenience Store  0.25
2              Trail  0.25
3                ATM  0.00
4        Pastry Shop  0.00


----Ionview----
                venue  freq
0         Coffee Shop  0.29
1                Bank  0.14
2  Chinese Restaurant  0.14
3       Grocery Store  0.14
4      Sandwich Place  0.14


----Islington–City Centre West----
               venue  freq
0   Sushi Restaurant   0.2
1  Korean Restaurant   0.1
2        Pizza Place   0.1
3     Sandwich Place   0.1
4          BBQ Joint   0.1


----Jamaica----
 

                       venue  freq
0                        Gym  0.11
1                      Hotel  0.05
2           Greek Restaurant  0.05
3  Middle Eastern Restaurant  0.05
4             Clothing Store  0.05


----North York City Centre----
              venue  freq
0       Coffee Shop  0.08
1  Ramen Restaurant  0.06
2    Sandwich Place  0.04
3              Café  0.04
4        Restaurant  0.04


----O'Connor–Parkview----
                  venue  freq
0           Pizza Place  0.18
1          Intersection  0.09
2             Gastropub  0.09
3        Breakfast Spot  0.09
4  Fast Food Restaurant  0.09


----Oakridge----
                  venue  freq
0            Restaurant  0.14
1    Chinese Restaurant  0.14
2          Dessert Shop  0.14
3     Convenience Store  0.14
4  Fast Food Restaurant  0.14


----Oakwood–Vaughan----
                venue  freq
0       Grocery Store   0.2
1  Italian Restaurant   0.2
2         Coffee Shop   0.2
3          Beer Store   0.2
4  Seafood Restaurant   0.2


                venue  freq
0  Italian Restaurant  0.15
1                Park  0.05
2    Sushi Restaurant  0.05
3         Coffee Shop  0.05
4                 Spa  0.05


----Sunnylea----
                    venue  freq
0             Coffee Shop  0.19
1      Italian Restaurant  0.05
2            Liquor Store  0.05
3  Furniture / Home Store  0.05
4        Sushi Restaurant  0.05


----Swansea----
                  venue  freq
0                  Park  0.50
1          Dance Studio  0.25
2          Skating Rink  0.25
3                   ATM  0.00
4  Other Great Outdoors  0.00


----Tam O'Shanter – Sullivan----
                   venue  freq
0  Vietnamese Restaurant  0.29
1   Taiwanese Restaurant  0.14
2       Greek Restaurant  0.14
3   Caribbean Restaurant  0.14
4     Mexican Restaurant  0.14


----The Annex----
         venue  freq
0         Park  0.11
1          Pub  0.06
2          Gym  0.06
3   Restaurant  0.06
4  Pizza Place  0.06


----The Beaches----
                 venue  freq
0    

Put previous data in a dataframe

In [703]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]


In [716]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = oldtoronto_grouped['Neighborhood']

for ind in np.arange(oldtoronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(oldtoronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Birch Cliff Heights,Bank,Thai Restaurant,Discount Store,Diner,Yoga Studio,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market
1,Agincourt,Park,Yoga Studio,Fast Food Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Filipino Restaurant
2,Alderwood,Athletics & Sports,Market,Playground,Park,Yoga Studio,Farmers Market,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant
3,Alexandra Park,Vegetarian / Vegan Restaurant,Bar,Café,Coffee Shop,Cocktail Bar,Dessert Shop,French Restaurant,Ice Cream Shop,Sandwich Place,Restaurant
4,Amesbury,Supermarket,South American Restaurant,Grocery Store,Yoga Studio,Farmers Market,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm


## Cluster Neighborhoods
run k_means to cluster the neighborhood into 5 clusters

In [717]:
kclusters=5

oldtoronto_grouped_clustering=oldtoronto_grouped.drop('Neighborhood', 1)

#run k-means clustering

kmeans=KMeans(init='k-means++', n_clusters=kclusters, n_init=15, random_state=0)
kmeans.fit(oldtoronto_grouped_clustering)

kmeans.labels_

array([0, 1, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 1, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0,
       0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0])

In [718]:
#create df that includes the cluster as well as the top 10 venues for each neighborhood
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
oldtoronto_merged=oldtoronto_data
oldtoronto_merged=oldtoronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

oldtoronto_merged.head() 

Unnamed: 0,District,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Old Toronto,Alexandra Park,43.65,-79.4,0.0,Vegetarian / Vegan Restaurant,Bar,Café,Coffee Shop,Cocktail Bar,Dessert Shop,French Restaurant,Ice Cream Shop,Sandwich Place,Restaurant
1,Old Toronto,The Annex,43.67,-79.404,0.0,Park,Gym,Coffee Shop,Thai Restaurant,Restaurant,Pizza Place,Pub,Bubble Tea Shop,Clothing Store,Falafel Restaurant
2,Old Toronto,Baldwin Village,43.656,-79.3934,0.0,Coffee Shop,Sandwich Place,Café,Chinese Restaurant,Ramen Restaurant,Japanese Restaurant,Bar,Arts & Crafts Store,Bubble Tea Shop,Art Gallery
3,Old Toronto,Cabbagetown,43.6664,-79.3629,0.0,Coffee Shop,Thai Restaurant,Park,Bakery,Baseball Field,Sandwich Place,General Entertainment,Beer Store,Taiwanese Restaurant,Liquor Store
4,Old Toronto,CityPlace,43.640044,-79.395179,0.0,Coffee Shop,Gym,Park,Café,Pizza Place,Japanese Restaurant,Grocery Store,Pub,French Restaurant,Diner


In [719]:
#Let's visualize the resulting clusters

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(oldtoronto_merged['Latitude'], oldtoronto_merged['Longitude'], oldtoronto_merged['Neighborhood'], kmeans.labels_):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## 5. Examine Clusters
examine each cluster and determine the discriminating venue categories that distinguish each cluster

In [720]:
#Cluster 1
oldtoronto_merged.loc[oldtoronto_merged['Cluster Labels'] == 0, oldtoronto_merged.columns[[1] + list(range(5, oldtoronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Alexandra Park,Vegetarian / Vegan Restaurant,Bar,Café,Coffee Shop,Cocktail Bar,Dessert Shop,French Restaurant,Ice Cream Shop,Sandwich Place,Restaurant
1,The Annex,Park,Gym,Coffee Shop,Thai Restaurant,Restaurant,Pizza Place,Pub,Bubble Tea Shop,Clothing Store,Falafel Restaurant
2,Baldwin Village,Coffee Shop,Sandwich Place,Café,Chinese Restaurant,Ramen Restaurant,Japanese Restaurant,Bar,Arts & Crafts Store,Bubble Tea Shop,Art Gallery
3,Cabbagetown,Coffee Shop,Thai Restaurant,Park,Bakery,Baseball Field,Sandwich Place,General Entertainment,Beer Store,Taiwanese Restaurant,Liquor Store
4,CityPlace,Coffee Shop,Gym,Park,Café,Pizza Place,Japanese Restaurant,Grocery Store,Pub,French Restaurant,Diner
5,Chinatown,Café,Bar,Vegetarian / Vegan Restaurant,Mexican Restaurant,Dumpling Restaurant,Ramen Restaurant,Dessert Shop,Bakery,Vietnamese Restaurant,Chinese Restaurant
6,Church and Wellesley,Coffee Shop,Gay Bar,Japanese Restaurant,Sushi Restaurant,Restaurant,Ramen Restaurant,Dance Studio,Nightclub,Men's Store,Fast Food Restaurant
7,Corktown,Coffee Shop,Pub,Breakfast Spot,Park,Restaurant,Thai Restaurant,Beer Store,Spa,Café,Hotel
8,Discovery District,Coffee Shop,Café,Italian Restaurant,Bubble Tea Shop,Bar,Ice Cream Shop,Chinese Restaurant,Spa,Salad Place,Restaurant
9,Distillery District,Coffee Shop,Bakery,Café,Boat or Ferry,Theater,Tech Startup,Farmers Market,Bank,Shoe Store,Pub


In [721]:
#Cluster 2
oldtoronto_merged.loc[oldtoronto_merged['Cluster Labels'] == 1, oldtoronto_merged.columns[[1] + list(range(5, oldtoronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
52,Moore Park,Park,Yoga Studio,Fast Food Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Filipino Restaurant
57,Wanless Park,Park,Yoga Studio,Fast Food Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Filipino Restaurant
88,Swansea,Park,Skating Rink,Dance Studio,Farmers Market,Egyptian Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm


In [722]:
#Cluster 3
oldtoronto_merged.loc[oldtoronto_merged['Cluster Labels'] == 2, oldtoronto_merged.columns[[1] + list(range(5, oldtoronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
50,Lytton Park,Skating Rink,Spa,Health Food Store,Park,Flea Market,Fish Market,Fish & Chips Shop,Filipino Restaurant,Fast Food Restaurant,Flower Shop
54,Rosedale,Park,Trail,Playground,Yoga Studio,Farmers Market,Egyptian Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant
55,South Hill,Park,French Restaurant,History Museum,Athletics & Sports,Historic Site,Skating Rink,Falafel Restaurant,Food & Drink Shop,Egyptian Restaurant,Electronics Store
62,Bracondale Hill,Park,Event Space,Café,Farmers Market,Fast Food Restaurant,Electronics Store,Ethiopian Restaurant,Falafel Restaurant,Farm,Yoga Studio


In [723]:
#Cluster 4
oldtoronto_merged.loc[oldtoronto_merged['Cluster Labels'] == 3, oldtoronto_merged.columns[[1] + list(range(5, oldtoronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue


In [724]:
#Cluster 5
oldtoronto_merged.loc[oldtoronto_merged['Cluster Labels'] == 4, oldtoronto_merged.columns[[1] + list(range(5, oldtoronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
