In [10]:
import numpy as np
import pandas as pd

import json 
from geopy.geocoders import Nominatim 
import requests 
from pandas.io.json import json_normalize 

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans
import folium
from bs4 import BeautifulSoup

<h3>Store the wikipedia link as a Soup object<h3>

In [11]:
url = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
soup = BeautifulSoup(url.text, "html.parser")

In [12]:
rows = table.find_all("tr")
columns = [v.text.replace("\n", "") for v in rows[0].find_all("th")]
print(columns)

['Postcode', 'Borough', 'Neighbourhood']


In [19]:

table = soup.tbody

headers = []
rows = []
i=-1

for head in table.find_all('th'):
    head = head.text.strip()
    headers.append(head)

for row in table.find_all('tr'):
    i=i+1
    rows.append([])
    for r in row.find_all('td'):
        r = r.text.strip()
        rows[i].append(r)

i=-1
for row in rows:
    i+=1
    for item in row:
        if item == 'Not assigned':
            item = row[1]
            rows[i][2]=item

df = pd.DataFrame(data=rows, columns=headers)

df=df.drop([0], axis=0)

df = df[~df['Borough'].str.contains("Not assigned") == True]

can_df=df.groupby(['Postcode','Borough'], as_index=False).agg(lambda col: ', '.join(col))

can_df.rename(columns ={'Postcode' : 'PostalCode'}, inplace=True)
can_df

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv..."
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ..."


In [21]:
can_df.shape

(103, 3)

<h3>Adding latitudes and logitudes<h3>

In [24]:
import requests
import io

url="http://cocl.us/Geospatial_data"
s=requests.get(url).content
c=pd.read_csv(io.StringIO(s.decode('utf-8')))

c.columns = ['Postcode', 'Latitude', 'Longitude']
can_df = pd.merge(c, df, on='Postcode')

can_df = can_df[['Postcode', 'Borough', 'Neighbourhood', 'Latitude', 'Longitude']]
can_df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,Rouge,43.806686,-79.194353
1,M1B,Scarborough,Malvern,43.806686,-79.194353
2,M1C,Scarborough,Highland Creek,43.784535,-79.160497
3,M1C,Scarborough,Rouge Hill,43.784535,-79.160497
4,M1C,Scarborough,Port Union,43.784535,-79.160497


In [26]:
tor_df = can_df[can_df['Borough'].str.contains("Toronto") == True].reset_index(drop=True)
tor_df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,The Danforth West,43.679557,-79.352188
2,M4K,East Toronto,Riverdale,43.679557,-79.352188
3,M4L,East Toronto,The Beaches West,43.668999,-79.315572
4,M4L,East Toronto,India Bazaar,43.668999,-79.315572


<h3>Getting coordinates of Toronto<h3>

In [27]:
address = 'Toronto, CA'

geolocator = Nominatim(user_agent="ca_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


<h3>Generating Map<h3>

In [29]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

#add markers to map
for lat, lng, borough, neighbourhood in zip(tor_df['Latitude'], tor_df['Longitude'], tor_df['Borough'], tor_df['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius = 5,
        popup=label,
        color='purple',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)
    
map_toronto

<h3>Now using FourSqaure API<h3>

In [31]:
CLIENT_ID=''
CLIENT_SECRET=''

In [32]:
radius = 500
VERSION = 20190322
LIMIT = 100

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [33]:
toronto_venues = getNearbyVenues(names=tor_df['Neighbourhood'],
                                   latitudes=tor_df['Latitude'],
                                   longitudes=tor_df['Longitude']
                                  )
print('Complete')

The Beaches
The Danforth West
Riverdale
The Beaches West
India Bazaar
Studio District
Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park
Summerhill East
Deer Park
Forest Hill SE
Rathnelly
South Hill
Summerhill West
Rosedale
Cabbagetown
St. James Town
Church and Wellesley
Harbourfront
Ryerson
Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide
King
Richmond
Harbourfront East
Toronto Islands
Union Station
Design Exchange
Toronto Dominion Centre
Commerce Court
Victoria Hotel
Roselawn
Forest Hill North
Forest Hill West
The Annex
North Midtown
Yorkville
Harbord
University of Toronto
Chinatown
Grange Park
Kensington Market
CN Tower
Bathurst Quay
Island airport
Harbourfront West
King and Spadina
Railway Lands
South Niagara
Stn A PO Boxes 25 The Esplanade
First Canadian Place
Underground city
Christie
Dovercourt Village
Dufferin
Little Portugal
Trinity
Brockton
Exhibition Place
Parkdale Village
High Park
The Junction South
Parkdale
Roncesvalles
Runnymede

<h3>Examining DataFrame<h3>

In [34]:
print('Shape of dataframe: ', toronto_venues.shape)

Shape of dataframe:  (3245, 7)


In [35]:
toronto_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Adelaide,100,100,100,100,100,100
Bathurst Quay,17,17,17,17,17,17
Berczy Park,56,56,56,56,56,56
Brockton,24,24,24,24,24,24
Business Reply Mail Processing Centre 969 Eastern,17,17,17,17,17,17
...,...,...,...,...,...,...
Underground city,100,100,100,100,100,100
Union Station,100,100,100,100,100,100
University of Toronto,35,35,35,35,35,35
Victoria Hotel,100,100,100,100,100,100


In [36]:
print('Number of unique categories: ', len(toronto_venues['Venue Category'].unique()))

Number of unique categories:  232


<h3>Encoding Categories<h3>

In [37]:
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

toronto_onehot['Neighbourhood'] = toronto_venues['Neighbourhood'] 

fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_grouped = toronto_onehot.groupby('Neighbourhood').mean().reset_index()

In [38]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [39]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Adelaide,Coffee Shop,Restaurant,Thai Restaurant,Café,Bar,Sushi Restaurant,Bookstore,Seafood Restaurant,Asian Restaurant,Breakfast Spot
1,Bathurst Quay,Airport Service,Airport Lounge,Airport Terminal,Sculpture Garden,Coffee Shop,Airport,Airport Food Court,Airport Gate,Bar,Harbor / Marina
2,Berczy Park,Coffee Shop,Cocktail Bar,Bakery,Cheese Shop,Café,Restaurant,Seafood Restaurant,Beer Bar,Farmers Market,Gourmet Shop
3,Brockton,Café,Breakfast Spot,Coffee Shop,Yoga Studio,Gym,Pet Store,Performing Arts Venue,Nightclub,Japanese Restaurant,Italian Restaurant
4,Business Reply Mail Processing Centre 969 Eastern,Yoga Studio,Auto Workshop,Park,Pizza Place,Recording Studio,Restaurant,Burrito Place,Brewery,Light Rail Station,Skate Park


<h3>Performing Clustering Using k=3 in K-Mean<h3>

In [40]:
kclusters = 3

toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', 1)

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

kmeans.labels_[0:40]

array([0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 2, 0, 0])

In [41]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = tor_df

toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')


In [42]:
toronto_merged.shape

(74, 16)

In [43]:
toronto_merged.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Trail,Neighborhood,Pub,Health Food Store,Yoga Studio,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant
1,M4K,East Toronto,The Danforth West,43.679557,-79.352188,0,Greek Restaurant,Coffee Shop,Italian Restaurant,Ice Cream Shop,Furniture / Home Store,Yoga Studio,Frozen Yogurt Shop,Pub,Pizza Place,Liquor Store
2,M4K,East Toronto,Riverdale,43.679557,-79.352188,0,Greek Restaurant,Coffee Shop,Italian Restaurant,Ice Cream Shop,Furniture / Home Store,Yoga Studio,Frozen Yogurt Shop,Pub,Pizza Place,Liquor Store
3,M4L,East Toronto,The Beaches West,43.668999,-79.315572,0,Park,Pizza Place,Sushi Restaurant,Brewery,Fish & Chips Shop,Pub,Fast Food Restaurant,Italian Restaurant,Steakhouse,Movie Theater
4,M4L,East Toronto,India Bazaar,43.668999,-79.315572,0,Park,Pizza Place,Sushi Restaurant,Brewery,Fish & Chips Shop,Pub,Fast Food Restaurant,Italian Restaurant,Steakhouse,Movie Theater


<h3>Visualizing Clusters<h3>

In [45]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ', Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

<h3>Examining Clusters<h3>

In [47]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[2] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,The Beaches,0,Trail,Neighborhood,Pub,Health Food Store,Yoga Studio,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant
1,The Danforth West,0,Greek Restaurant,Coffee Shop,Italian Restaurant,Ice Cream Shop,Furniture / Home Store,Yoga Studio,Frozen Yogurt Shop,Pub,Pizza Place,Liquor Store
2,Riverdale,0,Greek Restaurant,Coffee Shop,Italian Restaurant,Ice Cream Shop,Furniture / Home Store,Yoga Studio,Frozen Yogurt Shop,Pub,Pizza Place,Liquor Store
3,The Beaches West,0,Park,Pizza Place,Sushi Restaurant,Brewery,Fish & Chips Shop,Pub,Fast Food Restaurant,Italian Restaurant,Steakhouse,Movie Theater
4,India Bazaar,0,Park,Pizza Place,Sushi Restaurant,Brewery,Fish & Chips Shop,Pub,Fast Food Restaurant,Italian Restaurant,Steakhouse,Movie Theater
...,...,...,...,...,...,...,...,...,...,...,...,...
69,Roncesvalles,0,Breakfast Spot,Gift Shop,Movie Theater,Eastern European Restaurant,Dog Run,Italian Restaurant,Bar,Restaurant,Dessert Shop,Bookstore
70,Runnymede,0,Coffee Shop,Café,Italian Restaurant,Sushi Restaurant,Pizza Place,Electronics Store,IT Services,Fish & Chips Shop,Indie Movie Theater,Bookstore
71,Swansea,0,Coffee Shop,Café,Italian Restaurant,Sushi Restaurant,Pizza Place,Electronics Store,IT Services,Fish & Chips Shop,Indie Movie Theater,Bookstore
72,Queen's Park,0,Coffee Shop,Park,Burger Joint,Beer Bar,Sandwich Place,Burrito Place,Café,Portuguese Restaurant,Chinese Restaurant,College Auditorium


In [48]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[2] + list(range(5, toronto_merged.shape[1]))]]


Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
48,CN Tower,1,Airport Service,Airport Lounge,Airport Terminal,Sculpture Garden,Coffee Shop,Airport,Airport Food Court,Airport Gate,Bar,Harbor / Marina
49,Bathurst Quay,1,Airport Service,Airport Lounge,Airport Terminal,Sculpture Garden,Coffee Shop,Airport,Airport Food Court,Airport Gate,Bar,Harbor / Marina
50,Island airport,1,Airport Service,Airport Lounge,Airport Terminal,Sculpture Garden,Coffee Shop,Airport,Airport Food Court,Airport Gate,Bar,Harbor / Marina
51,Harbourfront West,1,Airport Service,Airport Lounge,Airport Terminal,Sculpture Garden,Coffee Shop,Airport,Airport Food Court,Airport Gate,Bar,Harbor / Marina
52,King and Spadina,1,Airport Service,Airport Lounge,Airport Terminal,Sculpture Garden,Coffee Shop,Airport,Airport Food Court,Airport Gate,Bar,Harbor / Marina
53,Railway Lands,1,Airport Service,Airport Lounge,Airport Terminal,Sculpture Garden,Coffee Shop,Airport,Airport Food Court,Airport Gate,Bar,Harbor / Marina
54,South Niagara,1,Airport Service,Airport Lounge,Airport Terminal,Sculpture Garden,Coffee Shop,Airport,Airport Food Court,Airport Gate,Bar,Harbor / Marina


In [49]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[2] + list(range(5, toronto_merged.shape[1]))]]


Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,Moore Park,2,Playground,Trail,Yoga Studio,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop
11,Summerhill East,2,Playground,Trail,Yoga Studio,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop
17,Rosedale,2,Park,Playground,Trail,Department Store,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant
