In [1]:
import pandas as pd
import numpy as np

In [2]:
from bs4 import BeautifulSoup

In [3]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
url

'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

In [4]:
import requests

In [5]:
page = requests.get(url)

In [6]:
import lxml.html as lh

In [7]:
doc = lh.fromstring(page.content)
tr_elements = doc.xpath('//tr')

In [8]:
[len(T) for T in tr_elements[:12]]

[3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]

In [9]:
tr_elements = doc.xpath('//tr')
col=[]
i=0
for t in tr_elements[0]:
    i+=1
    name=t.text_content()
    col.append((name,[]))

In [10]:
#Since out first row is the header, data is stored on the second row onwards
for j in range(1,len(tr_elements)):
    #T is our j'th row
    T=tr_elements[j]
    
    #If row is not of size 3, the //tr data is not from our table 
    if len(T)!=3:
        break
    
    #i is the index of our column
    i=0
    
    #Iterate through each element of the row
    for t in T.iterchildren():
        data=t.text_content() 
        #Check if row is empty
        if i>0:
        #Convert any numerical value to integers
            try:
                data=int(data)
            except:
                pass
        #Append the data to the empty list of the i'th column
        col[i][1].append(data)
        #Increment i for the next column
        i+=1

In [11]:
Dict={title:column for (title,column) in col}
df=pd.DataFrame(Dict)

In [12]:
df.head(5)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned\n
1,M2A,Not assigned,Not assigned\n
2,M3A,North York,Parkwoods\n
3,M4A,North York,Victoria Village\n
4,M5A,Downtown Toronto,Harbourfront\n


In [13]:
df.dropna(axis = 0, how = 'all')

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned\n
1,M2A,Not assigned,Not assigned\n
2,M3A,North York,Parkwoods\n
3,M4A,North York,Victoria Village\n
4,M5A,Downtown Toronto,Harbourfront\n
5,M5A,Downtown Toronto,Regent Park\n
6,M6A,North York,Lawrence Heights\n
7,M6A,North York,Lawrence Manor\n
8,M7A,Queen's Park,Not assigned\n
9,M8A,Not assigned,Not assigned\n


In [14]:
df.columns

Index(['Postcode', 'Borough', 'Neighbourhood\n'], dtype='object')

In [15]:
df.rename(index=str, columns={'Neighbourhood\n': 'Neighbourhood'}, inplace = True)

In [16]:
df['Neighbourhood'] = df['Neighbourhood'].map(lambda x: x.rstrip('\n'))
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [17]:
df2 = df.where(df['Borough'] != 'Not assigned')

In [18]:
df2.dropna(axis = 0, how = 'all', inplace = True)
df2.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


In [19]:
for i in range(len(df2)):
    if df2.iloc[i].Neighbourhood == 'Not assigned':
        df2.iloc[i].Neighbourhood = df2.iloc[i].Borough
df2.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Queen's Park
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


In [20]:
df2.where(df2.Neighbourhood == 'Not assigned').count()

Postcode         0
Borough          0
Neighbourhood    0
dtype: int64

In [21]:
df3 = df2

In [22]:
for i in range(len(df3)):
    count = 0
    for j in range(len(df3)-1):
        if df3.iloc[i].Postcode == df3.iloc[j+1].Postcode:
            count = count + 1
        if count > 1:
            df3.iloc[i].Neighbourhood = df3.iloc[i].Neighbourhood+','+df3.iloc[j+1].Neighbourhood
df3.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Harbourfront,Regent Park,Lawrence Heights,Lawr..."
5,M5A,Downtown Toronto,"Regent Park,Regent Park,Lawrence Heights,Lawre..."
6,M6A,North York,"Lawrence Heights,Lawrence Manor,Queen's Park,I..."
7,M6A,North York,"Lawrence Manor,Lawrence Manor,Queen's Park,Isl..."
8,M7A,Queen's Park,Queen's Park
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,"Rouge,Malvern,Don Mills North,Woodbine Gardens..."
12,M1B,Scarborough,"Malvern,Malvern,Don Mills North,Woodbine Garde..."


In [23]:
df4 = df3

In [24]:
df4.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Harbourfront,Regent Park,Lawrence Heights,Lawr..."
5,M5A,Downtown Toronto,"Regent Park,Regent Park,Lawrence Heights,Lawre..."
6,M6A,North York,"Lawrence Heights,Lawrence Manor,Queen's Park,I..."


In [25]:
df4.drop_duplicates(subset = ['Postcode'], keep = 'first', inplace = True)
df4.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Harbourfront,Regent Park,Lawrence Heights,Lawr..."
6,M6A,North York,"Lawrence Heights,Lawrence Manor,Queen's Park,I..."
8,M7A,Queen's Park,Queen's Park
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,"Rouge,Malvern,Don Mills North,Woodbine Gardens..."
14,M3B,North York,Don Mills North
15,M4B,East York,"Woodbine Gardens,Parkview Hill,Ryerson,Garden ..."
17,M5B,Downtown Toronto,"Ryerson,Garden District,Glencairn,Cloverdale,I..."


In [45]:
geo_coordinates = pd.read_csv("http://cocl.us/Geospatial_data")

In [46]:
geo_coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [47]:
geo_coordinates.columns = ["Postcode", "Latitude","Longitude"]

In [48]:
geo_coordinates.dtypes

Postcode      object
Latitude     float64
Longitude    float64
dtype: object

In [49]:
df4.dtypes

Postcode         object
Borough          object
Neighbourhood    object
dtype: object

In [50]:
df5 = df4.merge(geo_coordinates, how = 'left')
df5

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront,Regent Park,Lawrence Heights,Lawr...",43.654260,-79.360636
3,M6A,North York,"Lawrence Heights,Lawrence Manor,Queen's Park,I...",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Rouge,Malvern,Don Mills North,Woodbine Gardens...",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens,Parkview Hill,Ryerson,Garden ...",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson,Garden District,Glencairn,Cloverdale,I...",43.657162,-79.378937


In [51]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

In [52]:
address = 'Toronto, Ontario, Canada'

geolocator = Nominatim(user_agent="torono_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [53]:
import folium # map rendering library

In [54]:
neighborhoods = df5

In [55]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [56]:
# @hidden cell
CLIENT_ID = 'FHF145TU3DZAAT0UN0XDWLTROBCTIZQNP3RDP2CPP5MH5ACU' # your Foursquare ID
CLIENT_SECRET = '011IFEWPZLJJEW4CYOIT214ZYXWZVCBNBHXARTQBWYIUGV3W' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

In [57]:
neighborhoods.loc[0, 'Neighbourhood']

'Parkwoods'

In [58]:
neighborhood_latitude = neighborhoods.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = neighborhoods.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = neighborhoods.loc[0, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Parkwoods are 43.7532586, -79.3296565.


In [59]:
LIMIT = 100
latitude = neighborhood_latitude
longitude = neighborhood_longitude
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, radius, LIMIT)

In [60]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5cea3d40351e3d1288a3de7a'},
  'headerLocation': 'Parkwoods - Donalda',
  'headerFullLocation': 'Parkwoods - Donalda, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 3,
  'suggestedBounds': {'ne': {'lat': 43.757758604500005,
    'lng': -79.32343823984928},
   'sw': {'lat': 43.7487585955, 'lng': -79.33587476015072}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4e8d9dcdd5fbbbb6b3003c7b',
       'name': 'Brookbanks Park',
       'location': {'address': 'Toronto',
        'lat': 43.751976046055574,
        'lng': -79.33214044722958,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.751976046055574,
          'lng': -79.33214044722958}],
        'distance': 245,
        'cc': 'CA',
        'c

In [61]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [62]:
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

In [63]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Brookbanks Park,Park,43.751976,-79.33214
1,KFC,Fast Food Restaurant,43.754387,-79.333021
2,Variety Store,Food & Drink Shop,43.751974,-79.333114


In [64]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

3 venues were returned by Foursquare.


In [65]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [66]:
toronto_venues = getNearbyVenues(names=neighborhoods['Neighbourhood'],
                                   latitudes=neighborhoods['Latitude'],
                                   longitudes=neighborhoods['Longitude']
                                  )

Parkwoods
Victoria Village
Harbourfront,Regent Park,Lawrence Heights,Lawrence Manor,Queen's Park,Islington Avenue,Rouge,Malvern,Don Mills North,Woodbine Gardens,Parkview Hill,Ryerson,Garden District,Glencairn,Cloverdale,Islington,Martin Grove,Princess Gardens,West Deane Park,Highland Creek,Rouge Hill,Port Union,Flemingdon Park,Don Mills South,Woodbine Heights,St. James Town,Humewood-Cedarvale,Bloordale Gardens,Eringate,Markland Wood,Old Burnhamthorpe,Guildwood,Morningside,West Hill,The Beaches,Berczy Park,Caledonia-Fairbanks,Woburn,Leaside,Central Bay Street,Christie,Cedarbrae,Hillcrest Village,Bathurst Manor,Downsview North,Wilson Heights,Thorncliffe Park,Adelaide,King,Richmond,Dovercourt Village,Dufferin,Scarborough Village,Fairview,Henry Farm,Oriole,Northwood Park,York University,East Toronto,Harbourfront East,Toronto Islands,Union Station,Little Portugal,Trinity,East Birchmount Park,Ionview,Kennedy Park,Bayview Village,CFB Toronto,Downsview East,The Danforth West,Riverdale,Design E

In [67]:
print(neighborhoods.shape)
neighborhoods.head()

(103, 5)


Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront,Regent Park,Lawrence Heights,Lawr...",43.65426,-79.360636
3,M6A,North York,"Lawrence Heights,Lawrence Manor,Queen's Park,I...",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494


In [68]:
toronto_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide,King,Richmond,Dovercourt Village,Dufferin,Scarborough Village,Fairview,Henry Farm,Oriole,Northwood Park,York University,East Toronto,Harbourfront East,Toronto Islands,Union Station,Little Portugal,Trinity,East Birchmount Park,Ionview,Kennedy Park,Bayview Village,CFB Toronto,Downsview East,The Danforth West,Riverdale,Design Exchange,Toronto Dominion Centre,Brockton,Exhibition Place,Parkdale Village,Clairlea,Golden Mile,Oakridge,Silver Hills,York Mills,Downsview West,The Beaches West,India Bazaar,Commerce Court,Victoria Hotel,Downsview,North Park,Upwood Park,Humber Summit,Cliffcrest,Cliffside,Scarborough Village West,Newtonbrook,Willowdale,Downsview Central,Studio District,Bedford Park,Lawrence Manor East,Del Ray,Keelesdale,Mount Dennis,Silverthorn,Emery,Humberlea,Birch Cliff,Cliffside West,Willowdale South,Downsview Northwest,Lawrence Park,Roselawn,The Junction North,Runnymede,Weston,Dorset Park,Scarborough Town Centre,Wexford Heights,York Mills West,Davisville North,Forest Hill North,Forest Hill West,High Park,The Junction South,Westmount,Maryvale,Wexford,Willowdale West,North Toronto West,The Annex,North Midtown,Yorkville,Parkdale,Roncesvalles,Canada Post Gateway Processing Centre,Kingsview Village,Martin Grove Gardens,Richview Gardens,St. Phillips,Agincourt,Davisville,Harbord,University of Toronto,Runnymede,Swansea,Clarks Corners,Sullivan,Tam O'Shanter,Moore Park,Summerhill East,Chinatown,Grange Park,Kensington Market,Agincourt North,L'Amoreaux East,Milliken,Steeles East,Deer Park,Forest Hill SE,Rathnelly,South Hill,Summerhill West,CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara,Humber Bay Shores,Mimico South,New Toronto,Albion Gardens,Beaumond Heights,Humbergate,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown,L'Amoreaux West,Rosedale,Stn A PO Boxes 25 The Esplanade,Alderwood,Long Branch,Northwest,Upper Rouge,Cabbagetown,St. James Town,First Canadian Place,Underground city,The Kingsway,Montgomery Road,Old Mill North,Church and Wellesley,Business Reply Mail Processing Centre 969 Eastern,Humber Bay,King's Mill Park,Kingsway Park South East,Mimico NE,Old Mill South,The Queensway East,Royal York South East,Sunnylea,Kingsway Park South West,Mimico NW,The Queensway West,Royal York South West,South of Bloor",100,100,100,100,100,100
Agincourt,5,5,5,5,5,5
"Agincourt North,L'Amoreaux East,Milliken,Steeles East,Deer Park,Forest Hill SE,Rathnelly,South Hill,Summerhill West,CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara,Humber Bay Shores,Mimico South,New Toronto,Albion Gardens,Beaumond Heights,Humbergate,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown,L'Amoreaux West,Rosedale,Stn A PO Boxes 25 The Esplanade,Alderwood,Long Branch,Northwest,Upper Rouge,Cabbagetown,St. James Town,First Canadian Place,Underground city,The Kingsway,Montgomery Road,Old Mill North,Church and Wellesley,Business Reply Mail Processing Centre 969 Eastern,Humber Bay,King's Mill Park,Kingsway Park South East,Mimico NE,Old Mill South,The Queensway East,Royal York South East,Sunnylea,Kingsway Park South West,Mimico NW,The Queensway West,Royal York South West,South of Bloor",3,3,3,3,3,3
"Albion Gardens,Beaumond Heights,Humbergate,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown,L'Amoreaux West,Rosedale,Stn A PO Boxes 25 The Esplanade,Alderwood,Long Branch,Northwest,Upper Rouge,Cabbagetown,St. James Town,First Canadian Place,Underground city,The Kingsway,Montgomery Road,Old Mill North,Church and Wellesley,Business Reply Mail Processing Centre 969 Eastern,Humber Bay,King's Mill Park,Kingsway Park South East,Mimico NE,Old Mill South,The Queensway East,Royal York South East,Sunnylea,Kingsway Park South West,Mimico NW,The Queensway West,Royal York South West,South of Bloor",9,9,9,9,9,9
"Alderwood,Long Branch,Northwest,Upper Rouge,Cabbagetown,St. James Town,First Canadian Place,Underground city,The Kingsway,Montgomery Road,Old Mill North,Church and Wellesley,Business Reply Mail Processing Centre 969 Eastern,Humber Bay,King's Mill Park,Kingsway Park South East,Mimico NE,Old Mill South,The Queensway East,Royal York South East,Sunnylea,Kingsway Park South West,Mimico NW,The Queensway West,Royal York South West,South of Bloor",9,9,9,9,9,9
"Bathurst Manor,Downsview North,Wilson Heights,Thorncliffe Park,Adelaide,King,Richmond,Dovercourt Village,Dufferin,Scarborough Village,Fairview,Henry Farm,Oriole,Northwood Park,York University,East Toronto,Harbourfront East,Toronto Islands,Union Station,Little Portugal,Trinity,East Birchmount Park,Ionview,Kennedy Park,Bayview Village,CFB Toronto,Downsview East,The Danforth West,Riverdale,Design Exchange,Toronto Dominion Centre,Brockton,Exhibition Place,Parkdale Village,Clairlea,Golden Mile,Oakridge,Silver Hills,York Mills,Downsview West,The Beaches West,India Bazaar,Commerce Court,Victoria Hotel,Downsview,North Park,Upwood Park,Humber Summit,Cliffcrest,Cliffside,Scarborough Village West,Newtonbrook,Willowdale,Downsview Central,Studio District,Bedford Park,Lawrence Manor East,Del Ray,Keelesdale,Mount Dennis,Silverthorn,Emery,Humberlea,Birch Cliff,Cliffside West,Willowdale South,Downsview Northwest,Lawrence Park,Roselawn,The Junction North,Runnymede,Weston,Dorset Park,Scarborough Town Centre,Wexford Heights,York Mills West,Davisville North,Forest Hill North,Forest Hill West,High Park,The Junction South,Westmount,Maryvale,Wexford,Willowdale West,North Toronto West,The Annex,North Midtown,Yorkville,Parkdale,Roncesvalles,Canada Post Gateway Processing Centre,Kingsview Village,Martin Grove Gardens,Richview Gardens,St. Phillips,Agincourt,Davisville,Harbord,University of Toronto,Runnymede,Swansea,Clarks Corners,Sullivan,Tam O'Shanter,Moore Park,Summerhill East,Chinatown,Grange Park,Kensington Market,Agincourt North,L'Amoreaux East,Milliken,Steeles East,Deer Park,Forest Hill SE,Rathnelly,South Hill,Summerhill West,CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara,Humber Bay Shores,Mimico South,New Toronto,Albion Gardens,Beaumond Heights,Humbergate,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown,L'Amoreaux West,Rosedale,Stn A PO Boxes 25 The Esplanade,Alderwood,Long Branch,Northwest,Upper Rouge,Cabbagetown,St. James Town,First Canadian Place,Underground city,The Kingsway,Montgomery Road,Old Mill North,Church and Wellesley,Business Reply Mail Processing Centre 969 Eastern,Humber Bay,King's Mill Park,Kingsway Park South East,Mimico NE,Old Mill South,The Queensway East,Royal York South East,Sunnylea,Kingsway Park South West,Mimico NW,The Queensway West,Royal York South West,South of Bloor",18,18,18,18,18,18
Bayview Village,4,4,4,4,4,4
"Bedford Park,Lawrence Manor East,Del Ray,Keelesdale,Mount Dennis,Silverthorn,Emery,Humberlea,Birch Cliff,Cliffside West,Willowdale South,Downsview Northwest,Lawrence Park,Roselawn,The Junction North,Runnymede,Weston,Dorset Park,Scarborough Town Centre,Wexford Heights,York Mills West,Davisville North,Forest Hill North,Forest Hill West,High Park,The Junction South,Westmount,Maryvale,Wexford,Willowdale West,North Toronto West,The Annex,North Midtown,Yorkville,Parkdale,Roncesvalles,Canada Post Gateway Processing Centre,Kingsview Village,Martin Grove Gardens,Richview Gardens,St. Phillips,Agincourt,Davisville,Harbord,University of Toronto,Runnymede,Swansea,Clarks Corners,Sullivan,Tam O'Shanter,Moore Park,Summerhill East,Chinatown,Grange Park,Kensington Market,Agincourt North,L'Amoreaux East,Milliken,Steeles East,Deer Park,Forest Hill SE,Rathnelly,South Hill,Summerhill West,CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara,Humber Bay Shores,Mimico South,New Toronto,Albion Gardens,Beaumond Heights,Humbergate,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown,L'Amoreaux West,Rosedale,Stn A PO Boxes 25 The Esplanade,Alderwood,Long Branch,Northwest,Upper Rouge,Cabbagetown,St. James Town,First Canadian Place,Underground city,The Kingsway,Montgomery Road,Old Mill North,Church and Wellesley,Business Reply Mail Processing Centre 969 Eastern,Humber Bay,King's Mill Park,Kingsway Park South East,Mimico NE,Old Mill South,The Queensway East,Royal York South East,Sunnylea,Kingsway Park South West,Mimico NW,The Queensway West,Royal York South West,South of Bloor",25,25,25,25,25,25
Berczy Park,55,55,55,55,55,55
"Birch Cliff,Cliffside West,Willowdale South,Downsview Northwest,Lawrence Park,Roselawn,The Junction North,Runnymede,Weston,Dorset Park,Scarborough Town Centre,Wexford Heights,York Mills West,Davisville North,Forest Hill North,Forest Hill West,High Park,The Junction South,Westmount,Maryvale,Wexford,Willowdale West,North Toronto West,The Annex,North Midtown,Yorkville,Parkdale,Roncesvalles,Canada Post Gateway Processing Centre,Kingsview Village,Martin Grove Gardens,Richview Gardens,St. Phillips,Agincourt,Davisville,Harbord,University of Toronto,Runnymede,Swansea,Clarks Corners,Sullivan,Tam O'Shanter,Moore Park,Summerhill East,Chinatown,Grange Park,Kensington Market,Agincourt North,L'Amoreaux East,Milliken,Steeles East,Deer Park,Forest Hill SE,Rathnelly,South Hill,Summerhill West,CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara,Humber Bay Shores,Mimico South,New Toronto,Albion Gardens,Beaumond Heights,Humbergate,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown,L'Amoreaux West,Rosedale,Stn A PO Boxes 25 The Esplanade,Alderwood,Long Branch,Northwest,Upper Rouge,Cabbagetown,St. James Town,First Canadian Place,Underground city,The Kingsway,Montgomery Road,Old Mill North,Church and Wellesley,Business Reply Mail Processing Centre 969 Eastern,Humber Bay,King's Mill Park,Kingsway Park South East,Mimico NE,Old Mill South,The Queensway East,Royal York South East,Sunnylea,Kingsway Park South West,Mimico NW,The Queensway West,Royal York South West,South of Bloor",4,4,4,4,4,4


In [69]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 281 uniques categories.


In [70]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighbourhood'] = toronto_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [71]:
toronto_onehot.shape

(2246, 282)

In [72]:
toronto_grouped = toronto_onehot.groupby('Neighbourhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Adelaide,King,Richmond,Dovercourt Village,Duff...",0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.010000,0.000000,0.000000,0.000000,0.000000,0.010000,0.000000,0.000000,0.000000
1,Agincourt,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,"Agincourt North,L'Amoreaux East,Milliken,Steel...",0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,"Alderwood,Long Branch,Northwest,Upper Rouge,Ca...",0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
5,"Bathurst Manor,Downsview North,Wilson Heights,...",0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.000000,0.000000,0.055556,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
6,Bayview Village,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
7,"Bedford Park,Lawrence Manor East,Del Ray,Keele...",0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
8,Berczy Park,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.018182,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
9,"Birch Cliff,Cliffside West,Willowdale South,Do...",0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [73]:
toronto_grouped.shape

(100, 282)

In [74]:
num_top_venues = 5

for hood in toronto_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide,King,Richmond,Dovercourt Village,Dufferin,Scarborough Village,Fairview,Henry Farm,Oriole,Northwood Park,York University,East Toronto,Harbourfront East,Toronto Islands,Union Station,Little Portugal,Trinity,East Birchmount Park,Ionview,Kennedy Park,Bayview Village,CFB Toronto,Downsview East,The Danforth West,Riverdale,Design Exchange,Toronto Dominion Centre,Brockton,Exhibition Place,Parkdale Village,Clairlea,Golden Mile,Oakridge,Silver Hills,York Mills,Downsview West,The Beaches West,India Bazaar,Commerce Court,Victoria Hotel,Downsview,North Park,Upwood Park,Humber Summit,Cliffcrest,Cliffside,Scarborough Village West,Newtonbrook,Willowdale,Downsview Central,Studio District,Bedford Park,Lawrence Manor East,Del Ray,Keelesdale,Mount Dennis,Silverthorn,Emery,Humberlea,Birch Cliff,Cliffside West,Willowdale South,Downsview Northwest,Lawrence Park,Roselawn,The Junction North,Runnymede,Weston,Dorset Park,Scarborough Town Centre,Wexford Heights,York Mills West,Davisville North,Forest

In [75]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [76]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide,King,Richmond,Dovercourt Village,Duff...",Coffee Shop,Café,Thai Restaurant,Steakhouse,Bar,American Restaurant,Hotel,Cosmetics Shop,Bakery,Burger Joint
1,Agincourt,Breakfast Spot,Lounge,Sandwich Place,Clothing Store,Chinese Restaurant,Yoga Studio,Dumpling Restaurant,Dog Run,Doner Restaurant,Donut Shop
2,"Agincourt North,L'Amoreaux East,Milliken,Steel...",Park,Playground,Yoga Studio,Dumpling Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore
3,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",Grocery Store,Pharmacy,Beer Store,Sandwich Place,Fried Chicken Joint,Coffee Shop,Fast Food Restaurant,Pizza Place,Construction & Landscaping,Comic Shop
4,"Alderwood,Long Branch,Northwest,Upper Rouge,Ca...",Pizza Place,Skating Rink,Gym,Pharmacy,Coffee Shop,Pub,Pool,Sandwich Place,Yoga Studio,Dog Run


In [77]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

In [78]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 2, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [79]:
kmeans.labels_.shape

(100,)

In [80]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [None]:
neighborhoods_venues_sorted

In [None]:
# add clustering labels
toronto_merged = neighborhoods

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

toronto_merged.head() # check the last columns!

In [None]:
address = 'Toronto, Ontario, Canada'

geolocator = Nominatim(user_agent="torono_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

In [None]:
toronto_merged.head() # check the last columns!

In [None]:
toronto_merged = toronto_merged.fillna(value={'Cluster Labels': 6})

In [None]:
import matplotlib.cm as cm
import matplotlib.colors as colors
import math

In [None]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters+1)
ys = [i + x + (i*x)**2 for i in range(kclusters+1)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels'].astype(int)):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [None]:
toronto_merged['Cluster Labels']