Import the necessary libraries

In [None]:
!conda install -c conda-forge folium=0.5.0 --yes

In [2]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import json
from geopy.geocoders import Nominatim 
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium

Get HTML document behind the URL of the Wikipedia page

In [3]:
r = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",'html.parser')

Parse and process the html document, extracting the table and the td tags, define list of postcodes, boroughs, neighborhoods

In [4]:
soup = BeautifulSoup(r.text[:], 'html.parser')

In [5]:
tag = soup.table

In [6]:
tables = soup.table

In [7]:
tds = tables.find_all('td')

In [8]:
postcode = tds[0::3]
postcode = list(map(lambda i:str(i)[4:7],postcode))


In [9]:
borough = tds[1::3]
borough = list(map(lambda i:str(i).split('>')[2].split('<')[0], borough))


In [10]:
neighborhood = tds[2::3]
neighborhood = list(map(lambda i:str(i).split('>')[2].split('<')[0], neighborhood))


Process the lists into a Dataframe, organize column order, delete rows with N/A borough, copy borough name for rows with N/A neighborhood

In [11]:

df = pd.DataFrame({'PostalCode':postcode, 'Borough':borough, 'Neighborhood':neighborhood} )
df = df.drop(df[df['Borough']==''].index)
mask = df['Neighborhood'] == ''
df.loc[mask, 'Neighborhood'] = df.loc[mask,'Borough']
df.reset_index(drop = True, inplace = True)
cols = df.columns.tolist()
cols = [cols[2], cols[0], cols[1]]
df = df[cols]
#Combine neighborhoods with same PostalCodes
df = df.groupby(['PostalCode','Borough'])['Neighborhood'].unique().apply(', '.join).reset_index()

Get the shape of the dataframe

In [12]:
df.shape

(100, 3)

Read lats and longs into dataframe from csv, remove non-matching codes, and create final dataframe

In [13]:
df1 = pd.read_csv('https://cocl.us/Geospatial_data')

df2 = df1.drop([73,74,84]).reset_index()
df2['PostalCode'] = df2['Postal Code']

df2.drop(columns = ['Postal Code', 'index'], inplace = True)

cols = ['PostalCode', 'Latitude', 'Longitude']
df2 = df2[cols]
df3 = df.merge(df2, on = 'PostalCode')

pd.set_option('display.max_rows', 30)

df3

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Scarborough, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Scarborough, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Scarborough",43.692657,-79.264848


In [14]:
pd.set_option('display.max_rows', 130)

In [15]:
toronto = df3[df3['Borough'] == 'East Toronto']
toronto = toronto.append([df3[df3['Borough'] == 'West Toronto'],df3[df3['Borough'] == 'Central Toronto'],df3[df3['Borough'] == 'Downtown Toronto']]).reset_index()

Get coordinates of Toronto

In [16]:
address = 'Toronto, CA'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [17]:
# create map of New York using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(toronto['Latitude'], toronto['Longitude'], toronto['Borough'], toronto['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [18]:
# The code was removed by Watson Studio for sharing.

In [19]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius,
            LIMIT) 
            
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [20]:
toronto_venues = getNearbyVenues(names=toronto['Neighborhood'],
                                   latitudes=toronto['Latitude'],
                                   longitudes=toronto['Longitude']
                                  )


The Beaches
East Toronto, Riverdale
East Toronto, India Bazaar
East Toronto
East Toronto
Dovercourt Village, West Toronto
Little Portugal, Trinity
West Toronto, Exhibition Place, Parkdale Village
High Park, West Toronto
Parkdale, Roncesvalles
Lawrence Park
Central Toronto
Central Toronto
Central Toronto
Moore Park, Central Toronto
Deer Park, Central Toronto, Rathnelly, South Hill
Central Toronto
Forest Hill North, Central Toronto
The Annex, Central Toronto, Yorkville
Rosedale
Cabbagetown, St. James Town
Church and Wellesley
Harbourfront, Regent Park
Ryerson, Downtown Toronto
St. James Town
Berczy Park
Downtown Toronto
Adelaide, King, Downtown Toronto
Downtown Toronto, Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Commerce Court, Downtown Toronto
Downtown Toronto, University of Toronto
Chinatown, Grange Park, Kensington Market
CN Tower, Downtown Toronto, King and Spadina, Railway Lands, South Niagara
Downtown Toronto
First Canadian Place, Underground city
Downt

In [21]:
print(toronto_venues.shape)
toronto_venues.head()

(802, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
1,The Beaches,43.676357,-79.293031,Starbucks,43.678798,-79.298045,Coffee Shop
2,The Beaches,43.676357,-79.293031,Guru Raghavendra Ji,43.680187,-79.292337,Astrologer
3,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.67873,-79.297478,Grocery Store
4,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood


Check how many venues were returned for each Neighborhood

In [22]:
toronto_venues.groupby('Neighborhood').count().head()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Downtown Toronto",30,30,30,30,30,30
Berczy Park,30,30,30,30,30,30
"CN Tower, Downtown Toronto, King and Spadina, Railway Lands, South Niagara",14,14,14,14,14,14
"Cabbagetown, St. James Town",30,30,30,30,30,30
Central Toronto,59,59,59,59,59,59


Analyze the neighborhoods

In [23]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Tennis Court,Thai Restaurant,Theater,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [24]:
toronto_onehot.shape

(802, 187)

In [25]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Neighborhood,Yoga Studio,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Tennis Court,Thai Restaurant,Theater,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar
0,"Adelaide, King, Downtown Toronto",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,...,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"CN Tower, Downtown Toronto, King and Spadina, ...",0.0,0.0,0.071429,0.071429,0.071429,0.142857,0.142857,0.142857,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Cabbagetown, St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Toronto,0.016949,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.016949,0.0,0.016949,0.0,0.0,0.0,0.0,0.0,0.0


In [26]:
toronto_grouped.shape

(31, 187)

Sort venues descending

In [27]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Create a DF of top 10 venues for each neighborhood

In [28]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Downtown Toronto",Steakhouse,Asian Restaurant,American Restaurant,Hotel,Café,Opera House,Coffee Shop,Concert Hall,Plaza,Speakeasy
1,Berczy Park,Cocktail Bar,Café,Bakery,Seafood Restaurant,Farmers Market,Comfort Food Restaurant,Concert Hall,Liquor Store,Pub,Italian Restaurant
2,"CN Tower, Downtown Toronto, King and Spadina, ...",Airport Service,Airport Terminal,Airport Lounge,Plane,Boat or Ferry,Boutique,Harbor / Marina,Sculpture Garden,Airport Gate,Airport
3,"Cabbagetown, St. James Town",Restaurant,Italian Restaurant,Café,Coffee Shop,Japanese Restaurant,Butcher,Deli / Bodega,Jewelry Store,Pub,General Entertainment
4,Central Toronto,Sandwich Place,Coffee Shop,Dessert Shop,Sporting Goods Shop,Park,Seafood Restaurant,Sushi Restaurant,Café,Pizza Place,Gym


Cluster the neighborhoods

In [29]:
# set number of clusters
kclusters = 6

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([3, 3, 3, 1, 3, 3, 3, 1, 3, 1], dtype=int32)

In [30]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto

# merge toronto_grouped with toronto to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head(15) # check the last columns!

Unnamed: 0,index,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,37,M4E,East Toronto,The Beaches,43.676357,-79.293031,4,Grocery Store,Coffee Shop,Pub,Astrologer,Wine Bar,Coworking Space,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop
1,41,M4K,East Toronto,"East Toronto, Riverdale",43.679557,-79.352188,3,Greek Restaurant,Ice Cream Shop,Italian Restaurant,Indian Restaurant,Brewery,Pub,Bubble Tea Shop,Dessert Shop,Cosmetics Shop,Health Food Store
2,42,M4L,East Toronto,"East Toronto, India Bazaar",43.668999,-79.315572,3,Sandwich Place,Park,Fish & Chips Shop,Board Shop,Liquor Store,Italian Restaurant,Brewery,Ice Cream Shop,Pub,Burger Joint
3,43,M4M,East Toronto,East Toronto,43.659526,-79.340923,3,Café,Coffee Shop,Light Rail Station,Bakery,Italian Restaurant,Park,American Restaurant,Coworking Space,Ice Cream Shop,Sandwich Place
4,83,M7Y,East Toronto,East Toronto,43.662744,-79.321558,3,Café,Coffee Shop,Light Rail Station,Bakery,Italian Restaurant,Park,American Restaurant,Coworking Space,Ice Cream Shop,Sandwich Place
5,74,M6H,West Toronto,"Dovercourt Village, West Toronto",43.669005,-79.442259,3,Pharmacy,Supermarket,Bakery,Park,Music Venue,Pizza Place,Café,Pool,Middle Eastern Restaurant,Brewery
6,75,M6J,West Toronto,"Little Portugal, Trinity",43.647927,-79.41975,3,Bar,Pizza Place,Asian Restaurant,Vietnamese Restaurant,Wine Bar,French Restaurant,Record Shop,New American Restaurant,Korean Restaurant,Ice Cream Shop
7,76,M6K,West Toronto,"West Toronto, Exhibition Place, Parkdale Village",43.636847,-79.428191,3,Breakfast Spot,Coffee Shop,Café,Climbing Gym,Stadium,Burrito Place,Convenience Store,Caribbean Restaurant,Bar,Gym
8,80,M6P,West Toronto,"High Park, West Toronto",43.661608,-79.464763,3,Café,Mexican Restaurant,Cajun / Creole Restaurant,Sandwich Place,Diner,Italian Restaurant,Bakery,Speakeasy,Fast Food Restaurant,Steakhouse
9,81,M6R,West Toronto,"Parkdale, Roncesvalles",43.64896,-79.456325,1,Gift Shop,Breakfast Spot,Italian Restaurant,Movie Theater,Burger Joint,Cuban Restaurant,Restaurant,Bookstore,Dessert Shop,Dog Run


In [31]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters