In [1]:
import random # library for random number generation
import numpy as np # library for vectorized computation
import pandas as pd # library to process data as dataframes

import matplotlib.pyplot as plt # plotting library
# backend for rendering plots within the browser
%matplotlib inline 

from sklearn.cluster import KMeans 
from sklearn.datasets.samples_generator import make_blobs

In [2]:
pip install beautifulsoup4

Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install lxml

Note: you may need to restart the kernel to use updated packages.


In [4]:
from bs4 import BeautifulSoup
import requests

# Data
### We obtain the postal codes of the Greater Vancouver Region from public sources along with the GPS coordinates
### We will then compare this information using Foursquare API to see the types and numbers of businesses in each of the neighbourhood.

# Methodology
#### 1. put the postal codes into dataframe 
#### 2. pull the foursquare data using the coordinates of 100 business within 500 meters
#### 3. compare the neighbourhoods and group those.
#### 4. cluster those neighbourhoods by similar business establishment and map the clusters. 
#### 5. investigate the clusters


In [5]:
df = pd.read_csv("vancouver coordinates 2.csv") 

In [6]:
df.head()

Unnamed: 0,V3A,Langley Township,(Langley City)-Langley Township,49.100002,-122.657128
0,V4A,Surrey,Southwest-Surrey,49.044655,-122.869163
1,V5A,Burnaby,(Government Road / Lake City / SFU / Burnaby M...,49.276301,-122.946971
2,V6A,Vancouver,(Strathcona / Chinatown / Downtown Eastside)- ...,49.279935,-123.090704
3,V7A,Richmond,South- Richmond,49.134223,-123.099148
4,V3B,Port Coquitlam,Central- Port Coquitlam,49.259167,-122.746993


In [7]:
df.columns=["Postal Code", "City", "Neighbourhood", "Latitude","Longitude"]

In [8]:
df.head()

Unnamed: 0,Postal Code,City,Neighbourhood,Latitude,Longitude
0,V4A,Surrey,Southwest-Surrey,49.044655,-122.869163
1,V5A,Burnaby,(Government Road / Lake City / SFU / Burnaby M...,49.276301,-122.946971
2,V6A,Vancouver,(Strathcona / Chinatown / Downtown Eastside)- ...,49.279935,-123.090704
3,V7A,Richmond,South- Richmond,49.134223,-123.099148
4,V3B,Port Coquitlam,Central- Port Coquitlam,49.259167,-122.746993


In [9]:
df.shape

(89, 5)

In [10]:
df.tail()

Unnamed: 0,Postal Code,City,Neighbourhood,Latitude,Longitude
84,V7Y,Vancouver,(Pacific Centre)-Vancouver,49.282728,-123.118463
85,V2Z,Langley Township,Southwest-Langley Township,49.065755,-122.582949
86,V3Z,Surrey,Lower East-Surrey,49.048837,-122.693318
87,V5Z,Vancouver,(East Fairview / South Cambie)-Vancouver,49.233483,-123.120701
88,V6Z,Vancouver,(SW Downtown)-Vancouver,49.275944,-123.131166


In [11]:
vancouver_data=df

In [12]:
CLIENT_ID = 'PNBZRK0AATNMVG3M1CVPRL2IYCPPD5VI52ROWJKW53OZDUXM' # Foursquare ID
CLIENT_SECRET = 'IDM0XD24USYGN05IGYR5VI35ELC1UCWUA11JM3MF3AUDLVR3' #  Foursquare Secret
VERSION = '20180604'

In [13]:
def getNearbyVenues(names, latitudes, longitudes):
    radius=500
    LIMIT=100
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [14]:
vancouver_venues = getNearbyVenues(names=vancouver_data['Neighbourhood'],
                                   latitudes=vancouver_data['Latitude'],
                                   longitudes=vancouver_data['Longitude']
                                  )

Southwest-Surrey
(Government Road / Lake City / SFU / Burnaby Mountain)-Burnaby
(Strathcona / Chinatown / Downtown Eastside)- Vancouver
South- Richmond
Central- Port Coquitlam
(Parkcrest-Aubrey / Ardingley-Sprott)-Burnaby
(NE Downtown / Gastown / Harbour Centre / International Village / Victory Square / Yaletown)-Vancouver
(Sea Island / YVR)-Richmond
South-Port Coquitlam
Northeast-Delta
(Burnaby Heights / Willingdon Heights / West Central Valley)- Burnaby
(Waterfront / Coal Harbour / Canada Place)-Vancouver
Northwest-Richmond
East-Delta
(Lakeview-Mayfield / Richmond Park / Kingsway-Beresford)-Burnaby
(SE West End / Davie Village)-Vancouver
Southwest-Richmond
East Central-Delta
(Cascade-Schou / Douglas-Gilpin)-Burnaby
(NW West End / Stanley Park)-Vancouver
Outer East-North Vancouver (district municipality)
Port Moody
(Maywood / Marlborough / Oakalla / Windsor)-Burnaby
(West Fairview / Granville Island / NE Shaughnessy)-Vancouver
Inner East-North Vancouver (district municipality)
(Suncre

In [15]:
vancouver_venues.head()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Southwest-Surrey,49.044655,-122.869163,crescent park grocery,49.047998,-122.868168,Market
1,Southwest-Surrey,49.044655,-122.869163,Potter's Ocean Park,49.040884,-122.867118,Flower Shop
2,(Government Road / Lake City / SFU / Burnaby M...,49.276301,-122.946971,Quebec Stonghold,49.273075,-122.944845,Men's Store
3,(Government Road / Lake City / SFU / Burnaby M...,49.276301,-122.946971,Bus Stop 59378 (144),49.276593,-122.953469,Bus Stop
4,(Strathcona / Chinatown / Downtown Eastside)- ...,49.279935,-123.090704,The Juice Truck,49.281281,-123.09212,Food Truck


In [16]:
vancouver_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
(Bentall Centre)-Vancouver,72,72,72,72,72,72
(Burnaby Heights / Willingdon Heights / West Central Valley)- Burnaby,41,41,41,41,41,41
(Cascade-Schou / Douglas-Gilpin)-Burnaby,6,6,6,6,6,6
(Central Kitsilano / Greektown)-Vancouver,31,31,31,31,31,31
(East Big Bend / Stride Avenue / Edmonds / Cariboo-Armstrong)-Burnaby,6,6,6,6,6,6
...,...,...,...,...,...,...
Southwest-Surrey,2,2,2,2,2,2
Upper East-Surrey,9,9,9,9,9,9
Upper West-Surrey,22,22,22,22,22,22
West-Maple Ridge,4,4,4,4,4,4


In [17]:
# one hot encoding
vancouver_onehot = pd.get_dummies(vancouver_venues[['Venue Category']], prefix="", prefix_sep="")

In [18]:
vancouver_onehot['Neighbourhood'] = vancouver_venues['Neighbourhood'] 

In [19]:
fixed_columns = [vancouver_onehot.columns[-1]] + list(vancouver_onehot.columns[:-1])
vancouver_onehot = vancouver_onehot[fixed_columns]

vancouver_onehot.head()

Unnamed: 0,Neighbourhood,Accessories Store,African Restaurant,Airport,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Amphitheater,Art Gallery,...,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Winery,Women's Store,Yoga Studio,Zoo
0,Southwest-Surrey,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Southwest-Surrey,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,(Government Road / Lake City / SFU / Burnaby M...,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,(Government Road / Lake City / SFU / Burnaby M...,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,(Strathcona / Chinatown / Downtown Eastside)- ...,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [20]:
vancouver_onehot.shape

(1863, 251)

In [21]:
vancouver_grouped = vancouver_onehot.groupby('Neighbourhood').mean().reset_index()
vancouver_grouped

Unnamed: 0,Neighbourhood,Accessories Store,African Restaurant,Airport,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Amphitheater,Art Gallery,...,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Winery,Women's Store,Yoga Studio,Zoo
0,(Bentall Centre)-Vancouver,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.013889,...,0.000000,0.00000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.013889,0.0
1,(Burnaby Heights / Willingdon Heights / West C...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,...,0.000000,0.02439,0.024390,0.000000,0.0,0.024390,0.0,0.0,0.000000,0.0
2,(Cascade-Schou / Douglas-Gilpin)-Burnaby,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.000000,...,0.000000,0.00000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0
3,(Central Kitsilano / Greektown)-Vancouver,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,...,0.064516,0.00000,0.032258,0.000000,0.0,0.032258,0.0,0.0,0.032258,0.0
4,(East Big Bend / Stride Avenue / Edmonds / Car...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,...,0.000000,0.00000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80,Southwest-Surrey,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,...,0.000000,0.00000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0
81,Upper East-Surrey,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,...,0.000000,0.00000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0
82,Upper West-Surrey,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.000000,...,0.000000,0.00000,0.090909,0.045455,0.0,0.000000,0.0,0.0,0.000000,0.0
83,West-Maple Ridge,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,...,0.000000,0.00000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0


In [22]:
vancouver_grouped.shape

(85, 251)

In [23]:
num_top_venues = 5

for hood in vancouver_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = vancouver_grouped[vancouver_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----(Bentall Centre)-Vancouver----
          venue  freq
0         Hotel  0.11
1  Dessert Shop  0.06
2          Café  0.04
3        Lounge  0.04
4   Coffee Shop  0.04


----(Burnaby Heights / Willingdon Heights / West Central Valley)- Burnaby----
              venue  freq
0       Coffee Shop  0.12
1  Sushi Restaurant  0.07
2       Pizza Place  0.05
3          Pharmacy  0.05
4       Gas Station  0.02


----(Cascade-Schou / Douglas-Gilpin)-Burnaby----
                  venue  freq
0           Coffee Shop  0.17
1                   Gym  0.17
2              Bus Stop  0.17
3   American Restaurant  0.17
4  Gym / Fitness Center  0.17


----(Central Kitsilano / Greektown)-Vancouver----
                           venue  freq
0                    Coffee Shop  0.10
1                           Café  0.06
2  Vegetarian / Vegan Restaurant  0.06
3                            Spa  0.03
4                    Pizza Place  0.03


----(East Big Bend / Stride Avenue / Edmonds / Cariboo-Armstrong)-Burnaby----


### put into pandas dataframe

In [24]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [25]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = vancouver_grouped['Neighbourhood']

for ind in np.arange(vancouver_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(vancouver_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,(Bentall Centre)-Vancouver,Hotel,Dessert Shop,Coffee Shop,Café,Lounge,Food Truck,Cosmetics Shop,Clothing Store,Italian Restaurant,Bar
1,(Burnaby Heights / Willingdon Heights / West C...,Coffee Shop,Sushi Restaurant,Pharmacy,Pizza Place,Grocery Store,Light Rail Station,Supermarket,Italian Restaurant,Sporting Goods Shop,Juice Bar
2,(Cascade-Schou / Douglas-Gilpin)-Burnaby,Bus Stop,Gym,Latin American Restaurant,Coffee Shop,American Restaurant,Gym / Fitness Center,Electronics Store,Ethiopian Restaurant,Event Space,Exhibit
3,(Central Kitsilano / Greektown)-Vancouver,Coffee Shop,Café,Vegetarian / Vegan Restaurant,Diner,Malay Restaurant,Liquor Store,Bookstore,Spa,Mexican Restaurant,Donut Shop
4,(East Big Bend / Stride Avenue / Edmonds / Car...,Bus Stop,Bus Station,Shopping Mall,Pet Store,Grocery Store,Electronics Store,Ethiopian Restaurant,Event Space,Exhibit,Zoo


## cluster the neighbourhoods 

In [26]:
# set number of clusters
kclusters = 5

vancouver_grouped_clustering = vancouver_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(vancouver_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32)

In [27]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)



In [28]:
neighborhoods_venues_sorted

Unnamed: 0,Cluster Labels,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,1,(Bentall Centre)-Vancouver,Hotel,Dessert Shop,Coffee Shop,Café,Lounge,Food Truck,Cosmetics Shop,Clothing Store,Italian Restaurant,Bar
1,1,(Burnaby Heights / Willingdon Heights / West C...,Coffee Shop,Sushi Restaurant,Pharmacy,Pizza Place,Grocery Store,Light Rail Station,Supermarket,Italian Restaurant,Sporting Goods Shop,Juice Bar
2,1,(Cascade-Schou / Douglas-Gilpin)-Burnaby,Bus Stop,Gym,Latin American Restaurant,Coffee Shop,American Restaurant,Gym / Fitness Center,Electronics Store,Ethiopian Restaurant,Event Space,Exhibit
3,1,(Central Kitsilano / Greektown)-Vancouver,Coffee Shop,Café,Vegetarian / Vegan Restaurant,Diner,Malay Restaurant,Liquor Store,Bookstore,Spa,Mexican Restaurant,Donut Shop
4,1,(East Big Bend / Stride Avenue / Edmonds / Car...,Bus Stop,Bus Station,Shopping Mall,Pet Store,Grocery Store,Electronics Store,Ethiopian Restaurant,Event Space,Exhibit,Zoo
...,...,...,...,...,...,...,...,...,...,...,...,...
80,2,Southwest-Surrey,Market,Flower Shop,Duty-free Shop,Financial or Legal Service,Filipino Restaurant,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant
81,1,Upper East-Surrey,Convenience Store,Gym / Fitness Center,Design Studio,Casino,Market,Flea Market,Gas Station,Athletics & Sports,Bowling Alley,Harbor / Marina
82,1,Upper West-Surrey,Fast Food Restaurant,Coffee Shop,Vietnamese Restaurant,Restaurant,Health Food Store,Pizza Place,Pharmacy,Pet Store,Discount Store,Arts & Crafts Store
83,1,West-Maple Ridge,Restaurant,Sushi Restaurant,Gas Station,Hotel,Duty-free Shop,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Exhibit


In [29]:
vancouver_data

Unnamed: 0,Postal Code,City,Neighbourhood,Latitude,Longitude
0,V4A,Surrey,Southwest-Surrey,49.044655,-122.869163
1,V5A,Burnaby,(Government Road / Lake City / SFU / Burnaby M...,49.276301,-122.946971
2,V6A,Vancouver,(Strathcona / Chinatown / Downtown Eastside)- ...,49.279935,-123.090704
3,V7A,Richmond,South- Richmond,49.134223,-123.099148
4,V3B,Port Coquitlam,Central- Port Coquitlam,49.259167,-122.746993
...,...,...,...,...,...
84,V7Y,Vancouver,(Pacific Centre)-Vancouver,49.282728,-123.118463
85,V2Z,Langley Township,Southwest-Langley Township,49.065755,-122.582949
86,V3Z,Surrey,Lower East-Surrey,49.048837,-122.693318
87,V5Z,Vancouver,(East Fairview / South Cambie)-Vancouver,49.233483,-123.120701


In [30]:

vancouver_merged = vancouver_data

# merge vancouver_grouped with vancouver_data to add latitude/longitude for each neighbuorhood
vancouver_merged = vancouver_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

vancouver_merged.head() # check the last columns!

Unnamed: 0,Postal Code,City,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,V4A,Surrey,Southwest-Surrey,49.044655,-122.869163,2.0,Market,Flower Shop,Duty-free Shop,Financial or Legal Service,Filipino Restaurant,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant
1,V5A,Burnaby,(Government Road / Lake City / SFU / Burnaby M...,49.276301,-122.946971,1.0,Men's Store,Bus Stop,Zoo,Duty-free Shop,Filipino Restaurant,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant
2,V6A,Vancouver,(Strathcona / Chinatown / Downtown Eastside)- ...,49.279935,-123.090704,1.0,Coffee Shop,Park,Restaurant,Sandwich Place,BBQ Joint,Deli / Bodega,Pie Shop,Diner,Cheese Shop,Pub
3,V7A,Richmond,South- Richmond,49.134223,-123.099148,1.0,Food & Drink Shop,Chinese Restaurant,Shopping Mall,Sandwich Place,Liquor Store,Mattress Store,Breakfast Spot,Portuguese Restaurant,Poke Place,Supermarket
4,V3B,Port Coquitlam,Central- Port Coquitlam,49.259167,-122.746993,1.0,Fast Food Restaurant,Cocktail Bar,Flower Shop,Supermarket,Discount Store,Bank,Japanese Restaurant,Music Store,Pizza Place,Greek Restaurant


In [31]:
import folium 

In [32]:
pip install geopy

Note: you may need to restart the kernel to use updated packages.


In [33]:
from geopy.geocoders import Nominatim 

In [34]:
import matplotlib.cm as cm
import matplotlib.colors as colors

In [35]:
address = 'Vancouver, CA'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Vancouver are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Vancouver are 49.2608724, -123.1139529.


In [36]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(vancouver_merged['Latitude'], vancouver_merged['Longitude'], vancouver_merged['Neighbourhood'], vancouver_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

TypeError: list indices must be integers or slices, not float