In [1]:
import random # library for random number generation
import numpy as np # library for vectorized computation
import pandas as pd # library to process data as dataframes

import matplotlib.pyplot as plt # plotting library
# backend for rendering plots within the browser
%matplotlib inline 

from sklearn.cluster import KMeans 
from sklearn.datasets.samples_generator import make_blobs

In [2]:
pip install beautifulsoup4

Collecting beautifulsoup4
[?25l  Downloading https://files.pythonhosted.org/packages/e8/b5/7bb03a696f2c9b7af792a8f51b82974e51c268f15e925fc834876a4efa0b/beautifulsoup4-4.9.0-py3-none-any.whl (109kB)
[K     |████████████████████████████████| 112kB 6.4MB/s eta 0:00:01
[?25hCollecting soupsieve>1.2 (from beautifulsoup4)
  Downloading https://files.pythonhosted.org/packages/05/cf/ea245e52f55823f19992447b008bcbb7f78efc5960d77f6c34b5b45b36dd/soupsieve-2.0-py2.py3-none-any.whl
Installing collected packages: soupsieve, beautifulsoup4
Successfully installed beautifulsoup4-4.9.0 soupsieve-2.0
Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install lxml

Collecting lxml
[?25l  Downloading https://files.pythonhosted.org/packages/dd/ba/a0e6866057fc0bbd17192925c1d63a3b85cf522965de9bc02364d08e5b84/lxml-4.5.0-cp36-cp36m-manylinux1_x86_64.whl (5.8MB)
[K     |████████████████████████████████| 5.8MB 21.2MB/s eta 0:00:01     |██████████████████████          | 4.0MB 21.2MB/s eta 0:00:01
[?25hInstalling collected packages: lxml
Successfully installed lxml-4.5.0
Note: you may need to restart the kernel to use updated packages.


In [4]:
from bs4 import BeautifulSoup
import requests

# Data
### We obtain the postal codes of the Greater Vancouver Region from public sources along with the GPS coordinates
### We will then compare this information using Foursquare API to see the types and numbers of businesses in each of the neighbourhood.

# Methodology
#### 1. put the postal codes into dataframe 
#### 2. pull the foursquare data using the coordinates of 100 business within 500 meters
#### 3. compare the neighbourhoods and group those.
#### 4. cluster those neighbourhoods by similar business establishment and map the clusters. 
#### 5. investigate the clusters


In [5]:
df = pd.read_csv("vancouver coordinates 2.csv") 

In [6]:
df.head()

Unnamed: 0,V3A,Langley Township,(Langley City)-Langley Township,49.100002,-122.657128
0,V4A,Surrey,Southwest-Surrey,49.044655,-122.869163
1,V5A,Burnaby,(Government Road / Lake City / SFU / Burnaby M...,49.276301,-122.946971
2,V6A,Vancouver,(Strathcona / Chinatown / Downtown Eastside)- ...,49.279935,-123.090704
3,V7A,Richmond,South- Richmond,49.134223,-123.099148
4,V3B,Port Coquitlam,Central- Port Coquitlam,49.259167,-122.746993


In [7]:
df.columns=["Postal Code", "City", "Neighbourhood", "Latitude","Longitude"]

In [8]:
df.head()

Unnamed: 0,Postal Code,City,Neighbourhood,Latitude,Longitude
0,V4A,Surrey,Southwest-Surrey,49.044655,-122.869163
1,V5A,Burnaby,(Government Road / Lake City / SFU / Burnaby M...,49.276301,-122.946971
2,V6A,Vancouver,(Strathcona / Chinatown / Downtown Eastside)- ...,49.279935,-123.090704
3,V7A,Richmond,South- Richmond,49.134223,-123.099148
4,V3B,Port Coquitlam,Central- Port Coquitlam,49.259167,-122.746993


In [9]:
df.shape

(89, 5)

In [10]:
df.tail()

Unnamed: 0,Postal Code,City,Neighbourhood,Latitude,Longitude
84,V7Y,Vancouver,(Pacific Centre)-Vancouver,49.282728,-123.118463
85,V2Z,Langley Township,Southwest-Langley Township,49.065755,-122.582949
86,V3Z,Surrey,Lower East-Surrey,49.048837,-122.693318
87,V5Z,Vancouver,(East Fairview / South Cambie)-Vancouver,49.233483,-123.120701
88,V6Z,Vancouver,(SW Downtown)-Vancouver,49.275944,-123.131166


In [11]:
CLIENT_ID = 'PNBZRK0AATNMVG3M1CVPRL2IYCPPD5VI52ROWJKW53OZDUXM' # Foursquare ID
CLIENT_SECRET = 'IDM0XD24USYGN05IGYR5VI35ELC1UCWUA11JM3MF3AUDLVR3' #  Foursquare Secret
VERSION = '20180604'

In [12]:
def getNearbyVenues(names, latitudes, longitudes):
    radius=500
    LIMIT=100
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [13]:
vancouver_venues = getNearbyVenues(names=df['Neighbourhood'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )

Southwest-Surrey
(Government Road / Lake City / SFU / Burnaby Mountain)-Burnaby
(Strathcona / Chinatown / Downtown Eastside)- Vancouver
South- Richmond
Central- Port Coquitlam
(Parkcrest-Aubrey / Ardingley-Sprott)-Burnaby
(NE Downtown / Gastown / Harbour Centre / International Village / Victory Square / Yaletown)-Vancouver
(Sea Island / YVR)-Richmond
South-Port Coquitlam
Northeast-Delta
(Burnaby Heights / Willingdon Heights / West Central Valley)- Burnaby
(Waterfront / Coal Harbour / Canada Place)-Vancouver
Northwest-Richmond
East-Delta
(Lakeview-Mayfield / Richmond Park / Kingsway-Beresford)-Burnaby
(SE West End / Davie Village)-Vancouver
Southwest-Richmond
East Central-Delta
(Cascade-Schou / Douglas-Gilpin)-Burnaby
(NW West End / Stanley Park)-Vancouver
Outer East-North Vancouver (district municipality)
Port Moody
(Maywood / Marlborough / Oakalla / Windsor)-Burnaby
(West Fairview / Granville Island / NE Shaughnessy)-Vancouver
Inner East-North Vancouver (district municipality)
(Suncre

In [14]:
vancouver_venues.head()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Southwest-Surrey,49.044655,-122.869163,crescent park grocery,49.047998,-122.868168,Market
1,Southwest-Surrey,49.044655,-122.869163,Potter's Ocean Park,49.040884,-122.867118,Flower Shop
2,(Government Road / Lake City / SFU / Burnaby M...,49.276301,-122.946971,It's Halifax's The Love Shack,49.271907,-122.947826,Men's Store
3,(Strathcona / Chinatown / Downtown Eastside)- ...,49.279935,-123.090704,The Juice Truck,49.281281,-123.09212,Food Truck
4,(Strathcona / Chinatown / Downtown Eastside)- ...,49.279935,-123.090704,Finch’s Market,49.278565,-123.093473,Sandwich Place


In [15]:
vancouver_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
(Bentall Centre)-Vancouver,73,73,73,73,73,73
(Burnaby Heights / Willingdon Heights / West Central Valley)- Burnaby,41,41,41,41,41,41
(Cascade-Schou / Douglas-Gilpin)-Burnaby,6,6,6,6,6,6
(Central Kitsilano / Greektown)-Vancouver,30,30,30,30,30,30
(East Big Bend / Stride Avenue / Edmonds / Cariboo-Armstrong)-Burnaby,6,6,6,6,6,6
...,...,...,...,...,...,...
Southwest-Surrey,2,2,2,2,2,2
Upper East-Surrey,9,9,9,9,9,9
Upper West-Surrey,22,22,22,22,22,22
West-Maple Ridge,4,4,4,4,4,4


In [16]:
# one hot encoding
vancouver_onehot = pd.get_dummies(vancouver_venues[['Venue Category']], prefix="", prefix_sep="")

In [17]:
vancouver_onehot['Neighbourhood'] = vancouver_venues['Neighbourhood'] 

In [18]:
fixed_columns = [vancouver_onehot.columns[-1]] + list(vancouver_onehot.columns[:-1])
vancouver_onehot = vancouver_onehot[fixed_columns]

vancouver_onehot.head()

Unnamed: 0,Neighbourhood,Accessories Store,African Restaurant,Airport,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Amphitheater,Art Gallery,...,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Winery,Women's Store,Yoga Studio,Zoo
0,Southwest-Surrey,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Southwest-Surrey,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,(Government Road / Lake City / SFU / Burnaby M...,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,(Strathcona / Chinatown / Downtown Eastside)- ...,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,(Strathcona / Chinatown / Downtown Eastside)- ...,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [19]:
vancouver_onehot.shape

(1849, 254)

In [20]:
vancouver_grouped = vancouver_onehot.groupby('Neighbourhood').mean().reset_index()
vancouver_grouped

Unnamed: 0,Neighbourhood,Accessories Store,African Restaurant,Airport,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Amphitheater,Art Gallery,...,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Winery,Women's Store,Yoga Studio,Zoo
0,(Bentall Centre)-Vancouver,0.0,0.0,0.0,0.0,0.0,0.0,0.027397,0.0,0.013699,...,0.00000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.013699,0.0
1,(Burnaby Heights / Willingdon Heights / West C...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,...,0.02439,0.0,0.024390,0.000000,0.0,0.024390,0.0,0.0,0.000000,0.0
2,(Cascade-Schou / Douglas-Gilpin)-Burnaby,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.000000,...,0.00000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0
3,(Central Kitsilano / Greektown)-Vancouver,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,...,0.00000,0.0,0.033333,0.000000,0.0,0.033333,0.0,0.0,0.033333,0.0
4,(East Big Bend / Stride Avenue / Edmonds / Car...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,...,0.00000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79,Southwest-Surrey,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,...,0.00000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0
80,Upper East-Surrey,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,...,0.00000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0
81,Upper West-Surrey,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.000000,...,0.00000,0.0,0.090909,0.045455,0.0,0.000000,0.0,0.0,0.000000,0.0
82,West-Maple Ridge,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,...,0.00000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0


In [21]:
vancouver_grouped.shape

(84, 254)

In [22]:
num_top_venues = 5

for hood in vancouver_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = vancouver_grouped[vancouver_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----(Bentall Centre)-Vancouver----
          venue  freq
0         Hotel  0.11
1          Café  0.05
2  Dessert Shop  0.05
3    Food Truck  0.04
4        Lounge  0.04


----(Burnaby Heights / Willingdon Heights / West Central Valley)- Burnaby----
              venue  freq
0       Coffee Shop  0.12
1  Sushi Restaurant  0.10
2       Pizza Place  0.05
3          Pharmacy  0.05
4    Sandwich Place  0.05


----(Cascade-Schou / Douglas-Gilpin)-Burnaby----
                       venue  freq
0       Gym / Fitness Center  0.17
1        American Restaurant  0.17
2  Latin American Restaurant  0.17
3                Coffee Shop  0.17
4                   Bus Stop  0.17


----(Central Kitsilano / Greektown)-Vancouver----
                           venue  freq
0                    Coffee Shop  0.10
1                           Café  0.07
2  Vegetarian / Vegan Restaurant  0.07
3                  Deli / Bodega  0.03
4                            Spa  0.03


----(East Big Bend / Stride Avenue / Edmonds / C