# Preparation: Install and import the required libraries

In [1]:
!pip install bs4
!pip install geopy
!pip install folium==0.5.0




In [58]:
from geopy import Nominatim
import numpy as np
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.cluster import KMeans
import folium
print ('Libraries imported successfully')
from bs4 import BeautifulSoup
import requests
import pandas as pd
from pandas.io.json import json_normalize


# 1. Get the Wikipedia page for Vancouver Neighborhoods

In [59]:
url='https://en.wikipedia.org/wiki/List_of_neighbourhoods_in_Vancouver'

In [60]:
wikidata = requests.get(url).text

create a beautiful soup object

In [61]:
soup = BeautifulSoup(wikidata,"html5lib")

In [62]:
neighborhoods = [] # empty dictionary

ul_list = soup.findAll('ul')
ul_index = -1
for index,ul in enumerate(ul_list):
    if('Arbutus_Greenway') in str(ul):
        ul_index = index
if (ul_index == -1):
    print("list not found")
else:
    print("the index of the neighborhood ul element is ", ul_index)

the index of the neighborhood ul element is  1


Data Cleansing. The table contains an unordered list, with each neighborhood identified by a href object within a list item and the descriptions contain a repeat of the neighborhood name.  Parse the soup object to get the name and descript. Use python to strip out the redundant name.  

In [63]:
 
rows = ul_list[ul_index]

for li in rows.findAll('li'):
    cell = {}
    
    for link in li.find('a'):
        cell['Name'] = link.lstrip().rstrip()
    
    basedesc = str(li.text)
    strindex = basedesc.find('-')
    if (strindex > 0):
        strstart = strindex + 1
    
    strend = len(basedesc) - strstart
    cell['Description']= basedesc[strstart:strend].lstrip()
    neighborhoods.append(cell)
       

df = pd.DataFrame(neighborhoods)
df.replace("Arbutus Ridge", 
           "Arbutus-Ridge", 
           inplace=True)
   

In [64]:
df.head()

Unnamed: 0,Name,Description
0,Arbutus-Ridge,Located in the middle of Vancouver's west side...
1,Downtown,"The Central business district of Vancouver, co..."
2,Dunbar-Southlands,Southlands - An affluent neighbourhood on the ...
3,Fairview,Contains the popular attractions of Granville ...
4,Grandview-Woodland,Woodland - A mature neighbourhood in east Vanc...


Confirm the shape of the list of neighborhoods to make sure we got all 22.

In [65]:
df.shape

(22, 2)

# 2. Get the Geo-coordinates of Vancouver Neighborhoods

Again using BeautifulSoup to scrape the data, get the associated latitude and longitude of each neighborhood from the City of Vancouver Open Data Portal.

In [66]:
odpurl='https://opendata.vancouver.ca/explore/dataset/local-area-boundary/download/?format=json&timezone=America/Los_Angeles&lang=en'
odpdata = requests.get(odpurl).json()


The relevant parts of the JSON are the id, name and geo_point_2d, which contains the lat/long coordinates.

In [67]:
dfdtl = json_normalize(odpdata)
dfdtl

  dfdtl = json_normalize(odpdata)


Unnamed: 0,datasetid,recordid,record_timestamp,fields.mapid,fields.geom.type,fields.geom.coordinates,fields.name,fields.geo_point_2d,geometry.type,geometry.coordinates
0,local-area-boundary,b5f389ecdc26e66765eb7e4ddb2b26f324791f38,2019-03-14T04:02:27.283-07:00,AR,Polygon,"[[[-123.15259552001953, 49.25722503661694], [-...",Arbutus-Ridge,"[49.2468049108, -123.161669238]",Point,"[-123.161669238, 49.2468049108]"
1,local-area-boundary,f422c2100e9cb31018314a144d02519705da7e71,2019-03-14T04:02:27.283-07:00,CBD,Polygon,"[[[-123.11226654052733, 49.29016494750564], [-...",Downtown,"[49.2807470711, -123.116567008]",Point,"[-123.116567008, 49.2807470711]"
2,local-area-boundary,03187cbe941218efd0347c39aaeaeb00a16c2980,2019-03-14T04:02:27.283-07:00,FAIR,Polygon,"[[[-123.14598846435547, 49.25712203979079], [-...",Fairview,"[49.2645404871, -123.131048865]",Point,"[-123.131048865, 49.2645404871]"
3,local-area-boundary,2024a0d84997f4740ce6e96a94e0d3d1b7d00b87,2019-03-14T04:02:27.283-07:00,GW,Polygon,"[[[-123.07701873779298, 49.29024887084548], [-...",Grandview-Woodland,"[49.2764396102, -123.066728221]",Point,"[-123.066728221, 49.2764396102]"
4,local-area-boundary,8e1445b0adfd38e2ace35525518f475dd5138cd6,2019-03-14T04:02:27.283-07:00,HS,Polygon,"[[[-123.05648803710938, 49.293487548824], [-12...",Hastings-Sunrise,"[49.277934053, -123.040269923]",Point,"[-123.040269923, 49.277934053]"
5,local-area-boundary,611d9ca697724e47f1e0997f5e07e4ec87a8e0d2,2019-03-14T04:02:27.283-07:00,MARP,Polygon,"[[[-123.10696411132812, 49.20415878295483], [-...",Marpole,"[49.2102074666, -123.12838166]",Point,"[-123.12838166, 49.2102074666]"
6,local-area-boundary,d167678fba0e23f8a82e4d48159cb06ed8aad571,2019-03-14T04:02:27.283-07:00,RP,Polygon,"[[[-123.10562133789061, 49.23311614989819], [-...",Riley Park,"[49.2447664796, -123.103146806]",Point,"[-123.103146806, 49.2447664796]"
7,local-area-boundary,44d12027e568fbf55be372856a07e2dda20ce1b9,2019-03-14T04:02:27.283-07:00,SHAU,Polygon,"[[[-123.15527343749999, 49.2345237731892], [-1...",Shaughnessy,"[49.2456809771, -123.139760167]",Point,"[-123.139760167, 49.2456809771]"
8,local-area-boundary,58b2b8fdd5e48ea00c79f0a33a90ec99bf98372a,2019-03-14T04:02:27.283-07:00,STR,Polygon,"[[[-123.0992889404297, 49.28927230834548], [-1...",Strathcona,"[49.2782195786, -123.088235281]",Point,"[-123.088235281, 49.2782195786]"
9,local-area-boundary,7245d84262949196150423f3a7f69adac8ca271b,2019-03-14T04:02:27.283-07:00,WE,Polygon,"[[[-123.13768005371092, 49.27531814574782], [-...",West End,"[49.2850111894, -123.135437653]",Point,"[-123.135437653, 49.2850111894]"


key colunms are fields.mapid, fields.name, fields.geometry_coordinates - we will drop the rest

In [68]:
flds=dfdtl.drop(['datasetid','recordid','record_timestamp','fields.geom.type','fields.geom.coordinates','geometry.type','geometry.coordinates'], axis=1)

flds

Unnamed: 0,fields.mapid,fields.name,fields.geo_point_2d
0,AR,Arbutus-Ridge,"[49.2468049108, -123.161669238]"
1,CBD,Downtown,"[49.2807470711, -123.116567008]"
2,FAIR,Fairview,"[49.2645404871, -123.131048865]"
3,GW,Grandview-Woodland,"[49.2764396102, -123.066728221]"
4,HS,Hastings-Sunrise,"[49.277934053, -123.040269923]"
5,MARP,Marpole,"[49.2102074666, -123.12838166]"
6,RP,Riley Park,"[49.2447664796, -123.103146806]"
7,SHAU,Shaughnessy,"[49.2456809771, -123.139760167]"
8,STR,Strathcona,"[49.2782195786, -123.088235281]"
9,WE,West End,"[49.2850111894, -123.135437653]"


split the lat/long into separate columns

In [69]:
lat = []
lon = []

for row in flds['fields.geo_point_2d']:
    lat.append(row[0])
    lon.append(row[1])
flds['latitude'] = lat
flds['longitude']= lon

flds

Unnamed: 0,fields.mapid,fields.name,fields.geo_point_2d,latitude,longitude
0,AR,Arbutus-Ridge,"[49.2468049108, -123.161669238]",49.246805,-123.161669
1,CBD,Downtown,"[49.2807470711, -123.116567008]",49.280747,-123.116567
2,FAIR,Fairview,"[49.2645404871, -123.131048865]",49.26454,-123.131049
3,GW,Grandview-Woodland,"[49.2764396102, -123.066728221]",49.27644,-123.066728
4,HS,Hastings-Sunrise,"[49.277934053, -123.040269923]",49.277934,-123.04027
5,MARP,Marpole,"[49.2102074666, -123.12838166]",49.210207,-123.128382
6,RP,Riley Park,"[49.2447664796, -123.103146806]",49.244766,-123.103147
7,SHAU,Shaughnessy,"[49.2456809771, -123.139760167]",49.245681,-123.13976
8,STR,Strathcona,"[49.2782195786, -123.088235281]",49.27822,-123.088235
9,WE,West End,"[49.2850111894, -123.135437653]",49.285011,-123.135438


Drop the geo_pont_2d column, rename the columns and join the geopoints with the neighborhood wiki data to add the descriptions

In [70]:
flds_geopoints = flds.drop(['fields.geo_point_2d'],axis=1)
flds_geopoints.columns=['Mapid','Name','Latitude','Longitude']
flds_geopoints

neighborhoods = pd.merge(flds_geopoints,df, on='Name')


Finally print out the Data Frame to make sure we received all 22 neighborhoods

In [71]:
neighborhoods.head(5)

Unnamed: 0,Mapid,Name,Latitude,Longitude,Description
0,AR,Arbutus-Ridge,49.246805,-123.161669,Located in the middle of Vancouver's west side...
1,CBD,Downtown,49.280747,-123.116567,"The Central business district of Vancouver, co..."
2,FAIR,Fairview,49.26454,-123.131049,Contains the popular attractions of Granville ...
3,GW,Grandview-Woodland,49.27644,-123.066728,Woodland - A mature neighbourhood in east Vanc...
4,HS,Hastings-Sunrise,49.277934,-123.04027,Sunrise - One of Vancouver's oldest neighbourh...


# 3. Get the dog population data for each neighborhood

Prepare to scrape a pdf table by installing tabula-py library

In [3]:
!pip install tabula-py


  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes
Collecting tabula-py
  Downloading tabula_py-2.2.0-py3-none-any.whl (11.7 MB)
[K     |████████████████████████████████| 11.7 MB 10.2 MB/s eta 0:00:01
Collecting distro
  Downloading distro-1.5.0-py2.py3-none-any.whl (18 kB)
Installing collected packages: distro, tabula-py
Successfully installed distro-1.5.0 tabula-py-2.2.0


In [4]:
import tabula
import os
java_home = os.environ.get('JAVA_HOME', None)
if not java_home:
    java_path = 'C:/Program Files/Java/jdk1.8.0_11/bin'
    os.environ['JAVA_HOME'] = java_path
else:
    print(java_home)

/opt/ibm/java-x86_64-80


In [5]:
%%capture
file='https://vancouver.ca/files/cov/people-parks-dogs-strategy-appendix.pdf'

tables=tabula.read_pdf('https://vancouver.ca/files/cov/people-parks-dogs-strategy-appendix.pdf', pages = "all")
# ignore the errors regarding Java Advanced Imaging as we do not require the images. The capture directive hides the output which contains a long list of these errors


We are interested in the first table.  Create a new DataFrame containing the relevant columns.

In [6]:
dogs_table_raw=tables[0]
dogs_table_raw.drop(['Population\r(2011\rCensus)','Off-leash\rArea2\r(ha)','Dog\rdensity\rrelative\rto OLA\rarea3'],inplace=True,axis=1)

Rename the columns to simplify merging with the other datasets.

In [7]:
dogs_table=dogs_table_raw
dogs_table.columns=['Name','Area','LicensedDogs','DogDensity']


Check the output

In [8]:
dogs_table.head(5)

Unnamed: 0,Name,Area,LicensedDogs,DogDensity
0,West End,225.6,1563,6.9
1,Downtown,467.4,3037,6.5
2,Kitsilano,636.3,2888,4.5
3,Mount Pleasant,372.1,1663,4.5
4,Fairview,363.5,1522,4.2


# 4. Use Foursquare API to get dog parks near each neighborhood

In [9]:
# The code was removed by Watson Studio for sharing.

In [10]:
VERSION = '20210616'
LIMIT = 100

In [186]:
radius=3000
latitude=neighborhoods['Latitude'][0]
longitude=neighborhoods['Longitude'][0]
print("Lat:  " ,latitude)
print("Long: " ,longitude)
print("Limit: ",LIMIT)
print("query: ",search_query)
print("Radius(m) ",radius)



Lat:   49.2468049108
Long:  -123.161669238
Limit:  100
query:  Dog Park
Radius(m)  3000


In [187]:
search_query='Dog Park'
parkurl = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&oauth_token={}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude,ACCESS_TOKEN, VERSION, search_query, radius, LIMIT)
parkurl

'https://api.foursquare.com/v2/venues/search?client_id=CAUY0NVNOIN1151A2UDU2JI4IFAGVOKOB1W0UFG5PVBDHIQR&client_secret=2B3OHXWDMBGQBMDOWDFJWJW4CDTGRHJ0XVZ2YADOZ1S13OVJ2B3OHXWDMBGQBMDOWDFJWJW4CDTGRHJ0XVZ2YADOZ1S13OVJ&ll=49.2468049108,-123.161669238&oauth_token=XQJX5QJZRCZOA24OINDZWAHBCOTJJWPI3K3UTA0PCVYNU1YA&v=20210616&query=Dog Park&radius=3000&limit=100'

Send a GET request and check the results

In [188]:
# inspect the result
results = requests.get(parkurl).json()
venues=results['response']['venues']

#create a DataFrame from the results

In [189]:
parkvenues= json_normalize(venues)
parkvenues.head(2)

  parkvenues= json_normalize(venues)


Unnamed: 0,id,name,categories,referralId,hasPerk,location.lat,location.lng,location.labeledLatLngs,location.distance,location.cc,location.city,location.state,location.country,location.formattedAddress,location.address,location.crossStreet,location.postalCode,venuePage.id
0,4dc60d7d18387d1bd54f2259,Riley Off-leash Dog Park,"[{'id': '4bf58dd8d48988d1e5941735', 'name': 'D...",v-1626905593,False,49.233811,-123.145569,"[{'label': 'display', 'lat': 49.23381057062191...",1860,CA,Vancouver,BC,Canada,[Vancouver BC],,,,
1,51cce800498e94f69b46bc55,Tennyson Dog Park,"[{'id': '4bf58dd8d48988d1e5941735', 'name': 'D...",v-1626905593,False,49.263107,-123.144244,"[{'label': 'display', 'lat': 49.26310749421685...",2212,CA,Vancouver,BC,Canada,[Vancouver BC],,,,


### Data Wrangling

In [223]:
# keep only columns that include venue name, and anything that is associated with location
filtered_columns = ['name', 'categories'] + [col for col in parkvenues.columns if col.startswith('location.')] + ['id']
dataframe_filtered = parkvenues.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']
    
# filter the category for each row
dataframe_filtered['categories'] = dataframe_filtered.apply(get_category_type, axis=1)

# clean column names by keeping only last term
dataframe_filtered.columns = [column.split('.')[-1] for column in dataframe_filtered.columns]
dataframe_filtered

Unnamed: 0,name,categories,lat,lng,labeledLatLngs,distance,cc,city,state,country,formattedAddress,address,crossStreet,postalCode,id
0,Riley Off-leash Dog Park,Dog Run,49.233811,-123.145569,"[{'label': 'display', 'lat': 49.23381057062191...",1860,CA,Vancouver,BC,Canada,[Vancouver BC],,,,4dc60d7d18387d1bd54f2259
1,Tennyson Dog Park,Dog Run,49.263107,-123.144244,"[{'label': 'display', 'lat': 49.26310749421685...",2212,CA,Vancouver,BC,Canada,[Vancouver BC],,,,51cce800498e94f69b46bc55
2,Oak Meadows - Dog Park,Dog Run,49.237624,-123.132565,"[{'label': 'display', 'lat': 49.23762412275326...",2349,CA,Vancouver,BC,Canada,"[899 37th Ave (Oak Street), Vancouver BC]",899 37th Ave,Oak Street,,5068f79ae4b0bb2afa9aba56
3,Crown Dog Park,,49.233924,-123.192948,"[{'label': 'display', 'lat': 49.23392366884049...",2687,CA,Vancouver,BC,Canada,[Vancouver BC],,,,4da9d58693a04642f070c1be
4,Charleson Park,Park,49.26688,-123.124681,"[{'label': 'display', 'lat': 49.26688043290664...",3495,CA,Vancouver,BC,Canada,"[999 Charleson St, Vancouver BC V5Z]",999 Charleson St,,V5Z,4aada430f964a5201f6120e3
5,Dogcity Dog Daycare,Dog Run,49.264332,-123.180234,"[{'label': 'display', 'lat': 49.26433201857972...",2371,CA,Vancouver,BC,Canada,"[3458 West Broadway (Collingwood St.), Vancouv...",3458 West Broadway,Collingwood St.,V6R 2B3,4ded63e2c65bf34fc6d4933e
6,Delamont Park,Park,49.265822,-123.152312,"[{'label': 'display', 'lat': 49.265822, 'lng':...",2223,CA,Vancouver,BC,Canada,"[2091 W 7th Ave (at Arbutus St), Vancouver BC]",2091 W 7th Ave,at Arbutus St,,4bae9b34f964a520edc53be3
7,Queen Elizabeth Park,Park,49.241565,-123.113355,"[{'label': 'display', 'lat': 49.24156455524772...",3559,CA,Vancouver,BC,Canada,"[4600 Cambie St, Vancouver BC]",4600 Cambie St,,,4bdf6cddffdec9287a09eca1
8,Douglas Park & Playground,Park,49.253132,-123.122123,"[{'label': 'display', 'lat': 49.25313168807759...",2958,CA,Vancouver,BC,Canada,"[W 22nd Ave (Willow St), Vancouver BC]",W 22nd Ave,Willow St,,4aaae9fff964a5200b5820e3
9,Arbutus Village Park,Park,49.249486,-123.158716,"[{'label': 'display', 'lat': 49.24948575944954...",367,CA,,,Canada,,,,,50f48eb2e4b0e3ab976fd593


In [262]:
#get those where either the name matches 'Dog Park' or the category matches 'Dog Run', and add them together into a dataframe
dfc1=dataframe_filtered[dataframe_filtered['name'].str.contains('Dog Park')]
dfc2=dataframe_filtered[dataframe_filtered['categories'].str.contains('Dog Run', na=False)]
dogparks=pd.concat([dfc1, dfc2])

#remove duplicates where it matched both
dogparks=dogparks.drop_duplicates(subset=['name'])

dogparks.shape

(5, 15)

In [263]:
dogparks

Unnamed: 0,name,categories,lat,lng,labeledLatLngs,distance,cc,city,state,country,formattedAddress,address,crossStreet,postalCode,id
0,Riley Off-leash Dog Park,Dog Run,49.233811,-123.145569,"[{'label': 'display', 'lat': 49.23381057062191...",1860,CA,Vancouver,BC,Canada,[Vancouver BC],,,,4dc60d7d18387d1bd54f2259
1,Tennyson Dog Park,Dog Run,49.263107,-123.144244,"[{'label': 'display', 'lat': 49.26310749421685...",2212,CA,Vancouver,BC,Canada,[Vancouver BC],,,,51cce800498e94f69b46bc55
2,Oak Meadows - Dog Park,Dog Run,49.237624,-123.132565,"[{'label': 'display', 'lat': 49.23762412275326...",2349,CA,Vancouver,BC,Canada,"[899 37th Ave (Oak Street), Vancouver BC]",899 37th Ave,Oak Street,,5068f79ae4b0bb2afa9aba56
3,Crown Dog Park,,49.233924,-123.192948,"[{'label': 'display', 'lat': 49.23392366884049...",2687,CA,Vancouver,BC,Canada,[Vancouver BC],,,,4da9d58693a04642f070c1be
5,Dogcity Dog Daycare,Dog Run,49.264332,-123.180234,"[{'label': 'display', 'lat': 49.26433201857972...",2371,CA,Vancouver,BC,Canada,"[3458 West Broadway (Collingwood St.), Vancouv...",3458 West Broadway,Collingwood St.,V6R 2B3,4ded63e2c65bf34fc6d4933e


# 5. Use Foursquare again to identify pubs and bars near each venue

In [296]:
search_query='pub'
pubsurl = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&oauth_token={}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude,ACCESS_TOKEN, VERSION, search_query, radius, LIMIT)
pubsurl

'https://api.foursquare.com/v2/venues/search?client_id=CAUY0NVNOIN1151A2UDU2JI4IFAGVOKOB1W0UFG5PVBDHIQR&client_secret=2B3OHXWDMBGQBMDOWDFJWJW4CDTGRHJ0XVZ2YADOZ1S13OVJ2B3OHXWDMBGQBMDOWDFJWJW4CDTGRHJ0XVZ2YADOZ1S13OVJ&ll=49.2468049108,-123.161669238&oauth_token=XQJX5QJZRCZOA24OINDZWAHBCOTJJWPI3K3UTA0PCVYNU1YA&v=20210616&query=pub&radius=3000&limit=100'

Send the GET request

In [297]:
# inspect the result
pubresults = requests.get(pubsurl).json()
pubvenues=pubresults['response']['venues']

#create a DataFrame from the results

### Data Wrangling

In [298]:
pubs_raw= json_normalize(pubvenues)
pubs_raw.head(2)

  pubs_raw= json_normalize(pubvenues)


Unnamed: 0,id,name,categories,referralId,hasPerk,location.address,location.crossStreet,location.lat,location.lng,location.labeledLatLngs,location.distance,location.postalCode,location.cc,location.city,location.state,location.country,location.formattedAddress,venuePage.id,location.neighborhood
0,4aabfb4af964a520625b20e3,Darby's Public House,"[{'id': '4bf58dd8d48988d11b941735', 'name': 'P...",v-1626912401,False,2001 MacDonald St,at W 4th Ave,49.268337,-123.168383,"[{'label': 'display', 'lat': 49.26833731277357...",2446,V6K 3Y2,CA,Vancouver,BC,Canada,"[2001 MacDonald St (at W 4th Ave), Vancouver B...",,
1,4da64682a86ecc5168cc7385,The Manchester Pub Eatery,"[{'id': '4bf58dd8d48988d11b941735', 'name': 'P...",v-1626912401,False,1941 West Broadway,Cypress St.,49.263855,-123.149194,"[{'label': 'display', 'lat': 49.26385482819924...",2103,V6J 1Z3,CA,Vancouver,BC,Canada,"[1941 West Broadway (Cypress St.), Vancouver B...",,


In [299]:
filtered_columns = ['name', 'categories'] + [col for col in pubs_raw.columns if col.startswith('location.')] + ['id']
pubs_filtered = pubs_raw.loc[:, filtered_columns]
pubs_filtered

Unnamed: 0,name,categories,location.address,location.crossStreet,location.lat,location.lng,location.labeledLatLngs,location.distance,location.postalCode,location.cc,location.city,location.state,location.country,location.formattedAddress,location.neighborhood,id
0,Darby's Public House,"[{'id': '4bf58dd8d48988d11b941735', 'name': 'P...",2001 MacDonald St,at W 4th Ave,49.268337,-123.168383,"[{'label': 'display', 'lat': 49.26833731277357...",2446,V6K 3Y2,CA,Vancouver,BC,Canada,"[2001 MacDonald St (at W 4th Ave), Vancouver B...",,4aabfb4af964a520625b20e3
1,The Manchester Pub Eatery,"[{'id': '4bf58dd8d48988d11b941735', 'name': 'P...",1941 West Broadway,Cypress St.,49.263855,-123.149194,"[{'label': 'display', 'lat': 49.26385482819924...",2103,V6J 1Z3,CA,Vancouver,BC,Canada,"[1941 West Broadway (Cypress St.), Vancouver B...",,4da64682a86ecc5168cc7385
2,The Kings Head Pub,"[{'id': '4bf58dd8d48988d11b941735', 'name': 'P...",1618 Yew St,btwn York Ave & 1st Ave W,49.271264,-123.154943,"[{'label': 'display', 'lat': 49.27126405934131...",2766,V6K 3E7,CA,Vancouver,BC,Canada,"[1618 Yew St (btwn York Ave & 1st Ave W), Vanc...",,50c2589fe4b0489424ed0c46
3,Darby's Pub,[],2001 Macdonald Street,,49.268367,-123.168335,"[{'label': 'display', 'lat': 49.2683669, 'lng'...",2448,V6K 3Y2,CA,Vancouver,BC,Canada,"[2001 Macdonald Street, Vancouver BC V6K 3Y2]",,546d2cc9498e44fc86358f4e
4,Buru-Bin Japanese Pub,"[{'id': '4bf58dd8d48988d111941735', 'name': 'J...",3089 Oak St,,49.261951,-123.127045,"[{'label': 'display', 'lat': 49.26195136007178...",3028,V6H 2K8,CA,Vancouver,BC,Canada,"[3089 Oak St, Vancouver BC V6H 2K8]",,51a6779e498e83f4ce88c0a2
5,The Cove Neighborhood Pub,"[{'id': '4bf58dd8d48988d11b941735', 'name': 'P...",3681 W 4th Ave.,at Alma St.,49.268657,-123.185277,"[{'label': 'display', 'lat': 49.26865659044266...",2976,V6R 1P2,CA,Vancouver,BC,Canada,"[3681 W 4th Ave. (at Alma St.), Vancouver BC V...",,4ce34268ef2db60c08c1bf5b
6,Kitsilano Public Pool,"[{'id': '4bf58dd8d48988d15e941735', 'name': 'P...",2305 Cornwall Ave,Vine,49.273215,-123.157413,"[{'label': 'display', 'lat': 49.27321534899229...",2956,,CA,Vancouver,BC,Canada,"[2305 Cornwall Ave (Vine), Vancouver BC]",,4aaad0fff964a5208c5720e3
7,Local Kitsilano,"[{'id': '4bf58dd8d48988d157941735', 'name': 'N...",2210 Cornwall Ave,,49.272481,-123.15524,"[{'label': 'display', 'lat': 49.27248135691792...",2896,V6K 1B5,CA,Vancouver,BC,Canada,"[2210 Cornwall Ave, Vancouver BC V6K 1B5]",,4b10463af964a520ce6c23e3
8,Granville Island Public Market,"[{'id': '50be8ee891d4fa8dcc7199a7', 'name': 'M...",1661 Duranleau St,at Johnston St,49.272488,-123.135281,"[{'label': 'display', 'lat': 49.27248788116003...",3442,V6H 3S4,CA,Vancouver,BC,Canada,"[1661 Duranleau St (at Johnston St), Vancouver...",,4aa7f646f964a5203d4e20e3
9,Vancouver Public Library - Kerrisdale,"[{'id': '4bf58dd8d48988d12f941735', 'name': 'L...",2112 W 42nd Ave.,at West Blvd.,49.233276,-123.156252,"[{'label': 'display', 'lat': 49.23327575266476...",1556,V6M 2B6,CA,Vancouver,BC,Canada,"[2112 W 42nd Ave. (at West Blvd.), Vancouver B...",,4b662e96f964a520e1172be3


In [302]:

# filter the category for each row
#pubs_filtered['categories'] = pubs_filtered.apply(get_category_type, axis=1)

# clean column names by keeping only last term
#pubs_filtered.columns = [column.split('.')[-1] for column in pubs_filtered.columns]
#pubs_filtered

In [301]:
pubs_filtered.shape

(25, 16)

In [303]:
pubs_filtered.head(5)

Unnamed: 0,name,categories,location.address,location.crossStreet,location.lat,location.lng,location.labeledLatLngs,location.distance,location.postalCode,location.cc,location.city,location.state,location.country,location.formattedAddress,location.neighborhood,id
0,Darby's Public House,Pub,2001 MacDonald St,at W 4th Ave,49.268337,-123.168383,"[{'label': 'display', 'lat': 49.26833731277357...",2446,V6K 3Y2,CA,Vancouver,BC,Canada,"[2001 MacDonald St (at W 4th Ave), Vancouver B...",,4aabfb4af964a520625b20e3
1,The Manchester Pub Eatery,Pub,1941 West Broadway,Cypress St.,49.263855,-123.149194,"[{'label': 'display', 'lat': 49.26385482819924...",2103,V6J 1Z3,CA,Vancouver,BC,Canada,"[1941 West Broadway (Cypress St.), Vancouver B...",,4da64682a86ecc5168cc7385
2,The Kings Head Pub,Pub,1618 Yew St,btwn York Ave & 1st Ave W,49.271264,-123.154943,"[{'label': 'display', 'lat': 49.27126405934131...",2766,V6K 3E7,CA,Vancouver,BC,Canada,"[1618 Yew St (btwn York Ave & 1st Ave W), Vanc...",,50c2589fe4b0489424ed0c46
3,Darby's Pub,,2001 Macdonald Street,,49.268367,-123.168335,"[{'label': 'display', 'lat': 49.2683669, 'lng'...",2448,V6K 3Y2,CA,Vancouver,BC,Canada,"[2001 Macdonald Street, Vancouver BC V6K 3Y2]",,546d2cc9498e44fc86358f4e
4,Buru-Bin Japanese Pub,Japanese Restaurant,3089 Oak St,,49.261951,-123.127045,"[{'label': 'display', 'lat': 49.26195136007178...",3028,V6H 2K8,CA,Vancouver,BC,Canada,"[3089 Oak St, Vancouver BC V6H 2K8]",,51a6779e498e83f4ce88c0a2
