In [1]:
from dotenv import find_dotenv, load_dotenv

# Locate a .env file and load its variables into the process environment so the
# os.getenv() lookups in later cells can find the API and database credentials.
load_dotenv(find_dotenv())

True

In [72]:
from datetime import datetime
import json
import os
from typing import List, Optional

from azure.cosmos import CosmosClient
import h3
import pandas as pd
from requests import Session, HTTPError

In [28]:
# --- Yelp credentials: pulled from the environment, never hard-coded ---
yelp_client_id = os.environ.get('YELP_CLIENT_ID')
yelp_api_key = os.environ.get('YELP_API_KEY')

# --- Yelp endpoints, all derived from the one API root ---
yelp_api_root = 'https://api.yelp.com/v3'
yelp_search_url = f'{yelp_api_root}/businesses/search'
yelp_details_url = f'{yelp_api_root}/businesses/'  # a business id gets appended to this
h3_res = 9

# --- Search origin (x = longitude, y = latitude) and search radius in meters ---
loc_x, loc_y = -122.91479355798339, 47.042924037902935  # house in Olympia
# loc_x, loc_y = -105.4468809729821, 20.488120802895565  # where we stayed in Yelapa
search_radius = 1600

In [4]:
def add_h3_index(yelp_item: dict, h3_resolution: int = 9) -> dict:
    """
    Add the H3 cell index of a Yelp business's coordinates to the business record.

    Args:
        yelp_item: A single Yelp business dictionary. It must contain a
            ``coordinates`` mapping with ``latitude`` and ``longitude`` values.
        h3_resolution: H3 resolution to index at. It also determines the key the
            index is stored under, zero-padded to two digits (e.g. ``h3_09``).

    Returns:
        The same dictionary (modified in place) with the ``h3_<resolution>`` key added.
    """
    # read the coordinates mapping once rather than once per axis
    coordinates = yelp_item.get('coordinates')

    # get the h3 index; latitude is passed first, then longitude
    h3_idx = h3.latlng_to_cell(coordinates.get('latitude'), coordinates.get('longitude'), h3_resolution)

    # add the h3 index under a resolution-specific key
    yelp_item[f'h3_{h3_resolution:02d}'] = h3_idx

    return yelp_item


def add_geometry(yelp_item: dict, format: str = 'esri') -> dict:
    """
    Add a point geometry built from a Yelp business's coordinates to the business record.

    Args:
        yelp_item: A single Yelp business dictionary. It must contain a
            ``coordinates`` mapping with ``latitude`` and ``longitude`` values.
        format: Geometry representation to create. ``'esri'`` (the default) and
            ``'arcgis'`` are treated as synonyms and build a ``Point`` object in
            WGS84 (wkid 4326); ``'geojson'`` builds a plain GeoJSON Point dictionary.

    Returns:
        The same dictionary (modified in place) with the ``geometry`` key added.

    Raises:
        ValueError: If ``format`` is not one of the supported values.
    """
    # validate up front so an unsupported format fails with a clear message
    # instead of an unbound local variable further down
    if format not in ('esri', 'arcgis', 'geojson'):
        raise ValueError(
            f"Unsupported geometry format {format!r}; expected 'esri', 'arcgis' or 'geojson'."
        )

    # get the coordinates
    coordinates = yelp_item.get('coordinates')
    coord_x = coordinates.get('longitude')
    coord_y = coordinates.get('latitude')

    # if geojson, build a plain dictionary (x/longitude first, then y/latitude)
    if format == 'geojson':
        pt = {
            "type": "Point",
            "coordinates": [coord_x, coord_y]
        }

    # otherwise ('esri' / 'arcgis') create a Point geometry
    else:
        # NOTE(review): Point is not imported anywhere in the visible notebook --
        # presumably arcgis.geometry.Point; it must be imported before this branch is used.
        pt = Point({'x': coord_x, 'y': coord_y, 'spatialReference': {'wkid': 4326}})

    # add to the Yelp item
    yelp_item['geometry'] = pt

    return yelp_item


def prune_yelp_item(yelp_item: dict, keep_keys: Optional[List[str]] = None) -> dict:
    """
    Return a copy of a Yelp business record reduced to a selected set of keys.

    The ``geometry`` key and any key starting with ``h3_`` are always retained,
    whether or not they are listed in ``keep_keys``.

    Args:
        yelp_item: A single Yelp business dictionary.
        keep_keys: Keys to retain. Defaults to a basic set of descriptive
            properties (id, name, image_url, url, review_count, categories,
            rating, price, address).

    Returns:
        A new dictionary containing only the retained keys that are present in
        ``yelp_item``; the input dictionary is not modified.
    """
    # fall back to the default set of properties
    if keep_keys is None:
        keep_keys = ['id', 'name', 'image_url', 'url', 'review_count', 'categories', 'rating', 'price', 'address']

    wanted = set(keep_keys)

    # prune using the keys of the item being processed (not a notebook global),
    # always retaining the geometry and h3 keys
    return {
        key: value
        for key, value in yelp_item.items()
        if key in wanted or key.startswith('h3_') or key == 'geometry'
    }

In [5]:
# Headers sent with every Yelp request: ask for a JSON response and pass the
# API key as a bearer token.
headers = dict(
    accept='application/json',
    Authorization=f'Bearer {yelp_api_key}',
)

In [6]:
# One session is reused for every Yelp call so the auth headers are set once.
yelp = Session()

# Merge into the session's default headers instead of replacing them, so the
# defaults requests sets on a new Session are kept alongside the Yelp
# authorization header.
yelp.headers.update(headers)

In [59]:
# Query parameters for the business search: up to 50 results, nearest first,
# around the configured location.
params = dict(
    limit=50,
    sort_by='distance',
    radius=search_radius,  # straight-line distance in meters
    longitude=loc_x,
    latitude=loc_y,
    term='food, bar',
)

In [70]:
# Query the business search endpoint defined in the configuration cell.
res = yelp.get(yelp_search_url, params=params)

# Fail loudly (HTTPError) on an error response instead of silently skipping the
# processing and leaving yelp_lst undefined, or stale from an earlier run.
res.raise_for_status()

# get the list of businesses from the response; a missing key means no results
yelp_lst = res.json().get('businesses') or []

# add the h3 level 7 through 11 indices
for h3_lvl in range(7, 12):
    yelp_lst = [add_h3_index(yelp_itm, h3_lvl) for yelp_itm in yelp_lst]

# add GeoJSON point geometry
yelp_lst = [add_geometry(yelp_itm, format="geojson") for yelp_itm in yelp_lst]

# one retrieval timestamp shared by every item from this request
dt_now = datetime.now().isoformat()

for yelp_itm in yelp_lst:

    # get a string formatted address for display, when the item has one
    display_address = (yelp_itm.get('location') or {}).get('display_address')
    if display_address is not None:
        yelp_itm['address'] = ', '.join(display_address)

    # add retrieval timestamp
    yelp_itm['retrieval_timestamp'] = dt_now

len(yelp_lst)

50

In [71]:
# Pick one business out of the search results to inspect; its id is used for the
# business details request in a later cell. The index 6 is an arbitrary sample.
yelp_item = yelp_lst[6]

yelp_item

{'id': 'ZBp5SiXpapzCKm-GLsbirw',
 'alias': 'swing-wine-bar-and-cafe-olympia',
 'name': 'Swing Wine Bar & Cafe',
 'image_url': 'https://s3-media2.fl.yelpcdn.com/bphoto/Y-VXRm6MaNUULqHaN1RzvQ/o.jpg',
 'is_closed': False,
 'url': 'https://www.yelp.com/biz/swing-wine-bar-and-cafe-olympia?adjust_creative=ZAyA5LhP0CVLQ3sJwFX2SA&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=ZAyA5LhP0CVLQ3sJwFX2SA',
 'review_count': 268,
 'categories': [{'alias': 'newamerican', 'title': 'New American'},
  {'alias': 'cocktailbars', 'title': 'Cocktail Bars'},
  {'alias': 'wine_bars', 'title': 'Wine Bars'}],
 'rating': 4.0,
 'coordinates': {'latitude': 47.04068, 'longitude': -122.902667},
 'transactions': ['delivery'],
 'price': '$$',
 'location': {'address1': '825 Columbia St SW',
  'address2': '',
  'address3': '',
  'city': 'Olympia',
  'zip_code': '98501',
  'country': 'US',
  'state': 'WA',
  'display_address': ['825 Columbia St SW', 'Olympia, WA 98501']},
 'phone': '+13603579464',
 '

In [62]:
# Request the detail record for the selected business by appending its id to the
# business details endpoint.
biz_res = yelp.get(yelp_details_url + yelp_item.get('id'))

In [63]:
# Show the URL of the details response to confirm the business id was appended correctly.
biz_res.url

'https://api.yelp.com/v3/businesses/ZBp5SiXpapzCKm-GLsbirw'

In [69]:
# Display the decoded JSON body of the business details response.
biz_res.json()

{'id': 'ZBp5SiXpapzCKm-GLsbirw',
 'alias': 'swing-wine-bar-and-cafe-olympia',
 'name': 'Swing Wine Bar & Cafe',
 'image_url': 'https://s3-media2.fl.yelpcdn.com/bphoto/Y-VXRm6MaNUULqHaN1RzvQ/o.jpg',
 'is_claimed': True,
 'is_closed': False,
 'url': 'https://www.yelp.com/biz/swing-wine-bar-and-cafe-olympia?adjust_creative=ZAyA5LhP0CVLQ3sJwFX2SA&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_lookup&utm_source=ZAyA5LhP0CVLQ3sJwFX2SA',
 'phone': '+13603579464',
 'display_phone': '(360) 357-9464',
 'review_count': 268,
 'categories': [{'alias': 'newamerican', 'title': 'New American'},
  {'alias': 'cocktailbars', 'title': 'Cocktail Bars'},
  {'alias': 'wine_bars', 'title': 'Wine Bars'}],
 'rating': 4.0,
 'location': {'address1': '825 Columbia St SW',
  'address2': '',
  'address3': '',
  'city': 'Olympia',
  'zip_code': '98501',
  'country': 'US',
  'state': 'WA',
  'display_address': ['825 Columbia St SW', 'Olympia, WA 98501'],
  'cross_streets': ''},
 'coordinates': {'latitude': 47.040

In [64]:
# Cosmos DB endpoint and key, read from the environment.
cosmos_url = os.getenv('COSMOS_URL')
cosmos_key = os.getenv('COSMOS_KEY')

# Database and container names are also read from the environment. They fall
# back to the names that used to be hard-coded below, so the cell behaves the
# same when the variables are not set.
cosmos_db = os.getenv('COSMOS_DB') or 'Yelp'
cosmos_container = os.getenv('COSMOS_CONTAINER') or 'Items'

client = CosmosClient(url=cosmos_url, credential=cosmos_key)
db = client.get_database_client(cosmos_db)
container = db.get_container_client(cosmos_container)

container

<ContainerProxy [dbs/Yelp/colls/Items]>

In [65]:
# DESTRUCTIVE: delete every document currently in the container, so the insert
# in the next cell starts from an empty container.
for itm in container.query_items('SELECT c.id FROM c', enable_cross_partition_query=True):
    itm_id = itm['id']
    # NOTE(review): the id is passed as both the item and the partition key value --
    # presumably the container is partitioned on /id; confirm before reusing elsewhere.
    container.delete_item(itm_id, itm_id)

In [66]:
# Insert each processed Yelp business into the container as its own document.
for yelp_itm in yelp_lst:
    container.create_item(yelp_itm)