Skip to content

Commit

Permalink
Issue mozilla-mobile#25: Add yelp->ta verification fn.
Browse files Browse the repository at this point in the history
This will allow devs to check out yelp places in different areas to see how
well TA matches.
  • Loading branch information
mcomella committed Jan 28, 2017
1 parent acb2822 commit 0584889
Showing 1 changed file with 49 additions and 2 deletions.
51 changes: 49 additions & 2 deletions scripts/prox_crosswalk.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,22 @@
Intended to be used from a REPL. Primary functions are:
- yelp_ids_to_tripadvisor_ids
- (for debugging) verify_yelp_ids_to_tripadvisor_ids
- write_to_db
TODO:
- Handle hitting API limits
- Handle network errors (e.g. TA API)
- TEST ACCURACY
- Make it easy to run (read place ids from file?)
- Make it easy to run (read place ids from file? store intermediate xw into file? then try atomic update?)
- Make this code re-usable and put into separate files.
- How does this handle incorrect Yelp places?
- Keeps throwing 504: Gateway Time-out from Yelp3Client (rate limit?).
"""
from __future__ import print_function

from app import util
from app.constants import _tablePrefix
from app.providers import tripadvisor as ta
from app.providers import yelp
Expand All @@ -29,12 +35,16 @@
'wikipedia',
}

# HACK: used to avoid duplicating Yelp place requests.
_YELP_ID_TO_PLACE_CACHE = {}


def _get_crosswalk_db():
    """Return the `_CROSSWALK_PATH` child of the `_firebase` database handle."""
    database = _firebase.database()
    return database.child(_CROSSWALK_PATH)


def _yelp_id_to_tripadvisor(yelp_id):
place = yelp.resolve_with_key(yelp_id)
place = _YELP_ID_TO_PLACE_CACHE.get(yelp_id, yelp.resolve_with_key(yelp_id))
_YELP_ID_TO_PLACE_CACHE[yelp_id] = place
name = place['name']
coord = place['coordinates']
coord_tuple = (coord['latitude'], coord['longitude'])
Expand Down Expand Up @@ -130,3 +140,40 @@ def add_ids_to_dict(provider_key, yelp_to_other_id):

for yelp_id, provider_map in crosswalk.iteritems():
_write_crosswalk_to_db(yelp_id, provider_map)


def verify_yelp_ids_to_tripadvisor_ids(yelp_ids):
    """Summarize Yelp -> TripAdvisor matches so a human can verify them.

    The output contains 1) Yelp places that matched at least one TA place,
    alongside those TA candidates, so a human can confirm they are the same
    place, and 2) Yelp places with no TA match, so a human can find out why.

    It is recommended to `pprint` the results.

    :param yelp_ids: the Yelp place ids to look up; passed straight through to
        `_yelp_ids_to_raw_tripadvisor`.
    :return: {'not_missing_ta': [{'yelp': <yelp-summary>,
                                  'ta': [<ta-summary>, ...]}, ...],
              'missing_ta': [<yelp-summary>, ...]}
        where <yelp-summary> is {'name', 'url', 'loc'} and <ta-summary> is
        {'name', 'id', 'distance', 'loc'}.
    """
    tas = _yelp_ids_to_raw_tripadvisor(yelp_ids)
    missing_out = []
    not_missing_out = []
    for yelp_id, ta in tas.iteritems():
        # Look the place up in the cache first and only hit the Yelp API on a
        # miss. `dict.get(key, default)` evaluates `default` eagerly, so
        # passing the API call as the default would issue a request for every
        # id even when it is already cached.
        # NOTE(review): entries are presumably cached already by the lookup
        # above -- confirm against `_yelp_ids_to_raw_tripadvisor`.
        try:
            yplace = _YELP_ID_TO_PLACE_CACHE[yelp_id]
        except KeyError:
            yplace = yelp.resolve_with_key(yelp_id)
            _YELP_ID_TO_PLACE_CACHE[yelp_id] = yplace

        yout = {'name': yplace['name'],
                'url': util.strip_url_params(yplace['url']),
                'loc': ', '.join(yplace['location']['display_address'])}
        if not ta:
            # No TripAdvisor candidates for this Yelp place.
            missing_out.append(yout)
            continue

        tout = [{'name': ta_place['name'],
                 'id': ta_place['location_id'],
                 'distance': ta_place['distance'],
                 'loc': ta_place['address_obj'].get('address_string', '')}
                for ta_place in ta]
        not_missing_out.append({'yelp': yout, 'ta': tout})

    return {'not_missing_ta': not_missing_out,
            'missing_ta': missing_out}

0 comments on commit 0584889

Please sign in to comment.