Skip to content

Commit

Permalink
Issue mozilla-mobile#25: Add accent correction for yelp->TA queries; …
Browse files Browse the repository at this point in the history
…add improvements notes.

See code comments for details and improvement notes.
  • Loading branch information
mcomella committed Jan 28, 2017
1 parent 517229d commit acb2822
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 2 deletions.
36 changes: 34 additions & 2 deletions app/providers/tripadvisor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# coding=utf-8

import re
import requests

from app import util
from app.clients import tripadvisorkey

idPattern = re.compile("-d([0-9]*)-")
Expand All @@ -19,7 +22,9 @@ def resolve(idObj):


# URL template for the TripAdvisor location_mapper endpoint; the `{}` placeholder is
# filled with the comma-joined coordinate string built in `search`.
TRIP_ADVISOR_LOC_MAPPER_API = 'http://api.tripadvisor.com/api/partner/2.0/location_mapper/{}'
# Base query parameters for location_mapper requests; `search` copies this dict before adding 'q'.
LOC_MAPPER_DEFAULT_PARAMS = { 'key': tripadvisorkey + '-mapper' }


def _get_loc_mapper_default_params():
    """Return a new dict holding the default location_mapper query parameters.

    A fresh dict is built on every call, so callers may add their own entries
    (such as 'q') without affecting anyone else.
    """
    mapper_key = tripadvisorkey + '-mapper'
    return {'key': mapper_key}


def search(coord, query):
Expand All @@ -41,6 +46,33 @@ def search(coord, query):
coord_str = ','.join([str(x) for x in coord])
url = TRIP_ADVISOR_LOC_MAPPER_API.format(coord_str)

params = dict(LOC_MAPPER_DEFAULT_PARAMS)
params = _get_loc_mapper_default_params()
params['q'] = query
res = requests.get(url, params).json()
if not _search_query_has_results(res) and util.str_contains_accents(query):
res = _search_without_accents(query, url)

# TODO: Further improve results by querying TA without a query_str and doing our own
# name matching on the results. For example, "Mariposa Baking" (from Yelp) finds no matches
# despite "Mariposa" being on TA. For a full list of relevant places, see `docs/yelp_ta_name_mismatches.yml`.
#
# Implementation notes:
# - TA will return at most 10 results
# - A place won't necessarily exist in TA (or at that location, e.g. Señor Sisig is a food truck).
# - Custom fuzzy matching may introduce more error, making a trade-off between some error & missing data.
return res


def _search_without_accents(query, url):
    """Query the TA location_mapper API with the accents stripped from `query`.

    Yelp place names are frequently listed with accents while TripAdvisor names
    are not, which can cause name mismatches. Removing the accents has been
    observed to correct some places (e.g. La Mar Cebichería Peruana).

    :param query: the place name to search for; it is converted with `unicode`
        before the accents are stripped.
    :param url: the fully formatted location_mapper URL to request.
    :return: the decoded JSON body of the response.
    """
    request_params = _get_loc_mapper_default_params()
    request_params['q'] = util.strip_accents(unicode(query))
    response = requests.get(url, request_params)
    return response.json()


def _search_query_has_results(res): return len(res['data']) > 0
60 changes: 60 additions & 0 deletions docs/yelp_ta_name_mismatches.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
---
# A list of places that exist on TripAdvisor but are not found when putting a
# Yelp place name directly into the TA location_mapper/ API. See
# `app.providers.tripadvisor.search` for motivations for this list.
#
# Yelp place names come first (even indices), followed by the associated
# TripAdvisor name (odd indices).
#
# Consider using this file for unit testing. :) Note that some names may not be
# correctable.

sf:
- Frog Hollow Farm Market & Cafe
- Frog Hollow Farm
- Mariposa Baking
- Mariposa
- Mourad Restaurant
- Mourad
- DragonEats
- dragonEats
- Blue Bottle Coffee Stand
- Blue Bottle Coffee Ferry Building

yvr:
- Guu with Otokomae
- Otokomae at Kitanoya Guu
- Canra Srilankan Cuisine
- Canra Sri Lankan Plus
- Nuba Gastown
- Nuba Restaurant
- The Sardine Can
- Sardine Can
- Jam Cafe on Beatty
- Jam Cafe YVR
- Brioche Urban Baking & Catering
- Brioche
- Finch's Tea & Coffee House
- Finch’s Teahouse
- La Taqueria Pinche Taco Shop
- La Taqueria

chicago:
- Fremont Chicago
- Fremont
- Osaka Sushi Express & Fresh Fruit Smoothies
- Osaka Sushi Express
- MingHin Cuisine
- Ming Hin Cuisine
- Eddie Vs Prime Seafood and Steak
- Eddie V’s Prime Seafood
- Peach And Green
- Peach & Green
- Volare Ristorante Italiano
- Volare
- Cherry Circle Room
- Cherry Circle
- Brightwok Kitchen
- Brightwok
- Doc Bs Fresh Kitchen - River North
- Doc B’s Fresh Kitchen

0 comments on commit acb2822

Please sign in to comment.