Permalink
Browse files

Rework advanced search options, and default search behavior (#833)

* Stop splitting words in a sentence by default when searching
* Add "Each Word" option to search for each word in a sentence (old default behavior)
* Rename "Perfect match" to "Entire string", "Whole Words" to "Entire Words"
* Reword and reverse API promotion links
* Ignore words shorter than 2 characters when searching for each word
* Added tooltips to search options
  • Loading branch information...
flodolo committed Jan 11, 2017
1 parent 6876a6d commit e53e11790605f6037b2ca02464a267dc92790a96
@@ -15,7 +15,7 @@
* Calls are like this:
* api/<version>/<service>/<repository>/<search type>/<source locale>/<target locale>/<url escaped search>/?optional_parameter1=foo&optional_parameter2=bar
* Example for an entity search containing bookmark:
- * https://transvision.mozfr.org/api/v1/tm/release/entity/en-US/fr/bookmark/?case_sensitive=1
+ * https://transvision.mozfr.org/api/v1/tm/release/entity/en-US/fr/bookmark/?case_sensitive=case_sensitive
* (tm = translation memory service)
*
* Example for the list of locales supported for a repo:
@@ -245,7 +245,7 @@ private function isValidServiceCall($service)
break;
case 'search':
- // ex: /api/v1/search/string/central/en-US/fr/Bookmark/?case_sensitive=1
+ // ex: /api/v1/search/string/central/en-US/fr/Bookmark/?case_sensitive=case_sensitive
if (! $this->verifyEnoughParameters(7)) {
return false;
}
@@ -10,9 +10,10 @@
* e.g.:
* $search = (new Search)
* ->setSearchTerms('Bookmark this page')
- * ->setRegexWholeWords(true)
* ->setRegexCaseInsensitive(true)
- * ->setRegexPerfectMatch(false)
+ * ->setRegexEntireString(false)
+ * ->setEachWord(false)
+ * ->setRegexEntireWords(false)
* ->setRepository('release')
* ->setSearchType('strings')
* ->setLocales(['en-US', 'fr']);
@@ -38,16 +39,22 @@ class Search
protected $regex_case;
/**
- * Consider the space separated string as a single word for search
- * @var string
+ * Only return strings that entirely match the search (case excluded)
+ * @var boolean
+ */
+ protected $regex_entire_string;
+
+ /**
+ * Only return strings where entire words match the search (case excluded)
+ * @var boolean|string False by default; '\b' or '' once set via setRegexEntireWords()
*/
- protected $regex_whole_words;
+ protected $regex_entire_words;
/**
- * Only return strings that match the search perfectly (case excluded)
+ * Set to search for each word in the query instead of using it as a whole.
* @var boolean
*/
- protected $regex_perfect_match;
+ protected $each_word;
/**
* The search terms for the regex, these differ from $search_terms as
@@ -79,7 +86,8 @@ class Search
* @var array
*/
protected $form_search_options = [
- 'case_sensitive', 'perfect_match', 'repo', 'search_type', 't2t', 'whole_word',
+ 'case_sensitive', 'entire_string', 'repo',
+ 'search_type', 't2t', 'each_word', 'entire_words',
];
/**
@@ -102,8 +110,9 @@ public function __construct()
$this->search_terms = '';
$this->regex = '';
$this->regex_case = 'i';
- $this->regex_whole_words = '';
- $this->regex_perfect_match = false;
+ $this->regex_entire_string = false;
+ $this->regex_entire_words = false;
+ $this->each_word = false;
$this->regex_search_terms = '';
$this->repository = 'aurora'; // Most locales work on Aurora
$this->search_type = 'strings';
@@ -161,35 +170,51 @@ public function setRegexCaseInsensitive($flag)
}
/**
- * Set the regex to only return perfect matches for the searched string.
+ * Set the regex to only return strings that entirely match the
+ * searched string.
* We cast the value to a boolean because we usually get it from a GET.
*
- * @param boolean $flag Set to True for a perfect match
+ * @param boolean $flag Set to True for an entire string match
* @return $this
*/
- public function setRegexPerfectMatch($flag)
+ public function setRegexEntireString($flag)
{
- $this->regex_perfect_match = (boolean) $flag;
+ $this->regex_entire_string = (boolean) $flag;
$this->updateRegex();
return $this;
}
/**
- * Set the regex so as that a multi-word search is taken as a single word.
+ * Set the regex to only return strings where entire words match
+ * the searched string.
* We cast the value to a boolean because we usually get it from a GET.
*
- * @param boolean $flag A string evaluated to True will add \b to the regex
+ * @param boolean $flag Set to True for an entire words match
* @return $this
*/
- public function setRegexWholeWords($flag)
+ public function setRegexEntireWords($flag)
{
- $this->regex_whole_words = (boolean) $flag ? '\b' : '';
+ $this->regex_entire_words = (boolean) $flag ? '\b' : '';
$this->updateRegex();
return $this;
}
+ /**
+ * Set to search for each word in the query instead of using it as a whole.
+ * We cast the value to a boolean because we usually get it from a GET.
+ *
+ * @param boolean $flag Set to True to search for each word.
+ * @return $this
+ */
+ public function setEachWord($flag)
+ {
+ $this->each_word = (boolean) $flag;
+
+ return $this;
+ }
+
/**
* Update the $regex_search_terms value every time a setter to the regex
* is called.
@@ -199,15 +224,15 @@ public function setRegexWholeWords($flag)
private function updateRegex()
{
$search = preg_quote($this->regex_search_terms);
- if ($this->regex_perfect_match) {
+ if ($this->regex_entire_string) {
$search = "^{$search}$";
}
$this->regex =
'~'
- . $this->regex_whole_words
+ . $this->regex_entire_words
. $search
- . $this->regex_whole_words
+ . $this->regex_entire_words
. '~'
. $this->regex_case
. 'u';
@@ -226,53 +251,63 @@ public function getRegex()
}
/**
- * Get the state of regex_perfect_match
+ * Get the state of regex_entire_string
*
- * @return boolean True if the regex searches for a perfect string match
+ * @return boolean True if the regex searches for an entire string match
*/
- public function isPerfectMatch()
+ public function isEntireString()
{
- return $this->regex_perfect_match;
+ return $this->regex_entire_string;
}
/**
- * Get search terms
+ * Get the state of each_word
*
- * @return string Searched terms
+ * @return boolean True if the search should be for each word.
*/
- public function getSearchTerms()
+ public function isEachWord()
{
- return $this->search_terms;
+ return $this->each_word;
}
/**
- * Get search terms in regex
+ * Get the state of entire_words
*
- * @return string Searched terms in regex
+ * @return boolean True if the search should only match entire words.
*/
- public function getRegexSearchTerms()
+ public function isEntireWords()
{
- return $this->regex_search_terms;
+ return $this->regex_entire_words == '\b' ? true : false;
}
/**
- * Get the regex case
+ * Get the state of case_sensitive
*
- * @return string Return 'i' for case insensitive search, '' for sensitive
+ * @return boolean True if the search is case sensitive, false if it is case insensitive
*/
- public function getRegexCase()
+ public function isCaseSensitive()
{
- return $this->regex_case;
+ return $this->regex_case == 'i' ? false : true;
}
/**
- * Get the regex whole words
+ * Get search terms
*
- * @return boolean True if we have the 'whole words' option for the regex
+ * @return string Searched terms
*/
- public function isWholeWords()
+ public function getSearchTerms()
{
- return $this->regex_whole_words;
+ return $this->search_terms;
+ }
+
+ /**
+ * Get search terms in regex
+ *
+ * @return string Searched terms in regex
+ */
+ public function getRegexSearchTerms()
+ {
+ return $this->regex_search_terms;
}
/**
@@ -270,10 +270,10 @@ public static function resultsTable($search_object, $search_results, $page)
</thead>
<tbody>\n";
- if (! $search_object->isWholeWords() && ! $search_object->isPerfectMatch()) {
- $search = Utils::uniqueWords($search_object->getSearchTerms());
+ if ($search_object->isEachWord()) {
+ $search_terms = Utils::uniqueWords($search_object->getSearchTerms());
} else {
- $search = [$search_object->getSearchTerms()];
+ $search_terms = [$search_object->getSearchTerms()];
}
$current_repo = $search_object->getRepository();
@@ -284,7 +284,7 @@ public static function resultsTable($search_object, $search_results, $page)
if ($search_object->getSearchType() == 'strings') {
$result_entity = self::formatEntity($key);
} else {
- $result_entity = self::formatEntity($key, $search[0]);
+ $result_entity = self::formatEntity($key, $search_terms[0]);
}
$component = explode('/', $key)[0];
@@ -296,7 +296,7 @@ public static function resultsTable($search_object, $search_results, $page)
. "&locale={$locale2}"
. "&repo={$current_repo}"
. "&search_type=entities&recherche={$key}"
- . "&perfect_match=perfect_match";
+ . "&entire_string=entire_string";
$bz_link = [Bugzilla::reportErrorLink(
$locale2, $key, $source_string, $target_string, $current_repo, $entity_link
@@ -308,7 +308,7 @@ public static function resultsTable($search_object, $search_results, $page)
. "&locale={$search_object->getLocale('extra')}"
. "&repo={$current_repo}"
. "&search_type=entities&recherche={$key}"
- . "&perfect_match=perfect_match";
+ . "&entire_string=entire_string";
$bz_link[] = Bugzilla::reportErrorLink(
$search_object->getLocale('extra'), $key, $source_string, $target_string2, $current_repo, $entity_link
);
@@ -338,14 +338,14 @@ public static function resultsTable($search_object, $search_results, $page)
$transliterate_string_id = 'transliterate_' . $string_id;
}
- foreach ($search as $val) {
- $source_string = Strings::markString($val, $source_string);
- $target_string = Strings::markString($val, $target_string);
+ foreach ($search_terms as $search_term) {
+ $source_string = Strings::markString($search_term, $source_string);
+ $target_string = Strings::markString($search_term, $target_string);
if ($extra_locale) {
- $target_string2 = Strings::markString($val, $target_string2);
+ $target_string2 = Strings::markString($search_term, $target_string2);
}
if ($transliterate) {
- $transliterated_string = Strings::markString($val, $transliterated_string);
+ $transliterated_string = Strings::markString($search_term, $transliterated_string);
}
}
@@ -531,7 +531,7 @@ public static function searchEntities($source_strings, $regex)
/*
If there are no results, search also through the entity names.
- This is needed for "perfect match" when only the entity name is
+ This is needed for "entire string" when only the entity name is
provided.
*/
if (empty($entities)) {
@@ -124,7 +124,8 @@ public static function printSimpleTable(
}
/**
- * Split a sentence in words from longest to shortest
+ * Split a sentence into words from longest to shortest, ignoring
+ * words shorter than 2 characters.
*
* @param string $sentence
* @return array all the words in the sentence sorted by length
@@ -133,7 +134,14 @@ public static function uniqueWords($sentence)
{
$words = explode(' ', $sentence);
$words = array_filter($words); // Filter out extra spaces
- $words = array_unique($words); // Remove duplicate words
+ // Filter out 1-character words
+ $words = array_filter($words, function ($a) {
+ return (mb_strlen($a) >= 2);
+ });
+
+ // Remove duplicate words
+ $words = array_unique($words);
+
// Sort words from longest to shortest
usort(
$words,
@@ -444,8 +452,8 @@ public static function APIPromotion($source_locale, $target_locale)
array_map($sanitize, array_values($args))
);
- $args['locale'] = $source_locale;
- $args['sourcelocale'] = $target_locale;
+ $args['locale'] = $target_locale;
+ $args['sourcelocale'] = $source_locale;
$args['json'] = 'true';
// We don't want to encode slashes in searches for entity names
@@ -31,9 +31,10 @@
: '';
$regex = [];
- $regex['whole'] = isset($_GET['whole_word']) ? 'whole_word=1' : '';
- $regex['case'] = isset($_GET['case_sensitive']) ? 'case_sensitive=1' : '';
- $regex['perfect'] = isset($_GET['perfect_match']) ? 'perfect_match=1' : '';
+ $regex['each_word'] = isset($_GET['each_word']) ? 'each_word=each_word' : '';
+ $regex['case_sensitive'] = isset($_GET['case_sensitive']) ? 'case_sensitive=case_sensitive' : '';
+ $regex['entire_string'] = isset($_GET['entire_string']) ? 'entire_string=entire_string' : '';
+ $regex['entire_words'] = isset($_GET['entire_words']) ? 'entire_words=entire_words' : '';
$regex = array_filter($regex);
$regex = count($regex) > 0 ? '?' . implode('&', $regex) : '';
@@ -28,9 +28,10 @@
// Define our regex and search parameters
$search
->setSearchTerms($my_search)
- ->setRegexWholeWords($check['whole_word'])
+ ->setEachWord($check['each_word'])
->setRegexCaseInsensitive($check['case_sensitive'])
- ->setRegexPerfectMatch($check['perfect_match'])
+ ->setRegexEntireString($check['entire_string'])
+ ->setRegexEntireWords($check['entire_words'])
->setRepository($repo)
->setSearchType($search_type)
->setLocales([$source_locale, $locale, $locale2]);
@@ -3,11 +3,16 @@
$tmx_target2 = Utils::getRepoStrings($locale2, $search->getRepository());
-if ($search->isPerfectMatch()) {
+if ($search->isEntireString()) {
$locale3_strings = $search->grep($tmx_target2);
} else {
$locale3_strings = $tmx_target2;
- foreach (Utils::uniqueWords($search->getSearchTerms()) as $word) {
+
+ $search_terms = $search->isEachWord()
+ ? Utils::uniqueWords($search->getSearchTerms())
+ : [$search->getSearchTerms()];
+
+ foreach ($search_terms as $word) {
$search->setRegexSearchTerms($word);
$locale3_strings = $search->grep($locale3_strings);
}
Oops, something went wrong.

0 comments on commit e53e117

Please sign in to comment.