Skip to content
Permalink
Browse files

Rework advanced search options, and default search behavior (#833)

* Stop splitting words in a sentence by default when searching
* Add "Each Word" option to search for every word in a sentence (old default behavior)
* Rename "Perfect match" to "Entire string", "Whole Words" to "Entire Words"
* Reword and reverse API promotion links
* Ignore words shorter than 2 characters when searching for each word
* Add tooltips to search options
  • Loading branch information
flodolo committed Jan 11, 2017
1 parent 6876a6d commit e53e11790605f6037b2ca02464a267dc92790a96
@@ -15,7 +15,7 @@
* Calls are like this:
* api/<version>/<service>/<repository>/<search type>/<source locale>/<target locale>/<url escaped search>/?optional_parameter1=foo&optional_parameter2=bar
* Example for an entity search containing bookmark:
* https://transvision.mozfr.org/api/v1/tm/release/entity/en-US/fr/bookmark/?case_sensitive=1
* https://transvision.mozfr.org/api/v1/tm/release/entity/en-US/fr/bookmark/?case_sensitive=case_sensitive
* (tm = translation memory service)
*
* Example for the list of locales supported for a repo:
@@ -245,7 +245,7 @@ private function isValidServiceCall($service)

break;
case 'search':
// ex: /api/v1/search/string/central/en-US/fr/Bookmark/?case_sensitive=1
// ex: /api/v1/search/string/central/en-US/fr/Bookmark/?case_sensitive=case_sensitive
if (! $this->verifyEnoughParameters(7)) {
return false;
}
@@ -10,9 +10,10 @@
* e.g.:
* $search = (new Search)
* ->setSearchTerms('Bookmark this page')
* ->setRegexWholeWords(true)
* ->setRegexCaseInsensitive(true)
* ->setRegexPerfectMatch(false)
* ->setRegexEntireString(false)
* ->setEachWord(false)
* ->setEntireWords(false)
* ->setRepository('release')
* ->setSearchType('strings')
* ->setLocales(['en-US', 'fr']);
@@ -38,16 +39,22 @@ class Search
protected $regex_case;

/**
* Consider the space separated string as a single word for search
* @var string
* Only return strings that entirely match the search (case excluded)
* @var boolean
*/
protected $regex_entire_string;

/**
* Only return strings where entire words match the search (case excluded)
* @var boolean
*/
protected $regex_whole_words;
protected $regex_entire_words;

/**
* Only return strings that match the search perfectly (case excluded)
* Set to search for each word in the query instead of using it as a whole.
* @var boolean
*/
protected $regex_perfect_match;
protected $each_word;

/**
* The search terms for the regex, these differ from $search_terms as
@@ -79,7 +86,8 @@ class Search
* @var array
*/
protected $form_search_options = [
'case_sensitive', 'perfect_match', 'repo', 'search_type', 't2t', 'whole_word',
'case_sensitive', 'entire_string', 'repo',
'search_type', 't2t', 'each_word', 'entire_words',
];

/**
@@ -102,8 +110,9 @@ public function __construct()
$this->search_terms = '';
$this->regex = '';
$this->regex_case = 'i';
$this->regex_whole_words = '';
$this->regex_perfect_match = false;
$this->regex_entire_string = false;
$this->regex_entire_words = false;
$this->each_word = false;
$this->regex_search_terms = '';
$this->repository = 'aurora'; // Most locales work on Aurora
$this->search_type = 'strings';
@@ -161,35 +170,51 @@ public function setRegexCaseInsensitive($flag)
}

/**
* Set the regex to only return perfect matches for the searched string.
* Set the regex to only return strings that entirely match the
* searched string.
* We cast the value to a boolean because we usually get it from a GET.
*
* @param boolean $flag Set to True for a perfect match
* @param boolean $flag Set to True for an entire string match
* @return $this
*/
public function setRegexPerfectMatch($flag)
public function setRegexEntireString($flag)
{
$this->regex_perfect_match = (boolean) $flag;
$this->regex_entire_string = (boolean) $flag;
$this->updateRegex();

return $this;
}

/**
* Set the regex so as that a multi-word search is taken as a single word.
* Set the regex to only return strings where entire words match
* the searched string.
* We cast the value to a boolean because we usually get it from a GET.
*
* @param boolean $flag A string evaluated to True will add \b to the regex
* @param boolean $flag Set to True for an entire words match
* @return $this
*/
public function setRegexWholeWords($flag)
public function setRegexEntireWords($flag)
{
$this->regex_whole_words = (boolean) $flag ? '\b' : '';
$this->regex_entire_words = (boolean) $flag ? '\b' : '';
$this->updateRegex();

return $this;
}

/**
 * Choose whether the query is searched word by word or as a whole.
 *
 * The flag is cast to a boolean because it usually comes from a GET
 * parameter, so any truthy value enables the option.
 *
 * Unlike the setRegex*() setters, this does not call updateRegex():
 * callers read the flag back through isEachWord() and split the query
 * into words themselves.
 *
 * @param boolean $flag Set to True to search for each word.
 * @return $this
 */
public function setEachWord($flag)
{
    // (bool) is the canonical cast name; (boolean) is a legacy alias.
    $this->each_word = (bool) $flag;

    return $this;
}

/**
* Update the $regex_search_terms value every time a setter to the regex
* is called.
@@ -199,15 +224,15 @@ public function setRegexWholeWords($flag)
private function updateRegex()
{
$search = preg_quote($this->regex_search_terms);
if ($this->regex_perfect_match) {
if ($this->regex_entire_string) {
$search = "^{$search}$";
}

$this->regex =
'~'
. $this->regex_whole_words
. $this->regex_entire_words
. $search
. $this->regex_whole_words
. $this->regex_entire_words
. '~'
. $this->regex_case
. 'u';
@@ -226,53 +251,63 @@ public function getRegex()
}

/**
* Get the state of regex_perfect_match
* Get the state of regex_entire_string
*
* @return boolean True if the regex searches for a perfect string match
* @return boolean True if the regex searches for an entire string match
*/
public function isPerfectMatch()
public function isEntireString()
{
return $this->regex_perfect_match;
return $this->regex_entire_string;
}

/**
* Get search terms
* Get the state of each_word
*
* @return string Searched terms
* @return boolean True if the search should be for each word.
*/
public function getSearchTerms()
public function isEachWord()
{
return $this->search_terms;
return $this->each_word;
}

/**
* Get search terms in regex
* Get the state of entire_words
*
* @return string Searched terms in regex
* @return boolean True if the search should only match entire words.
*/
public function getRegexSearchTerms()
public function isEntireWords()
{
return $this->regex_search_terms;
return $this->regex_entire_words == '\b' ? true : false;
}

/**
* Get the regex case
* Get the state of case_sensitive
*
* @return string Return 'i' for case insensitive search, '' for sensitive
* @return boolean True if the search is case sensitive, False otherwise
*/
public function getRegexCase()
public function isCaseSensitive()
{
return $this->regex_case;
return $this->regex_case == 'i' ? false : true;
}

/**
* Get the regex whole words
* Get search terms
*
* @return boolean True if we have the 'whole words' option for the regex
* @return string Searched terms
*/
public function isWholeWords()
public function getSearchTerms()
{
return $this->regex_whole_words;
return $this->search_terms;
}

/**
* Get the search terms used to build the regex.
*
* This is the raw value: updateRegex() applies preg_quote() to it when
* assembling the pattern, so the string returned here is not escaped.
*
* @return string Search terms the regex is built from
*/
public function getRegexSearchTerms()
{
return $this->regex_search_terms;
}

/**
@@ -270,10 +270,10 @@ public static function resultsTable($search_object, $search_results, $page)
</thead>
<tbody>\n";

if (! $search_object->isWholeWords() && ! $search_object->isPerfectMatch()) {
$search = Utils::uniqueWords($search_object->getSearchTerms());
if ($search_object->isEachWord()) {
$search_terms = Utils::uniqueWords($search_object->getSearchTerms());
} else {
$search = [$search_object->getSearchTerms()];
$search_terms = [$search_object->getSearchTerms()];
}

$current_repo = $search_object->getRepository();
@@ -284,7 +284,7 @@ public static function resultsTable($search_object, $search_results, $page)
if ($search_object->getSearchType() == 'strings') {
$result_entity = self::formatEntity($key);
} else {
$result_entity = self::formatEntity($key, $search[0]);
$result_entity = self::formatEntity($key, $search_terms[0]);
}

$component = explode('/', $key)[0];
@@ -296,7 +296,7 @@ public static function resultsTable($search_object, $search_results, $page)
. "&locale={$locale2}"
. "&repo={$current_repo}"
. "&search_type=entities&recherche={$key}"
. "&perfect_match=perfect_match";
. "&entire_string=entire_string";

$bz_link = [Bugzilla::reportErrorLink(
$locale2, $key, $source_string, $target_string, $current_repo, $entity_link
@@ -308,7 +308,7 @@ public static function resultsTable($search_object, $search_results, $page)
. "&locale={$search_object->getLocale('extra')}"
. "&repo={$current_repo}"
. "&search_type=entities&recherche={$key}"
. "&perfect_match=perfect_match";
. "&entire_string=entire_string";
$bz_link[] = Bugzilla::reportErrorLink(
$search_object->getLocale('extra'), $key, $source_string, $target_string2, $current_repo, $entity_link
);
@@ -338,14 +338,14 @@ public static function resultsTable($search_object, $search_results, $page)
$transliterate_string_id = 'transliterate_' . $string_id;
}

foreach ($search as $val) {
$source_string = Strings::markString($val, $source_string);
$target_string = Strings::markString($val, $target_string);
foreach ($search_terms as $search_term) {
$source_string = Strings::markString($search_term, $source_string);
$target_string = Strings::markString($search_term, $target_string);
if ($extra_locale) {
$target_string2 = Strings::markString($val, $target_string2);
$target_string2 = Strings::markString($search_term, $target_string2);
}
if ($transliterate) {
$transliterated_string = Strings::markString($val, $transliterated_string);
$transliterated_string = Strings::markString($search_term, $transliterated_string);
}
}

@@ -531,7 +531,7 @@ public static function searchEntities($source_strings, $regex)

/*
If there are no results, search also through the entity names.
This is needed for "perfect match" when only the entity name is
This is needed for "entire string" when only the entity name is
provided.
*/
if (empty($entities)) {
@@ -124,7 +124,8 @@ public static function printSimpleTable(
}

/**
* Split a sentence in words from longest to shortest
* Split a sentence into words, sorted from longest to shortest,
* ignoring words shorter than 2 characters.
*
* @param string $sentence
* @return array all the words in the sentence sorted by length
@@ -133,7 +134,14 @@ public static function uniqueWords($sentence)
{
$words = explode(' ', $sentence);
$words = array_filter($words); // Filter out extra spaces
$words = array_unique($words); // Remove duplicate words
// Filter out 1-character words
$words = array_filter($words, function ($a) {
return (mb_strlen($a) >= 2);
});

// Remove duplicate words
$words = array_unique($words);

// Sort words from longest to shortest
usort(
$words,
@@ -444,8 +452,8 @@ public static function APIPromotion($source_locale, $target_locale)
array_map($sanitize, array_values($args))
);

$args['locale'] = $source_locale;
$args['sourcelocale'] = $target_locale;
$args['locale'] = $target_locale;
$args['sourcelocale'] = $source_locale;
$args['json'] = 'true';

// We don't want to encode slashes in searches for entity names
@@ -31,9 +31,10 @@
: '';

$regex = [];
$regex['whole'] = isset($_GET['whole_word']) ? 'whole_word=1' : '';
$regex['case'] = isset($_GET['case_sensitive']) ? 'case_sensitive=1' : '';
$regex['perfect'] = isset($_GET['perfect_match']) ? 'perfect_match=1' : '';
$regex['each_word'] = isset($_GET['each_word']) ? 'each_word=each_word' : '';
$regex['case_sensitive'] = isset($_GET['case_sensitive']) ? 'case_sensitive=case_sensitive' : '';
$regex['entire_string'] = isset($_GET['entire_string']) ? 'entire_string=entire_string' : '';
$regex['entire_words'] = isset($_GET['entire_words']) ? 'entire_words=entire_words' : '';
$regex = array_filter($regex);
$regex = count($regex) > 0 ? '?' . implode('&', $regex) : '';

@@ -28,9 +28,10 @@
// Define our regex and search parameters
$search
->setSearchTerms($my_search)
->setRegexWholeWords($check['whole_word'])
->setEachWord($check['each_word'])
->setRegexCaseInsensitive($check['case_sensitive'])
->setRegexPerfectMatch($check['perfect_match'])
->setRegexEntireString($check['entire_string'])
->setRegexEntireWords($check['entire_words'])
->setRepository($repo)
->setSearchType($search_type)
->setLocales([$source_locale, $locale, $locale2]);
@@ -3,11 +3,16 @@

$tmx_target2 = Utils::getRepoStrings($locale2, $search->getRepository());

if ($search->isPerfectMatch()) {
if ($search->isEntireString()) {
$locale3_strings = $search->grep($tmx_target2);
} else {
$locale3_strings = $tmx_target2;
foreach (Utils::uniqueWords($search->getSearchTerms()) as $word) {

$search_terms = $search->isEachWord()
? Utils::uniqueWords($search->getSearchTerms())
: [$search->getSearchTerms()];

foreach ($search_terms as $word) {
$search->setRegexSearchTerms($word);
$locale3_strings = $search->grep($locale3_strings);
}

0 comments on commit e53e117

Please sign in to comment.
You can’t perform that action at this time.