Skip to content

Commit

Permalink
Rework advanced search options, and default search behavior (#833)
Browse files Browse the repository at this point in the history
* Stop splitting words in a sentence by default when searching
* Add "Each Word" option to search for all words in a sentence (old default behavior)
* Rename "Perfect match" to "Entire string", "Whole Words" to "Entire Words"
* Reword and reverse API promotion links
* Ignore words shorter than 2 characters when searching for each word
* Added tooltips to search options
  • Loading branch information
flodolo authored Jan 11, 2017
1 parent 6876a6d commit e53e117
Show file tree
Hide file tree
Showing 27 changed files with 353 additions and 188 deletions.
4 changes: 2 additions & 2 deletions app/classes/Transvision/API.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* Calls are like this:
* api/<version>/<service>/<repository>/<search type>/<source locale>/<target locale>/<url escaped search>/?optional_parameter1=foo&optional_parameter2=bar
* Example for an entity search containing bookmark:
* https://transvision.mozfr.org/api/v1/tm/release/entity/en-US/fr/bookmark/?case_sensitive=1
* https://transvision.mozfr.org/api/v1/tm/release/entity/en-US/fr/bookmark/?case_sensitive=case_sensitive
* (tm = translation memory service)
*
* Example for the list of locales supported for a repo:
Expand Down Expand Up @@ -245,7 +245,7 @@ private function isValidServiceCall($service)

break;
case 'search':
// ex: /api/v1/search/string/central/en-US/fr/Bookmark/?case_sensitive=1
// ex: /api/v1/search/string/central/en-US/fr/Bookmark/?case_sensitive=case_sensitive
if (! $this->verifyEnoughParameters(7)) {
return false;
}
Expand Down
117 changes: 76 additions & 41 deletions app/classes/Transvision/Search.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@
* e.g.:
* $search = (new Search)
* ->setSearchTerms('Bookmark this page')
* ->setRegexWholeWords(true)
* ->setRegexCaseInsensitive(true)
* ->setRegexPerfectMatch(false)
* ->setRegexEntireString(false)
* ->setEachWord(false)
* ->setEntireWords(false)
* ->setRepository('release')
* ->setSearchType('strings')
* ->setLocales(['en-US', 'fr']);
Expand All @@ -38,16 +39,22 @@ class Search
protected $regex_case;

/**
* Consider the space separated string as a single word for search
* @var string
* Only return strings that entirely match the search (case excluded)
* @var boolean
*/
protected $regex_entire_string;

/**
* Only return strings where entire words match the search (case excluded)
* @var boolean
*/
protected $regex_whole_words;
protected $regex_entire_words;

/**
* Only return strings that match the search perfectly (case excluded)
* Set to search for each word in the query instead of using it as a whole.
* @var boolean
*/
protected $regex_perfect_match;
protected $each_word;

/**
* The search terms for the regex, these differ from $search_terms as
Expand Down Expand Up @@ -79,7 +86,8 @@ class Search
* @var array
*/
protected $form_search_options = [
'case_sensitive', 'perfect_match', 'repo', 'search_type', 't2t', 'whole_word',
'case_sensitive', 'entire_string', 'repo',
'search_type', 't2t', 'each_word', 'entire_words',
];

/**
Expand All @@ -102,8 +110,9 @@ public function __construct()
$this->search_terms = '';
$this->regex = '';
$this->regex_case = 'i';
$this->regex_whole_words = '';
$this->regex_perfect_match = false;
$this->regex_entire_string = false;
$this->regex_entire_words = false;
$this->each_word = false;
$this->regex_search_terms = '';
$this->repository = 'aurora'; // Most locales work on Aurora
$this->search_type = 'strings';
Expand Down Expand Up @@ -161,35 +170,51 @@ public function setRegexCaseInsensitive($flag)
}

/**
* Set the regex to only return perfect matches for the searched string.
* Set the regex to only return strings that entirely match the
* searched string.
* We cast the value to a boolean because we usually get it from a GET.
*
* @param boolean $flag Set to True for a perfect match
* @param boolean $flag Set to True for an entire string match
* @return $this
*/
public function setRegexPerfectMatch($flag)
public function setRegexEntireString($flag)
{
$this->regex_perfect_match = (boolean) $flag;
$this->regex_entire_string = (boolean) $flag;
$this->updateRegex();

return $this;
}

/**
* Set the regex so as that a multi-word search is taken as a single word.
* Set the regex to only return strings where entire words match
* the searched string.
* We cast the value to a boolean because we usually get it from a GET.
*
* @param boolean $flag A string evaluated to True will add \b to the regex
* @param boolean $flag Set to True for an entire words match
* @return $this
*/
public function setRegexWholeWords($flag)
public function setRegexEntireWords($flag)
{
$this->regex_whole_words = (boolean) $flag ? '\b' : '';
$this->regex_entire_words = (boolean) $flag ? '\b' : '';
$this->updateRegex();

return $this;
}

/**
* Set to search for each word in the query instead of using it as a whole.
* We cast the value to a boolean because we usually get it from a GET.
*
* @param boolean $flag Set to True to search for each word.
* @return $this
*/
public function setEachWord($flag)
{
    // The flag usually comes straight from a GET parameter, so it may be a
    // string or missing; normalize it to a strict boolean. The canonical
    // (bool) cast is used because the (boolean) alias is deprecated in
    // recent PHP versions.
    $this->each_word = (bool) $flag;

    // Return the instance so setter calls can be chained.
    return $this;
}

/**
* Update the $regex_search_terms value every time a setter to the regex
* is called.
Expand All @@ -199,15 +224,15 @@ public function setRegexWholeWords($flag)
private function updateRegex()
{
$search = preg_quote($this->regex_search_terms);
if ($this->regex_perfect_match) {
if ($this->regex_entire_string) {
$search = "^{$search}$";
}

$this->regex =
'~'
. $this->regex_whole_words
. $this->regex_entire_words
. $search
. $this->regex_whole_words
. $this->regex_entire_words
. '~'
. $this->regex_case
. 'u';
Expand All @@ -226,53 +251,63 @@ public function getRegex()
}

/**
* Get the state of regex_perfect_match
* Get the state of regex_entire_string
*
* @return boolean True if the regex searches for a perfect string match
* @return boolean True if the regex searches for an entire string match
*/
public function isPerfectMatch()
public function isEntireString()
{
return $this->regex_perfect_match;
return $this->regex_entire_string;
}

/**
* Get search terms
* Get the state of each_word
*
* @return string Searched terms
* @return boolean True if the search should be for each word.
*/
public function getSearchTerms()
public function isEachWord()
{
return $this->search_terms;
return $this->each_word;
}

/**
* Get search terms in regex
* Get the state of entire_words
*
* @return string Searched terms in regex
* @return boolean True if the search should be only for entire words.
*/
public function getRegexSearchTerms()
public function isEntireWords()
{
return $this->regex_search_terms;
return $this->regex_entire_words == '\b' ? true : false;
}

/**
* Get the regex case
* Get the state of case_sensitive
*
* @return string Return 'i' for case insensitive search, '' for sensitive
* @return boolean True if the search is case sensitive, false otherwise
*/
public function getRegexCase()
public function isCaseSensitive()
{
return $this->regex_case;
return $this->regex_case == 'i' ? false : true;
}

/**
* Get the regex whole words
* Get search terms
*
* @return boolean True if we have the 'whole words' option for the regex
* @return string Searched terms
*/
public function isWholeWords()
public function getSearchTerms()
{
return $this->regex_whole_words;
return $this->search_terms;
}

/**
* Get search terms in regex
*
* @return string Searched terms in regex
*/
public function getRegexSearchTerms()
{
return $this->regex_search_terms;
}

/**
Expand Down
24 changes: 12 additions & 12 deletions app/classes/Transvision/ShowResults.php
Original file line number Diff line number Diff line change
Expand Up @@ -270,10 +270,10 @@ public static function resultsTable($search_object, $search_results, $page)
</thead>
<tbody>\n";

if (! $search_object->isWholeWords() && ! $search_object->isPerfectMatch()) {
$search = Utils::uniqueWords($search_object->getSearchTerms());
if ($search_object->isEachWord()) {
$search_terms = Utils::uniqueWords($search_object->getSearchTerms());
} else {
$search = [$search_object->getSearchTerms()];
$search_terms = [$search_object->getSearchTerms()];
}

$current_repo = $search_object->getRepository();
Expand All @@ -284,7 +284,7 @@ public static function resultsTable($search_object, $search_results, $page)
if ($search_object->getSearchType() == 'strings') {
$result_entity = self::formatEntity($key);
} else {
$result_entity = self::formatEntity($key, $search[0]);
$result_entity = self::formatEntity($key, $search_terms[0]);
}

$component = explode('/', $key)[0];
Expand All @@ -296,7 +296,7 @@ public static function resultsTable($search_object, $search_results, $page)
. "&locale={$locale2}"
. "&repo={$current_repo}"
. "&search_type=entities&recherche={$key}"
. "&perfect_match=perfect_match";
. "&entire_string=entire_string";

$bz_link = [Bugzilla::reportErrorLink(
$locale2, $key, $source_string, $target_string, $current_repo, $entity_link
Expand All @@ -308,7 +308,7 @@ public static function resultsTable($search_object, $search_results, $page)
. "&locale={$search_object->getLocale('extra')}"
. "&repo={$current_repo}"
. "&search_type=entities&recherche={$key}"
. "&perfect_match=perfect_match";
. "&entire_string=entire_string";
$bz_link[] = Bugzilla::reportErrorLink(
$search_object->getLocale('extra'), $key, $source_string, $target_string2, $current_repo, $entity_link
);
Expand Down Expand Up @@ -338,14 +338,14 @@ public static function resultsTable($search_object, $search_results, $page)
$transliterate_string_id = 'transliterate_' . $string_id;
}

foreach ($search as $val) {
$source_string = Strings::markString($val, $source_string);
$target_string = Strings::markString($val, $target_string);
foreach ($search_terms as $search_term) {
$source_string = Strings::markString($search_term, $source_string);
$target_string = Strings::markString($search_term, $target_string);
if ($extra_locale) {
$target_string2 = Strings::markString($val, $target_string2);
$target_string2 = Strings::markString($search_term, $target_string2);
}
if ($transliterate) {
$transliterated_string = Strings::markString($val, $transliterated_string);
$transliterated_string = Strings::markString($search_term, $transliterated_string);
}
}

Expand Down Expand Up @@ -531,7 +531,7 @@ public static function searchEntities($source_strings, $regex)

/*
If there are no results, search also through the entity names.
This is needed for "perfect match" when only the entity name is
This is needed for "entire string" when only the entity name is
provided.
*/
if (empty($entities)) {
Expand Down
16 changes: 12 additions & 4 deletions app/classes/Transvision/Utils.php
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@ public static function printSimpleTable(
}

/**
* Split a sentence in words from longest to shortest
* Split a sentence in words from longest to shortest, ignoring
* words shorter than 2 characters.
*
* @param string $sentence
* @return array all the words in the sentence sorted by length
Expand All @@ -133,7 +134,14 @@ public static function uniqueWords($sentence)
{
$words = explode(' ', $sentence);
$words = array_filter($words); // Filter out extra spaces
$words = array_unique($words); // Remove duplicate words
// Filter out 1-character words
$words = array_filter($words, function ($a) {
return (mb_strlen($a) >= 2);
});

// Remove duplicate words
$words = array_unique($words);

// Sort words from longest to shortest
usort(
$words,
Expand Down Expand Up @@ -444,8 +452,8 @@ public static function APIPromotion($source_locale, $target_locale)
array_map($sanitize, array_values($args))
);

$args['locale'] = $source_locale;
$args['sourcelocale'] = $target_locale;
$args['locale'] = $target_locale;
$args['sourcelocale'] = $source_locale;
$args['json'] = 'true';

// We don't want to encode slashes in searches for entity names
Expand Down
7 changes: 4 additions & 3 deletions app/controllers/mainsearch.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,10 @@
: '';

$regex = [];
$regex['whole'] = isset($_GET['whole_word']) ? 'whole_word=1' : '';
$regex['case'] = isset($_GET['case_sensitive']) ? 'case_sensitive=1' : '';
$regex['perfect'] = isset($_GET['perfect_match']) ? 'perfect_match=1' : '';
$regex['each_word'] = isset($_GET['each_word']) ? 'each_word=each_word' : '';
$regex['case_sensitive'] = isset($_GET['case_sensitive']) ? 'case_sensitive=case_sensitive' : '';
$regex['entire_string'] = isset($_GET['entire_string']) ? 'entire_string=entire_string' : '';
$regex['entire_words'] = isset($_GET['entire_words']) ? 'entire_words=entire_words' : '';
$regex = array_filter($regex);
$regex = count($regex) > 0 ? '?' . implode('&', $regex) : '';

Expand Down
5 changes: 3 additions & 2 deletions app/inc/search_options.php
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@
// Define our regex and search parameters
$search
->setSearchTerms($my_search)
->setRegexWholeWords($check['whole_word'])
->setEachWord($check['each_word'])
->setRegexCaseInsensitive($check['case_sensitive'])
->setRegexPerfectMatch($check['perfect_match'])
->setRegexEntireString($check['entire_string'])
->setRegexEntireWords($check['entire_words'])
->setRepository($repo)
->setSearchType($search_type)
->setLocales([$source_locale, $locale, $locale2]);
Expand Down
9 changes: 7 additions & 2 deletions app/models/3locales_search.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,16 @@

$tmx_target2 = Utils::getRepoStrings($locale2, $search->getRepository());

if ($search->isPerfectMatch()) {
if ($search->isEntireString()) {
$locale3_strings = $search->grep($tmx_target2);
} else {
$locale3_strings = $tmx_target2;
foreach (Utils::uniqueWords($search->getSearchTerms()) as $word) {

$search_terms = $search->isEachWord()
? Utils::uniqueWords($search->getSearchTerms())
: [$search->getSearchTerms()];

foreach ($search_terms as $word) {
$search->setRegexSearchTerms($word);
$locale3_strings = $search->grep($locale3_strings);
}
Expand Down
Loading

0 comments on commit e53e117

Please sign in to comment.