Skip to content

Commit e53e117

Browse files
authored
Rework advanced search options, and default search behavior (#833)
* Stop splitting words in a sentence by default when searching * Add "Each Word" option to search all words in a sentence (old default behavior) * Rename "Perfect match" to "Entire string", "Whole Words" to "Entire Words" * Reword and reverse API promotion links * Ignore words shorter than 2 characters when searching for each word * Added tooltips to search options
1 parent 6876a6d commit e53e117

27 files changed

+353
-188
lines changed

app/classes/Transvision/API.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
* Calls are like this:
1616
* api/<version>/<service>/<repository>/<search type>/<source locale>/<target locale>/<url escaped search>/?optional_parameter1=foo&optional_parameter2=bar
1717
* Example for an entity search containing bookmark:
18-
* https://transvision.mozfr.org/api/v1/tm/release/entity/en-US/fr/bookmark/?case_sensitive=1
18+
* https://transvision.mozfr.org/api/v1/tm/release/entity/en-US/fr/bookmark/?case_sensitive=case_sensitive
1919
* (tm = translation memory service)
2020
*
2121
* Example for the list of locales supported for a repo:
@@ -245,7 +245,7 @@ private function isValidServiceCall($service)
245245

246246
break;
247247
case 'search':
248-
// ex: /api/v1/search/string/central/en-US/fr/Bookmark/?case_sensitive=1
248+
// ex: /api/v1/search/string/central/en-US/fr/Bookmark/?case_sensitive=case_sensitive
249249
if (! $this->verifyEnoughParameters(7)) {
250250
return false;
251251
}

app/classes/Transvision/Search.php

Lines changed: 76 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,10 @@
1010
* e.g.:
1111
* $search = (new Search)
1212
* ->setSearchTerms('Bookmark this page')
13-
* ->setRegexWholeWords(true)
1413
* ->setRegexCaseInsensitive(true)
15-
* ->setRegexPerfectMatch(false)
14+
* ->setRegexEntireString(false)
15+
* ->setEachWord(false)
16+
* ->setEntireWords(false)
1617
* ->setRepository('release')
1718
* ->setSearchType('strings')
1819
* ->setLocales(['en-US', 'fr']);
@@ -38,16 +39,22 @@ class Search
3839
protected $regex_case;
3940

4041
/**
41-
* Consider the space separated string as a single word for search
42-
* @var string
42+
* Only return strings that entirely match the search (case excluded)
43+
* @var boolean
44+
*/
45+
protected $regex_entire_string;
46+
47+
/**
48+
* Only return strings where entire words match the search (case excluded)
49+
* @var boolean
4350
*/
44-
protected $regex_whole_words;
51+
protected $regex_entire_words;
4552

4653
/**
47-
* Only return strings that match the search perfectly (case excluded)
54+
* Set to search for each word in the query instead of using it as a whole.
4855
* @var boolean
4956
*/
50-
protected $regex_perfect_match;
57+
protected $each_word;
5158

5259
/**
5360
* The search terms for the regex, these differ from $search_terms as
@@ -79,7 +86,8 @@ class Search
7986
* @var array
8087
*/
8188
protected $form_search_options = [
82-
'case_sensitive', 'perfect_match', 'repo', 'search_type', 't2t', 'whole_word',
89+
'case_sensitive', 'entire_string', 'repo',
90+
'search_type', 't2t', 'each_word', 'entire_words',
8391
];
8492

8593
/**
@@ -102,8 +110,9 @@ public function __construct()
102110
$this->search_terms = '';
103111
$this->regex = '';
104112
$this->regex_case = 'i';
105-
$this->regex_whole_words = '';
106-
$this->regex_perfect_match = false;
113+
$this->regex_entire_string = false;
114+
$this->regex_entire_words = false;
115+
$this->each_word = false;
107116
$this->regex_search_terms = '';
108117
$this->repository = 'aurora'; // Most locales work on Aurora
109118
$this->search_type = 'strings';
@@ -161,35 +170,51 @@ public function setRegexCaseInsensitive($flag)
161170
}
162171

163172
/**
164-
* Set the regex to only return perfect matches for the searched string.
173+
* Set the regex to only return strings that entirely match the
174+
* searched string.
165175
* We cast the value to a boolean because we usually get it from a GET.
166176
*
167-
* @param boolean $flag Set to True for a perfect match
177+
* @param boolean $flag Set to True for an entire string match
168178
* @return $this
169179
*/
170-
public function setRegexPerfectMatch($flag)
180+
public function setRegexEntireString($flag)
171181
{
172-
$this->regex_perfect_match = (boolean) $flag;
182+
$this->regex_entire_string = (boolean) $flag;
173183
$this->updateRegex();
174184

175185
return $this;
176186
}
177187

178188
/**
179-
* Set the regex so as that a multi-word search is taken as a single word.
189+
* Set the regex to only return strings where entire words match
190+
* the searched string.
180191
* We cast the value to a boolean because we usually get it from a GET.
181192
*
182-
* @param boolean $flag A string evaluated to True will add \b to the regex
193+
* @param boolean $flag Set to True for an entire words match
183194
* @return $this
184195
*/
185-
public function setRegexWholeWords($flag)
196+
public function setRegexEntireWords($flag)
186197
{
187-
$this->regex_whole_words = (boolean) $flag ? '\b' : '';
198+
$this->regex_entire_words = (boolean) $flag ? '\b' : '';
188199
$this->updateRegex();
189200

190201
return $this;
191202
}
192203

204+
/**
205+
* Set to search for each word in the query instead of using it as a whole.
206+
* We cast the value to a boolean because we usually get it from a GET.
207+
*
208+
* @param boolean $flag Set to True to search for each word.
209+
* @return $this
210+
*/
211+
public function setEachWord($flag)
212+
{
213+
$this->each_word = (boolean) $flag;
214+
215+
return $this;
216+
}
217+
193218
/**
194219
* Update the $regex_search_terms value every time a setter to the regex
195220
* is called.
@@ -199,15 +224,15 @@ public function setRegexWholeWords($flag)
199224
private function updateRegex()
200225
{
201226
$search = preg_quote($this->regex_search_terms);
202-
if ($this->regex_perfect_match) {
227+
if ($this->regex_entire_string) {
203228
$search = "^{$search}$";
204229
}
205230

206231
$this->regex =
207232
'~'
208-
. $this->regex_whole_words
233+
. $this->regex_entire_words
209234
. $search
210-
. $this->regex_whole_words
235+
. $this->regex_entire_words
211236
. '~'
212237
. $this->regex_case
213238
. 'u';
@@ -226,53 +251,63 @@ public function getRegex()
226251
}
227252

228253
/**
229-
* Get the state of regex_perfect_match
254+
* Get the state of regex_entire_string
230255
*
231-
* @return boolean True if the regex searches for a perfect string match
256+
* @return boolean True if the regex searches for an entire string match
232257
*/
233-
public function isPerfectMatch()
258+
public function isEntireString()
234259
{
235-
return $this->regex_perfect_match;
260+
return $this->regex_entire_string;
236261
}
237262

238263
/**
239-
* Get search terms
264+
* Get the state of each_word
240265
*
241-
* @return string Searched terms
266+
* @return boolean True if the search should be for each word.
242267
*/
243-
public function getSearchTerms()
268+
public function isEachWord()
244269
{
245-
return $this->search_terms;
270+
return $this->each_word;
246271
}
247272

248273
/**
249-
* Get search terms in regex
274+
* Get the state of entire_words
250275
*
251-
* @return string Searched terms in regex
276+
* @return boolean True if the search should only match entire words.
252277
*/
253-
public function getRegexSearchTerms()
278+
public function isEntireWords()
254279
{
255-
return $this->regex_search_terms;
280+
return $this->regex_entire_words == '\b' ? true : false;
256281
}
257282

258283
/**
259-
* Get the regex case
284+
* Get the state of case_sensitive
260285
*
261-
* @return string Return 'i' for case insensitive search, '' for sensitive
286+
* @return boolean True if the search is case sensitive, false if it is case insensitive
262287
*/
263-
public function getRegexCase()
288+
public function isCaseSensitive()
264289
{
265-
return $this->regex_case;
290+
return $this->regex_case == 'i' ? false : true;
266291
}
267292

268293
/**
269-
* Get the regex whole words
294+
* Get search terms
270295
*
271-
* @return boolean True if we have the 'whole words' option for the regex
296+
* @return string Searched terms
272297
*/
273-
public function isWholeWords()
298+
public function getSearchTerms()
274299
{
275-
return $this->regex_whole_words;
300+
return $this->search_terms;
301+
}
302+
303+
/**
304+
* Get search terms in regex
305+
*
306+
* @return string Searched terms in regex
307+
*/
308+
public function getRegexSearchTerms()
309+
{
310+
return $this->regex_search_terms;
276311
}
277312

278313
/**

app/classes/Transvision/ShowResults.php

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -270,10 +270,10 @@ public static function resultsTable($search_object, $search_results, $page)
270270
</thead>
271271
<tbody>\n";
272272

273-
if (! $search_object->isWholeWords() && ! $search_object->isPerfectMatch()) {
274-
$search = Utils::uniqueWords($search_object->getSearchTerms());
273+
if ($search_object->isEachWord()) {
274+
$search_terms = Utils::uniqueWords($search_object->getSearchTerms());
275275
} else {
276-
$search = [$search_object->getSearchTerms()];
276+
$search_terms = [$search_object->getSearchTerms()];
277277
}
278278

279279
$current_repo = $search_object->getRepository();
@@ -284,7 +284,7 @@ public static function resultsTable($search_object, $search_results, $page)
284284
if ($search_object->getSearchType() == 'strings') {
285285
$result_entity = self::formatEntity($key);
286286
} else {
287-
$result_entity = self::formatEntity($key, $search[0]);
287+
$result_entity = self::formatEntity($key, $search_terms[0]);
288288
}
289289

290290
$component = explode('/', $key)[0];
@@ -296,7 +296,7 @@ public static function resultsTable($search_object, $search_results, $page)
296296
. "&locale={$locale2}"
297297
. "&repo={$current_repo}"
298298
. "&search_type=entities&recherche={$key}"
299-
. "&perfect_match=perfect_match";
299+
. "&entire_string=entire_string";
300300

301301
$bz_link = [Bugzilla::reportErrorLink(
302302
$locale2, $key, $source_string, $target_string, $current_repo, $entity_link
@@ -308,7 +308,7 @@ public static function resultsTable($search_object, $search_results, $page)
308308
. "&locale={$search_object->getLocale('extra')}"
309309
. "&repo={$current_repo}"
310310
. "&search_type=entities&recherche={$key}"
311-
. "&perfect_match=perfect_match";
311+
. "&entire_string=entire_string";
312312
$bz_link[] = Bugzilla::reportErrorLink(
313313
$search_object->getLocale('extra'), $key, $source_string, $target_string2, $current_repo, $entity_link
314314
);
@@ -338,14 +338,14 @@ public static function resultsTable($search_object, $search_results, $page)
338338
$transliterate_string_id = 'transliterate_' . $string_id;
339339
}
340340

341-
foreach ($search as $val) {
342-
$source_string = Strings::markString($val, $source_string);
343-
$target_string = Strings::markString($val, $target_string);
341+
foreach ($search_terms as $search_term) {
342+
$source_string = Strings::markString($search_term, $source_string);
343+
$target_string = Strings::markString($search_term, $target_string);
344344
if ($extra_locale) {
345-
$target_string2 = Strings::markString($val, $target_string2);
345+
$target_string2 = Strings::markString($search_term, $target_string2);
346346
}
347347
if ($transliterate) {
348-
$transliterated_string = Strings::markString($val, $transliterated_string);
348+
$transliterated_string = Strings::markString($search_term, $transliterated_string);
349349
}
350350
}
351351

@@ -531,7 +531,7 @@ public static function searchEntities($source_strings, $regex)
531531

532532
/*
533533
If there are no results, search also through the entity names.
534-
This is needed for "perfect match" when only the entity name is
534+
This is needed for "entire string" when only the entity name is
535535
provided.
536536
*/
537537
if (empty($entities)) {

app/classes/Transvision/Utils.php

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,8 @@ public static function printSimpleTable(
124124
}
125125

126126
/**
127-
* Split a sentence in words from longest to shortest
127+
* Split a sentence into words sorted from longest to shortest, ignoring
128+
* words shorter than 2 characters.
128129
*
129130
* @param string $sentence
130131
* @return array all the words in the sentence sorted by length
@@ -133,7 +134,14 @@ public static function uniqueWords($sentence)
133134
{
134135
$words = explode(' ', $sentence);
135136
$words = array_filter($words); // Filter out extra spaces
136-
$words = array_unique($words); // Remove duplicate words
137+
// Filter out 1-character words
138+
$words = array_filter($words, function ($a) {
139+
return (mb_strlen($a) >= 2);
140+
});
141+
142+
// Remove duplicate words
143+
$words = array_unique($words);
144+
137145
// Sort words from longest to shortest
138146
usort(
139147
$words,
@@ -444,8 +452,8 @@ public static function APIPromotion($source_locale, $target_locale)
444452
array_map($sanitize, array_values($args))
445453
);
446454

447-
$args['locale'] = $source_locale;
448-
$args['sourcelocale'] = $target_locale;
455+
$args['locale'] = $target_locale;
456+
$args['sourcelocale'] = $source_locale;
449457
$args['json'] = 'true';
450458

451459
// We don't want to encode slashes in searches for entity names

app/controllers/mainsearch.php

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,10 @@
3131
: '';
3232

3333
$regex = [];
34-
$regex['whole'] = isset($_GET['whole_word']) ? 'whole_word=1' : '';
35-
$regex['case'] = isset($_GET['case_sensitive']) ? 'case_sensitive=1' : '';
36-
$regex['perfect'] = isset($_GET['perfect_match']) ? 'perfect_match=1' : '';
34+
$regex['each_word'] = isset($_GET['each_word']) ? 'each_word=each_word' : '';
35+
$regex['case_sensitive'] = isset($_GET['case_sensitive']) ? 'case_sensitive=case_sensitive' : '';
36+
$regex['entire_string'] = isset($_GET['entire_string']) ? 'entire_string=entire_string' : '';
37+
$regex['entire_words'] = isset($_GET['entire_words']) ? 'entire_words=entire_words' : '';
3738
$regex = array_filter($regex);
3839
$regex = count($regex) > 0 ? '?' . implode('&', $regex) : '';
3940

app/inc/search_options.php

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,10 @@
2828
// Define our regex and search parameters
2929
$search
3030
->setSearchTerms($my_search)
31-
->setRegexWholeWords($check['whole_word'])
31+
->setEachWord($check['each_word'])
3232
->setRegexCaseInsensitive($check['case_sensitive'])
33-
->setRegexPerfectMatch($check['perfect_match'])
33+
->setRegexEntireString($check['entire_string'])
34+
->setRegexEntireWords($check['entire_words'])
3435
->setRepository($repo)
3536
->setSearchType($search_type)
3637
->setLocales([$source_locale, $locale, $locale2]);

app/models/3locales_search.php

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,16 @@
33

44
$tmx_target2 = Utils::getRepoStrings($locale2, $search->getRepository());
55

6-
if ($search->isPerfectMatch()) {
6+
if ($search->isEntireString()) {
77
$locale3_strings = $search->grep($tmx_target2);
88
} else {
99
$locale3_strings = $tmx_target2;
10-
foreach (Utils::uniqueWords($search->getSearchTerms()) as $word) {
10+
11+
$search_terms = $search->isEachWord()
12+
? Utils::uniqueWords($search->getSearchTerms())
13+
: [$search->getSearchTerms()];
14+
15+
foreach ($search_terms as $word) {
1116
$search->setRegexSearchTerms($word);
1217
$locale3_strings = $search->grep($locale3_strings);
1318
}

0 commit comments

Comments
 (0)