Skip to content

Commit 6b677ba

Browse files
pascalchevrel authored and TheoChevalier committed
* improved server side perfs (memory and CPU) (#1)
* improved server side perfs (memory and CPU)
* add some caching to the view as calculations are still time intensive
* add text blurb about # of occurrences
* Add a column selector at the top to choose locales
* nits
1 parent 815c07c commit 6b677ba

File tree

7 files changed

+147
-141
lines changed

7 files changed

+147
-141
lines changed

app/inc/dispatcher.php

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -123,6 +123,7 @@
123123
$page_title = 'Commonly Unlocalized Words';
124124
$page_descr = 'Display the list of the most common untranslated words. Click on the table headers to sort results.';
125125
$js_files[] = '/js/sorttable.js';
126+
$js_files[] = '/js/hide_table_rows.js';
126127
break;
127128
case 'unlocalized-json':
128129
$controller = 'unlocalized_words';

app/inc/stop_word_list.php

Lines changed: 0 additions & 2 deletions
This file was deleted.

app/models/unlocalized_words.php

Lines changed: 66 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -1,110 +1,88 @@
11
<?php
22
namespace Transvision;
33

4+
use Cache\Cache;
5+
6+
// Filtering out stop words from results.
7+
$stopwords = ['318419', '9999', '8601', '6667', '2000ms', '2000', '1990', '1024', '500', '360', '200', '140', '120', '100', '45em', '30em', '26em', '22em', '6ch', '005', '128px', 'adobe', 'android', 'ansi', 'ascii', 'aurora', 'doctype', 'e10s', 'ftp', 'gecko', 'gif', 'https', 'jpg', 'nntp', 'rgb', 'txt', 'unicode', 'usascii', 'vcard', 'wwwexamplecom', 'b-163', 'k-163', 'nist', 'secg', 'sect113r1', 'sect113r2', 'sect131r1', 'sect131r2', 'sect163k1', 'sect163r1', 'sect163r2', 'sect193r1', 'sect193r2', 'secp112r1', 'secp112r2', 'secp128r1', 'secp128r2', 'secp160k1', 'secp160r1', 'secp160r2', 'secp192k1', 'secp224k1', 'secp224r1', 'secp256k1', 'secp384r1', 'secp521r1','javascript', 'prime256v1', 'c2tnb191v2', 'sect239k1', 'c2onb239v4', 'c2onb191v5', 'c2pnb163v2', 'c2tnb191v1', 'c2pnb163v3', 'c2pnb208w1', 'c2tnb431r1', 'c2tnb239v1', 'c2tnb239v2', 'c2tnb239v3', 'sect409r1', 'c2tnb359v1', 'c2tnb191v3', 'c2pnb272w1', 'c2onb191v4', 'c2pnb368w1', 'c2onb239v5', 'c2pnb163v1', 'c2pnb176v1', 'sect233k1', 'sect409k1', 'c2pnb304w1', 'iii', 'sect233r1', 'sect283r1', 'sect283k1', 'sect571r1', 'sect571k1', 'iframe', 'enctype', 'charset', 'chrome', 'pprint', 'mozcmd', 'prime239v3', 'prime239v1', 'prime192v2', 'prime239v2', 'prime192v3', 'prime192v1', 'srcdir', 'newsrc',
8+
];
9+
410
// Build arrays for the search form.
511
$channel_selector = Utils::getHtmlSelectOptions(
612
$repos_nice_names,
713
$repo,
814
true
915
);
10-
$target_locales_list = Utils::getHtmlSelectOptions(
11-
Project::getRepositoryLocales($repo),
12-
$locale
13-
);
14-
15-
$ref_locale = Project::getReferenceLocale($repo);
16-
$unlocalized_words = [];
17-
$skip_pspell = true;
18-
19-
/*
20-
pspell helps getting rid of false positive results by keeping only valid
21-
English words. The downside is that it’s filtering out 'jargon' words that
22-
can be used in devtools or Mozilla-specific words.
23-
*/
24-
if (extension_loaded('pspell')) {
25-
$pspell_link = \pspell_new('en_US', '', '', '', PSPELL_FAST);
26-
$skip_pspell = false;
27-
} else {
28-
$logger->error('Please install libpspell-dev, php5-pspell and aspell-en ' .
29-
'packages and make sure pspell module is enabled in PHP config.');
30-
}
3116

3217
// Load reference strings.
33-
$strings_reference = array_map('strtolower', Utils::getRepoStrings(
34-
$ref_locale,
35-
$repo
36-
));
37-
38-
$all_locales = array_diff($all_locales, ['en-US', 'en-ZA', 'en-GB', 'ltg']);
39-
40-
41-
/*
42-
Go through all strings in $strings_reference, extract valid English words
43-
then check if any of them is present in the localized string from
44-
$strings_locale.
45-
*/
46-
foreach ($strings_reference as $string_ref_id => $ref_string) {
47-
48-
/*
49-
Remove punctuation characters from the strings then explode them into
50-
words.
51-
*/
52-
$ref_words = strip_tags($ref_string);
53-
$ref_words = explode(
54-
' ',
55-
preg_replace('/\p{P}/u', '', $ref_words)
56-
);
57-
58-
$english_words = [];
59-
60-
/*
61-
Only keep valid English words with more than 1 character in the current
62-
string.
63-
*/
64-
foreach ($ref_words as $word) {
65-
if (strlen($word) > 1 && ! in_array($word, $english_words)) {
66-
// Skip pspell when extension is not loaded
67-
if ($skip_pspell) {
68-
$english_words[] = $word;
69-
continue;
70-
}
71-
72-
if (pspell_check($pspell_link, $word)) {
73-
$english_words[] = $word;
74-
}
75-
}
18+
$ref_locale = Project::getReferenceLocale($repo);
19+
$strings_reference = Utils::getRepoStrings($ref_locale, $repo);
20+
21+
function filter_strings($locale, $repo, $strings_reference)
22+
{
23+
$strings = Utils::getRepoStrings($locale, $repo);
24+
foreach ($strings as $k => &$n) {
25+
if (! isset($strings_reference[$k])) {
26+
unset($strings[$k]);
27+
continue;
7628
}
7729

78-
foreach ($all_locales as $locale) {
30+
if ($strings[$k] == $strings_reference[$k]) {
31+
unset($strings[$k]);
32+
continue;
33+
}
7934

80-
// Load locale strings.
81-
$strings_locale = array_map('strtolower', Utils::getRepoStrings($locale, $repo));
35+
$n = strip_tags($n);
36+
$n = strtolower($n);
37+
$n = preg_replace('/\p{P}/u', '', $n);
38+
$n = trim($n);
8239

83-
/*
84-
If the string is missing in the locale or has been copy pasted from
85-
source (e.g. not translated), skip it.
86-
*/
87-
if (! isset($strings_locale[$string_ref_id])) {
40+
if (is_null($n)) {
41+
unset($strings[$k]);
8842
continue;
8943
}
9044

91-
if ($ref_string == $strings_locale[$string_ref_id] && $locale != $ref_locale) {
92-
continue;
45+
if (mb_strlen($n) < 2) {
46+
unset($strings[$k]);
9347
}
48+
}
49+
50+
return $strings;
51+
}
9452

53+
$all_locales = array_diff($all_locales, ['en-US', 'en-ZA', 'en-GB', 'ja-JP-mac', 'ltg']);
9554

96-
$locale_words = strip_tags($strings_locale[$string_ref_id]);
97-
$locale_words = explode(
98-
' ',
99-
preg_replace('/\p{P}/u', '', $locale_words)
100-
);
55+
$cache_id = $repo . $page . 'unlocalized_words';
56+
57+
if (! $unlocalized_words = Cache::getKey($cache_id)) {
58+
$unlocalized_words = [];
59+
foreach ($all_locales as $locale) {
60+
// Load locale strings.
61+
$cache_id2 = $repo . $page . $locale . 'unlocalized_words';
62+
if (! $strings = Cache::getKey($cache_id2)) {
63+
$strings = filter_strings($locale, $repo, $strings_reference);
64+
Cache::setKey($cache_id2, $strings);
65+
}
66+
67+
foreach ($strings as $id => $locale_words) {
68+
/*
69+
Check if there is any English word in the current translated string and
70+
count matches.
71+
*/
72+
$suspicious_words = array_intersect(
73+
explode(' ', $locale_words),
74+
explode(' ', $strings_reference[$id])
75+
);
76+
77+
foreach ($suspicious_words as $word) {
78+
if (mb_strlen($word) <= 2) {
79+
continue;
80+
}
81+
82+
if (in_array($word, $stopwords)) {
83+
continue;
84+
}
10185

102-
/*
103-
Check if there is any English word in the current translated string and
104-
count matches.
105-
*/
106-
foreach ($locale_words as $word) {
107-
if (in_array($word, $english_words)) {
10886
if (! isset($unlocalized_words[$word][$locale])) {
10987
$unlocalized_words[$word][$locale] = 1;
11088
} else {
@@ -113,18 +91,8 @@
11391
}
11492
}
11593
}
94+
Cache::setKey($cache_id, $unlocalized_words);
11695
}
117-
Utils::logScriptPerformances();
118-
unset($strings_reference);
119-
unset($strings_locale);
12096

121-
// Filtering out stop words from results at the end for performance reasons.
122-
include INC . 'stop_word_list.php';
123-
124-
foreach ($unlocalized_words as $word => $v) {
125-
if (in_array($word, $stopwords)) {
126-
unset($unlocalized_words[$word]);
127-
}
128-
}
129-
unset($stopwords);
130-
asort($unlocalized_words);
97+
unset($strings_reference, $strings, $stopwords);
98+
arsort($unlocalized_words);

app/views/unlocalized_words.php

Lines changed: 42 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,48 @@
11
<?php
22
namespace Transvision;
33

4+
?>
5+
<p>You might be interested in high values to validate your translation choices and in low values to check for potential mistakes.</p>
6+
<?php
47
// Include the common simple search form
58
include __DIR__ . '/simplesearchform.php';
6-
7-
$search_id = 'unlocalized_strings';
8-
9-
$content = "<table class='collapsable results_table sortable {$search_id}'>
10-
<thead>
11-
<tr class='column_headers'>
12-
<th>English</th>";
13-
14-
foreach ($all_locales as $locale) {
15-
$content .= "<th>{$locale}</th>";
16-
}
17-
18-
$content .= "</tr>
19-
</thead>
20-
<tbody>\n";
21-
22-
foreach ($unlocalized_words as $english_term => $locales) {
23-
24-
$content .= " <tr class='{$search_id}'>\n" .
25-
" <td>{$english_term}</td>\n";
26-
27-
foreach ($all_locales as $locale) {
28-
$count = 0;
29-
if (in_array($locale, array_keys($locales))) {
30-
$count = $locales[$locale];
9+
?>
10+
<p>Click on each checkbox below to show/hide the corresponding column.</p>
11+
<fieldset id="grpChkBox">
12+
<legend>Locales</legend>
13+
<?php foreach ($all_locales as $locale) : ?>
14+
<label><input type="checkbox" name="<?=$locale?>" /> <?=$locale?></label>
15+
<?php endforeach ?>
16+
</fieldset>
17+
<table class="collapsable results_table sortable" id="words">
18+
<thead>
19+
<tr class="column_headers">
20+
<th>Word</th>
21+
<?php foreach ($all_locales as $locale) : ?>
22+
<th class="<?=$locale?> hide"><?=$locale?></th>
23+
<?php endforeach ?>
24+
</tr>
25+
</thead>
26+
<tbody>
27+
<?php foreach ($unlocalized_words as $english_term => $locales) : ?>
28+
<tr><td><?=$english_term?></td><?php
29+
foreach ($all_locales as $locale) {
30+
$count = 0;
31+
if (in_array($locale, array_keys($locales))) {
32+
$count = $locales[$locale];
33+
}
34+
35+
$link = "/?recherche={$english_term}&repo={$repo}&sourcelocale={$locale}" .
36+
"&locale={$ref_locale}&search_type=strings&whole_word=whole_word";
37+
38+
if ($count > 0) {
39+
print "<td><a href='{$link}'>{$count}</a></td>";
40+
} else {
41+
print "<td></td>";
42+
}
3143
}
32-
33-
$link = "/?recherche={$english_term}&repo={$repo}&sourcelocale={$locale}" .
34-
"&locale={$ref_locale}&search_type=strings&whole_word=whole_word";
35-
36-
$link_title = $count == 1
37-
? 'Search for this occurrence'
38-
: 'Search for these occurrences';
39-
40-
$content .= " <td><a href='{$link}' title='{$link_title}'>{$count}</a></td>\n";
41-
}
42-
$content .= " </tr>\n";
43-
}
44-
$content .= "</tbody>\n</table>\n";
45-
46-
echo $content;
44+
?></tr>
45+
<?php endforeach ?>
46+
</tbody>
47+
</table>
48+
<?php unset($unlocalized_words);?>

tests/functional/pages.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
['stats/', 200, 'Repository status overview', 'Status estimate'],
1616
['string/?entity=browser/chrome/browser/places/places.properties:bookmarkResultLabel&repo=central', 200, 'supportedLocales', 'Marque-page'],
1717
['unchanged/', 200, 'Display a list of strings identical', 'Locale'],
18-
['unlocalized/', 200, 'Display the list of the most common untranslated words', 'Occurrences'],
18+
['unlocalized/', 200, 'Display the list of the most common untranslated words', 'Word'],
1919
['variables/', 200, 'Show potential errors related to', 'no errors found'],
2020
['foo/', 400, '404: Page Not Found', 'You can use the menu at the top'],
2121
['123/', 400, '404: Page Not Found', 'You can use the menu at the top'],

web/js/hide_table_rows.js

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
$(document).ready(function() {
2+
var $chk = $('#grpChkBox input:checkbox');
3+
var $tbl = $('#words');
4+
var $tblhead = $('#words th');
5+
6+
$chk.prop('checked', false);
7+
8+
$chk.click(function() {
9+
var colToHide = $tblhead.filter('.' + $(this).attr('name'));
10+
var index = $(colToHide).index();
11+
if (colToHide.css('display') === 'none') {
12+
$tbl.find('tr :nth-child(' + (index + 1) + ')').css('display', 'table-cell');
13+
} else {
14+
$tbl.find('tr :nth-child(' + (index + 1) + ')').css('display', 'none');
15+
}
16+
});
17+
});

web/style/transvision.css

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1164,3 +1164,23 @@ fieldset {
11641164
left: 60%;
11651165
z-index: 99;
11661166
}
1167+
1168+
/* Unlocalized words view */
1169+
#unlocalized fieldset#grpChkBox {
1170+
border: 1px solid #000;
1171+
background-color: rgba(255, 255, 255, 0.5);
1172+
}
1173+
1174+
#unlocalized #pagecontent p {
1175+
text-align: center;
1176+
}
1177+
1178+
#unlocalized label {
1179+
display: inline-block;
1180+
width: 5em;
1181+
}
1182+
1183+
#unlocalized #words th.hide,
1184+
#unlocalized #words tr :nth-child(1) ~ td {
1185+
display: none;
1186+
}

0 commit comments

Comments
 (0)