Skip to content
Permalink
Browse files

* improved server side perfs (memory and CPU) (#1)

* * improved server side perfs (memory and CPU)
* add some caching to the view as calculations are still time intensive
* add text blurb about # of occurrences
* Add a column selector at the top to choose locales

* nits
  • Loading branch information
pascalchevrel authored and TheoChevalier committed Apr 11, 2016
1 parent 815c07c commit 6b677ba926cfd1a0ff727e7424c48eb5aebdaa3c
@@ -123,6 +123,7 @@
$page_title = 'Commonly Unlocalized Words';
$page_descr = 'Display the list of the most common untranslated words. Click on the table headers to sort results.';
$js_files[] = '/js/sorttable.js';
$js_files[] = '/js/hide_table_rows.js';
break;
case 'unlocalized-json':
$controller = 'unlocalized_words';

This file was deleted.

@@ -1,110 +1,88 @@
<?php
namespace Transvision;

use Cache\Cache;

// Filtering out stop words from results.
// Tokens listed here (numbers, CSS sizes, file formats, acronyms, curve
// names, markup keywords...) are looked up with in_array() further down and
// skipped, so they are never counted as unlocalized words.
// NOTE(review): entries containing a hyphen ('b-163', 'k-163') look unable to
// match, since punctuation is stripped from locale strings before the
// lookup; confirm and drop or rewrite them if so.
$stopwords = ['318419', '9999', '8601', '6667', '2000ms', '2000', '1990', '1024', '500', '360', '200', '140', '120', '100', '45em', '30em', '26em', '22em', '6ch', '005', '128px', 'adobe', 'android', 'ansi', 'ascii', 'aurora', 'doctype', 'e10s', 'ftp', 'gecko', 'gif', 'https', 'jpg', 'nntp', 'rgb', 'txt', 'unicode', 'usascii', 'vcard', 'wwwexamplecom', 'b-163', 'k-163', 'nist', 'secg', 'sect113r1', 'sect113r2', 'sect131r1', 'sect131r2', 'sect163k1', 'sect163r1', 'sect163r2', 'sect193r1', 'sect193r2', 'secp112r1', 'secp112r2', 'secp128r1', 'secp128r2', 'secp160k1', 'secp160r1', 'secp160r2', 'secp192k1', 'secp224k1', 'secp224r1', 'secp256k1', 'secp384r1', 'secp521r1','javascript', 'prime256v1', 'c2tnb191v2', 'sect239k1', 'c2onb239v4', 'c2onb191v5', 'c2pnb163v2', 'c2tnb191v1', 'c2pnb163v3', 'c2pnb208w1', 'c2tnb431r1', 'c2tnb239v1', 'c2tnb239v2', 'c2tnb239v3', 'sect409r1', 'c2tnb359v1', 'c2tnb191v3', 'c2pnb272w1', 'c2onb191v4', 'c2pnb368w1', 'c2onb239v5', 'c2pnb163v1', 'c2pnb176v1', 'sect233k1', 'sect409k1', 'c2pnb304w1', 'iii', 'sect233r1', 'sect283r1', 'sect283k1', 'sect571r1', 'sect571k1', 'iframe', 'enctype', 'charset', 'chrome', 'pprint', 'mozcmd', 'prime239v3', 'prime239v1', 'prime192v2', 'prime239v2', 'prime192v3', 'prime192v1', 'srcdir', 'newsrc',
];

// Build arrays for the search form.
// Channel selector: options built from $repos_nice_names, with $repo passed
// as the second argument (presumably the entry to preselect; the meaning of
// the trailing `true` flag is not visible here, check
// Utils::getHtmlSelectOptions()).
$channel_selector = Utils::getHtmlSelectOptions(
$repos_nice_names,
$repo,
true
);
// Locale selector: options built from the locales available for $repo, with
// $locale passed as the second argument (presumably the entry to preselect).
$target_locales_list = Utils::getHtmlSelectOptions(
Project::getRepositoryLocales($repo),
$locale
);

$ref_locale = Project::getReferenceLocale($repo);
$unlocalized_words = [];
$skip_pspell = true;

/*
pspell helps getting rid of false positive results by keeping only valid
English words. The downside is that it’s filtering out 'jargon' words that
can be used in devtools or Mozilla-specific words.
*/
if (extension_loaded('pspell')) {
$pspell_link = \pspell_new('en_US', '', '', '', PSPELL_FAST);
$skip_pspell = false;
} else {
$logger->error('Please install libpspell-dev, php5-pspell and aspell-en ' .
'packages and make sure pspell module is enabled in PHP config.');
}

// Load reference strings.
$strings_reference = array_map('strtolower', Utils::getRepoStrings(
$ref_locale,
$repo
));

$all_locales = array_diff($all_locales, ['en-US', 'en-ZA', 'en-GB', 'ltg']);


/*
Go through all strings in $strings_reference, extract valid English words
then check if any of them is present in the localized string from
$strings_locale.
*/
foreach ($strings_reference as $string_ref_id => $ref_string) {

/*
Remove punctuation characters from the strings then explode them into
words.
*/
$ref_words = strip_tags($ref_string);
$ref_words = explode(
' ',
preg_replace('/\p{P}/u', '', $ref_words)
);

$english_words = [];

/*
Only keep valid English words with more than 1 character in the current
string.
*/
foreach ($ref_words as $word) {
if (strlen($word) > 1 && ! in_array($word, $english_words)) {
// Skip pspell when extension is not loaded
if ($skip_pspell) {
$english_words[] = $word;
continue;
}

if (pspell_check($pspell_link, $word)) {
$english_words[] = $word;
}
}
$ref_locale = Project::getReferenceLocale($repo);
$strings_reference = Utils::getRepoStrings($ref_locale, $repo);

/**
 * Load the strings of a locale and keep only the entries worth scanning for
 * untranslated words.
 *
 * An entry is removed when its key does not exist in $strings_reference,
 * when its value equals the reference value (loose == comparison), when
 * normalization yields null, or when fewer than 2 characters remain.
 * Surviving values are normalized in place: tags stripped, lowercased,
 * Unicode punctuation removed, then trimmed.
 *
 * NOTE(review): this listing comes from a diff view, so statements of the
 * previous implementation (the loop over $all_locales and the
 * $strings_locale / $string_ref_id checks) appear between the lines of this
 * function. They are not part of it.
 *
 * @param mixed $locale            Locale identifier forwarded to Utils::getRepoStrings()
 * @param mixed $repo              Repository identifier forwarded to Utils::getRepoStrings()
 * @param mixed $strings_reference Reference strings, indexed with the same keys as the
 *                                 locale strings (presumably an array; confirm)
 *
 * @return mixed The filtered, normalized strings of $locale
 */
function filter_strings($locale, $repo, $strings_reference)
{
$strings = Utils::getRepoStrings($locale, $repo);
// $n is bound by reference so the normalization below is written back
// into $strings.
foreach ($strings as $k => &$n) {
// Key unknown to the reference strings: nothing to compare against.
if (! isset($strings_reference[$k])) {
unset($strings[$k]);
continue;
}

foreach ($all_locales as $locale) {
// Same value as the reference string: drop the entry.
if ($strings[$k] == $strings_reference[$k]) {
unset($strings[$k]);
continue;
}

// Load locale strings.
$strings_locale = array_map('strtolower', Utils::getRepoStrings($locale, $repo));
// Normalize: no markup, lowercase, no Unicode punctuation, no
// surrounding whitespace.
$n = strip_tags($n);
$n = strtolower($n);
$n = preg_replace('/\p{P}/u', '', $n);
$n = trim($n);

/*
If the string is missing in the locale or has been copy pasted from
source (e.g. not translated), skip it.
*/
if (! isset($strings_locale[$string_ref_id])) {
// NOTE(review): preg_replace() returns null on error, but trim() runs
// on $n before this test. Confirm the guard can still observe null.
if (is_null($n)) {
unset($strings[$k]);
continue;
}

if ($ref_string == $strings_locale[$string_ref_id] && $locale != $ref_locale) {
continue;
// Fewer than 2 characters left after normalization: drop the entry.
if (mb_strlen($n) < 2) {
unset($strings[$k]);
}
}

return $strings;
}

$all_locales = array_diff($all_locales, ['en-US', 'en-ZA', 'en-GB', 'ja-JP-mac', 'ltg']);

$locale_words = strip_tags($strings_locale[$string_ref_id]);
$locale_words = explode(
' ',
preg_replace('/\p{P}/u', '', $locale_words)
);
$cache_id = $repo . $page . 'unlocalized_words';

if (! $unlocalized_words = Cache::getKey($cache_id)) {
$unlocalized_words = [];
foreach ($all_locales as $locale) {
// Load locale strings.
$cache_id2 = $repo . $page . $locale . 'unlocalized_words';
if (! $strings = Cache::getKey($cache_id2)) {
$strings = filter_strings($locale, $repo, $strings_reference);
Cache::setKey($cache_id2, $strings);
}

foreach ($strings as $id => $locale_words) {
/*
Check if there is any English word in the current translated string and
count matches.
*/
$suspicious_words = array_intersect(
explode(' ', $locale_words),
explode(' ', $strings_reference[$id])
);

foreach ($suspicious_words as $word) {
if (mb_strlen($word) <= 2) {
continue;
}

if (in_array($word, $stopwords)) {
continue;
}

/*
Check if there is any English word in the current translated string and
count matches.
*/
foreach ($locale_words as $word) {
if (in_array($word, $english_words)) {
if (! isset($unlocalized_words[$word][$locale])) {
$unlocalized_words[$word][$locale] = 1;
} else {
@@ -113,18 +91,8 @@
}
}
}
Cache::setKey($cache_id, $unlocalized_words);
}
Utils::logScriptPerformances();
unset($strings_reference);
unset($strings_locale);

// Filtering out stop words from results at the end for performance reasons.
include INC . 'stop_word_list.php';

foreach ($unlocalized_words as $word => $v) {
if (in_array($word, $stopwords)) {
unset($unlocalized_words[$word]);
}
}
unset($stopwords);
asort($unlocalized_words);
unset($strings_reference, $strings, $stopwords);
arsort($unlocalized_words);
@@ -1,46 +1,48 @@
<?php
namespace Transvision;

?>
<p>You might be interested in high values to validate your translation choices and in low values to check for potential mistakes.</p>
<?php
// Include the common simple search form
include __DIR__ . '/simplesearchform.php';

$search_id = 'unlocalized_strings';

$content = "<table class='collapsable results_table sortable {$search_id}'>
<thead>
<tr class='column_headers'>
<th>English</th>";

foreach ($all_locales as $locale) {
$content .= "<th>{$locale}</th>";
}

$content .= "</tr>
</thead>
<tbody>\n";

foreach ($unlocalized_words as $english_term => $locales) {

$content .= " <tr class='{$search_id}'>\n" .
" <td>{$english_term}</td>\n";

foreach ($all_locales as $locale) {
$count = 0;
if (in_array($locale, array_keys($locales))) {
$count = $locales[$locale];
?>
<p>Click on each checkbox below to show/hide the corresponding column.</p>
<fieldset id="grpChkBox">
<legend>Locales</legend>
<?php foreach ($all_locales as $locale) : ?>
<label><input type="checkbox" name="<?=$locale?>" /> <?=$locale?></label>
<?php endforeach ?>
</fieldset>
<table class="collapsable results_table sortable" id="words">
<thead>
<tr class="column_headers">
<th>Word</th>
<?php foreach ($all_locales as $locale) : ?>
<th class="<?=$locale?> hide"><?=$locale?></th>
<?php endforeach ?>
</tr>
</thead>
<tbody>
<?php foreach ($unlocalized_words as $english_term => $locales) : ?>
<tr><td><?=$english_term?></td><?php
foreach ($all_locales as $locale) {
$count = 0;
if (in_array($locale, array_keys($locales))) {
$count = $locales[$locale];
}

$link = "/?recherche={$english_term}&repo={$repo}&sourcelocale={$locale}" .
"&locale={$ref_locale}&search_type=strings&whole_word=whole_word";

if ($count > 0) {
print "<td><a href='{$link}'>{$count}</a></td>";
} else {
print "<td></td>";
}
}

$link = "/?recherche={$english_term}&repo={$repo}&sourcelocale={$locale}" .
"&locale={$ref_locale}&search_type=strings&whole_word=whole_word";

$link_title = $count == 1
? 'Search for this occurrence'
: 'Search for these occurrences';

$content .= " <td><a href='{$link}' title='{$link_title}'>{$count}</a></td>\n";
}
$content .= " </tr>\n";
}
$content .= "</tbody>\n</table>\n";

echo $content;
?></tr>
<?php endforeach ?>
</tbody>
</table>
<?php unset($unlocalized_words);?>
@@ -15,7 +15,7 @@
['stats/', 200, 'Repository status overview', 'Status estimate'],
['string/?entity=browser/chrome/browser/places/places.properties:bookmarkResultLabel&repo=central', 200, 'supportedLocales', 'Marque-page'],
['unchanged/', 200, 'Display a list of strings identical', 'Locale'],
['unlocalized/', 200, 'Display the list of the most common untranslated words', 'Occurrences'],
['unlocalized/', 200, 'Display the list of the most common untranslated words', 'Word'],
['variables/', 200, 'Show potential errors related to', 'no errors found'],
['foo/', 400, '404: Page Not Found', 'You can use the menu at the top'],
['123/', 400, '404: Page Not Found', 'You can use the menu at the top'],
@@ -0,0 +1,17 @@
$(document).ready(function() {
    // Locale checkboxes, the results table and its header cells.
    var $checkboxes = $('#grpChkBox input:checkbox');
    var $table = $('#words');
    var $headers = $('#words th');

    // Start with every checkbox unchecked.
    $checkboxes.prop('checked', false);

    // Each checkbox is named after a locale; clicking it flips the
    // visibility of the column whose header carries that locale as a class.
    $checkboxes.click(function() {
        var $header = $headers.filter('.' + $(this).attr('name'));
        // nth-child is 1-based while index() is 0-based.
        var cellSelector = 'tr :nth-child(' + ($header.index() + 1) + ')';
        var currentlyHidden = $header.css('display') === 'none';

        $table.find(cellSelector).css('display', currentlyHidden ? 'table-cell' : 'none');
    });
});
@@ -1164,3 +1164,23 @@ fieldset {
left: 60%;
z-index: 99;
}

/* Unlocalized words view */
/* Box around the group of locale checkboxes. */
#unlocalized fieldset#grpChkBox {
border: 1px solid #000;
background-color: rgba(255, 255, 255, 0.5);
}

/* Center the paragraphs inside the page content. */
#unlocalized #pagecontent p {
text-align: center;
}

/* Fixed-width inline blocks for the labels in this view. */
#unlocalized label {
display: inline-block;
width: 5em;
}

/* Hidden by default: header cells with the "hide" class and, in every row,
   each td that follows the row's first child. */
#unlocalized #words th.hide,
#unlocalized #words tr :nth-child(1) ~ td {
display: none;
}

0 comments on commit 6b677ba

Please sign in to comment.
You can’t perform that action at this time.