Permalink
Browse files

* Improved server-side performance (memory and CPU) (#1)

* Improved server-side performance (memory and CPU)
* Add some caching to the view, as the calculations are still time-intensive
* Add a text blurb about the number of occurrences
* Add a column selector at the top to choose locales

* nits
  • Loading branch information...
pascalchevrel authored and TheoChevalier committed Apr 11, 2016
1 parent 815c07c commit 6b677ba926cfd1a0ff727e7424c48eb5aebdaa3c
View
@@ -123,6 +123,7 @@
$page_title = 'Commonly Unlocalized Words';
$page_descr = 'Display the list of the most common untranslated words. Click on the table headers to sort results.';
$js_files[] = '/js/sorttable.js';
+ $js_files[] = '/js/hide_table_rows.js';
break;
case 'unlocalized-json':
$controller = 'unlocalized_words';
@@ -1,2 +0,0 @@
-<?php
-$stopwords = ['a', 'about', 'above', 'above', 'across', 'after', 'afterwards', 'again', 'against', 'all', 'almost', 'alone', 'along', 'already', 'also', 'although', 'always', 'am', 'among', 'amongst', 'amoungst', 'amount', 'an', 'and', 'another', 'any', 'anyhow', 'anyone', 'anything', 'anyway', 'anywhere', 'are', 'around', 'as', 'at', 'back', 'be', 'became', 'because', 'become', 'becomes', 'becoming', 'been', 'before', 'beforehand', 'behind', 'being', 'below', 'beside', 'besides', 'between', 'beyond', 'bill', 'both', 'bottom', 'but', 'by', 'call', 'can', 'cannot', 'cant', 'co', 'con', 'could', 'couldnt', 'cry', 'de', 'describe', 'detail', 'do', 'done', 'down', 'due', 'during', 'each', 'eg', 'eight', 'either', 'eleven', 'else', 'elsewhere', 'empty', 'enough', 'etc', 'even', 'ever', 'every', 'everyone', 'everything', 'everywhere', 'except', 'few', 'fifteen', 'fify', 'fill', 'find', 'fire', 'first', 'five', 'for', 'former', 'formerly', 'forty', 'found', 'four', 'from', 'front', 'full', 'further', 'get', 'give', 'go', 'had', 'has', 'hasnt', 'have', 'he', 'hence', 'her', 'here', 'hereafter', 'hereby', 'herein', 'hereupon', 'hers', 'herself', 'him', 'himself', 'his', 'how', 'however', 'hundred', 'ie', 'if', 'in', 'inc', 'indeed', 'interest', 'into', 'is', 'it', 'its', 'itself', 'keep', 'last', 'latter', 'latterly', 'least', 'less', 'ltd', 'made', 'many', 'may', 'me', 'meanwhile', 'might', 'mill', 'mine', 'more', 'moreover', 'most', 'mostly', 'move', 'much', 'must', 'my', 'myself', 'name', 'namely', 'neither', 'never', 'nevertheless', 'next', 'nine', 'no', 'nobody', 'none', 'noone', 'nor', 'not', 'nothing', 'now', 'nowhere', 'of', 'off', 'often', 'on', 'once', 'one', 'only', 'onto', 'or', 'other', 'others', 'otherwise', 'our', 'ours', 'ourselves', 'out', 'over', 'own', 'part', 'per', 'perhaps', 'please', 'put', 'rather', 're', 'same', 'see', 'seem', 'seemed', 'seeming', 'seems', 'serious', 'several', 'she', 'should', 'show', 'side', 'since', 'sincere', 'six', 'sixty', 
'so', 'some', 'somehow', 'someone', 'something', 'sometime', 'sometimes', 'somewhere', 'still', 'such', 'system', 'take', 'ten', 'than', 'that', 'the', 'their', 'them', 'themselves', 'then', 'thence', 'there', 'thereafter', 'thereby', 'therefore', 'therein', 'thereupon', 'these', 'they', 'thickv', 'thin', 'third', 'this', 'those', 'though', 'three', 'through', 'throughout', 'thru', 'thus', 'to', 'together', 'too', 'top', 'toward', 'towards', 'twelve', 'twenty', 'two', 'un', 'under', 'until', 'up', 'upon', 'us', 'very', 'via', 'was', 'we', 'well', 'were', 'what', 'whatever', 'when', 'whence', 'whenever', 'where', 'whereafter', 'whereas', 'whereby', 'wherein', 'whereupon', 'wherever', 'whether', 'which', 'while', 'whither', 'who', 'whoever', 'whole', 'whom', 'whose', 'why', 'will', 'with', 'within', 'without', 'would', 'yet', 'you', 'your', 'yours', 'yourself', 'yourselves', 'the'];
@@ -1,110 +1,88 @@
<?php
namespace Transvision;
+use Cache\Cache;
+
+// Filtering out stop words from results.
+$stopwords = ['318419', '9999', '8601', '6667', '2000ms', '2000', '1990', '1024', '500', '360', '200', '140', '120', '100', '45em', '30em', '26em', '22em', '6ch', '005', '128px', 'adobe', 'android', 'ansi', 'ascii', 'aurora', 'doctype', 'e10s', 'ftp', 'gecko', 'gif', 'https', 'jpg', 'nntp', 'rgb', 'txt', 'unicode', 'usascii', 'vcard', 'wwwexamplecom', 'b-163', 'k-163', 'nist', 'secg', 'sect113r1', 'sect113r2', 'sect131r1', 'sect131r2', 'sect163k1', 'sect163r1', 'sect163r2', 'sect193r1', 'sect193r2', 'secp112r1', 'secp112r2', 'secp128r1', 'secp128r2', 'secp160k1', 'secp160r1', 'secp160r2', 'secp192k1', 'secp224k1', 'secp224r1', 'secp256k1', 'secp384r1', 'secp521r1','javascript', 'prime256v1', 'c2tnb191v2', 'sect239k1', 'c2onb239v4', 'c2onb191v5', 'c2pnb163v2', 'c2tnb191v1', 'c2pnb163v3', 'c2pnb208w1', 'c2tnb431r1', 'c2tnb239v1', 'c2tnb239v2', 'c2tnb239v3', 'sect409r1', 'c2tnb359v1', 'c2tnb191v3', 'c2pnb272w1', 'c2onb191v4', 'c2pnb368w1', 'c2onb239v5', 'c2pnb163v1', 'c2pnb176v1', 'sect233k1', 'sect409k1', 'c2pnb304w1', 'iii', 'sect233r1', 'sect283r1', 'sect283k1', 'sect571r1', 'sect571k1', 'iframe', 'enctype', 'charset', 'chrome', 'pprint', 'mozcmd', 'prime239v3', 'prime239v1', 'prime192v2', 'prime239v2', 'prime192v3', 'prime192v1', 'srcdir', 'newsrc',
+];
+
// Build arrays for the search form.
$channel_selector = Utils::getHtmlSelectOptions(
$repos_nice_names,
$repo,
true
);
-$target_locales_list = Utils::getHtmlSelectOptions(
- Project::getRepositoryLocales($repo),
- $locale
-);
-
-$ref_locale = Project::getReferenceLocale($repo);
-$unlocalized_words = [];
-$skip_pspell = true;
-
-/*
- pspell helps getting rid of false positive results by keeping only valid
- English words. The downside is that it’s filtering out 'jargon' words that
- can be used in devtools or Mozilla-specific words.
-*/
-if (extension_loaded('pspell')) {
- $pspell_link = \pspell_new('en_US', '', '', '', PSPELL_FAST);
- $skip_pspell = false;
-} else {
- $logger->error('Please install libpspell-dev, php5-pspell and aspell-en ' .
- 'packages and make sure pspell module is enabled in PHP config.');
-}
// Load reference strings.
-$strings_reference = array_map('strtolower', Utils::getRepoStrings(
- $ref_locale,
- $repo
-));
-
-$all_locales = array_diff($all_locales, ['en-US', 'en-ZA', 'en-GB', 'ltg']);
-
-
- /*
- Go through all strings in $strings_reference, extract valid English words
- then check if any of them is present in the localized string from
- $strings_locale.
- */
- foreach ($strings_reference as $string_ref_id => $ref_string) {
-
- /*
- Remove punctuation characters from the strings then explode them into
- words.
- */
- $ref_words = strip_tags($ref_string);
- $ref_words = explode(
- ' ',
- preg_replace('/\p{P}/u', '', $ref_words)
- );
-
- $english_words = [];
-
- /*
- Only keep valid English words with more than 1 character in the current
- string.
- */
- foreach ($ref_words as $word) {
- if (strlen($word) > 1 && ! in_array($word, $english_words)) {
- // Skip pspell when extension is not loaded
- if ($skip_pspell) {
- $english_words[] = $word;
- continue;
- }
-
- if (pspell_check($pspell_link, $word)) {
- $english_words[] = $word;
- }
- }
+$ref_locale = Project::getReferenceLocale($repo);
+$strings_reference = Utils::getRepoStrings($ref_locale, $repo);
+
+/**
+ * Load the strings of a locale and keep only the translated ones, normalized
+ * for word-by-word comparison.
+ *
+ * A string is dropped when its key is missing from $strings_reference, when
+ * it is identical to the reference string (not translated), when the
+ * punctuation regex fails on it, or when fewer than 2 characters remain
+ * after normalization. Kept strings have their tags stripped, are
+ * lowercased, have Unicode punctuation removed and are trimmed.
+ *
+ * @param string $locale            Locale whose strings are loaded
+ * @param string $repo              Repository the strings are loaded from
+ * @param array  $strings_reference Reference strings, using the same keys as
+ *                                  the array returned by Utils::getRepoStrings()
+ *
+ * @return array Normalized strings, with their original keys and order
+ */
+function filter_strings($locale, $repo, $strings_reference)
+{
+    // Build a new array instead of editing the source one by reference:
+    // no dangling reference is left behind once the loop is over.
+    $filtered = [];
+    foreach (Utils::getRepoStrings($locale, $repo) as $id => $text) {
+        /*
+            Skip strings unknown to the reference and strings copied as is
+            from it. The comparison is strict on the string values so that
+            numeric-looking strings ('1e3' vs '1000') are not seen as equal.
+        */
+        if (! isset($strings_reference[$id])
+            || (string) $text === (string) $strings_reference[$id]) {
         continue;
     }
- foreach ($all_locales as $locale) {
- // Load locale strings.
- $strings_locale = array_map('strtolower', Utils::getRepoStrings($locale, $repo));
+        // mb_strtolower() is multibyte-safe; the /u regex below expects UTF-8.
+        $text = mb_strtolower(strip_tags($text), 'UTF-8');
+        $text = preg_replace('/\p{P}/u', '', $text);
- /*
- If the string is missing in the locale or has been copy pasted from
- source (e.g. not translated), skip it.
- */
- if (! isset($strings_locale[$string_ref_id])) {
+        // preg_replace() returns null on failure (e.g. invalid UTF-8): check
+        // it before trim(), which would silently turn null into ''.
+        if (is_null($text)) {
         continue;
     }
- if ($ref_string == $strings_locale[$string_ref_id] && $locale != $ref_locale) {
- continue;
+        $text = trim($text);
+        if (mb_strlen($text) >= 2) {
+            $filtered[$id] = $text;
     }
+    }
+
+    return $filtered;
+}
+$all_locales = array_diff($all_locales, ['en-US', 'en-ZA', 'en-GB', 'ja-JP-mac', 'ltg']);
- $locale_words = strip_tags($strings_locale[$string_ref_id]);
- $locale_words = explode(
- ' ',
- preg_replace('/\p{P}/u', '', $locale_words)
- );
+$cache_id = $repo . $page . 'unlocalized_words';
+
+if (! $unlocalized_words = Cache::getKey($cache_id)) {
+ $unlocalized_words = [];
+ foreach ($all_locales as $locale) {
+ // Load locale strings.
+ $cache_id2 = $repo . $page . $locale . 'unlocalized_words';
+ if (! $strings = Cache::getKey($cache_id2)) {
+ $strings = filter_strings($locale, $repo, $strings_reference);
+ Cache::setKey($cache_id2, $strings);
+ }
+
+ foreach ($strings as $id => $locale_words) {
+ /*
+ Check if there is any English word in the current translated string and
+ count matches.
+ */
+ $suspicious_words = array_intersect(
+ explode(' ', $locale_words),
+ explode(' ', $strings_reference[$id])
+ );
+
+ foreach ($suspicious_words as $word) {
+ if (mb_strlen($word) <= 2) {
+ continue;
+ }
+
+ if (in_array($word, $stopwords)) {
+ continue;
+ }
- /*
- Check if there is any English word in the current translated string and
- count matches.
- */
- foreach ($locale_words as $word) {
- if (in_array($word, $english_words)) {
if (! isset($unlocalized_words[$word][$locale])) {
$unlocalized_words[$word][$locale] = 1;
} else {
@@ -113,18 +91,8 @@
}
}
}
+ Cache::setKey($cache_id, $unlocalized_words);
}
-Utils::logScriptPerformances();
-unset($strings_reference);
-unset($strings_locale);
-// Filtering out stop words from results at the end for performance reasons.
-include INC . 'stop_word_list.php';
-
-foreach ($unlocalized_words as $word => $v) {
- if (in_array($word, $stopwords)) {
- unset($unlocalized_words[$word]);
- }
-}
-unset($stopwords);
-asort($unlocalized_words);
+unset($strings_reference, $strings, $stopwords);
+arsort($unlocalized_words);
@@ -1,46 +1,48 @@
<?php
namespace Transvision;
+?>
+<p>You might be interested in high values to validate your translation choices and in low values to check for potential mistakes.</p>
+<?php
// Include the common simple search form
include __DIR__ . '/simplesearchform.php';
-
-$search_id = 'unlocalized_strings';
-
-$content = "<table class='collapsable results_table sortable {$search_id}'>
- <thead>
- <tr class='column_headers'>
- <th>English</th>";
-
-foreach ($all_locales as $locale) {
- $content .= "<th>{$locale}</th>";
-}
-
-$content .= "</tr>
- </thead>
- <tbody>\n";
-
-foreach ($unlocalized_words as $english_term => $locales) {
-
- $content .= " <tr class='{$search_id}'>\n" .
- " <td>{$english_term}</td>\n";
-
- foreach ($all_locales as $locale) {
- $count = 0;
- if (in_array($locale, array_keys($locales))) {
- $count = $locales[$locale];
+?>
+<p>Click on each checkbox below to show/hide the corresponding column.</p>
+<fieldset id="grpChkBox">
+ <legend>Locales</legend>
+ <?php foreach ($all_locales as $locale) : ?>
+ <label><input type="checkbox" name="<?=$locale?>" /> <?=$locale?></label>
+ <?php endforeach ?>
+</fieldset>
+<table class="collapsable results_table sortable" id="words">
+ <thead>
+ <tr class="column_headers">
+ <th>Word</th>
+ <?php foreach ($all_locales as $locale) : ?>
+ <th class="<?=$locale?> hide"><?=$locale?></th>
+ <?php endforeach ?>
+ </tr>
+ </thead>
+ <tbody>
+<?php foreach ($unlocalized_words as $english_term => $locales) : ?>
+    <?php
+        // Numeric words are turned into integer array keys by PHP: cast back.
+        $english_term = (string) $english_term;
+    ?>
+    <tr><td><?=htmlspecialchars($english_term, ENT_QUOTES, 'UTF-8')?></td><?php
+        // One cell per locale, in the same order as the table headers.
+        foreach ($all_locales as $locale) {
+            // A locale without an entry for this word has no occurrence.
+            $count = isset($locales[$locale]) ? $locales[$locale] : 0;
+
+            if ($count > 0) {
+                /*
+                    The word is extracted from translated strings: encode it
+                    for the query string, then escape the whole URL before
+                    printing it inside the href attribute.
+                */
+                $link = '/?recherche=' . urlencode($english_term) .
+                        "&repo={$repo}&sourcelocale={$locale}" .
+                        "&locale={$ref_locale}&search_type=strings&whole_word=whole_word";
+                $link = htmlspecialchars($link, ENT_QUOTES, 'UTF-8');
+
+                print "<td><a href='{$link}'>{$count}</a></td>";
+            } else {
+                print "<td></td>";
+            }
+        }
-
- $link = "/?recherche={$english_term}&repo={$repo}&sourcelocale={$locale}" .
- "&locale={$ref_locale}&search_type=strings&whole_word=whole_word";
-
- $link_title = $count == 1
- ? 'Search for this occurrence'
- : 'Search for these occurrences';
-
- $content .= " <td><a href='{$link}' title='{$link_title}'>{$count}</a></td>\n";
- }
- $content .= " </tr>\n";
-}
-$content .= "</tbody>\n</table>\n";
-
-echo $content;
+    ?></tr>
+<?php endforeach ?>
+ </tbody>
+</table>
+<?php unset($unlocalized_words);?>
@@ -15,7 +15,7 @@
['stats/', 200, 'Repository status overview', 'Status estimate'],
['string/?entity=browser/chrome/browser/places/places.properties:bookmarkResultLabel&repo=central', 200, 'supportedLocales', 'Marque-page'],
['unchanged/', 200, 'Display a list of strings identical', 'Locale'],
- ['unlocalized/', 200, 'Display the list of the most common untranslated words', 'Occurrences'],
+ ['unlocalized/', 200, 'Display the list of the most common untranslated words', 'Word'],
['variables/', 200, 'Show potential errors related to', 'no errors found'],
['foo/', 400, '404: Page Not Found', 'You can use the menu at the top'],
['123/', 400, '404: Page Not Found', 'You can use the menu at the top'],
View
@@ -0,0 +1,17 @@
+$(document).ready(function() {
+    var $boxes = $('#grpChkBox input:checkbox');
+    var $table = $('#words');
+    var $headers = $('#words th');
+
+    // Every checkbox starts unchecked when the page is ready.
+    $boxes.prop('checked', false);
+
+    // Toggle the column whose header carries the class named after the
+    // clicked checkbox.
+    $boxes.click(function() {
+        var $header = $headers.filter('.' + $(this).attr('name'));
+        var position = $($header).index() + 1;
+        var currentlyHidden = $header.css('display') === 'none';
+
+        // The header's current display decides the new state of the column.
+        $table
+            .find('tr :nth-child(' + position + ')')
+            .css('display', currentlyHidden ? 'table-cell' : 'none');
+    });
+});
View
@@ -1164,3 +1164,23 @@ fieldset {
left: 60%;
z-index: 99;
}
+
+/* Unlocalized words view */
+/* Box holding the locale checkboxes: black border, half-transparent white. */
+#unlocalized fieldset#grpChkBox {
+ border: 1px solid #000;
+ background-color: rgba(255, 255, 255, 0.5);
+}
+
+/* Paragraphs in the page content are centered. */
+#unlocalized #pagecontent p {
+ text-align: center;
+}
+
+/* Each label is a fixed-width (5em) inline block. */
+#unlocalized label {
+ display: inline-block;
+ width: 5em;
+}
+
+/*
+ Hidden by default: header cells carrying the "hide" class, and in every
+ row of #words all td cells that follow the first child.
+*/
+#unlocalized #words th.hide,
+#unlocalized #words tr :nth-child(1) ~ td {
+ display: none;
+}

0 comments on commit 6b677ba

Please sign in to comment.