Permalink
Browse files

Add a view for commonly unlocalized words

  • Loading branch information...
TheoChevalier committed Feb 23, 2016
1 parent dfb232e commit 2d70d2bfbc2fc44424b40e6f6e023f8ab57a3ac9
View
@@ -33,6 +33,7 @@ The Transvision team uses Git and GitHub for both development and issue tracking
- Composer (Dependency Manager for PHP, https://getcomposer.org/)
- mercurial, git, svn to check out data sources (only for a Full installation for production)
- php5-xsl and GraphViz packages for generating the documentation with [phpDocumentor][]
- libpspell-dev, php5-pspell and aspell-en packages for running spell checking in English on [Unlocalized words view][].
- Apache with mod_rewrite activated
## Full Installation (production or heavy development)
@@ -70,4 +71,5 @@ MPL 2
[Transvision classes]: https://transvision-beta.mozfr.org/docs
[Transvision Project]: https://github.com/mozfr/transvision
[phpDocumentor]: http://phpdoc.org/
[Unlocalized words view]: https://transvision.mozfr.org/unlocalized
[Coding Standards]: https://github.com/mozfr/transvision/wiki/Code-conventions
@@ -0,0 +1,20 @@
<?php
namespace Transvision;

// l10n-init.php works out the requested repo and locale.
require_once INC . 'l10n-init.php';

// l10n-init.php resets $javascript_include, so the JS lib is added afterwards.
$javascript_include = ['/js/sorttable.js'];

// The model populates $unlocalized_words, which both views below consume.
include MODELS . 'unlocalized_words.php';

if ($page == 'unlocalized_json') {
    // JSON flavour: hand the raw word counts to the generic JSON view.
    $json = $unlocalized_words;
    include VIEWS . 'json.php';
} else {
    // Any other page value renders the HTML table.
    include VIEWS . 'unlocalized_words.php';
}
View
@@ -99,6 +99,16 @@
$page_title = 'Unchanged Strings';
$page_descr = 'Display a list of strings identical to English';
break;
case 'unlocalized':
$experimental = true;
$controller = 'unlocalized_words';
$page_title = 'Commonly Unlocalized Words';
$page_descr = 'Display the list of the most common untranslated words. Click on the table headers to sort results.';
break;
case 'unlocalized-json':
$controller = 'unlocalized_words';
$template = false;
break;
case 'variables':
$controller = 'check_variables';
$page_title = 'Variables Overview';
@@ -0,0 +1,2 @@
<?php
/*
    English stop words (all lowercase, punctuation already stripped, hence
    entries such as 'cant' or 'hasnt'). The list is consumed by the unlocalized
    words model to drop very common words from its results.

    Each word appears exactly once. Entries that were misspelled and therefore
    could never match a real word ('fify', 'thickv') are spelled correctly.
*/
$stopwords = [
    'a', 'about', 'above', 'across', 'after', 'afterwards', 'again', 'against',
    'all', 'almost', 'alone', 'along', 'already', 'also', 'although', 'always',
    'am', 'among', 'amongst', 'amount', 'an', 'and', 'another', 'any',
    'anyhow', 'anyone', 'anything', 'anyway', 'anywhere', 'are', 'around',
    'as', 'at',
    'back', 'be', 'became', 'because', 'become', 'becomes', 'becoming', 'been',
    'before', 'beforehand', 'behind', 'being', 'below', 'beside', 'besides',
    'between', 'beyond', 'bill', 'both', 'bottom', 'but', 'by',
    'call', 'can', 'cannot', 'cant', 'co', 'con', 'could', 'couldnt', 'cry',
    'de', 'describe', 'detail', 'do', 'done', 'down', 'due', 'during',
    'each', 'eg', 'eight', 'either', 'eleven', 'else', 'elsewhere', 'empty',
    'enough', 'etc', 'even', 'ever', 'every', 'everyone', 'everything',
    'everywhere', 'except',
    'few', 'fifteen', 'fifty', 'fill', 'find', 'fire', 'first', 'five', 'for',
    'former', 'formerly', 'forty', 'found', 'four', 'from', 'front', 'full',
    'further',
    'get', 'give', 'go',
    'had', 'has', 'hasnt', 'have', 'he', 'hence', 'her', 'here', 'hereafter',
    'hereby', 'herein', 'hereupon', 'hers', 'herself', 'him', 'himself', 'his',
    'how', 'however', 'hundred',
    'ie', 'if', 'in', 'inc', 'indeed', 'interest', 'into', 'is', 'it', 'its',
    'itself',
    'keep',
    'last', 'latter', 'latterly', 'least', 'less', 'ltd',
    'made', 'many', 'may', 'me', 'meanwhile', 'might', 'mill', 'mine', 'more',
    'moreover', 'most', 'mostly', 'move', 'much', 'must', 'my', 'myself',
    'name', 'namely', 'neither', 'never', 'nevertheless', 'next', 'nine', 'no',
    'nobody', 'none', 'noone', 'nor', 'not', 'nothing', 'now', 'nowhere',
    'of', 'off', 'often', 'on', 'once', 'one', 'only', 'onto', 'or', 'other',
    'others', 'otherwise', 'our', 'ours', 'ourselves', 'out', 'over', 'own',
    'part', 'per', 'perhaps', 'please', 'put',
    'rather', 're',
    'same', 'see', 'seem', 'seemed', 'seeming', 'seems', 'serious', 'several',
    'she', 'should', 'show', 'side', 'since', 'sincere', 'six', 'sixty', 'so',
    'some', 'somehow', 'someone', 'something', 'sometime', 'sometimes',
    'somewhere', 'still', 'such', 'system',
    'take', 'ten', 'than', 'that', 'the', 'their', 'them', 'themselves',
    'then', 'thence', 'there', 'thereafter', 'thereby', 'therefore', 'therein',
    'thereupon', 'these', 'they', 'thick', 'thin', 'third', 'this', 'those',
    'though', 'three', 'through', 'throughout', 'thru', 'thus', 'to',
    'together', 'too', 'top', 'toward', 'towards', 'twelve', 'twenty', 'two',
    'un', 'under', 'until', 'up', 'upon', 'us',
    'very', 'via',
    'was', 'we', 'well', 'were', 'what', 'whatever', 'when', 'whence',
    'whenever', 'where', 'whereafter', 'whereas', 'whereby', 'wherein',
    'whereupon', 'wherever', 'whether', 'which', 'while', 'whither', 'who',
    'whoever', 'whole', 'whom', 'whose', 'why', 'will', 'with', 'within',
    'without', 'would',
    'yet', 'you', 'your', 'yours', 'yourself', 'yourselves',
];
View
@@ -13,6 +13,8 @@
'showrepos' => 'showrepos',
'gaia' => 'gaia',
'unchanged' => 'unchangedstrings',
'unlocalized' => 'unlocalized',
'unlocalized-json' => 'unlocalized_json',
'variables' => 'checkvariables',
'3locales' => '3locales',
'string' => 'onestring',
@@ -0,0 +1,117 @@
<?php
namespace Transvision;

/*
    Model for the "Commonly Unlocalized Words" view.

    Reads:    $repos_nice_names, $repo, $locale, $logger
    Provides: $channel_selector, $target_locales_list (search form data),
              $ref_locale and $unlocalized_words ([word => occurrences],
              sorted by ascending number of occurrences).
*/

// Build arrays for the search form.
$channel_selector = Utils::getHtmlSelectOptions(
    $repos_nice_names,
    $repo,
    true
);
$target_locales_list = Utils::getHtmlSelectOptions(
    Project::getRepositoryLocales($repo),
    $locale
);
$ref_locale = Project::getReferenceLocale($repo);
$unlocalized_words = [];

/*
    pspell helps getting rid of false positive results by keeping only valid
    English words. The downside is that it’s filtering out 'jargon' words that
    can be used in devtools or Mozilla-specific words.

    pspell_new() returns false when the dictionary can't be opened (e.g. the
    extension is enabled but aspell-en is missing), so the spell check is
    skipped unless we actually got a usable dictionary handle.
*/
$pspell_link = false;
if (extension_loaded('pspell')) {
    $pspell_link = \pspell_new('en_US', '', '', '', PSPELL_FAST);
}
$skip_pspell = $pspell_link === false;
if ($skip_pspell) {
    $logger->error('Please install libpspell-dev, php5-pspell and aspell-en ' .
                   'packages and make sure pspell module is enabled in PHP config.');
}

/*
    Remove punctuation characters from a string, then split it into words on
    any run of whitespace (not only single spaces, so tabs and line breaks
    also separate words). Returns an empty list if the string can't be
    processed, e.g. because it is not valid UTF-8.
*/
$extract_words = function ($text) {
    $without_punctuation = preg_replace('/\p{P}/u', '', $text);
    if ($without_punctuation === null) {
        return [];
    }
    $words = preg_split('/\s+/u', $without_punctuation, -1, PREG_SPLIT_NO_EMPTY);

    return $words === false ? [] : $words;
};

// Load reference strings.
$strings_reference = array_map('strtolower', Utils::getRepoStrings(
    $ref_locale,
    $repo
));

// Load locale strings.
$strings_locale = array_map('strtolower', Utils::getRepoStrings($locale, $repo));

// The same words come back in many strings: remember pspell's verdict.
$spell_cache = [];

/*
    Go through all strings in $strings_reference, extract valid English words
    then check if any of them is present in the localized string from
    $strings_locale.
*/
foreach ($strings_reference as $string_ref_id => $ref_string) {
    // Skip strings missing in the locale.
    if (! isset($strings_locale[$string_ref_id])) {
        continue;
    }
    $locale_string = $strings_locale[$string_ref_id];

    /*
        Skip strings copy pasted from source (e.g. not translated), unless we
        are looking at the reference locale itself.
    */
    if ($ref_string === $locale_string && $locale !== $ref_locale) {
        continue;
    }

    /*
        Only keep valid English words with more than 1 character in the current
        string. Words are stored as array keys to get de-duplication and exact
        lookups for free.
    */
    $english_words = [];
    foreach ($extract_words($ref_string) as $word) {
        if (strlen($word) < 2 || isset($english_words[$word])) {
            continue;
        }
        // Skip pspell when no dictionary is available.
        if (! $skip_pspell) {
            if (! isset($spell_cache[$word])) {
                $spell_cache[$word] = pspell_check($pspell_link, $word);
            }
            if (! $spell_cache[$word]) {
                continue;
            }
        }
        $english_words[$word] = true;
    }
    if (empty($english_words)) {
        continue;
    }

    /*
        Check if there is any English word in the current translated string and
        count matches.
    */
    foreach ($extract_words($locale_string) as $word) {
        if (isset($english_words[$word])) {
            $unlocalized_words[$word] = isset($unlocalized_words[$word])
                ? $unlocalized_words[$word] + 1
                : 1;
        }
    }
}
unset($strings_reference, $strings_locale, $spell_cache, $extract_words);

// Filtering out stop words from results at the end for performance reasons.
include INC . 'stop_word_list.php';
$unlocalized_words = array_diff_key($unlocalized_words, array_flip($stopwords));
unset($stopwords);

/*
    Sort by number of occurrences, least frequent words first.
    NOTE(review): the page is described as listing the "most common" words;
    confirm whether a descending sort (arsort) was intended.
*/
asort($unlocalized_words);
@@ -41,6 +41,7 @@
<li><a ' . ($url['path'] == 'gaia' ? 'class="selected_view" ' : '') . 'href="/gaia/" title="Compare strings across Gaia channels">Gaia Comparison</a></li>
<li><a ' . ($url['path'] == 'consistency' ? 'class="selected_view" ' : '') . 'href="/consistency/" title="Translation Consistency">Translation Consistency</a></li>
<li><a ' . ($url['path'] == 'unchanged_strings' ? 'class="selected_view" ' : '') . 'href="/unchanged/" title="Display all strings identical to English">Unchanged Strings</a></li>
<li><a ' . ($url['path'] == 'unlocalized_words' ? 'class="selected_view" ' : '') . 'href="/unlocalized/" title="Display common words remaining in English">Unlocalized Words</a></li>
<li><a ' . ($url['path'] == 'variables' ? 'class="selected_view" ' : '') . 'href="/variables/" title="Check what variable differences there are from English">Check Variables</a></li>
</ul>
</div>
@@ -0,0 +1,30 @@
<?php
namespace Transvision;

/*
    View for the "Commonly Unlocalized Words" page: prints the search form
    followed by a sortable table with one row per word of $unlocalized_words
    ([word => occurrences]). Each word links to a whole-word string search.

    Reads: $unlocalized_words, $repo, $locale, $ref_locale
*/

// Include the common simple search form
include __DIR__ . '/simplesearchform.php';

$search_id = 'unlocalized_strings';

$content = "<table class='collapsable results_table sortable {$search_id}'>\n" .
           "  <tr class='column_headers'>\n" .
           "    <th>English</th>\n" .
           "    <th>Occurrences</th>\n" .
           "  </tr>\n";

foreach ($unlocalized_words as $string_id => $string_value) {
    // Numeric-looking words come back as integer array keys.
    $word = (string) $string_id;

    // Let http_build_query() URL-encode every value of the query string.
    $query = http_build_query(
        [
            'recherche'    => $word,
            'repo'         => $repo,
            'sourcelocale' => $locale,
            'locale'       => $ref_locale,
            'search_type'  => 'strings',
            'whole_word'   => 'whole_word',
        ],
        '',
        '&'
    );

    /*
        Attributes below are single-quoted, so ENT_QUOTES is required to keep
        the values from breaking out of them.
    */
    $link = htmlspecialchars('/?' . $query, ENT_QUOTES, 'UTF-8');
    $label = htmlspecialchars($word, ENT_QUOTES, 'UTF-8');
    $occurrences = (int) $string_value;

    $link_title = $occurrences == 1
        ? 'Search for this occurrence'
        : 'Search for these occurrences';

    $content .= "  <tr class='{$search_id}'>\n" .
                "    <td><a href='{$link}' title='{$link_title}'>{$label}</a></td>\n" .
                "    <td>{$occurrences}</td>\n" .
                "  </tr>\n";
}

$content .= "</table>\n";
echo $content;
@@ -15,6 +15,7 @@
['stats/', 200, 'Repository status overview', 'Status estimate'],
['string/?entity=browser/chrome/browser/places/places.properties:bookmarkResultLabel&repo=central', 200, 'supported_locales', 'Marque-page'],
['unchanged/', 200, 'Display a list of strings identical', 'Locale'],
['unlocalized/', 200, 'Display the list of the most common untranslated words', 'Occurrences'],
['variables/', 200, 'Show potential errors related to', 'no errors found'],
];
Oops, something went wrong.

0 comments on commit 2d70d2b

Please sign in to comment.