|
1 | 1 | <?php |
2 | 2 | namespace Transvision; |
3 | 3 |
|
| 4 | +use Cache\Cache; |
| 5 | + |
| 6 | +// Filtering out stop words from results. |
| 7 | +$stopwords = ['318419', '9999', '8601', '6667', '2000ms', '2000', '1990', '1024', '500', '360', '200', '140', '120', '100', '45em', '30em', '26em', '22em', '6ch', '005', '128px', 'adobe', 'android', 'ansi', 'ascii', 'aurora', 'doctype', 'e10s', 'ftp', 'gecko', 'gif', 'https', 'jpg', 'nntp', 'rgb', 'txt', 'unicode', 'usascii', 'vcard', 'wwwexamplecom', 'b-163', 'k-163', 'nist', 'secg', 'sect113r1', 'sect113r2', 'sect131r1', 'sect131r2', 'sect163k1', 'sect163r1', 'sect163r2', 'sect193r1', 'sect193r2', 'secp112r1', 'secp112r2', 'secp128r1', 'secp128r2', 'secp160k1', 'secp160r1', 'secp160r2', 'secp192k1', 'secp224k1', 'secp224r1', 'secp256k1', 'secp384r1', 'secp521r1','javascript', 'prime256v1', 'c2tnb191v2', 'sect239k1', 'c2onb239v4', 'c2onb191v5', 'c2pnb163v2', 'c2tnb191v1', 'c2pnb163v3', 'c2pnb208w1', 'c2tnb431r1', 'c2tnb239v1', 'c2tnb239v2', 'c2tnb239v3', 'sect409r1', 'c2tnb359v1', 'c2tnb191v3', 'c2pnb272w1', 'c2onb191v4', 'c2pnb368w1', 'c2onb239v5', 'c2pnb163v1', 'c2pnb176v1', 'sect233k1', 'sect409k1', 'c2pnb304w1', 'iii', 'sect233r1', 'sect283r1', 'sect283k1', 'sect571r1', 'sect571k1', 'iframe', 'enctype', 'charset', 'chrome', 'pprint', 'mozcmd', 'prime239v3', 'prime239v1', 'prime192v2', 'prime239v2', 'prime192v3', 'prime192v1', 'srcdir', 'newsrc', |
| 8 | +]; |
| 9 | + |
4 | 10 | // Build arrays for the search form. |
5 | 11 | $channel_selector = Utils::getHtmlSelectOptions( |
6 | 12 | $repos_nice_names, |
7 | 13 | $repo, |
8 | 14 | true |
9 | 15 | ); |
10 | | -$target_locales_list = Utils::getHtmlSelectOptions( |
11 | | - Project::getRepositoryLocales($repo), |
12 | | - $locale |
13 | | -); |
14 | | - |
15 | | -$ref_locale = Project::getReferenceLocale($repo); |
16 | | -$unlocalized_words = []; |
17 | | -$skip_pspell = true; |
18 | | - |
19 | | -/* |
20 | | - pspell helps getting rid of false positive results by keeping only valid |
21 | | - English words. The downside is that it’s filtering out 'jargon' words that |
22 | | - can be used in devtools or Mozilla-specific words. |
23 | | -*/ |
24 | | -if (extension_loaded('pspell')) { |
25 | | - $pspell_link = \pspell_new('en_US', '', '', '', PSPELL_FAST); |
26 | | - $skip_pspell = false; |
27 | | -} else { |
28 | | - $logger->error('Please install libpspell-dev, php5-pspell and aspell-en ' . |
29 | | - 'packages and make sure pspell module is enabled in PHP config.'); |
30 | | -} |
31 | 16 |
|
32 | 17 | // Load reference strings. |
33 | | -$strings_reference = array_map('strtolower', Utils::getRepoStrings( |
34 | | - $ref_locale, |
35 | | - $repo |
36 | | -)); |
37 | | - |
38 | | -$all_locales = array_diff($all_locales, ['en-US', 'en-ZA', 'en-GB', 'ltg']); |
39 | | - |
40 | | - |
41 | | - /* |
42 | | - Go through all strings in $strings_reference, extract valid English words |
43 | | - then check if any of them is present in the localized string from |
44 | | - $strings_locale. |
45 | | - */ |
46 | | - foreach ($strings_reference as $string_ref_id => $ref_string) { |
47 | | - |
48 | | - /* |
49 | | - Remove punctuation characters from the strings then explode them into |
50 | | - words. |
51 | | - */ |
52 | | - $ref_words = strip_tags($ref_string); |
53 | | - $ref_words = explode( |
54 | | - ' ', |
55 | | - preg_replace('/\p{P}/u', '', $ref_words) |
56 | | - ); |
57 | | - |
58 | | - $english_words = []; |
59 | | - |
60 | | - /* |
61 | | - Only keep valid English words with more than 1 character in the current |
62 | | - string. |
63 | | - */ |
64 | | - foreach ($ref_words as $word) { |
65 | | - if (strlen($word) > 1 && ! in_array($word, $english_words)) { |
66 | | - // Skip pspell when extension is not loaded |
67 | | - if ($skip_pspell) { |
68 | | - $english_words[] = $word; |
69 | | - continue; |
70 | | - } |
71 | | - |
72 | | - if (pspell_check($pspell_link, $word)) { |
73 | | - $english_words[] = $word; |
74 | | - } |
75 | | - } |
| 18 | +$ref_locale = Project::getReferenceLocale($repo); |
| 19 | +$strings_reference = Utils::getRepoStrings($ref_locale, $repo); |
| 20 | + |
/**
 * Load the strings of a locale and keep only the translated entries, in a
 * normalized form suitable for word-by-word comparison.
 *
 * An entry is discarded when:
 *  - its ID is not set in $strings_reference;
 *  - its text is identical to the reference text (i.e. not translated);
 *  - it cannot be normalized (preg_replace() returns null on error, e.g.
 *    malformed UTF-8 with the /u modifier);
 *  - its normalized text is shorter than 2 characters.
 *
 * Normalization strips tags, lowercases the text, removes every Unicode
 * punctuation character (\p{P}) and trims surrounding whitespace. Array keys
 * (string IDs) and their original order are preserved.
 *
 * @param string $locale            Locale code, forwarded to Utils::getRepoStrings()
 * @param string $repo              Repository, forwarded to Utils::getRepoStrings()
 * @param array  $strings_reference Reference strings indexed by string ID
 *
 * @return array Normalized locale strings indexed by string ID
 */
function filter_strings($locale, $repo, $strings_reference)
{
    $filtered = [];

    /*
        Build a new array instead of iterating by reference and unsetting
        in place: no reference outlives the loop, and the array being
        iterated is never modified.
    */
    foreach (Utils::getRepoStrings($locale, $repo) as $id => $text) {
        // String is unknown in the reference locale.
        if (! isset($strings_reference[$id])) {
            continue;
        }

        /*
            String has been copied verbatim from the reference. Compare as
            strings so that numeric-looking values ('1.0' vs '1') are not
            considered identical.
        */
        if ((string) $text === (string) $strings_reference[$id]) {
            continue;
        }

        // Multibyte-aware lowercasing: the text is localized UTF-8 content.
        $text = mb_strtolower(strip_tags($text), 'UTF-8');
        $text = preg_replace('/\p{P}/u', '', $text);

        // preg_replace() returns null on error; check before using the value.
        if ($text === null) {
            continue;
        }

        $text = trim($text);

        if (mb_strlen($text, 'UTF-8') < 2) {
            continue;
        }

        $filtered[$id] = $text;
    }

    return $filtered;
}
94 | 52 |
|
| 53 | +$all_locales = array_diff($all_locales, ['en-US', 'en-ZA', 'en-GB', 'ja-JP-mac', 'ltg']); |
95 | 54 |
|
96 | | - $locale_words = strip_tags($strings_locale[$string_ref_id]); |
97 | | - $locale_words = explode( |
98 | | - ' ', |
99 | | - preg_replace('/\p{P}/u', '', $locale_words) |
100 | | - ); |
| 55 | +$cache_id = $repo . $page . 'unlocalized_words'; |
| 56 | + |
| 57 | +if (! $unlocalized_words = Cache::getKey($cache_id)) { |
| 58 | + $unlocalized_words = []; |
| 59 | + foreach ($all_locales as $locale) { |
| 60 | + // Load locale strings. |
| 61 | + $cache_id2 = $repo . $page . $locale . 'unlocalized_words'; |
| 62 | + if (! $strings = Cache::getKey($cache_id2)) { |
| 63 | + $strings = filter_strings($locale, $repo, $strings_reference); |
| 64 | + Cache::setKey($cache_id2, $strings); |
| 65 | + } |
| 66 | + |
| 67 | + foreach ($strings as $id => $locale_words) { |
| 68 | + /* |
| 69 | + Check if there is any English word in the current translated string and |
| 70 | + count matches. |
| 71 | + */ |
| 72 | + $suspicious_words = array_intersect( |
| 73 | + explode(' ', $locale_words), |
| 74 | + explode(' ', $strings_reference[$id]) |
| 75 | + ); |
| 76 | + |
| 77 | + foreach ($suspicious_words as $word) { |
| 78 | + if (mb_strlen($word) <= 2) { |
| 79 | + continue; |
| 80 | + } |
| 81 | + |
| 82 | + if (in_array($word, $stopwords)) { |
| 83 | + continue; |
| 84 | + } |
101 | 85 |
|
102 | | - /* |
103 | | - Check if there is any English word in the current translated string and |
104 | | - count matches. |
105 | | - */ |
106 | | - foreach ($locale_words as $word) { |
107 | | - if (in_array($word, $english_words)) { |
108 | 86 | if (! isset($unlocalized_words[$word][$locale])) { |
109 | 87 | $unlocalized_words[$word][$locale] = 1; |
110 | 88 | } else { |
|
113 | 91 | } |
114 | 92 | } |
115 | 93 | } |
| 94 | + Cache::setKey($cache_id, $unlocalized_words); |
116 | 95 | } |
117 | | -Utils::logScriptPerformances(); |
118 | | -unset($strings_reference); |
119 | | -unset($strings_locale); |
120 | 96 |
|
121 | | -// Filtering out stop words from results at the end for performance reasons. |
122 | | -include INC . 'stop_word_list.php'; |
123 | | - |
124 | | -foreach ($unlocalized_words as $word => $v) { |
125 | | - if (in_array($word, $stopwords)) { |
126 | | - unset($unlocalized_words[$word]); |
127 | | - } |
128 | | -} |
129 | | -unset($stopwords); |
130 | | -asort($unlocalized_words); |
| 97 | +unset($strings_reference, $strings, $stopwords); |
| 98 | +arsort($unlocalized_words); |
0 commit comments