Skip to content

Commit a30981d

Browse files
committed
Bug 1982594 - Update Translations RemoteSettings schemas r=translations-reviewers,gregtatum
This patch updates the schemas and typescript definitions of the RemoteSettings schemas to match the new `v2` collections that will hold the zstd-compressed models. Co-authored-by: Isaac Briandt <isaacbriandt10@gmail.com> Differential Revision: https://phabricator.services.mozilla.com/D260975
1 parent e6c6e9a commit a30981d

File tree

2 files changed

+87
-55
lines changed

2 files changed

+87
-55
lines changed

toolkit/components/translations/actors/TranslationsParent.sys.mjs

Lines changed: 75 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -370,12 +370,17 @@ export class TranslationsParent extends JSWindowActorParent {
370370
* Notes: The 2.x WASM binary introduces segmentation changes that are necessary
371371
* to translate CJK languages.
372372
*
373-
* 3.x Wasm Major Versions
373+
* 3.x WASM Major Versions
374374
*
375375
* - This update introduces memory savings that required a new bergamot-translator.js
376376
* file due to ASM offsets, but makes no other changes.
377+
*
378+
* 4.x WASM Major Versions
379+
*
380+
* - This update changes the format of the WASM to be compressed with zstd.
381+
* The WASM is decompressed only when loaded into the engine.
377382
*/
378-
static BERGAMOT_MAJOR_VERSION = 3;
383+
static BERGAMOT_MAJOR_VERSION = 4;
379384

380385
/**
381386
* The BERGAMOT_MAJOR_VERSION defined above has only a single value, because there will
@@ -415,21 +420,29 @@ export class TranslationsParent extends JSWindowActorParent {
415420
*
416421
* - Compatible with 1.x Bergamot WASM binaries.
417422
* - Compatible with 2.x Bergamot WASM binaries.
423+
* - Compatible with 3.x Bergamot WASM binaries.
418424
*
419425
* Notes: 1.x models are referred to as "tiny" models, and are the models that were shipped with the original
420426
* release of Translations in Firefox.
421427
*
422428
* 2.x Model Major Versions
423429
*
424430
* - Compatible with 2.x Bergamot WASM binaries.
431+
* - Compatible with 3.x Bergamot WASM binaries.
425432
*
426433
* Notes: 2.x models are defined by either of two characteristics. The first characteristic is any CJK language model.
427434
* Only the 2.x WASM binaries support the segmentation concerns needed to interop with CJK language models.
428435
* The second characteristic is any "base" language model, which is larger than the "tiny" 1.x models.
429436
* Compatibility for base models is dependent on the code changes in Bug 1926100.
437+
*
438+
* 3.x Model Major Versions
439+
*
440+
* - Compatible with 4.x Bergamot WASM binaries.
441+
*
442+
* Notes: 3.x models are compressed with zstd. They are decompressed only when they are loaded into the engine.
430443
*/
431-
static LANGUAGE_MODEL_MAJOR_VERSION_MIN = 1;
432-
static LANGUAGE_MODEL_MAJOR_VERSION_MAX = 2;
444+
static LANGUAGE_MODEL_MAJOR_VERSION_MIN = 3;
445+
static LANGUAGE_MODEL_MAJOR_VERSION_MAX = 3;
433446

434447
/**
435448
* Contains the state that would affect UI. Anytime this state is changed, a dispatch
@@ -1768,8 +1781,8 @@ export class TranslationsParent extends JSWindowActorParent {
17681781
const languagePairMap = new Map();
17691782

17701783
for (const {
1771-
fromLang: sourceLanguage,
1772-
toLang: targetLanguage,
1784+
sourceLanguage,
1785+
targetLanguage,
17731786
variant,
17741787
} of records.values()) {
17751788
const key = TranslationsParent.nonPivotKey(
@@ -2157,7 +2170,7 @@ export class TranslationsParent extends JSWindowActorParent {
21572170
}
21582171

21592172
/** @type {RemoteSettingsClient} */
2160-
const client = lazy.RemoteSettings("translations-models");
2173+
const client = lazy.RemoteSettings("translations-models-v2");
21612174
TranslationsParent.#translationModelsRemoteClient = client;
21622175
client.on("sync", TranslationsParent.#handleTranslationsModelsSync);
21632176

@@ -2175,8 +2188,8 @@ export class TranslationsParent extends JSWindowActorParent {
21752188
* @param {object} [options.filters={}]
21762189
* The filters to apply when retrieving the records from RemoteSettings.
21772190
* Filters should correspond to properties on the RemoteSettings records themselves.
2178-
* For example, A filter to retrieve only records with a `fromLang` value of "en" and a `toLang` value of "es":
2179-
* { filters: { fromLang: "en", toLang: "es" } }
2191+
* For example, a filter to retrieve only records with a `sourceLanguage` value of "en" and a `targetLanguage` value of "es":
2192+
* { filters: { sourceLanguage: "en", targetLanguage: "es" } }
21802193
* @param {number} options.minSupportedMajorVersion
21812194
* The minimum major record version that is supported in this build of Firefox.
21822195
* @param {number} options.maxSupportedMajorVersion
@@ -2321,8 +2334,8 @@ export class TranslationsParent extends JSWindowActorParent {
23212334
// to guarantee uniqueness.
23222335
lookupKey: record =>
23232336
`${record.name}${TranslationsParent.nonPivotKey(
2324-
record.fromLang,
2325-
record.toLang,
2337+
record.sourceLanguage,
2338+
record.targetLanguage,
23262339
record.variant
23272340
)}`,
23282341
});
@@ -2384,65 +2397,65 @@ export class TranslationsParent extends JSWindowActorParent {
23842397
return records;
23852398
}
23862399
// lang -> pivot
2387-
const hasToPivot = new Set();
2400+
const hasTargetPivot = new Set();
23882401
// pivot -> lang
2389-
const hasFromPivot = new Set();
2402+
const hasSourcePivot = new Set();
23902403

2391-
const fromLangs = new Set();
2392-
const toLangs = new Set();
2404+
const sourceLanguages = new Set();
2405+
const targetLanguages = new Set();
23932406

2394-
for (const { fromLang, toLang } of records) {
2395-
fromLangs.add(fromLang);
2396-
toLangs.add(toLang);
2407+
for (const { sourceLanguage, targetLanguage } of records) {
2408+
sourceLanguages.add(sourceLanguage);
2409+
targetLanguages.add(targetLanguage);
23972410

2398-
if (toLang === PIVOT_LANGUAGE) {
2411+
if (targetLanguage === PIVOT_LANGUAGE) {
23992412
// lang -> pivot
2400-
hasToPivot.add(fromLang);
2413+
hasTargetPivot.add(sourceLanguage);
24012414
}
2402-
if (fromLang === PIVOT_LANGUAGE) {
2415+
if (sourceLanguage === PIVOT_LANGUAGE) {
24032416
// pivot -> lang
2404-
hasFromPivot.add(toLang);
2417+
hasSourcePivot.add(targetLanguage);
24052418
}
24062419
}
24072420

2408-
const fromLangsToRemove = new Set();
2409-
const toLangsToRemove = new Set();
2421+
const sourceLanguagesToRemove = new Set();
2422+
const targetLanguagesToRemove = new Set();
24102423

2411-
for (const lang of fromLangs) {
2412-
if (lang === PIVOT_LANGUAGE) {
2424+
for (const language of sourceLanguages) {
2425+
if (language === PIVOT_LANGUAGE) {
24132426
continue;
24142427
}
24152428
// Check for "lang -> pivot"
2416-
if (!hasToPivot.has(lang)) {
2429+
if (!hasTargetPivot.has(language)) {
24172430
TranslationsParent.reportError(
24182431
new Error(
2419-
`The "from" language model "${lang}" is being discarded as it doesn't have a pivot language.`
2432+
`The source language model "${language}" is being discarded as it doesn't have a pivot language.`
24202433
)
24212434
);
2422-
fromLangsToRemove.add(lang);
2435+
sourceLanguagesToRemove.add(language);
24232436
}
24242437
}
24252438

2426-
for (const lang of toLangs) {
2427-
if (lang === PIVOT_LANGUAGE) {
2439+
for (const language of targetLanguages) {
2440+
if (language === PIVOT_LANGUAGE) {
24282441
continue;
24292442
}
24302443
// Check for "pivot -> lang"
2431-
if (!hasFromPivot.has(lang)) {
2444+
if (!hasSourcePivot.has(language)) {
24322445
TranslationsParent.reportError(
24332446
new Error(
2434-
`The "to" language model "${lang}" is being discarded as it doesn't have a pivot language.`
2447+
`The target language model "${language}" is being discarded as it doesn't have a pivot language.`
24352448
)
24362449
);
2437-
toLangsToRemove.add(lang);
2450+
targetLanguagesToRemove.add(language);
24382451
}
24392452
}
24402453

24412454
const after = records.filter(record => {
2442-
if (fromLangsToRemove.has(record.fromLang)) {
2455+
if (sourceLanguagesToRemove.has(record.sourceLanguage)) {
24432456
return false;
24442457
}
2445-
if (toLangsToRemove.has(record.toLang)) {
2458+
if (targetLanguagesToRemove.has(record.targetLanguage)) {
24462459
return false;
24472460
}
24482461
return true;
@@ -2471,8 +2484,8 @@ export class TranslationsParent extends JSWindowActorParent {
24712484
const recordGroups = new Map();
24722485
for (const record of records) {
24732486
const key = TranslationsParent.nonPivotKey(
2474-
record.fromLang,
2475-
record.toLang,
2487+
record.sourceLanguage,
2488+
record.targetLanguage,
24762489
record.variant
24772490
);
24782491

@@ -2516,7 +2529,7 @@ export class TranslationsParent extends JSWindowActorParent {
25162529
}
25172530

25182531
/** @type {RemoteSettingsClient} */
2519-
const client = lazy.RemoteSettings("translations-wasm");
2532+
const client = lazy.RemoteSettings("translations-wasm-v2");
25202533
TranslationsParent.#translationsWasmRemoteClient = client;
25212534
client.on("sync", TranslationsParent.#handleTranslationsWasmSync);
25222535

@@ -2852,16 +2865,16 @@ export class TranslationsParent extends JSWindowActorParent {
28522865
if (isDownloaded) {
28532866
downloadedPairs.add(
28542867
TranslationsParent.nonPivotKey(
2855-
record.fromLang,
2856-
record.toLang,
2868+
record.sourceLanguage,
2869+
record.targetLanguage,
28572870
record.variant
28582871
)
28592872
);
28602873
} else {
28612874
nonDownloadedPairs.add(
28622875
TranslationsParent.nonPivotKey(
2863-
record.fromLang,
2864-
record.toLang,
2876+
record.sourceLanguage,
2877+
record.targetLanguage,
28652878
record.variant
28662879
)
28672880
);
@@ -2926,10 +2939,13 @@ export class TranslationsParent extends JSWindowActorParent {
29262939
for (const record of records.values()) {
29272940
if (
29282941
lazy.TranslationsUtils.langTagsMatch(
2929-
record.fromLang,
2942+
record.sourceLanguage,
29302943
sourceLanguage
29312944
) &&
2932-
lazy.TranslationsUtils.langTagsMatch(record.toLang, targetLanguage)
2945+
lazy.TranslationsUtils.langTagsMatch(
2946+
record.targetLanguage,
2947+
targetLanguage
2948+
)
29332949
) {
29342950
matchedRecords.add(record);
29352951
matchFound = true;
@@ -3035,11 +3051,11 @@ export class TranslationsParent extends JSWindowActorParent {
30353051

30363052
if (
30373053
!lazy.TranslationsUtils.langTagsMatch(
3038-
record.fromLang,
3054+
record.sourceLanguage,
30393055
sourceLanguage
30403056
) ||
30413057
!lazy.TranslationsUtils.langTagsMatch(
3042-
record.toLang,
3058+
record.targetLanguage,
30433059
targetLanguage
30443060
) ||
30453061
record.variant !== variant
@@ -3065,8 +3081,8 @@ export class TranslationsParent extends JSWindowActorParent {
30653081
const duration = Date.now() - start;
30663082
lazy.console.log(
30673083
`Translation model fetched in ${duration / 1000} seconds:`,
3068-
record.fromLang,
3069-
record.toLang,
3084+
record.sourceLanguage,
3085+
record.targetLanguage,
30703086
record.variant,
30713087
record.fileType,
30723088
record.version
@@ -3124,8 +3140,11 @@ export class TranslationsParent extends JSWindowActorParent {
31243140
await Promise.all(
31253141
records.map(async record => {
31263142
if (
3127-
!lazy.TranslationsUtils.langTagsMatch(record.fromLang, language) &&
3128-
!lazy.TranslationsUtils.langTagsMatch(record.toLang, language)
3143+
!lazy.TranslationsUtils.langTagsMatch(
3144+
record.sourceLanguage,
3145+
language
3146+
) &&
3147+
!lazy.TranslationsUtils.langTagsMatch(record.targetLanguage, language)
31293148
) {
31303149
return;
31313150
}
@@ -3204,10 +3223,13 @@ export class TranslationsParent extends JSWindowActorParent {
32043223

32053224
if (
32063225
!lazy.TranslationsUtils.langTagsMatch(
3207-
record.fromLang,
3226+
record.sourceLanguage,
32083227
sourceLanguage
32093228
) ||
3210-
!lazy.TranslationsUtils.langTagsMatch(record.toLang, targetLanguage)
3229+
!lazy.TranslationsUtils.langTagsMatch(
3230+
record.targetLanguage,
3231+
targetLanguage
3232+
)
32113233
) {
32123234
return;
32133235
}

toolkit/components/translations/translations.d.ts

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,16 +33,22 @@ export interface TranslationModelRecord {
3333
// The full model name, e.g. "lex.50.50.deen.s2t.bin"
3434
name: string;
3535
// The BCP 47 language tag, e.g. "de"
36-
fromLang: string;
36+
sourceLanguage: string;
3737
// The BCP 47 language tag, e.g. "en"
38-
toLang: string;
38+
targetLanguage: string;
39+
// The architecture of the model, e.g. "base", "base-memory", "tiny"
40+
architecture: string;
3941
// A model variant. This is a developer-only property that can be used in Nightly or
4042
// local builds to test different types of models.
4143
variant?: string;
4244
// The semver number, used for handling future format changes. e.g. 1.0
4345
version: string;
4446
// e.g. "lex"
4547
fileType: string;
48+
// The sha256 hash of the decompressed file
49+
decompressedHash: string;
50+
// The size of the decompressed file (bytes)
51+
decompressedSize: number;
4652
// The file attachment for this record
4753
attachment: Attachment;
4854
// e.g. 1673023100578
@@ -70,6 +76,10 @@ export interface WasmRecord {
7076
license: string;
7177
// The semver number, used for handling future format changes. e.g. 1.0
7278
version: string;
79+
// The sha256 hash of the decompressed file
80+
decompressedHash: string;
81+
// The size of the decompressed wasm file (bytes)
82+
decompressedSize: number;
7383
// The file attachment for this record
7484
attachment: Attachment;
7585
// e.g. 1673455932527

0 commit comments

Comments
 (0)