Skip to content

Commit

Permalink
[localize] Fix handling of nested HTML (#3674)
Browse files Browse the repository at this point in the history
Fixes handling of nested html-tagged template expressions in lit localize analysis.

For example, the following message was analyzed incorrectly:

```ts
msg(html`Hello <b>${html`<i>World</i>`}</b>!`);
```

To reduce the number of placeholders that appear during translation, we combine adjacent runs of HTML markup and expressions into a single placeholder. To do that, we pass the whole template through an HTML parser, nested expressions and all. The problem was that if a nested expression contained HTML, that HTML would get parsed as part of the outer message, which corrupted the analyzed template. The fix is to replace each nested expression with a placeholder (its index) during HTML parsing, because the HTML parser doesn't need to see the contents of expressions at all, and then substitute the original expression back in after HTML parsing.

Fixes #2426
  • Loading branch information
aomarks committed Feb 16, 2023
1 parent e00f6f5 commit 52ab087
Show file tree
Hide file tree
Showing 7 changed files with 72 additions and 35 deletions.
5 changes: 5 additions & 0 deletions .changeset/mighty-hornets-explain.md
@@ -0,0 +1,5 @@
---
'@lit/localize-tools': patch
---

Fix incorrect extraction of html embedded within html
39 changes: 30 additions & 9 deletions packages/localize-tools/src/program-analysis.ts
Expand Up @@ -370,9 +370,11 @@ interface Expression {
*/
const EXPRESSION_RAND = String(Math.random()).slice(2);
const EXPRESSION_START = `_START_LIT_LOCALIZE_EXPR_${EXPRESSION_RAND}_`;
const EXPRESSION_START_REGEXP = new RegExp(EXPRESSION_START, 'g');
const EXPRESSION_END = `_END_LIT_LOCALIZE_EXPR_${EXPRESSION_RAND}_`;
const EXPRESSION_END_REGEXP = new RegExp(EXPRESSION_END, 'g');
const EXPRESSION_START_END_REGEXP = new RegExp(
EXPRESSION_START + '(.*?)' + EXPRESSION_END,
'g'
);

/**
* Our template is split apart based on template string literal expressions.
Expand Down Expand Up @@ -401,10 +403,17 @@ function replaceExpressionsAndHtmlWithPlaceholders(
parts: Array<string | Expression>
): Array<string | Omit<Placeholder, 'index'>> {
const concatenatedHtml = parts
.map((part) =>
.map((part, expressionIndex) =>
typeof part === 'string'
? part
: EXPRESSION_START + part.identifier + EXPRESSION_END
: EXPRESSION_START +
// Use the index of the expression instead of the actual expression,
// because the actual expression might contain embedded HTML which
// will confuse HTML parsing. We could also escape it, but it's
// simpler to just use the index and swap it back with the actual
// expression after HTML parsing.
expressionIndex +
EXPRESSION_END
)
.join('');
const contents: Array<string | Omit<Placeholder, 'index'>> = [];
Expand All @@ -418,8 +427,13 @@ function replaceExpressionsAndHtmlWithPlaceholders(
contents.push(substr);
}
} else {
const [identifier, tail] = endSplit;
contents.push({untranslatable: '${' + identifier + '}'});
// Swap the expression index with the actual expression.
const [expressionIndexStr, tail] = endSplit;
const expressionIndex = Number(expressionIndexStr);
const expression = (parts[expressionIndex] as Expression).identifier;
contents.push({
untranslatable: '${' + expression + '}',
});
if (tail) {
contents.push(tail);
}
Expand All @@ -430,9 +444,16 @@ function replaceExpressionsAndHtmlWithPlaceholders(
// markup, it's fine and good to just keep this one placeholder. We just
// need to fix the syntax.
contents.push({
untranslatable: part.untranslatable
.replace(EXPRESSION_START_REGEXP, '${')
.replace(EXPRESSION_END_REGEXP, '}'),
untranslatable: part.untranslatable.replace(
EXPRESSION_START_END_REGEXP,
(_, expressionIndexStr) => {
// Swap the expression index with the actual expression.
const expressionIndex = Number(expressionIndexStr);
const expression = (parts[expressionIndex] as Expression)
.identifier;
return '${' + expression + '}';
}
),
});
}
}
Expand Down
51 changes: 25 additions & 26 deletions packages/localize-tools/src/tests/transform.unit.test.ts
Expand Up @@ -274,32 +274,31 @@ test('multiple expression-placeholders and order switching', () => {
);
});

// TODO(aomarks) Uncomment with fix for #2426
// test('msg(html(html))', () => {
// checkTransform(
// 'msg(html`Hello <b>${html`<i>World</i>`}</b>!`, {id: "foo"});',
// 'html`Hello <b><i>World</i></b>!`;'
// );
// });

// test('msg(html(html)) translated', () => {
// checkTransform(
// 'msg(html`Hello <b>${html`<i>World</i>`}</b>!`, {id: "foo"});',
// 'html`Hola <b><i>World</i></b>!`;',
// {
// messages: [
// {
// name: 'foo',
// contents: [
// 'Hola ',
// {untranslatable: '<b>${html`<i>World</i>`}</b>', index: 0},
// '!',
// ],
// },
// ],
// }
// );
// });
test('msg(html(html))', () => {
checkTransform(
'msg(html`Hello <b>${html`<i>World</i>`}</b>!`, {id: "foo"});',
'html`Hello <b><i>World</i></b>!`;'
);
});

test('msg(html(html)) translated', () => {
checkTransform(
'msg(html`Hello <b>${html`<i>World</i>`}</b>!`, {id: "foo"});',
'html`Hola <b><i>World</i></b>!`;',
{
messages: [
{
name: 'foo',
contents: [
'Hola ',
{untranslatable: '<b>${html`<i>World</i>`}</b>', index: 0},
'!',
],
},
],
}
);
});

test('msg(string(msg(string)))', () => {
checkTransform(
Expand Down
Expand Up @@ -5,6 +5,9 @@
<trans-unit id="h02c268d9b1fcb031">
<source>&lt;Hello<x id="0" equiv-text="&lt;b&gt;"/>&lt;World &amp; Friends&gt;<x id="1" equiv-text="&lt;/b&gt;"/>!&gt;</source>
</trans-unit>
<trans-unit id="h82ccc38d4d46eaa9">
<source>Hello <x id="0" equiv-text="&lt;b&gt;${html `&lt;i&gt;World&lt;/i&gt;`}&lt;/b&gt;"/>!</source>
</trans-unit>
<trans-unit id="he7b474847636149b">
<source>Hello <x id="0" equiv-text="&lt;b&gt;${name}"/>!<x id="1" equiv-text="&lt;/b&gt;"/></source>
<note from="lit-localize">Description of Hello $(name)</note>
Expand Down
Expand Up @@ -5,6 +5,9 @@
<trans-unit id="h02c268d9b1fcb031">
<source>&lt;Hello<x id="0" equiv-text="&lt;b&gt;"/>&lt;World &amp; Friends&gt;<x id="1" equiv-text="&lt;/b&gt;"/>!&gt;</source>
</trans-unit>
<trans-unit id="h82ccc38d4d46eaa9">
<source>Hello <x id="0" equiv-text="&lt;b&gt;${html `&lt;i&gt;World&lt;/i&gt;`}&lt;/b&gt;"/>!</source>
</trans-unit>
<trans-unit id="he7b474847636149b">
<source>Hello <x id="0" equiv-text="&lt;b&gt;${name}"/>!<x id="1" equiv-text="&lt;/b&gt;"/></source>
<note from="lit-localize">Description of Hello $(name)</note>
Expand Down
Expand Up @@ -14,3 +14,6 @@ msg(html`Hello <b>${name}!</b>`, {desc: 'Description of Hello $(name)'});

// Escaped markup characters should remain escaped
msg(html`&lt;Hello<b>&lt;World &amp; Friends&gt;</b>!&gt;`);

// A localized Lit template that contains a nested Lit template.
msg(html`Hello <b>${html`<i>World</i>`}</b>!`);
Expand Up @@ -14,3 +14,6 @@ msg(html`Hello <b>${name}!</b>`, {desc: 'Description of Hello $(name)'});

// Escaped markup characters should remain escaped
msg(html`&lt;Hello<b>&lt;World &amp; Friends&gt;</b>!&gt;`);

// A localized Lit template that contains a nested Lit template.
msg(html`Hello <b>${html`<i>World</i>`}</b>!`);

0 comments on commit 52ab087

Please sign in to comment.