Skip to content

Commit

Permalink
feat(WebVTT): Handle badly formed VTT (#6147)
Browse files Browse the repository at this point in the history
Escape or remove chevrons that appear as part of the inner text of the
element to avoid a parse failure.
  • Loading branch information
dave-nicholas committed Jan 26, 2024
1 parent bd944d1 commit 335eab0
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 3 deletions.
27 changes: 27 additions & 0 deletions lib/text/vtt_text_parser.js
Expand Up @@ -464,6 +464,7 @@ shaka.text.VttTextParser = class {

payload = VttTextParser.replaceKaraokeStylePayload_(payload);
payload = VttTextParser.replaceVoiceStylePayload_(payload);
payload = VttTextParser.escapeInvalidChevrons_(payload);
const xmlPayload = '<span>' + payload + '</span>';
let element;
try {
Expand All @@ -490,6 +491,32 @@ shaka.text.VttTextParser = class {
}
}

/**
* This method converts invalid > chevrons to HTML entities.
* It also removes < chevrons as per spec.
*
* @param {!string} input
* @return {string}
* @private
*/
static escapeInvalidChevrons_(input) {
// Used to map HTML entities to characters.
const htmlEscapes = {
'< ': '',
' >': ' &gt;',
};

const reEscapedHtml = /(< +>|<\s|\s>)/g;
const reHasEscapedHtml = RegExp(reEscapedHtml.source);
// This check is an optimization, since replace always makes a copy
if (input && reHasEscapedHtml.test(input)) {
return input.replace(reEscapedHtml, (entity) => {
return htmlEscapes[entity] || '';
});
}
return input || '';
}

/**
* Converts voice style tag to be valid for xml parsing
* For example,
Expand Down
28 changes: 25 additions & 3 deletions test/text/vtt_text_parser_unit.js
Expand Up @@ -1100,8 +1100,28 @@ describe('VttTextParser', () => {
{
startTime: 110,
endTime: 120,
payload: '<c.lime>less or more < > in text</c>',
nestedCues: [],
payload: '',
nestedCues: [
{
startTime: 110,
endTime: 120,
payload: 'less or more > >in text >',
color: 'lime',
},
],
},
{
startTime: 120,
endTime: 130,
payload: '',
nestedCues: [
{
startTime: 120,
endTime: 130,
payload: 'arrow in --> text',
color: 'lime',
},
],
},
],
'WEBVTT\n\n' +
Expand All @@ -1122,7 +1142,9 @@ describe('VttTextParser', () => {
'00:01:40.000 --> 00:01:50.000\n' +
'<c.lime>forward slash 1/2 in text</c>\n\n' +
'00:01:50.000 --> 00:02:00.000\n' +
'<c.lime>less or more < > in text</c>',
'<c.lime>less or more < > > < > >in text ></c>\n\n' +
'00:02:00.000 --> 00:02:10.000\n' +
'<c.lime>arrow in --> text</c>',
{periodStart: 0, segmentStart: 0, segmentEnd: 0, vttOffset: 0});
});

Expand Down

0 comments on commit 335eab0

Please sign in to comment.