From 726ef425b095543a216ba8fed0dfe6d1657e2e95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Velad=20Galv=C3=A1n?= Date: Fri, 20 Jan 2023 23:41:51 +0100 Subject: [PATCH] fix(WebVTT): Add support to &nbsp;, &lrm; and &rlm; (#4920) --- lib/text/vtt_text_parser.js | 9 ++++++--- test/text/vtt_text_parser_unit.js | 4 ++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/lib/text/vtt_text_parser.js b/lib/text/vtt_text_parser.js index 349e12c6f5..6d8e2cbf25 100644 --- a/lib/text/vtt_text_parser.js +++ b/lib/text/vtt_text_parser.js @@ -940,8 +940,8 @@ shaka.text.VttTextParser = class { } /** - * This method converts the HTML entities &amp;, &lt;, &gt;, &quot;, and - * &#39; in string to their corresponding characters. + * This method converts the HTML entities &amp;, &lt;, &gt;, &quot;, &#39;, + * &nbsp;, &lrm; and &rlm; in string to their corresponding characters. * * @param {!string} input * @return {string} @@ -955,10 +955,13 @@ shaka.text.VttTextParser = class { '&gt;': '>', '&quot;': '"', '&#39;': '\'', + '&nbsp;': '\u{a0}', + '&lrm;': '\u{200e}', + '&rlm;': '\u{200f}', }; // Used to match HTML entities and HTML characters. - const reEscapedHtml = /&(?:amp|lt|gt|quot|#(0+)?39);/g; + const reEscapedHtml = /&(?:amp|lt|gt|quot|#(0+)?39|nbsp|lrm|rlm);/g; const reHasEscapedHtml = RegExp(reEscapedHtml.source); // This check is an optimization, since replace always makes a copy if (input && reHasEscapedHtml.test(input)) { diff --git a/test/text/vtt_text_parser_unit.js b/test/text/vtt_text_parser_unit.js index d4c4d4907e..6c02656ab5 100644 --- a/test/text/vtt_text_parser_unit.js +++ b/test/text/vtt_text_parser_unit.js @@ -852,11 +852,11 @@ describe('VttTextParser', () => { it('support escaped html payload', () => { verifyHelper( [ - {startTime: 20.1, endTime: 40.505, payload: '"Test & 1"'}, + {startTime: 20.1, endTime: 40.505, payload: '"Test & 1"\u{a0}'}, ], 'WEBVTT\n\n' + '00:00:20.100 --> 00:00:40.505\n' + - '&quot;Test &amp; 1&quot;', + '&quot;Test &amp; 1&quot;&nbsp;', {periodStart: 0, segmentStart: 0, segmentEnd: 0, vttOffset: 0}); });