From fa93d5353acf785e450c8e7e664f7012e0d99bdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Velad=20Galv=C3=A1n?= Date: Mon, 6 Nov 2023 11:26:56 +0100 Subject: [PATCH] fix(TTML): Fix support of urls in smpte:backgroundImage (#5851) Fixes https://github.com/shaka-project/shaka-player/issues/5049 --- externs/shaka/text.js | 4 ++- lib/media/media_source_engine.js | 3 ++- lib/player.js | 4 +-- lib/text/mp4_ttml_parser.js | 4 +-- lib/text/srt_text_parser.js | 4 +-- lib/text/text_engine.js | 5 ++-- lib/text/ttml_text_parser.js | 33 ++++++++++++++++++++---- test/media/media_source_engine_unit.js | 2 +- test/text/mp4_ttml_parser_unit.js | 8 +++--- test/text/srt_text_parser_unit.js | 2 +- test/text/text_engine_unit.js | 6 +++++ test/text/ttml_text_parser_unit.js | 35 ++++++++++++++++++++++++-- 12 files changed, 87 insertions(+), 23 deletions(-) diff --git a/externs/shaka/text.js b/externs/shaka/text.js index 5579299aab..cf42d110e1 100644 --- a/externs/shaka/text.js +++ b/externs/shaka/text.js @@ -36,11 +36,13 @@ shaka.extern.TextParser = class { * @param {shaka.extern.TextParser.TimeContext} timeContext * The time information that should be used to adjust the times values * for each cue. + * @param {?(string|undefined)} uri + * The media uri. * @return {!Array.} * * @exportDoc */ - parseMedia(data, timeContext) {} + parseMedia(data, timeContext, uri) {} /** * Notifies the stream if the manifest is in sequence mode or not. diff --git a/lib/media/media_source_engine.js b/lib/media/media_source_engine.js index 0ec46a1711..835ef4456d 100644 --- a/lib/media/media_source_engine.js +++ b/lib/media/media_source_engine.js @@ -830,7 +830,8 @@ shaka.media.MediaSourceEngine = class { await this.textEngine_.appendBuffer( data, reference ? reference.startTime : null, - reference ? reference.endTime : null); + reference ? reference.endTime : null, + reference ? reference.getUris()[0] : null); return; } diff --git a/lib/player.js b/lib/player.js index 7f55309937..9d49d62c2c 100644 --- a/lib/player.js +++ b/lib/player.js @@ -4706,7 +4706,7 @@ shaka.Player = class extends shaka.util.FakeEventTarget { vttOffset: 0, }; const data = shaka.util.BufferUtils.toUint8(buffer); - const cues = TextParser.parseMedia(data, time); + const cues = TextParser.parseMedia(data, time, uri); const references = []; for (const cue of cues) { @@ -4970,7 +4970,7 @@ shaka.Player = class extends shaka.util.FakeEventTarget { vttOffset: 0, }; const data = shaka.util.BufferUtils.toUint8(buffer); - const cues = obj.parseMedia(data, time); + const cues = obj.parseMedia(data, time, /* uri= */ null); return shaka.text.WebVttGenerator.convert(cues, adCuePoints); } throw new shaka.util.Error( diff --git a/lib/text/mp4_ttml_parser.js b/lib/text/mp4_ttml_parser.js index c2c5a9ef28..689c7defb8 100644 --- a/lib/text/mp4_ttml_parser.js +++ b/lib/text/mp4_ttml_parser.js @@ -75,7 +75,7 @@ shaka.text.Mp4TtmlParser = class { * @override * @export */ - parseMedia(data, time) { + parseMedia(data, time, uri) { const Mp4Parser = shaka.util.Mp4Parser; let sawMDAT = false; @@ -86,7 +86,7 @@ shaka.text.Mp4TtmlParser = class { sawMDAT = true; // Join this to any previous payload, in case the mp4 has multiple // mdats. - payload = payload.concat(this.parser_.parseMedia(data, time)); + payload = payload.concat(this.parser_.parseMedia(data, time, uri)); })); parser.parse(data, /* partialOkay= */ false); diff --git a/lib/text/srt_text_parser.js b/lib/text/srt_text_parser.js index 6401a42721..4b41b5e490 100644 --- a/lib/text/srt_text_parser.js +++ b/lib/text/srt_text_parser.js @@ -55,7 +55,7 @@ shaka.text.SrtTextParser = class { * @override * @export */ - parseMedia(data, time) { + parseMedia(data, time, uri) { const SrtTextParser = shaka.text.SrtTextParser; const BufferUtils = shaka.util.BufferUtils; const StringUtils = shaka.util.StringUtils; @@ -67,7 +67,7 @@ shaka.text.SrtTextParser = class { const newData = BufferUtils.toUint8(StringUtils.toUTF8(vvtText)); - return this.parser_.parseMedia(newData, time); + return this.parser_.parseMedia(newData, time, uri); } /** diff --git a/lib/text/text_engine.js b/lib/text/text_engine.js index 245ccc6d56..a438e75d8d 100644 --- a/lib/text/text_engine.js +++ b/lib/text/text_engine.js @@ -166,9 +166,10 @@ shaka.text.TextEngine = class { * @param {BufferSource} buffer * @param {?number} startTime relative to the start of the presentation * @param {?number} endTime relative to the start of the presentation + * @param {?string=} uri * @return {!Promise} */ - async appendBuffer(buffer, startTime, endTime) { + async appendBuffer(buffer, startTime, endTime, uri) { goog.asserts.assert( this.parser_, 'The parser should already be initialized'); @@ -198,7 +199,7 @@ shaka.text.TextEngine = class { // Parse the buffer and add the new cues. const allCues = this.parser_.parseMedia( - shaka.util.BufferUtils.toUint8(buffer), time); + shaka.util.BufferUtils.toUint8(buffer), time, uri); const cuesToAppend = allCues.filter((cue) => { return cue.startTime >= this.appendWindowStart_ && cue.startTime < this.appendWindowEnd_; diff --git a/lib/text/ttml_text_parser.js b/lib/text/ttml_text_parser.js index b08809f753..10760fcb25 100644 --- a/lib/text/ttml_text_parser.js +++ b/lib/text/ttml_text_parser.js @@ -7,6 +7,7 @@ goog.provide('shaka.text.TtmlTextParser'); goog.require('goog.asserts'); +goog.require('goog.Uri'); goog.require('shaka.log'); goog.require('shaka.text.Cue'); goog.require('shaka.text.CueRegion'); @@ -50,7 +51,7 @@ shaka.text.TtmlTextParser = class { * @override * @export */ - parseMedia(data, time) { + parseMedia(data, time, uri) { const TtmlTextParser = shaka.text.TtmlTextParser; const XmlUtils = shaka.util.XmlUtils; const ttpNs = TtmlTextParser.parameterNs_; @@ -147,7 +148,7 @@ shaka.text.TtmlTextParser = class { body, time, rateInfo, metadataElements, styles, regionElements, cueRegions, whitespaceTrim, cellResolutionInfo, /* parentCueElement= */ null, - /* isContent= */ false); + /* isContent= */ false, uri); if (cue) { // According to the TTML spec, backgrounds default to transparent. // So default the background of the top-level element to transparent. @@ -175,12 +176,14 @@ shaka.text.TtmlTextParser = class { * @param {?{columns: number, rows: number}} cellResolution * @param {?Element} parentCueElement * @param {boolean} isContent + * @param {?(string|undefined)} uri * @return {shaka.text.Cue} * @private */ static parseCue_( cueNode, timeContext, rateInfo, metadataElements, styles, regionElements, - cueRegions, whitespaceTrim, cellResolution, parentCueElement, isContent) { + cueRegions, whitespaceTrim, cellResolution, parentCueElement, isContent, + uri) { /** @type {Element} */ let cueElement; /** @type {Element} */ @@ -222,7 +225,21 @@ shaka.text.TtmlTextParser = class { } } - if (cueNode.nodeName == 'p' || imageElement) { + let imageUri = null; + const backgroundImage = shaka.util.XmlUtils.getAttributeNSList( + cueElement, + shaka.text.TtmlTextParser.smpteNsList_, + 'backgroundImage'); + if (uri && backgroundImage && !backgroundImage.startsWith('#')) { + const baseUri = new goog.Uri(uri); + const relativeUri = new goog.Uri(backgroundImage); + const newUri = baseUri.resolve(relativeUri).toString(); + if (newUri) { + imageUri = newUri; + } + } + + if (cueNode.nodeName == 'p' || imageElement || imageUri) { isContent = true; } @@ -255,6 +272,7 @@ shaka.text.TtmlTextParser = class { cellResolution, cueElement, isContent, + uri, ); // This node may or may not generate a nested cue. @@ -388,6 +406,7 @@ shaka.text.TtmlTextParser = class { cueElement, regionElementForStyle, imageElement, + imageUri, styles, /** isNested= */ parentIsContent, // "nested in a
" doesn't count. /** isLeaf= */ (nestedCues.length == 0)); @@ -490,13 +509,15 @@ shaka.text.TtmlTextParser = class { * @param {!Element} cueElement * @param {Element} region * @param {Element} imageElement + * @param {?string} imageUri * @param {!Array.} styles * @param {boolean} isNested * @param {boolean} isLeaf * @private */ static addStyle_( - cue, cueElement, region, imageElement, styles, isNested, isLeaf) { + cue, cueElement, region, imageElement, imageUri, styles, + isNested, isLeaf) { const TtmlTextParser = shaka.text.TtmlTextParser; const Cue = shaka.text.Cue; @@ -664,6 +685,8 @@ shaka.text.TtmlTextParser = class { backgroundImageData) { cue.backgroundImage = 'data:image/png;base64,' + backgroundImageData; } + } else if (imageUri) { + cue.backgroundImage = imageUri; } const textOutline = TtmlTextParser.getStyleAttribute_( diff --git a/test/media/media_source_engine_unit.js b/test/media/media_source_engine_unit.js index 4a3bb70c8f..9326df1e33 100644 --- a/test/media/media_source_engine_unit.js +++ b/test/media/media_source_engine_unit.js @@ -631,7 +631,7 @@ describe('MediaSourceEngine', () => { ContentType.TEXT, data, reference, fakeStream, /* hasClosedCaptions= */ false); expect(mockTextEngine.appendBuffer).toHaveBeenCalledWith( - data, 0, 10); + data, 0, 10, 'foo://bar'); }); it('appends transmuxed data', async () => { diff --git a/test/text/mp4_ttml_parser_unit.js b/test/text/mp4_ttml_parser_unit.js index a6160617b4..386967b16c 100644 --- a/test/text/mp4_ttml_parser_unit.js +++ b/test/text/mp4_ttml_parser_unit.js @@ -43,7 +43,7 @@ describe('Mp4TtmlParser', () => { parser.parseInit(ttmlInitSegment); const time = {periodStart: 0, segmentStart: 0, segmentEnd: 60, vttOffset: 0}; - const ret = parser.parseMedia(ttmlSegmentMultipleMDAT, time); + const ret = parser.parseMedia(ttmlSegmentMultipleMDAT, time, null); // Bodies. expect(ret.length).toBe(2); // Divs. @@ -63,10 +63,10 @@ describe('Mp4TtmlParser', () => { const parser = new shaka.text.Mp4TtmlParser(); parser.parseInit(ttmlInitSegment); - const ret1 = parser.parseMedia(ttmlSegment, time1); + const ret1 = parser.parseMedia(ttmlSegment, time1, null); expect(ret1.length).toBeGreaterThan(0); - const ret2 = parser.parseMedia(ttmlSegment, time2); + const ret2 = parser.parseMedia(ttmlSegment, time2, null); expect(ret2.length).toBeGreaterThan(0); expect(ret2[0].startTime).toBe(ret1[0].startTime + 7); @@ -164,7 +164,7 @@ describe('Mp4TtmlParser', () => { parser.parseInit(ttmlInitSegment); const time = {periodStart: 0, segmentStart: 0, segmentEnd: 60, vttOffset: 0}; - const result = parser.parseMedia(ttmlSegment, time); + const result = parser.parseMedia(ttmlSegment, time, null); shaka.test.TtmlUtils.verifyHelper( cues, result, {startTime: 23, endTime: 53.5}); }); diff --git a/test/text/srt_text_parser_unit.js b/test/text/srt_text_parser_unit.js index beee706cef..ea905dddd2 100644 --- a/test/text/srt_text_parser_unit.js +++ b/test/text/srt_text_parser_unit.js @@ -149,7 +149,7 @@ describe('SrtTextParser', () => { const data = BufferUtils.toUint8(StringUtils.toUTF8(text)); const parser = new shaka.text.SrtTextParser(); - const result = parser.parseMedia(data, time); + const result = parser.parseMedia(data, time, null); const expected = cues.map((cue) => { if (cue.nestedCues) { diff --git a/test/text/text_engine_unit.js b/test/text/text_engine_unit.js index a2219440f8..59f9f06103 100644 --- a/test/text/text_engine_unit.js +++ b/test/text/text_engine_unit.js @@ -105,6 +105,7 @@ describe('TextEngine', () => { expect(mockParseMedia).toHaveBeenCalledOnceMoreWith([ dummyData, {periodStart: 0, segmentStart: 0, segmentEnd: 3, vttOffset: 0}, + undefined, ]); expect(mockDisplayer.appendSpy).toHaveBeenCalledOnceMoreWith([ @@ -120,6 +121,7 @@ describe('TextEngine', () => { expect(mockParseMedia).toHaveBeenCalledOnceMoreWith([ dummyData, {periodStart: 0, segmentStart: 3, segmentEnd: 5, vttOffset: 0}, + undefined, ]); expect(mockDisplayer.appendSpy).toHaveBeenCalledOnceMoreWith([ @@ -272,6 +274,7 @@ describe('TextEngine', () => { expect(mockParseMedia).toHaveBeenCalledOnceMoreWith([ dummyData, {periodStart: 0, segmentStart: 0, segmentEnd: 3, vttOffset: 0}, + undefined, ]); expect(mockDisplayer.appendSpy).toHaveBeenCalledOnceMoreWith([ [ @@ -286,6 +289,7 @@ describe('TextEngine', () => { expect(mockParseMedia).toHaveBeenCalledOnceMoreWith([ dummyData, {periodStart: 4, segmentStart: 4, segmentEnd: 7, vttOffset: 4}, + undefined, ]); expect(mockDisplayer.appendSpy).toHaveBeenCalledOnceMoreWith([ [ @@ -316,6 +320,7 @@ describe('TextEngine', () => { expect(mockParseMedia).toHaveBeenCalledOnceMoreWith([ dummyData, {periodStart: 0, segmentStart: 0, segmentEnd: 3, vttOffset: 0}, + undefined, ]); textEngine.setTimestampOffset(8); @@ -325,6 +330,7 @@ describe('TextEngine', () => { expect(mockParseMedia).toHaveBeenCalledOnceMoreWith([ dummyData, {periodStart: 8, segmentStart: 4, segmentEnd: 7, vttOffset: 4}, + undefined, ]); }); }); diff --git a/test/text/ttml_text_parser_unit.js b/test/text/ttml_text_parser_unit.js index d5a5b3114e..686ead9051 100644 --- a/test/text/ttml_text_parser_unit.js +++ b/test/text/ttml_text_parser_unit.js @@ -1263,6 +1263,35 @@ describe('TtmlTextParser', () => { }); }); + it('supports smpte:backgroundImage attribute with url', () => { + verifyHelper( + [ + { + startTime: 62.05, + endTime: 3723.2, + payload: '', + }, + ], + '' + + '' + + '' + + 'base64EncodedImage' + + '' + + '
' + + '

' + + '
', + {periodStart: 0, segmentStart: 60, segmentEnd: 3730, vttOffset: 0}, + {startTime: 62.05, endTime: 3723.2}, + { + startTime: 62.05, + endTime: 3723.2, + backgroundImage: 'foo://bar/img_0.png', + isContainer: false, + }); + }); + it('supports smpte:backgroundImage attribute in div element', () => { verifyHelper( [], @@ -2112,7 +2141,8 @@ describe('TtmlTextParser', () => { function verifyHelper(cues, text, time, bodyProperties, divProperties) { const data = shaka.util.BufferUtils.toUint8(shaka.util.StringUtils.toUTF8(text)); - const result = new shaka.text.TtmlTextParser().parseMedia(data, time); + const result = new shaka.text.TtmlTextParser() + .parseMedia(data, time, 'foo://bar'); shaka.test.TtmlUtils.verifyHelper( cues, result, bodyProperties, divProperties); } @@ -2139,7 +2169,8 @@ describe('TtmlTextParser', () => { expect(() => { new shaka.text.TtmlTextParser().parseMedia( shaka.util.BufferUtils.toUint8(data), - {periodStart: 0, segmentStart: 0, segmentEnd: 10, vttOffset: 0}); + {periodStart: 0, segmentStart: 0, segmentEnd: 10, vttOffset: 0}, + 'foo://bar'); }).toThrow(error); } });