From 7e8306fd085017b42f4d2f99d22e23b60d0a9ddb Mon Sep 17 00:00:00 2001 From: Adam Sampson Date: Thu, 26 Jan 2023 17:49:15 +0000 Subject: [PATCH 1/2] Check for buffered_caption being None in SCC reader This fixes a problem found when parsing CC data from a noisy LaserDisc: if a signal dropout corrupts the preamble for a caption, the SCC reader shouldn't crash when text is seen for that caption. --- src/main/python/ttconv/scc/reader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/python/ttconv/scc/reader.py b/src/main/python/ttconv/scc/reader.py index 9b441536..a13ae1bd 100644 --- a/src/main/python/ttconv/scc/reader.py +++ b/src/main/python/ttconv/scc/reader.py @@ -455,7 +455,7 @@ def process_text(self, word: str, time_code: SmpteTimeCode): self.active_caption.get_current_text().add_style_property(StyleProperties.FontStyle, self.current_font_style) self.active_caption.get_current_text().add_style_property(StyleProperties.TextDecoration, self.current_text_decoration) - else: + elif self.buffered_caption is not None: self.buffered_caption.append_text(word) self.buffered_caption.get_current_text().add_style_property(StyleProperties.Color, self.current_color) @@ -541,7 +541,7 @@ def process_line(self, line: SccLine) -> SmpteTimeCode: elif extended_char is not None: if self.current_style in (SccCaptionStyle.PaintOn, SccCaptionStyle.RollUp): self.active_caption.get_current_text().backspace() - else: + elif self.buffered_caption is not None: self.buffered_caption.get_current_text().backspace() word = extended_char.get_unicode_value() From 6465e4c236d9b5fbc6671352a224eb1cb8fb0054 Mon Sep 17 00:00:00 2001 From: Valentin NOEL Date: Fri, 17 Feb 2023 16:22:53 +0100 Subject: [PATCH 2/2] Fix: SCC reader skips captions without PAC --- src/main/python/ttconv/scc/reader.py | 4 +++ src/test/python/test_scc_reader.py | 53 ++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/src/main/python/ttconv/scc/reader.py b/src/main/python/ttconv/scc/reader.py index a13ae1bd..a7429862 100644 --- a/src/main/python/ttconv/scc/reader.py +++ b/src/main/python/ttconv/scc/reader.py @@ -504,6 +504,10 @@ def process_line(self, line: SccLine) -> SmpteTimeCode: spec_char = SccSpecialCharacter.find(scc_word.value) extended_char = SccExtendedCharacter.find(scc_word.value) + if pac is None and self.buffered_caption is None and self.active_caption is None: + LOGGER.warning(f"Uninitialized caption: skip {hex(scc_word.value)} code.") + continue + if pac is not None: debug += "[PAC|" + str(pac.get_row()) + "|" + str(pac.get_indent()) if pac.get_color() is not None: diff --git a/src/test/python/test_scc_reader.py b/src/test/python/test_scc_reader.py index ebf386c4..2fa4c7c9 100644 --- a/src/test/python/test_scc_reader.py +++ b/src/test/python/test_scc_reader.py @@ -250,6 +250,59 @@ def test_scc_pop_on_content_unexpectedly_ended(self): "consectetur adipiscing elit.") self.assertEqual(region_1, p_list[0].get_region()) + def test_scc_pop_on_content_without_preamble_address_code(self): + import logging + logging.basicConfig(level=logging.DEBUG) + + scc_content = """\ +Scenarist_SCC V1.0 + +00:00:02:16 942c + +00:00:03:01 9420 91ae 9421 4c6f 7265 6d20 6970 7375 6d20 9220 942c 942f + +00:00:07:29 9420 94D0 646f 6c6f 7220 7369 7420 616d 6574 2c80 9470 636f 6e73 6563 7465 7475 7220 6164 6970 6973 6369 6e67 2065 6c69 742e 942c 942f + +00:00:09:07 9420 656e 7465 7371 7565 2069 6e74 6572 6475 6d20 6c61 6369 6e69 6120 736f 6c6c 6963 6974 7564 696e 2e80 942c 942f + +00:00:11:27 9420 +""" + + scc_disassembly = """\ +00:00:02:16 {EDM} +00:00:03:01 {RCL}{I}{BS}Lorem ipsum Á{EDM}{EOC} +00:00:07:29 {RCL}{1400}dolor sit amet,{1500}consectetur adipiscing elit.{EDM}{EOC} +00:00:09:07 {RCL}entesque interdum lacinia sollicitudin.{EDM}{EOC} +00:00:11:27 {RCL} +""" + + self.assertEqual(scc_disassembly, to_disassembly(scc_content)) + + doc = to_model(scc_content) + self.assertIsNotNone(doc) + + region_1 = doc.get_region("pop1") + self.assertIsNotNone(region_1) + self.check_region_origin(region_1, 4, 15, doc.get_cell_resolution()) + self.check_region_extent(region_1, 28, 2, doc.get_cell_resolution()) + self.check_element_style(region_1, StyleProperties.DisplayAlign, DisplayAlignType.before) + self.check_element_style(region_1, StyleProperties.ShowBackground, ShowBackgroundType.whenActive) + + body = doc.get_body() + self.assertIsNotNone(body) + + div_list = list(body) + self.assertEqual(1, len(div_list)) + div = div_list[0] + self.assertIsNotNone(div) + + p_list = list(div) + self.assertEqual(1, len(p_list)) + + self.check_caption(p_list[0], "caption1", "00:00:08:26", None, "dolor sit amet,", Br, + "consectetur adipiscing elit.") + self.assertEqual(region_1, p_list[0].get_region()) + def test_2_rows_roll_up_content(self): scc_content = """\ Scenarist_SCC V1.0