Skip to content

Commit 37e36db

Browse files
committed
Fix handling of implicitly closed char styles when updating USFM
- only skip/collect tokens if the marker is explicitly closed
- fm markers are not footnote elements
- only end note text when an ft marker is closed
1 parent 7e7e033 commit 37e36db

File tree

3 files changed

+46
-13
lines changed

3 files changed

+46
-13
lines changed

machine/corpora/scripture_ref_usfm_parser_handler.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ def end_char(
170170
if self._is_embed_part_style(marker):
171171
if self._in_nested_embed:
172172
self._in_nested_embed = False
173-
else:
173+
elif self._is_note_text(marker):
174174
self._end_note_text_wrapper(state)
175175
if self._is_embed_style(marker):
176176
self._end_embed(state, marker, attributes, closed)
@@ -272,14 +272,17 @@ def _is_in_embed(self, marker: Optional[str]) -> bool:
272272

273273
def _is_in_nested_embed(self, marker: Optional[str]) -> bool:
274274
return self._in_nested_embed or (
275-
marker is not None and marker.startswith("+") and marker[1] in EMBED_PART_START_CHAR_STYLES
275+
marker is not None
276+
and marker.startswith("+")
277+
and marker[1] in EMBED_PART_START_CHAR_STYLES
278+
and marker != "fm"
276279
)
277280

278281
def _is_note_text(self, marker: Optional[str]) -> bool:
279282
return marker == "ft"
280283

281284
def _is_embed_part_style(self, marker: Optional[str]) -> bool:
    """Return whether *marker* is an embed-part character style.

    A marker qualifies when it is not ``None``, is not ``fm``, and starts
    with one of the prefixes in ``EMBED_PART_START_CHAR_STYLES``.
    """
    # Rule out the two disqualifying cases first, then test the prefix.
    if marker is None or marker == "fm":
        return False
    return marker.startswith(EMBED_PART_START_CHAR_STYLES)
283286

284287
def _is_embed_style(self, marker: Optional[str]) -> bool:
    """Return whether *marker*, with surrounding ``*`` removed, is in ``EMBED_STYLES``."""
    if marker is None:
        return False
    # strip("*") removes asterisks from both ends, so "f*" is looked up as "f".
    base_marker = marker.strip("*")
    return base_marker in EMBED_STYLES

machine/corpora/update_usfm_parser_handler.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -184,10 +184,13 @@ def end_char(
184184
attributes: Sequence[UsfmAttribute],
185185
closed: bool,
186186
) -> None:
187-
if self._replace_with_new_tokens(state, closed):
188-
self._skip_tokens(state)
189-
else:
190-
self._collect_tokens(state)
187+
188+
skip_tokens = self._replace_with_new_tokens(state, closed)
189+
if closed:
190+
if skip_tokens:
191+
self._skip_tokens(state)
192+
else:
193+
self._collect_tokens(state)
191194

192195
super().end_char(state, marker, attributes, closed)
193196

@@ -207,10 +210,12 @@ def _start_embed(
207210
def _end_embed(
208211
self, state: UsfmParserState, marker: str, attributes: Sequence[UsfmAttribute], closed: bool
209212
) -> None:
210-
if self._replace_with_new_tokens(state, closed):
211-
self._skip_tokens(state)
212-
else:
213-
self._collect_tokens(state)
213+
skip_tokens = self._replace_with_new_tokens(state, closed)
214+
if closed:
215+
if skip_tokens:
216+
self._skip_tokens(state)
217+
else:
218+
self._collect_tokens(state)
214219

215220
self._embed_row_texts.clear()
216221
self._embed_updated = False

tests/corpora/test_update_usfm_parser_handler.py

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -305,12 +305,13 @@ def test_get_usfm_verse_replace_note() -> None:
305305
]
306306
usfm = r"""\id MAT - Test
307307
\c 1
308-
\v 1 Chapter \add one\add*, verse \f + \fr 2:1: \ft This is a footnote.\f*one.
308+
\v 1 Chapter \add one\add*, verse \f + \fr 2:1: \ft This is a \fq quotation \ft and an \fqa alternative quotation\f*one.
309309
"""
310310
target = update_usfm(rows, usfm)
311+
# Only the first \ft marker is updated
311312
result = r"""\id MAT - Test
312313
\c 1
313-
\v 1 updated text \f + \fr 2:1: \ft This is a new footnote. \f*
314+
\v 1 updated text \f + \fr 2:1: \ft This is a new footnote. \fq quotation \ft and an \fqa alternative quotation\f*
314315
"""
315316
assess(target, result)
316317

@@ -979,6 +980,30 @@ def test_multiple_ft_only_update_first() -> None:
979980
assess(target, result)
980981

981982

983+
def test_implicitly_closed_char_style() -> None:
    """Update a verse whose ``\\bd`` character style has no explicit closing marker.

    The input leaves ``\\bd`` open at the end of MAT 1:1. The expected output
    replaces that verse's text with the row text and leaves the following
    chapter and verse unchanged.
    """
    source_usfm = r"""\id MAT - Test
\c 1
\v 1 Verse \bd one.
\c 2
\v 1 Verse one.
"""
    expected_usfm = r"""\id MAT - Test
\c 1
\v 1 Update text
\c 2
\v 1 Verse one.
"""
    # A single replacement row targeting the verse with the unclosed style.
    update_rows = [(scr_ref("MAT 1:1"), "Update text")]

    updated_usfm = update_usfm(update_rows, source_usfm)

    assess(updated_usfm, expected_usfm)
1005+
1006+
9821007
def scr_ref(*refs: str) -> List[ScriptureRef]:
    """Parse each reference string with ``ScriptureRef.parse`` and return the results in order."""
    return list(map(ScriptureRef.parse, refs))
9841009

0 commit comments

Comments
 (0)