From f3e5d79b053ac20a1e17856269d603f35203e48f Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Tue, 19 May 2026 17:34:00 -0400 Subject: [PATCH 1/6] Port changes from Machine --- machine/scripture/verse_ref.py | 47 +++++++- tests/corpora/test_parallel_text_corpus.py | 127 +++++++++++++++++++++ tests/scripture/test_verse_ref.py | 51 +++++++++ 3 files changed, 222 insertions(+), 3 deletions(-) diff --git a/machine/scripture/verse_ref.py b/machine/scripture/verse_ref.py index c5feba16..5732fcfc 100644 --- a/machine/scripture/verse_ref.py +++ b/machine/scripture/verse_ref.py @@ -95,9 +95,9 @@ def from_string(cls, verse_str: str, versification: Optional[Versification] = No return VerseRef(b_cv[0], c_v[0], c_v[1], versification) @classmethod - def try_from_string(cls, verse_str: str) -> Optional[VerseRef]: + def try_from_string(cls, verse_str: str, versification: Optional[Versification] = None) -> Optional[VerseRef]: try: - return cls.from_string(verse_str) + return cls.from_string(verse_str, versification) except ValueError: return None @@ -402,6 +402,18 @@ def exact_equals(self, other: object) -> bool: and self.versification == other.versification ) + def remove_segments(self) -> VerseRef: + if not self.segment(): + return self.copy() + vr = VerseRef.try_from_string( + f"{self.book} {self.chapter_num}:{','.join([str(v.verse_num) for v in self.all_verses()])}", + self.versification, + ) + if vr is None: + vr = self.copy() + vr.simplify() + return vr + def __eq__(self, other): if not isinstance(other, VerseRef): return NotImplemented @@ -816,9 +828,11 @@ def first_included_verse(self, book_num: int, chapter_num: int) -> Optional[Vers def is_excluded(self, bbbcccvvv: int) -> bool: return bbbcccvvv in self.excluded_verses - def change_versification(self, vref: VerseRef) -> bool: + def change_versification(self, vref: VerseRef, ignore_segments: bool = False) -> bool: if vref.has_multiple: return self._change_versification_with_ranges(vref) + if vref.segment() and not ignore_segments: + return self._change_versification_with_segments(vref) if vref.versification == NULL_VERSIFICATION: vref.versification = self @@ -904,6 +918,33 @@ def _change_versification_with_ranges(self, vref: VerseRef) -> bool: return all_same_chapter + def _change_versification_with_segments(self, orig_vref: VerseRef) -> bool: + vref = orig_vref.copy() + all_in_one_chapter = self.change_versification(vref, ignore_segments=True) + if not vref.segment(): + orig_vref.copy_from(vref) + return all_in_one_chapter + + vref_without_segments = orig_vref.remove_segments() + all_in_one_chapter = self.change_versification(vref_without_segments, ignore_segments=True) + if vref_without_segments != vref.remove_segments(): + verses = [ + v_with_correct_number.verse + v_with_segments.segment() + for (v_with_segments, v_with_correct_number) in zip( + orig_vref.all_verses(), vref_without_segments.all_verses() + ) + ] + + combined_vr = VerseRef.try_from_string( + f"{vref_without_segments.book} {vref_without_segments.chapter_num}:{','.join(verses)}", self + ) + if combined_vr is not None: + orig_vref.copy_from(combined_vr) + return all_in_one_chapter + + orig_vref.copy_from(vref) + return all_in_one_chapter + class VerseMappings: def __init__(self) -> None: diff --git a/tests/corpora/test_parallel_text_corpus.py b/tests/corpora/test_parallel_text_corpus.py index b5558e94..5218cf6e 100644 --- a/tests/corpora/test_parallel_text_corpus.py +++ b/tests/corpora/test_parallel_text_corpus.py @@ -988,6 +988,133 @@ def test_get_rows_verse_ref_out_of_order() -> None: assert rows[3].source_segment == "source chapter one, verse four .".split() assert rows[3].target_segment == "target chapter one, verse four . target chapter one, verse five .".split() + def test_get_rows_different_versifications_with_verse_segments(): + sourceCorpus = DictionaryTextCorpus( + MemoryText( + "NUM", + [ + TextRow( + "NUM", + ScriptureRef.parse("NUM 17:1a", ORIGINAL_VERSIFICATION), + "source chapter seventeen, verse one a .", + ), + TextRow( + "NUM", + ScriptureRef.parse("NUM 17:1b", ORIGINAL_VERSIFICATION), + "source chapter seventeen, verse one b .", + ), + TextRow( + "NUM", + ScriptureRef.parse("NUM 17:2", ORIGINAL_VERSIFICATION), + "source chapter seventeen, verse two .", + ), + TextRow( + "NUM", + ScriptureRef.parse("NUM 17:3", ORIGINAL_VERSIFICATION), + "source chapter seventeen, verse three .", + ), + TextRow( + "NUM", + ScriptureRef.parse("NUM 17:4", ORIGINAL_VERSIFICATION), + "source chapter seventeen, verse four .", + ), + ], + ) + ) + targetCorpus = DictionaryTextCorpus( + MemoryText( + "NUM", + [ + TextRow( + "NUM", + ScriptureRef.parse("NUM 16:36", ENGLISH_VERSIFICATION), + "target chapter sixteen, verse thirty six .", + ), + TextRow( + "NUM", + ScriptureRef.parse("NUM 16:37", ENGLISH_VERSIFICATION), + "target chapter sixteen, verse thirty seven .", + ), + TextRow( + "NUM", + ScriptureRef.parse("NUM 16:38", ENGLISH_VERSIFICATION), + "target chapter sixteen, verse thirty eight .", + ), + TextRow( + "NUM", + ScriptureRef.parse("NUM 16:39a", ENGLISH_VERSIFICATION), + "target chapter sixteen, verse thirty nine a .", + ), + TextRow( + "NUM", + ScriptureRef.parse("NUM 16:39b", ENGLISH_VERSIFICATION), + "target chapter sixteen, verse thirty nine b .", + ), + ], + ) + ) + + # English vs. Original + # NUM 16:36-50 = NUM 17:1-15 + # NUM 17:1-13 = NUM 17:16-28 + parallelCorpus = StandardParallelTextCorpus(sourceCorpus, targetCorpus) + rows = list(parallelCorpus.get_rows()) + + # port below code from c# to python + assert len(rows) == 0 + + assert rows[0].source_refs == [ScriptureRef.parse("NUM 17:1a", ORIGINAL_VERSIFICATION)] + assert rows[0].target_refs == [ScriptureRef.parse("NUM 16:36", ENGLISH_VERSIFICATION)] + # Assert.That(rows[0].SourceSegment, Is.EqualTo("source chapter seventeen, verse one a .".Split())); + assert rows[0].source_segment == "source chapter seventeen, verse one a .".split() + # Assert.That(rows[0].TargetSegment, Is.EqualTo("target chapter sixteen, verse thirty six .".Split())); + assert rows[0].target_segment == "target chapter sixteen, verse thirty six .".split() + + # Assert.That(rows[1].SourceRefs, Is.EqualTo([ScriptureRef.Parse("NUM 17:1b", ScrVers.Original)])); + assert rows[1].source_refs == [ScriptureRef.parse("NUM 17:1b", ORIGINAL_VERSIFICATION)] + # Assert.That(rows[1].TargetRefs, Is.EqualTo([ScriptureRef.Parse("NUM 16:36", ScrVers.English)])); + assert rows[1].target_refs == [ScriptureRef.parse("NUM 16:36", ENGLISH_VERSIFICATION)] + # Assert.That(rows[1].SourceSegment, Is.EqualTo("source chapter seventeen, verse one b .".Split())); + assert rows[1].source_segment == "source chapter seventeen, verse one b .".split() + # Assert.That(rows[1].TargetSegment, Is.EqualTo("target chapter sixteen, verse thirty six .".Split())); + assert rows[1].target_segment == "target chapter sixteen, verse thirty six .".split() + + # Assert.That(rows[2].SourceRefs, Is.EqualTo([ScriptureRef.Parse("NUM 17:2", ScrVers.Original)])); + assert rows[2].source_refs == [ScriptureRef.parse("NUM 17:2", ORIGINAL_VERSIFICATION)] + # Assert.That(rows[2].TargetRefs, Is.EqualTo([ScriptureRef.Parse("NUM 16:37", ScrVers.English)])); + assert rows[2].target_refs == [ScriptureRef.parse("NUM 16:37", ENGLISH_VERSIFICATION)] + # Assert.That(rows[2].SourceSegment, Is.EqualTo("source chapter seventeen, verse two .".Split())); + assert rows[2].source_segment == "source chapter seventeen, verse two .".split() + # Assert.That(rows[2].TargetSegment, Is.EqualTo("target chapter sixteen, verse thirty seven .".Split())); + assert rows[2].target_segment == "target chapter sixteen, verse thirty seven .".split() + + # Assert.That(rows[3].SourceRefs, Is.EqualTo([ScriptureRef.Parse("NUM 17:3", ScrVers.Original)])); + assert rows[3].source_refs == [ScriptureRef.parse("NUM 17:3", ORIGINAL_VERSIFICATION)] + # Assert.That(rows[3].TargetRefs, Is.EqualTo([ScriptureRef.Parse("NUM 16:38", ScrVers.English)])); + assert rows[3].target_refs == [ScriptureRef.parse("NUM 16:38", ENGLISH_VERSIFICATION)] + # Assert.That(rows[3].SourceSegment, Is.EqualTo("source chapter seventeen, verse three .".Split())); + assert rows[3].source_segment == "source chapter seventeen, verse three .".split() + # Assert.That(rows[3].TargetSegment, Is.EqualTo("target chapter sixteen, verse thirty eight .".Split())); + assert rows[3].target_segment == "target chapter sixteen, verse thirty eight .".split() + + # Assert.That(rows[4].SourceRefs, Is.EqualTo([ScriptureRef.Parse("NUM 17:4", ScrVers.Original)])); + assert rows[4].source_refs == [ScriptureRef.parse("NUM 17:4", ORIGINAL_VERSIFICATION)] + # Assert.That(rows[4].TargetRefs, Is.EqualTo([ScriptureRef.Parse("NUM 16:39a", ScrVers.English)])); + assert rows[4].target_refs == [ScriptureRef.parse("NUM 16:39a", ENGLISH_VERSIFICATION)] + # Assert.That(rows[4].SourceSegment, Is.EqualTo("source chapter seventeen, verse four .".Split())); + assert rows[4].source_segment == "source chapter seventeen, verse four .".split() + # Assert.That(rows[4].TargetSegment, Is.EqualTo("target chapter sixteen, verse thirty nine a .".Split())); + assert rows[4].target_segment == "target chapter sixteen, verse thirty nine a .".split() + + # Assert.That(rows[5].SourceRefs, Is.EqualTo([ScriptureRef.Parse("NUM 17:4", ScrVers.Original)])); + assert rows[5].source_refs == [ScriptureRef.parse("NUM 17:4", ORIGINAL_VERSIFICATION)] + # Assert.That(rows[5].TargetRefs, Is.EqualTo([ScriptureRef.Parse("NUM 16:39b", ScrVers.English)])); + assert rows[5].target_refs == [ScriptureRef.parse("NUM 16:39b", ENGLISH_VERSIFICATION)] + # Assert.That(rows[5].SourceSegment, Is.EqualTo("source chapter seventeen, verse four .".Split())); + assert rows[5].source_segment == "source chapter seventeen, verse four .".split() + # Assert.That(rows[5].TargetSegment, Is.EqualTo("target chapter sixteen, verse thirty nine b .".Split())); + assert rows[5].target_segment == "target chapter sixteen, verse thirty nine b .".split() + def test_to_pandas() -> None: source_corpus = DictionaryTextCorpus( diff --git a/tests/scripture/test_verse_ref.py b/tests/scripture/test_verse_ref.py index 911225ad..e72f5346 100644 --- a/tests/scripture/test_verse_ref.py +++ b/tests/scripture/test_verse_ref.py @@ -757,3 +757,54 @@ def test_unbridge() -> None: assert VerseRef.from_string("EXO 6:9a,9b").unbridge() == VerseRef.from_string("EXO 6:9a") assert VerseRef.from_string("EXO 6:4-10").unbridge() == VerseRef.from_string("EXO 6:4") assert VerseRef.from_string("EXO 6:150monkeys").unbridge() == VerseRef.from_string("EXO 6:150monkeys") + + +def test_remove_segments() -> None: + assert VerseRef.from_string("MAT 1:1").remove_segments() == VerseRef.from_string("MAT 1:1") + assert VerseRef.from_string("MAT 1:1a").remove_segments() == VerseRef.from_string("MAT 1:1") + assert VerseRef.from_string("MAT 1:1a-2b,5a").remove_segments() == VerseRef.from_string("MAT 1:1,2,5") + assert VerseRef.from_string("MAT 1:1a-3b").remove_segments() == VerseRef.from_string("MAT 1:1,2,3") + + +def test_change_versification_with_segments() -> None: + + # English vs. Original + # NUM 16:36-50 = NUM 17:1-15 + # NUM 17:1-13 = NUM 17:16-28 + # ESG 1:1 = ESG 1:1a + # ESG 1:2 = ESG 1:1b + + verse_ref = VerseRef.from_string("NUM 17:1", ENGLISH_VERSIFICATION) + verse_ref.change_versification(ORIGINAL_VERSIFICATION) + assert verse_ref.versification == ORIGINAL_VERSIFICATION + assert str(verse_ref) == "NUM 17:16" + + verse_ref = VerseRef.from_string("NUM 17:1a", ENGLISH_VERSIFICATION) + verse_ref.change_versification(ORIGINAL_VERSIFICATION) + assert verse_ref.versification == ORIGINAL_VERSIFICATION + assert str(verse_ref) == "NUM 17:16a" + + verse_ref = VerseRef.from_string("NUM 17:1a-2b,5a", ENGLISH_VERSIFICATION) + verse_ref.change_versification(ORIGINAL_VERSIFICATION) + assert verse_ref.versification == ORIGINAL_VERSIFICATION + assert str(verse_ref) == "NUM 17:16a-17b,20a" + + verse_ref = VerseRef.from_string("NUM 17:13a-15a", ORIGINAL_VERSIFICATION) + verse_ref.change_versification(ENGLISH_VERSIFICATION) + assert verse_ref.versification == ENGLISH_VERSIFICATION + assert str(verse_ref) == "NUM 16:48a-50a" + + verse_ref = VerseRef.from_string("NUM 17:1a", ENGLISH_VERSIFICATION) + verse_ref.change_versification(ENGLISH_VERSIFICATION) + assert verse_ref.versification == ENGLISH_VERSIFICATION + assert str(verse_ref) == "NUM 17:1a" + + verse_ref = VerseRef.from_string("ESG 1:1b", ORIGINAL_VERSIFICATION) + verse_ref.change_versification(ENGLISH_VERSIFICATION) + assert verse_ref.versification == ENGLISH_VERSIFICATION + assert str(verse_ref) == "ESG 1:2" + + verse_ref = VerseRef.from_string("ESG 1:2", ENGLISH_VERSIFICATION) + verse_ref.change_versification(ORIGINAL_VERSIFICATION) + assert verse_ref.versification == ORIGINAL_VERSIFICATION + assert str(verse_ref) == "ESG 1:1b" From 813244e1e2177e89c41e10a2113f546843d67837 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Tue, 19 May 2026 17:36:02 -0400 Subject: [PATCH 2/6] Remove commented out code --- tests/corpora/test_parallel_text_corpus.py | 23 ---------------------- 1 file changed, 23 deletions(-) diff --git a/tests/corpora/test_parallel_text_corpus.py b/tests/corpora/test_parallel_text_corpus.py index 5218cf6e..c5cc3f30 100644 --- a/tests/corpora/test_parallel_text_corpus.py +++ b/tests/corpora/test_parallel_text_corpus.py @@ -1060,59 +1060,36 @@ def test_get_rows_different_versifications_with_verse_segments(): parallelCorpus = StandardParallelTextCorpus(sourceCorpus, targetCorpus) rows = list(parallelCorpus.get_rows()) - # port below code from c# to python assert len(rows) == 0 assert rows[0].source_refs == [ScriptureRef.parse("NUM 17:1a", ORIGINAL_VERSIFICATION)] assert rows[0].target_refs == [ScriptureRef.parse("NUM 16:36", ENGLISH_VERSIFICATION)] - # Assert.That(rows[0].SourceSegment, Is.EqualTo("source chapter seventeen, verse one a .".Split())); assert rows[0].source_segment == "source chapter seventeen, verse one a .".split() - # Assert.That(rows[0].TargetSegment, Is.EqualTo("target chapter sixteen, verse thirty six .".Split())); assert rows[0].target_segment == "target chapter sixteen, verse thirty six .".split() - # Assert.That(rows[1].SourceRefs, Is.EqualTo([ScriptureRef.Parse("NUM 17:1b", ScrVers.Original)])); assert rows[1].source_refs == [ScriptureRef.parse("NUM 17:1b", ORIGINAL_VERSIFICATION)] - # Assert.That(rows[1].TargetRefs, Is.EqualTo([ScriptureRef.Parse("NUM 16:36", ScrVers.English)])); assert rows[1].target_refs == [ScriptureRef.parse("NUM 16:36", ENGLISH_VERSIFICATION)] - # Assert.That(rows[1].SourceSegment, Is.EqualTo("source chapter seventeen, verse one b .".Split())); assert rows[1].source_segment == "source chapter seventeen, verse one b .".split() - # Assert.That(rows[1].TargetSegment, Is.EqualTo("target chapter sixteen, verse thirty six .".Split())); assert rows[1].target_segment == "target chapter sixteen, verse thirty six .".split() - # Assert.That(rows[2].SourceRefs, Is.EqualTo([ScriptureRef.Parse("NUM 17:2", ScrVers.Original)])); assert rows[2].source_refs == [ScriptureRef.parse("NUM 17:2", ORIGINAL_VERSIFICATION)] - # Assert.That(rows[2].TargetRefs, Is.EqualTo([ScriptureRef.Parse("NUM 16:37", ScrVers.English)])); assert rows[2].target_refs == [ScriptureRef.parse("NUM 16:37", ENGLISH_VERSIFICATION)] - # Assert.That(rows[2].SourceSegment, Is.EqualTo("source chapter seventeen, verse two .".Split())); assert rows[2].source_segment == "source chapter seventeen, verse two .".split() - # Assert.That(rows[2].TargetSegment, Is.EqualTo("target chapter sixteen, verse thirty seven .".Split())); assert rows[2].target_segment == "target chapter sixteen, verse thirty seven .".split() - # Assert.That(rows[3].SourceRefs, Is.EqualTo([ScriptureRef.Parse("NUM 17:3", ScrVers.Original)])); assert rows[3].source_refs == [ScriptureRef.parse("NUM 17:3", ORIGINAL_VERSIFICATION)] - # Assert.That(rows[3].TargetRefs, Is.EqualTo([ScriptureRef.Parse("NUM 16:38", ScrVers.English)])); assert rows[3].target_refs == [ScriptureRef.parse("NUM 16:38", ENGLISH_VERSIFICATION)] - # Assert.That(rows[3].SourceSegment, Is.EqualTo("source chapter seventeen, verse three .".Split())); assert rows[3].source_segment == "source chapter seventeen, verse three .".split() - # Assert.That(rows[3].TargetSegment, Is.EqualTo("target chapter sixteen, verse thirty eight .".Split())); assert rows[3].target_segment == "target chapter sixteen, verse thirty eight .".split() - # Assert.That(rows[4].SourceRefs, Is.EqualTo([ScriptureRef.Parse("NUM 17:4", ScrVers.Original)])); assert rows[4].source_refs == [ScriptureRef.parse("NUM 17:4", ORIGINAL_VERSIFICATION)] - # Assert.That(rows[4].TargetRefs, Is.EqualTo([ScriptureRef.Parse("NUM 16:39a", ScrVers.English)])); assert rows[4].target_refs == [ScriptureRef.parse("NUM 16:39a", ENGLISH_VERSIFICATION)] - # Assert.That(rows[4].SourceSegment, Is.EqualTo("source chapter seventeen, verse four .".Split())); assert rows[4].source_segment == "source chapter seventeen, verse four .".split() - # Assert.That(rows[4].TargetSegment, Is.EqualTo("target chapter sixteen, verse thirty nine a .".Split())); assert rows[4].target_segment == "target chapter sixteen, verse thirty nine a .".split() - # Assert.That(rows[5].SourceRefs, Is.EqualTo([ScriptureRef.Parse("NUM 17:4", ScrVers.Original)])); assert rows[5].source_refs == [ScriptureRef.parse("NUM 17:4", ORIGINAL_VERSIFICATION)] - # Assert.That(rows[5].TargetRefs, Is.EqualTo([ScriptureRef.Parse("NUM 16:39b", ScrVers.English)])); assert rows[5].target_refs == [ScriptureRef.parse("NUM 16:39b", ENGLISH_VERSIFICATION)] - # Assert.That(rows[5].SourceSegment, Is.EqualTo("source chapter seventeen, verse four .".Split())); assert rows[5].source_segment == "source chapter seventeen, verse four .".split() - # Assert.That(rows[5].TargetSegment, Is.EqualTo("target chapter sixteen, verse thirty nine b .".Split())); assert rows[5].target_segment == "target chapter sixteen, verse thirty nine b .".split() From 62b85642a2575cbcfdbdcda53ced3968a077b2d5 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Tue, 19 May 2026 17:38:34 -0400 Subject: [PATCH 3/6] Make test variable names snake case --- tests/corpora/test_parallel_text_corpus.py | 201 +++++++++++---------- 1 file changed, 101 insertions(+), 100 deletions(-) diff --git a/tests/corpora/test_parallel_text_corpus.py b/tests/corpora/test_parallel_text_corpus.py index c5cc3f30..9944a056 100644 --- a/tests/corpora/test_parallel_text_corpus.py +++ b/tests/corpora/test_parallel_text_corpus.py @@ -988,109 +988,110 @@ def test_get_rows_verse_ref_out_of_order() -> None: assert rows[3].source_segment == "source chapter one, verse four .".split() assert rows[3].target_segment == "target chapter one, verse four . target chapter one, verse five .".split() - def test_get_rows_different_versifications_with_verse_segments(): - sourceCorpus = DictionaryTextCorpus( - MemoryText( - "NUM", - [ - TextRow( - "NUM", - ScriptureRef.parse("NUM 17:1a", ORIGINAL_VERSIFICATION), - "source chapter seventeen, verse one a .", - ), - TextRow( - "NUM", - ScriptureRef.parse("NUM 17:1b", ORIGINAL_VERSIFICATION), - "source chapter seventeen, verse one b .", - ), - TextRow( - "NUM", - ScriptureRef.parse("NUM 17:2", ORIGINAL_VERSIFICATION), - "source chapter seventeen, verse two .", - ), - TextRow( - "NUM", - ScriptureRef.parse("NUM 17:3", ORIGINAL_VERSIFICATION), - "source chapter seventeen, verse three .", - ), - TextRow( - "NUM", - ScriptureRef.parse("NUM 17:4", ORIGINAL_VERSIFICATION), - "source chapter seventeen, verse four .", - ), - ], - ) + +def test_get_rows_different_versifications_with_verse_segments(): + source_corpus = DictionaryTextCorpus( + MemoryText( + "NUM", + [ + TextRow( + "NUM", + ScriptureRef.parse("NUM 17:1a", ORIGINAL_VERSIFICATION), + "source chapter seventeen, verse one a .", + ), + TextRow( + "NUM", + ScriptureRef.parse("NUM 17:1b", ORIGINAL_VERSIFICATION), + "source chapter seventeen, verse one b .", + ), + TextRow( + "NUM", + ScriptureRef.parse("NUM 17:2", ORIGINAL_VERSIFICATION), + "source chapter seventeen, verse two .", + ), + TextRow( + "NUM", + ScriptureRef.parse("NUM 17:3", ORIGINAL_VERSIFICATION), + "source chapter seventeen, verse three .", + ), + TextRow( + "NUM", + ScriptureRef.parse("NUM 17:4", ORIGINAL_VERSIFICATION), + "source chapter seventeen, verse four .", + ), + ], ) - targetCorpus = DictionaryTextCorpus( - MemoryText( - "NUM", - [ - TextRow( - "NUM", - ScriptureRef.parse("NUM 16:36", ENGLISH_VERSIFICATION), - "target chapter sixteen, verse thirty six .", - ), - TextRow( - "NUM", - ScriptureRef.parse("NUM 16:37", ENGLISH_VERSIFICATION), - "target chapter sixteen, verse thirty seven .", - ), - TextRow( - "NUM", - ScriptureRef.parse("NUM 16:38", ENGLISH_VERSIFICATION), - "target chapter sixteen, verse thirty eight .", - ), - TextRow( - "NUM", - ScriptureRef.parse("NUM 16:39a", ENGLISH_VERSIFICATION), - "target chapter sixteen, verse thirty nine a .", - ), - TextRow( - "NUM", - ScriptureRef.parse("NUM 16:39b", ENGLISH_VERSIFICATION), - "target chapter sixteen, verse thirty nine b .", - ), - ], - ) + ) + target_corpus = DictionaryTextCorpus( + MemoryText( + "NUM", + [ + TextRow( + "NUM", + ScriptureRef.parse("NUM 16:36", ENGLISH_VERSIFICATION), + "target chapter sixteen, verse thirty six .", + ), + TextRow( + "NUM", + ScriptureRef.parse("NUM 16:37", ENGLISH_VERSIFICATION), + "target chapter sixteen, verse thirty seven .", + ), + TextRow( + "NUM", + ScriptureRef.parse("NUM 16:38", ENGLISH_VERSIFICATION), + "target chapter sixteen, verse thirty eight .", + ), + TextRow( + "NUM", + ScriptureRef.parse("NUM 16:39a", ENGLISH_VERSIFICATION), + "target chapter sixteen, verse thirty nine a .", + ), + TextRow( + "NUM", + ScriptureRef.parse("NUM 16:39b", ENGLISH_VERSIFICATION), + "target chapter sixteen, verse thirty nine b .", + ), + ], ) + ) - # English vs. Original - # NUM 16:36-50 = NUM 17:1-15 - # NUM 17:1-13 = NUM 17:16-28 - parallelCorpus = StandardParallelTextCorpus(sourceCorpus, targetCorpus) - rows = list(parallelCorpus.get_rows()) - - assert len(rows) == 0 - - assert rows[0].source_refs == [ScriptureRef.parse("NUM 17:1a", ORIGINAL_VERSIFICATION)] - assert rows[0].target_refs == [ScriptureRef.parse("NUM 16:36", ENGLISH_VERSIFICATION)] - assert rows[0].source_segment == "source chapter seventeen, verse one a .".split() - assert rows[0].target_segment == "target chapter sixteen, verse thirty six .".split() - - assert rows[1].source_refs == [ScriptureRef.parse("NUM 17:1b", ORIGINAL_VERSIFICATION)] - assert rows[1].target_refs == [ScriptureRef.parse("NUM 16:36", ENGLISH_VERSIFICATION)] - assert rows[1].source_segment == "source chapter seventeen, verse one b .".split() - assert rows[1].target_segment == "target chapter sixteen, verse thirty six .".split() - - assert rows[2].source_refs == [ScriptureRef.parse("NUM 17:2", ORIGINAL_VERSIFICATION)] - assert rows[2].target_refs == [ScriptureRef.parse("NUM 16:37", ENGLISH_VERSIFICATION)] - assert rows[2].source_segment == "source chapter seventeen, verse two .".split() - assert rows[2].target_segment == "target chapter sixteen, verse thirty seven .".split() - - assert rows[3].source_refs == [ScriptureRef.parse("NUM 17:3", ORIGINAL_VERSIFICATION)] - assert rows[3].target_refs == [ScriptureRef.parse("NUM 16:38", ENGLISH_VERSIFICATION)] - assert rows[3].source_segment == "source chapter seventeen, verse three .".split() - assert rows[3].target_segment == "target chapter sixteen, verse thirty eight .".split() - - assert rows[4].source_refs == [ScriptureRef.parse("NUM 17:4", ORIGINAL_VERSIFICATION)] - assert rows[4].target_refs == [ScriptureRef.parse("NUM 16:39a", ENGLISH_VERSIFICATION)] - assert rows[4].source_segment == "source chapter seventeen, verse four .".split() - assert rows[4].target_segment == "target chapter sixteen, verse thirty nine a .".split() - - assert rows[5].source_refs == [ScriptureRef.parse("NUM 17:4", ORIGINAL_VERSIFICATION)] - assert rows[5].target_refs == [ScriptureRef.parse("NUM 16:39b", ENGLISH_VERSIFICATION)] - assert rows[5].source_segment == "source chapter seventeen, verse four .".split() - assert rows[5].target_segment == "target chapter sixteen, verse thirty nine b .".split() + # English vs. Original + # NUM 16:36-50 = NUM 17:1-15 + # NUM 17:1-13 = NUM 17:16-28 + parallel_corpus = StandardParallelTextCorpus(source_corpus, target_corpus) + rows = list(parallel_corpus.get_rows()) + + assert len(rows) == 0 + + assert rows[0].source_refs == [ScriptureRef.parse("NUM 17:1a", ORIGINAL_VERSIFICATION)] + assert rows[0].target_refs == [ScriptureRef.parse("NUM 16:36", ENGLISH_VERSIFICATION)] + assert rows[0].source_segment == "source chapter seventeen, verse one a .".split() + assert rows[0].target_segment == "target chapter sixteen, verse thirty six .".split() + + assert rows[1].source_refs == [ScriptureRef.parse("NUM 17:1b", ORIGINAL_VERSIFICATION)] + assert rows[1].target_refs == [ScriptureRef.parse("NUM 16:36", ENGLISH_VERSIFICATION)] + assert rows[1].source_segment == "source chapter seventeen, verse one b .".split() + assert rows[1].target_segment == "target chapter sixteen, verse thirty six .".split() + + assert rows[2].source_refs == [ScriptureRef.parse("NUM 17:2", ORIGINAL_VERSIFICATION)] + assert rows[2].target_refs == [ScriptureRef.parse("NUM 16:37", ENGLISH_VERSIFICATION)] + assert rows[2].source_segment == "source chapter seventeen, verse two .".split() + assert rows[2].target_segment == "target chapter sixteen, verse thirty seven .".split() + + assert rows[3].source_refs == [ScriptureRef.parse("NUM 17:3", ORIGINAL_VERSIFICATION)] + assert rows[3].target_refs == [ScriptureRef.parse("NUM 16:38", ENGLISH_VERSIFICATION)] + assert rows[3].source_segment == "source chapter seventeen, verse three .".split() + assert rows[3].target_segment == "target chapter sixteen, verse thirty eight .".split() + + assert rows[4].source_refs == [ScriptureRef.parse("NUM 17:4", ORIGINAL_VERSIFICATION)] + assert rows[4].target_refs == [ScriptureRef.parse("NUM 16:39a", ENGLISH_VERSIFICATION)] + assert rows[4].source_segment == "source chapter seventeen, verse four .".split() + assert rows[4].target_segment == "target chapter sixteen, verse thirty nine a .".split() + + assert rows[5].source_refs == [ScriptureRef.parse("NUM 17:4", ORIGINAL_VERSIFICATION)] + assert rows[5].target_refs == [ScriptureRef.parse("NUM 16:39b", ENGLISH_VERSIFICATION)] + assert rows[5].source_segment == "source chapter seventeen, verse four .".split() + assert rows[5].target_segment == "target chapter sixteen, verse thirty nine b .".split() def test_to_pandas() -> None: From 6e7afa337064e9c381d6341f1fd6fbb4aec93bb4 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Tue, 19 May 2026 17:45:24 -0400 Subject: [PATCH 4/6] Fix typo in test --- tests/corpora/test_parallel_text_corpus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/corpora/test_parallel_text_corpus.py b/tests/corpora/test_parallel_text_corpus.py index 9944a056..a6201214 100644 --- a/tests/corpora/test_parallel_text_corpus.py +++ b/tests/corpora/test_parallel_text_corpus.py @@ -1061,7 +1061,7 @@ def test_get_rows_different_versifications_with_verse_segments(): parallel_corpus = StandardParallelTextCorpus(source_corpus, target_corpus) rows = list(parallel_corpus.get_rows()) - assert len(rows) == 0 + assert len(rows) == 6 assert rows[0].source_refs == [ScriptureRef.parse("NUM 17:1a", ORIGINAL_VERSIFICATION)] assert rows[0].target_refs == [ScriptureRef.parse("NUM 16:36", ENGLISH_VERSIFICATION)] From 43f2a6f09516809fab2c4e38bc82e1bd25bb571d Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Tue, 19 May 2026 17:50:27 -0400 Subject: [PATCH 5/6] Explicitly set corpus versifications --- tests/corpora/test_parallel_text_corpus.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/corpora/test_parallel_text_corpus.py b/tests/corpora/test_parallel_text_corpus.py index a6201214..007a8ad3 100644 --- a/tests/corpora/test_parallel_text_corpus.py +++ b/tests/corpora/test_parallel_text_corpus.py @@ -1022,6 +1022,8 @@ def test_get_rows_different_versifications_with_verse_segments(): ], ) ) + source_corpus.versification = ORIGINAL_VERSIFICATION + target_corpus = DictionaryTextCorpus( MemoryText( "NUM", @@ -1054,6 +1056,7 @@ def test_get_rows_different_versifications_with_verse_segments(): ], ) ) + target_corpus.versification = ENGLISH_VERSIFICATION # English vs. Original # NUM 16:36-50 = NUM 17:1-15 From ab9d57ee7b1127fe513918f7f39ab4360abe1a38 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Tue, 19 May 2026 18:22:18 -0400 Subject: [PATCH 6/6] Fix typo in n_parallel_text_corpus --- machine/corpora/n_parallel_text_corpus.py | 2 +- tests/corpora/test_parallel_text_corpus.py | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/machine/corpora/n_parallel_text_corpus.py b/machine/corpora/n_parallel_text_corpus.py index 3d2b0197..c2098aae 100644 --- a/machine/corpora/n_parallel_text_corpus.py +++ b/machine/corpora/n_parallel_text_corpus.py @@ -307,7 +307,7 @@ def _create_rows( content_type = row.content_type text_id = text_id or row.text_id if self.corpora[i].is_scripture: - refs[i] = self._correct_versification([row.ref] if row.ref is None else default_refs, i) + refs[i] = self._correct_versification([row.ref] if row.ref is not None else default_refs, i) else: refs[i] = default_refs flags[i] = row.flags diff --git a/tests/corpora/test_parallel_text_corpus.py b/tests/corpora/test_parallel_text_corpus.py index 007a8ad3..a3a81f7f 100644 --- a/tests/corpora/test_parallel_text_corpus.py +++ b/tests/corpora/test_parallel_text_corpus.py @@ -994,27 +994,27 @@ def test_get_rows_different_versifications_with_verse_segments(): MemoryText( "NUM", [ - TextRow( + text_row( "NUM", ScriptureRef.parse("NUM 17:1a", ORIGINAL_VERSIFICATION), "source chapter seventeen, verse one a .", ), - TextRow( + text_row( "NUM", ScriptureRef.parse("NUM 17:1b", ORIGINAL_VERSIFICATION), "source chapter seventeen, verse one b .", ), - TextRow( + text_row( "NUM", ScriptureRef.parse("NUM 17:2", ORIGINAL_VERSIFICATION), "source chapter seventeen, verse two .", ), - TextRow( + text_row( "NUM", ScriptureRef.parse("NUM 17:3", ORIGINAL_VERSIFICATION), "source chapter seventeen, verse three .", ), - TextRow( + text_row( "NUM", ScriptureRef.parse("NUM 17:4", ORIGINAL_VERSIFICATION), "source chapter seventeen, verse four .", @@ -1028,27 +1028,27 @@ def test_get_rows_different_versifications_with_verse_segments(): MemoryText( "NUM", [ - TextRow( + text_row( "NUM", ScriptureRef.parse("NUM 16:36", ENGLISH_VERSIFICATION), "target chapter sixteen, verse thirty six .", ), - TextRow( + text_row( "NUM", ScriptureRef.parse("NUM 16:37", ENGLISH_VERSIFICATION), "target chapter sixteen, verse thirty seven .", ), - TextRow( + text_row( "NUM", ScriptureRef.parse("NUM 16:38", ENGLISH_VERSIFICATION), "target chapter sixteen, verse thirty eight .", ), - TextRow( + text_row( "NUM", ScriptureRef.parse("NUM 16:39a", ENGLISH_VERSIFICATION), "target chapter sixteen, verse thirty nine a .", ), - TextRow( + text_row( "NUM", ScriptureRef.parse("NUM 16:39b", ENGLISH_VERSIFICATION), "target chapter sixteen, verse thirty nine b .",