diff --git a/machine/corpora/n_parallel_text_corpus.py b/machine/corpora/n_parallel_text_corpus.py index 3d2b0197..c2098aae 100644 --- a/machine/corpora/n_parallel_text_corpus.py +++ b/machine/corpora/n_parallel_text_corpus.py @@ -307,7 +307,7 @@ def _create_rows( content_type = row.content_type text_id = text_id or row.text_id if self.corpora[i].is_scripture: - refs[i] = self._correct_versification([row.ref] if row.ref is None else default_refs, i) + refs[i] = self._correct_versification([row.ref] if row.ref is not None else default_refs, i) else: refs[i] = default_refs flags[i] = row.flags diff --git a/machine/scripture/verse_ref.py b/machine/scripture/verse_ref.py index c5feba16..5732fcfc 100644 --- a/machine/scripture/verse_ref.py +++ b/machine/scripture/verse_ref.py @@ -95,9 +95,9 @@ def from_string(cls, verse_str: str, versification: Optional[Versification] = No return VerseRef(b_cv[0], c_v[0], c_v[1], versification) @classmethod - def try_from_string(cls, verse_str: str) -> Optional[VerseRef]: + def try_from_string(cls, verse_str: str, versification: Optional[Versification] = None) -> Optional[VerseRef]: try: - return cls.from_string(verse_str) + return cls.from_string(verse_str, versification) except ValueError: return None @@ -402,6 +402,18 @@ def exact_equals(self, other: object) -> bool: and self.versification == other.versification ) + def remove_segments(self) -> VerseRef: + if not self.segment(): + return self.copy() + vr = VerseRef.try_from_string( + f"{self.book} {self.chapter_num}:{','.join([str(v.verse_num) for v in self.all_verses()])}", + self.versification, + ) + if vr is None: + vr = self.copy() + vr.simplify() + return vr + def __eq__(self, other): if not isinstance(other, VerseRef): return NotImplemented @@ -816,9 +828,11 @@ def first_included_verse(self, book_num: int, chapter_num: int) -> Optional[Vers def is_excluded(self, bbbcccvvv: int) -> bool: return bbbcccvvv in self.excluded_verses - def change_versification(self, vref: VerseRef) -> bool: + def change_versification(self, vref: VerseRef, ignore_segments: bool = False) -> bool: if vref.has_multiple: return self._change_versification_with_ranges(vref) + if vref.segment() and not ignore_segments: + return self._change_versification_with_segments(vref) if vref.versification == NULL_VERSIFICATION: vref.versification = self @@ -904,6 +918,33 @@ def _change_versification_with_ranges(self, vref: VerseRef) -> bool: return all_same_chapter + def _change_versification_with_segments(self, orig_vref: VerseRef) -> bool: + vref = orig_vref.copy() + all_in_one_chapter = self.change_versification(vref, ignore_segments=True) + if not vref.segment(): + orig_vref.copy_from(vref) + return all_in_one_chapter + + vref_without_segments = orig_vref.remove_segments() + all_in_one_chapter = self.change_versification(vref_without_segments, ignore_segments=True) + if vref_without_segments != vref.remove_segments(): + verses = [ + v_with_correct_number.verse + v_with_segments.segment() + for (v_with_segments, v_with_correct_number) in zip( + orig_vref.all_verses(), vref_without_segments.all_verses() + ) + ] + + combined_vr = VerseRef.try_from_string( + f"{vref_without_segments.book} {vref_without_segments.chapter_num}:{','.join(verses)}", self + ) + if combined_vr is not None: + orig_vref.copy_from(combined_vr) + return all_in_one_chapter + + orig_vref.copy_from(vref) + return all_in_one_chapter + class VerseMappings: def __init__(self) -> None: diff --git a/tests/corpora/test_parallel_text_corpus.py b/tests/corpora/test_parallel_text_corpus.py index b5558e94..a3a81f7f 100644 --- a/tests/corpora/test_parallel_text_corpus.py +++ b/tests/corpora/test_parallel_text_corpus.py @@ -989,6 +989,114 @@ def test_get_rows_verse_ref_out_of_order() -> None: assert rows[3].target_segment == "target chapter one, verse four . target chapter one, verse five .".split() +def test_get_rows_different_versifications_with_verse_segments(): + source_corpus = DictionaryTextCorpus( + MemoryText( + "NUM", + [ + text_row( + "NUM", + ScriptureRef.parse("NUM 17:1a", ORIGINAL_VERSIFICATION), + "source chapter seventeen, verse one a .", + ), + text_row( + "NUM", + ScriptureRef.parse("NUM 17:1b", ORIGINAL_VERSIFICATION), + "source chapter seventeen, verse one b .", + ), + text_row( + "NUM", + ScriptureRef.parse("NUM 17:2", ORIGINAL_VERSIFICATION), + "source chapter seventeen, verse two .", + ), + text_row( + "NUM", + ScriptureRef.parse("NUM 17:3", ORIGINAL_VERSIFICATION), + "source chapter seventeen, verse three .", + ), + text_row( + "NUM", + ScriptureRef.parse("NUM 17:4", ORIGINAL_VERSIFICATION), + "source chapter seventeen, verse four .", + ), + ], + ) + ) + source_corpus.versification = ORIGINAL_VERSIFICATION + + target_corpus = DictionaryTextCorpus( + MemoryText( + "NUM", + [ + text_row( + "NUM", + ScriptureRef.parse("NUM 16:36", ENGLISH_VERSIFICATION), + "target chapter sixteen, verse thirty six .", + ), + text_row( + "NUM", + ScriptureRef.parse("NUM 16:37", ENGLISH_VERSIFICATION), + "target chapter sixteen, verse thirty seven .", + ), + text_row( + "NUM", + ScriptureRef.parse("NUM 16:38", ENGLISH_VERSIFICATION), + "target chapter sixteen, verse thirty eight .", + ), + text_row( + "NUM", + ScriptureRef.parse("NUM 16:39a", ENGLISH_VERSIFICATION), + "target chapter sixteen, verse thirty nine a .", + ), + text_row( + "NUM", + ScriptureRef.parse("NUM 16:39b", ENGLISH_VERSIFICATION), + "target chapter sixteen, verse thirty nine b .", + ), + ], + ) + ) + target_corpus.versification = ENGLISH_VERSIFICATION + + # English vs. Original + # NUM 16:36-50 = NUM 17:1-15 + # NUM 17:1-13 = NUM 17:16-28 + parallel_corpus = StandardParallelTextCorpus(source_corpus, target_corpus) + rows = list(parallel_corpus.get_rows()) + + assert len(rows) == 6 + + assert rows[0].source_refs == [ScriptureRef.parse("NUM 17:1a", ORIGINAL_VERSIFICATION)] + assert rows[0].target_refs == [ScriptureRef.parse("NUM 16:36", ENGLISH_VERSIFICATION)] + assert rows[0].source_segment == "source chapter seventeen, verse one a .".split() + assert rows[0].target_segment == "target chapter sixteen, verse thirty six .".split() + + assert rows[1].source_refs == [ScriptureRef.parse("NUM 17:1b", ORIGINAL_VERSIFICATION)] + assert rows[1].target_refs == [ScriptureRef.parse("NUM 16:36", ENGLISH_VERSIFICATION)] + assert rows[1].source_segment == "source chapter seventeen, verse one b .".split() + assert rows[1].target_segment == "target chapter sixteen, verse thirty six .".split() + + assert rows[2].source_refs == [ScriptureRef.parse("NUM 17:2", ORIGINAL_VERSIFICATION)] + assert rows[2].target_refs == [ScriptureRef.parse("NUM 16:37", ENGLISH_VERSIFICATION)] + assert rows[2].source_segment == "source chapter seventeen, verse two .".split() + assert rows[2].target_segment == "target chapter sixteen, verse thirty seven .".split() + + assert rows[3].source_refs == [ScriptureRef.parse("NUM 17:3", ORIGINAL_VERSIFICATION)] + assert rows[3].target_refs == [ScriptureRef.parse("NUM 16:38", ENGLISH_VERSIFICATION)] + assert rows[3].source_segment == "source chapter seventeen, verse three .".split() + assert rows[3].target_segment == "target chapter sixteen, verse thirty eight .".split() + + assert rows[4].source_refs == [ScriptureRef.parse("NUM 17:4", ORIGINAL_VERSIFICATION)] + assert rows[4].target_refs == [ScriptureRef.parse("NUM 16:39a", ENGLISH_VERSIFICATION)] + assert rows[4].source_segment == "source chapter seventeen, verse four .".split() + assert rows[4].target_segment == "target chapter sixteen, verse thirty nine a .".split() + + assert rows[5].source_refs == [ScriptureRef.parse("NUM 17:4", ORIGINAL_VERSIFICATION)] + assert rows[5].target_refs == [ScriptureRef.parse("NUM 16:39b", ENGLISH_VERSIFICATION)] + assert rows[5].source_segment == "source chapter seventeen, verse four .".split() + assert rows[5].target_segment == "target chapter sixteen, verse thirty nine b .".split() + + def test_to_pandas() -> None: source_corpus = DictionaryTextCorpus( MemoryText( diff --git a/tests/scripture/test_verse_ref.py b/tests/scripture/test_verse_ref.py index 911225ad..e72f5346 100644 --- a/tests/scripture/test_verse_ref.py +++ b/tests/scripture/test_verse_ref.py @@ -757,3 +757,54 @@ def test_unbridge() -> None: assert VerseRef.from_string("EXO 6:9a,9b").unbridge() == VerseRef.from_string("EXO 6:9a") assert VerseRef.from_string("EXO 6:4-10").unbridge() == VerseRef.from_string("EXO 6:4") assert VerseRef.from_string("EXO 6:150monkeys").unbridge() == VerseRef.from_string("EXO 6:150monkeys") + + +def test_remove_segments() -> None: + assert VerseRef.from_string("MAT 1:1").remove_segments() == VerseRef.from_string("MAT 1:1") + assert VerseRef.from_string("MAT 1:1a").remove_segments() == VerseRef.from_string("MAT 1:1") + assert VerseRef.from_string("MAT 1:1a-2b,5a").remove_segments() == VerseRef.from_string("MAT 1:1,2,5") + assert VerseRef.from_string("MAT 1:1a-3b").remove_segments() == VerseRef.from_string("MAT 1:1,2,3") + + +def test_change_versification_with_segments() -> None: + + # English vs. Original + # NUM 16:36-50 = NUM 17:1-15 + # NUM 17:1-13 = NUM 17:16-28 + # ESG 1:1 = ESG 1:1a + # ESG 1:2 = ESG 1:1b + + verse_ref = VerseRef.from_string("NUM 17:1", ENGLISH_VERSIFICATION) + verse_ref.change_versification(ORIGINAL_VERSIFICATION) + assert verse_ref.versification == ORIGINAL_VERSIFICATION + assert str(verse_ref) == "NUM 17:16" + + verse_ref = VerseRef.from_string("NUM 17:1a", ENGLISH_VERSIFICATION) + verse_ref.change_versification(ORIGINAL_VERSIFICATION) + assert verse_ref.versification == ORIGINAL_VERSIFICATION + assert str(verse_ref) == "NUM 17:16a" + + verse_ref = VerseRef.from_string("NUM 17:1a-2b,5a", ENGLISH_VERSIFICATION) + verse_ref.change_versification(ORIGINAL_VERSIFICATION) + assert verse_ref.versification == ORIGINAL_VERSIFICATION + assert str(verse_ref) == "NUM 17:16a-17b,20a" + + verse_ref = VerseRef.from_string("NUM 17:13a-15a", ORIGINAL_VERSIFICATION) + verse_ref.change_versification(ENGLISH_VERSIFICATION) + assert verse_ref.versification == ENGLISH_VERSIFICATION + assert str(verse_ref) == "NUM 16:48a-50a" + + verse_ref = VerseRef.from_string("NUM 17:1a", ENGLISH_VERSIFICATION) + verse_ref.change_versification(ENGLISH_VERSIFICATION) + assert verse_ref.versification == ENGLISH_VERSIFICATION + assert str(verse_ref) == "NUM 17:1a" + + verse_ref = VerseRef.from_string("ESG 1:1b", ORIGINAL_VERSIFICATION) + verse_ref.change_versification(ENGLISH_VERSIFICATION) + assert verse_ref.versification == ENGLISH_VERSIFICATION + assert str(verse_ref) == "ESG 1:2" + + verse_ref = VerseRef.from_string("ESG 1:2", ENGLISH_VERSIFICATION) + verse_ref.change_versification(ORIGINAL_VERSIFICATION) + assert verse_ref.versification == ORIGINAL_VERSIFICATION + assert str(verse_ref) == "ESG 1:1b"