Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 0 additions & 20 deletions machine/corpora/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
from .dbl_bundle_text_corpus import DblBundleTextCorpus
from .dictionary_alignment_corpus import DictionaryAlignmentCorpus
from .dictionary_text_corpus import DictionaryTextCorpus
from .fallback_quotation_mark_resolver import FallbackQuotationMarkResolver
from .file_paratext_project_quote_convention_detector import FileParatextProjectQuoteConventionDetector
from .file_paratext_project_settings_parser import FileParatextProjectSettingsParser
from .file_paratext_project_text_updater import FileParatextProjectTextUpdater
from .flatten import flatten
Expand All @@ -26,13 +24,6 @@
from .paratext_project_text_updater_base import ParatextProjectTextUpdaterBase
from .paratext_text_corpus import ParatextTextCorpus
from .place_markers_usfm_update_block_handler import PlaceMarkersAlignmentInfo, PlaceMarkersUsfmUpdateBlockHandler
from .quotation_mark_denormalization_first_pass import QuotationMarkDenormalizationFirstPass
from .quotation_mark_denormalization_usfm_update_block_handler import QuotationMarkDenormalizationUsfmUpdateBlockHandler
from .quotation_mark_update_first_pass import QuotationMarkUpdateFirstPass
from .quotation_mark_update_resolution_settings import QuotationMarkUpdateResolutionSettings
from .quotation_mark_update_settings import QuotationMarkUpdateSettings
from .quotation_mark_update_strategy import QuotationMarkUpdateStrategy
from .quote_convention_changing_usfm_update_block_handler import QuoteConventionChangingUsfmUpdateBlockHandler
from .scripture_element import ScriptureElement
from .scripture_ref import EMPTY_SCRIPTURE_REF, ScriptureRef
from .scripture_ref_usfm_parser_handler import ScriptureRefUsfmParserHandler, ScriptureTextType
Expand Down Expand Up @@ -85,7 +76,6 @@
from .usx_file_text_corpus import UsxFileTextCorpus
from .usx_memory_text import UsxMemoryText
from .usx_zip_text import UsxZipText
from .zip_paratext_project_quote_convention_detector import ZipParatextProjectQuoteConventionDetector
from .zip_paratext_project_settings_parser import ZipParatextProjectSettingsParser
from .zip_paratext_project_settings_parser_base import ZipParatextProjectSettingsParserBase
from .zip_paratext_project_terms_parser import ZipParatextProjectTermsParser
Expand All @@ -96,7 +86,6 @@
"AlignmentCollection",
"AlignmentCorpus",
"AlignmentRow",
"FallbackQuotationMarkResolver",
"batch",
"Corpus",
"create_versification_ref_corpus",
Expand All @@ -106,7 +95,6 @@
"EMPTY_SCRIPTURE_REF",
"escape_spaces",
"extract_scripture_corpus",
"FileParatextProjectQuoteConventionDetector",
"FileParatextProjectSettingsParser",
"FileParatextProjectTextUpdater",
"flatten",
Expand All @@ -133,13 +121,6 @@
"PlaceMarkersAlignmentInfo",
"PlaceMarkersUsfmUpdateBlockHandler",
"parse_usfm",
"QuoteConventionChangingUsfmUpdateBlockHandler",
"QuotationMarkUpdateResolutionSettings",
"QuotationMarkUpdateStrategy",
"QuotationMarkUpdateFirstPass",
"QuotationMarkDenormalizationFirstPass",
"QuotationMarkDenormalizationUsfmUpdateBlockHandler",
"QuotationMarkUpdateSettings",
"RtlReferenceOrder",
"ScriptureElement",
"ScriptureRef",
Expand Down Expand Up @@ -189,7 +170,6 @@
"UsxFileTextCorpus",
"UsxMemoryText",
"UsxZipText",
"ZipParatextProjectQuoteConventionDetector",
"ZipParatextProjectSettingsParser",
"ZipParatextProjectSettingsParserBase",
"ZipParatextProjectTermsParser",
Expand Down
9 changes: 0 additions & 9 deletions machine/corpora/quotation_mark_denormalization_first_pass.py

This file was deleted.

22 changes: 22 additions & 0 deletions machine/punctuation_analysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
QuoteContinuerState,
QuoteContinuerStyle,
)
from .fallback_quotation_mark_resolver import FallbackQuotationMarkResolver
from .file_paratext_project_quote_convention_detector import FileParatextProjectQuoteConventionDetector
from .paratext_project_quote_convention_detector import ParatextProjectQuoteConventionDetector
from .preliminary_quotation_mark_analyzer import (
ApostropheProportionStatistics,
PreliminaryApostropheAnalyzer,
Expand All @@ -14,6 +17,8 @@
QuotationMarkSequences,
QuotationMarkWordPositions,
)
from .quotation_mark_denormalization_first_pass import QuotationMarkDenormalizationFirstPass
from .quotation_mark_denormalization_usfm_update_block_handler import QuotationMarkDenormalizationUsfmUpdateBlockHandler
from .quotation_mark_direction import QuotationMarkDirection
from .quotation_mark_finder import QuotationMarkFinder
from .quotation_mark_metadata import QuotationMarkMetadata
Expand All @@ -22,7 +27,12 @@
from .quotation_mark_resolver import QuotationMarkResolver
from .quotation_mark_string_match import QuotationMarkStringMatch
from .quotation_mark_tabulator import QuotationMarkCounts, QuotationMarkTabulator
from .quotation_mark_update_first_pass import QuotationMarkUpdateFirstPass
from .quotation_mark_update_resolution_settings import QuotationMarkUpdateResolutionSettings
from .quotation_mark_update_settings import QuotationMarkUpdateSettings
from .quotation_mark_update_strategy import QuotationMarkUpdateStrategy
from .quote_convention import QuoteConvention, SingleLevelQuoteConvention
from .quote_convention_changing_usfm_update_block_handler import QuoteConventionChangingUsfmUpdateBlockHandler
from .quote_convention_detection_resolution_settings import QuoteConventionDetectionResolutionSettings
from .quote_convention_detector import QuoteConventionAnalysis, QuoteConventionDetector
from .quote_convention_set import QuoteConventionSet
Expand All @@ -31,27 +41,38 @@
from .usfm_marker_type import UsfmMarkerType
from .usfm_structure_extractor import UsfmStructureExtractor
from .verse import Verse
from .zip_paratext_project_quote_convention_detector import ZipParatextProjectQuoteConventionDetector

__all__ = [
"ApostropheProportionStatistics",
"Chapter",
"DepthBasedQuotationMarkResolver",
"FallbackQuotationMarkResolver",
"FileParatextProjectQuoteConventionDetector",
"ParatextProjectQuoteConventionDetector",
"PreliminaryApostropheAnalyzer",
"PreliminaryQuotationMarkAnalyzer",
"SingleLevelQuoteConvention",
"QuoteContinuerState",
"QuoteContinuerStyle",
"QuotationMarkCategorizer",
"QuotationMarkCounts",
"QuotationMarkDenormalizationFirstPass",
"QuotationMarkDenormalizationUsfmUpdateBlockHandler",
"QuotationMarkDirection",
"QuotationMarkGrouper",
"QuotationMarkMetadata",
"QuotationMarkResolverState",
"QuotationMarkSequences",
"QuotationMarkStringMatch",
"QuotationMarkUpdateFirstPass",
"QuotationMarkUpdateResolutionSettings",
"QuotationMarkUpdateSettings",
"QuotationMarkUpdateStrategy",
"QuotationMarkWordPositions",
"QuoteConvention",
"QuoteConventionAnalysis",
"QuoteConventionChangingUsfmUpdateBlockHandler",
"QuoteConventionDetectionResolutionSettings",
"QuotationMarkFinder",
"QuotationMarkResolutionIssue",
Expand All @@ -65,4 +86,5 @@
"UsfmMarkerType",
"UsfmStructureExtractor",
"Verse",
"ZipParatextProjectQuoteConventionDetector",
]
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from typing import Generator, Optional, Set

from ..punctuation_analysis.quotation_mark_direction import QuotationMarkDirection
from ..punctuation_analysis.quotation_mark_metadata import QuotationMarkMetadata
from ..punctuation_analysis.quotation_mark_resolution_issue import QuotationMarkResolutionIssue
from ..punctuation_analysis.quotation_mark_resolution_settings import QuotationMarkResolutionSettings
from ..punctuation_analysis.quotation_mark_resolver import QuotationMarkResolver
from ..punctuation_analysis.quotation_mark_string_match import QuotationMarkStringMatch
from .quotation_mark_direction import QuotationMarkDirection
from .quotation_mark_metadata import QuotationMarkMetadata
from .quotation_mark_resolution_issue import QuotationMarkResolutionIssue
from .quotation_mark_resolution_settings import QuotationMarkResolutionSettings
from .quotation_mark_resolver import QuotationMarkResolver
from .quotation_mark_string_match import QuotationMarkStringMatch


class FallbackQuotationMarkResolver(QuotationMarkResolver):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from pathlib import Path
from typing import BinaryIO

from ..corpora.file_paratext_project_settings_parser import FileParatextProjectSettingsParser
from ..utils.typeshed import StrPath
from .file_paratext_project_settings_parser import FileParatextProjectSettingsParser
from .paratext_project_quote_convention_detector import ParatextProjectQuoteConventionDetector


Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from abc import ABC, abstractmethod
from typing import BinaryIO, Optional, Union

from ..punctuation_analysis.quote_convention_detector import QuoteConventionAnalysis, QuoteConventionDetector
from ..corpora.paratext_project_settings import ParatextProjectSettings
from ..corpora.paratext_project_settings_parser_base import ParatextProjectSettingsParserBase
from ..corpora.usfm_parser import parse_usfm
from ..utils.typeshed import StrPath
from .paratext_project_settings import ParatextProjectSettings
from .paratext_project_settings_parser_base import ParatextProjectSettingsParserBase
from .usfm_parser import parse_usfm
from .quote_convention_detector import QuoteConventionAnalysis, QuoteConventionDetector


class ParatextProjectQuoteConventionDetector(ABC):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from .quotation_mark_update_first_pass import QuotationMarkUpdateFirstPass
from .quote_convention import QuoteConvention


# Convenience wrapper: callers supply only the target quote convention instead of
# having to know that two conventions are required, the first being the normalized
# form of the second.
class QuotationMarkDenormalizationFirstPass(QuotationMarkUpdateFirstPass):
    """First pass specialised for denormalizing quotation marks.

    Forwards the normalized form of ``target_quote_convention`` as the first
    convention and ``target_quote_convention`` itself as the second convention
    to the ``QuotationMarkUpdateFirstPass`` initializer.
    """

    def __init__(self, target_quote_convention: QuoteConvention):
        # The convention to update from is derived from the target, so the
        # caller never has to build the normalized convention themselves.
        normalized_quote_convention = target_quote_convention.normalize()
        super().__init__(normalized_quote_convention, target_quote_convention)
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
from ..punctuation_analysis.quote_convention import QuoteConvention
from .quotation_mark_update_settings import QuotationMarkUpdateSettings
from .quote_convention import QuoteConvention
from .quote_convention_changing_usfm_update_block_handler import QuoteConventionChangingUsfmUpdateBlockHandler


# This is a convenience class so that users don't have to know to normalize the source quote convention
# This is a convenience class so that users don't have to know to pass in two quote conventions,
# with the first being the normalized version of the second.
class QuotationMarkDenormalizationUsfmUpdateBlockHandler(QuoteConventionChangingUsfmUpdateBlockHandler):

def __init__(
self,
source_quote_convention: QuoteConvention,
target_quote_convention: QuoteConvention,
settings: QuotationMarkUpdateSettings = QuotationMarkUpdateSettings(),
):
super().__init__(source_quote_convention.normalize(), target_quote_convention, settings)
super().__init__(target_quote_convention.normalize(), target_quote_convention, settings)
Original file line number Diff line number Diff line change
@@ -1,56 +1,56 @@
from typing import Dict, List, Set

from ..punctuation_analysis.chapter import Chapter
from ..punctuation_analysis.depth_based_quotation_mark_resolver import DepthBasedQuotationMarkResolver
from ..punctuation_analysis.quotation_mark_finder import QuotationMarkFinder
from ..punctuation_analysis.quotation_mark_resolution_issue import QuotationMarkResolutionIssue
from ..punctuation_analysis.quotation_mark_resolver import QuotationMarkResolver
from ..punctuation_analysis.quotation_mark_string_match import QuotationMarkStringMatch
from ..punctuation_analysis.quote_convention import QuoteConvention
from ..punctuation_analysis.quote_convention_set import QuoteConventionSet
from ..punctuation_analysis.usfm_structure_extractor import UsfmStructureExtractor
from .chapter import Chapter
from .depth_based_quotation_mark_resolver import DepthBasedQuotationMarkResolver
from .quotation_mark_finder import QuotationMarkFinder
from .quotation_mark_resolution_issue import QuotationMarkResolutionIssue
from .quotation_mark_resolver import QuotationMarkResolver
from .quotation_mark_string_match import QuotationMarkStringMatch
from .quotation_mark_update_resolution_settings import QuotationMarkUpdateResolutionSettings
from .quotation_mark_update_strategy import QuotationMarkUpdateStrategy
from .quote_convention import QuoteConvention
from .quote_convention_set import QuoteConventionSet
from .usfm_structure_extractor import UsfmStructureExtractor


# Determines the best strategy to take for each chapter
class QuotationMarkUpdateFirstPass(UsfmStructureExtractor):

def __init__(self, source_quote_convention: QuoteConvention, target_quote_convention: QuoteConvention):
def __init__(self, old_quote_convention: QuoteConvention, new_quote_convention: QuoteConvention):
super().__init__()
self._quotation_mark_finder: QuotationMarkFinder = QuotationMarkFinder(
QuoteConventionSet([source_quote_convention])
QuoteConventionSet([old_quote_convention])
)
self._quotation_mark_resolver: QuotationMarkResolver = DepthBasedQuotationMarkResolver(
QuotationMarkUpdateResolutionSettings(source_quote_convention)
QuotationMarkUpdateResolutionSettings(old_quote_convention)
)
self._will_fallback_mode_work: bool = self._check_whether_fallback_mode_will_work(
source_quote_convention, target_quote_convention
old_quote_convention, new_quote_convention
)

def _check_whether_fallback_mode_will_work(
self, source_quote_convention: QuoteConvention, target_quote_convention: QuoteConvention
self, old_quote_convention: QuoteConvention, new_quote_convention: QuoteConvention
) -> bool:
opening_target_marks_by_source_marks: Dict[str, str] = {}
closing_target_marks_by_source_marks: Dict[str, str] = {}
for depth in range(1, min(source_quote_convention.num_levels, target_quote_convention.num_levels) + 1):
source_opening_quotation_mark = source_quote_convention.get_opening_quotation_mark_at_depth(depth)
target_opening_quotation_mark = target_quote_convention.get_opening_quotation_mark_at_depth(depth)
new_opening_marks_by_old_marks: Dict[str, str] = {}
new_closing_marks_by_old_marks: Dict[str, str] = {}
for depth in range(1, min(old_quote_convention.num_levels, new_quote_convention.num_levels) + 1):
old_opening_quotation_mark = old_quote_convention.get_opening_quotation_mark_at_depth(depth)
new_opening_quotation_mark = new_quote_convention.get_opening_quotation_mark_at_depth(depth)
if (
source_opening_quotation_mark in opening_target_marks_by_source_marks
and opening_target_marks_by_source_marks[source_opening_quotation_mark] != target_opening_quotation_mark
old_opening_quotation_mark in new_opening_marks_by_old_marks
and new_opening_marks_by_old_marks[old_opening_quotation_mark] != new_opening_quotation_mark
):
return False
opening_target_marks_by_source_marks[source_opening_quotation_mark] = target_opening_quotation_mark
new_opening_marks_by_old_marks[old_opening_quotation_mark] = new_opening_quotation_mark

source_closing_quotation_mark = source_quote_convention.get_closing_quotation_mark_at_depth(depth)
target_closing_quotation_mark = target_quote_convention.get_closing_quotation_mark_at_depth(depth)
old_closing_quotation_mark = old_quote_convention.get_closing_quotation_mark_at_depth(depth)
new_closing_quotation_mark = new_quote_convention.get_closing_quotation_mark_at_depth(depth)
if (
source_closing_quotation_mark in closing_target_marks_by_source_marks
and closing_target_marks_by_source_marks[source_closing_quotation_mark] != target_closing_quotation_mark
old_closing_quotation_mark in new_closing_marks_by_old_marks
and new_closing_marks_by_old_marks[old_closing_quotation_mark] != new_closing_quotation_mark
):
return False
closing_target_marks_by_source_marks[source_closing_quotation_mark] = target_closing_quotation_mark
new_closing_marks_by_old_marks[old_closing_quotation_mark] = new_closing_quotation_mark

return True

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@

import regex

from ..punctuation_analysis.quotation_mark_direction import QuotationMarkDirection
from ..punctuation_analysis.quotation_mark_resolution_settings import QuotationMarkResolutionSettings
from ..punctuation_analysis.quotation_mark_string_match import QuotationMarkStringMatch
from ..punctuation_analysis.quote_convention import QuoteConvention
from ..punctuation_analysis.quote_convention_set import QuoteConventionSet
from .quotation_mark_direction import QuotationMarkDirection
from .quotation_mark_resolution_settings import QuotationMarkResolutionSettings
from .quotation_mark_string_match import QuotationMarkStringMatch
from .quote_convention import QuoteConvention
from .quote_convention_set import QuoteConventionSet


class QuotationMarkUpdateResolutionSettings(QuotationMarkResolutionSettings):
def __init__(self, source_quote_convention: QuoteConvention):
self._source_quote_convention = source_quote_convention
self._quote_convention_singleton_set = QuoteConventionSet([self._source_quote_convention])
def __init__(self, old_quote_convention: QuoteConvention):
self._old_quote_convention = old_quote_convention
self._quote_convention_singleton_set = QuoteConventionSet([self._old_quote_convention])

def is_valid_opening_quotation_mark(self, quotation_mark_match: QuotationMarkStringMatch) -> bool:
return quotation_mark_match.is_valid_opening_quotation_mark(self._quote_convention_singleton_set)
Expand All @@ -36,9 +36,9 @@ def should_rely_on_paragraph_markers(self):
return False

def get_possible_depths(self, quotation_mark: str, direction: QuotationMarkDirection) -> Set[int]:
return self._source_quote_convention.get_possible_depths(quotation_mark, direction)
return self._old_quote_convention.get_possible_depths(quotation_mark, direction)

def metadata_matches_quotation_mark(
self, quotation_mark: str, depth: int, direction: QuotationMarkDirection
) -> bool:
return self._source_quote_convention.get_expected_quotation_mark(depth, direction) == quotation_mark
return self._old_quote_convention.get_expected_quotation_mark(depth, direction) == quotation_mark
Loading
Loading