From 65d0d15e3df75d30454049d41462c5a29e5edf73 Mon Sep 17 00:00:00 2001
From: Ben King <benjaminking@sil.org>
Date: Tue, 28 Oct 2025 14:41:23 -0400
Subject: [PATCH 1/7] Use a weighted average of books for Paratext project
 quote convention detection

---
 machine/punctuation_analysis/__init__.py      |  3 +-
 ...atext_project_quote_convention_detector.py | 58 +++++++++++++++++--
 .../quotation_mark_tabulator.py               | 17 ++++++
 .../punctuation_analysis/quote_convention.py  |  6 ++
 .../quote_convention_analysis.py              | 51 ++++++++++++++++
 .../quote_convention_detector.py              | 30 ++++------
 .../quote_convention_set.py                   | 11 ++++
 ...atext_project_quote_convention_detector.py |  2 +
 ...atext_project_quote_convention_detector.py | 21 +++++++
 9 files changed, 176 insertions(+), 23 deletions(-)
 create mode 100644 machine/punctuation_analysis/quote_convention_analysis.py

diff --git a/machine/punctuation_analysis/__init__.py b/machine/punctuation_analysis/__init__.py
index aa28e3b7..1773cbf2 100644
--- a/machine/punctuation_analysis/__init__.py
+++ b/machine/punctuation_analysis/__init__.py
@@ -32,9 +32,10 @@
 from .quotation_mark_update_settings import QuotationMarkUpdateSettings
 from .quotation_mark_update_strategy import QuotationMarkUpdateStrategy
 from .quote_convention import QuoteConvention, SingleLevelQuoteConvention
+from .quote_convention_analysis import QuoteConventionAnalysis
 from .quote_convention_changing_usfm_update_block_handler import QuoteConventionChangingUsfmUpdateBlockHandler
 from .quote_convention_detection_resolution_settings import QuoteConventionDetectionResolutionSettings
-from .quote_convention_detector import QuoteConventionAnalysis, QuoteConventionDetector
+from .quote_convention_detector import QuoteConventionDetector
 from .quote_convention_set import QuoteConventionSet
 from .standard_quote_conventions import STANDARD_QUOTE_CONVENTIONS
 from .text_segment import TextSegment
diff --git a/machine/punctuation_analysis/paratext_project_quote_convention_detector.py b/machine/punctuation_analysis/paratext_project_quote_convention_detector.py
index 808a717a..c3b73b86 100644
--- a/machine/punctuation_analysis/paratext_project_quote_convention_detector.py
+++ b/machine/punctuation_analysis/paratext_project_quote_convention_detector.py
@@ -1,4 +1,5 @@
 from abc import ABC, abstractmethod
+from collections import defaultdict
 from typing import BinaryIO, Dict, List, Optional, Union
 
 from ..corpora.paratext_project_settings import ParatextProjectSettings
@@ -6,7 +7,43 @@
 from ..corpora.usfm_parser import parse_usfm
 from ..scripture.canon import book_id_to_number, get_scripture_books
 from ..utils.typeshed import StrPath
-from .quote_convention_detector import QuoteConventionAnalysis, QuoteConventionDetector
+from .quotation_mark_tabulator import QuotationMarkTabulator
+from .quote_convention import QuoteConvention
+from .quote_convention_analysis import QuoteConventionAnalysis
+from .quote_convention_detector import QuoteConventionDetector
+
+
+class WeightedAverageQuoteConventionAnalysisBuilder:
+    def __init__(self) -> None:
+        self._total_weight: float = 0
+        self._convention_votes: Dict[QuoteConvention, float] = defaultdict(float)
+        self._total_tabulated_quotation_marks = QuotationMarkTabulator()
+
+    def record_book_results(
+        self,
+        quote_convention_analysis: Optional[QuoteConventionAnalysis],
+        tabulated_quotation_marks: QuotationMarkTabulator,
+    ) -> None:
+        if quote_convention_analysis is None:
+            return
+
+        self._total_tabulated_quotation_marks.tabulate_from(tabulated_quotation_marks)
+
+        self._total_weight += quote_convention_analysis.weight
+        for convention, score in quote_convention_analysis.get_all_convention_scores():
+            self._convention_votes[convention] += score * quote_convention_analysis.weight
+
+    def to_quote_convention_analysis(self) -> Optional[QuoteConventionAnalysis]:
+        if self._total_weight == 0:
+            return None
+
+        quote_convention_analysis_builder = QuoteConventionAnalysis.Builder(self._total_tabulated_quotation_marks)
+
+        for convention, total_score in self._convention_votes.items():
+            if total_score > 0:
+                quote_convention_analysis_builder.record_convention_score(convention, total_score / self._total_weight)
+
+        return quote_convention_analysis_builder.build()
 
 
 class ParatextProjectQuoteConventionDetector(ABC):
@@ -17,15 +54,20 @@ def __init__(self, settings: Union[ParatextProjectSettings, ParatextProjectSetti
             self._settings = settings
 
     def get_quote_convention_analysis(
-        self, handler: Optional[QuoteConventionDetector] = None, include_chapters: Optional[Dict[int, List[int]]] = None
+        self, include_chapters: Optional[Dict[int, List[int]]] = None
     ) -> Optional[QuoteConventionAnalysis]:
-        handler = QuoteConventionDetector() if handler is None else handler
+
+        weighted_average_quote_convention_analysis_builder = WeightedAverageQuoteConventionAnalysisBuilder()
+
         for book_id in get_scripture_books():
             if include_chapters is not None and book_id_to_number(book_id) not in include_chapters:
                 continue
             file_name: str = self._settings.get_book_file_name(book_id)
             if not self._exists(file_name):
                 continue
+
+            handler = QuoteConventionDetector()
+
             with self._open(file_name) as sfm_file:
                 usfm: str = sfm_file.read().decode(self._settings.encoding)
             try:
@@ -37,7 +79,15 @@ def get_quote_convention_analysis(
                     f". Error: '{e}'"
                 )
                 raise RuntimeError(error_message) from e
-        return handler.detect_quote_convention(include_chapters)
+
+            quote_convention_analysis, tabulated_quotation_marks = (
+                handler.detect_quote_convention_and_get_tabulated_quotation_marks(include_chapters)
+            )
+            weighted_average_quote_convention_analysis_builder.record_book_results(
+                quote_convention_analysis, tabulated_quotation_marks
+            )
+
+        return weighted_average_quote_convention_analysis_builder.to_quote_convention_analysis()
 
     @abstractmethod
     def _exists(self, file_name: StrPath) -> bool: ...
diff --git a/machine/punctuation_analysis/quotation_mark_tabulator.py b/machine/punctuation_analysis/quotation_mark_tabulator.py
index c76ff540..308be297 100644
--- a/machine/punctuation_analysis/quotation_mark_tabulator.py
+++ b/machine/punctuation_analysis/quotation_mark_tabulator.py
@@ -15,6 +15,10 @@ def count_quotation_mark(self, quotation_mark: str) -> None:
         self._quotation_mark_counter.update([quotation_mark])
         self._total_count += 1
 
+    def count_from(self, quotation_mark_counts: "QuotationMarkCounts") -> None:
+        self._quotation_mark_counter.update(quotation_mark_counts._quotation_mark_counter)
+        self._total_count += quotation_mark_counts._total_count
+
     def find_best_quotation_mark_proportion(self) -> tuple[str, int, int]:
         return self._quotation_mark_counter.most_common(1)[0] + (self._total_count,)
 
@@ -36,6 +40,13 @@ def tabulate(self, quotation_marks: list[QuotationMarkMetadata]) -> None:
         for quotation_mark in quotation_marks:
             self._count_quotation_mark(quotation_mark)
 
+    def tabulate_from(self, tabulated_quotation_marks: "QuotationMarkTabulator") -> None:
+        for (
+            depth_and_direction,
+            quotation_mark_counts,
+        ) in tabulated_quotation_marks._quotation_counts_by_depth_and_direction.items():
+            self._quotation_counts_by_depth_and_direction[depth_and_direction].count_from(quotation_mark_counts)
+
     def _count_quotation_mark(self, quotation_mark: QuotationMarkMetadata) -> None:
         key = (quotation_mark.depth, quotation_mark.direction)
         self._quotation_counts_by_depth_and_direction[key].count_quotation_mark(quotation_mark.quotation_mark)
@@ -48,6 +59,12 @@ def _find_most_common_quotation_mark_with_depth_and_direction(
     ) -> tuple[str, int, int]:
         return self._quotation_counts_by_depth_and_direction[(depth, direction)].find_best_quotation_mark_proportion()
 
+    def get_total_quotation_mark_count(self) -> int:
+        total_count = 0
+        for counts in self._quotation_counts_by_depth_and_direction.values():
+            total_count += counts.get_observed_count()
+        return total_count
+
     def calculate_similarity(self, quote_convention: QuoteConvention) -> float:
         weighted_difference = 0
         total_weight = 0
diff --git a/machine/punctuation_analysis/quote_convention.py b/machine/punctuation_analysis/quote_convention.py
index 386cd559..3160894c 100644
--- a/machine/punctuation_analysis/quote_convention.py
+++ b/machine/punctuation_analysis/quote_convention.py
@@ -37,6 +37,9 @@ def normalize(self) -> "SingleLevelQuoteConvention":
         )
         return SingleLevelQuoteConvention(normalized_opening_quotation_mark, normalized_closing_quotation_mark)
 
+    def __hash__(self) -> int:
+        return hash((self.opening_quotation_mark, self.closing_quotation_mark))
+
 
 class QuoteConvention:
     def __init__(self, name: str, level_conventions: list[SingleLevelQuoteConvention]):
@@ -57,6 +60,9 @@ def __eq__(self, value):
                 return False
         return True
 
+    def __hash__(self) -> int:
+        return hash((tuple(self.level_conventions)))
+
     @property
     def name(self) -> str:
         return self._name
diff --git a/machine/punctuation_analysis/quote_convention_analysis.py b/machine/punctuation_analysis/quote_convention_analysis.py
new file mode 100644
index 00000000..c0a8cfa2
--- /dev/null
+++ b/machine/punctuation_analysis/quote_convention_analysis.py
@@ -0,0 +1,51 @@
+from .quotation_mark_tabulator import QuotationMarkTabulator
+from .quote_convention import QuoteConvention
+
+
+class QuoteConventionAnalysis:
+
+    def __init__(
+        self,
+        convention_scores: dict[QuoteConvention, float],
+        tabulated_quotation_marks: QuotationMarkTabulator,
+        analysis_weight: float = 1.0,
+    ):
+        self._convention_scores = convention_scores
+        self._best_quote_convention = max(convention_scores.items(), key=lambda item: item[1])[0]
+        self._best_quote_convention_score = convention_scores[self._best_quote_convention]
+        self._tabulated_quotation_marks = tabulated_quotation_marks
+        self._analysis_weight = analysis_weight
+
+    def get_all_convention_scores(self) -> list[tuple[QuoteConvention, float]]:
+        return list(self._convention_scores.items())
+
+    @property
+    def analysis_summary(self) -> str:
+        return self._tabulated_quotation_marks.get_summary_message()
+
+    @property
+    def best_quote_convention(self) -> QuoteConvention:
+        return self._best_quote_convention
+
+    @property
+    def best_quote_convention_score(self) -> float:
+        return self._best_quote_convention_score
+
+    @property
+    def weight(self) -> float:
+        return self._analysis_weight
+
+    class Builder:
+        def __init__(self, tabulated_quotation_marks: QuotationMarkTabulator):
+            self._convention_scores: dict[QuoteConvention, float] = {}
+            self._tabulated_quotation_marks = tabulated_quotation_marks
+
+        def record_convention_score(self, quote_convention: QuoteConvention, score: float) -> None:
+            self._convention_scores[quote_convention] = score
+
+        def build(self) -> "QuoteConventionAnalysis":
+            return QuoteConventionAnalysis(
+                self._convention_scores,
+                self._tabulated_quotation_marks,
+                self._tabulated_quotation_marks.get_total_quotation_mark_count(),
+            )
diff --git a/machine/punctuation_analysis/quote_convention_detector.py b/machine/punctuation_analysis/quote_convention_detector.py
index c37e8135..17286cd1 100644
--- a/machine/punctuation_analysis/quote_convention_detector.py
+++ b/machine/punctuation_analysis/quote_convention_detector.py
@@ -1,5 +1,4 @@
-from dataclasses import dataclass
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Tuple
 
 from .chapter import Chapter
 from .depth_based_quotation_mark_resolver import DepthBasedQuotationMarkResolver
@@ -8,20 +7,13 @@
 from .quotation_mark_metadata import QuotationMarkMetadata
 from .quotation_mark_string_match import QuotationMarkStringMatch
 from .quotation_mark_tabulator import QuotationMarkTabulator
-from .quote_convention import QuoteConvention
+from .quote_convention_analysis import QuoteConventionAnalysis
 from .quote_convention_detection_resolution_settings import QuoteConventionDetectionResolutionSettings
 from .quote_convention_set import QuoteConventionSet
 from .standard_quote_conventions import STANDARD_QUOTE_CONVENTIONS
 from .usfm_structure_extractor import UsfmStructureExtractor
 
 
-@dataclass(frozen=True)
-class QuoteConventionAnalysis:
-    best_quote_convention: QuoteConvention
-    best_quote_convention_score: float
-    analysis_summary: str
-
-
 class QuoteConventionDetector(UsfmStructureExtractor):
 
     def __init__(self):
@@ -56,12 +48,14 @@ def detect_quote_convention(
     ) -> Optional[QuoteConventionAnalysis]:
         self._count_quotation_marks_in_chapters(self.get_chapters(include_chapters))
 
-        (best_quote_convention, score) = STANDARD_QUOTE_CONVENTIONS.find_most_similar_convention(
-            self._quotation_mark_tabulator
-        )
+        return STANDARD_QUOTE_CONVENTIONS.score_all_quote_conventions(self._quotation_mark_tabulator)
 
-        if score > 0 and best_quote_convention is not None:
-            return QuoteConventionAnalysis(
-                best_quote_convention, score, self._quotation_mark_tabulator.get_summary_message()
-            )
-        return None
+    def detect_quote_convention_and_get_tabulated_quotation_marks(
+        self, include_chapters: Optional[Dict[int, List[int]]] = None
+    ) -> Tuple[Optional[QuoteConventionAnalysis], QuotationMarkTabulator]:
+        self._count_quotation_marks_in_chapters(self.get_chapters(include_chapters))
+
+        return (
+            STANDARD_QUOTE_CONVENTIONS.score_all_quote_conventions(self._quotation_mark_tabulator),
+            self._quotation_mark_tabulator,
+        )
diff --git a/machine/punctuation_analysis/quote_convention_set.py b/machine/punctuation_analysis/quote_convention_set.py
index bef15639..f4c99f22 100644
--- a/machine/punctuation_analysis/quote_convention_set.py
+++ b/machine/punctuation_analysis/quote_convention_set.py
@@ -7,6 +7,7 @@
 from .quotation_mark_direction import QuotationMarkDirection
 from .quotation_mark_tabulator import QuotationMarkTabulator
 from .quote_convention import QuoteConvention
+from .quote_convention_analysis import QuoteConventionAnalysis
 
 
 class QuoteConventionSet:
@@ -149,3 +150,13 @@ def find_most_similar_convention(
                 best_quote_convention = quote_convention
 
         return (best_quote_convention, best_similarity)
+
+    def score_all_quote_conventions(
+        self, tabulated_quotation_marks: QuotationMarkTabulator
+    ) -> Optional[QuoteConventionAnalysis]:
+        quote_convention_analysis_builder = QuoteConventionAnalysis.Builder(tabulated_quotation_marks)
+        for quote_convention in self._conventions:
+            score = tabulated_quotation_marks.calculate_similarity(quote_convention)
+            quote_convention_analysis_builder.record_convention_score(quote_convention, score)
+
+        return quote_convention_analysis_builder.build()
diff --git a/machine/punctuation_analysis/zip_paratext_project_quote_convention_detector.py b/machine/punctuation_analysis/zip_paratext_project_quote_convention_detector.py
index c0ccc90d..e51b2754 100644
--- a/machine/punctuation_analysis/zip_paratext_project_quote_convention_detector.py
+++ b/machine/punctuation_analysis/zip_paratext_project_quote_convention_detector.py
@@ -2,12 +2,14 @@
 from typing import BinaryIO, Optional
 from zipfile import ZipFile
 
+from ..corpora.zip_paratext_project_settings_parser import ZipParatextProjectSettingsParser
 from .paratext_project_quote_convention_detector import ParatextProjectQuoteConventionDetector
 
 
 class ZipParatextProjectQuoteConventionDetector(ParatextProjectQuoteConventionDetector):
     def __init__(self, archive: ZipFile) -> None:
         self._archive = archive
+        super().__init__(ZipParatextProjectSettingsParser(archive))
 
     def _exists(self, file_name: str) -> bool:
         return file_name in self._archive.namelist()
diff --git a/tests/punctuation_analysis/test_paratext_project_quote_convention_detector.py b/tests/punctuation_analysis/test_paratext_project_quote_convention_detector.py
index 36e0f9fb..64c74d44 100644
--- a/tests/punctuation_analysis/test_paratext_project_quote_convention_detector.py
+++ b/tests/punctuation_analysis/test_paratext_project_quote_convention_detector.py
@@ -90,6 +90,27 @@ def test_get_quote_convention_invalid_book_code() -> None:
     assert analysis is None
 
 
+def test_get_quote_convention_weighted_average_of_multiple_books() -> None:
+    env = _TestEnvironment(
+        files={
+            "41MATTest.SFM": rf"""\id MAT
+{get_test_chapter(1, standard_english_quote_convention)}""",
+            "42MRKTest.SFM": r"""\id MRK
+\c 1
+\v 1 This "sentence uses a different" convention""",
+        }
+    )
+    analysis: Optional[QuoteConventionAnalysis] = env.get_quote_convention()
+    assert analysis is not None
+    assert analysis.best_quote_convention.name == "standard_english"
+    assert analysis.best_quote_convention_score > 0.8
+    assert analysis.best_quote_convention_score < 0.9
+    assert (
+        analysis.analysis_summary
+        == "The most common level 1 quotation marks are “ (5 of 6 opening marks) and ” (5 of 6 closing marks)"
+    )
+
+
 class _TestEnvironment:
     def __init__(
         self,

From d8829101181d9cd292572f1d21d135aa272a001c Mon Sep 17 00:00:00 2001
From: Ben King <benjaminking@sil.org>
Date: Thu, 30 Oct 2025 09:12:27 -0400
Subject: [PATCH 2/7] Always return a QuoteConventionAnalysis instead of None

---
 ...atext_project_quote_convention_detector.py | 22 +++---
 .../quote_convention_analysis.py              | 16 ++++-
 .../quote_convention_detector.py              |  4 +-
 .../quote_convention_set.py                   |  4 +-
 ...atext_project_quote_convention_detector.py | 18 +++--
 .../test_quote_convention_detector.py         | 70 ++++++++++++-------
 6 files changed, 84 insertions(+), 50 deletions(-)

diff --git a/machine/punctuation_analysis/paratext_project_quote_convention_detector.py b/machine/punctuation_analysis/paratext_project_quote_convention_detector.py
index c3b73b86..89ec698e 100644
--- a/machine/punctuation_analysis/paratext_project_quote_convention_detector.py
+++ b/machine/punctuation_analysis/paratext_project_quote_convention_detector.py
@@ -16,7 +16,8 @@
 class WeightedAverageQuoteConventionAnalysisBuilder:
     def __init__(self) -> None:
         self._total_weight: float = 0
-        self._convention_votes: Dict[QuoteConvention, float] = defaultdict(float)
+        self._convention_votes: Dict[str, float] = defaultdict(float)
+        self._quote_conventions_by_name: Dict[str, QuoteConvention] = {}
         self._total_tabulated_quotation_marks = QuotationMarkTabulator()
 
     def record_book_results(
@@ -24,24 +25,25 @@ def record_book_results(
         quote_convention_analysis: Optional[QuoteConventionAnalysis],
         tabulated_quotation_marks: QuotationMarkTabulator,
     ) -> None:
-        if quote_convention_analysis is None:
+        if quote_convention_analysis is None or quote_convention_analysis.weight == 0:
             return
 
         self._total_tabulated_quotation_marks.tabulate_from(tabulated_quotation_marks)
 
         self._total_weight += quote_convention_analysis.weight
         for convention, score in quote_convention_analysis.get_all_convention_scores():
-            self._convention_votes[convention] += score * quote_convention_analysis.weight
-
-    def to_quote_convention_analysis(self) -> Optional[QuoteConventionAnalysis]:
-        if self._total_weight == 0:
-            return None
+            if convention.name not in self._quote_conventions_by_name:
+                self._quote_conventions_by_name[convention.name] = convention
+            self._convention_votes[convention.name] += score * quote_convention_analysis.weight
 
+    def to_quote_convention_analysis(self) -> QuoteConventionAnalysis:
         quote_convention_analysis_builder = QuoteConventionAnalysis.Builder(self._total_tabulated_quotation_marks)
 
-        for convention, total_score in self._convention_votes.items():
+        for convention_name, total_score in self._convention_votes.items():
             if total_score > 0:
-                quote_convention_analysis_builder.record_convention_score(convention, total_score / self._total_weight)
+                quote_convention_analysis_builder.record_convention_score(
+                    self._quote_conventions_by_name[convention_name], total_score / self._total_weight
+                )
 
         return quote_convention_analysis_builder.build()
 
@@ -55,7 +57,7 @@ def __init__(self, settings: Union[ParatextProjectSettings, ParatextProjectSetti
 
     def get_quote_convention_analysis(
         self, include_chapters: Optional[Dict[int, List[int]]] = None
-    ) -> Optional[QuoteConventionAnalysis]:
+    ) -> QuoteConventionAnalysis:
 
         weighted_average_quote_convention_analysis_builder = WeightedAverageQuoteConventionAnalysisBuilder()
 
diff --git a/machine/punctuation_analysis/quote_convention_analysis.py b/machine/punctuation_analysis/quote_convention_analysis.py
index c0a8cfa2..ae637b84 100644
--- a/machine/punctuation_analysis/quote_convention_analysis.py
+++ b/machine/punctuation_analysis/quote_convention_analysis.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 from .quotation_mark_tabulator import QuotationMarkTabulator
 from .quote_convention import QuoteConvention
 
@@ -11,8 +13,16 @@ def __init__(
         analysis_weight: float = 1.0,
     ):
         self._convention_scores = convention_scores
-        self._best_quote_convention = max(convention_scores.items(), key=lambda item: item[1])[0]
-        self._best_quote_convention_score = convention_scores[self._best_quote_convention]
+        if len(convention_scores) > 0:
+            self._best_quote_convention_score = max(convention_scores.items(), key=lambda item: item[1])[1]
+        else:
+            self._best_quote_convention_score = 0
+
+        if self._best_quote_convention_score > 0:
+            self._best_quote_convention = max(convention_scores.items(), key=lambda item: item[1])[0]
+        else:
+            self._best_quote_convention = None
+
         self._tabulated_quotation_marks = tabulated_quotation_marks
         self._analysis_weight = analysis_weight
 
@@ -24,7 +34,7 @@ def analysis_summary(self) -> str:
         return self._tabulated_quotation_marks.get_summary_message()
 
     @property
-    def best_quote_convention(self) -> QuoteConvention:
+    def best_quote_convention(self) -> Optional[QuoteConvention]:
         return self._best_quote_convention
 
     @property
diff --git a/machine/punctuation_analysis/quote_convention_detector.py b/machine/punctuation_analysis/quote_convention_detector.py
index 17286cd1..21f2b827 100644
--- a/machine/punctuation_analysis/quote_convention_detector.py
+++ b/machine/punctuation_analysis/quote_convention_detector.py
@@ -45,14 +45,14 @@ def _count_quotation_marks_in_chapter(
 
     def detect_quote_convention(
         self, include_chapters: Optional[Dict[int, List[int]]] = None
-    ) -> Optional[QuoteConventionAnalysis]:
+    ) -> QuoteConventionAnalysis:
         self._count_quotation_marks_in_chapters(self.get_chapters(include_chapters))
 
         return STANDARD_QUOTE_CONVENTIONS.score_all_quote_conventions(self._quotation_mark_tabulator)
 
     def detect_quote_convention_and_get_tabulated_quotation_marks(
         self, include_chapters: Optional[Dict[int, List[int]]] = None
-    ) -> Tuple[Optional[QuoteConventionAnalysis], QuotationMarkTabulator]:
+    ) -> Tuple[QuoteConventionAnalysis, QuotationMarkTabulator]:
         self._count_quotation_marks_in_chapters(self.get_chapters(include_chapters))
 
         return (
diff --git a/machine/punctuation_analysis/quote_convention_set.py b/machine/punctuation_analysis/quote_convention_set.py
index f4c99f22..cddb6f2e 100644
--- a/machine/punctuation_analysis/quote_convention_set.py
+++ b/machine/punctuation_analysis/quote_convention_set.py
@@ -151,9 +151,7 @@ def find_most_similar_convention(
 
         return (best_quote_convention, best_similarity)
 
-    def score_all_quote_conventions(
-        self, tabulated_quotation_marks: QuotationMarkTabulator
-    ) -> Optional[QuoteConventionAnalysis]:
+    def score_all_quote_conventions(self, tabulated_quotation_marks: QuotationMarkTabulator) -> QuoteConventionAnalysis:
         quote_convention_analysis_builder = QuoteConventionAnalysis.Builder(tabulated_quotation_marks)
         for quote_convention in self._conventions:
             score = tabulated_quotation_marks.calculate_similarity(quote_convention)
diff --git a/tests/punctuation_analysis/test_paratext_project_quote_convention_detector.py b/tests/punctuation_analysis/test_paratext_project_quote_convention_detector.py
index 64c74d44..7977b6b7 100644
--- a/tests/punctuation_analysis/test_paratext_project_quote_convention_detector.py
+++ b/tests/punctuation_analysis/test_paratext_project_quote_convention_detector.py
@@ -27,7 +27,8 @@ def test_get_quote_convention() -> None:
         }
     )
     analysis: Optional[QuoteConventionAnalysis] = env.get_quote_convention()
-    assert analysis is not None
+
+    assert analysis.best_quote_convention is not None
     assert analysis.best_quote_convention_score > 0.8
     assert analysis.best_quote_convention.name == "standard_english"
 
@@ -42,7 +43,8 @@ def test_get_quote_convention_by_book() -> None:
         }
     )
     analysis: Optional[QuoteConventionAnalysis] = env.get_quote_convention("MRK")
-    assert analysis is not None
+
+    assert analysis.best_quote_convention is not None
     assert analysis.best_quote_convention_score > 0.8
     assert analysis.best_quote_convention.name == "standard_french"
 
@@ -61,7 +63,8 @@ def test_get_quote_convention_by_chapter() -> None:
         }
     )
     analysis: Optional[QuoteConventionAnalysis] = env.get_quote_convention("MRK2,4-5")
-    assert analysis is not None
+
+    assert analysis.best_quote_convention is not None
     assert analysis.best_quote_convention_score > 0.66
     assert analysis.best_quote_convention.name == "standard_french"
 
@@ -76,7 +79,7 @@ def test_get_quote_convention_by_chapter_indeterminate() -> None:
         }
     )
     analysis: Optional[QuoteConventionAnalysis] = env.get_quote_convention("MAT1,3")
-    assert analysis is None
+    assert analysis.best_quote_convention is None
 
 
 def test_get_quote_convention_invalid_book_code() -> None:
@@ -87,7 +90,7 @@ def test_get_quote_convention_invalid_book_code() -> None:
         }
     )
     analysis: Optional[QuoteConventionAnalysis] = env.get_quote_convention("MAT")
-    assert analysis is None
+    assert analysis.best_quote_convention is None
 
 
 def test_get_quote_convention_weighted_average_of_multiple_books() -> None:
@@ -101,7 +104,8 @@ def test_get_quote_convention_weighted_average_of_multiple_books() -> None:
         }
     )
     analysis: Optional[QuoteConventionAnalysis] = env.get_quote_convention()
-    assert analysis is not None
+
+    assert analysis.best_quote_convention is not None
     assert analysis.best_quote_convention.name == "standard_english"
     assert analysis.best_quote_convention_score > 0.8
     assert analysis.best_quote_convention_score < 0.9
@@ -125,7 +129,7 @@ def __init__(
     def detector(self) -> ParatextProjectQuoteConventionDetector:
         return self._detector
 
-    def get_quote_convention(self, scripture_range: Optional[str] = None) -> Optional[QuoteConventionAnalysis]:
+    def get_quote_convention(self, scripture_range: Optional[str] = None) -> QuoteConventionAnalysis:
         chapters: Optional[Dict[int, List[int]]] = None
         if scripture_range is not None:
             chapters = get_chapters(scripture_range, ORIGINAL_VERSIFICATION)
diff --git a/tests/punctuation_analysis/test_quote_convention_detector.py b/tests/punctuation_analysis/test_quote_convention_detector.py
index 44ff3b09..6f34abb3 100644
--- a/tests/punctuation_analysis/test_quote_convention_detector.py
+++ b/tests/punctuation_analysis/test_quote_convention_detector.py
@@ -1,5 +1,3 @@
-from typing import Union
-
 from machine.corpora import parse_usfm
 from machine.punctuation_analysis import QuoteConventionAnalysis, QuoteConventionDetector
 
@@ -15,7 +13,8 @@ def test_standard_english() -> None:
     ‘You shall not eat of any tree of the garden’?”
     """
     analysis = detect_quote_convention(usfm)
-    assert analysis is not None
+
+    assert analysis.best_quote_convention is not None
     assert analysis.best_quote_convention.name == "standard_english"
 
 
@@ -28,7 +27,8 @@ def test_typewriter_english() -> None:
     'You shall not eat of any tree of the garden'?\"
     """
     analysis = detect_quote_convention(usfm)
-    assert analysis is not None
+
+    assert analysis.best_quote_convention is not None
     assert analysis.best_quote_convention.name == "typewriter_english"
 
 
@@ -41,7 +41,8 @@ def test_british_english() -> None:
     “You shall not eat of any tree of the garden”?’
     """
     analysis = detect_quote_convention(usfm)
-    assert analysis is not None
+
+    assert analysis.best_quote_convention is not None
     assert analysis.best_quote_convention.name == "british_english"
 
 
@@ -54,7 +55,8 @@ def test_british_typewriter_english() -> None:
     \"You shall not eat of any tree of the garden\"?'
     """
     analysis = detect_quote_convention(usfm)
-    assert analysis is not None
+
+    assert analysis.best_quote_convention is not None
     assert analysis.best_quote_convention.name == "british_typewriter_english"
 
 
@@ -67,7 +69,8 @@ def test_hybrid_typewriter_english() -> None:
     'You shall not eat of any tree of the garden'?”
     """
     analysis = detect_quote_convention(usfm)
-    assert analysis is not None
+
+    assert analysis.best_quote_convention is not None
     assert analysis.best_quote_convention.name == "hybrid_typewriter_english"
 
 
@@ -80,7 +83,8 @@ def test_standard_french() -> None:
     ‹You shall not eat of any tree of the garden›?»
     """
     analysis = detect_quote_convention(usfm)
-    assert analysis is not None
+
+    assert analysis.best_quote_convention is not None
     assert analysis.best_quote_convention.name == "standard_french"
 
 
@@ -93,7 +97,8 @@ def test_typewriter_french() -> None:
     <You shall not eat of any tree of the garden>?>>
     """
     analysis = detect_quote_convention(usfm)
-    assert analysis is not None
+
+    assert analysis.best_quote_convention is not None
     assert analysis.best_quote_convention.name == "typewriter_french"
 
 
@@ -109,7 +114,8 @@ def test_western_european() -> None:
     “You shall not eat of any tree of the garden”?»
     """
     analysis = detect_quote_convention(usfm)
-    assert analysis is not None
+
+    assert analysis.best_quote_convention is not None
     assert analysis.best_quote_convention.name == "western_european"
 
 
@@ -122,7 +128,8 @@ def test_british_inspired_western_european() -> None:
     ‘You shall not eat of any tree of the garden’?»
     """
     analysis = detect_quote_convention(usfm)
-    assert analysis is not None
+
+    assert analysis.best_quote_convention is not None
     assert analysis.best_quote_convention.name == "british_inspired_western_european"
 
 
@@ -135,7 +142,8 @@ def test_typewriter_western_european() -> None:
     "You shall not eat of any tree of the garden"?>>
     """
     analysis = detect_quote_convention(usfm)
-    assert analysis is not None
+
+    assert analysis.best_quote_convention is not None
     assert analysis.best_quote_convention.name == "typewriter_western_european"
 
 
@@ -148,7 +156,8 @@ def test_typewriter_western_european_variant() -> None:
     <You shall not eat of any tree of the garden>?"
     """
     analysis = detect_quote_convention(usfm)
-    assert analysis is not None
+
+    assert analysis.best_quote_convention is not None
     assert analysis.best_quote_convention.name == "typewriter_western_european_variant"
 
 
@@ -161,7 +170,8 @@ def test_hybrid_typewriter_western_european() -> None:
     "You shall not eat of any tree of the garden"?»
     """
     analysis = detect_quote_convention(usfm)
-    assert analysis is not None
+
+    assert analysis.best_quote_convention is not None
     assert analysis.best_quote_convention.name == "hybrid_typewriter_western_european"
 
 
@@ -174,7 +184,8 @@ def test_hybrid_british_typewriter_western_european() -> None:
     'You shall not eat of any tree of the garden'?»
     """
     analysis = detect_quote_convention(usfm)
-    assert analysis is not None
+
+    assert analysis.best_quote_convention is not None
     assert analysis.best_quote_convention.name == "hybrid_british_typewriter_western_european"
 
 
@@ -187,7 +198,8 @@ def test_central_european() -> None:
     ‚You shall not eat of any tree of the garden‘?“
     """
     analysis = detect_quote_convention(usfm)
-    assert analysis is not None
+
+    assert analysis.best_quote_convention is not None
     assert analysis.best_quote_convention.name == "central_european"
 
 
@@ -200,7 +212,8 @@ def test_central_european_guillemets() -> None:
     ›You shall not eat of any tree of the garden‹?«
     """
     analysis = detect_quote_convention(usfm)
-    assert analysis is not None
+
+    assert analysis.best_quote_convention is not None
     assert analysis.best_quote_convention.name == "central_european_guillemets"
 
 
@@ -213,7 +226,8 @@ def test_standard_swedish() -> None:
     ’You shall not eat of any tree of the garden’?”
     """
     analysis = detect_quote_convention(usfm)
-    assert analysis is not None
+
+    assert analysis.best_quote_convention is not None
     assert analysis.best_quote_convention.name == "standard_swedish"
 
 
@@ -226,7 +240,8 @@ def test_standard_finnish() -> None:
     ’You shall not eat of any tree of the garden’?»
     """
     analysis = detect_quote_convention(usfm)
-    assert analysis is not None
+
+    assert analysis.best_quote_convention is not None
     assert analysis.best_quote_convention.name == "standard_finnish"
 
 
@@ -239,7 +254,8 @@ def test_eastern_european() -> None:
     ‚You shall not eat of any tree of the garden’?”
     """
     analysis = detect_quote_convention(usfm)
-    assert analysis is not None
+
+    assert analysis.best_quote_convention is not None
     assert analysis.best_quote_convention.name == "eastern_european"
 
 
@@ -252,7 +268,8 @@ def test_standard_russian() -> None:
     „You shall not eat of any tree of the garden“?»
     """
     analysis = detect_quote_convention(usfm)
-    assert analysis is not None
+
+    assert analysis.best_quote_convention is not None
     assert analysis.best_quote_convention.name == "standard_russian"
 
 
@@ -265,7 +282,8 @@ def test_standard_arabic() -> None:
     ’You shall not eat of any tree of the garden‘?“
     """
     analysis = detect_quote_convention(usfm)
-    assert analysis is not None
+
+    assert analysis.best_quote_convention is not None
     assert analysis.best_quote_convention.name == "standard_arabic"
 
 
@@ -278,7 +296,8 @@ def test_non_standard_arabic() -> None:
     ’You shall not eat of any tree of the garden‘?»
     """
     analysis = detect_quote_convention(usfm)
-    assert analysis is not None
+
+    assert analysis.best_quote_convention is not None
     assert analysis.best_quote_convention.name == "non-standard_arabic"
 
 
@@ -295,11 +314,12 @@ def test_mismatched_quotation_marks() -> None:
     God has said, ‘You shall not eat of it. You shall not touch it, lest you die.’
     """
     analysis = detect_quote_convention(usfm)
-    assert analysis is not None
+
+    assert analysis.best_quote_convention is not None
     assert analysis.best_quote_convention.name == "standard_english"
 
 
-def detect_quote_convention(usfm: str) -> Union[QuoteConventionAnalysis, None]:
+def detect_quote_convention(usfm: str) -> QuoteConventionAnalysis:  # test helper: parse `usfm`, return the analysis
     quote_convention_detector = QuoteConventionDetector()
     parse_usfm(usfm, quote_convention_detector)
     return quote_convention_detector.detect_quote_convention()

From e4ea5f7c5cd676b9739c24afc0495a9ebbde8ae9 Mon Sep 17 00:00:00 2001
From: Ben King <benjaminking@sil.org>
Date: Thu, 30 Oct 2025 10:47:25 -0400
Subject: [PATCH 3/7] Modify quote convention similarity calculation

---
 .../quotation_mark_tabulator.py               | 34 ++++++++++++-------
 .../test_quotation_mark_tabulator.py          | 15 ++++----
 .../test_quote_convention_set.py              |  8 ++---
 3 files changed, 35 insertions(+), 22 deletions(-)

diff --git a/machine/punctuation_analysis/quotation_mark_tabulator.py b/machine/punctuation_analysis/quotation_mark_tabulator.py
index 308be297..080a95b7 100644
--- a/machine/punctuation_analysis/quotation_mark_tabulator.py
+++ b/machine/punctuation_analysis/quotation_mark_tabulator.py
@@ -1,5 +1,5 @@
 from collections import Counter, defaultdict
-from typing import List
+from typing import Dict, List
 
 from .quotation_mark_direction import QuotationMarkDirection
 from .quotation_mark_metadata import QuotationMarkMetadata
@@ -66,22 +66,32 @@ def get_total_quotation_mark_count(self) -> int:
         return total_count
 
     def calculate_similarity(self, quote_convention: QuoteConvention) -> float:
-        weighted_difference = 0
-        total_weight = 0
-        for depth, direction in self._quotation_counts_by_depth_and_direction:
+        """Return how closely the observed marks fit `quote_convention`; returns 0 when no marks were tabulated."""
+        num_marks_by_depth: Dict[int, int] = defaultdict(int)
+        num_matching_marks_by_depth: Dict[int, int] = defaultdict(int)
+        for depth, direction in sorted(self._quotation_counts_by_depth_and_direction, key=lambda item: item[0]):
             expected_quotation_mark: str = quote_convention.get_expected_quotation_mark(depth, direction)
 
-            # Give higher weight to shallower depths, since deeper marks are more likely to be mistakes
-            weighted_difference += self._quotation_counts_by_depth_and_direction[
-                (depth, direction)
-            ].calculate_num_differences(expected_quotation_mark) * 2 ** (-depth)
-            total_weight += self._quotation_counts_by_depth_and_direction[
+            num_observed_marks = self._quotation_counts_by_depth_and_direction[(depth, direction)].get_observed_count()
+            num_marks_by_depth[depth] += num_observed_marks
+            num_matching_marks_by_depth[depth] += num_observed_marks - self._quotation_counts_by_depth_and_direction[
                 (depth, direction)
-            ].get_observed_count() * 2 ** (-depth)
+            ].calculate_num_differences(expected_quotation_mark)
+
+        # A depth's score is scaled by the per-mark score of the depth above it (1 if that depth is missing or empty)
+        scores_by_depth: Dict[int, float] = defaultdict(float)
+        for depth in sorted(num_marks_by_depth.keys()):
+            previous_depth_score = (
+                scores_by_depth[depth - 1] / num_marks_by_depth[depth - 1] if num_marks_by_depth.get(depth - 1) else 1
+            )
+            scores_by_depth[depth] = previous_depth_score * num_matching_marks_by_depth[depth]
+
+        total_marks = sum(num_marks_by_depth.values())
+        total_score = sum(scores_by_depth.values())
 
-        if total_weight == 0:
+        if total_marks == 0:
             return 0
-        return 1 - (weighted_difference / total_weight)
+        return total_score / total_marks
 
     def get_summary_message(self) -> str:
         message_lines: List[str] = []
diff --git a/tests/punctuation_analysis/test_quotation_mark_tabulator.py b/tests/punctuation_analysis/test_quotation_mark_tabulator.py
index 80c17ea2..e7e8cfbf 100644
--- a/tests/punctuation_analysis/test_quotation_mark_tabulator.py
+++ b/tests/punctuation_analysis/test_quotation_mark_tabulator.py
@@ -118,7 +118,7 @@ def test_calculate_similarity() -> None:
     )
     assert two_level_quotation_mark_tabulator.calculate_similarity(
         QuoteConvention("", [SingleLevelQuoteConvention("\u201c", "\u201d")])
-    ) == approx(0.66666666666667, rel=1e-9)
+    ) == approx(0.5, rel=1e-9)
     assert (
         two_level_quotation_mark_tabulator.calculate_similarity(
             QuoteConvention(
@@ -131,9 +131,12 @@ def test_calculate_similarity() -> None:
         QuoteConvention(
             "", [SingleLevelQuoteConvention("\u201c", "\u201d"), SingleLevelQuoteConvention("\u00ab", "\u00bb")]
         )
-    ) == approx(0.66666666666667, rel=1e-9)
-    assert two_level_quotation_mark_tabulator.calculate_similarity(
-        QuoteConvention(
-            "", [SingleLevelQuoteConvention("\u2018", "\u2019"), SingleLevelQuoteConvention("\u2018", "\u2019")]
+    ) == approx(0.5, rel=1e-9)
+    assert (
+        two_level_quotation_mark_tabulator.calculate_similarity(
+            QuoteConvention(
+                "", [SingleLevelQuoteConvention("\u2018", "\u2019"), SingleLevelQuoteConvention("\u2018", "\u2019")]
+            )
         )
-    ) == approx(0.33333333333333, rel=1e-9)
+        == 0.0
+    )
diff --git a/tests/punctuation_analysis/test_quote_convention_set.py b/tests/punctuation_analysis/test_quote_convention_set.py
index a2a87c4d..2ddf165d 100644
--- a/tests/punctuation_analysis/test_quote_convention_set.py
+++ b/tests/punctuation_analysis/test_quote_convention_set.py
@@ -1251,11 +1251,11 @@ def test_find_most_similar_convention() -> None:
     )
     assert all_three_quote_convention_set.find_most_similar_convention(noisy_multiple_english_quotes_tabulator) == (
         standard_english_quote_convention,
-        approx(0.9, rel=1e-9),
+        approx(0.8333333333333, rel=1e-9),
     )
     assert two_french_quote_convention_set.find_most_similar_convention(noisy_multiple_english_quotes_tabulator) == (
         western_european_quote_convention,
-        approx(0.1, rel=1e-9),
+        0,
     )
 
     noisy_multiple_french_quotes_tabulator = QuotationMarkTabulator()
@@ -1273,7 +1273,7 @@ def test_find_most_similar_convention() -> None:
     )
     assert all_three_quote_convention_set.find_most_similar_convention(noisy_multiple_french_quotes_tabulator) == (
         standard_french_quote_convention,
-        approx(0.916666666666, rel=1e-9),
+        approx(0.875, rel=1e-9),
     )
 
     too_deep_english_quotes_tabulator = QuotationMarkTabulator()
@@ -1288,7 +1288,7 @@ def test_find_most_similar_convention() -> None:
     )
     assert all_three_quote_convention_set.find_most_similar_convention(too_deep_english_quotes_tabulator) == (
         standard_english_quote_convention,
-        approx(0.967741935483871, rel=1e-9),
+        approx(0.8, rel=1e-9),
     )
 
     # in case of ties, the earlier convention in the list should be returned

From 3bab6bd2e0f316586d12297646be4290661b67c0 Mon Sep 17 00:00:00 2001
From: Ben King <benjaminking@sil.org>
Date: Thu, 30 Oct 2025 12:01:27 -0400
Subject: [PATCH 4/7] Add new quote convention

---
 .../punctuation_analysis/standard_quote_conventions.py    | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/machine/punctuation_analysis/standard_quote_conventions.py b/machine/punctuation_analysis/standard_quote_conventions.py
index b1292e15..5e69eb75 100644
--- a/machine/punctuation_analysis/standard_quote_conventions.py
+++ b/machine/punctuation_analysis/standard_quote_conventions.py
@@ -189,5 +189,13 @@
                 SingleLevelQuoteConvention("\u2019", "\u2018"),
             ],
         ),
+        QuoteConvention(
+            "arabic_inspired_western_european",
+            [
+                SingleLevelQuoteConvention("\u00ab", "\u00bb"),
+                SingleLevelQuoteConvention("\u201d", "\u201c"),
+                SingleLevelQuoteConvention("\u2019", "\u2018"),
+            ],
+        ),
     ]
 )

From 14274dd66b9a759008835ff61b9a110c4f078e96 Mon Sep 17 00:00:00 2001
From: Ben King <benjaminking@sil.org>
Date: Thu, 30 Oct 2025 14:15:26 -0400
Subject: [PATCH 5/7] Minor code clarity changes

---
 .../paratext_project_quote_convention_detector.py             | 4 ++--
 machine/punctuation_analysis/quote_convention_analysis.py     | 2 +-
 machine/punctuation_analysis/standard_quote_conventions.py    | 1 +
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/machine/punctuation_analysis/paratext_project_quote_convention_detector.py b/machine/punctuation_analysis/paratext_project_quote_convention_detector.py
index 89ec698e..2c79fbcb 100644
--- a/machine/punctuation_analysis/paratext_project_quote_convention_detector.py
+++ b/machine/punctuation_analysis/paratext_project_quote_convention_detector.py
@@ -22,10 +22,10 @@ def __init__(self) -> None:
 
     def record_book_results(
         self,
-        quote_convention_analysis: Optional[QuoteConventionAnalysis],
+        quote_convention_analysis: QuoteConventionAnalysis,
         tabulated_quotation_marks: QuotationMarkTabulator,
     ) -> None:
-        if quote_convention_analysis is None or quote_convention_analysis.weight == 0:
+        if quote_convention_analysis.best_quote_convention is None or quote_convention_analysis.weight == 0:
             return
 
         self._total_tabulated_quotation_marks.tabulate_from(tabulated_quotation_marks)
diff --git a/machine/punctuation_analysis/quote_convention_analysis.py b/machine/punctuation_analysis/quote_convention_analysis.py
index ae637b84..d4765c14 100644
--- a/machine/punctuation_analysis/quote_convention_analysis.py
+++ b/machine/punctuation_analysis/quote_convention_analysis.py
@@ -10,7 +10,7 @@ def __init__(
         self,
         convention_scores: dict[QuoteConvention, float],
         tabulated_quotation_marks: QuotationMarkTabulator,
-        analysis_weight: float = 1.0,
+        analysis_weight: float = 1.0,  # weight is used for combining scores for multiple books
     ):
         self._convention_scores = convention_scores
         if len(convention_scores) > 0:
diff --git a/machine/punctuation_analysis/standard_quote_conventions.py b/machine/punctuation_analysis/standard_quote_conventions.py
index 5e69eb75..fddda562 100644
--- a/machine/punctuation_analysis/standard_quote_conventions.py
+++ b/machine/punctuation_analysis/standard_quote_conventions.py
@@ -187,6 +187,7 @@
             [
                 SingleLevelQuoteConvention("\u00ab", "\u00bb"),
                 SingleLevelQuoteConvention("\u2019", "\u2018"),
+                SingleLevelQuoteConvention("\u201d", "\u201c"),
             ],
         ),
         QuoteConvention(

From afa93ba970a7a7fe7b533dca0caffd156016a3c3 Mon Sep 17 00:00:00 2001
From: Ben King <benjaminking@sil.org>
Date: Thu, 30 Oct 2025 14:45:25 -0400
Subject: [PATCH 6/7] Fix linting issue

---
 tests/corpora/test_usfm_manual.py | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/tests/corpora/test_usfm_manual.py b/tests/corpora/test_usfm_manual.py
index 6247634f..241ba195 100644
--- a/tests/corpora/test_usfm_manual.py
+++ b/tests/corpora/test_usfm_manual.py
@@ -24,7 +24,7 @@
     ZipParatextProjectSettingsParser,
     ZipParatextProjectTextUpdater,
 )
-from machine.punctuation_analysis import QuoteConventionDetector, ZipParatextProjectQuoteConventionDetector
+from machine.punctuation_analysis import ZipParatextProjectQuoteConventionDetector
 
 
 @pytest.mark.skip(reason="This is for manual testing only. Remove this decorator to run the test.")
@@ -135,18 +135,13 @@ def get_usfm(project_path: Path):
 
 @pytest.mark.skip(reason="This is for manual testing only. Remove this decorator to run the test.")
 def test_analyze_corpora_quote_conventions():
-    source_handler = QuoteConventionDetector()
     source_archive = zipfile.ZipFile(USFM_SOURCE_PROJECT_ZIP_PATH, "r")
     source_quote_convention_detector = ZipParatextProjectQuoteConventionDetector(source_archive)
-    source_quote_convention_detector.get_quote_convention_analysis(source_handler)
+    source_analysis = source_quote_convention_detector.get_quote_convention_analysis()
 
-    target_handler = QuoteConventionDetector()
     target_archive = zipfile.ZipFile(USFM_TARGET_PROJECT_ZIP_PATH, "r")
     target_quote_convention_detector = ZipParatextProjectQuoteConventionDetector(target_archive)
-    target_quote_convention_detector.get_quote_convention_analysis(target_handler)
+    target_analysis = target_quote_convention_detector.get_quote_convention_analysis()
 
-    source_analysis = source_handler.detect_quote_convention()
-    target_analysis = target_handler.detect_quote_convention()
-
-    assert source_analysis is not None
-    assert target_analysis is not None
+    assert source_analysis.best_quote_convention is not None
+    assert target_analysis.best_quote_convention is not None

From 2e699e25bc487135f3c2bdf92946c547fa2ec676 Mon Sep 17 00:00:00 2001
From: Ben King <benjaminking@sil.org>
Date: Thu, 6 Nov 2025 14:47:28 -0500
Subject: [PATCH 7/7] Address reviewer comments + refactor weighted average

---
 ...atext_project_quote_convention_detector.py | 50 ++-----------------
 .../punctuation_analysis/quote_convention.py  |  2 +-
 .../quote_convention_analysis.py              | 44 +++++++++++-----
 .../quote_convention_detector.py              | 12 +----
 4 files changed, 37 insertions(+), 71 deletions(-)

diff --git a/machine/punctuation_analysis/paratext_project_quote_convention_detector.py b/machine/punctuation_analysis/paratext_project_quote_convention_detector.py
index 2c79fbcb..4b46d136 100644
--- a/machine/punctuation_analysis/paratext_project_quote_convention_detector.py
+++ b/machine/punctuation_analysis/paratext_project_quote_convention_detector.py
@@ -1,5 +1,4 @@
 from abc import ABC, abstractmethod
-from collections import defaultdict
 from typing import BinaryIO, Dict, List, Optional, Union
 
 from ..corpora.paratext_project_settings import ParatextProjectSettings
@@ -7,47 +6,10 @@
 from ..corpora.usfm_parser import parse_usfm
 from ..scripture.canon import book_id_to_number, get_scripture_books
 from ..utils.typeshed import StrPath
-from .quotation_mark_tabulator import QuotationMarkTabulator
-from .quote_convention import QuoteConvention
 from .quote_convention_analysis import QuoteConventionAnalysis
 from .quote_convention_detector import QuoteConventionDetector
 
 
-class WeightedAverageQuoteConventionAnalysisBuilder:
-    def __init__(self) -> None:
-        self._total_weight: float = 0
-        self._convention_votes: Dict[str, float] = defaultdict(float)
-        self._quote_conventions_by_name: Dict[str, QuoteConvention] = {}
-        self._total_tabulated_quotation_marks = QuotationMarkTabulator()
-
-    def record_book_results(
-        self,
-        quote_convention_analysis: QuoteConventionAnalysis,
-        tabulated_quotation_marks: QuotationMarkTabulator,
-    ) -> None:
-        if quote_convention_analysis.best_quote_convention is None or quote_convention_analysis.weight == 0:
-            return
-
-        self._total_tabulated_quotation_marks.tabulate_from(tabulated_quotation_marks)
-
-        self._total_weight += quote_convention_analysis.weight
-        for convention, score in quote_convention_analysis.get_all_convention_scores():
-            if convention.name not in self._quote_conventions_by_name:
-                self._quote_conventions_by_name[convention.name] = convention
-            self._convention_votes[convention.name] += score * quote_convention_analysis.weight
-
-    def to_quote_convention_analysis(self) -> QuoteConventionAnalysis:
-        quote_convention_analysis_builder = QuoteConventionAnalysis.Builder(self._total_tabulated_quotation_marks)
-
-        for convention_name, total_score in self._convention_votes.items():
-            if total_score > 0:
-                quote_convention_analysis_builder.record_convention_score(
-                    self._quote_conventions_by_name[convention_name], total_score / self._total_weight
-                )
-
-        return quote_convention_analysis_builder.build()
-
-
 class ParatextProjectQuoteConventionDetector(ABC):
     def __init__(self, settings: Union[ParatextProjectSettings, ParatextProjectSettingsParserBase]) -> None:
         if isinstance(settings, ParatextProjectSettingsParserBase):
@@ -59,7 +21,7 @@ def get_quote_convention_analysis(
         self, include_chapters: Optional[Dict[int, List[int]]] = None
     ) -> QuoteConventionAnalysis:
 
-        weighted_average_quote_convention_analysis_builder = WeightedAverageQuoteConventionAnalysisBuilder()
+        book_quote_convention_analyses: List[QuoteConventionAnalysis] = []
 
         for book_id in get_scripture_books():
             if include_chapters is not None and book_id_to_number(book_id) not in include_chapters:
@@ -82,14 +44,10 @@ def get_quote_convention_analysis(
                 )
                 raise RuntimeError(error_message) from e
 
-            quote_convention_analysis, tabulated_quotation_marks = (
-                handler.detect_quote_convention_and_get_tabulated_quotation_marks(include_chapters)
-            )
-            weighted_average_quote_convention_analysis_builder.record_book_results(
-                quote_convention_analysis, tabulated_quotation_marks
-            )
+            quote_convention_analysis = handler.detect_quote_convention(include_chapters)
+            book_quote_convention_analyses.append(quote_convention_analysis)
 
-        return weighted_average_quote_convention_analysis_builder.to_quote_convention_analysis()
+        return QuoteConventionAnalysis.combine_with_weighted_average(book_quote_convention_analyses)
 
     @abstractmethod
     def _exists(self, file_name: StrPath) -> bool: ...
diff --git a/machine/punctuation_analysis/quote_convention.py b/machine/punctuation_analysis/quote_convention.py
index 3160894c..9dda42e3 100644
--- a/machine/punctuation_analysis/quote_convention.py
+++ b/machine/punctuation_analysis/quote_convention.py
@@ -61,7 +61,7 @@ def __eq__(self, value):
         return True
 
     def __hash__(self) -> int:
-        return hash((tuple(self.level_conventions)))
+        return hash(tuple(self.level_conventions))  # hashes the level conventions only
 
     @property
     def name(self) -> str:
diff --git a/machine/punctuation_analysis/quote_convention_analysis.py b/machine/punctuation_analysis/quote_convention_analysis.py
index d4765c14..1de547fc 100644
--- a/machine/punctuation_analysis/quote_convention_analysis.py
+++ b/machine/punctuation_analysis/quote_convention_analysis.py
@@ -1,4 +1,5 @@
-from typing import Optional
+from collections import defaultdict
+from typing import Dict, List, Optional
 
 from .quotation_mark_tabulator import QuotationMarkTabulator
 from .quote_convention import QuoteConvention
@@ -14,21 +15,16 @@ def __init__(
     ):
         self._convention_scores = convention_scores
         if len(convention_scores) > 0:
-            self._best_quote_convention_score = max(convention_scores.items(), key=lambda item: item[1])[1]
+            (self._best_quote_convention, self._best_quote_convention_score) = max(
+                convention_scores.items(), key=lambda item: item[1]
+            )
         else:
             self._best_quote_convention_score = 0
-
-        if self._best_quote_convention_score > 0:
-            self._best_quote_convention = max(convention_scores.items(), key=lambda item: item[1])[0]
-        else:
             self._best_quote_convention = None
 
         self._tabulated_quotation_marks = tabulated_quotation_marks
         self._analysis_weight = analysis_weight
 
-    def get_all_convention_scores(self) -> list[tuple[QuoteConvention, float]]:
-        return list(self._convention_scores.items())
-
     @property
     def analysis_summary(self) -> str:
         return self._tabulated_quotation_marks.get_summary_message()
@@ -41,10 +37,6 @@ def best_quote_convention(self) -> Optional[QuoteConvention]:
     def best_quote_convention_score(self) -> float:
         return self._best_quote_convention_score
 
-    @property
-    def weight(self) -> float:
-        return self._analysis_weight
-
     class Builder:
         def __init__(self, tabulated_quotation_marks: QuotationMarkTabulator):
             self._convention_scores: dict[QuoteConvention, float] = {}
@@ -59,3 +51,29 @@ def build(self) -> "QuoteConventionAnalysis":
                 self._tabulated_quotation_marks,
                 self._tabulated_quotation_marks.get_total_quotation_mark_count(),
             )
+
+    @staticmethod
+    def combine_with_weighted_average(
+        quote_convention_analyses: List["QuoteConventionAnalysis"],
+    ) -> "QuoteConventionAnalysis":
+        """Merge several analyses into one, averaging each convention's score weighted by analysis weight.
+
+        Conventions are keyed by name; the first instance seen for a name is the one reported.
+        """
+        weight_sum: float = 0
+        weighted_scores: Dict[str, float] = defaultdict(float)
+        conventions: Dict[str, QuoteConvention] = {}
+        merged_marks = QuotationMarkTabulator()
+        for analysis in quote_convention_analyses:
+            merged_marks.tabulate_from(analysis._tabulated_quotation_marks)
+            weight = analysis._analysis_weight
+            weight_sum += weight
+            for convention, score in analysis._convention_scores.items():
+                conventions.setdefault(convention.name, convention)
+                weighted_scores[convention.name] += score * weight
+
+        builder = QuoteConventionAnalysis.Builder(merged_marks)
+        for name, weighted_score in weighted_scores.items():
+            if weighted_score > 0:  # conventions with no positive score are left out of the result
+                builder.record_convention_score(conventions[name], weighted_score / weight_sum)
+        return builder.build()
diff --git a/machine/punctuation_analysis/quote_convention_detector.py b/machine/punctuation_analysis/quote_convention_detector.py
index 21f2b827..ca1662d9 100644
--- a/machine/punctuation_analysis/quote_convention_detector.py
+++ b/machine/punctuation_analysis/quote_convention_detector.py
@@ -1,4 +1,4 @@
-from typing import Dict, List, Optional, Tuple
+from typing import Dict, List, Optional
 
 from .chapter import Chapter
 from .depth_based_quotation_mark_resolver import DepthBasedQuotationMarkResolver
@@ -49,13 +49,3 @@ def detect_quote_convention(
         self._count_quotation_marks_in_chapters(self.get_chapters(include_chapters))
 
         return STANDARD_QUOTE_CONVENTIONS.score_all_quote_conventions(self._quotation_mark_tabulator)
-
-    def detect_quote_convention_and_get_tabulated_quotation_marks(
-        self, include_chapters: Optional[Dict[int, List[int]]] = None
-    ) -> Tuple[QuoteConventionAnalysis, QuotationMarkTabulator]:
-        self._count_quotation_marks_in_chapters(self.get_chapters(include_chapters))
-
-        return (
-            STANDARD_QUOTE_CONVENTIONS.score_all_quote_conventions(self._quotation_mark_tabulator),
-            self._quotation_mark_tabulator,
-        )