From 1995f538e973be2beb340f375e8f753b0e3284a6 Mon Sep 17 00:00:00 2001
From: Lukasz Kolodziejczyk <lukasz.kolodziejczyk@mostly.ai>
Date: Wed, 27 Nov 2024 15:16:01 +0100
Subject: [PATCH 01/12] wip: switch report() progress from tqdm (add_tqdm) to rich (wrap_progress_callback)

---
 src/mostlyai/qa/common.py | 29 +++++++++++++++++++----------
 src/mostlyai/qa/report.py | 33 +++++++++++++++++++--------------
 2 files changed, 38 insertions(+), 24 deletions(-)

diff --git a/src/mostlyai/qa/common.py b/src/mostlyai/qa/common.py
index f6ad52e..a3e3699 100644
--- a/src/mostlyai/qa/common.py
+++ b/src/mostlyai/qa/common.py
@@ -13,9 +13,11 @@
 # limitations under the License.
 
 import logging
-from typing import Protocol
+from functools import partial
+from typing import Protocol, Callable
 
 import pandas as pd
+from rich.progress import Progress
 from tqdm.auto import tqdm
 
 from mostlyai.qa.filesystem import Statistics
@@ -73,18 +75,25 @@ class PrerequisiteNotMetError(Exception):
 
 
 class ProgressCallback(Protocol):
-    def __call__(self, current: int, total: int) -> None: ...
-
+    def __call__(
+        self, total: float | None = None, completed: float | None = None, advance: float | None = None, **kwargs
+    ) -> None: ...
+
+def wrap_progress_callback(update_progress: ProgressCallback | None = None, **kwargs) -> tuple[ProgressCallback, Callable]:
+    if not update_progress:
+        rich_progress = Progress()
+        rich_progress.start()
+        task_id = rich_progress.add_task(**kwargs)
+        update_progress = partial(rich_progress.update, task_id=task_id)
+    else:
+        rich_progress = None
 
-def add_tqdm(on_progress: ProgressCallback | None = None, description: str = "Processing") -> ProgressCallback:
-    pbar = tqdm(desc=description, total=100)
+    def teardown_wrapped_progress_callback(*args, **kwargs):
+        if rich_progress:
+            rich_progress.stop()
 
-    def _on_progress(current: int, total: int):
-        if on_progress is not None:
-            on_progress(current, total)
-        pbar.update(current - pbar.n)
+    return update_progress, teardown_wrapped_progress_callback
 
-    return _on_progress
 
 
 def check_min_sample_size(size: int, min: int, type: str) -> None:
diff --git a/src/mostlyai/qa/report.py b/src/mostlyai/qa/report.py
index b0c8d9d..45ab5d6 100644
--- a/src/mostlyai/qa/report.py
+++ b/src/mostlyai/qa/report.py
@@ -13,11 +13,15 @@
 # limitations under the License.
 
 import logging
+from functools import partial
+from idlelib.debugger_r import wrap_info
 from pathlib import Path
 
 import numpy as np
 import pandas as pd
+from docutils.nodes import description
 from pandas.core.dtypes.common import is_numeric_dtype, is_datetime64_dtype
+from rich.progress import Progress
 
 from mostlyai.qa import distances, similarity, html_report
 from mostlyai.qa.accuracy import (
@@ -46,7 +50,7 @@
     NXT_COLUMN,
     CTX_COLUMN_PREFIX,
     TGT_COLUMN_PREFIX,
-    REPORT_CREDITS,
+    REPORT_CREDITS, wrap_progress_callback,
 )
 from mostlyai.qa.filesystem import Statistics, TemporaryWorkspace
 
@@ -71,7 +75,7 @@ def report(
     max_sample_size_accuracy: int | None = None,
     max_sample_size_embeddings: int | None = None,
     statistics_path: str | Path | None = None,
-    on_progress: ProgressCallback | None = None,
+    update_progress: ProgressCallback | None = None,
 ) -> tuple[Path, Metrics | None]:
     """
     Generate HTML report and metrics for comparing synthetic and original data samples.
@@ -93,7 +97,7 @@ def report(
         max_sample_size_accuracy: Max sample size for accuracy
         max_sample_size_embeddings: Max sample size for embeddings (similarity & distances)
         statistics_path: Path of where to store the statistics to be used by `report_from_statistics`
-        on_progress: A custom progress callback
+        update_progress: A custom progress callback
     Returns:
         1. Path to the HTML report
         2. Pydantic Metrics:
@@ -120,8 +124,8 @@ def report(
     """
 
     with TemporaryWorkspace() as workspace:
-        on_progress = add_tqdm(on_progress, description="Creating report")
-        on_progress(current=0, total=100)
+        update_progress, teardown_progress = wrap_progress_callback(update_progress, description="Creating report")
+        update_progress(completed=0, total=100)
 
         # ensure all columns are present and in the same order as training data
         syn_tgt_data = syn_tgt_data[trn_tgt_data.columns]
@@ -165,7 +169,7 @@ def report(
             _LOG.info(err)
             statistics.mark_early_exit()
             html_report.store_early_exit_report(report_path)
-            on_progress(current=100, total=100)
+            update_progress(completed=100, total=100)
             return report_path, None
 
         # prepare datasets for accuracy
@@ -194,7 +198,7 @@ def report(
             max_sample_size=max_sample_size_accuracy,
             setup=setup,
         )
-        on_progress(current=5, total=100)
+        update_progress(completed=5, total=100)
 
         _LOG.info("prepare training data for accuracy started")
         trn = pull_data_for_accuracy(
@@ -205,7 +209,7 @@ def report(
             max_sample_size=max_sample_size_accuracy,
             setup=setup,
         )
-        on_progress(current=10, total=100)
+        update_progress(completed=10, total=100)
 
         # coerce dtypes to match the original training data dtypes
         for col in trn:
@@ -222,7 +226,7 @@ def report(
             statistics=statistics,
             workspace=workspace,
         )
-        on_progress(current=20, total=100)
+        update_progress(completed=20, total=100)
 
         # ensure that embeddings are all equal size for a fair 3-way comparison
         max_sample_size_embeddings = min(
@@ -245,7 +249,7 @@ def _calc_pull_embeds(df_tgt: pd.DataFrame, df_ctx: pd.DataFrame, start: int, st
             embeds = []
             for i, bucket in enumerate(buckets, 1):
                 embeds += [calculate_embeddings(bucket.tolist())]
-                on_progress(current=start + i, total=100)
+                update_progress(completed=start + i, total=100)
             embeds = np.concatenate(embeds, axis=0)
             _LOG.info(f"calculated embeddings {embeds.shape}")
             return embeds
@@ -256,7 +260,7 @@ def _calc_pull_embeds(df_tgt: pd.DataFrame, df_ctx: pd.DataFrame, start: int, st
             hol_embeds = _calc_pull_embeds(df_tgt=hol_tgt_data, df_ctx=hol_ctx_data, start=60, stop=80)
         else:
             hol_embeds = None
-        on_progress(current=80, total=100)
+        update_progress(completed=80, total=100)
 
         _LOG.info("report similarity")
         sim_cosine_trn_hol, sim_cosine_trn_syn, sim_auc_trn_hol, sim_auc_trn_syn = report_similarity(
@@ -266,7 +270,7 @@ def _calc_pull_embeds(df_tgt: pd.DataFrame, df_ctx: pd.DataFrame, start: int, st
             workspace=workspace,
             statistics=statistics,
         )
-        on_progress(current=90, total=100)
+        update_progress(completed=90, total=100)
 
         _LOG.info("report distances")
         dcr_trn, dcr_hol = report_distances(
@@ -275,7 +279,7 @@ def _calc_pull_embeds(df_tgt: pd.DataFrame, df_ctx: pd.DataFrame, start: int, st
             hol_embeds=hol_embeds,
             workspace=workspace,
         )
-        on_progress(current=99, total=100)
+        update_progress(completed=99, total=100)
 
         metrics = calculate_metrics(
             acc_uni=acc_uni,
@@ -312,7 +316,8 @@ def _calc_pull_embeds(df_tgt: pd.DataFrame, df_ctx: pd.DataFrame, start: int, st
             acc_biv=acc_biv,
             corr_trn=corr_trn,
         )
-        on_progress(current=100, total=100)
+        update_progress(completed=100, total=100)
+        teardown_progress()
         return report_path, metrics
 
 

From 90d3e5b01f7bf7002beec9b6292a13b7ec020cbf Mon Sep 17 00:00:00 2001
From: Lukasz Kolodziejczyk <lukasz.kolodziejczyk@mostly.ai>
Date: Wed, 27 Nov 2024 15:17:47 +0100
Subject: [PATCH 02/12] wip: switch report_from_statistics() to wrap_progress_callback; drop add_tqdm imports

---
 src/mostlyai/qa/report.py                 |  1 -
 src/mostlyai/qa/report_from_statistics.py | 19 +++++++++----------
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/src/mostlyai/qa/report.py b/src/mostlyai/qa/report.py
index 45ab5d6..e4e3cea 100644
--- a/src/mostlyai/qa/report.py
+++ b/src/mostlyai/qa/report.py
@@ -46,7 +46,6 @@
     ProgressCallback,
     PrerequisiteNotMetError,
     check_min_sample_size,
-    add_tqdm,
     NXT_COLUMN,
     CTX_COLUMN_PREFIX,
     TGT_COLUMN_PREFIX,
diff --git a/src/mostlyai/qa/report_from_statistics.py b/src/mostlyai/qa/report_from_statistics.py
index e8815ae..9216057 100644
--- a/src/mostlyai/qa/report_from_statistics.py
+++ b/src/mostlyai/qa/report_from_statistics.py
@@ -26,10 +26,9 @@
     ProgressCallback,
     PrerequisiteNotMetError,
     check_min_sample_size,
-    add_tqdm,
     check_statistics_prerequisite,
     determine_data_size,
-    REPORT_CREDITS,
+    REPORT_CREDITS, wrap_progress_callback,
 )
 from mostlyai.qa.filesystem import Statistics, TemporaryWorkspace
 
@@ -50,11 +49,11 @@ def report_from_statistics(
     report_extra_info: str = "",
     max_sample_size_accuracy: int | None = None,
     max_sample_size_embeddings: int | None = None,
-    on_progress: ProgressCallback | None = None,
+    update_progress: ProgressCallback | None = None,
 ) -> Path:
     with TemporaryWorkspace() as workspace:
-        on_progress = add_tqdm(on_progress, description="Creating report from statistics")
-        on_progress(current=0, total=100)
+        update_progress, teardown_progress = wrap_progress_callback(update_progress, description="Creating report from statistics")
+        update_progress(completed=0, total=100)
 
         # prepare report_path
         if report_path is None:
@@ -73,7 +72,7 @@ def report_from_statistics(
             check_min_sample_size(syn_sample_size, 100, "synthetic")
         except PrerequisiteNotMetError:
             html_report.store_early_exit_report(report_path)
-            on_progress(current=100, total=100)
+            update_progress(completed=100, total=100)
             return report_path
 
         meta = statistics.load_meta()
@@ -96,7 +95,7 @@ def report_from_statistics(
             max_sample_size=max_sample_size_accuracy,
         )
         _LOG.info(f"sample synthetic data finished ({syn.shape=})")
-        on_progress(current=20, total=100)
+        update_progress(completed=20, total=100)
 
         # calculate and plot accuracy and correlations
         acc_uni, acc_biv, corr_trn = report_accuracy_and_correlations_from_statistics(
@@ -104,7 +103,7 @@ def report_from_statistics(
             statistics=statistics,
             workspace=workspace,
         )
-        on_progress(current=30, total=100)
+        update_progress(completed=30, total=100)
 
         _LOG.info("calculate embeddings for synthetic")
         syn_embeds = calculate_embeddings(
@@ -123,7 +122,7 @@ def report_from_statistics(
             workspace=workspace,
             statistics=statistics,
         )
-        on_progress(current=50, total=100)
+        update_progress(completed=50, total=100)
 
         meta |= {
             "rows_synthetic": syn.shape[0],
@@ -144,7 +143,7 @@ def report_from_statistics(
             acc_biv=acc_biv,
             corr_trn=corr_trn,
         )
-        on_progress(current=100, total=100)
+        update_progress(completed=100, total=100)
         return report_path
 
 

From 42d9f0d3b97d0ae7c7dc600eca83646bc2efae7a Mon Sep 17 00:00:00 2001
From: Lukasz Kolodziejczyk <lukasz.kolodziejczyk@mostly.ai>
Date: Wed, 27 Nov 2024 15:20:40 +0100
Subject: [PATCH 03/12] wip: remove stray imports; call teardown_progress() on early-exit and final return paths

---
 src/mostlyai/qa/report.py                 | 8 +++-----
 src/mostlyai/qa/report_from_statistics.py | 9 +++++++--
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/mostlyai/qa/report.py b/src/mostlyai/qa/report.py
index e4e3cea..fedf919 100644
--- a/src/mostlyai/qa/report.py
+++ b/src/mostlyai/qa/report.py
@@ -13,15 +13,11 @@
 # limitations under the License.
 
 import logging
-from functools import partial
-from idlelib.debugger_r import wrap_info
 from pathlib import Path
 
 import numpy as np
 import pandas as pd
-from docutils.nodes import description
 from pandas.core.dtypes.common import is_numeric_dtype, is_datetime64_dtype
-from rich.progress import Progress
 
 from mostlyai.qa import distances, similarity, html_report
 from mostlyai.qa.accuracy import (
@@ -49,7 +45,8 @@
     NXT_COLUMN,
     CTX_COLUMN_PREFIX,
     TGT_COLUMN_PREFIX,
-    REPORT_CREDITS, wrap_progress_callback,
+    REPORT_CREDITS,
+    wrap_progress_callback,
 )
 from mostlyai.qa.filesystem import Statistics, TemporaryWorkspace
 
@@ -169,6 +166,7 @@ def report(
             statistics.mark_early_exit()
             html_report.store_early_exit_report(report_path)
             update_progress(completed=100, total=100)
+            teardown_progress()
             return report_path, None
 
         # prepare datasets for accuracy
diff --git a/src/mostlyai/qa/report_from_statistics.py b/src/mostlyai/qa/report_from_statistics.py
index 9216057..eeaf7ff 100644
--- a/src/mostlyai/qa/report_from_statistics.py
+++ b/src/mostlyai/qa/report_from_statistics.py
@@ -28,7 +28,8 @@
     check_min_sample_size,
     check_statistics_prerequisite,
     determine_data_size,
-    REPORT_CREDITS, wrap_progress_callback,
+    REPORT_CREDITS,
+    wrap_progress_callback,
 )
 from mostlyai.qa.filesystem import Statistics, TemporaryWorkspace
 
@@ -52,7 +53,9 @@ def report_from_statistics(
     update_progress: ProgressCallback | None = None,
 ) -> Path:
     with TemporaryWorkspace() as workspace:
-        update_progress, teardown_progress = wrap_progress_callback(update_progress, description="Creating report from statistics")
+        update_progress, teardown_progress = wrap_progress_callback(
+            update_progress, description="Creating report from statistics"
+        )
         update_progress(completed=0, total=100)
 
         # prepare report_path
@@ -73,6 +76,7 @@ def report_from_statistics(
         except PrerequisiteNotMetError:
             html_report.store_early_exit_report(report_path)
             update_progress(completed=100, total=100)
+            teardown_progress()
             return report_path
 
         meta = statistics.load_meta()
@@ -144,6 +148,7 @@ def report_from_statistics(
             corr_trn=corr_trn,
         )
         update_progress(completed=100, total=100)
+        teardown_progress()
         return report_path
 
 

From 49fb3fb04d52dc2c705610fc77d7b508dad3ef14 Mon Sep 17 00:00:00 2001
From: Lukasz Kolodziejczyk <lukasz.kolodziejczyk@mostly.ai>
Date: Wed, 27 Nov 2024 15:32:27 +0100
Subject: [PATCH 04/12] wip: drop tqdm import; tidy wrap_progress_callback formatting and teardown signature

---
 src/mostlyai/qa/common.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/mostlyai/qa/common.py b/src/mostlyai/qa/common.py
index a3e3699..137951f 100644
--- a/src/mostlyai/qa/common.py
+++ b/src/mostlyai/qa/common.py
@@ -18,7 +18,6 @@
 
 import pandas as pd
 from rich.progress import Progress
-from tqdm.auto import tqdm
 
 from mostlyai.qa.filesystem import Statistics
 
@@ -79,7 +78,10 @@ def __call__(
         self, total: float | None = None, completed: float | None = None, advance: float | None = None, **kwargs
     ) -> None: ...
 
-def wrap_progress_callback(update_progress: ProgressCallback | None = None, **kwargs) -> tuple[ProgressCallback, Callable]:
+
+def wrap_progress_callback(
+    update_progress: ProgressCallback | None = None, **kwargs
+) -> tuple[ProgressCallback, Callable]:
     if not update_progress:
         rich_progress = Progress()
         rich_progress.start()
@@ -88,14 +90,13 @@ def wrap_progress_callback(update_progress: ProgressCallback | None = None, **kw
     else:
         rich_progress = None
 
-    def teardown_wrapped_progress_callback(*args, **kwargs):
+    def teardown_wrapped_progress_callback():
         if rich_progress:
             rich_progress.stop()
 
     return update_progress, teardown_wrapped_progress_callback
 
 
-
 def check_min_sample_size(size: int, min: int, type: str) -> None:
     if size < min:
         raise PrerequisiteNotMetError(f"At least {min} rows are required, but only {size} were found for {type}.")

From 66bbaf8b090c02bffef061915f8d8516bf5f534b Mon Sep 17 00:00:00 2001
From: Lukasz Kolodziejczyk <lukasz.kolodziejczyk@mostly.ai>
Date: Wed, 27 Nov 2024 15:34:17 +0100
Subject: [PATCH 05/12] wip: rename teardown_wrapped_progress_callback to teardown_progress

---
 src/mostlyai/qa/common.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mostlyai/qa/common.py b/src/mostlyai/qa/common.py
index 137951f..d1e309c 100644
--- a/src/mostlyai/qa/common.py
+++ b/src/mostlyai/qa/common.py
@@ -90,11 +90,11 @@ def wrap_progress_callback(
     else:
         rich_progress = None
 
-    def teardown_wrapped_progress_callback():
+    def teardown_progress():
         if rich_progress:
             rich_progress.stop()
 
-    return update_progress, teardown_wrapped_progress_callback
+    return update_progress, teardown_progress
 
 
 def check_min_sample_size(size: int, min: int, type: str) -> None:

From 179f74ca217377e6d3277f2be0ac2df77b52a300 Mon Sep 17 00:00:00 2001
From: Lukasz Kolodziejczyk <lukasz.kolodziejczyk@mostly.ai>
Date: Wed, 27 Nov 2024 16:01:50 +0100
Subject: [PATCH 06/12] wip: add rich dependency to pyproject.toml and refresh poetry.lock

---
 poetry.lock    | 4 ++--
 pyproject.toml | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index feafa92..b5524ab 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
 
 [[package]]
 name = "annotated-types"
@@ -3035,4 +3035,4 @@ type = ["pytest-mypy"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "84b6afdb4c4101d2a0fbe8aeddabb53e03e9aa5886dd1e8bf95181c24c17c349"
+content-hash = "12fab0b7c571095916f88504bb577687deb6c8d23b82ac4530ca2e82c1170980"
diff --git a/pyproject.toml b/pyproject.toml
index bcaa2b1..85f9d4a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,6 +23,7 @@ joblib = ">=1.2.0"
 Jinja2 = ">=3.1.2"
 scikit-learn = ">=1.4.0"
 sentence-transformers = ">=3.1.0"
+rich = "^13.9.4"
 
 [tool.poetry.group.dev.dependencies]
 ruff = "0.7.0"

From b6816300e32c77cd6ca6d1facf506c8205a2172e Mon Sep 17 00:00:00 2001
From: Lukasz Kolodziejczyk <lukasz.kolodziejczyk@mostly.ai>
Date: Thu, 28 Nov 2024 15:06:04 +0100
Subject: [PATCH 07/12] simplify

---
 src/mostlyai/qa/common.py                 | 47 ++++++++++++++++-------
 src/mostlyai/qa/report.py                 | 30 +++++++--------
 src/mostlyai/qa/report_from_statistics.py | 22 ++++-------
 3 files changed, 54 insertions(+), 45 deletions(-)

diff --git a/src/mostlyai/qa/common.py b/src/mostlyai/qa/common.py
index d1e309c..4d9e2dc 100644
--- a/src/mostlyai/qa/common.py
+++ b/src/mostlyai/qa/common.py
@@ -79,22 +79,41 @@ def __call__(
     ) -> None: ...
 
 
-def wrap_progress_callback(
-    update_progress: ProgressCallback | None = None, **kwargs
-) -> tuple[ProgressCallback, Callable]:
-    if not update_progress:
-        rich_progress = Progress()
-        rich_progress.start()
-        task_id = rich_progress.add_task(**kwargs)
-        update_progress = partial(rich_progress.update, task_id=task_id)
-    else:
-        rich_progress = None
+class ProgressCallbackWrapper:
+    @staticmethod
+    def _wrap_progress_callback(
+        update_progress: ProgressCallback | None = None, **kwargs
+    ) -> tuple[ProgressCallback, Callable]:
+        if not update_progress:
+            rich_progress = Progress()
+            rich_progress.start()
+            task_id = rich_progress.add_task(**kwargs)
+            update_progress = partial(rich_progress.update, task_id=task_id)
+        else:
+            rich_progress = None
+
+        def teardown_progress():
+            if rich_progress:
+                rich_progress.stop()
+
+        return update_progress, teardown_progress
+
+    def update(
+        self, total: float | None = None, completed: float | None = None, advance: float | None = None, **kwargs
+    ) -> None:
+        self.update_progress(total=total, completed=completed, advance=advance, **kwargs)
+
+    def __init__(self, update_progress: ProgressCallback | None = None, **kwargs):
+        self.update_progress, self.teardown_progress = self._wrap_progress_callback(update_progress, **kwargs)
 
-    def teardown_progress():
-        if rich_progress:
-            rich_progress.stop()
+    def __enter__(self):
+        self.update_progress(completed=0, total=1)
+        return self
 
-    return update_progress, teardown_progress
+    def __exit__(self, exc_type, exc_value, traceback):
+        if exc_type is not None:
+            self.update_progress(completed=1, total=1)
+        self.teardown_progress()
 
 
 def check_min_sample_size(size: int, min: int, type: str) -> None:
diff --git a/src/mostlyai/qa/report.py b/src/mostlyai/qa/report.py
index 46cc52e..9514181 100644
--- a/src/mostlyai/qa/report.py
+++ b/src/mostlyai/qa/report.py
@@ -46,7 +46,7 @@
     CTX_COLUMN_PREFIX,
     TGT_COLUMN_PREFIX,
     REPORT_CREDITS,
-    wrap_progress_callback,
+    ProgressCallbackWrapper,
 )
 from mostlyai.qa.filesystem import Statistics, TemporaryWorkspace
 
@@ -119,10 +119,10 @@ def report(
             - `dcr_share`: Share of synthetic samples that are closer to a training sample than to a holdout sample. This shall not be significantly larger than 50\%.
     """
 
-    with TemporaryWorkspace() as workspace:
-        update_progress, teardown_progress = wrap_progress_callback(update_progress, description="Creating report")
-        update_progress(completed=0, total=100)
-
+    with (
+        TemporaryWorkspace() as workspace,
+        ProgressCallbackWrapper(update_progress, description="Creating report") as progress,
+    ):
         # ensure all columns are present and in the same order as training data
         syn_tgt_data = syn_tgt_data[trn_tgt_data.columns]
         if hol_tgt_data is not None:
@@ -165,8 +165,6 @@ def report(
             _LOG.info(err)
             statistics.mark_early_exit()
             html_report.store_early_exit_report(report_path)
-            update_progress(completed=100, total=100)
-            teardown_progress()
             return report_path, None
 
         # prepare datasets for accuracy
@@ -195,7 +193,7 @@ def report(
             max_sample_size=max_sample_size_accuracy,
             setup=setup,
         )
-        update_progress(completed=5, total=100)
+        progress.update(completed=5, total=100)
 
         _LOG.info("prepare training data for accuracy started")
         trn = pull_data_for_accuracy(
@@ -206,7 +204,7 @@ def report(
             max_sample_size=max_sample_size_accuracy,
             setup=setup,
         )
-        update_progress(completed=10, total=100)
+        progress.update(completed=10, total=100)
 
         # coerce dtypes to match the original training data dtypes
         for col in trn:
@@ -223,7 +221,7 @@ def report(
             statistics=statistics,
             workspace=workspace,
         )
-        update_progress(completed=20, total=100)
+        progress.update(completed=20, total=100)
 
         # ensure that embeddings are all equal size for a fair 3-way comparison
         max_sample_size_embeddings = min(
@@ -247,8 +245,8 @@ def _calc_pull_embeds(df_tgt: pd.DataFrame, df_ctx: pd.DataFrame, start: int, st
             embeds = []
             for i, bucket in enumerate(buckets, 1):
                 embeds += [calculate_embeddings(bucket.tolist())]
-                update_progress(completed=start + i, total=100)
-            update_progress(completed=stop, total=100)
+                progress.update(completed=start + i, total=100)
+            progress.update(completed=stop, total=100)
             embeds = np.concatenate(embeds, axis=0)
             _LOG.info(f"calculated embeddings {embeds.shape}")
             return embeds
@@ -259,7 +257,7 @@ def _calc_pull_embeds(df_tgt: pd.DataFrame, df_ctx: pd.DataFrame, start: int, st
             hol_embeds = _calc_pull_embeds(df_tgt=hol_tgt_data, df_ctx=hol_ctx_data, start=60, stop=80)
         else:
             hol_embeds = None
-        update_progress(completed=80, total=100)
+        progress.update(completed=80, total=100)
 
         _LOG.info("report similarity")
         sim_cosine_trn_hol, sim_cosine_trn_syn, sim_auc_trn_hol, sim_auc_trn_syn = report_similarity(
@@ -269,7 +267,7 @@ def _calc_pull_embeds(df_tgt: pd.DataFrame, df_ctx: pd.DataFrame, start: int, st
             workspace=workspace,
             statistics=statistics,
         )
-        update_progress(completed=90, total=100)
+        progress.update(completed=90, total=100)
 
         _LOG.info("report distances")
         dcr_trn, dcr_hol = report_distances(
@@ -278,7 +276,7 @@ def _calc_pull_embeds(df_tgt: pd.DataFrame, df_ctx: pd.DataFrame, start: int, st
             hol_embeds=hol_embeds,
             workspace=workspace,
         )
-        update_progress(completed=99, total=100)
+        progress.update(completed=99, total=100)
 
         metrics = calculate_metrics(
             acc_uni=acc_uni,
@@ -315,8 +313,6 @@ def _calc_pull_embeds(df_tgt: pd.DataFrame, df_ctx: pd.DataFrame, start: int, st
             acc_biv=acc_biv,
             corr_trn=corr_trn,
         )
-        update_progress(completed=100, total=100)
-        teardown_progress()
         return report_path, metrics
 
 
diff --git a/src/mostlyai/qa/report_from_statistics.py b/src/mostlyai/qa/report_from_statistics.py
index eeaf7ff..a7d483d 100644
--- a/src/mostlyai/qa/report_from_statistics.py
+++ b/src/mostlyai/qa/report_from_statistics.py
@@ -29,7 +29,7 @@
     check_statistics_prerequisite,
     determine_data_size,
     REPORT_CREDITS,
-    wrap_progress_callback,
+    ProgressCallbackWrapper,
 )
 from mostlyai.qa.filesystem import Statistics, TemporaryWorkspace
 
@@ -52,12 +52,10 @@ def report_from_statistics(
     max_sample_size_embeddings: int | None = None,
     update_progress: ProgressCallback | None = None,
 ) -> Path:
-    with TemporaryWorkspace() as workspace:
-        update_progress, teardown_progress = wrap_progress_callback(
-            update_progress, description="Creating report from statistics"
-        )
-        update_progress(completed=0, total=100)
-
+    with (
+        TemporaryWorkspace() as workspace,
+        ProgressCallbackWrapper(update_progress, description="Creating report from statistics") as progress,
+    ):
         # prepare report_path
         if report_path is None:
             report_path = Path.cwd() / "data-report.html"
@@ -75,8 +73,6 @@ def report_from_statistics(
             check_min_sample_size(syn_sample_size, 100, "synthetic")
         except PrerequisiteNotMetError:
             html_report.store_early_exit_report(report_path)
-            update_progress(completed=100, total=100)
-            teardown_progress()
             return report_path
 
         meta = statistics.load_meta()
@@ -99,7 +95,7 @@ def report_from_statistics(
             max_sample_size=max_sample_size_accuracy,
         )
         _LOG.info(f"sample synthetic data finished ({syn.shape=})")
-        update_progress(completed=20, total=100)
+        progress.update(completed=20, total=100)
 
         # calculate and plot accuracy and correlations
         acc_uni, acc_biv, corr_trn = report_accuracy_and_correlations_from_statistics(
@@ -107,7 +103,7 @@ def report_from_statistics(
             statistics=statistics,
             workspace=workspace,
         )
-        update_progress(completed=30, total=100)
+        progress.update(completed=30, total=100)
 
         _LOG.info("calculate embeddings for synthetic")
         syn_embeds = calculate_embeddings(
@@ -126,7 +122,7 @@ def report_from_statistics(
             workspace=workspace,
             statistics=statistics,
         )
-        update_progress(completed=50, total=100)
+        progress.update(completed=50, total=100)
 
         meta |= {
             "rows_synthetic": syn.shape[0],
@@ -147,8 +143,6 @@ def report_from_statistics(
             acc_biv=acc_biv,
             corr_trn=corr_trn,
         )
-        update_progress(completed=100, total=100)
-        teardown_progress()
         return report_path
 
 

From b72496b5a1a638a5b76adccc139ef14f73832c57 Mon Sep 17 00:00:00 2001
From: Michael Platzer <michael.platzer@gmail.com>
Date: Thu, 28 Nov 2024 16:26:44 +0100
Subject: [PATCH 08/12] refined wording + ensure 100% bars

---
 src/mostlyai/qa/common.py                 |  1 +
 src/mostlyai/qa/report.py                 | 19 +++++++++++--------
 src/mostlyai/qa/report_from_statistics.py |  3 ++-
 3 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/src/mostlyai/qa/common.py b/src/mostlyai/qa/common.py
index 4d9e2dc..1f7b878 100644
--- a/src/mostlyai/qa/common.py
+++ b/src/mostlyai/qa/common.py
@@ -94,6 +94,7 @@ def _wrap_progress_callback(
 
         def teardown_progress():
             if rich_progress:
+                rich_progress.refresh()
                 rich_progress.stop()
 
         return update_progress, teardown_progress
diff --git a/src/mostlyai/qa/report.py b/src/mostlyai/qa/report.py
index 9514181..4405cad 100644
--- a/src/mostlyai/qa/report.py
+++ b/src/mostlyai/qa/report.py
@@ -121,7 +121,7 @@ def report(
 
     with (
         TemporaryWorkspace() as workspace,
-        ProgressCallbackWrapper(update_progress, description="Creating report") as progress,
+        ProgressCallbackWrapper(update_progress, description="Create report") as progress,
     ):
         # ensure all columns are present and in the same order as training data
         syn_tgt_data = syn_tgt_data[trn_tgt_data.columns]
@@ -231,7 +231,9 @@ def report(
             hol_sample_size or float("inf"),
         )
 
-        def _calc_pull_embeds(df_tgt: pd.DataFrame, df_ctx: pd.DataFrame, start: int, stop: int) -> np.ndarray:
+        def _calc_pull_embeds(
+            df_tgt: pd.DataFrame, df_ctx: pd.DataFrame, progress_from: int, progress_to: int
+        ) -> np.ndarray:
             strings = pull_data_for_embeddings(
                 df_tgt=df_tgt,
                 df_ctx=df_ctx,
@@ -240,21 +242,21 @@ def _calc_pull_embeds(df_tgt: pd.DataFrame, df_ctx: pd.DataFrame, start: int, st
                 max_sample_size=max_sample_size_embeddings,
             )
             # split into buckets for calculating embeddings to avoid memory issues and report continuous progress
-            buckets = np.array_split(strings, stop - start)
+            buckets = np.array_split(strings, progress_to - progress_from)
             buckets = [b for b in buckets if len(b) > 0]
             embeds = []
             for i, bucket in enumerate(buckets, 1):
                 embeds += [calculate_embeddings(bucket.tolist())]
-                progress.update(completed=start + i, total=100)
-            progress.update(completed=stop, total=100)
+                progress.update(completed=progress_from + i, total=100)
+            progress.update(completed=progress_to, total=100)
             embeds = np.concatenate(embeds, axis=0)
             _LOG.info(f"calculated embeddings {embeds.shape}")
             return embeds
 
-        syn_embeds = _calc_pull_embeds(df_tgt=syn_tgt_data, df_ctx=syn_ctx_data, start=20, stop=40)
-        trn_embeds = _calc_pull_embeds(df_tgt=trn_tgt_data, df_ctx=trn_ctx_data, start=40, stop=60)
+        syn_embeds = _calc_pull_embeds(df_tgt=syn_tgt_data, df_ctx=syn_ctx_data, progress_from=20, progress_to=40)
+        trn_embeds = _calc_pull_embeds(df_tgt=trn_tgt_data, df_ctx=trn_ctx_data, progress_from=40, progress_to=60)
         if hol_tgt_data is not None:
-            hol_embeds = _calc_pull_embeds(df_tgt=hol_tgt_data, df_ctx=hol_ctx_data, start=60, stop=80)
+            hol_embeds = _calc_pull_embeds(df_tgt=hol_tgt_data, df_ctx=hol_ctx_data, progress_from=60, progress_to=80)
         else:
             hol_embeds = None
         progress.update(completed=80, total=100)
@@ -313,6 +315,7 @@ def _calc_pull_embeds(df_tgt: pd.DataFrame, df_ctx: pd.DataFrame, start: int, st
             acc_biv=acc_biv,
             corr_trn=corr_trn,
         )
+        progress.update(completed=100, total=100)
         return report_path, metrics
 
 
diff --git a/src/mostlyai/qa/report_from_statistics.py b/src/mostlyai/qa/report_from_statistics.py
index a7d483d..8c7a404 100644
--- a/src/mostlyai/qa/report_from_statistics.py
+++ b/src/mostlyai/qa/report_from_statistics.py
@@ -54,7 +54,7 @@ def report_from_statistics(
 ) -> Path:
     with (
         TemporaryWorkspace() as workspace,
-        ProgressCallbackWrapper(update_progress, description="Creating report from statistics") as progress,
+        ProgressCallbackWrapper(update_progress, description="Create report") as progress,
     ):
         # prepare report_path
         if report_path is None:
@@ -143,6 +143,7 @@ def report_from_statistics(
             acc_biv=acc_biv,
             corr_trn=corr_trn,
         )
+        progress.update(completed=100, total=100)
         return report_path
 
 

From 287e1f83f14a18db37e9bb0261bef67219d8dbbc Mon Sep 17 00:00:00 2001
From: Michael Platzer <michael.platzer@gmail.com>
Date: Thu, 28 Nov 2024 16:43:12 +0100
Subject: [PATCH 09/12] =?UTF-8?q?=20=F0=9F=9A=80=20[skip=20ci]?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/mostlyai/qa/report.py                 | 2 +-
 src/mostlyai/qa/report_from_statistics.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mostlyai/qa/report.py b/src/mostlyai/qa/report.py
index 4405cad..b84e328 100644
--- a/src/mostlyai/qa/report.py
+++ b/src/mostlyai/qa/report.py
@@ -121,7 +121,7 @@ def report(
 
     with (
         TemporaryWorkspace() as workspace,
-        ProgressCallbackWrapper(update_progress, description="Create report") as progress,
+        ProgressCallbackWrapper(update_progress, description="Create report 🚀") as progress,
     ):
         # ensure all columns are present and in the same order as training data
         syn_tgt_data = syn_tgt_data[trn_tgt_data.columns]
diff --git a/src/mostlyai/qa/report_from_statistics.py b/src/mostlyai/qa/report_from_statistics.py
index 8c7a404..82e609e 100644
--- a/src/mostlyai/qa/report_from_statistics.py
+++ b/src/mostlyai/qa/report_from_statistics.py
@@ -54,7 +54,7 @@ def report_from_statistics(
 ) -> Path:
     with (
         TemporaryWorkspace() as workspace,
-        ProgressCallbackWrapper(update_progress, description="Create report") as progress,
+        ProgressCallbackWrapper(update_progress, description="Create report 🚀") as progress,
     ):
         # prepare report_path
         if report_path is None:

From a32a31eb34a8227a1fe88989c4b862f164bdab73 Mon Sep 17 00:00:00 2001
From: Lukasz Kolodziejczyk <lukasz.kolodziejczyk@mostly.ai>
Date: Thu, 28 Nov 2024 17:24:57 +0100
Subject: [PATCH 10/12] Fix inverted exc_type check in ProgressCallbackWrapper.__exit__

---
 src/mostlyai/qa/common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mostlyai/qa/common.py b/src/mostlyai/qa/common.py
index 1f7b878..be03960 100644
--- a/src/mostlyai/qa/common.py
+++ b/src/mostlyai/qa/common.py
@@ -112,7 +112,7 @@ def __enter__(self):
         return self
 
     def __exit__(self, exc_type, exc_value, traceback):
-        if exc_type is not None:
+        if exc_type is None:
             self.update_progress(completed=1, total=1)
         self.teardown_progress()
 

From ca82f49c3c3974da28c0a775f61cdc259a14deec Mon Sep 17 00:00:00 2001
From: Lukasz Kolodziejczyk <lukasz.kolodziejczyk@mostly.ai>
Date: Thu, 28 Nov 2024 18:21:59 +0100
Subject: [PATCH 11/12] Remove advance parameter from ProgressCallback and ProgressCallbackWrapper.update

---
 src/mostlyai/qa/common.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/mostlyai/qa/common.py b/src/mostlyai/qa/common.py
index be03960..f8c7e23 100644
--- a/src/mostlyai/qa/common.py
+++ b/src/mostlyai/qa/common.py
@@ -74,9 +74,7 @@ class PrerequisiteNotMetError(Exception):
 
 
 class ProgressCallback(Protocol):
-    def __call__(
-        self, total: float | None = None, completed: float | None = None, advance: float | None = None, **kwargs
-    ) -> None: ...
+    def __call__(self, total: float | None = None, completed: float | None = None, **kwargs) -> None: ...
 
 
 class ProgressCallbackWrapper:
@@ -99,10 +97,8 @@ def teardown_progress():
 
         return update_progress, teardown_progress
 
-    def update(
-        self, total: float | None = None, completed: float | None = None, advance: float | None = None, **kwargs
-    ) -> None:
-        self.update_progress(total=total, completed=completed, advance=advance, **kwargs)
+    def update(self, total: float | None = None, completed: float | None = None, **kwargs) -> None:
+        self.update_progress(total=total, completed=completed, **kwargs)
 
     def __init__(self, update_progress: ProgressCallback | None = None, **kwargs):
         self.update_progress, self.teardown_progress = self._wrap_progress_callback(update_progress, **kwargs)

From 62cc3da7713e4abf7617d1d25d073f213f2afc2d Mon Sep 17 00:00:00 2001
From: Lukasz Kolodziejczyk <lukasz.kolodziejczyk@mostly.ai>
Date: Thu, 28 Nov 2024 18:28:06 +0100
Subject: [PATCH 12/12] Make ProgressCallbackWrapper callback attributes private

---
 src/mostlyai/qa/common.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/mostlyai/qa/common.py b/src/mostlyai/qa/common.py
index f8c7e23..ae2ac8c 100644
--- a/src/mostlyai/qa/common.py
+++ b/src/mostlyai/qa/common.py
@@ -98,19 +98,19 @@ def teardown_progress():
         return update_progress, teardown_progress
 
     def update(self, total: float | None = None, completed: float | None = None, **kwargs) -> None:
-        self.update_progress(total=total, completed=completed, **kwargs)
+        self._update_progress(total=total, completed=completed, **kwargs)
 
     def __init__(self, update_progress: ProgressCallback | None = None, **kwargs):
-        self.update_progress, self.teardown_progress = self._wrap_progress_callback(update_progress, **kwargs)
+        self._update_progress, self._teardown_progress = self._wrap_progress_callback(update_progress, **kwargs)
 
     def __enter__(self):
-        self.update_progress(completed=0, total=1)
+        self._update_progress(completed=0, total=1)
         return self
 
     def __exit__(self, exc_type, exc_value, traceback):
         if exc_type is None:
-            self.update_progress(completed=1, total=1)
-        self.teardown_progress()
+            self._update_progress(completed=1, total=1)
+        self._teardown_progress()
 
 
 def check_min_sample_size(size: int, min: int, type: str) -> None: