fix: Prefix all_ to the alias if "ALL " is prefixed to the field

open-contracting · May 6, 2022 · e9427b2 · e9427b2
1 parent 9e6cdb7
commit e9427b2
Show file tree

Hide file tree

Showing 3 changed files with 25 additions and 23 deletions.
diff --git a/docs/changelog.rst b/docs/changelog.rst
@@ -12,7 +12,9 @@ Changed
 Fixed
 ~~~~~
 
+
 -  :func:`~ocdskingfishercolab.calculate_coverage` uses the ``relatedprocesses_summary`` table for fields starting with ``relatedProcesses/``, where appropriate.
+-  :func:`~ocdskingfishercolab.calculate_coverage` prefixes ``all_`` to the column name if the field is prefixed with ``"ALL "``, so that it no longer has the same name as the column for the same field without ``"ALL "``.
 
 0.3.8 (2022-04-27)
 ------------------

diff --git a/ocdskingfishercolab/__init__.py b/ocdskingfishercolab/__init__.py
@@ -464,10 +464,6 @@ def get_table_and_pointer(scope, pointer):
 
         return table, "/".join(parts)
 
-    def wrap(condition, pointer):
-        alias = pointer.replace("/", "_").lower()
-        return f"ROUND(SUM(CASE WHEN {condition} THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS {alias}_percentage"
-
     # Default to the parent table of the first field.
     if not scope:
         all_tables = _all_tables()
@@ -481,19 +477,24 @@ def wrap(condition, pointer):
                 scope = candidate
                 break
 
-    join_release_summary = False
-    columns = []
+    columns = {}
     conditions = []
+    join = ""
     for field in fields:
         split = field.split()
+        if len(split) == 2 and split[0].lower() == "all":
+            mode = "all"
+        else:
+            mode = "any"
 
         table, pointer = get_table_and_pointer(scope, split[-1])
 
+        # Add a JOIN clause for the release_summary table, unless it is already in the FROM clause.
         if table == "release_summary" and scope != "release_summary":
-            join_release_summary = True
+            join = f"JOIN\n            release_summary ON release_summary.id = {scope}.id"
 
         # If the first token isn't "ALL" or if there are more than 2, behave as if only the last token was provided.
-        if len(split) == 2 and split[0].lower() == "all":
+        if mode == "all":
             parts = pointer.split("/")
             # https://github.com/open-contracting/kingfisher-colab/issues/62
             one_to_manys = [part for part in parts[:-1] if part.endswith("s")]
@@ -520,20 +521,21 @@ def wrap(condition, pointer):
             condition = f"{table}.field_list ? '{pointer}'"
 
         # Add the field coverage.
-        columns.append(wrap(condition, pointer))
+        alias = pointer.replace("/", "_").lower()
+        if mode == "all":
+            alias = f"all_{alias}"
+        columns[alias] = condition
 
         # Collect the conditions for co-occurrence coverage.
         conditions.append(condition)
 
     # Add the co-occurrence coverage.
-    columns.append(wrap(" AND\n                ".join(conditions), "total"))
-
-    select = ",\n            ".join(columns)
-    if join_release_summary:
-        join = f"JOIN\n            release_summary ON release_summary.id = {scope}.id"
-    else:
-        join = ""
+    columns["total"] = " AND\n                ".join(conditions)
 
+    select = ",\n            ".join(
+        f"ROUND(SUM(CASE WHEN {condition} THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS {alias}_percentage"
+        for alias, condition in columns.items()
+    )
     query = textwrap.dedent(f"""\
         SELECT
             count(*) AS total_{scope},

diff --git a/tests/test_module.py b/tests/test_module.py
@@ -368,7 +368,7 @@ def test_calculate_coverage_all_one_to_one(db, capsys, tmpdir):
         SELECT
             count(*) AS total_awards_summary,
             ROUND(SUM(CASE WHEN coalesce(awards_summary.field_list->>'date' =
-                  awards_summary.field_list->>'date', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS date_percentage,
+                  awards_summary.field_list->>'date', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS all_date_percentage,
             ROUND(SUM(CASE WHEN coalesce(awards_summary.field_list->>'date' =
                   awards_summary.field_list->>'date', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS total_percentage
         FROM awards_summary
@@ -385,7 +385,7 @@ def test_calculate_coverage_all_one_to_one_s(db, capsys, tmpdir):
         SELECT
             count(*) AS total_release_summary,
             ROUND(SUM(CASE WHEN coalesce(release_summary.field_list->>'parties/address/region' =
-                  release_summary.field_list->>'address', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS parties_address_region_percentage,
+                  release_summary.field_list->>'address', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS all_parties_address_region_percentage,
             ROUND(SUM(CASE WHEN coalesce(release_summary.field_list->>'parties/address/region' =
                   release_summary.field_list->>'address', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS total_percentage
         FROM release_summary
@@ -404,7 +404,7 @@ def test_calculate_coverage_all_one_to_many(db, capsys, tmpdir):
         SELECT
             count(*) AS total_awards_summary,
             ROUND(SUM(CASE WHEN coalesce(awards_summary.field_list->>'items/description' =
-                  awards_summary.field_list->>'items', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS items_description_percentage,
+                  awards_summary.field_list->>'items', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS all_items_description_percentage,
             ROUND(SUM(CASE WHEN coalesce(awards_summary.field_list->>'items/description' =
                   awards_summary.field_list->>'items', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS total_percentage
         FROM awards_summary
@@ -422,7 +422,7 @@ def test_calculate_coverage_all_many_to_many(db, capsys, tmpdir):
         SELECT
             count(*) AS total_release_summary,
             ROUND(SUM(CASE WHEN coalesce(release_summary.field_list->>'awards/items/additionalClassifications/scheme' =
-                  release_summary.field_list->>'additionalClassifications', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS awards_items_additionalclassifications_scheme_percentage,
+                  release_summary.field_list->>'additionalClassifications', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS all_awards_items_additionalclassifications_scheme_percentage,
             ROUND(SUM(CASE WHEN coalesce(release_summary.field_list->>'awards/items/additionalClassifications/scheme' =
                   release_summary.field_list->>'additionalClassifications', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS total_percentage
         FROM release_summary
@@ -435,13 +435,11 @@ def test_calculate_coverage_all_many_to_many(db, capsys, tmpdir):
 def test_calculate_coverage_all_mixed(db, capsys, tmpdir):
     sql = calculate_coverage(["ALL :items/description", ":items/description"], scope="awards_summary", sql=False, sql_only=True)
 
-    # There should not be two columns named "items_description_percentage", but there is a bug.
-    # https://github.com/open-contracting/kingfisher-colab/issues/64
     assert sql == textwrap.dedent("""\
         SELECT
             count(*) AS total_awards_summary,
             ROUND(SUM(CASE WHEN coalesce(awards_summary.field_list->>'items/description' =
-                  awards_summary.field_list->>'items', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS items_description_percentage,
+                  awards_summary.field_list->>'items', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS all_items_description_percentage,
             ROUND(SUM(CASE WHEN awards_summary.field_list ? 'items/description' THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS items_description_percentage,
             ROUND(SUM(CASE WHEN coalesce(awards_summary.field_list->>'items/description' =
                   awards_summary.field_list->>'items', false) AND