Skip to content

Commit

Permalink
fix: Prefix all_ to the alias if "ALL " is prefixed to the field
Browse files Browse the repository at this point in the history
  • Loading branch information
jpmckinney committed May 6, 2022
1 parent 9e6cdb7 commit e9427b2
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 23 deletions.
2 changes: 2 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ Changed
Fixed
~~~~~


- :func:`~ocdskingfishercolab.calculate_coverage` uses the ``relatedprocesses_summary`` table for fields starting with ``relatedProcesses/``, where appropriate.
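- :func:`~ocdskingfishercolab.calculate_coverage` prefixes ``all_`` to the column alias if ``"ALL "`` is prefixed to the field, so that the column no longer shares an alias with the column for the same field without the prefix.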

0.3.8 (2022-04-27)
------------------
Expand Down
34 changes: 18 additions & 16 deletions ocdskingfishercolab/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,10 +464,6 @@ def get_table_and_pointer(scope, pointer):

return table, "/".join(parts)

def wrap(condition, pointer):
    """
    Return a SQL select-list expression for the percentage of rows that satisfy a condition.

    :param str condition: a SQL boolean expression, interpolated as-is into a ``CASE WHEN`` clause
    :param str pointer: the path from which the column alias is derived
    :returns: a ``ROUND(SUM(CASE ...) * 100.0 / count(*), 2)`` expression, aliased as ``{alias}_percentage``,
              where the alias is the pointer in lowercase with each ``/`` replaced by ``_``
    :rtype: str
    """
    # Derive the alias from the pointer: slashes become underscores, and the result is lowercased.
    alias = pointer.replace("/", "_").lower()
    return f"ROUND(SUM(CASE WHEN {condition} THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS {alias}_percentage"

# Default to the parent table of the first field.
if not scope:
all_tables = _all_tables()
Expand All @@ -481,19 +477,24 @@ def wrap(condition, pointer):
scope = candidate
break

join_release_summary = False
columns = []
columns = {}
conditions = []
join = ""
for field in fields:
split = field.split()
if len(split) == 2 and split[0].lower() == "all":
mode = "all"
else:
mode = "any"

table, pointer = get_table_and_pointer(scope, split[-1])

# Add a JOIN clause for the release_summary table, unless it is already in the FROM clause.
if table == "release_summary" and scope != "release_summary":
join_release_summary = True
join = f"JOIN\n release_summary ON release_summary.id = {scope}.id"

# If the first token isn't "ALL" (case-insensitive) or if there are more than 2 tokens, behave as if only the last token was provided.
if len(split) == 2 and split[0].lower() == "all":
if mode == "all":
parts = pointer.split("/")
# https://github.com/open-contracting/kingfisher-colab/issues/62
one_to_manys = [part for part in parts[:-1] if part.endswith("s")]
Expand All @@ -520,20 +521,21 @@ def wrap(condition, pointer):
condition = f"{table}.field_list ? '{pointer}'"

# Add the field coverage.
columns.append(wrap(condition, pointer))
alias = pointer.replace("/", "_").lower()
if mode == "all":
alias = f"all_{alias}"
columns[alias] = condition

# Collect the conditions for co-occurrence coverage.
conditions.append(condition)

# Add the co-occurrence coverage.
columns.append(wrap(" AND\n ".join(conditions), "total"))

select = ",\n ".join(columns)
if join_release_summary:
join = f"JOIN\n release_summary ON release_summary.id = {scope}.id"
else:
join = ""
columns["total"] = " AND\n ".join(conditions)

select = ",\n ".join(
f"ROUND(SUM(CASE WHEN {condition} THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS {alias}_percentage"
for alias, condition in columns.items()
)
query = textwrap.dedent(f"""\
SELECT
count(*) AS total_{scope},
Expand Down
12 changes: 5 additions & 7 deletions tests/test_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ def test_calculate_coverage_all_one_to_one(db, capsys, tmpdir):
SELECT
count(*) AS total_awards_summary,
ROUND(SUM(CASE WHEN coalesce(awards_summary.field_list->>'date' =
awards_summary.field_list->>'date', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS date_percentage,
awards_summary.field_list->>'date', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS all_date_percentage,
ROUND(SUM(CASE WHEN coalesce(awards_summary.field_list->>'date' =
awards_summary.field_list->>'date', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS total_percentage
FROM awards_summary
Expand All @@ -385,7 +385,7 @@ def test_calculate_coverage_all_one_to_one_s(db, capsys, tmpdir):
SELECT
count(*) AS total_release_summary,
ROUND(SUM(CASE WHEN coalesce(release_summary.field_list->>'parties/address/region' =
release_summary.field_list->>'address', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS parties_address_region_percentage,
release_summary.field_list->>'address', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS all_parties_address_region_percentage,
ROUND(SUM(CASE WHEN coalesce(release_summary.field_list->>'parties/address/region' =
release_summary.field_list->>'address', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS total_percentage
FROM release_summary
Expand All @@ -404,7 +404,7 @@ def test_calculate_coverage_all_one_to_many(db, capsys, tmpdir):
SELECT
count(*) AS total_awards_summary,
ROUND(SUM(CASE WHEN coalesce(awards_summary.field_list->>'items/description' =
awards_summary.field_list->>'items', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS items_description_percentage,
awards_summary.field_list->>'items', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS all_items_description_percentage,
ROUND(SUM(CASE WHEN coalesce(awards_summary.field_list->>'items/description' =
awards_summary.field_list->>'items', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS total_percentage
FROM awards_summary
Expand All @@ -422,7 +422,7 @@ def test_calculate_coverage_all_many_to_many(db, capsys, tmpdir):
SELECT
count(*) AS total_release_summary,
ROUND(SUM(CASE WHEN coalesce(release_summary.field_list->>'awards/items/additionalClassifications/scheme' =
release_summary.field_list->>'additionalClassifications', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS awards_items_additionalclassifications_scheme_percentage,
release_summary.field_list->>'additionalClassifications', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS all_awards_items_additionalclassifications_scheme_percentage,
ROUND(SUM(CASE WHEN coalesce(release_summary.field_list->>'awards/items/additionalClassifications/scheme' =
release_summary.field_list->>'additionalClassifications', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS total_percentage
FROM release_summary
Expand All @@ -435,13 +435,11 @@ def test_calculate_coverage_all_many_to_many(db, capsys, tmpdir):
def test_calculate_coverage_all_mixed(db, capsys, tmpdir):
sql = calculate_coverage(["ALL :items/description", ":items/description"], scope="awards_summary", sql=False, sql_only=True)

# There should not be two columns named "items_description_percentage", but there is a bug.
# https://github.com/open-contracting/kingfisher-colab/issues/64
assert sql == textwrap.dedent("""\
SELECT
count(*) AS total_awards_summary,
ROUND(SUM(CASE WHEN coalesce(awards_summary.field_list->>'items/description' =
awards_summary.field_list->>'items', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS items_description_percentage,
awards_summary.field_list->>'items', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS all_items_description_percentage,
ROUND(SUM(CASE WHEN awards_summary.field_list ? 'items/description' THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS items_description_percentage,
ROUND(SUM(CASE WHEN coalesce(awards_summary.field_list->>'items/description' =
awards_summary.field_list->>'items', false) AND
Expand Down

0 comments on commit e9427b2

Please sign in to comment.