Skip to content

Commit

Permalink
fix: calculate_coverage no longer warns about address fields, closes #62
Browse files Browse the repository at this point in the history
  • Loading branch information
jpmckinney committed May 9, 2022
1 parent d706145 commit e2b8d72
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 17 deletions.
2 changes: 1 addition & 1 deletion docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ Changed
Fixed
~~~~~


- :func:`~ocdskingfishercolab.calculate_coverage` uses the ``relatedprocesses_summary`` table for fields starting with ``relatedProcesses/``, where appropriate.
- :func:`~ocdskingfishercolab.calculate_coverage` prefixes ``all_`` to the column if ``"ALL "`` is prefixed to the field.
- :func:`~ocdskingfishercolab.calculate_coverage` no longer warns about ``address`` fields.

0.3.8 (2022-04-27)
------------------
Expand Down
23 changes: 12 additions & 11 deletions ocdskingfishercolab/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,8 @@ def get_table_and_pointer(scope, pointer):
join = ""
for field in fields:
split = field.split()

# If the first token isn't "ALL" or if there are more than 2, behave as if only the last token was provided.
if len(split) == 2 and split[0].lower() == "all":
mode = "all"
else:
Expand All @@ -493,35 +495,34 @@ def get_table_and_pointer(scope, pointer):
if table == "release_summary" and scope != "release_summary":
join = f"JOIN\n release_summary ON release_summary.id = {scope}.id"

# If the first token isn't "ALL" or if there are more than 2, behave as if only the last token was provided.
# https://www.postgresql.org/docs/11/functions-json.html
if mode == "all":
parts = pointer.split("/")
# This causes spurious warnings.
# https://github.com/open-contracting/kingfisher-colab/issues/62
one_to_manys = [part for part in parts[:-1] if part.endswith("s")]

# It would be more robust to analyze the release schema. That said, as of OCDS 1.1.5, all arrays of objects
# end in "s", and only one object ends in "s" ("address").
arrays = [part for part in parts[:-1] if part.endswith("s") and part != "address"]

# This logic is likely incorrect.
# https://github.com/open-contracting/kingfisher-colab/issues/63
if not one_to_manys:
if not arrays:
nearest_one_to_many_parent = parts[0]
else:
nearest_one_to_many_parent = one_to_manys[-1]
nearest_one_to_many_parent = arrays[-1]

if len(one_to_manys) > 1:
if len(arrays) > 1:
print(
'WARNING: Results might be inaccurate due to nested arrays. Check that there is exactly one '
f"`{'/'.join(one_to_manys[:-1])}` entry per `{table[:-8]}`."
f"`{'/'.join(arrays[:-1])}` entry per `{table[:-8]}`."
)

# This logic is likely incorrect.
# https://github.com/open-contracting/kingfisher-colab/issues/63#issuecomment-1120005015
# Test whether the number of occurrences of the path and its nearest 1:n parent are equal.
condition = (
f"coalesce({table}.field_list->>'{pointer}' =\n"
f" {table}.field_list->>'{nearest_one_to_many_parent}', false)"
)
else:
# Test for the presence of the field.
# https://www.postgresql.org/docs/11/functions-json.html
condition = f"{table}.field_list ? '{pointer}'"

# Add the field coverage.
Expand Down
8 changes: 3 additions & 5 deletions tests/test_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,16 +385,14 @@ def test_calculate_coverage_all_one_to_one_s(db, capsys, tmpdir):
SELECT
count(*) AS total_release_summary,
ROUND(SUM(CASE WHEN coalesce(release_summary.field_list->>'parties/address/region' =
release_summary.field_list->>'address', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS all_parties_address_region_percentage,
release_summary.field_list->>'parties', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS all_parties_address_region_percentage,
ROUND(SUM(CASE WHEN coalesce(release_summary.field_list->>'parties/address/region' =
release_summary.field_list->>'address', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS total_percentage
release_summary.field_list->>'parties', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS total_percentage
FROM release_summary
""") # noqa: E501

# The output should be empty, but there is a bug.
# https://github.com/open-contracting/kingfisher-colab/issues/62
# assert capsys.readouterr().out == ""
assert capsys.readouterr().out == ""


def test_calculate_coverage_all_one_to_many(db, capsys, tmpdir):
Expand Down

0 comments on commit e2b8d72

Please sign in to comment.