Skip to content

Commit

Permalink
fix: calculate_coverage simplifies the query if "ALL " is prefixed to…
Browse files Browse the repository at this point in the history
… a field that is not within an array
  • Loading branch information
jpmckinney committed May 9, 2022
1 parent 3dced1a commit 869a9d0
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 15 deletions.
1 change: 1 addition & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ Changed

- :func:`~ocdskingfishercolab.save_dataframe_to_sheet` and :func:`save_dataframe_to_spreadsheet` do nothing if the data frame is empty.
- :func:`~ocdskingfishercolab.calculate_coverage`: Rename keyword arguments ``sql`` to ``print_sql`` and ``sql_only`` to ``return_sql``.
- :func:`~ocdskingfishercolab.calculate_coverage` simplifies the query if ``"ALL "`` is prefixed to a field that is not within an array.

Fixed
~~~~~
Expand Down
13 changes: 6 additions & 7 deletions ocdskingfishercolab/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,8 +466,8 @@ def get_table_and_pointer(scope, pointer):

# https://www.postgresql.org/docs/11/functions-json.html
def get_condition(table, pointer, mode):
# Test for the presence of the field in any object.
if mode == "any":
# Test for the presence of the field.
return f"{table}.field_list ? '{pointer}'"

# The logic from here is for mode == "all".
Expand All @@ -477,11 +477,11 @@ def get_condition(table, pointer, mode):
# end in "s", and only one object ends in "s" ("address").
array_indices = [i for i, part in enumerate(parts[:-1]) if part.endswith("s") and part != "address"]

# If the field is an immediate child, the comparison will end up comparing the field's count to itself.
# If the field is not within an array, simplify the logic from ALL to ANY.
if not array_indices:
closest_array = parts[0]
else:
closest_array = "/".join(parts[:array_indices[-1] + 1])
return f"{table}.field_list ? '{pointer}'"

closest_array = "/".join(parts[:array_indices[-1] + 1])

if len(array_indices) > 1:
next_closest_array = "/".join(parts[:array_indices[-2] + 1])
Expand Down Expand Up @@ -522,13 +522,12 @@ def get_condition(table, pointer, mode):
mode = "any"

table, pointer = get_table_and_pointer(scope, split[-1])
condition = get_condition(table, pointer, mode)

# Add a JOIN clause for the release_summary table, unless it is already in the FROM clause.
if table == "release_summary" and scope != "release_summary":
join = f"JOIN\n release_summary ON release_summary.id = {scope}.id"

condition = get_condition(table, pointer, mode)

# Add the field coverage.
alias = pointer.replace("/", "_").lower()
if mode == "all":
Expand Down
12 changes: 4 additions & 8 deletions tests/test_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,10 +367,8 @@ def test_calculate_coverage_all_one_to_one_1(db, capsys, tmpdir):
assert sql == textwrap.dedent("""\
SELECT
count(*) AS total_awards_summary,
ROUND(SUM(CASE WHEN coalesce(awards_summary.field_list->>'date' =
awards_summary.field_list->>'date', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS all_date_percentage,
ROUND(SUM(CASE WHEN coalesce(awards_summary.field_list->>'date' =
awards_summary.field_list->>'date', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS total_percentage
ROUND(SUM(CASE WHEN awards_summary.field_list ? 'date' THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS all_date_percentage,
ROUND(SUM(CASE WHEN awards_summary.field_list ? 'date' THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS total_percentage
FROM awards_summary
""") # noqa: E501
Expand All @@ -384,10 +382,8 @@ def test_calculate_coverage_all_one_to_one_2(db, capsys, tmpdir):
assert sql == textwrap.dedent("""\
SELECT
count(*) AS total_release_summary,
ROUND(SUM(CASE WHEN coalesce(release_summary.field_list->>'tender/id' =
release_summary.field_list->>'tender', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS all_tender_id_percentage,
ROUND(SUM(CASE WHEN coalesce(release_summary.field_list->>'tender/id' =
release_summary.field_list->>'tender', false) THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS total_percentage
ROUND(SUM(CASE WHEN release_summary.field_list ? 'tender/id' THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS all_tender_id_percentage,
ROUND(SUM(CASE WHEN release_summary.field_list ? 'tender/id' THEN 1 ELSE 0 END) * 100.0 / count(*), 2) AS total_percentage
FROM release_summary
""") # noqa: E501
Expand Down

0 comments on commit 869a9d0

Please sign in to comment.