Skip to content

Commit

Permalink
chore: Rewrite get_table_and_pointer to be able to share logic
Browse files Browse the repository at this point in the history
  • Loading branch information
jpmckinney committed May 10, 2022
1 parent 0e9b582 commit 45ec1f8
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 27 deletions.
40 changes: 16 additions & 24 deletions ocdskingfishercolab/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,12 +443,7 @@ def calculate_coverage(fields, scope=None, print_sql=True, return_sql=False):
:rtype: pandas.DataFrame or sql.run.ResultSet
"""

def get_table_and_pointer(scope, pointer):
# Handle relative pointers.
if pointer.startswith(":"):
return scope, pointer[1:]

# Handle absolute pointers.
def get_table_and_pointer(tables, pointer):
parts = pointer.split("/")
table = "release_summary"

Expand All @@ -457,9 +452,9 @@ def get_table_and_pointer(scope, pointer):
for i in range(len(parts), 0, -1):
# Kingfisher Summarize tables are lowercase.
candidate = f"{'_'.join(parts[:i])}_summary".lower()
if scope == candidate:
if candidate in tables:
parts = parts[i:]
table = scope
table = candidate
break

return table, "/".join(parts)
Expand All @@ -481,50 +476,47 @@ def get_condition(table, pointer, mode):
if not array_indices:
return f"{table}.field_list ? '{pointer}'"

closest_array = "/".join(parts[:array_indices[-1] + 1])

# If arrays are nested, then the condition below can be satisfied for, e.g., awards/items/description, if there
# are 2 awards, only one of which sets items/description.
if len(array_indices) > 1:
next_closest_array = "/".join(parts[:array_indices[-2] + 1])
print(
'WARNING: Results might be inaccurate due to nested arrays. Check that there is exactly one '
f"`{next_closest_array}` entry per {table} row."
f"`{'/'.join(parts[:array_indices[-2] + 1])}` path per {table} row."
)

# Test whether the number of occurrences of the path and its closest enclosing array are equal.
return (
f"coalesce({table}.field_list->>'{pointer}' =\n"
f" {table}.field_list->>'{closest_array}', false)"
f" {table}.field_list->>'{'/'.join(parts[:array_indices[-1] + 1])}', false)"
)

if not fields:
raise MissingFieldsError("You must provide a list of fields as the first argument to `calculate_coverage`.")

# Default to the parent table of the first field.
if not scope:
all_tables = _all_tables()
parts = fields[0].split()[-1].split("/")
scope = "release_summary"

for i in range(len(parts), 0, -1):
# Kingfisher Summarize tables are lowercase.
candidate = f"{'_'.join(parts[:i])}_summary".lower()
if candidate in all_tables:
scope = candidate
break
scope, _ = get_table_and_pointer(_all_tables(), fields[0].split()[-1])

columns = {}
conditions = []
join = ""
for field in fields:
split = field.split()
pointer = split[-1]

# If the first token isn't "ALL" or if there are more than 2, behave as if only the last token was provided.
if len(split) == 2 and split[0].lower() == "all":
mode = "all"
else:
mode = "any"

table, pointer = get_table_and_pointer(scope, split[-1])
# Handle relative pointers.
if pointer.startswith(":"):
table, pointer = scope, pointer[1:]
# Handle absolute pointers.
else:
table, pointer = get_table_and_pointer({scope}, pointer)

condition = get_condition(table, pointer, mode)

# Add a JOIN clause for the release_summary table, unless it is already in the FROM clause.
Expand Down
6 changes: 3 additions & 3 deletions tests/test_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,7 +440,7 @@ def test_calculate_coverage_all_many_to_many_2(db, capsys, tmpdir):
""") # noqa: E501

assert capsys.readouterr().out == "WARNING: Results might be inaccurate due to nested arrays. Check that there is exactly one `awards` entry per release_summary row.\n" # noqa: E501
assert capsys.readouterr().out == "WARNING: Results might be inaccurate due to nested arrays. Check that there is exactly one `awards` path per release_summary row.\n" # noqa: E501


def test_calculate_coverage_all_many_to_many_3(db, capsys, tmpdir):
Expand All @@ -458,7 +458,7 @@ def test_calculate_coverage_all_many_to_many_3(db, capsys, tmpdir):
""") # noqa: E501

assert capsys.readouterr().out == "WARNING: Results might be inaccurate due to nested arrays. Check that there is exactly one `awards/items` entry per release_summary row.\n" # noqa: E501
assert capsys.readouterr().out == "WARNING: Results might be inaccurate due to nested arrays. Check that there is exactly one `awards/items` path per release_summary row.\n" # noqa: E501


def test_calculate_coverage_all_many_to_many_interleaved(db, capsys, tmpdir):
Expand All @@ -476,7 +476,7 @@ def test_calculate_coverage_all_many_to_many_interleaved(db, capsys, tmpdir):
""") # noqa: E501

assert capsys.readouterr().out == "WARNING: Results might be inaccurate due to nested arrays. Check that there is exactly one `a/bs/c/ds` entry per release_summary row.\n" # noqa: E501
assert capsys.readouterr().out == "WARNING: Results might be inaccurate due to nested arrays. Check that there is exactly one `a/bs/c/ds` path per release_summary row.\n" # noqa: E501


def test_calculate_coverage_all_mixed(db, capsys, tmpdir):
Expand Down

0 comments on commit 45ec1f8

Please sign in to comment.