Skip to content

Commit

Permalink
Merge pull request #1406: Miscellaneous changes
Browse files (browse the repository at this point in the history)
  • Loading branch information
victorlin committed Feb 7, 2024
2 parents: abc86a8 + e13834d; commit 4bf72f8
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 18 deletions.
25 changes: 11 additions & 14 deletions augur/filter/include_exclude_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def filter_by_exclude_where(metadata, exclude_where) -> FilterFunctionReturn:
return filtered


def filter_by_query(metadata, query) -> FilterFunctionReturn:
def filter_by_query(metadata: pd.DataFrame, query: str) -> FilterFunctionReturn:
"""Filter metadata in the given pandas DataFrame with a query string and return
the strain names that pass the filter.
Expand Down Expand Up @@ -204,7 +204,12 @@ def filter_by_query(metadata, query) -> FilterFunctionReturn:
for column in metadata_copy.columns:
metadata_copy[column] = pd.to_numeric(metadata_copy[column], errors='ignore')

return set(metadata_copy.query(query).index.values)
try:
return set(metadata_copy.query(query).index.values)
except Exception as e:
if isinstance(e, PandasUndefinedVariableError):
raise AugurError(f"Query contains a column that does not exist in metadata.") from e
raise AugurError(f"Internal Pandas error when applying query:\n\t{e}\nEnsure the syntax is valid per <https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#indexing-query>.") from e


def filter_by_ambiguous_date(metadata, date_column, ambiguity) -> FilterFunctionReturn:
Expand Down Expand Up @@ -733,18 +738,10 @@ def apply_filters(metadata, exclude_by: List[FilterOption], include_by: List[Fil
for filter_function, filter_kwargs in exclude_by:
# Apply the current function with its given arguments. Each function
# returns a set of strains that passed the corresponding filter.
try:
passed = metadata.pipe(
filter_function,
**filter_kwargs,
)
except Exception as e:
if filter_function is filter_by_query:
if isinstance(e, PandasUndefinedVariableError):
raise AugurError(f"Query contains a column that does not exist in metadata.") from e
raise AugurError(f"Internal Pandas error when applying query:\n\t{e}\nEnsure the syntax is valid per <https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#indexing-query>.") from e
else:
raise
passed = metadata.pipe(
filter_function,
**filter_kwargs,
)

# Track the strains that failed this filter, so we can explain why later
# on and update the list of strains to keep to intersect with the
Expand Down
6 changes: 3 additions & 3 deletions augur/refine.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from Bio import Phylo
from .dates import get_numerical_dates
from .dates.errors import InvalidYearBounds
from .io.metadata import DEFAULT_DELIMITERS, DEFAULT_ID_COLUMNS, InvalidDelimiter, read_metadata
from .io.metadata import DEFAULT_DELIMITERS, DEFAULT_ID_COLUMNS, METADATA_DATE_COLUMN, InvalidDelimiter, read_metadata
from .utils import read_tree, write_json, InvalidTreeError
from .errors import AugurError
from treetime.vcf_utils import read_vcf
Expand Down Expand Up @@ -236,8 +236,8 @@ def run(args):

# save input state string for later export
for n in T.get_terminals():
if n.name in metadata.index and 'date' in metadata.columns:
n.raw_date = metadata.at[n.name, 'date']
if n.name in metadata.index and METADATA_DATE_COLUMN in metadata.columns:
n.raw_date = metadata.at[n.name, METADATA_DATE_COLUMN]

if args.date_confidence:
time_inference_mode = 'always' if args.date_inference=='marginal' else 'only-final'
Expand Down
2 changes: 1 addition & 1 deletion tests/functional/filter/cram/filter-query-errors.t
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ However, other Pandas errors are not so helpful, so a link is provided for users

$ ${AUGUR} filter \
> --metadata "$TESTDIR/../data/metadata.tsv" \
> --query "invalid = 'value'" \
> --query "country = 'value'" \
> --output-strains filtered_strains.txt > /dev/null
ERROR: Internal Pandas error when applying query:
cannot assign without a target object
Expand Down

0 comments on commit 4bf72f8

Please sign in to comment.