Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add type checking (mypy) #1244

Merged
merged 7 commits into from Jun 30, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 3 additions & 3 deletions augur/filter/include_exclude_rules.py
Expand Up @@ -680,7 +680,7 @@ def apply_filters(metadata, exclude_by: List[FilterOption], include_by: List[Fil
strains_to_keep = set(metadata.index.values)
strains_to_filter = []
strains_to_force_include = []
distinct_strains_to_force_include = set()
distinct_strains_to_force_include: Set = set()

# Track strains that should be included regardless of filters.
for include_function, include_kwargs in include_by:
Expand Down Expand Up @@ -713,9 +713,9 @@ def apply_filters(metadata, exclude_by: List[FilterOption], include_by: List[Fil
if filter_function is filter_by_query:
try:
# pandas ≥1.5.0 only
UndefinedVariableError = pd.errors.UndefinedVariableError
UndefinedVariableError = pd.errors.UndefinedVariableError # type: ignore
except AttributeError:
UndefinedVariableError = pd.core.computation.ops.UndefinedVariableError
UndefinedVariableError = pd.core.computation.ops.UndefinedVariableError # type: ignore
if isinstance(e, UndefinedVariableError):
raise AugurError(f"Query contains a column that does not exist in metadata.") from e
raise AugurError(f"Error when applying query. Ensure the syntax is valid per <https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#indexing-query>.") from e
Expand Down
2 changes: 1 addition & 1 deletion augur/filter/subsample.py
Expand Up @@ -475,7 +475,7 @@ def _calculate_fractional_sequences_per_group(
0.4844
"""
lo = 1e-5
hi = target_max_value
hi = float(target_max_value)

while (hi / lo) > 1.1:
mid = (lo + hi) / 2
Expand Down
2 changes: 1 addition & 1 deletion augur/io/shell_command_runner.py
Expand Up @@ -8,7 +8,7 @@
from signal import SIGKILL
except ImportError:
# A non-POSIX platform
SIGKILL = None
SIGKILL = None # type: ignore[assignment]


def run_shell_command(cmd, raise_errors=False, extra_env=None):
Expand Down
3 changes: 3 additions & 0 deletions augur/titer_model.py
@@ -1,3 +1,6 @@
# Prevent mypy from erroring on "flu" not being defined.
# mypy: disable-error-code="name-defined"
victorlin marked this conversation as resolved.
Show resolved Hide resolved

import os
import logging
import numpy as np
Expand Down
4 changes: 2 additions & 2 deletions augur/utils.py
Expand Up @@ -278,7 +278,7 @@ def available_cpu_cores(fallback: int = 1) -> int:
**computer**, if determinable, otherwise the *fallback* number (which
defaults to 1).
"""
try:
if hasattr(os, "sched_getaffinity"):
# Note that this is the correct function to use, not os.cpu_count(), as
# described in the latter's documentation.
#
Expand All @@ -290,7 +290,7 @@ def available_cpu_cores(fallback: int = 1) -> int:
# naively use all 24, we'd end up with two threads across the 12 cores.
# This would degrade performance rather than improve it!
return len(os.sched_getaffinity(0))
except:
else:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a semantic change here in the edge case, because os.sched_getaffinity(0) can fail and raise a Python exception. When it does, Augur will be broken with the change to if/else instead of falling back to os.cpu_count() with the original try/except.

I think this change should be reverted and mypy appeased another way.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

# cpu_count() returns None if the value is indeterminable.
return os.cpu_count() or fallback

Expand Down
2 changes: 1 addition & 1 deletion augur/validate.py
Expand Up @@ -137,7 +137,7 @@ def format_path(path: Iterable[Union[str, int]]) -> str:
'.x.y[42].z'
"""
def valid_identifier(x) -> bool:
return isinstance(x, str) and re.search(r'^[a-zA-Z$_][a-zA-Z0-9_$]*$', x)
return isinstance(x, str) and re.search(r'^[a-zA-Z$_][a-zA-Z0-9_$]*$', x) is not None

def fmt(x) -> str:
return (f"[{x}]" if isinstance(x, int) else
Expand Down
13 changes: 13 additions & 0 deletions docs/contribute/DEV_DOCS.md
Expand Up @@ -140,6 +140,19 @@ Troubleshooting tip: As tests run on the development code in the augur repositor
We use continuous integration with GitHub Actions to run tests on every pull request submitted to the project.
We use [codecov](https://codecov.io/) to automatically produce test coverage for new contributions and the project as a whole.

### Type annotations

Our goal is to gradually add [type annotations][] to our code so that we can catch errors earlier and be explicit about the interfaces expected and provided. Annotation pairs well with the functional approach taken by the package.

During development you can run static type checks using [mypy][]:

$ mypy
# No output is good!

There are also many [editor integrations for mypy][].

[editor integrations for mypy]: https://github.com/python/mypy#integrations

### Releasing

Versions for this project, Augur, from 3.0.0 onwards aim to follow the
Expand Down
49 changes: 49 additions & 0 deletions mypy.ini
@@ -0,0 +1,49 @@
[mypy]
# Don't set python_version. Instead, use the default behavior of checking for
# compatibility against the version of Python used to run mypy.
files = augur/

# Require functions with an annotated return type to be explicit about
# potentially returning None (via Optional[…]).
strict_optional = False

# In the future maybe we can contribute typing stubs for these modules (either
# complete stubs in the python/typeshed repo or partial stubs just in
# this repo), but for now that's more work than we want to invest. These
# sections let us ignore missing stubs for specific modules without hiding all
# missing errors like (--ignore-missing-imports).
[mypy-treetime.*]
ignore_missing_imports = True

[mypy-isodate.*]
ignore_missing_imports = True

[mypy-Bio.*]
ignore_missing_imports = True

[mypy-BCBio.*]
ignore_missing_imports = True

[mypy-matplotlib.*]
ignore_missing_imports = True

[mypy-seaborn.*]
ignore_missing_imports = True

[mypy-cvxopt.*]
ignore_missing_imports = True

[mypy-ipdb.*]
ignore_missing_imports = True

[mypy-jsonschema.*]
ignore_missing_imports = True

[mypy-pyfastx.*]
ignore_missing_imports = True

[mypy-networkx.*]
ignore_missing_imports = True

[mypy-scipy.*]
ignore_missing_imports = True
4 changes: 4 additions & 0 deletions setup.py
Expand Up @@ -70,7 +70,9 @@
"cram >=0.7",
"deepdiff >=4.3.2",
"freezegun >=0.3.15",
"mypy",
"nextstrain-sphinx-theme >=2022.5",
"pandas-stubs >=1.0.0, ==1.*",
"pylint >=1.7.6",
"pytest >=5.4.1",
"pytest-cov >=2.8.1",
Expand All @@ -83,6 +85,8 @@
"sphinx-markdown-tables >= 0.0.9",
"sphinx-rtd-theme >=0.4.3",
"sphinx-autodoc-typehints >=1.21.4",
"types-jsonschema >=3.0.0, ==3.*",
"types-setuptools",
"wheel >=0.32.3",
"ipdb >=0.10.1"
]
Expand Down
9 changes: 9 additions & 0 deletions tests/test_mypy.py
@@ -0,0 +1,9 @@
from pathlib import Path
from subprocess import run

topdir = Path(__file__).resolve().parent.parent

def test_mypy():
# Check the exit status ourselves for nicer test output on failure
result = run(["mypy"], cwd = topdir)
assert result.returncode == 0, "mypy exited with errors"