nextstrain · victorlin · Jun 30, 2023 · Jun 27, 2023 · Jun 29, 2023 · Jun 29, 2023
diff --git a/augur/filter/include_exclude_rules.py b/augur/filter/include_exclude_rules.py
@@ -680,7 +680,7 @@ def apply_filters(metadata, exclude_by: List[FilterOption], include_by: List[Fil
     strains_to_keep = set(metadata.index.values)
     strains_to_filter = []
     strains_to_force_include = []
-    distinct_strains_to_force_include = set()
+    distinct_strains_to_force_include: Set = set()
 
     # Track strains that should be included regardless of filters.
     for include_function, include_kwargs in include_by:
@@ -713,9 +713,9 @@ def apply_filters(metadata, exclude_by: List[FilterOption], include_by: List[Fil
             if filter_function is filter_by_query:
                 try:
                     # pandas ≥1.5.0 only
-                    UndefinedVariableError = pd.errors.UndefinedVariableError
+                    UndefinedVariableError = pd.errors.UndefinedVariableError  # type: ignore
                 except AttributeError:
-                    UndefinedVariableError = pd.core.computation.ops.UndefinedVariableError
+                    UndefinedVariableError = pd.core.computation.ops.UndefinedVariableError  # type: ignore
                 if isinstance(e, UndefinedVariableError):
                     raise AugurError(f"Query contains a column that does not exist in metadata.") from e
                 raise AugurError(f"Error when applying query. Ensure the syntax is valid per <https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#indexing-query>.") from e

diff --git a/augur/filter/subsample.py b/augur/filter/subsample.py
@@ -475,7 +475,7 @@ def _calculate_fractional_sequences_per_group(
     0.4844
     """
     lo = 1e-5
-    hi = target_max_value
+    hi = float(target_max_value)
 
     while (hi / lo) > 1.1:
         mid = (lo + hi) / 2

diff --git a/augur/io/shell_command_runner.py b/augur/io/shell_command_runner.py
@@ -8,7 +8,7 @@
     from signal import SIGKILL
 except ImportError:
     # A non-POSIX platform
-    SIGKILL = None
+    SIGKILL = None  # type: ignore[assignment]
 
 
 def run_shell_command(cmd, raise_errors=False, extra_env=None):

diff --git a/augur/titer_model.py b/augur/titer_model.py
@@ -1,3 +1,6 @@
+# Prevent mypy from erroring on "flu" not being defined.
+# mypy: disable-error-code="name-defined"
+
 import os
 import logging
 import numpy as np

diff --git a/augur/utils.py b/augur/utils.py
@@ -278,7 +278,7 @@ def available_cpu_cores(fallback: int = 1) -> int:
     **computer**, if determinable, otherwise the *fallback* number (which
     defaults to 1).
     """
-    try:
+    if hasattr(os, "sched_getaffinity"):
         # Note that this is the correct function to use, not os.cpu_count(), as
         # described in the latter's documentation.
         #
@@ -290,7 +290,7 @@ def available_cpu_cores(fallback: int = 1) -> int:
         # naively use all 24, we'd end up with two threads across the 12 cores.
         # This would degrade performance rather than improve it!
         return len(os.sched_getaffinity(0))
-    except:
+    else:
         # cpu_count() returns None if the value is indeterminable.
         return os.cpu_count() or fallback
 

diff --git a/augur/validate.py b/augur/validate.py
@@ -137,7 +137,7 @@ def format_path(path: Iterable[Union[str, int]]) -> str:
     '.x.y[42].z'
     """
     def valid_identifier(x) -> bool:
-        return isinstance(x, str) and re.search(r'^[a-zA-Z$_][a-zA-Z0-9_$]*$', x)
+        return isinstance(x, str) and re.search(r'^[a-zA-Z$_][a-zA-Z0-9_$]*$', x) is not None
 
     def fmt(x) -> str:
         return (f"[{x}]"  if isinstance(x, int)  else

diff --git a/docs/contribute/DEV_DOCS.md b/docs/contribute/DEV_DOCS.md
@@ -140,6 +140,19 @@ Troubleshooting tip: As tests run on the development code in the augur repositor
 We use continuous integration with GitHub Actions to run tests on every pull request submitted to the project.
 We use [codecov](https://codecov.io/) to automatically produce test coverage for new contributions and the project as a whole.
 
+### Type annotations
+
+Our goal is to gradually add [type annotations][] to our code so that we can catch errors earlier and be explicit about the interfaces expected and provided.  Annotation pairs well with the functional approach taken by the package.
+
+During development you can run static type checks using [mypy][]:
+
+    $ mypy
+    # No output is good!
+
+There are also many [editor integrations for mypy][].
+
+[editor integrations for mypy]: https://github.com/python/mypy#integrations
+
 ### Releasing
 
 Versions for this project, Augur, from 3.0.0 onwards aim to follow the

diff --git a/mypy.ini b/mypy.ini
@@ -0,0 +1,49 @@
+[mypy]
+# Don't set python_version. Instead, use the default behavior of checking for
+# compatibility against the version of Python used to run mypy.
+files = augur/
+
+# Require functions with an annotated return type to be explicit about
+# potentially returning None (via Optional[…]).
+strict_optional = False
+
+# In the future maybe we can contribute typing stubs for these modules (either
+# complete stubs in the python/typeshed repo or partial stubs just in
+# this repo), but for now that's more work than we want to invest.  These
+# sections let us ignore missing stubs for specific modules without hiding all
+# missing errors like (--ignore-missing-imports).
+[mypy-treetime.*]
+ignore_missing_imports = True
+
+[mypy-isodate.*]
+ignore_missing_imports = True
+
+[mypy-Bio.*]
+ignore_missing_imports = True
+
+[mypy-BCBio.*]
+ignore_missing_imports = True
+
+[mypy-matplotlib.*]
+ignore_missing_imports = True
+
+[mypy-seaborn.*]
+ignore_missing_imports = True
+
+[mypy-cvxopt.*]
+ignore_missing_imports = True
+
+[mypy-ipdb.*]
+ignore_missing_imports = True
+
+[mypy-jsonschema.*]
+ignore_missing_imports = True
+
+[mypy-pyfastx.*]
+ignore_missing_imports = True
+
+[mypy-networkx.*]
+ignore_missing_imports = True
+
+[mypy-scipy.*]
+ignore_missing_imports = True
diff --git a/setup.py b/setup.py
@@ -70,7 +70,9 @@
             "cram >=0.7",
             "deepdiff >=4.3.2",
             "freezegun >=0.3.15",
+            "mypy",
             "nextstrain-sphinx-theme >=2022.5",
+            "pandas-stubs >=1.0.0, ==1.*",
             "pylint >=1.7.6",
             "pytest >=5.4.1",
             "pytest-cov >=2.8.1",
@@ -83,6 +85,8 @@
             "sphinx-markdown-tables >= 0.0.9",
             "sphinx-rtd-theme >=0.4.3",
             "sphinx-autodoc-typehints >=1.21.4",
+            "types-jsonschema >=3.0.0, ==3.*",
+            "types-setuptools",
             "wheel >=0.32.3",
             "ipdb >=0.10.1"
         ]

diff --git a/tests/test_mypy.py b/tests/test_mypy.py
@@ -0,0 +1,9 @@
+from pathlib import Path
+from subprocess import run
+
+topdir = Path(__file__).resolve().parent.parent
+
+def test_mypy():
+    # Check the exit status ourselves for nicer test output on failure
+    result = run(["mypy"], cwd = topdir)
+    assert result.returncode == 0, "mypy exited with errors"