Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 47 additions & 53 deletions scripts/run_mypy.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,13 @@

import argparse
import importlib
import io
import os
import pathlib
import subprocess
import sys

from collections.abc import Iterator

import pandas
import pandas as pd

DP_ROOT = pathlib.Path(__file__).absolute().parent.parent
FAILING = """
Expand Down Expand Up @@ -59,55 +58,25 @@ def enforce_pep561(module_name):
return


def mypy_to_pandas(input_lines: Iterator[str]) -> pandas.DataFrame:
def mypy_to_pandas(mypy_result: str) -> pd.DataFrame:
"""Reformats mypy output with error codes to a DataFrame.

Adapted from: https://gist.github.com/michaelosthege/24d0703e5f37850c9e5679f69598930a
"""
current_section = None
data = {
"file": [],
"line": [],
"type": [],
"errorcode": [],
"message": [],
}
for line in input_lines:
line = line.strip()
elems = line.split(":")
if len(elems) < 3:
continue
try:
file, lineno, message_type, *_ = elems[0:3]
message_type = message_type.strip()
if message_type == "error":
current_section = line.split(" [")[-1][:-1]
message = line.replace(f"{file}:{lineno}: {message_type}: ", "").replace(
f" [{current_section}]", ""
)
data["file"].append(file)
data["line"].append(lineno)
data["type"].append(message_type)
data["errorcode"].append(current_section)
data["message"].append(message)
except Exception as ex:
print(elems)
print(ex)
return pandas.DataFrame(data=data).set_index(["file", "line"])


def check_no_unexpected_results(mypy_lines: Iterator[str]):
return pd.read_json(io.StringIO(mypy_result), lines=True)


def check_no_unexpected_results(mypy_df: pd.DataFrame, show_expected: bool):
"""Compare mypy results with list of known FAILING files.

Exits the process with non-zero exit code upon unexpected results.
"""
df = mypy_to_pandas(mypy_lines)
all_files = {
str(fp).replace(str(DP_ROOT), "").strip(os.sep).replace(os.sep, "/")
for fp in DP_ROOT.glob("pymc/**/*.py")
if "tests" not in str(fp)
}
failing = set(df.reset_index().file.str.replace(os.sep, "/", regex=False))
failing = set(mypy_df.file.str.replace(os.sep, "/", regex=False))
if not failing.issubset(all_files):
raise Exception(
"Mypy should have ignored these files:\n"
Expand All @@ -122,13 +91,24 @@ def check_no_unexpected_results(mypy_lines: Iterator[str]):
print(f"{len(passing)}/{len(all_files)} files pass as expected.")
else:
print("!!!!!!!!!")
print(f"{len(unexpected_failing)} files unexpectedly failed.")
print(f"{len(unexpected_failing)} files unexpectedly failed:")
print("\n".join(sorted(map(str, unexpected_failing))))
print(
"These files did not fail before, so please check the above output"
f" for errors in {unexpected_failing} and fix them."
)
print("You can run `python scripts/run_mypy.py --verbose` to reproduce this test locally.")

if show_expected:
print(
"\nThese files did not fail before, so please check the above output"
f" for errors in {unexpected_failing} and fix them."
)
else:
print("\nThese files did not fail before. Fix all errors reported in the output above.")
print(
f"\nNote: In addition to these errors, {len(failing.intersection(expected_failing))} errors in files "
Copy link

Copilot AI Nov 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This counts the number of files with expected failures, not the number of individual errors. The message is misleading because it says "{count} errors in files" when it actually means "errors from {count} files". Consider rephrasing to: "In addition to these errors, errors from {count} files marked as 'expected failures' were also found."

Suggested change
f"\nNote: In addition to these errors, {len(failing.intersection(expected_failing))} errors in files "
f"\nNote: In addition to these errors, errors from {len(failing.intersection(expected_failing))} files "

Copilot uses AI. Check for mistakes.
f'marked as "expected failures" were also found. To see these failures, run: '
f"`python scripts/run_mypy.py --show-expected`"
)

print("You can run `python scripts/run_mypy.py` to reproduce this test locally.")

sys.exit(1)

if unexpected_passing:
Expand All @@ -149,7 +129,12 @@ def check_no_unexpected_results(mypy_lines: Iterator[str]):
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run mypy type checks on PyMC codebase.")
parser.add_argument(
"--verbose", action="count", default=0, help="Pass this to print mypy output."
"--verbose", action="count", default=1, help="Pass this to print mypy output."
Copy link

Copilot AI Nov 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The help text for --verbose is now misleading. Since default=1 makes verbose mode enabled by default, the help text should reflect this. Consider changing it to something like "Verbose output is enabled by default. Pass --verbose 0 to suppress output." or simply removing this argument in favor of a --quiet flag.

Suggested change
"--verbose", action="count", default=1, help="Pass this to print mypy output."
"--verbose", action="count", default=1, help="Verbose output is enabled by default. Pass --verbose 0 to suppress output."

Copilot uses AI. Check for mistakes.
)
parser.add_argument(
"--show-expected",
action="store_true",
help="Also show expected failures in verbose output.",
)
parser.add_argument(
"--groupby",
Expand All @@ -159,16 +144,24 @@ def check_no_unexpected_results(mypy_lines: Iterator[str]):
args, _ = parser.parse_known_args()

cp = subprocess.run(
["mypy", "--show-error-codes", "--exclude", "tests", "pymc"],
["mypy", "--output", "json", "--show-error-codes", "--exclude", "tests", "pymc"],
capture_output=True,
)
output = cp.stdout.decode()

output = cp.stdout.decode("utf-8")
df = mypy_to_pandas(output)

if args.verbose:
df = mypy_to_pandas(output.split("\n"))
for section, sdf in df.reset_index().groupby(args.groupby):
if not args.show_expected:
expected_failing = set(FAILING.strip().split("\n")) - {""}
filtered_df = df.query("file not in @expected_failing")
else:
filtered_df = df

for section, sdf in filtered_df.groupby(args.groupby):
print(f"\n\n[{section}]")
for row in sdf.itertuples():
print(f"{row.file}:{row.line}: {row.type} [{row.errorcode}]: {row.message}")
for idx, row in sdf.iterrows():
print(f"{row.file}:{row.line}: {row.code} [{row.severity}]: {row.message}")
print()
else:
print(
Expand All @@ -177,5 +170,6 @@ def check_no_unexpected_results(mypy_lines: Iterator[str]):
" or `python run_mypy.py --help` for other options."
)

check_no_unexpected_results(output.split("\n"))
check_no_unexpected_results(df, show_expected=args.show_expected)

sys.exit(0)
Loading