From 95b4f05ae41e13f1765eb397666e653fea2f6dd1 Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Sun, 16 Nov 2025 13:43:46 -0600 Subject: [PATCH 1/4] tweak run_mypy.py to be more user-friendly --- scripts/run_mypy.py | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/scripts/run_mypy.py b/scripts/run_mypy.py index 032fbc938b..37e0e7a1ce 100755 --- a/scripts/run_mypy.py +++ b/scripts/run_mypy.py @@ -96,7 +96,7 @@ def mypy_to_pandas(input_lines: Iterator[str]) -> pandas.DataFrame: return pandas.DataFrame(data=data).set_index(["file", "line"]) -def check_no_unexpected_results(mypy_lines: Iterator[str]): +def check_no_unexpected_results(mypy_lines: Iterator[str], show_expected: bool): """Compare mypy results with list of known FAILING files. Exits the process with non-zero exit code upon unexpected results. @@ -122,13 +122,23 @@ def check_no_unexpected_results(mypy_lines: Iterator[str]): print(f"{len(passing)}/{len(all_files)} files pass as expected.") else: print("!!!!!!!!!") - print(f"{len(unexpected_failing)} files unexpectedly failed.") + print(f"{len(unexpected_failing)} files unexpectedly failed:") print("\n".join(sorted(map(str, unexpected_failing)))) + + if show_expected: + print( + "\nThese files did not fail before, so please check the above output" + f" for errors in {unexpected_failing} and fix them." + ) + else: + print("\nThese files did not fail before. Fix all errors reported in the output above.") + print("You can run `python scripts/run_mypy.py` to reproduce this test locally.") print( - "These files did not fail before, so please check the above output" - f" for errors in {unexpected_failing} and fix them." + f"\nNote: In addition to these errors, {len(failing.intersection(expected_failing))} errors in files " + f'marked as "expected failures" were also found. To see these failures, run: ' + f"`python scripts/run_mypy.py --show-expected`" ) - print("You can run `python scripts/run_mypy.py --verbose` to reproduce this test locally.") + sys.exit(1) if unexpected_passing: @@ -149,7 +159,12 @@ def check_no_unexpected_results(mypy_lines: Iterator[str]): if __name__ == "__main__": parser = argparse.ArgumentParser(description="Run mypy type checks on PyMC codebase.") parser.add_argument( - "--verbose", action="count", default=0, help="Pass this to print mypy output." + "--verbose", action="count", default=1, help="Pass this to print mypy output." + ) + parser.add_argument( + "--show-expected", + action="store_true", + help="Also show expected failures in verbose output.", ) parser.add_argument( "--groupby", @@ -165,6 +180,11 @@ def check_no_unexpected_results(mypy_lines: Iterator[str]): output = cp.stdout.decode() if args.verbose: df = mypy_to_pandas(output.split("\n")) + + if not args.show_expected: + expected_failing = set(FAILING.strip().split("\n")) - {""} + df = df.query("file not in @expected_failing") + for section, sdf in df.reset_index().groupby(args.groupby): print(f"\n\n[{section}]") for row in sdf.itertuples(): @@ -177,5 +197,6 @@ def check_no_unexpected_results(mypy_lines: Iterator[str]): " or `python run_mypy.py --help` for other options." ) - check_no_unexpected_results(output.split("\n")) + check_no_unexpected_results(output.split("\n"), show_expected=args.show_expected) + sys.exit(0) From fb537fd6f20f7279d83f8f2f7742c0efc30b6efe Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Sun, 16 Nov 2025 20:01:13 -0600 Subject: [PATCH 2/4] Use json mypy output --- scripts/run_mypy.py | 65 ++++++++++++--------------------------------- 1 file changed, 17 insertions(+), 48 deletions(-) diff --git a/scripts/run_mypy.py b/scripts/run_mypy.py index 37e0e7a1ce..bafebc3ef4 100755 --- a/scripts/run_mypy.py +++ b/scripts/run_mypy.py @@ -13,14 +13,13 @@ import argparse import importlib +import io import os import pathlib import subprocess import sys -from collections.abc import Iterator - -import pandas +import pandas as pd DP_ROOT = pathlib.Path(__file__).absolute().parent.parent FAILING = """ @@ -59,55 +58,25 @@ def enforce_pep561(module_name): return -def mypy_to_pandas(input_lines: Iterator[str]) -> pandas.DataFrame: +def mypy_to_pandas(mypy_result: str) -> pd.DataFrame: """Reformats mypy output with error codes to a DataFrame. Adapted from: https://gist.github.com/michaelosthege/24d0703e5f37850c9e5679f69598930a """ - current_section = None - data = { - "file": [], - "line": [], - "type": [], - "errorcode": [], - "message": [], - } - for line in input_lines: - line = line.strip() - elems = line.split(":") - if len(elems) < 3: - continue - try: - file, lineno, message_type, *_ = elems[0:3] - message_type = message_type.strip() - if message_type == "error": - current_section = line.split(" [")[-1][:-1] - message = line.replace(f"{file}:{lineno}: {message_type}: ", "").replace( - f" [{current_section}]", "" - ) - data["file"].append(file) - data["line"].append(lineno) - data["type"].append(message_type) - data["errorcode"].append(current_section) - data["message"].append(message) - except Exception as ex: - print(elems) - print(ex) - return pandas.DataFrame(data=data).set_index(["file", "line"]) - - -def check_no_unexpected_results(mypy_lines: Iterator[str], show_expected: bool): + return pd.read_json(io.StringIO(mypy_result), lines=True) + + +def check_no_unexpected_results(mypy_df: pd.DataFrame, show_expected: bool): """Compare mypy results with list of known FAILING files. Exits the process with non-zero exit code upon unexpected results. """ - df = mypy_to_pandas(mypy_lines) all_files = { str(fp).replace(str(DP_ROOT), "").strip(os.sep).replace(os.sep, "/") for fp in DP_ROOT.glob("pymc/**/*.py") if "tests" not in str(fp) } - failing = set(df.reset_index().file.str.replace(os.sep, "/", regex=False)) + failing = set(mypy_df.file.str.replace(os.sep, "/", regex=False)) if not failing.issubset(all_files): raise Exception( "Mypy should have ignored these files:\n" @@ -174,21 +143,21 @@ def check_no_unexpected_results(mypy_lines: Iterator[str], show_expected: bool): args, _ = parser.parse_known_args() cp = subprocess.run( - ["mypy", "--show-error-codes", "--exclude", "tests", "pymc"], - capture_output=True, + ["mypy", "--output", "json", "--show-error-codes", "--exclude", "tests", "pymc"], + stdout=subprocess.PIPE, ) - output = cp.stdout.decode() - if args.verbose: - df = mypy_to_pandas(output.split("\n")) + output = cp.stdout.decode("utf-8") + df = mypy_to_pandas(output) + if args.verbose: if not args.show_expected: expected_failing = set(FAILING.strip().split("\n")) - {""} df = df.query("file not in @expected_failing") - for section, sdf in df.reset_index().groupby(args.groupby): + for section, sdf in df.groupby(args.groupby): print(f"\n\n[{section}]") - for row in sdf.itertuples(): - print(f"{row.file}:{row.line}: {row.type} [{row.errorcode}]: {row.message}") + for idx, row in sdf.iterrows(): + print(f"{row.file}:{row.line}: {row.code} [{row.severity}]: {row.message}") print() else: print( @@ -197,6 +166,6 @@ def check_no_unexpected_results(mypy_lines: Iterator[str], show_expected: bool): " or `python run_mypy.py --help` for other options." ) - check_no_unexpected_results(output.split("\n"), show_expected=args.show_expected) + check_no_unexpected_results(df, show_expected=args.show_expected) sys.exit(0) From b4dadc09443304f87848f052971d3f4f51673091 Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Sun, 16 Nov 2025 20:12:24 -0600 Subject: [PATCH 3/4] fix indentation --- scripts/run_mypy.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/scripts/run_mypy.py b/scripts/run_mypy.py index bafebc3ef4..35a9224241 100755 --- a/scripts/run_mypy.py +++ b/scripts/run_mypy.py @@ -102,11 +102,13 @@ def check_no_unexpected_results(mypy_df: pd.DataFrame, show_expected: bool): else: print("\nThese files did not fail before. Fix all errors reported in the output above.") print("You can run `python scripts/run_mypy.py` to reproduce this test locally.") - print( - f"\nNote: In addition to these errors, {len(failing.intersection(expected_failing))} errors in files " - f'marked as "expected failures" were also found. To see these failures, run: ' - f"`python scripts/run_mypy.py --show-expected`" - ) + + if not show_expected: + print( + f"\nNote: In addition to these errors, {len(failing.intersection(expected_failing))} errors in files " + f'marked as "expected failures" were also found. To see these failures, run: ' + f"`python scripts/run_mypy.py --show-expected`" + ) sys.exit(1) From aac8c880aa6d61b627446f85a8b0d395d64f92b5 Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Sun, 16 Nov 2025 20:22:46 -0600 Subject: [PATCH 4/4] bugfix --- scripts/run_mypy.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/scripts/run_mypy.py b/scripts/run_mypy.py index 35a9224241..42a4bb9e77 100755 --- a/scripts/run_mypy.py +++ b/scripts/run_mypy.py @@ -101,15 +101,14 @@ def check_no_unexpected_results(mypy_df: pd.DataFrame, show_expected: bool): ) else: print("\nThese files did not fail before. Fix all errors reported in the output above.") - print("You can run `python scripts/run_mypy.py` to reproduce this test locally.") - - if not show_expected: print( f"\nNote: In addition to these errors, {len(failing.intersection(expected_failing))} errors in files " f'marked as "expected failures" were also found. To see these failures, run: ' f"`python scripts/run_mypy.py --show-expected`" ) + print("You can run `python scripts/run_mypy.py` to reproduce this test locally.") + sys.exit(1) if unexpected_passing: @@ -146,17 +145,20 @@ def check_no_unexpected_results(mypy_df: pd.DataFrame, show_expected: bool): cp = subprocess.run( ["mypy", "--output", "json", "--show-error-codes", "--exclude", "tests", "pymc"], - stdout=subprocess.PIPE, + capture_output=True, ) + output = cp.stdout.decode("utf-8") df = mypy_to_pandas(output) if args.verbose: if not args.show_expected: expected_failing = set(FAILING.strip().split("\n")) - {""} - df = df.query("file not in @expected_failing") + filtered_df = df.query("file not in @expected_failing") + else: + filtered_df = df - for section, sdf in df.groupby(args.groupby): + for section, sdf in filtered_df.groupby(args.groupby): print(f"\n\n[{section}]") for idx, row in sdf.iterrows(): print(f"{row.file}:{row.line}: {row.code} [{row.severity}]: {row.message}")