In [1]:
import warnings

warnings.filterwarnings("ignore", message=r".*GIL", category=RuntimeWarning)

In [2]:
import polars as pl
import subprocess
import json


pl.Config.set_tbl_rows(-1)
pl.Config.set_tbl_cols(-1)
pl.Config.set_fmt_str_lengths(1000);

In [3]:
RUFF = "./ruff.26230b1ed3"
linter_data = json.loads(
    subprocess.run(
        [RUFF, "linter", "--output-format=json"],
        check=True,
        text=True,
        capture_output=True,
    ).stdout
)
linters = {linter["name"]: linter["prefix"] for linter in linter_data}
rule_data = json.loads(
    subprocess.run(
        [RUFF, "rule", "--all", "--output-format=json"],
        check=True,
        text=True,
        capture_output=True,
    ).stdout
)
rules = {rule["code"]: rule["name"] for rule in rule_data}
rule_to_linter = {rule["code"]: linters[rule["linter"]] for rule in rule_data}

In [4]:
df = pl.read_csv("out.csv")
full = df.with_columns(
    (
        pl.col("accuracy")
        + 2 * pl.col("severity")
        + 0.5 * pl.col("fixability")
        + pl.col("applicability")
        + pl.col("configuration")
        + pl.col("conflicts")
    ).alias("total"),
    pl.col("rule").replace_strict(rule_to_linter).alias("linter"),
    pl.col("rule").replace_strict(rules).alias("name"),
)

In [5]:
full.group_by("accuracy").agg(pl.len()).sort("accuracy")

accuracy,len
i64,u32
0,76
1,60
2,802


In [6]:
full.filter(
    pl.col("total") >= 9,
    pl.col("type") == "Stable",
).height

578

In [7]:
full.filter(
    pl.col("accuracy") == 2,
    pl.col("total") >= 9,
    pl.col("type") == "Stable",
).height

551

In [8]:
# Just requiring a total score >= 9 doesn't really narrow things down enough, even with
# a required accuracy of 2 and limiting to stable rules. Let's impose a few more constraints
# such as no conflicts or redundancy with other tools, a high applicability, and also a high
# severity. This actually requires a total score >= 10 so the >= 9 check is now redundant.
#
# Expanding the applicabilty range to >= 1 just pulls in the pytest rules. pytest was basically
# (maybe literally) the only library I considered widespread enough to warrant a 1.
#
# 197 seems like a pretty reasonable number of default rules (out of 938), just percentage-wise.
df = full.filter(
    pl.col("accuracy") == 2,
    pl.col("type") == "Stable",
    pl.col("conflicts") == 2,
    pl.col("applicability") == 2,
    pl.col("severity") == 2,
)
df.sort("rule")

rule,type,accuracy,severity,fixability,applicability,configuration,conflicts,total,linter,name
str,str,i64,i64,i64,i64,i64,i64,f64,str,str
"""A002""","""Stable""",2,2,0,2,1,2,11.0,"""A""","""builtin-argument-shadowing"""
"""A004""","""Stable""",2,2,0,2,1,2,11.0,"""A""","""builtin-import-shadowing"""
"""A005""","""Stable""",2,2,0,2,1,2,11.0,"""A""","""stdlib-module-shadowing"""
"""A006""","""Stable""",2,2,0,2,1,2,11.0,"""A""","""builtin-lambda-argument-shadowing"""
"""ARG001""","""Stable""",2,2,0,2,1,2,11.0,"""ARG""","""unused-function-argument"""
"""ARG002""","""Stable""",2,2,0,2,1,2,11.0,"""ARG""","""unused-method-argument"""
"""ARG003""","""Stable""",2,2,0,2,1,2,11.0,"""ARG""","""unused-class-method-argument"""
"""ARG004""","""Stable""",2,2,0,2,1,2,11.0,"""ARG""","""unused-static-method-argument"""
"""ARG005""","""Stable""",2,2,0,2,1,2,11.0,"""ARG""","""unused-lambda-argument"""
"""ASYNC100""","""Stable""",2,2,0,2,2,2,12.0,"""ASYNC""","""cancel-scope-no-checkpoint"""


In [9]:
# fmt: off
current_defaults = [
    "E401",	    "E742",	    "F504",	    "F602",	    "F811",
    "E712",	    "F404",	    "F521",	    "F634",	    "F901",
    "E741",	    "F503",	    "F601",	    "F722",	    "E711",
    "F403",	    "F509",	    "F633",	    "F842",	    "E731",
    "F502",	    "F541",	    "F707",	    "E703",	    "F402",
    "F508",	    "F632",	    "F841",	    "E722",	    "F501",
    "F525",	    "F706",	    "E702",	    "F401",	    "F507",
    "F631",	    "F823",	    "E721",	    "F407",	    "F524",
    "F704",	    "E701",	    "E902",	    "F506",	    "F622",
    "F822",	    "E714",	    "F406",	    "F523",	    "F702",
    "E402",	    "E743",	    "F505",	    "F621",	    "F821",
    "E713",	    "F405",	    "F522",	    "F701",
]
# fmt: on
len(current_defaults)

59

In [10]:
in_defaults = pl.col("rule").is_in(current_defaults)
len(df.filter(in_defaults))

42

In [11]:
missing_current_defaults = set(current_defaults) - set(df["rule"])
len(missing_current_defaults)

17

In [12]:
# looks like these all got filtered out for severity == 1
full.filter(pl.col("rule").is_in(missing_current_defaults)).sort("rule")

rule,type,accuracy,severity,fixability,applicability,configuration,conflicts,total,linter,name
str,str,i64,i64,i64,i64,i64,i64,f64,str,str
"""E401""","""Stable""",2,1,2,2,2,2,11.0,"""""","""multiple-imports-on-one-line"""
"""E402""","""Stable""",2,1,0,2,2,2,10.0,"""""","""module-import-not-at-top-of-file"""
"""E701""","""Stable""",2,1,0,2,2,1,9.0,"""""","""multiple-statements-on-one-line-colon"""
"""E702""","""Stable""",2,1,0,2,2,1,9.0,"""""","""multiple-statements-on-one-line-semicolon"""
"""E703""","""Stable""",2,1,2,2,2,1,10.0,"""""","""useless-semicolon"""
"""E711""","""Stable""",2,1,2,2,2,2,11.0,"""""","""none-comparison"""
"""E712""","""Stable""",2,1,2,2,2,2,11.0,"""""","""true-false-comparison"""
"""E713""","""Stable""",2,1,2,2,2,2,11.0,"""""","""not-in-test"""
"""E714""","""Stable""",2,1,2,2,2,2,11.0,"""""","""not-is-test"""
"""E721""","""Stable""",2,1,0,2,2,2,10.0,"""""","""type-comparison"""


In [13]:
# looks like these were filtered out for accuracy = 1, I think I was
# probably too harsh here. There's an open bug report from dscorbett
# with a false negative, which is the only reason I can identify for this
full.filter(pl.col("rule").is_in(("A001", "A003")))

rule,type,accuracy,severity,fixability,applicability,configuration,conflicts,total,linter,name
str,str,i64,i64,i64,i64,i64,i64,f64,str,str
"""A001""","""Stable""",1,2,0,2,1,2,10.0,"""A""","""builtin-variable-shadowing"""
"""A003""","""Stable""",1,2,0,2,1,2,10.0,"""A""","""builtin-attribute-shadowing"""


In [14]:
# the ASYNC rules feel kind of out of place to me as defaults,
# not all projects are going to write any async code. I think
# we could consider revising the Applicability rubric to something like:
# - 2) widely used stdlib types
# - 1) "Niche" stdlib types or very widely used third-party
# - 0) Niche third-party
df.filter(pl.col("linter") == "ASYNC")

rule,type,accuracy,severity,fixability,applicability,configuration,conflicts,total,linter,name
str,str,i64,i64,i64,i64,i64,i64,f64,str,str
"""ASYNC100""","""Stable""",2,2,0,2,2,2,12.0,"""ASYNC""","""cancel-scope-no-checkpoint"""
"""ASYNC220""","""Stable""",2,2,0,2,2,2,12.0,"""ASYNC""","""create-subprocess-in-async-function"""
"""ASYNC221""","""Stable""",2,2,0,2,2,2,12.0,"""ASYNC""","""run-process-in-async-function"""
"""ASYNC222""","""Stable""",2,2,0,2,2,2,12.0,"""ASYNC""","""wait-for-process-in-async-function"""
"""ASYNC251""","""Stable""",2,2,0,2,2,2,12.0,"""ASYNC""","""blocking-sleep-in-async-function"""


In [15]:
# Similarly, the EXE rules probably shouldn't be default
# because they don't even do anything on Windows, and many
# projects may not have any executable Python scripts.
df.filter(pl.col("linter") == "EXE")

rule,type,accuracy,severity,fixability,applicability,configuration,conflicts,total,linter,name
str,str,i64,i64,i64,i64,i64,i64,f64,str,str
"""EXE001""","""Stable""",2,2,0,2,2,2,12.0,"""EXE""","""shebang-not-executable"""
"""EXE002""","""Stable""",2,2,0,2,2,2,12.0,"""EXE""","""shebang-missing-executable-file"""
"""EXE003""","""Stable""",2,2,0,2,2,2,12.0,"""EXE""","""shebang-missing-python"""
"""EXE004""","""Stable""",2,2,2,2,2,2,13.0,"""EXE""","""shebang-leading-whitespace"""
"""EXE005""","""Stable""",2,2,0,2,2,2,12.0,"""EXE""","""shebang-not-first-line"""
