From 9d2ae0b03eb2d1eb2a3b9bd072369b43ee68d29f Mon Sep 17 00:00:00 2001 From: vaggelisd Date: Thu, 30 Apr 2026 19:16:07 +0300 Subject: [PATCH] [mypyc] Fix non-deterministic class struct layout under separate=True MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit detect_undefined_bitmap() was extending cl.bitmap_attrs in place. Under separate=True, each SCC's analyze_always_defined_attrs is invoked once per group, and detect_undefined_bitmap recurses through cl.base_mro from the subclass into its base classes. The seen set passed in dedupes within one call but is fresh per call, so every subclass-group call re-extends the shared base class's bitmap_attrs with another copy of the contributions. The base class's emitted ObjectStruct then grows by one bitmap field per ~32 subclasses processed in the same build. The exact final length is a function of how many SCCs went through compile_scc_to_ir this run: - clean build: every SCC fresh -> base bitmap_attrs accumulates fully - incremental build affecting N subclasses: base accumulates a fraction - second incremental: yet another count Subclasses not rebuilt this round still see their base's old, larger struct layout. Any attribute access on the base segfaults with a mismatched bitmap-field offset. The bug is pre-existing in mypyc; it only manifested once the prior over-conservative 44-file always-rebuild was lifted (1.20.0.post5), because that wasteful behavior kept rebuild sets self-consistent. Fix: compute a fresh local list and assign at the end. The function becomes naturally idempotent across repeated calls — same input, same output, regardless of how many groups have visited the class. No new fields, no serialization changes. Verified against sqlglot[c] (separate=True, ~100 modules): Edit: add a method to MySQLParser (a class with 7 dialect subclasses) Before: parser.h struct layout differs between clean and incremental builds; make unitc segfaults at the first parser-using test. 
After: parser.h identical between clean and incremental; make unitc passes (1163 tests, 0 segfaults). --- mypyc/analysis/attrdefined.py | 14 ++++++++++---- mypyc/test/test_emitclass.py | 21 +++++++++++++++++++++ 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/mypyc/analysis/attrdefined.py b/mypyc/analysis/attrdefined.py index 1dfd33630f1c0..bfb9e7652a4f8 100644 --- a/mypyc/analysis/attrdefined.py +++ b/mypyc/analysis/attrdefined.py @@ -424,14 +424,20 @@ def detect_undefined_bitmap(cl: ClassIR, seen: set[ClassIR]) -> None: for base in cl.base_mro[1:]: detect_undefined_bitmap(base, seen) + # Build fresh and assign once. This function is called per SCC and `seen` + # only dedupes within a single call, so appending in place to a shared base + # would accumulate duplicates across SCCs and produce non-deterministic + # struct layouts under separate=True. + new_attrs: list[str] = [] if len(cl.base_mro) > 1: - cl.bitmap_attrs.extend(cl.base_mro[1].bitmap_attrs) + new_attrs.extend(cl.base_mro[1].bitmap_attrs) for n, t in cl.attributes.items(): if t.error_overlap and not cl.is_always_defined(n): - cl.bitmap_attrs.append(n) + new_attrs.append(n) for base in cl.mro[1:]: if base.is_trait: for n, t in base.attributes.items(): - if t.error_overlap and not cl.is_always_defined(n) and n not in cl.bitmap_attrs: - cl.bitmap_attrs.append(n) + if t.error_overlap and not cl.is_always_defined(n) and n not in new_attrs: + new_attrs.append(n) + cl.bitmap_attrs = new_attrs diff --git a/mypyc/test/test_emitclass.py b/mypyc/test/test_emitclass.py index eb04b22495de6..9c3cd02d1100c 100644 --- a/mypyc/test/test_emitclass.py +++ b/mypyc/test/test_emitclass.py @@ -2,8 +2,10 @@ import unittest +from mypyc.analysis.attrdefined import detect_undefined_bitmap from mypyc.codegen.emitclass import getter_name, setter_name, slot_key from mypyc.ir.class_ir import ClassIR +from mypyc.ir.rtypes import int32_rprimitive from mypyc.namegen import NameGenerator @@ -33,3 +35,22 @@ def 
test_getter_name(self) -> None: generator = NameGenerator([["mod"]]) assert getter_name(cls, "down", generator) == "testing___SomeClass_get_down" + + def test_bitmap_attrs_stable_across_repeat_analysis(self) -> None: + # Regression: detect_undefined_bitmap used to mutate cl.bitmap_attrs + # in place, so under separate=True (one SCC per group) a shared base + # class would accumulate duplicate entries as each subclass's SCC + # walked into it, growing the emitted struct between builds. + base = ClassIR("Base", "mod") + base.attributes = {"i": int32_rprimitive} + sub = ClassIR("Sub", "mod") + sub.attributes = {"j": int32_rprimitive} + base.mro = base.base_mro = [base] + sub.mro = sub.base_mro = [sub, base] + base.children = [sub] + + detect_undefined_bitmap(sub, seen=set()) + for _ in range(10): + detect_undefined_bitmap(sub, seen=set()) + assert base.bitmap_attrs == ["i"] + assert sub.bitmap_attrs == ["i", "j"]