From 93b6d5cf2db91f2e5428784cd3cd6d038250b831 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 7 Apr 2026 17:01:05 +0100 Subject: [PATCH] Fix AArch64 multi-instruction constants and relocations * Eliminates redundant and xN, xN, 0xffff after 16 or 32 bit loads * Merges adrp (21rx) and ldr (12x) relocations into a single 33rx relocation, when safe to do so. --- Python/jit.c | 66 ++++++++ Tools/jit/_optimizers.py | 323 +++++++++++++++++++++++++++------------ Tools/jit/_stencils.py | 46 +++++- Tools/jit/_targets.py | 3 + 4 files changed, 331 insertions(+), 107 deletions(-) diff --git a/Python/jit.c b/Python/jit.c index d56ff6ad156c03..af75acf1ff2bb3 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -355,6 +355,14 @@ patch_aarch64_12(unsigned char *location, uint64_t value) set_bits(loc32, 10, value, shift, 12); } +// Relaxable 12-bit low part of an absolute address. +// Usually paired with patch_aarch64_21rx (below). +void +patch_aarch64_12x(unsigned char *location, uint64_t value) +{ + patch_aarch64_12(location, value); +} + // 16-bit low part of an absolute address. void patch_aarch64_16a(unsigned char *location, uint64_t value) @@ -415,6 +423,14 @@ patch_aarch64_21r(unsigned char *location, uint64_t value) set_bits(loc32, 5, value, 2, 19); } +// Relaxable 21-bit count of pages between this page and an absolute address's +// page. Usually paired with patch_aarch64_12x (above). +void +patch_aarch64_21rx(unsigned char *location, uint64_t value) +{ + patch_aarch64_21r(location, value); +} + // 21-bit relative branch. void patch_aarch64_19r(unsigned char *location, uint64_t value) @@ -445,6 +461,56 @@ patch_aarch64_26r(unsigned char *location, uint64_t value) set_bits(loc32, 0, value, 2, 26); } +// A pair of patch_aarch64_21rx and patch_aarch64_12x. 
+void +patch_aarch64_33rx(unsigned char *location_a, unsigned char *location_b, uint64_t value) +{ + uint32_t *loc32_a = (uint32_t *)location_a; + uint32_t *loc32_b = (uint32_t *)location_b; + // Try to relax the pair of GOT loads into an immediate value: + assert(IS_AARCH64_ADRP(*loc32_a)); + assert(IS_AARCH64_LDR_OR_STR(*loc32_b)); + unsigned char reg = get_bits(*loc32_a, 0, 5); + // There should be only one register involved: + assert(reg == get_bits(*loc32_b, 0, 5)); // ldr's output register. + assert(reg == get_bits(*loc32_b, 5, 5)); // ldr's input register. + uint64_t relaxed = *(uint64_t *)value; + if (relaxed < (1UL << 16)) { + // adrp reg, AAA; ldr reg, [reg + BBB] -> movz reg, XXX; nop + *loc32_a = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | reg; + *loc32_b = 0xD503201F; + return; + } + if (relaxed < (1ULL << 32)) { + // adrp reg, AAA; ldr reg, [reg + BBB] -> movz reg, XXX; movk reg, YYY + *loc32_a = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | reg; + *loc32_b = 0xF2A00000 | (get_bits(relaxed, 16, 16) << 5) | reg; + return; + } + int64_t page_delta = (relaxed >> 12) - ((uintptr_t)location_a >> 12); + if (page_delta >= -(1L << 20) && + page_delta < (1L << 20)) + { + // adrp reg, AAA; ldr reg, [reg + BBB] -> adrp reg, AAA; add reg, reg, BBB + patch_aarch64_21rx(location_a, relaxed); + *loc32_b = 0x91000000 | get_bits(relaxed, 0, 12) << 10 | reg << 5 | reg; + return; + } + relaxed = value - (uintptr_t)location_a; + if ((relaxed & 0x3) == 0 && + (int64_t)relaxed >= -(1L << 19) && + (int64_t)relaxed < (1L << 19)) + { + // adrp reg, AAA; ldr reg, [reg + BBB] -> ldr reg, XXX; nop + *loc32_a = 0x58000000 | (get_bits(relaxed, 2, 19) << 5) | reg; + *loc32_b = 0xD503201F; + return; + } + // Couldn't do it. Just patch the two instructions normally: + patch_aarch64_21rx(location_a, value); + patch_aarch64_12x(location_b, value); +} + // Relaxable 32-bit relative address. 
void patch_x86_64_32rx(unsigned char *location, uint64_t value) diff --git a/Tools/jit/_optimizers.py b/Tools/jit/_optimizers.py index ef28e0c0ddeac8..f192783a55950c 100644 --- a/Tools/jit/_optimizers.py +++ b/Tools/jit/_optimizers.py @@ -99,6 +99,9 @@ class InstructionKind(enum.Enum): RETURN = enum.auto() SMALL_CONST_1 = enum.auto() SMALL_CONST_2 = enum.auto() + SMALL_CONST_MASK = enum.auto() + LARGE_CONST_1 = enum.auto() + LARGE_CONST_2 = enum.auto() OTHER = enum.auto() @@ -107,6 +110,7 @@ class Instruction: kind: InstructionKind name: str text: str + register: str | None target: str | None def is_branch(self) -> bool: @@ -115,7 +119,11 @@ def is_branch(self) -> bool: def update_target(self, target: str) -> "Instruction": assert self.target is not None return Instruction( - self.kind, self.name, self.text.replace(self.target, target), target + self.kind, + self.name, + self.text.replace(self.target, target), + self.register, + target, ) def update_name_and_target(self, name: str, target: str) -> "Instruction": @@ -124,6 +132,7 @@ def update_name_and_target(self, name: str, target: str) -> "Instruction": self.kind, name, self.text.replace(self.name, name).replace(self.target, target), + self.register, target, ) @@ -193,8 +202,12 @@ class Optimizer: globals: set[str] = dataclasses.field(default_factory=set) _re_small_const_1 = _RE_NEVER_MATCH _re_small_const_2 = _RE_NEVER_MATCH + _re_small_const_mask = _RE_NEVER_MATCH + _re_large_const_1 = _RE_NEVER_MATCH + _re_large_const_2 = _RE_NEVER_MATCH const_reloc = "" _frame_pointer_modify: typing.ClassVar[re.Pattern[str]] = _RE_NEVER_MATCH + label_index: int = 0 def __post_init__(self) -> None: # Split the code into a linked list of basic blocks. 
A basic block is an @@ -255,6 +268,7 @@ def _preprocess(self, text: str) -> str: def _parse_instruction(self, line: str) -> Instruction: target = None + reg = None if match := self._re_branch.match(line): target = match["target"] name = match["instruction"] @@ -276,15 +290,34 @@ def _parse_instruction(self, line: str) -> Instruction: elif match := self._re_small_const_1.match(line): target = match["value"] name = match["instruction"] + reg = match["register"] kind = InstructionKind.SMALL_CONST_1 elif match := self._re_small_const_2.match(line): target = match["value"] name = match["instruction"] + reg = match["register"] kind = InstructionKind.SMALL_CONST_2 + elif match := self._re_small_const_mask.match(line): + target = match["value"] + name = match["instruction"] + reg = match["register"] + if reg.startswith("w"): + reg = "x" + reg[1:] + kind = InstructionKind.SMALL_CONST_MASK + elif match := self._re_large_const_1.match(line): + target = match["value"] + name = match["instruction"] + reg = match["register"] + kind = InstructionKind.LARGE_CONST_1 + elif match := self._re_large_const_2.match(line): + target = match["value"] + name = match["instruction"] + reg = match["register"] + kind = InstructionKind.LARGE_CONST_2 else: name, *_ = line.split(" ") kind = InstructionKind.OTHER - return Instruction(kind, name, line, target) + return Instruction(kind, name, line, reg, target) def _invert_branch(self, inst: Instruction, target: str) -> Instruction | None: assert inst.is_branch() @@ -487,73 +520,13 @@ def _fixup_external_labels(self) -> None: name = target[len(self.symbol_prefix) :] label = f"{self.symbol_prefix}{reloc}_JIT_RELOCATION_{name}_JIT_RELOCATION_{index}:" block.instructions[-1] = Instruction( - InstructionKind.OTHER, "", label, None + InstructionKind.OTHER, "", label, None, None ) block.instructions.append(branch.update_target("0")) - def _make_temp_label(self, index: int) -> Instruction: - marker = f"jit_temp_{index}:" - return 
Instruction(InstructionKind.OTHER, "", marker, None) - def _fixup_constants(self) -> None: - if not self.supports_small_constants: - return - index = 0 - for block in self._blocks(): - fixed: list[Instruction] = [] - small_const_index = -1 - for inst in block.instructions: - if inst.kind == InstructionKind.SMALL_CONST_1: - marker = f"jit_pending_{inst.target}{index}:" - fixed.append(self._make_temp_label(index)) - index += 1 - small_const_index = len(fixed) - fixed.append(inst) - elif inst.kind == InstructionKind.SMALL_CONST_2: - if small_const_index < 0: - fixed.append(inst) - continue - small_const_1 = fixed[small_const_index] - if not self._small_consts_match(small_const_1, inst): - small_const_index = -1 - fixed.append(inst) - continue - assert small_const_1.target is not None - if small_const_1.target.endswith("16"): - fixed[small_const_index] = self._make_temp_label(index) - index += 1 - else: - assert small_const_1.target.endswith("32") - patch_kind, replacement = self._small_const_1(small_const_1) - if replacement is not None: - label = f"{self.const_reloc}{patch_kind}_JIT_RELOCATION_CONST{small_const_1.target[:-3]}_JIT_RELOCATION_{index}:" - index += 1 - fixed[small_const_index - 1] = Instruction( - InstructionKind.OTHER, "", label, None - ) - fixed[small_const_index] = replacement - patch_kind, replacement = self._small_const_2(inst) - if replacement is not None: - assert inst.target is not None - label = f"{self.const_reloc}{patch_kind}_JIT_RELOCATION_CONST{inst.target[:-3]}_JIT_RELOCATION_{index}:" - index += 1 - fixed.append( - Instruction(InstructionKind.OTHER, "", label, None) - ) - fixed.append(replacement) - small_const_index = -1 - else: - fixed.append(inst) - block.instructions = fixed - - def _small_const_1(self, inst: Instruction) -> tuple[str, Instruction | None]: - raise NotImplementedError() - - def _small_const_2(self, inst: Instruction) -> tuple[str, Instruction | None]: - raise NotImplementedError() - - def _small_consts_match(self, 
inst1: Instruction, inst2: Instruction) -> bool: - raise NotImplementedError() + "Fixup loading of constants. Overridden by OptimizerAArch64" + pass def _validate(self) -> None: for block in self._blocks(): @@ -602,52 +575,200 @@ class OptimizerAArch64(Optimizer): # pylint: disable = too-few-public-methods supports_small_constants = True _re_small_const_1 = re.compile( - r"\s*(?P<instruction>adrp)\s+.*(?P<value>_JIT_OP(ARG|ERAND(0|1))_(16|32)).*" + r"\s*(?P<instruction>adrp)\s+(?P<register>x\d\d?),.*(?P<value>_JIT_OP(ARG|ERAND(0|1))_(16|32)).*" ) _re_small_const_2 = re.compile( - r"\s*(?P<instruction>ldr)\s+.*(?P<value>_JIT_OP(ARG|ERAND(0|1))_(16|32)).*" + r"\s*(?P<instruction>ldr)\s+(?P<register>x\d\d?),.*(?P<value>_JIT_OP(ARG|ERAND(0|1))_(16|32)).*" + ) + _re_small_const_mask = re.compile( + r"\s*(?P<instruction>and)\s+[xw]\d\d?, *(?P<register>[xw]\d\d?).*(?P<value>0xffff)" + ) + _re_large_const_1 = re.compile( + r"\s*(?P<instruction>adrp)\s+(?P<register>x\d\d?),.*:got:(?P<value>[_A-Za-z0-9]+).*" + ) + _re_large_const_2 = re.compile( + r"\s*(?P<instruction>ldr)\s+(?P<register>x\d\d?),.*:got_lo12:(?P<value>[_A-Za-z0-9]+).*" ) const_reloc = "CUSTOM_AARCH64_CONST" _frame_pointer_modify = re.compile(r"\s*stp\s+x29.*") - def _get_reg(self, inst: Instruction) -> str: - _, rest = inst.text.split(inst.name) - reg, *_ = rest.split(",") - return reg.strip() - - def _small_const_1(self, inst: Instruction) -> tuple[str, Instruction | None]: - assert inst.kind is InstructionKind.SMALL_CONST_1 - assert inst.target is not None - if "16" in inst.target: - return "", None - pre, _ = inst.text.split(inst.name) - return "16a", Instruction( - InstructionKind.OTHER, "movz", f"{pre}movz {self._get_reg(inst)}, 0", None + def _make_temp_label(self, note: object = None) -> Instruction: + marker = f"jit_temp_{self.label_index}:" + if note is not None: + marker = f"{marker[:-1]}_{note}:" + self.label_index += 1 + return Instruction(InstructionKind.OTHER, "", marker, None, None) + + def _both_registers_same(self, inst: Instruction) -> bool: + reg = inst.register + assert reg is not None + if reg not in inst.text: + reg = "w" + reg[1:] + return inst.text.count(reg) == 2 + + def 
_fixup_small_constant_pair( + self, output: list[Instruction], label_index: int, inst: Instruction + ) -> str | None: + first = output[label_index + 1] + reg = first.register + if reg is None or inst.register != reg: + output.append( + Instruction(InstructionKind.OTHER, "", "# registers differ", None, None) + ) + output.append(inst) + return None + assert first.target is not None + if first.target != inst.target: + output.append( + Instruction(InstructionKind.OTHER, "", "# targets differ", None, None) + ) + output.append(inst) + return None + if not self._both_registers_same(inst): + output.append( + Instruction( + InstructionKind.OTHER, "", "# not same register", None, None + ) + ) + output.append(inst) + return None + pre, _ = first.text.split(first.name) + output[label_index + 1] = Instruction( + InstructionKind.OTHER, + "movz", + f"{pre}movz {reg}, 0", + reg, + None, ) - - def _small_const_2(self, inst: Instruction) -> tuple[str, Instruction | None]: - assert inst.kind is InstructionKind.SMALL_CONST_2 - assert inst.target is not None - pre, _ = inst.text.split(inst.name) - if "16" in inst.target: - return "16a", Instruction( - InstructionKind.OTHER, - "movz", - f"{pre}movz {self._get_reg(inst)}, 0", - None, + label_text = f"{self.const_reloc}16a_JIT_RELOCATION_CONST{first.target[:-3]}_JIT_RELOCATION_{self.label_index}:" + self.label_index += 1 + output[label_index] = Instruction( + InstructionKind.OTHER, "", label_text, None, None + ) + assert first.target.endswith("16") or first.target.endswith("32") + if first.target.endswith("32"): + label_text = f"{self.const_reloc}16b_JIT_RELOCATION_CONST{first.target[:-3]}_JIT_RELOCATION_{self.label_index}:" + self.label_index += 1 + output.append( + Instruction(InstructionKind.OTHER, "", label_text, None, None) ) - else: - return "16b", Instruction( - InstructionKind.OTHER, - "movk", - f"{pre}movk {self._get_reg(inst)}, 0, lsl #16", - None, + pre, _ = inst.text.split(inst.name) + output.append( + Instruction( + 
InstructionKind.OTHER, + "movk", + f"{pre}movk {reg}, 0, lsl #16", + reg, + None, + ) ) + return reg + + def may_use_reg(self, inst: Instruction, reg: str | None) -> bool: + "Return False if `reg` is not explicitly used by this instruction" + if reg is None: + return False + assert reg.startswith("w") or reg.startswith("x") + xreg = f"x{reg[1:]}" + wreg = f"w{reg[1:]}" + if wreg in inst.text: + return True + if xreg in inst.text: + # Exclude false positives like 0x80 for x8 + count = inst.text.count(xreg) + number_count = inst.text.count("0" + xreg) + return count > number_count + return False + + def _fixup_large_constant_pair( + self, output: list[Instruction], label_index: int, inst: Instruction + ) -> None: + first = output[label_index + 1] + reg = first.register + if reg is None or inst.register != reg: + output.append(inst) + return + assert first.target is not None + if first.target != inst.target: + output.append(inst) + return + label = f"{self.const_reloc}33a_JIT_PAIR_{first.target}_JIT_PAIR_{self.label_index}:" + output[label_index] = Instruction(InstructionKind.OTHER, "", label, None, None) + label = ( + f"{self.const_reloc}33b_JIT_PAIR_{inst.target}_JIT_PAIR_{self.label_index}:" + ) + self.label_index += 1 + output.append(Instruction(InstructionKind.OTHER, "", label, None, None)) + output.append(inst) + + def _fixup_mask(self, output: list[Instruction], inst: Instruction) -> None: + if self._both_registers_same(inst): + # Nop + pass + else: + output.append(inst) - def _small_consts_match(self, inst1: Instruction, inst2: Instruction) -> bool: - reg1 = self._get_reg(inst1) - reg2 = self._get_reg(inst2) - return reg1 == reg2 + def _fixup_constants(self) -> None: + for block in self._blocks(): + fixed: list[Instruction] = [] + small_const_part: dict[str, int | None] = {} + small_const_whole: dict[str, str | None] = {} + large_const_part: dict[str, int | None] = {} + for inst in block.instructions: + if inst.kind == InstructionKind.SMALL_CONST_1: + assert 
inst.register is not None + small_const_part[inst.register] = len(fixed) + small_const_whole[inst.register] = None + large_const_part[inst.register] = None + fixed.append(self._make_temp_label(inst.register)) + fixed.append(inst) + elif inst.kind == InstructionKind.SMALL_CONST_2: + assert inst.register is not None + index = small_const_part.get(inst.register) + small_const_part[inst.register] = None + if index is None: + fixed.append(inst) + continue + small_const_whole[inst.register] = self._fixup_small_constant_pair( + fixed, index, inst + ) + small_const_part[inst.register] = None + elif inst.kind == InstructionKind.SMALL_CONST_MASK: + assert inst.register is not None + reg = small_const_whole.get(inst.register) + if reg is not None: + self._fixup_mask(fixed, inst) + else: + fixed.append(inst) + elif inst.kind == InstructionKind.LARGE_CONST_1: + assert inst.register is not None + small_const_part[inst.register] = None + small_const_whole[inst.register] = None + large_const_part[inst.register] = len(fixed) + fixed.append(self._make_temp_label()) + fixed.append(inst) + elif inst.kind == InstructionKind.LARGE_CONST_2: + assert inst.register is not None + small_const_part[inst.register] = None + small_const_whole[inst.register] = None + index = large_const_part.get(inst.register) + large_const_part[inst.register] = None + if index is None: + fixed.append(inst) + continue + self._fixup_large_constant_pair(fixed, index, inst) + else: + for reg in small_const_part: + if self.may_use_reg(inst, reg): + small_const_part[reg] = None + for reg in small_const_whole: + if self.may_use_reg(inst, reg): + small_const_whole[reg] = None + for reg in small_const_part: + if self.may_use_reg(inst, reg): + large_const_part[reg] = None + fixed.append(inst) + block.instructions = fixed class OptimizerX86(Optimizer): # pylint: disable = too-few-public-methods diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py index 55a4aece5427c2..e2ae3d988fc7ac 100644 --- 
a/Tools/jit/_stencils.py +++ b/Tools/jit/_stencils.py @@ -57,11 +57,12 @@ class HoleValue(enum.Enum): _PATCH_FUNCS = { # aarch64-apple-darwin: "ARM64_RELOC_BRANCH26": "patch_aarch64_26r", - "ARM64_RELOC_GOT_LOAD_PAGE21": "patch_aarch64_21r", - "ARM64_RELOC_GOT_LOAD_PAGEOFF12": "patch_aarch64_12", + "ARM64_RELOC_GOT_LOAD_PAGE21": "patch_aarch64_21rx", + "ARM64_RELOC_GOT_LOAD_PAGEOFF12": "patch_aarch64_12x", "ARM64_RELOC_PAGE21": "patch_aarch64_21r", "ARM64_RELOC_PAGEOFF12": "patch_aarch64_12", "ARM64_RELOC_UNSIGNED": "patch_64", + # custom aarch64, both darwin and linux: "CUSTOM_AARCH64_BRANCH19": "patch_aarch64_19r", "CUSTOM_AARCH64_CONST16a": "patch_aarch64_16a", "CUSTOM_AARCH64_CONST16b": "patch_aarch64_16b", @@ -70,21 +71,21 @@ class HoleValue(enum.Enum): # aarch64-pc-windows-msvc: "IMAGE_REL_ARM64_BRANCH19": "patch_aarch64_19r", "IMAGE_REL_ARM64_BRANCH26": "patch_aarch64_26r", - "IMAGE_REL_ARM64_PAGEBASE_REL21": "patch_aarch64_21r", + "IMAGE_REL_ARM64_PAGEBASE_REL21": "patch_aarch64_21rx", "IMAGE_REL_ARM64_PAGEOFFSET_12A": "patch_aarch64_12", - "IMAGE_REL_ARM64_PAGEOFFSET_12L": "patch_aarch64_12", + "IMAGE_REL_ARM64_PAGEOFFSET_12L": "patch_aarch64_12x", # i686-pc-windows-msvc: "IMAGE_REL_I386_DIR32": "patch_32", "IMAGE_REL_I386_REL32": "patch_x86_64_32rx", # aarch64-unknown-linux-gnu: "R_AARCH64_ABS64": "patch_64", "R_AARCH64_ADD_ABS_LO12_NC": "patch_aarch64_12", - "R_AARCH64_ADR_GOT_PAGE": "patch_aarch64_21r", + "R_AARCH64_ADR_GOT_PAGE": "patch_aarch64_21rx", "R_AARCH64_ADR_PREL_PG_HI21": "patch_aarch64_21r", "R_AARCH64_CALL26": "patch_aarch64_26r", "R_AARCH64_CONDBR19": "patch_aarch64_19r", "R_AARCH64_JUMP26": "patch_aarch64_26r", - "R_AARCH64_LD64_GOT_LO12_NC": "patch_aarch64_12", + "R_AARCH64_LD64_GOT_LO12_NC": "patch_aarch64_12x", "R_AARCH64_MOVW_UABS_G0_NC": "patch_aarch64_16a", "R_AARCH64_MOVW_UABS_G1_NC": "patch_aarch64_16b", "R_AARCH64_MOVW_UABS_G2_NC": "patch_aarch64_16c", @@ -165,14 +166,30 @@ class Hole: custom_location: str = "" custom_value: str = 
"" func: str = dataclasses.field(init=False) + offset2: int = -1 + void: bool = False # Convenience method: replace = dataclasses.replace def __post_init__(self) -> None: self.func = _PATCH_FUNCS[self.kind] + def fold(self, other: typing.Self) -> None: + """Combine two holes into a single hole.""" + assert ( + self.func == "patch_aarch64_12x" and other.func == "patch_aarch64_21rx" + ), (self.func, other.func) + assert self.value == other.value + assert self.symbol == other.symbol + assert self.addend == other.addend + self.func = "patch_aarch64_33rx" + self.offset2 = other.offset + other.void = True + def as_c(self, where: str) -> str: """Dump this hole as a call to a patch_* function.""" + if self.void: + return "" if self.custom_location: location = self.custom_location else: @@ -194,6 +211,9 @@ def as_c(self, where: str) -> str: value += f"{_signed(self.addend):#x}" if self.need_state: return f"{self.func}({location}, {value}, state);" + if self.offset2 >= 0: + first_location = f"{where} + {self.offset2:#x}" + return f"{self.func}({first_location}, {location}, {value});" return f"{self.func}({location}, {value});" @@ -238,6 +258,10 @@ class StencilGroup: _got_entries: set[int] = dataclasses.field(default_factory=set, init=False) def convert_labels_to_relocations(self) -> None: + holes_by_offset: dict[int, Hole] = {} + first_in_pair: dict[str, Hole] = {} + for hole in self.code.holes: + holes_by_offset[hole.offset] = hole for name, hole_plus in self.symbols.items(): if isinstance(name, str) and "_JIT_RELOCATION_" in name: _, offset = hole_plus @@ -247,6 +271,16 @@ def convert_labels_to_relocations(self) -> None: int(offset), typing.cast(_schema.HoleKind, reloc), value, symbol, 0 ) self.code.holes.append(hole) + elif isinstance(name, str) and "_JIT_PAIR_" in name: + _, offset = hole_plus + reloc, target, index = name.split("_JIT_PAIR_") + if offset in holes_by_offset: + hole = holes_by_offset[offset] + if "33a" in reloc: + first_in_pair[index] = hole + elif "33b" 
in reloc and index in first_in_pair: + first = first_in_pair[index] + hole.fold(first) def process_relocations(self, known_symbols: dict[str, int]) -> None: """Fix up all GOT and internal relocations for this stencil group.""" diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index ea0a9722c3cdf8..fd5c143b8a812f 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -208,6 +208,9 @@ async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]: ) ) tasks = [] + # If you need to see the generated assembly files, + # uncomment line below (and comment out line below that) + # with tempfile.TemporaryDirectory("-stencils-assembly", delete=False) as tempdir: with tempfile.TemporaryDirectory() as tempdir: work = pathlib.Path(tempdir).resolve() async with asyncio.TaskGroup() as group: