From 8838d7fbd624e6af9848bd782a6d0a72a8089cd4 Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 8 Dec 2025 09:48:04 +0000 Subject: [PATCH] [AArch64][GlobalISel] Fix creation of incorrect COPY gpr32, grp32.sub_32 In selecting a G_STORE with i8 memty and i16 type, we would try to generate a %6:gpr32 = COPY %3.sub_32:gpr32all instruction. Make sure we only add the subreg if the Ty is larger than 32bits and the subreg is needed. --- .../GISel/AArch64InstructionSelector.cpp | 2 + .../CodeGen/AArch64/load-store-forwarding.ll | 103 ++++++++++++++---- 2 files changed, 82 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index f9db39e5f8622..88f1778680efe 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -3015,6 +3015,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { return false; // Generate a subreg copy. + if (RB.getID() == AArch64::GPRRegBankID && ValTy.getSizeInBits() < 32) + SubReg = 0; auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {}) .addReg(ValReg, 0, SubReg) .getReg(0); diff --git a/llvm/test/CodeGen/AArch64/load-store-forwarding.ll b/llvm/test/CodeGen/AArch64/load-store-forwarding.ll index 02efbe9b409de..f1635239b146b 100644 --- a/llvm/test/CodeGen/AArch64/load-store-forwarding.ll +++ b/llvm/test/CodeGen/AArch64/load-store-forwarding.ll @@ -1,8 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64_be -o - %s | FileCheck %s --check-prefix CHECK-BE -; RUN: llc -mtriple=aarch64 -o - %s | FileCheck %s --check-prefix CHECK-LE +; RUN: llc -mtriple=aarch64 -o - %s | FileCheck %s --check-prefixes CHECK-LE,CHECK-SD +; RUN: llc -mtriple=aarch64_be -o - %s | FileCheck %s --check-prefixes CHECK-BE +; RUN: llc -mtriple=aarch64 -global-isel -o - %s | FileCheck %s --check-prefixes CHECK-LE,CHECK-GI define i8 @test1(i32 %a, ptr %pa) { +; CHECK-SD-LABEL: test1: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: str w0, [x1] +; CHECK-SD-NEXT: ret +; ; CHECK-BE-LABEL: test1: ; CHECK-BE: // %bb.0: ; CHECK-BE-NEXT: mov w8, w0 @@ -10,27 +16,28 @@ define i8 @test1(i32 %a, ptr %pa) { ; CHECK-BE-NEXT: str w8, [x1] ; CHECK-BE-NEXT: ret ; -; CHECK-LE-LABEL: test1: -; CHECK-LE: // %bb.0: -; CHECK-LE-NEXT: str w0, [x1] -; CHECK-LE-NEXT: ret +; CHECK-GI-LABEL: test1: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str w0, [x1] +; CHECK-GI-NEXT: and w0, w0, #0xff +; CHECK-GI-NEXT: ret store i32 %a, ptr %pa %res = load i8, ptr %pa ret i8 %res } define i8 @test2(i32 %a, ptr %pa) { -; CHECK-BE-LABEL: test2: -; CHECK-BE: // %bb.0: -; CHECK-BE-NEXT: str w0, [x1] -; CHECK-BE-NEXT: ldrb w0, [x1, #1] -; CHECK-BE-NEXT: ret -; ; CHECK-LE-LABEL: test2: ; CHECK-LE: // %bb.0: ; CHECK-LE-NEXT: str w0, [x1] ; CHECK-LE-NEXT: ubfx w0, w0, #8, #8 ; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test2: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: str w0, [x1] +; CHECK-BE-NEXT: ldrb w0, [x1, #1] +; CHECK-BE-NEXT: ret %p8 = getelementptr i8, ptr %pa, i32 1 store i32 %a, ptr %pa %res = load i8, ptr %p8 @@ -38,17 +45,17 @@ define i8 @test2(i32 %a, ptr %pa) { } define i8 @test3(i32 %a, ptr %pa) { -; CHECK-BE-LABEL: test3: -; CHECK-BE: // %bb.0: -; CHECK-BE-NEXT: str w0, [x1] -; CHECK-BE-NEXT: ldrb w0, [x1, #2] -; CHECK-BE-NEXT: ret -; ; CHECK-LE-LABEL: test3: ; CHECK-LE: // %bb.0: ; CHECK-LE-NEXT: str w0, [x1] ; CHECK-LE-NEXT: ubfx w0, w0, #16, #8 ; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test3: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: str w0, [x1] +; CHECK-BE-NEXT: ldrb w0, [x1, #2] +; CHECK-BE-NEXT: ret %p8 = getelementptr i8, ptr %pa, i32 2 store i32 %a, ptr %pa %res = load i8, ptr %p8 @@ -56,18 +63,68 @@ define i8 @test3(i32 %a, ptr %pa) { } define i8 @test4(i32 %a, ptr %pa) { -; CHECK-BE-LABEL: test4: -; CHECK-BE: // %bb.0: -; CHECK-BE-NEXT: str w0, [x1] -; CHECK-BE-NEXT: ret -; ; CHECK-LE-LABEL: test4: ; CHECK-LE: // %bb.0: ; CHECK-LE-NEXT: str w0, [x1] ; CHECK-LE-NEXT: lsr w0, w0, #24 ; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test4: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: str w0, [x1] +; CHECK-BE-NEXT: ret %p8 = getelementptr i8, ptr %pa, i32 3 store i32 %a, ptr %pa %res = load i8, ptr %p8 ret i8 %res } + +define i32 @load_i16_store_i8(ptr %p, ptr %q) { +; CHECK-SD-LABEL: load_i16_store_i8: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: ldrb w8, [x0] +; CHECK-SD-NEXT: mov w0, wzr +; CHECK-SD-NEXT: strb w8, [x1] +; CHECK-SD-NEXT: ret +; +; CHECK-BE-LABEL: load_i16_store_i8: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ldrb w8, [x0, #1] +; CHECK-BE-NEXT: mov w0, wzr +; CHECK-BE-NEXT: strb w8, [x1] +; CHECK-BE-NEXT: ret +; +; CHECK-GI-LABEL: load_i16_store_i8: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ldrh w8, [x0] +; CHECK-GI-NEXT: mov w0, wzr +; CHECK-GI-NEXT: strb w8, [x1] +; CHECK-GI-NEXT: ret +entry: + %l = load i16, ptr %p, align 4 + %tr = trunc i16 %l to i8 + store i8 %tr, ptr %q, align 1 + ret i32 0 +} + +define i32 @load_i16_store_i8_freeze(ptr %p, ptr %q) { +; CHECK-LE-LABEL: load_i16_store_i8_freeze: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldrh w8, [x0] +; CHECK-LE-NEXT: mov w0, wzr +; CHECK-LE-NEXT: strb w8, [x1] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: load_i16_store_i8_freeze: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ldrh w8, [x0] +; CHECK-BE-NEXT: mov w0, wzr +; CHECK-BE-NEXT: strb w8, [x1] +; CHECK-BE-NEXT: ret +entry: + %l = load i16, ptr %p, align 4 + %fr = freeze i16 %l + %tr = trunc i16 %fr to i8 + store i8 %tr, ptr %q, align 1 + ret i32 0 +}