Skip to content

Conversation

@davemgreen
Copy link
Collaborator

@davemgreen davemgreen commented Nov 13, 2025

We generate an ADDLV node that incorporates a vecreduce(zext) from elements of half the size. This means that we need the output type to be at least twice the size of the input element type (the type being extended from).

I updated some variable names whilst I was here.

Fixes #167935

We generate an ADDLV node that incorporates a vecreduce(zext) from elements of
half the size. This means that we need the output type to be at least twice the
size of the input element type (the type being extended from).
@llvmbot
Copy link
Member

llvmbot commented Nov 13, 2025

@llvm/pr-subscribers-backend-aarch64

Author: David Green (davemgreen)

Changes

We generate an ADDLV node that incorporates a vecreduce(zext) from elements of half the size. This means that we need the output type to be at least twice the size of the input element type (the type being extended from).

I updated some variable names whilst I was here.


Full diff: https://github.com/llvm/llvm-project/pull/167944.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp (+16-14)
  • (modified) llvm/test/CodeGen/AArch64/vecreduce-add.ll (+32)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
index 896eab521bfdb..29538d0f9ba1b 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
@@ -435,6 +435,8 @@ bool matchExtUaddvToUaddlv(MachineInstr &MI, MachineRegisterInfo &MRI,
   Register ExtSrcReg = ExtMI->getOperand(1).getReg();
   LLT ExtSrcTy = MRI.getType(ExtSrcReg);
   LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+  if (ExtSrcTy.getScalarSizeInBits() * 2 > DstTy.getScalarSizeInBits())
+    return false;
   if ((DstTy.getScalarSizeInBits() == 16 &&
        ExtSrcTy.getNumElements() % 8 == 0 && ExtSrcTy.getNumElements() < 256) ||
       (DstTy.getScalarSizeInBits() == 32 &&
@@ -492,7 +494,7 @@ void applyExtUaddvToUaddlv(MachineInstr &MI, MachineRegisterInfo &MRI,
 
   unsigned MidScalarSize = MainTy.getScalarSizeInBits() * 2;
   LLT MidScalarLLT = LLT::scalar(MidScalarSize);
-  Register zeroReg = B.buildConstant(LLT::scalar(64), 0).getReg(0);
+  Register ZeroReg = B.buildConstant(LLT::scalar(64), 0).getReg(0);
   for (unsigned I = 0; I < WorkingRegisters.size(); I++) {
     // If the number of elements is too small to build an instruction, extend
     // its size before applying addlv
@@ -508,10 +510,10 @@ void applyExtUaddvToUaddlv(MachineInstr &MI, MachineRegisterInfo &MRI,
 
     // Generate the {U/S}ADDLV instruction, whose output is always double of the
     // Src's Scalar size
-    LLT addlvTy = MidScalarSize <= 32 ? LLT::fixed_vector(4, 32)
+    LLT AddlvTy = MidScalarSize <= 32 ? LLT::fixed_vector(4, 32)
                                       : LLT::fixed_vector(2, 64);
-    Register addlvReg =
-        B.buildInstr(Opc, {addlvTy}, {WorkingRegisters[I]}).getReg(0);
+    Register AddlvReg =
+        B.buildInstr(Opc, {AddlvTy}, {WorkingRegisters[I]}).getReg(0);
 
     // The output from {U/S}ADDLV gets placed in the lowest lane of a v4i32 or
     // v2i64 register.
@@ -520,26 +522,26 @@ void applyExtUaddvToUaddlv(MachineInstr &MI, MachineRegisterInfo &MRI,
     // Therefore we have to extract/truncate the the value to the right type
     if (MidScalarSize == 32 || MidScalarSize == 64) {
       WorkingRegisters[I] = B.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT,
-                                         {MidScalarLLT}, {addlvReg, zeroReg})
+                                         {MidScalarLLT}, {AddlvReg, ZeroReg})
                                 .getReg(0);
     } else {
-      Register extractReg = B.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT,
-                                         {LLT::scalar(32)}, {addlvReg, zeroReg})
+      Register ExtractReg = B.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT,
+                                         {LLT::scalar(32)}, {AddlvReg, ZeroReg})
                                 .getReg(0);
       WorkingRegisters[I] =
-          B.buildTrunc({MidScalarLLT}, {extractReg}).getReg(0);
+          B.buildTrunc({MidScalarLLT}, {ExtractReg}).getReg(0);
     }
   }
 
-  Register outReg;
+  Register OutReg;
   if (WorkingRegisters.size() > 1) {
-    outReg = B.buildAdd(MidScalarLLT, WorkingRegisters[0], WorkingRegisters[1])
+    OutReg = B.buildAdd(MidScalarLLT, WorkingRegisters[0], WorkingRegisters[1])
                  .getReg(0);
     for (unsigned I = 2; I < WorkingRegisters.size(); I++) {
-      outReg = B.buildAdd(MidScalarLLT, outReg, WorkingRegisters[I]).getReg(0);
+      OutReg = B.buildAdd(MidScalarLLT, OutReg, WorkingRegisters[I]).getReg(0);
     }
   } else {
-    outReg = WorkingRegisters[0];
+    OutReg = WorkingRegisters[0];
   }
 
   if (DstTy.getScalarSizeInBits() > MidScalarSize) {
@@ -547,9 +549,9 @@ void applyExtUaddvToUaddlv(MachineInstr &MI, MachineRegisterInfo &MRI,
     // Src's ScalarType
     B.buildInstr(std::get<1>(MatchInfo) ? TargetOpcode::G_SEXT
                                         : TargetOpcode::G_ZEXT,
-                 {DstReg}, {outReg});
+                 {DstReg}, {OutReg});
   } else {
-    B.buildCopy(DstReg, outReg);
+    B.buildCopy(DstReg, OutReg);
   }
 
   MI.eraseFromParent();
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
index 2d0df562b9a4b..12c13e8337e8d 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
@@ -4808,6 +4808,38 @@ define i64 @extract_scalable(<2 x i32> %0) "target-features"="+sve2" {
   ret i64 %5
 }
 
+define i32 @vecreduce_add_from_i21_zero() {
+; CHECK-SD-LABEL: vecreduce_add_from_i21_zero:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    mov w0, wzr
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: vecreduce_add_from_i21_zero:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-GI-NEXT:    addv s0, v0.4s
+; CHECK-GI-NEXT:    fmov w0, s0
+; CHECK-GI-NEXT:    ret
+entry:
+  %0 = zext <4 x i21> zeroinitializer to <4 x i32>
+  %1 = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %0)
+  ret i32 %1
+}
+
+define i32 @vecreduce_add_from_i21(<4 x i21> %a) {
+; CHECK-LABEL: vecreduce_add_from_i21:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v1.4s, #31, msl #16
+; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %0 = zext <4 x i21> %a to <4 x i32>
+  %1 = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %0)
+  ret i32 %1
+}
+
 declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1 immarg) #1
 declare i16 @llvm.vector.reduce.add.v32i16(<32 x i16>)
 declare i16 @llvm.vector.reduce.add.v24i16(<24 x i16>)

Copy link
Member

@XChy XChy left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@davemgreen davemgreen merged commit 19c1381 into llvm:main Nov 17, 2025
12 checks passed
@davemgreen davemgreen deleted the gh-gi-addvtype branch November 17, 2025 10:53
@llvm-ci
Copy link
Collaborator

llvm-ci commented Nov 17, 2025

LLVM Buildbot has detected a new failure on builder llvm-clang-x86_64-gcc-ubuntu running on sie-linux-worker3 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/174/builds/27583

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'SanitizerCommon-hwasan-x86_64-Linux :: Linux/soft_rss_limit_mb_test.cpp' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 2
/home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/./bin/clang  --driver-mode=g++ -gline-tables-only -fsanitize=hwaddress -fuse-ld=lld -fsanitize-hwaddress-experimental-aliasing  -m64 -funwind-tables  -I/home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test -ldl -O2 /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp -o /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/hwasan-x86_64-Linux/Linux/Output/soft_rss_limit_mb_test.cpp.tmp
# executed command: /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/./bin/clang --driver-mode=g++ -gline-tables-only -fsanitize=hwaddress -fuse-ld=lld -fsanitize-hwaddress-experimental-aliasing -m64 -funwind-tables -I/home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test -ldl -O2 /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp -o /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/hwasan-x86_64-Linux/Linux/Output/soft_rss_limit_mb_test.cpp.tmp
# note: command had no output on stdout or stderr
# RUN: at line 5
env HWASAN_OPTIONS=soft_rss_limit_mb=220:quarantine_size=1:allocator_may_return_null=1      /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/hwasan-x86_64-Linux/Linux/Output/soft_rss_limit_mb_test.cpp.tmp 2>&1 | FileCheck /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp -check-prefix=CHECK_MAY_RETURN_1
# executed command: env HWASAN_OPTIONS=soft_rss_limit_mb=220:quarantine_size=1:allocator_may_return_null=1 /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/hwasan-x86_64-Linux/Linux/Output/soft_rss_limit_mb_test.cpp.tmp
# note: command had no output on stdout or stderr
# executed command: FileCheck /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp -check-prefix=CHECK_MAY_RETURN_1
# note: command had no output on stdout or stderr
# RUN: at line 6
env HWASAN_OPTIONS=soft_rss_limit_mb=220:quarantine_size=1:allocator_may_return_null=0 not  /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/hwasan-x86_64-Linux/Linux/Output/soft_rss_limit_mb_test.cpp.tmp 2>&1 | FileCheck /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp -check-prefix=CHECK_MAY_RETURN_0 --implicit-check-not="returned null"
# executed command: env HWASAN_OPTIONS=soft_rss_limit_mb=220:quarantine_size=1:allocator_may_return_null=0 not /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/hwasan-x86_64-Linux/Linux/Output/soft_rss_limit_mb_test.cpp.tmp
# note: command had no output on stdout or stderr
# executed command: FileCheck /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp -check-prefix=CHECK_MAY_RETURN_0 '--implicit-check-not=returned null'
# note: command had no output on stdout or stderr
# RUN: at line 10
env HWASAN_OPTIONS=soft_rss_limit_mb=220:quarantine_size=1:allocator_may_return_null=0:can_use_proc_maps_statm=0 not  /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/hwasan-x86_64-Linux/Linux/Output/soft_rss_limit_mb_test.cpp.tmp 2>&1 | FileCheck /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp -check-prefix=CHECK_MAY_RETURN_0 --implicit-check-not="returned null"
# executed command: env HWASAN_OPTIONS=soft_rss_limit_mb=220:quarantine_size=1:allocator_may_return_null=0:can_use_proc_maps_statm=0 not /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/hwasan-x86_64-Linux/Linux/Output/soft_rss_limit_mb_test.cpp.tmp
# note: command had no output on stdout or stderr
# executed command: FileCheck /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp -check-prefix=CHECK_MAY_RETURN_0 '--implicit-check-not=returned null'
# .---command stderr------------
# | /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp:72:24: error: CHECK_MAY_RETURN_0: expected string not found in input
# | // CHECK_MAY_RETURN_0: Some of the malloc calls returned non-null:
# |                        ^
# | <stdin>:1:24: note: scanning from here
# | [0] allocating 32 times
# |                        ^
# | <stdin>:9:54: note: possible intended match here
# | ==987139==HINT: if you don't care about these errors you may set allocator_may_return_null=1
# |                                                      ^
# | 
# | Input file: <stdin>
# | Check file: /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp
# | 
# | -dump-input=help explains the following input dump.
# | 
# | Input was:
# | <<<<<<
# |             1: [0] allocating 32 times 
# | check:71           ^~~~~~~~~~~~~~~~~~~
# | not:imp1       X~~~
# | check:72'0                            X error: no match found
# |             2:  [0] 
# | check:72'0     ~~~~~
...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

5 participants