Skip to content

Conversation

@cofibrant
Copy link
Contributor

Updates the demanded elements before recursing through copies in case the type of the source register changes from a non-vector register to a vector register.

Fixes #167842.

@llvmbot
Copy link
Member

llvmbot commented Nov 14, 2025

@llvm/pr-subscribers-backend-aarch64

Author: Nathan Corbyn (cofibrant)

Changes

Updates the demanded elements before recursing through copies in case the type of the source register changes from a non-vector register to a vector register.

Fixes #167842.


Full diff: https://github.com/llvm/llvm-project/pull/168081.diff

2 Files Affected:

  • (modified) llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp (+9-2)
  • (added) llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll (+50)
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index c1fb8b6d78ff8..ecba323f8d6bf 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -247,6 +247,7 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
     for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
       const MachineOperand &Src = MI.getOperand(Idx);
       Register SrcReg = Src.getReg();
+      LLT SrcTy = MRI.getType(SrcReg);
       // Look through trivial copies and phis but don't look through trivial
       // copies or phis of the form `%1:(s32) = OP %0:gpr32`, known-bits
       // analysis is currently unable to determine the bit width of a
@@ -255,9 +256,15 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
       // We can't use NoSubRegister by name as it's defined by each target but
       // it's always defined to be 0 by tablegen.
       if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
-          MRI.getType(SrcReg).isValid()) {
+          SrcTy.isValid()) {
+        // In case we're forwarding from a vector register to a non-vector
+        // register we need to update the demanded elements to reflect this
+        // before recursing.
+        APInt NowDemandedElts = SrcTy.isFixedVector() && !DstTy.isFixedVector()
+                                    ? APInt::getAllOnes(SrcTy.getNumElements())
+                                    : DemandedElts; // Known to be APInt(1, 1)
         // For COPYs we don't do anything, don't increase the depth.
-        computeKnownBitsImpl(SrcReg, Known2, DemandedElts,
+        computeKnownBitsImpl(SrcReg, Known2, NowDemandedElts,
                              Depth + (Opcode != TargetOpcode::COPY));
         Known2 = Known2.anyextOrTrunc(BitWidth);
         Known = Known.intersectWith(Known2);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll
new file mode 100644
index 0000000000000..76975e334e00b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -O3 -o - %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-unknown"
+
+; Check we don't crash here when computing known bits.
+
+define <4 x i32> @test(<8 x i16> %in, i1 %continue) {
+; CHECK-LABEL: test:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #32
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    mov w9, wzr
+; CHECK-NEXT:  .LBB0_1: // %loop
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    mov w8, w9
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    str q0, [sp]
+; CHECK-NEXT:    bfi x9, x8, #1, #3
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    ldrh w9, [x9]
+; CHECK-NEXT:    tst w9, #0xff
+; CHECK-NEXT:    cset w9, eq
+; CHECK-NEXT:    mov v1.h[0], w9
+; CHECK-NEXT:    xtn v1.8b, v1.8h
+; CHECK-NEXT:    fmov w9, s1
+; CHECK-NEXT:    tbz w0, #0, .LBB0_1
+; CHECK-NEXT:  // %bb.2: // %exit
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.s[0], w8
+; CHECK-NEXT:    add sp, sp, #32
+; CHECK-NEXT:    ret
+entry:
+  br label %loop
+
+exit:
+  %result = insertelement <4 x i32> zeroinitializer, i32 %index, i64 0
+  ret <4 x i32> %result
+
+loop:
+  %index = phi i32 [ 0, %entry ], [ %insert.bitcast, %loop ]
+  %extracted = extractelement <8 x i16> %in, i32 %index
+  %masked = and i16 %extracted, 255
+  %maskedIsZero = icmp eq i16 %masked, 0
+  %maskedIsZero.zext = zext i1 %maskedIsZero to i8
+  %insert = insertelement <4 x i8> zeroinitializer, i8 %maskedIsZero.zext, i64 0
+  %insert.bitcast = bitcast <4 x i8> %insert to i32
+  br i1 %continue, label %exit, label %loop
+}

@llvmbot
Copy link
Member

llvmbot commented Nov 14, 2025

@llvm/pr-subscribers-llvm-globalisel

Author: Nathan Corbyn (cofibrant)

Changes

Updates the demanded elements before recursing through copies in case the type of the source register changes from a non-vector register to a vector register.

Fixes #167842.


Full diff: https://github.com/llvm/llvm-project/pull/168081.diff

2 Files Affected:

  • (modified) llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp (+9-2)
  • (added) llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll (+50)
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index c1fb8b6d78ff8..ecba323f8d6bf 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -247,6 +247,7 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
     for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
       const MachineOperand &Src = MI.getOperand(Idx);
       Register SrcReg = Src.getReg();
+      LLT SrcTy = MRI.getType(SrcReg);
       // Look through trivial copies and phis but don't look through trivial
       // copies or phis of the form `%1:(s32) = OP %0:gpr32`, known-bits
       // analysis is currently unable to determine the bit width of a
@@ -255,9 +256,15 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
       // We can't use NoSubRegister by name as it's defined by each target but
       // it's always defined to be 0 by tablegen.
       if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
-          MRI.getType(SrcReg).isValid()) {
+          SrcTy.isValid()) {
+        // In case we're forwarding from a vector register to a non-vector
+        // register we need to update the demanded elements to reflect this
+        // before recursing.
+        APInt NowDemandedElts = SrcTy.isFixedVector() && !DstTy.isFixedVector()
+                                    ? APInt::getAllOnes(SrcTy.getNumElements())
+                                    : DemandedElts; // Known to be APInt(1, 1)
         // For COPYs we don't do anything, don't increase the depth.
-        computeKnownBitsImpl(SrcReg, Known2, DemandedElts,
+        computeKnownBitsImpl(SrcReg, Known2, NowDemandedElts,
                              Depth + (Opcode != TargetOpcode::COPY));
         Known2 = Known2.anyextOrTrunc(BitWidth);
         Known = Known.intersectWith(Known2);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll
new file mode 100644
index 0000000000000..76975e334e00b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -O3 -o - %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-unknown"
+
+; Check we don't crash here when computing known bits.
+
+define <4 x i32> @test(<8 x i16> %in, i1 %continue) {
+; CHECK-LABEL: test:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #32
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    mov w9, wzr
+; CHECK-NEXT:  .LBB0_1: // %loop
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    mov w8, w9
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    str q0, [sp]
+; CHECK-NEXT:    bfi x9, x8, #1, #3
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    ldrh w9, [x9]
+; CHECK-NEXT:    tst w9, #0xff
+; CHECK-NEXT:    cset w9, eq
+; CHECK-NEXT:    mov v1.h[0], w9
+; CHECK-NEXT:    xtn v1.8b, v1.8h
+; CHECK-NEXT:    fmov w9, s1
+; CHECK-NEXT:    tbz w0, #0, .LBB0_1
+; CHECK-NEXT:  // %bb.2: // %exit
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.s[0], w8
+; CHECK-NEXT:    add sp, sp, #32
+; CHECK-NEXT:    ret
+entry:
+  br label %loop
+
+exit:
+  %result = insertelement <4 x i32> zeroinitializer, i32 %index, i64 0
+  ret <4 x i32> %result
+
+loop:
+  %index = phi i32 [ 0, %entry ], [ %insert.bitcast, %loop ]
+  %extracted = extractelement <8 x i16> %in, i32 %index
+  %masked = and i16 %extracted, 255
+  %maskedIsZero = icmp eq i16 %masked, 0
+  %maskedIsZero.zext = zext i1 %maskedIsZero to i8
+  %insert = insertelement <4 x i8> zeroinitializer, i8 %maskedIsZero.zext, i64 0
+  %insert.bitcast = bitcast <4 x i8> %insert to i32
+  br i1 %continue, label %exit, label %loop
+}

@@ -0,0 +1,50 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -O3 -o - %s | FileCheck %s
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You need -global-isel for this test.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also probably don't need -O3

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch 😅 Thanks! (We need at least -O1 for the crash to trigger. Any preference?)

Copy link
Contributor

@aemerson aemerson left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Assuming the test is fixed, LGTM.

@@ -0,0 +1,50 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -O3 -o - %s | FileCheck %s
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also probably don't need -O3

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -O3 -o - %s | FileCheck %s

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

4 participants