-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[AArch64][GISel] Don't crash in known-bits when copying from vectors to non-vectors #168081
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
|
@llvm/pr-subscribers-backend-aarch64 Author: Nathan Corbyn (cofibrant) ChangesUpdates the demanded elements before recursing through copies in case the type of the source register changes from a non-vector register to a vector register. Fixes #167842. Full diff: https://github.com/llvm/llvm-project/pull/168081.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index c1fb8b6d78ff8..ecba323f8d6bf 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -247,6 +247,7 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
const MachineOperand &Src = MI.getOperand(Idx);
Register SrcReg = Src.getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
// Look through trivial copies and phis but don't look through trivial
// copies or phis of the form `%1:(s32) = OP %0:gpr32`, known-bits
// analysis is currently unable to determine the bit width of a
@@ -255,9 +256,15 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
// We can't use NoSubRegister by name as it's defined by each target but
// it's always defined to be 0 by tablegen.
if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
- MRI.getType(SrcReg).isValid()) {
+ SrcTy.isValid()) {
+ // In case we're forwarding from a vector register to a non-vector
+ // register we need to update the demanded elements to reflect this
+ // before recursing.
+ APInt NowDemandedElts = SrcTy.isFixedVector() && !DstTy.isFixedVector()
+ ? APInt::getAllOnes(SrcTy.getNumElements())
+ : DemandedElts; // Known to be APInt(1, 1)
// For COPYs we don't do anything, don't increase the depth.
- computeKnownBitsImpl(SrcReg, Known2, DemandedElts,
+ computeKnownBitsImpl(SrcReg, Known2, NowDemandedElts,
Depth + (Opcode != TargetOpcode::COPY));
Known2 = Known2.anyextOrTrunc(BitWidth);
Known = Known.intersectWith(Known2);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll
new file mode 100644
index 0000000000000..76975e334e00b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -O3 -o - %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-unknown"
+
+; Check we don't crash here when computing known bits.
+
+define <4 x i32> @test(<8 x i16> %in, i1 %continue) {
+; CHECK-LABEL: test:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: mov w9, wzr
+; CHECK-NEXT: .LBB0_1: // %loop
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: mov w8, w9
+; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: str q0, [sp]
+; CHECK-NEXT: bfi x9, x8, #1, #3
+; CHECK-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NEXT: ldrh w9, [x9]
+; CHECK-NEXT: tst w9, #0xff
+; CHECK-NEXT: cset w9, eq
+; CHECK-NEXT: mov v1.h[0], w9
+; CHECK-NEXT: xtn v1.8b, v1.8h
+; CHECK-NEXT: fmov w9, s1
+; CHECK-NEXT: tbz w0, #0, .LBB0_1
+; CHECK-NEXT: // %bb.2: // %exit
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: mov v0.s[0], w8
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: ret
+entry:
+ br label %loop
+
+exit:
+ %result = insertelement <4 x i32> zeroinitializer, i32 %index, i64 0
+ ret <4 x i32> %result
+
+loop:
+ %index = phi i32 [ 0, %entry ], [ %insert.bitcast, %loop ]
+ %extracted = extractelement <8 x i16> %in, i32 %index
+ %masked = and i16 %extracted, 255
+ %maskedIsZero = icmp eq i16 %masked, 0
+ %maskedIsZero.zext = zext i1 %maskedIsZero to i8
+ %insert = insertelement <4 x i8> zeroinitializer, i8 %maskedIsZero.zext, i64 0
+ %insert.bitcast = bitcast <4 x i8> %insert to i32
+ br i1 %continue, label %exit, label %loop
+}
|
|
@llvm/pr-subscribers-llvm-globalisel Author: Nathan Corbyn (cofibrant) ChangesUpdates the demanded elements before recursing through copies in case the type of the source register changes from a non-vector register to a vector register. Fixes #167842. Full diff: https://github.com/llvm/llvm-project/pull/168081.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index c1fb8b6d78ff8..ecba323f8d6bf 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -247,6 +247,7 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
const MachineOperand &Src = MI.getOperand(Idx);
Register SrcReg = Src.getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
// Look through trivial copies and phis but don't look through trivial
// copies or phis of the form `%1:(s32) = OP %0:gpr32`, known-bits
// analysis is currently unable to determine the bit width of a
@@ -255,9 +256,15 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
// We can't use NoSubRegister by name as it's defined by each target but
// it's always defined to be 0 by tablegen.
if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
- MRI.getType(SrcReg).isValid()) {
+ SrcTy.isValid()) {
+ // In case we're forwarding from a vector register to a non-vector
+ // register we need to update the demanded elements to reflect this
+ // before recursing.
+ APInt NowDemandedElts = SrcTy.isFixedVector() && !DstTy.isFixedVector()
+ ? APInt::getAllOnes(SrcTy.getNumElements())
+ : DemandedElts; // Known to be APInt(1, 1)
// For COPYs we don't do anything, don't increase the depth.
- computeKnownBitsImpl(SrcReg, Known2, DemandedElts,
+ computeKnownBitsImpl(SrcReg, Known2, NowDemandedElts,
Depth + (Opcode != TargetOpcode::COPY));
Known2 = Known2.anyextOrTrunc(BitWidth);
Known = Known.intersectWith(Known2);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll
new file mode 100644
index 0000000000000..76975e334e00b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -O3 -o - %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-unknown"
+
+; Check we don't crash here when computing known bits.
+
+define <4 x i32> @test(<8 x i16> %in, i1 %continue) {
+; CHECK-LABEL: test:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: mov w9, wzr
+; CHECK-NEXT: .LBB0_1: // %loop
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: mov w8, w9
+; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: str q0, [sp]
+; CHECK-NEXT: bfi x9, x8, #1, #3
+; CHECK-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NEXT: ldrh w9, [x9]
+; CHECK-NEXT: tst w9, #0xff
+; CHECK-NEXT: cset w9, eq
+; CHECK-NEXT: mov v1.h[0], w9
+; CHECK-NEXT: xtn v1.8b, v1.8h
+; CHECK-NEXT: fmov w9, s1
+; CHECK-NEXT: tbz w0, #0, .LBB0_1
+; CHECK-NEXT: // %bb.2: // %exit
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: mov v0.s[0], w8
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: ret
+entry:
+ br label %loop
+
+exit:
+ %result = insertelement <4 x i32> zeroinitializer, i32 %index, i64 0
+ ret <4 x i32> %result
+
+loop:
+ %index = phi i32 [ 0, %entry ], [ %insert.bitcast, %loop ]
+ %extracted = extractelement <8 x i16> %in, i32 %index
+ %masked = and i16 %extracted, 255
+ %maskedIsZero = icmp eq i16 %masked, 0
+ %maskedIsZero.zext = zext i1 %maskedIsZero to i8
+ %insert = insertelement <4 x i8> zeroinitializer, i8 %maskedIsZero.zext, i64 0
+ %insert.bitcast = bitcast <4 x i8> %insert to i32
+ br i1 %continue, label %exit, label %loop
+}
|
| @@ -0,0 +1,50 @@ | |||
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 | |||
| ; RUN: llc -O3 -o - %s | FileCheck %s | |||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You need -global-isel for this test.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also probably don't need -O3
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good catch 😅 Thanks! (We need at least -O1 for the crash to trigger. Any preference?)
aemerson
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Assuming the test is fixed, LGTM.
| @@ -0,0 +1,50 @@ | |||
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 | |||
| ; RUN: llc -O3 -o - %s | FileCheck %s | |||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also probably don't need -O3
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 | ||
| ; RUN: llc -O3 -o - %s | FileCheck %s | ||
|
|
||
| target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32" |
Updates the demanded elements before recursing through copies in case the type of the source register changes from a non-vector register to a vector register.
Fixes #167842.