diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index c1fb8b6d78ff8..ecba323f8d6bf 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -247,6 +247,7 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
     for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
       const MachineOperand &Src = MI.getOperand(Idx);
       Register SrcReg = Src.getReg();
+      LLT SrcTy = MRI.getType(SrcReg);
       // Look through trivial copies and phis but don't look through trivial
       // copies or phis of the form `%1:(s32) = OP %0:gpr32`, known-bits
       // analysis is currently unable to determine the bit width of a
@@ -255,9 +256,15 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
       // We can't use NoSubRegister by name as it's defined by each target but
       // it's always defined to be 0 by tablegen.
       if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
-          MRI.getType(SrcReg).isValid()) {
+          SrcTy.isValid()) {
+        // In case we're forwarding from a vector register to a non-vector
+        // register we need to update the demanded elements to reflect this
+        // before recursing.
+        APInt NowDemandedElts = SrcTy.isFixedVector() && !DstTy.isFixedVector()
+                                    ? APInt::getAllOnes(SrcTy.getNumElements())
+                                    : DemandedElts; // Known to be APInt(1, 1)
         // For COPYs we don't do anything, don't increase the depth.
-        computeKnownBitsImpl(SrcReg, Known2, DemandedElts,
+        computeKnownBitsImpl(SrcReg, Known2, NowDemandedElts,
                              Depth + (Opcode != TargetOpcode::COPY));
         Known2 = Known2.anyextOrTrunc(BitWidth);
         Known = Known.intersectWith(Known2);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll
new file mode 100644
index 0000000000000..f15253682c336
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -global-isel -o - %s | FileCheck %s
+
+target triple = "aarch64-unknown-unknown"
+
+; Check we don't crash here when computing known bits.
+
+define <4 x i32> @test(<8 x i16> %in, i1 %continue) {
+; CHECK-LABEL: test:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    mov w12, wzr
+; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    mov w9, #2 // =0x2
+; CHECK-NEXT:    mov w10, #0 // =0x0
+; CHECK-NEXT:  .LBB0_1: // %loop
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    mov w11, w12
+; CHECK-NEXT:    mov w12, w12
+; CHECK-NEXT:    str q0, [sp]
+; CHECK-NEXT:    and x12, x12, #0x7
+; CHECK-NEXT:    umull x12, w12, w9
+; CHECK-NEXT:    ldrb w12, [x8, x12]
+; CHECK-NEXT:    cmp w12, #0
+; CHECK-NEXT:    cset w12, eq
+; CHECK-NEXT:    fmov s1, w12
+; CHECK-NEXT:    mov v1.b[1], w10
+; CHECK-NEXT:    mov v1.b[2], w10
+; CHECK-NEXT:    mov v1.b[3], w10
+; CHECK-NEXT:    fmov w12, s1
+; CHECK-NEXT:    tbz w0, #0, .LBB0_1
+; CHECK-NEXT:  // %bb.2: // %exit
+; CHECK-NEXT:    fmov s0, w11
+; CHECK-NEXT:    mov v0.s[1], wzr
+; CHECK-NEXT:    mov v0.s[2], wzr
+; CHECK-NEXT:    mov v0.s[3], wzr
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ret
+entry:
+  br label %loop
+
+exit:
+  %result = insertelement <4 x i32> zeroinitializer, i32 %index, i64 0
+  ret <4 x i32> %result
+
+loop:
+  %index = phi i32 [ 0, %entry ], [ %insert.bitcast, %loop ]
+  %extracted = extractelement <8 x i16> %in, i32 %index
+  %masked = and i16 %extracted, 255
+  %maskedIsZero = icmp eq i16 %masked, 0
+  %maskedIsZero.zext = zext i1 %maskedIsZero to i8
+  %insert = insertelement <4 x i8> zeroinitializer, i8 %maskedIsZero.zext, i64 0
+  %insert.bitcast = bitcast <4 x i8> %insert to i32
+  br i1 %continue, label %exit, label %loop
+}
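Note (illustration, not part of the patch): a minimal standalone sketch of the demanded-elements widening the fix performs. The concrete types, the main() harness, and the include path are assumptions chosen to match the test's <4 x i8> -> i32 bitcast; only the ternary mirrors the patched code.

// Sketch: widening DemandedElts when a copy/phi forwards a fixed vector
// into a scalar register, under assumed types.
#include "llvm/ADT/APInt.h"
#include "llvm/CodeGenTypes/LowLevelType.h" // header path assumed for current trunk
#include <cassert>
using namespace llvm;

int main() {
  LLT SrcTy = LLT::fixed_vector(4, 8); // assumed: <4 x s8> source
  LLT DstTy = LLT::scalar(32);         // assumed: s32 destination

  // A scalar destination is queried with a single demanded "element".
  APInt DemandedElts(1, 1);

  // The fix: when forwarding vector -> non-vector, demand every source
  // lane before recursing, rather than reusing the scalar's one-element
  // mask (the situation the new test guards against crashing on).
  APInt NowDemandedElts = SrcTy.isFixedVector() && !DstTy.isFixedVector()
                              ? APInt::getAllOnes(SrcTy.getNumElements())
                              : DemandedElts;

  assert(NowDemandedElts == APInt::getAllOnes(4)); // all four lanes demanded
  return 0;
}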