diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 64a515270fd57..5cbcb017f97c1 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -825,23 +825,32 @@ bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) { ElementCount EC = cast(Op0->getType())->getElementCount(); Value *EVL = VPI.getArgOperand(3); const DataLayout &DL = VPI.getModule()->getDataLayout(); - bool MustHaveNonZeroVL = - IntrID == Intrinsic::vp_sdiv || IntrID == Intrinsic::vp_udiv || - IntrID == Intrinsic::vp_srem || IntrID == Intrinsic::vp_urem; - - if (!MustHaveNonZeroVL || isKnownNonZero(EVL, DL, 0, &AC, &VPI, &DT)) { - Value *ScalarOp0 = getSplatValue(Op0); - Value *ScalarOp1 = getSplatValue(Op1); - Value *ScalarVal = - ScalarIntrID - ? Builder.CreateIntrinsic(VecTy->getScalarType(), *ScalarIntrID, - {ScalarOp0, ScalarOp1}) - : Builder.CreateBinOp((Instruction::BinaryOps)(*FunctionalOpcode), - ScalarOp0, ScalarOp1); - replaceValue(VPI, *Builder.CreateVectorSplat(EC, ScalarVal)); - return true; - } - return false; + + // If the VP op might introduce UB or poison, we can scalarize it provided + // that we know the EVL > 0: If the EVL is zero, then the original VP op + // becomes a no-op and thus won't be UB, so make sure we don't introduce UB by + // scalarizing it. + bool SafeToSpeculate; + if (ScalarIntrID) + SafeToSpeculate = Intrinsic::getAttributes(I.getContext(), *ScalarIntrID) + .hasFnAttr(Attribute::AttrKind::Speculatable); + else + SafeToSpeculate = isSafeToSpeculativelyExecuteWithOpcode( + *FunctionalOpcode, &VPI, nullptr, &AC, &DT); + if (!SafeToSpeculate && !isKnownNonZero(EVL, DL, 0, &AC, &VPI, &DT)) + return false; + + Value *ScalarOp0 = getSplatValue(Op0); + Value *ScalarOp1 = getSplatValue(Op1); + Value *ScalarVal = + ScalarIntrID + ? Builder.CreateIntrinsic(VecTy->getScalarType(), *ScalarIntrID, + {ScalarOp0, ScalarOp1}) + : Builder.CreateBinOp((Instruction::BinaryOps)(*FunctionalOpcode), + ScalarOp0, ScalarOp1); + + replaceValue(VPI, *Builder.CreateVectorSplat(EC, ScalarVal)); + return true; } /// Match a vector binop or compare instruction with at least one inserted diff --git a/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll b/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll index da183a6b14bc6..e95aea4eb487b 100644 --- a/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll +++ b/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll @@ -166,14 +166,23 @@ define @mul_nxv1i64_anymask( %x, i64 %y, @sdiv_nxv1i64_allonesmask( %x, i64 %y, i32 zeroext %evl) { -; ALL-LABEL: @sdiv_nxv1i64_allonesmask( -; ALL-NEXT: [[SPLAT:%.*]] = insertelement poison, i1 true, i32 0 -; ALL-NEXT: [[MASK:%.*]] = shufflevector [[SPLAT]], poison, zeroinitializer -; ALL-NEXT: [[TMP1:%.*]] = insertelement poison, i64 [[Y:%.*]], i64 0 -; ALL-NEXT: [[TMP2:%.*]] = shufflevector [[TMP1]], poison, zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call @llvm.vp.sdiv.nxv1i64( [[TMP2]], shufflevector ( insertelement ( poison, i64 42, i32 0), poison, zeroinitializer), [[MASK]], i32 [[EVL:%.*]]) -; ALL-NEXT: [[TMP4:%.*]] = call @llvm.vp.mul.nxv1i64( [[X:%.*]], [[TMP3]], [[MASK]], i32 [[EVL]]) -; ALL-NEXT: ret [[TMP4]] +; VEC-COMBINE-LABEL: @sdiv_nxv1i64_allonesmask( +; VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement poison, i1 true, i32 0 +; VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector [[SPLAT]], poison, zeroinitializer +; VEC-COMBINE-NEXT: [[TMP1:%.*]] = sdiv i64 [[Y:%.*]], 42 +; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP1]], i64 0 +; VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; VEC-COMBINE-NEXT: [[TMP3:%.*]] = call @llvm.vp.mul.nxv1i64( [[X:%.*]], [[TMP2]], [[MASK]], i32 [[EVL:%.*]]) +; VEC-COMBINE-NEXT: ret [[TMP3]] +; +; NO-VEC-COMBINE-LABEL: @sdiv_nxv1i64_allonesmask( +; NO-VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement poison, i1 true, i32 0 +; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector [[SPLAT]], poison, zeroinitializer +; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement poison, i64 [[Y:%.*]], i64 0 +; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector [[TMP1]], poison, zeroinitializer +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call @llvm.vp.sdiv.nxv1i64( [[TMP2]], shufflevector ( insertelement ( poison, i64 42, i32 0), poison, zeroinitializer), [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call @llvm.vp.mul.nxv1i64( [[X:%.*]], [[TMP3]], [[MASK]], i32 [[EVL]]) +; NO-VEC-COMBINE-NEXT: ret [[TMP4]] ; %splat = insertelement poison, i1 -1, i32 0 %mask = shufflevector %splat, poison, zeroinitializer @@ -221,14 +230,23 @@ define @sdiv_nxv1i64_unspeculatable(i64 %x, i64 %y, i32 zeroe } define @udiv_nxv1i64_allonesmask( %x, i64 %y, i32 zeroext %evl) { -; ALL-LABEL: @udiv_nxv1i64_allonesmask( -; ALL-NEXT: [[SPLAT:%.*]] = insertelement poison, i1 true, i32 0 -; ALL-NEXT: [[MASK:%.*]] = shufflevector [[SPLAT]], poison, zeroinitializer -; ALL-NEXT: [[TMP1:%.*]] = insertelement poison, i64 [[Y:%.*]], i64 0 -; ALL-NEXT: [[TMP2:%.*]] = shufflevector [[TMP1]], poison, zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call @llvm.vp.udiv.nxv1i64( [[TMP2]], shufflevector ( insertelement ( poison, i64 42, i32 0), poison, zeroinitializer), [[MASK]], i32 [[EVL:%.*]]) -; ALL-NEXT: [[TMP4:%.*]] = call @llvm.vp.mul.nxv1i64( [[X:%.*]], [[TMP3]], [[MASK]], i32 [[EVL]]) -; ALL-NEXT: ret [[TMP4]] +; VEC-COMBINE-LABEL: @udiv_nxv1i64_allonesmask( +; VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement poison, i1 true, i32 0 +; VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector [[SPLAT]], poison, zeroinitializer +; VEC-COMBINE-NEXT: [[TMP1:%.*]] = udiv i64 [[Y:%.*]], 42 +; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP1]], i64 0 +; VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; VEC-COMBINE-NEXT: [[TMP3:%.*]] = call @llvm.vp.mul.nxv1i64( [[X:%.*]], [[TMP2]], [[MASK]], i32 [[EVL:%.*]]) +; VEC-COMBINE-NEXT: ret [[TMP3]] +; +; NO-VEC-COMBINE-LABEL: @udiv_nxv1i64_allonesmask( +; NO-VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement poison, i1 true, i32 0 +; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector [[SPLAT]], poison, zeroinitializer +; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement poison, i64 [[Y:%.*]], i64 0 +; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector [[TMP1]], poison, zeroinitializer +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call @llvm.vp.udiv.nxv1i64( [[TMP2]], shufflevector ( insertelement ( poison, i64 42, i32 0), poison, zeroinitializer), [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call @llvm.vp.mul.nxv1i64( [[X:%.*]], [[TMP3]], [[MASK]], i32 [[EVL]]) +; NO-VEC-COMBINE-NEXT: ret [[TMP4]] ; %splat = insertelement poison, i1 -1, i32 0 %mask = shufflevector %splat, poison, zeroinitializer @@ -276,14 +294,23 @@ define @udiv_nxv1i64_unspeculatable(i64 %x, i64 %y, i32 zeroe } define @srem_nxv1i64_allonesmask( %x, i64 %y, i32 zeroext %evl) { -; ALL-LABEL: @srem_nxv1i64_allonesmask( -; ALL-NEXT: [[SPLAT:%.*]] = insertelement poison, i1 true, i32 0 -; ALL-NEXT: [[MASK:%.*]] = shufflevector [[SPLAT]], poison, zeroinitializer -; ALL-NEXT: [[TMP1:%.*]] = insertelement poison, i64 [[Y:%.*]], i64 0 -; ALL-NEXT: [[TMP2:%.*]] = shufflevector [[TMP1]], poison, zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call @llvm.vp.srem.nxv1i64( [[TMP2]], shufflevector ( insertelement ( poison, i64 42, i32 0), poison, zeroinitializer), [[MASK]], i32 [[EVL:%.*]]) -; ALL-NEXT: [[TMP4:%.*]] = call @llvm.vp.mul.nxv1i64( [[X:%.*]], [[TMP3]], [[MASK]], i32 [[EVL]]) -; ALL-NEXT: ret [[TMP4]] +; VEC-COMBINE-LABEL: @srem_nxv1i64_allonesmask( +; VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement poison, i1 true, i32 0 +; VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector [[SPLAT]], poison, zeroinitializer +; VEC-COMBINE-NEXT: [[TMP1:%.*]] = srem i64 [[Y:%.*]], 42 +; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP1]], i64 0 +; VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; VEC-COMBINE-NEXT: [[TMP3:%.*]] = call @llvm.vp.mul.nxv1i64( [[X:%.*]], [[TMP2]], [[MASK]], i32 [[EVL:%.*]]) +; VEC-COMBINE-NEXT: ret [[TMP3]] +; +; NO-VEC-COMBINE-LABEL: @srem_nxv1i64_allonesmask( +; NO-VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement poison, i1 true, i32 0 +; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector [[SPLAT]], poison, zeroinitializer +; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement poison, i64 [[Y:%.*]], i64 0 +; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector [[TMP1]], poison, zeroinitializer +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call @llvm.vp.srem.nxv1i64( [[TMP2]], shufflevector ( insertelement ( poison, i64 42, i32 0), poison, zeroinitializer), [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call @llvm.vp.mul.nxv1i64( [[X:%.*]], [[TMP3]], [[MASK]], i32 [[EVL]]) +; NO-VEC-COMBINE-NEXT: ret [[TMP4]] ; %splat = insertelement poison, i1 -1, i32 0 %mask = shufflevector %splat, poison, zeroinitializer @@ -331,14 +358,23 @@ define @srem_nxv1i64_unspeculatable(i64 %x, i64 %y, i32 zeroe } define @urem_nxv1i64_allonesmask( %x, i64 %y, i32 zeroext %evl) { -; ALL-LABEL: @urem_nxv1i64_allonesmask( -; ALL-NEXT: [[SPLAT:%.*]] = insertelement poison, i1 true, i32 0 -; ALL-NEXT: [[MASK:%.*]] = shufflevector [[SPLAT]], poison, zeroinitializer -; ALL-NEXT: [[TMP1:%.*]] = insertelement poison, i64 [[Y:%.*]], i64 0 -; ALL-NEXT: [[TMP2:%.*]] = shufflevector [[TMP1]], poison, zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call @llvm.vp.urem.nxv1i64( [[TMP2]], shufflevector ( insertelement ( poison, i64 42, i32 0), poison, zeroinitializer), [[MASK]], i32 [[EVL:%.*]]) -; ALL-NEXT: [[TMP4:%.*]] = call @llvm.vp.mul.nxv1i64( [[X:%.*]], [[TMP3]], [[MASK]], i32 [[EVL]]) -; ALL-NEXT: ret [[TMP4]] +; VEC-COMBINE-LABEL: @urem_nxv1i64_allonesmask( +; VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement poison, i1 true, i32 0 +; VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector [[SPLAT]], poison, zeroinitializer +; VEC-COMBINE-NEXT: [[TMP1:%.*]] = urem i64 [[Y:%.*]], 42 +; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP1]], i64 0 +; VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; VEC-COMBINE-NEXT: [[TMP3:%.*]] = call @llvm.vp.mul.nxv1i64( [[X:%.*]], [[TMP2]], [[MASK]], i32 [[EVL:%.*]]) +; VEC-COMBINE-NEXT: ret [[TMP3]] +; +; NO-VEC-COMBINE-LABEL: @urem_nxv1i64_allonesmask( +; NO-VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement poison, i1 true, i32 0 +; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector [[SPLAT]], poison, zeroinitializer +; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement poison, i64 [[Y:%.*]], i64 0 +; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector [[TMP1]], poison, zeroinitializer +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call @llvm.vp.urem.nxv1i64( [[TMP2]], shufflevector ( insertelement ( poison, i64 42, i32 0), poison, zeroinitializer), [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call @llvm.vp.mul.nxv1i64( [[X:%.*]], [[TMP3]], [[MASK]], i32 [[EVL]]) +; NO-VEC-COMBINE-NEXT: ret [[TMP4]] ; %splat = insertelement poison, i1 -1, i32 0 %mask = shufflevector %splat, poison, zeroinitializer