diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 265493eb105a0..9002452204480 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -826,6 +826,7 @@ class VPRecipeWithIRFlags : public VPRecipeBase { enum class OperationType : unsigned char { Cmp, OverflowingBinOp, + DisjointOp, PossiblyExactOp, GEPOp, FPMathOp, @@ -842,6 +843,9 @@ class VPRecipeWithIRFlags : public VPRecipeBase { }; private: + struct DisjointFlagsTy { + char IsDisjoint : 1; + }; struct ExactFlagsTy { char IsExact : 1; }; @@ -868,6 +872,7 @@ class VPRecipeWithIRFlags : public VPRecipeBase { union { CmpInst::Predicate CmpPredicate; WrapFlagsTy WrapFlags; + DisjointFlagsTy DisjointFlags; ExactFlagsTy ExactFlags; GEPFlagsTy GEPFlags; NonNegFlagsTy NonNegFlags; @@ -889,6 +894,9 @@ class VPRecipeWithIRFlags : public VPRecipeBase { if (auto *Op = dyn_cast(&I)) { OpType = OperationType::Cmp; CmpPredicate = Op->getPredicate(); + } else if (auto *Op = dyn_cast(&I)) { + OpType = OperationType::DisjointOp; + DisjointFlags.IsDisjoint = Op->isDisjoint(); } else if (auto *Op = dyn_cast(&I)) { OpType = OperationType::OverflowingBinOp; WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()}; @@ -942,6 +950,9 @@ class VPRecipeWithIRFlags : public VPRecipeBase { WrapFlags.HasNUW = false; WrapFlags.HasNSW = false; break; + case OperationType::DisjointOp: + DisjointFlags.IsDisjoint = false; + break; case OperationType::PossiblyExactOp: ExactFlags.IsExact = false; break; @@ -968,6 +979,9 @@ class VPRecipeWithIRFlags : public VPRecipeBase { I->setHasNoUnsignedWrap(WrapFlags.HasNUW); I->setHasNoSignedWrap(WrapFlags.HasNSW); break; + case OperationType::DisjointOp: + cast(I)->setIsDisjoint(DisjointFlags.IsDisjoint); + break; case OperationType::PossiblyExactOp: I->setIsExact(ExactFlags.IsExact); break; diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 462cd201b6fcc..1891b211a3566 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -635,6 +635,10 @@ void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const { case OperationType::Cmp: O << " " << CmpInst::getPredicateName(getPredicate()); break; + case OperationType::DisjointOp: + if (DisjointFlags.IsDisjoint) + O << " disjoint"; + break; case OperationType::PossiblyExactOp: if (ExactFlags.IsExact) O << " exact"; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll index b09960f29f00b..0b5058cff8d5c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll @@ -122,7 +122,7 @@ define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP1]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1024 x i16], ptr @AB_i16, i64 0, [[VEC_IND]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i16.nxv4p0( [[TMP4]], i32 2, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP5:%.*]] = or [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP5:%.*]] = or disjoint [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i16], ptr @AB_i16, i64 0, [[TMP5]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call @llvm.masked.gather.nxv4i16.nxv4p0( [[TMP6]], i32 2, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) ; CHECK-NEXT: [[TMP7:%.*]] = sext [[WIDE_MASKED_GATHER]] to @@ -217,7 +217,7 @@ define void @test_array_load2_store2_i16(i32 noundef %C, i32 noundef %D) #1 { ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.experimental.vector.deinterleave2.nxv8i32( [[WIDE_VEC]]) ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 ; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = or [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP7:%.*]] = or disjoint [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP8:%.*]] = add nsw [[TMP5]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP9:%.*]] = trunc [[TMP8]] to ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], ptr @CD_i16, i64 0, [[VEC_IND]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll index d82d9a2d10cd8..bf2b9e2aef85a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll @@ -24,7 +24,7 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[TMP12:%.*]] = add nsw <16 x i64> [[TMP10]], [[VEC_IND3]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP11]], <16 x i64> [[TMP12]], i64 0 ; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> , <16 x ptr> [[TMP13]], i32 16, <16 x i1> ) -; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i64> [[VEC_IND3]], +; CHECK-NEXT: [[TMP14:%.*]] = or disjoint <16 x i64> [[VEC_IND3]], ; CHECK-NEXT: [[TMP15:%.*]] = add nsw <16 x i64> [[TMP10]], [[TMP14]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP11]], <16 x i64> [[TMP15]], i64 0 ; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> , <16 x ptr> [[TMP16]], i32 8, <16 x i1> ) diff --git a/llvm/test/Transforms/LoopVectorize/preserve-or-disjoint.ll b/llvm/test/Transforms/LoopVectorize/preserve-or-disjoint.ll index cbe22fe8a905d..66509ffedd68b 100644 --- a/llvm/test/Transforms/LoopVectorize/preserve-or-disjoint.ll +++ b/llvm/test/Transforms/LoopVectorize/preserve-or-disjoint.ll @@ -18,7 +18,7 @@ define void @generate_disjoint_flags(i64 %n, ptr noalias %x) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP3:%.*]] = or disjoint <4 x i32> [[WIDE_LOAD]], ; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> [[WIDE_LOAD]], ; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw <4 x i32> [[TMP3]], [[TMP4]] ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[TMP2]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll index 3d951da9feb27..2a2d55fb75c53 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll @@ -195,7 +195,7 @@ for.end: ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i32 2 ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP11]], i32 3 ; CHECK-NEXT: [[TMP16:%.*]] = add <4 x i32> [[TMP15]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP17:%.*]] = or <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP17:%.*]] = or disjoint <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i64> [[TMP17]], i32 0 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP18]] ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i64> [[TMP17]], i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index 5a7c3dcc8b686..f129cf3e2a1aa 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -773,7 +773,7 @@ define void @print_disjoint_flags(i64 %n, ptr noalias %x) { ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%gep.x> = getelementptr inbounds ir<%x>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN ir<%lv> = load ir<%gep.x> -; CHECK-NEXT: WIDEN ir<%or.1> = or ir<%lv>, ir<1> +; CHECK-NEXT: WIDEN ir<%or.1> = or disjoint ir<%lv>, ir<1> ; CHECK-NEXT: WIDEN ir<%or.2> = or ir<%lv>, ir<3> ; CHECK-NEXT: WIDEN ir<%add> = add nuw nsw ir<%or.1>, ir<%or.2> ; CHECK-NEXT: WIDEN store ir<%gep.x>, ir<%add>