diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 7bff13d59528c..f17b94a4bd5a9 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -6474,10 +6474,27 @@ Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType, const CallBase *Call) { unsigned BitWidth = ReturnType->getScalarSizeInBits(); switch (IID) { - case Intrinsic::get_active_lane_mask: + case Intrinsic::get_active_lane_mask: { if (match(Op1, m_Zero())) return ConstantInt::getFalse(ReturnType); + + const Function *F = Call->getFunction(); + auto *ScalableTy = dyn_cast(ReturnType); + Attribute Attr = F->getFnAttribute(Attribute::VScaleRange); + if (ScalableTy && Attr.isValid()) { + std::optional VScaleMax = Attr.getVScaleRangeMax(); + if (!VScaleMax) + break; + uint64_t MaxPossibleMaskElements = + (uint64_t)ScalableTy->getMinNumElements() * (*VScaleMax); + + const APInt *Op1Val; + if (match(Op0, m_Zero()) && match(Op1, m_APInt(Op1Val)) && + Op1Val->uge(MaxPossibleMaskElements)) + return ConstantInt::getAllOnesValue(ReturnType); + } break; + } case Intrinsic::abs: // abs(abs(x)) -> abs(x). We don't need to worry about the nsw arg here. // It is always ok to pick the earlier abs. We'll just lose nsw if its only diff --git a/llvm/test/Transforms/InstSimplify/get_active_lane_mask.ll b/llvm/test/Transforms/InstSimplify/get_active_lane_mask.ll index a3b8e4efbe939..180012a4e8211 100644 --- a/llvm/test/Transforms/InstSimplify/get_active_lane_mask.ll +++ b/llvm/test/Transforms/InstSimplify/get_active_lane_mask.ll @@ -18,3 +18,51 @@ define @foo_nxv8i1(i32 %a) { %mask = call @llvm.get.active.lane.mask.nxv8i1(i32 %a, i32 0) ret %mask } + +define @foo_vscale_max_255() vscale_range(1,16) { +; CHECK-LABEL: define @foo_vscale_max_255( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[MASK:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 255) +; CHECK-NEXT: ret [[MASK]] +; + %mask = call @llvm.get.active.lane.mask.nxv16i1(i32 0, i32 255) + ret %mask +} + +define @foo_vscale_max_256() vscale_range(1,16) { +; CHECK-LABEL: define @foo_vscale_max_256( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: ret splat (i1 true) +; + %mask = call @llvm.get.active.lane.mask.nxv16i1(i32 0, i32 256) + ret %mask +} + +define @foo_vscale_max_nxv2i1_1_1_2() vscale_range(1,1) { +; CHECK-LABEL: define @foo_vscale_max_nxv2i1_1_1_2( +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: ret splat (i1 true) +; + %mask = call @llvm.get.active.lane.mask.nxv2i1(i32 0, i32 2) + ret %mask +} + +define @foo_vscale_max_nxv4i1_2_4_16() vscale_range(2,4) { +; CHECK-LABEL: define @foo_vscale_max_nxv4i1_2_4_16( +; CHECK-SAME: ) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: ret splat (i1 true) +; + %mask = call @llvm.get.active.lane.mask.nxv4i1(i128 0, i128 16) + ret %mask +} + +define @foo_vscale_max_nxv4i1_2_4_1_16() vscale_range(2,4) { +; CHECK-LABEL: define @foo_vscale_max_nxv4i1_2_4_1_16( +; CHECK-SAME: ) #[[ATTR2]] { +; CHECK-NEXT: [[MASK:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i128(i128 1, i128 16) +; CHECK-NEXT: ret [[MASK]] +; + %mask = call @llvm.get.active.lane.mask.nxv4i1(i128 1, i128 16) + ret %mask +} + diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll index c7218ef249a9d..d2ef01028a2d6 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll @@ -52,11 +52,10 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFCOMMON-NEXT: [[ENTRY:.*]]: ; TFCOMMON-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; TFCOMMON-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 -; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFCOMMON-NEXT: br label %[[VECTOR_BODY:.*]] ; TFCOMMON: [[VECTOR_BODY]]: ; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] +; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ splat (i1 true), %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFCOMMON-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] ; TFCOMMON-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) ; TFCOMMON-NEXT: [[TMP6:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) @@ -77,12 +76,11 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) ; TFA_INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] ; TFA_INTERLEAVE: [[VECTOR_BODY]]: ; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ splat (i1 true), %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT4:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] ; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() @@ -181,11 +179,10 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFCOMMON-NEXT: [[ENTRY:.*]]: ; TFCOMMON-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; TFCOMMON-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 -; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFCOMMON-NEXT: br label %[[VECTOR_BODY:.*]] ; TFCOMMON: [[VECTOR_BODY]]: ; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] +; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ splat (i1 true), %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFCOMMON-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; TFCOMMON-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) ; TFCOMMON-NEXT: [[TMP6:%.*]] = icmp ugt [[WIDE_MASKED_LOAD]], splat (i64 50) @@ -209,12 +206,11 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) ; TFA_INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] ; TFA_INTERLEAVE: [[VECTOR_BODY]]: ; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ splat (i1 true), %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT5:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() @@ -335,11 +331,10 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFCOMMON-NEXT: [[ENTRY:.*]]: ; TFCOMMON-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; TFCOMMON-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 -; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFCOMMON-NEXT: br label %[[VECTOR_BODY:.*]] ; TFCOMMON: [[VECTOR_BODY]]: ; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] +; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ splat (i1 true), %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFCOMMON-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; TFCOMMON-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) ; TFCOMMON-NEXT: [[TMP6:%.*]] = icmp ugt [[WIDE_MASKED_LOAD]], splat (i64 50) @@ -366,12 +361,11 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) ; TFA_INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] ; TFA_INTERLEAVE: [[VECTOR_BODY]]: ; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ splat (i1 true), %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT5:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() @@ -613,11 +607,10 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFALWAYS-NEXT: [[ENTRY:.*]]: ; TFALWAYS-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; TFALWAYS-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 -; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFALWAYS-NEXT: br label %[[VECTOR_BODY:.*]] ; TFALWAYS: [[VECTOR_BODY]]: ; TFALWAYS-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] +; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ splat (i1 true), %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFALWAYS-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] ; TFALWAYS-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) ; TFALWAYS-NEXT: [[TMP6:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) @@ -636,11 +629,10 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFFALLBACK-NEXT: [[ENTRY:.*]]: ; TFFALLBACK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; TFFALLBACK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 -; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFFALLBACK-NEXT: br label %[[VECTOR_BODY:.*]] ; TFFALLBACK: [[VECTOR_BODY]]: ; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] +; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ splat (i1 true), %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFFALLBACK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] ; TFFALLBACK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) ; TFFALLBACK-NEXT: [[TMP6:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) @@ -661,12 +653,11 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) ; TFA_INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] ; TFA_INTERLEAVE: [[VECTOR_BODY]]: ; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ splat (i1 true), %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT4:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] ; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() @@ -770,13 +761,12 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFALWAYS-NEXT: [[ENTRY:.*]]: ; TFALWAYS-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; TFALWAYS-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 -; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFALWAYS-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, double [[M]], i64 0 ; TFALWAYS-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; TFALWAYS-NEXT: br label %[[VECTOR_BODY:.*]] ; TFALWAYS: [[VECTOR_BODY]]: ; TFALWAYS-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] +; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ splat (i1 true), %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFALWAYS-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] ; TFALWAYS-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] ; TFALWAYS-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) @@ -800,13 +790,12 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFFALLBACK-NEXT: [[ENTRY:.*]]: ; TFFALLBACK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; TFFALLBACK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 -; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFFALLBACK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, double [[M]], i64 0 ; TFFALLBACK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; TFFALLBACK-NEXT: br label %[[VECTOR_BODY:.*]] ; TFFALLBACK: [[VECTOR_BODY]]: ; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] +; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ splat (i1 true), %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFFALLBACK-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] ; TFFALLBACK-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] ; TFFALLBACK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) @@ -832,14 +821,13 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) ; TFA_INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, double [[M]], i64 0 ; TFA_INTERLEAVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; TFA_INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] ; TFA_INTERLEAVE: [[VECTOR_BODY]]: ; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ splat (i1 true), %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT4:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP24:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vfabi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vfabi.ll index c3ace983fd911..7628b39cf4eb7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vfabi.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vfabi.ll @@ -7,21 +7,20 @@ define void @test_big_little_params(ptr readonly %a, ptr readonly %b, ptr noalia ; CHECK-LABEL: define void @test_big_little_params ; CHECK-SAME: (ptr readonly [[A:%.*]], ptr readonly [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 2 -; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 1025) +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP0]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv4i8.p0(ptr [[TMP1]], i32 1, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP2:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD1]], [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]] -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP2]], ptr [[TMP3]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ splat (i1 true), [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP2]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv4i8.p0(ptr [[TMP3]], i32 1, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP4:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD1]], [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP4]], ptr [[TMP5]], i32 4, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; CHECK-NEXT: [[TMP6:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; CHECK-NEXT: br i1 [[TMP6]], label [[VECTOR_BODY]], label [[EXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]] @@ -52,21 +51,20 @@ define void @test_little_big_params(ptr readonly %a, ptr readonly %b, ptr noalia ; CHECK-LABEL: define void @test_little_big_params ; CHECK-SAME: (ptr readonly [[A:%.*]], ptr readonly [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 1 -; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 1 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2f32.p0(ptr [[TMP0]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr [[TMP1]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP2:%.*]] = call @bar_vector( [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD1]], [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[C]], i64 [[INDEX]] -; CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP2]], ptr [[TMP3]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ splat (i1 true), [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2f32.p0(ptr [[TMP2]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr [[TMP3]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP4:%.*]] = call @bar_vector( [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD1]], [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP4]], ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; CHECK-NEXT: [[TMP6:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; CHECK-NEXT: br i1 [[TMP6]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP3:![0-9]+]]