[ValueTracking] Support vscale assumes for isKnownToBeAPowerOfTwo
This patch is split out from D154953 so that the tests affected by this
change alone are visible, as requested in a review comment.
It depends on the related LangRef update in D155193.

Reviewed By: paulwalker-arm, nikic, david-arm
Differential Revision: https://reviews.llvm.org/D155350
vfdff committed Jul 15, 2023
1 parent a41e7a2 commit 4d2723b
Showing 6 changed files with 754 additions and 767 deletions.
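In short, the ValueTracking change below lets isKnownToBeAPowerOfTwo answer "yes" for llvm.vscale() whenever the enclosing function carries a vscale_range attribute, which (per the LangRef update in D155193) guarantees that vscale is a power of two. A minimal sketch of IR that now benefits; the function name is illustrative, but the pattern mirrors the urem_shl_vscale_range test below:

    declare i64 @llvm.vscale.i64()

    define i64 @sketch() vscale_range(1,16) {
      %vscale = call i64 @llvm.vscale.i64()
      ; vscale is a power of two, so (vscale << 2) is one as well, and
      ; InstCombine can turn the urem into the add+and mask seen in the tests.
      %shift = shl nuw nsw i64 %vscale, 2
      %rem = urem i64 1024, %shift
      ret i64 %rem
    }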
5 changes: 5 additions & 0 deletions llvm/lib/Analysis/ValueTracking.cpp
@@ -2011,6 +2011,11 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
     return true;
   if (match(V, m_Power2()))
     return true;
+  if (Q.CxtI && match(V, m_VScale())) {
+    const Function *F = Q.CxtI->getFunction();
+    // The vscale_range indicates vscale is a power-of-two.
+    return F->hasFnAttribute(Attribute::VScaleRange);
+  }

   // 1 << X is clearly a power of two if the one is not shifted off the end. If
   // it is shifted off the end then the result is undefined.
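The new guard needs a context instruction (Q.CxtI) to reach the enclosing function's attributes; without vscale_range there is no power-of-two guarantee and the query stays conservative. An illustrative pair of functions (names are hypothetical, not from the patch):

    declare i64 @llvm.vscale.i64()

    ; vscale is provably a power of two here, so a urem by it can become a mask.
    define i64 @with_attr() vscale_range(1,16) {
      %v = call i64 @llvm.vscale.i64()
      %r = urem i64 64, %v
      ret i64 %r
    }

    ; No vscale_range: the new check returns false and this analysis
    ; leaves the urem alone.
    define i64 @without_attr() {
      %v = call i64 @llvm.vscale.i64()
      %r = urem i64 64, %v
      ret i64 %r
    }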
11 changes: 5 additions & 6 deletions llvm/test/Transforms/InstCombine/rem-mul-shl.ll
@@ -845,7 +845,8 @@ define i64 @urem_shl_vscale_range() vscale_range(1,16) {
; CHECK-LABEL: @urem_shl_vscale_range(
; CHECK-NEXT: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[SHIFT:%.*]] = shl nuw nsw i64 [[VSCALE]], 2
-; CHECK-NEXT: [[REM:%.*]] = urem i64 1024, [[SHIFT]]
+; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[SHIFT]], 2047
+; CHECK-NEXT: [[REM:%.*]] = and i64 [[TMP1]], 1024
; CHECK-NEXT: ret i64 [[REM]]
;
%vscale = call i64 @llvm.vscale.i64()
@@ -858,7 +859,8 @@ define i64 @urem_vscale_range() vscale_range(1,16) {
; CHECK-LABEL: @urem_vscale_range(
; CHECK-NEXT: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[SHIFT:%.*]] = shl nuw nsw i64 [[VSCALE]], 2
-; CHECK-NEXT: [[REM:%.*]] = urem i64 1024, [[SHIFT]]
+; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[SHIFT]], 2047
+; CHECK-NEXT: [[REM:%.*]] = and i64 [[TMP1]], 1024
; CHECK-NEXT: ret i64 [[REM]]
;
%vscale = call i64 @llvm.vscale.i64()
@@ -869,10 +871,7 @@ define i64 @urem_vscale_range() vscale_range(1,16) {

define i64 @urem_shl_vscale_out_of_range() vscale_range(1,16) {
; CHECK-LABEL: @urem_shl_vscale_out_of_range(
-; CHECK-NEXT: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[SHIFT:%.*]] = shl nuw nsw i64 [[VSCALE]], 20
-; CHECK-NEXT: [[REM:%.*]] = urem i64 1024, [[SHIFT]]
-; CHECK-NEXT: ret i64 [[REM]]
+; CHECK-NEXT: ret i64 1024
;
%vscale = call i64 @llvm.vscale.i64()
%shift = shl nuw nsw i64 %vscale, 20
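The rewrites above use the identity that for a power-of-two divisor P, C urem P == C & (P - 1). Here P is %shift = vscale << 2, and the emitted constant is 2047 rather than -1: %shift + 2047 agrees with %shift - 1 on all bits below bit 11, bit 10 is the only bit the mask 1024 tests, and the positive constant lets the add keep its nuw/nsw flags. A small constant-folded check of the arithmetic (values chosen for illustration, assuming vscale = 4 so %shift = 16):

    define i64 @mask_check() {
      ; 1024 urem 16 = 0, and (16 + 2047) & 1024 = 2063 & 1024 = 0, the same
      ; result (2063 = 0b100000001111 has bit 10 clear; 1024 is exactly bit 10).
      %t = add nuw nsw i64 16, 2047
      %r = and i64 %t, 1024
      ret i64 %r
    }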
129 changes: 63 additions & 66 deletions llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
@@ -11,48 +11,47 @@ define void @cond_inv_load_i32i32i16(ptr noalias nocapture %a, ptr noalias nocap
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[DOTNEG]], [[N]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[INV:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP4]], align 4
-; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <vscale x 4 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> [[BROADCAST_SPLAT]], i32 2, <vscale x 4 x i1> [[TMP6]], <vscale x 4 x i16> poison)
-; CHECK-NEXT: [[TMP7:%.*]] = sext <vscale x 4 x i16> [[WIDE_MASKED_GATHER]] to <vscale x 4 x i32>
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP7]], ptr [[TMP8]], i32 4, <vscale x 4 x i1> [[TMP6]])
-; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP10]], 2
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
-; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP3]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <vscale x 4 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> [[BROADCAST_SPLAT]], i32 2, <vscale x 4 x i1> [[TMP4]], <vscale x 4 x i16> poison)
+; CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i16> [[WIDE_MASKED_GATHER]] to <vscale x 4 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP5]], ptr [[TMP6]], i32 4, <vscale x 4 x i1> [[TMP4]])
+; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_07:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[COND]], i64 [[I_07]]
-; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP13]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP10]], 0
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]]
; CHECK: if.then:
-; CHECK-NEXT: [[TMP14:%.*]] = load i16, ptr [[INV]], align 2
-; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32
+; CHECK-NEXT: [[TMP11:%.*]] = load i16, ptr [[INV]], align 2
+; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_07]]
; CHECK-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX1]], align 4
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: for.inc:
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_07]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
@@ -91,41 +90,40 @@ define void @cond_inv_load_f64f64f64(ptr noalias nocapture %a, ptr noalias nocap
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[DOTNEG]], [[N]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[INV:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[COND:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x double>, ptr [[TMP4]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <vscale x 4 x double> [[WIDE_LOAD]], shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 4.000000e-01, i64 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> [[BROADCAST_SPLAT]], i32 8, <vscale x 4 x i1> [[TMP6]], <vscale x 4 x double> poison)
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: call void @llvm.masked.store.nxv4f64.p0(<vscale x 4 x double> [[WIDE_MASKED_GATHER]], ptr [[TMP7]], i32 8, <vscale x 4 x i1> [[TMP6]])
-; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 2
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
-; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[COND:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x double>, ptr [[TMP3]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = fcmp ogt <vscale x 4 x double> [[WIDE_LOAD]], shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 4.000000e-01, i64 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> [[BROADCAST_SPLAT]], i32 8, <vscale x 4 x i1> [[TMP4]], <vscale x 4 x double> poison)
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: call void @llvm.masked.store.nxv4f64.p0(<vscale x 4 x double> [[WIDE_MASKED_GATHER]], ptr [[TMP5]], i32 8, <vscale x 4 x i1> [[TMP4]])
+; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[COND]], i64 [[I_08]]
-; CHECK-NEXT: [[TMP12:%.*]] = load double, ptr [[ARRAYIDX]], align 8
-; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt double [[TMP12]], 4.000000e-01
+; CHECK-NEXT: [[TMP9:%.*]] = load double, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt double [[TMP9]], 4.000000e-01
; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; CHECK: if.then:
-; CHECK-NEXT: [[TMP13:%.*]] = load double, ptr [[INV]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = load double, ptr [[INV]], align 8
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_08]]
-; CHECK-NEXT: store double [[TMP13]], ptr [[ARRAYIDX2]], align 8
+; CHECK-NEXT: store double [[TMP10]], ptr [[ARRAYIDX2]], align 8
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: for.inc:
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
@@ -168,47 +166,46 @@ define void @invariant_load_cond(ptr noalias nocapture %a, ptr nocapture readonl
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[DOTNEG]], [[N]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 42
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[TMP4]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[DOTSPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP5]], align 4
-; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <vscale x 4 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP8]], i32 4, <vscale x 4 x i1> [[TMP7]], <vscale x 4 x i32> poison)
-; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[DOTSPLAT]], i32 4, <vscale x 4 x i1> [[TMP7]], <vscale x 4 x i32> poison)
-; CHECK-NEXT: [[TMP10:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_LOAD]]
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP10]], ptr [[TMP11]], i32 4, <vscale x 4 x i1> [[TMP7]])
-; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw i64 [[TMP13]], 2
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP14]]
-; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 42
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[TMP3]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP4]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <vscale x 4 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP6]], i32 4, <vscale x 4 x i1> [[TMP5]], <vscale x 4 x i32> poison)
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[BROADCAST_SPLAT]], i32 4, <vscale x 4 x i1> [[TMP5]], <vscale x 4 x i32> poison)
+; CHECK-NEXT: [[TMP7:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP7]], ptr [[TMP8]], i32 4, <vscale x 4 x i1> [[TMP5]])
+; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i64 [[IV]]
-; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
-; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP16]], 0
+; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP12]], 0
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]]
; CHECK: if.then:
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 42
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
-; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
-; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP17]]
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
+; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP13]]
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4
; CHECK-NEXT: br label [[FOR_INC]]
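The LoopVectorize diffs all show the same two simplifications enabled by a power-of-two VF = vscale * 4: rounding the trip count down to a multiple of VF no longer needs a urem plus a sub, since N - (N urem VF) == N & -VF when VF is a power of two, and the middle-block test N_MOD_VF == 0 becomes the equivalent N_VEC == N. A sketch of the new form with a worked value in the comments (assuming vscale = 4, so VF = 16, and n = 100):

    declare i64 @llvm.vscale.i64()

    define i64 @round_down(i64 %n) vscale_range(1,16) {
      %vscale = call i64 @llvm.vscale.i64()
      ; -VF acts as a mask: vscale * -4 = -16 = ...11110000 in two's complement,
      ; so the and clears the low bits: 100 & -16 = 96 = 100 - (100 urem 16).
      %neg = mul nsw i64 %vscale, -4
      %n.vec = and i64 %neg, %n
      ret i64 %n.vec
    }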
