diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index f694ab88ccab8..5b64fe4d4472f 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1056,7 +1056,7 @@ static bool areAllOperandsNonInsts(Value *V) { /// require scheduling if this is not an instruction or it is an instruction /// that does not read/write memory and all users are phi nodes or instructions /// from the different blocks. -static bool isUsedOutsideBlock(Value *V) { +static bool isOnlyUsedOutsideBlock(Value *V) { auto *I = dyn_cast(V); if (!I) return true; @@ -1075,7 +1075,7 @@ static bool isUsedOutsideBlock(Value *V) { /// require scheduling if all operands and all users do not need to be scheduled /// in the current basic block. static bool doesNotNeedToBeScheduled(Value *V) { - return areAllOperandsNonInsts(V) && isUsedOutsideBlock(V); + return areAllOperandsNonInsts(V) && isOnlyUsedOutsideBlock(V); } /// Checks if the specified array of instructions does not require scheduling. @@ -1083,8 +1083,7 @@ static bool doesNotNeedToBeScheduled(Value *V) { /// scheduling or their users do not require scheduling since they are phis or /// in other basic blocks. static bool doesNotNeedToSchedule(ArrayRef VL) { - return !VL.empty() && - (all_of(VL, isUsedOutsideBlock) || all_of(VL, areAllOperandsNonInsts)); + return !VL.empty() && (all_of(VL, doesNotNeedToBeScheduled)); } namespace slpvectorizer { @@ -9518,7 +9517,7 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) { return !isa(V) && isa(V); })) || all_of(E->Scalars, [](Value *V) { - return !isVectorLikeInstWithConstOps(V) && isUsedOutsideBlock(V); + return !isVectorLikeInstWithConstOps(V) && isOnlyUsedOutsideBlock(V); })) Res.second = FindLastInst(); else diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll index 8987f34e561e6..b48e7e08134be 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll @@ -35,14 +35,19 @@ define void @PR28330(i32 %n) { ; ; MAX-COST-LABEL: @PR28330( ; MAX-COST-NEXT: entry: -; MAX-COST-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 1), align 1 -; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer +; MAX-COST-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 1), align 1 +; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq <4 x i8> [[TMP0]], zeroinitializer +; MAX-COST-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 5), align 1 +; MAX-COST-NEXT: [[TMP3:%.*]] = icmp eq <4 x i8> [[TMP2]], zeroinitializer ; MAX-COST-NEXT: br label [[FOR_BODY:%.*]] ; MAX-COST: for.body: -; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; MAX-COST-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> , <8 x i32> -; MAX-COST-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) -; MAX-COST-NEXT: [[OP_RDX]] = add i32 [[TMP3]], [[P17]] +; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[OP_RDX1:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; MAX-COST-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> , <4 x i32> +; MAX-COST-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> , <4 x i32> +; MAX-COST-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) +; MAX-COST-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]]) +; MAX-COST-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP6]], [[TMP7]] +; MAX-COST-NEXT: [[OP_RDX1]] = add i32 [[OP_RDX]], [[P17]] ; MAX-COST-NEXT: br label [[FOR_BODY]] ; entry: @@ -112,14 +117,19 @@ define void @PR32038(i32 %n) { ; ; MAX-COST-LABEL: @PR32038( ; MAX-COST-NEXT: entry: -; MAX-COST-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 1), align 1 -; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer +; MAX-COST-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 1), align 1 +; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq <4 x i8> [[TMP0]], zeroinitializer +; MAX-COST-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 5), align 1 +; MAX-COST-NEXT: [[TMP3:%.*]] = icmp eq <4 x i8> [[TMP2]], zeroinitializer ; MAX-COST-NEXT: br label [[FOR_BODY:%.*]] ; MAX-COST: for.body: -; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; MAX-COST-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> , <8 x i32> -; MAX-COST-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) -; MAX-COST-NEXT: [[OP_RDX]] = add i32 [[TMP3]], -5 +; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[OP_RDX1:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; MAX-COST-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> , <4 x i32> +; MAX-COST-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> , <4 x i32> +; MAX-COST-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) +; MAX-COST-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]]) +; MAX-COST-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP6]], [[TMP7]] +; MAX-COST-NEXT: [[OP_RDX1]] = add i32 [[OP_RDX]], -5 ; MAX-COST-NEXT: br label [[FOR_BODY]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll index 1198bb1d509eb..d4b9b01b6983b 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll @@ -40,8 +40,8 @@ declare float @llvm.fabs.f32(float) define <4 x float> @insertelement_poison_lanes(ptr %0) { ; CHECK-LABEL: @insertelement_poison_lanes( ; CHECK-NEXT: [[INS_1:%.*]] = insertelement <4 x float> zeroinitializer, float poison, i64 0 -; CHECK-NEXT: [[INS_2:%.*]] = insertelement <4 x float> [[INS_1]], float 0.000000e+00, i64 0 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr double, ptr [[TMP0:%.*]], i64 1 +; CHECK-NEXT: [[INS_2:%.*]] = insertelement <4 x float> [[INS_1]], float 0.000000e+00, i64 0 ; CHECK-NEXT: store <2 x double> , ptr [[GEP_1]], align 8 ; CHECK-NEXT: ret <4 x float> [[INS_2]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/slp-fma-loss.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/slp-fma-loss.ll index 0b26c53ca4503..608066d75698c 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/slp-fma-loss.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/slp-fma-loss.ll @@ -215,8 +215,8 @@ define float @slp_not_profitable_in_loop(float %x, ptr %A) { ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[RED:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[RED_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP0]] ; CHECK-NEXT: [[MUL12:%.*]] = fmul fast float 3.000000e+00, [[L_1]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP0]] ; CHECK-NEXT: [[MUL16:%.*]] = fmul fast float 3.000000e+00, [[L_3]] ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 ; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[MUL12]], [[TMP3]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll index f3638b5d087f4..a2dd0a8ba1659 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll @@ -8,18 +8,18 @@ define void @test() #0 { ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[DUMMY_PHI:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[OP_RDX1:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 2, [[ENTRY]] ], [ [[TMP3:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i64> [[SHUFFLE]], -; CHECK-NEXT: [[TMP3]] = extractelement <4 x i64> [[TMP2]], i32 3 +; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 2, [[ENTRY]] ], [ [[TMP4:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[DUMMY_ADD:%.*]] = add i16 0, 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP2]], i32 1 -; CHECK-NEXT: [[DUMMY_SHL:%.*]] = shl i64 [[TMP4]], 32 -; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> , [[TMP2]] -; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <4 x i64> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP6]]) -; CHECK-NEXT: [[OP_RDX:%.*]] = add i64 [[TMP7]], [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP2]], +; CHECK-NEXT: [[TMP4]] = extractelement <4 x i64> [[TMP3]], i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1 +; CHECK-NEXT: [[DUMMY_SHL:%.*]] = shl i64 [[TMP5]], 32 +; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i64> , [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = ashr exact <4 x i64> [[TMP6]], +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP7]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = add i64 [[TMP8]], [[TMP4]] ; CHECK-NEXT: [[OP_RDX1]] = add i64 [[OP_RDX]], 0 ; CHECK-NEXT: br label [[LOOP]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll index 2ea7f191947b4..38ba06f7f5cdd 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll @@ -7,17 +7,17 @@ define void @mainTest(i32 %param, ptr %vals, i32 %len) { ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> , i32 [[PARAM:%.*]], i32 0 ; CHECK-NEXT: br label [[BCI_15:%.*]] ; CHECK: bci_15: -; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP7:%.*]], [[BCI_15]] ], [ [[TMP0]], [[BCI_15_PREHEADER:%.*]] ] -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = add <16 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP8:%.*]], [[BCI_15]] ], [ [[TMP0]], [[BCI_15_PREHEADER:%.*]] ] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x i32> [[TMP2]], i32 0 ; CHECK-NEXT: store atomic i32 [[TMP4]], ptr [[VALS:%.*]] unordered, align 4 -; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[TMP3]]) -; CHECK-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP5]], [[TMP2]] -; CHECK-NEXT: [[V44:%.*]] = add i32 [[TMP2]], 16 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[OP_RDX]], i32 0 -; CHECK-NEXT: [[TMP7]] = insertelement <2 x i32> [[TMP6]], i32 [[V44]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[TMP5]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP6]], [[TMP3]] +; CHECK-NEXT: [[V44:%.*]] = add i32 [[TMP3]], 16 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[OP_RDX]], i32 0 +; CHECK-NEXT: [[TMP8]] = insertelement <2 x i32> [[TMP7]], i32 [[V44]], i32 1 ; CHECK-NEXT: br i1 true, label [[BCI_15]], label [[LOOPEXIT:%.*]] ; CHECK: loopexit: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-nodes-dependency.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-nodes-dependency.ll index 001da64c60a93..0237f699e4042 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-nodes-dependency.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-nodes-dependency.ll @@ -9,28 +9,28 @@ define double @test() { ; CHECK: cond.true: ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> , double [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> zeroinitializer, [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP1]], <2 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP3]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP1]], <2 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP6]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = fsub <2 x double> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x double> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP6]], <2 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> zeroinitializer, [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> zeroinitializer, [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP9]], <2 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> zeroinitializer, [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x double> zeroinitializer, [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = fsub <2 x double> [[TMP10]], [[TMP2]] +; CHECK-NEXT: [[TMP12:%.*]] = fadd <2 x double> [[TMP10]], [[TMP2]] ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = fsub <2 x double> [[TMP13]], [[TMP2]] -; CHECK-NEXT: [[TMP15:%.*]] = fadd <2 x double> [[TMP13]], [[TMP2]] -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x double> [[TMP14]], <2 x double> [[TMP15]], <2 x i32> -; CHECK-NEXT: [[TMP17:%.*]] = fsub <2 x double> [[TMP16]], zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = fmul <2 x double> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = fmul <2 x double> zeroinitializer, [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = fadd <2 x double> [[TMP19]], [[TMP17]] -; CHECK-NEXT: [[TMP21:%.*]] = fsub <2 x double> [[TMP20]], zeroinitializer -; CHECK-NEXT: [[TMP22:%.*]] = fmul <2 x double> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = fsub <2 x double> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = fmul <2 x double> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = fmul <2 x double> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = fmul <2 x double> zeroinitializer, [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = fadd <2 x double> [[TMP18]], [[TMP14]] +; CHECK-NEXT: [[TMP20:%.*]] = fsub <2 x double> [[TMP19]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = fmul <2 x double> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = fmul <2 x double> [[TMP21]], zeroinitializer ; CHECK-NEXT: [[TMP23:%.*]] = fmul <2 x double> zeroinitializer, [[TMP22]] -; CHECK-NEXT: [[TMP24:%.*]] = fadd <2 x double> [[TMP23]], [[TMP21]] +; CHECK-NEXT: [[TMP24:%.*]] = fadd <2 x double> [[TMP23]], [[TMP20]] ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x double> [[TMP24]], i32 0 ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x double> [[TMP24]], i32 1 ; CHECK-NEXT: [[ADD29:%.*]] = fadd double [[TMP25]], [[TMP26]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll index 18d9f8c903c5b..90a761dcd996b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll @@ -4,20 +4,12 @@ define void @exceed(double %0, double %1) { ; CHECK-LABEL: @exceed( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[TMP0:%.*]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[TMP1:%.*]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = fdiv fast <2 x double> [[TMP3]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 1 -; CHECK-NEXT: [[IX:%.*]] = fmul double [[TMP7]], undef ; CHECK-NEXT: [[IXX0:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX1:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX2:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX3:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX4:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX5:%.*]] = fsub double undef, undef -; CHECK-NEXT: [[IX1:%.*]] = fmul double [[TMP7]], undef ; CHECK-NEXT: [[IXX10:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX11:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX12:%.*]] = fsub double undef, undef @@ -27,15 +19,23 @@ define void @exceed(double %0, double %1) { ; CHECK-NEXT: [[IXX20:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX21:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX22:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[TMP0:%.*]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[TMP1:%.*]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = fdiv fast <2 x double> [[TMP3]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 1 +; CHECK-NEXT: [[IX:%.*]] = fmul double [[TMP7]], undef +; CHECK-NEXT: [[IX1:%.*]] = fmul double [[TMP7]], undef ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 0 ; CHECK-NEXT: [[IX2:%.*]] = fmul double [[TMP8]], [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP5]] -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP5]], <2 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = fadd fast <2 x double> [[TMP6]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <2 x double> [[TMP11]], [[TMP9]] -; CHECK-NEXT: [[IXX101:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP5]], <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[TMP6]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <2 x double> [[TMP10]], [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> [[TMP5]], <2 x i32> ; CHECK-NEXT: [[TMP14:%.*]] = fmul fast <2 x double> [[TMP13]], undef +; CHECK-NEXT: [[IXX101:%.*]] = fsub double undef, undef ; CHECK-NEXT: switch i32 undef, label [[BB1:%.*]] [ ; CHECK-NEXT: i32 0, label [[BB2:%.*]] ; CHECK-NEXT: ] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract-many-users-buildvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract-many-users-buildvector.ll index cac0491d0b643..7d837bfa5dc02 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extract-many-users-buildvector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract-many-users-buildvector.ll @@ -4,23 +4,23 @@ define i1 @test(float %0, double %1) { ; CHECK-LABEL: define i1 @test ; CHECK-SAME: (float [[TMP0:%.*]], double [[TMP1:%.*]]) { -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> , float [[TMP0]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = fpext <4 x float> [[TMP3]] to <4 x double> -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> , double [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> zeroinitializer, [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> , double [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> zeroinitializer, [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> , float [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP7:%.*]] = fpext <4 x float> [[TMP6]] to <4 x double> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> [[TMP8]], <4 x double> , <4 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x double> [[TMP9]], double [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x double> [[TMP11]], double [[TMP7]], i32 3 +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x double> [[TMP11]], double [[TMP5]], i32 3 ; CHECK-NEXT: [[TMP13:%.*]] = fmul <4 x double> [[TMP10]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = fmul <4 x double> zeroinitializer, [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = fmul <4 x double> zeroinitializer, [[TMP7]] ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x double> [[TMP13]], <4 x double> poison, <8 x i32> ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <8 x double> , <8 x double> [[TMP15]], <8 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x double> [[TMP14]], <4 x double> poison, <8 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <8 x double> , <8 x double> [[TMP17]], <8 x i32> -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <8 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <8 x double> [[TMP18]], <8 x double> [[TMP19]], <8 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = fsub <8 x double> [[TMP16]], [[TMP20]] ; CHECK-NEXT: [[TMP22:%.*]] = fmul <8 x double> [[TMP16]], [[TMP20]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/inst_size_bug.ll b/llvm/test/Transforms/SLPVectorizer/X86/inst_size_bug.ll index 6c4572593027d..b446b89536942 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/inst_size_bug.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/inst_size_bug.ll @@ -5,13 +5,13 @@ define void @inst_size(ptr %a, <2 x i64> %b) { ; CHECK-LABEL: @inst_size( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[VAL:%.*]] = extractelement <2 x i64> [[B:%.*]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr [[A:%.*]], align 4 ; CHECK-NEXT: [[T41:%.*]] = icmp sgt i64 0, [[VAL]] -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i64> zeroinitializer, [[TMP1]] +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[A:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i64> zeroinitializer, [[TMP0]] ; CHECK-NEXT: br label [[BLOCK:%.*]] ; CHECK: block: ; CHECK-NEXT: [[PHI1:%.*]] = phi i1 [ [[T41]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP3:%.*]] = phi <4 x i1> [ [[TMP2]], [[ENTRY]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x i1> [ [[TMP1]], [[ENTRY]] ] ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi-uses-with-deps-in-first.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi-uses-with-deps-in-first.ll index 998efaa5c0351..53f82fcd98379 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/multi-uses-with-deps-in-first.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/multi-uses-with-deps-in-first.ll @@ -11,10 +11,10 @@ define void @test(double %add) { ; CHECK-NEXT: br label [[COND_TRUE45:%.*]] ; CHECK: cond.true45: ; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> zeroinitializer, [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = fsub <2 x double> [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = fsub <2 x double> [[TMP6]], [[TMP4]] +; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> zeroinitializer, [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x double> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = fsub <2 x double> [[TMP5]], [[TMP6]] ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi_block.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi_block.ll index 7a929d6dc4b4c..9b74afb135ea5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/multi_block.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/multi_block.ll @@ -27,11 +27,11 @@ define i32 @bar(ptr nocapture %A, i32 %d) { ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 (...) @foo() ; CHECK-NEXT: br label [[TMP6]] ; CHECK: 6: -; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x float> [[TMP2]], -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[A]], i64 8 -; CHECK-NEXT: [[TMP9:%.*]] = fpext <2 x float> [[TMP7]] to <2 x double> +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[A]], i64 8 +; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x float> [[TMP2]], +; CHECK-NEXT: [[TMP9:%.*]] = fpext <2 x float> [[TMP8]] to <2 x double> ; CHECK-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP9]], -; CHECK-NEXT: store <2 x double> [[TMP10]], ptr [[TMP8]], align 8 +; CHECK-NEXT: store <2 x double> [[TMP10]], ptr [[TMP7]], align 8 ; CHECK-NEXT: ret i32 undef ; %1 = load double, ptr %A, align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll index 89051c7aba42c..7823912a2c10b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll @@ -10,16 +10,16 @@ define void @test_add_sdiv(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 ; CHECK-NEXT: [[GEP2_3:%.*]] = getelementptr i32, ptr [[ARR2]], i32 3 ; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[GEP1_2]], align 4 ; CHECK-NEXT: [[V3:%.*]] = load i32, ptr [[GEP1_3]], align 4 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[A0:%.*]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[A1:%.*]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = add nsw <2 x i32> [[TMP1]], ; CHECK-NEXT: [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42 ; CHECK-NEXT: [[Y3:%.*]] = add nsw i32 [[A3:%.*]], 0 ; CHECK-NEXT: [[RES2:%.*]] = sdiv i32 [[V2]], [[Y2]] ; CHECK-NEXT: [[RES3:%.*]] = add nsw i32 [[V3]], [[Y3]] -; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARR1]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[TMP4]], [[TMP2]] -; CHECK-NEXT: store <2 x i32> [[TMP5]], ptr [[ARR2]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[ARR1]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[A0:%.*]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i32> [[TMP0]], [[TMP3]] +; CHECK-NEXT: store <2 x i32> [[TMP4]], ptr [[ARR2]], align 4 ; CHECK-NEXT: store i32 [[RES2]], ptr [[GEP2_2]], align 4 ; CHECK-NEXT: store i32 [[RES3]], ptr [[GEP2_3]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/postponed_gathers.ll b/llvm/test/Transforms/SLPVectorizer/X86/postponed_gathers.ll index 681d131c50727..cf7c603ee4e28 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/postponed_gathers.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/postponed_gathers.ll @@ -11,19 +11,19 @@ define void @foo() { ; CHECK: bci_252: ; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ zeroinitializer, [[BCI_0:%.*]] ], [ [[TMP16:%.*]], [[BCI_252_1:%.*]] ] ; CHECK-NEXT: [[TMP4:%.*]] = mul <2 x i32> zeroinitializer, [[TMP1]] -; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i32> [[TMP3]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = or <2 x i32> [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = or <2 x i32> [[TMP6]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i32> [[TMP5]], [[TMP7]] +; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i32> [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = or <2 x i32> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = or <2 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i32> [[TMP7]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i32> [[TMP8]], zeroinitializer ; CHECK-NEXT: br i1 false, label [[NOT_ZERO70:%.*]], label [[BCI_252_1]] ; CHECK: bci_252.1: ; CHECK-NEXT: [[TMP10:%.*]] = or <2 x i32> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = or <2 x i32> [[TMP9]], zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = mul <2 x i32> [[TMP10]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = or <2 x i32> [[TMP2]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = or <2 x i32> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = or <2 x i32> [[TMP11]], [[TMP14]] +; CHECK-NEXT: [[TMP11:%.*]] = mul <2 x i32> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = or <2 x i32> [[TMP2]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = or <2 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <2 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <2 x i32> [[TMP14]], [[TMP13]] ; CHECK-NEXT: [[TMP16]] = or <2 x i32> [[TMP15]], zeroinitializer ; CHECK-NEXT: br label [[BCI_252]] ; CHECK: not_zero70: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll index 8e17088b27569..8c01f8d2fb331 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll @@ -693,22 +693,22 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea ; AVX512F-LABEL: @gather_load_div( ; AVX512F-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 ; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer -; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512F-NEXT: [[TMP6:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512F-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]] +; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> +; AVX512F-NEXT: [[TMP6:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP7:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> +; AVX512F-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP7]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP6]], [[TMP8]] ; AVX512F-NEXT: store <8 x float> [[TMP9]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void ; ; AVX512VL-LABEL: @gather_load_div( ; AVX512VL-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 ; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer -; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512VL-NEXT: [[TMP6:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512VL-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]] +; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> +; AVX512VL-NEXT: [[TMP6:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP7:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> +; AVX512VL-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP7]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP6]], [[TMP8]] ; AVX512VL-NEXT: store <8 x float> [[TMP9]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll index b93174da6a569..2449335468cd5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll @@ -693,22 +693,22 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea ; AVX512F-LABEL: @gather_load_div( ; AVX512F-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 ; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer -; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512F-NEXT: [[TMP6:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512F-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]] +; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> +; AVX512F-NEXT: [[TMP6:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP7:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> +; AVX512F-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP7]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP6]], [[TMP8]] ; AVX512F-NEXT: store <8 x float> [[TMP9]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void ; ; AVX512VL-LABEL: @gather_load_div( ; AVX512VL-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 ; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer -; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512VL-NEXT: [[TMP6:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512VL-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]] +; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> +; AVX512VL-NEXT: [[TMP6:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP7:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> +; AVX512VL-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP7]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP6]], [[TMP8]] ; AVX512VL-NEXT: store <8 x float> [[TMP9]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-buildvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-buildvector.ll index 83457cc4966f7..9c91686304ace 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-buildvector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-buildvector.ll @@ -10,19 +10,19 @@ declare void @llvm.masked.scatter.v2f64.v2p0(<2 x double>, <2 x ptr>, i32 immarg define void @test(ptr nocapture readonly %arg, ptr nocapture readonly %arg1, ptr nocapture %arg2) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x ptr> poison, ptr [[ARG:%.*]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr double, <8 x ptr> [[SHUFFLE]], <8 x i64> ; CHECK-NEXT: [[GEP2_0:%.*]] = getelementptr inbounds double, ptr [[ARG1:%.*]], i64 16 -; CHECK-NEXT: [[TMP2:%.*]] = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> [[TMP1]], i32 8, <8 x i1> , <8 x double> poison) +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x ptr> poison, ptr [[ARG:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr double, <8 x ptr> [[TMP1]], <8 x i64> +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> [[TMP2]], i32 8, <8 x i1> , <8 x double> poison) ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x double>, ptr [[GEP2_0]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <8 x double> [[TMP4]], [[TMP2]] -; CHECK-NEXT: [[TMP7:%.*]] = load <8 x double>, ptr [[ARG1]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = fmul fast <8 x double> [[TMP7]], [[TMP2]] -; CHECK-NEXT: [[TMP9:%.*]] = call fast double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> [[TMP8]]) -; CHECK-NEXT: [[TMP10:%.*]] = call fast double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> [[TMP5]]) -; CHECK-NEXT: [[I142:%.*]] = insertelement <2 x double> poison, double [[TMP9]], i64 0 -; CHECK-NEXT: [[I143:%.*]] = insertelement <2 x double> [[I142]], double [[TMP10]], i64 1 +; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <8 x double> [[TMP4]], [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = load <8 x double>, ptr [[ARG1]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <8 x double> [[TMP6]], [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = call fast double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = call fast double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> [[TMP5]]) +; CHECK-NEXT: [[I142:%.*]] = insertelement <2 x double> poison, double [[TMP8]], i64 0 +; CHECK-NEXT: [[I143:%.*]] = insertelement <2 x double> [[I142]], double [[TMP9]], i64 1 ; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds double, ptr [[ARG2:%.*]], <2 x i64> ; CHECK-NEXT: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> [[I143]], <2 x ptr> [[P]], i32 8, <2 x i1> ) ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather.ll index f65f61975a61f..49b723aa3b753 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather.ll @@ -3,17 +3,17 @@ define void @test(ptr noalias %0, ptr %p) { ; CHECK-LABEL: @test( -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x ptr> poison, ptr [[P:%.*]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x ptr> [[TMP2]], <8 x ptr> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, <8 x ptr> [[TMP3]], <8 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP0:%.*]], i64 2 -; CHECK-NEXT: [[TMP6:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> , <8 x float> poison) +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP0:%.*]], i64 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[P:%.*]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison) ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x float> [[TMP6]], <8 x float> poison, <16 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP6]], <8 x float> poison, <16 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x float> [[TMP8]], <16 x float> , <16 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = fadd reassoc nsz arcp contract afn <16 x float> [[TMP7]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x float> [[TMP10]], <16 x float> poison, <16 x i32> -; CHECK-NEXT: store <16 x float> [[TMP11]], ptr [[TMP5]], align 4 +; CHECK-NEXT: store <16 x float> [[TMP11]], ptr [[TMP2]], align 4 ; CHECK-NEXT: ret void ; %2 = getelementptr inbounds float, ptr %p, i64 2 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather2.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather2.ll index 75431c13a7703..7fe2b3bfdee3e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather2.ll @@ -6,17 +6,17 @@ target triple = "x86_64-unknown-linux-gnu" define void @"foo"(ptr addrspace(1) %0, ptr addrspace(1) %1) #0 { ; CHECK-LABEL: @foo( -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr addrspace(1)> poison, ptr addrspace(1) [[TMP0:%.*]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x ptr addrspace(1)> [[TMP3]], <4 x ptr addrspace(1)> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, <4 x ptr addrspace(1)> [[TMP4]], <4 x i64> -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1:%.*]], i64 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p1(<4 x ptr addrspace(1)> [[TMP5]], i32 4, <4 x i1> , <4 x float> poison) +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1:%.*]], i64 8 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x ptr addrspace(1)> poison, ptr addrspace(1) [[TMP0:%.*]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x ptr addrspace(1)> [[TMP4]], <4 x ptr addrspace(1)> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, <4 x ptr addrspace(1)> [[TMP5]], <4 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p1(<4 x ptr addrspace(1)> [[TMP6]], i32 4, <4 x i1> , <4 x float> poison) ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> poison, <8 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = load <8 x float>, ptr addrspace(1) [[TMP6]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = load <8 x float>, ptr addrspace(1) [[TMP3]], align 4 ; CHECK-NEXT: [[TMP10:%.*]] = fmul <8 x float> [[TMP8]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = fadd <8 x float> [[TMP10]], zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x float> [[TMP11]], <8 x float> poison, <8 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x ptr addrspace(1)> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x ptr addrspace(1)> [[TMP6]], i32 0 ; CHECK-NEXT: store <8 x float> [[TMP12]], ptr addrspace(1) [[TMP13]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll b/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll index 6bbc33bac80f9..085bb3e83ca73 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll @@ -10,14 +10,14 @@ define i32 @foo(i32 %0, ptr %1, ptr %2) { ; CHECK-NEXT: [[T8:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[T4]], i64 0, i32 1 ; CHECK-NEXT: [[T9:%.*]] = getelementptr inbounds [3 x float], ptr [[T8]], i64 1, i64 0 ; CHECK-NEXT: [[T14:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[T4]], i64 0, i32 1, i64 0 -; CHECK-NEXT: [[TMP5:%.*]] = load <2 x float>, ptr [[T14]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr [[T14]], align 4 ; CHECK-NEXT: br label [[T37:%.*]] ; CHECK: t37: -; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x float> [ [[TMP5]], [[TMP3:%.*]] ], [ [[T89:%.*]], [[T37]] ] -; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast <2 x float> , [[TMP6]] -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x float> [ [[TMP4]], [[TMP3:%.*]] ], [ [[T89:%.*]], [[T37]] ] ; CHECK-NEXT: [[T21:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[T4]], i64 0, i32 2, i64 0 -; CHECK-NEXT: store <4 x float> [[SHUFFLE]], ptr [[T21]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = fdiv fast <2 x float> , [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: store <4 x float> [[TMP7]], ptr [[T21]], align 4 ; CHECK-NEXT: [[T89]] = load <2 x float>, ptr [[T9]], align 4 ; CHECK-NEXT: br i1 undef, label [[T37]], label [[T55:%.*]] ; CHECK: t55: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reused-extracts.ll b/llvm/test/Transforms/SLPVectorizer/X86/reused-extracts.ll index 1820407daf078..6c2ba5934211f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reused-extracts.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reused-extracts.ll @@ -4,11 +4,11 @@ define void @test() { ; CHECK-LABEL: define void @test() { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i1> poison, i1 false, i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([17 x i64], ptr null, i64 0, i64 9), align 8 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i1> [[TMP0]], i1 false, i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([17 x i64], ptr null, i64 0, i64 9), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i1> poison, i1 false, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i1> [[TMP1]], i1 false, i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i64> -; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i64> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i64> [[TMP0]], [[TMP3]] ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr getelementptr inbounds ([17 x i64], ptr null, i64 0, i64 9), align 8 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder.ll index c79e9b94278cd..90b99faed881c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder.ll @@ -8,13 +8,13 @@ define void @test() { ; CHECK-NEXT: [[ARRAYIDX21_I:%.*]] = getelementptr inbounds [4 x float], ptr undef, i64 2 ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr undef, align 4 -; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x float> zeroinitializer, [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX10_I_I86]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr undef, align 4 -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> , <2 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX10_I_I86]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr undef, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr undef, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x float> zeroinitializer, [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> , <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> , <2 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP6]], <2 x float> [[TMP7]]) ; CHECK-NEXT: br i1 false, label [[BB2:%.*]], label [[BB3:%.*]] @@ -24,7 +24,7 @@ define void @test() { ; CHECK: bb3: ; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x float> [ [[TMP9]], [[BB2]] ], [ zeroinitializer, [[BB1]] ] ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = fadd <2 x float> [[TMP1]], [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = fadd <2 x float> [[TMP3]], [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = fadd <2 x float> [[TMP12]], zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = fsub <2 x float> [[TMP13]], zeroinitializer ; CHECK-NEXT: [[TMP15:%.*]] = fsub <2 x float> [[TMP14]], zeroinitializer diff --git a/llvm/test/Transforms/SLPVectorizer/X86/scheduling-cond.ll b/llvm/test/Transforms/SLPVectorizer/X86/scheduling-cond.ll new file mode 100644 index 0000000000000..ac8f4521444d0 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/scheduling-cond.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-sie-ps5 < %s | FileCheck %s + +define void @_Z3fooP3bar() { +; CHECK-LABEL: define void @_Z3fooP3bar() { +; CHECK-NEXT: .thread: +; CHECK-NEXT: [[TMP0:%.*]] = fcmp ole <2 x double> zeroinitializer, zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> zeroinitializer, <2 x i1> [[TMP0]], <4 x i32> +; CHECK-NEXT: br label [[TMP2:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = select i1 false, i1 false, i1 false +; CHECK-NEXT: [[TMP4:%.*]] = freeze <4 x i1> [[TMP1]] +; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP4]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 false, i1 [[TMP5]], i1 false +; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[TMP3]], i1 [[OP_RDX]], i1 false +; CHECK-NEXT: br i1 [[OP_RDX1]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; CHECK: 6: +; CHECK-NEXT: ret void +; CHECK: 7: +; CHECK-NEXT: ret void +; +.thread: + %0 = extractelement <2 x i1> zeroinitializer, i64 0 + %1 = extractelement <2 x i1> zeroinitializer, i64 0 + %2 = fcmp ole <2 x double> zeroinitializer, zeroinitializer + %3 = extractelement <2 x i1> %2, i64 0 + %4 = extractelement <2 x i1> zeroinitializer, i64 0 + br label %5 + +5: ; preds = %.thread + %6 = select i1 false, i1 false, i1 false + %7 = select i1 %6, i1 %0, i1 false + %8 = select i1 %7, i1 %1, i1 false + %9 = select i1 %8, i1 false, i1 false + %10 = select i1 %9, i1 %3, i1 false + %11 = select i1 %10, i1 %4, i1 false + br i1 %11, label %12, label %13 + +12: ; preds = %5 + ret void + +13: ; preds = %5 + ret void +} diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll index 8d1d257820f0c..a9f3c16f831d0 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll @@ -15,9 +15,9 @@ define void @foo(ptr %this, ptr %p, i32 %add7) { ; CHECK-NEXT: i32 2, label [[SW_BB]] ; CHECK-NEXT: ] ; CHECK: sw.bb: -; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[THIS:%.*]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP4]], [[TMP3]] +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr [[THIS:%.*]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP3]], [[TMP4]] ; CHECK-NEXT: br label [[SW_EPILOG]] ; CHECK: sw.epilog: ; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ undef, [[ENTRY:%.*]] ], [ [[TMP5]], [[SW_BB]] ] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2_unord_geps.ll b/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2_unord_geps.ll index ba83ff096c9ac..208b12192c2f4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2_unord_geps.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2_unord_geps.ll @@ -5,19 +5,19 @@ define void @test(ptr noalias %p, ptr noalias %addr, ptr noalias %s) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x ptr> poison, ptr [[ADDR:%.*]], i32 0 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE1]], <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE1]], <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, <8 x ptr> [[TMP1]], <8 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 8, <8 x i1> , <8 x i32> poison) ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x ptr> poison, ptr [[P:%.*]], i32 0 -; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x ptr> [[TMP4]], <8 x ptr> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE2]], <8 x i32> [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x i32> poison) -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP1]], i32 8, <8 x i1> , <8 x i32> poison) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE2]], <8 x i32> [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP8]], i32 4, <8 x i1> , <8 x i32> poison) -; CHECK-NEXT: [[TMP10:%.*]] = add nsw <8 x i32> [[TMP9]], [[TMP6]] -; CHECK-NEXT: store <8 x i32> [[TMP10]], ptr [[S:%.*]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x ptr> [[TMP4]], <8 x ptr> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, <8 x ptr> [[TMP5]], <8 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> , <8 x i32> poison) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, <8 x ptr> [[TMP1]], <8 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP8]], i32 8, <8 x i1> , <8 x i32> poison) +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, <8 x ptr> [[TMP5]], <8 x i32> [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP10]], i32 4, <8 x i1> , <8 x i32> poison) +; CHECK-NEXT: [[TMP12:%.*]] = add nsw <8 x i32> [[TMP11]], [[TMP7]] +; CHECK-NEXT: store <8 x i32> [[TMP12]], ptr [[S:%.*]], align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll index aa3c2be7dc9c2..477da585dbe37 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll @@ -16,10 +16,10 @@ define void @foo() { ; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr undef, align 8 ; CHECK-NEXT: br i1 undef, label [[BB3]], label [[BB4:%.*]] ; CHECK: bb4: -; CHECK-NEXT: [[TMP4:%.*]] = fpext <4 x float> [[TMP2]] to <4 x double> ; CHECK-NEXT: [[CONV2:%.*]] = uitofp i16 undef to double ; CHECK-NEXT: [[ADD1:%.*]] = fadd double [[TMP3]], [[CONV2]] ; CHECK-NEXT: [[SUB1:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[TMP4:%.*]] = fpext <4 x float> [[TMP2]] to <4 x double> ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> , double [[SUB1]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[ADD1]], i32 1 ; CHECK-NEXT: [[TMP7:%.*]] = fcmp ogt <4 x double> [[TMP6]], [[TMP4]]