diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 031b195189d8a..dc81309549796 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7073,10 +7073,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { /// extracted values from \p VL. InstructionCost computeExtractCost(ArrayRef VL, ArrayRef Mask, TTI::ShuffleKind ShuffleKind) { - auto *VecTy = cast( - cast(*find_if(VL, [](Value *V) { - return isa(V); - }))->getVectorOperandType()); + auto *VecTy = FixedVectorType::get(VL.front()->getType(), VL.size()); unsigned NumOfParts = TTI.getNumberOfParts(VecTy); if (ShuffleKind != TargetTransformInfo::SK_PermuteSingleSrc || diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll index 290560151b79a..9fd20da8e4bbf 100644 --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll @@ -291,19 +291,14 @@ define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) { ; ; GFX8-LABEL: @uadd_sat_v4i16( ; GFX8-NEXT: bb: -; GFX8-NEXT: [[ARG0_2:%.*]] = extractelement <4 x i16> [[ARG0:%.*]], i64 2 -; GFX8-NEXT: [[ARG0_3:%.*]] = extractelement <4 x i16> [[ARG0]], i64 3 -; GFX8-NEXT: [[ARG1_2:%.*]] = extractelement <4 x i16> [[ARG1:%.*]], i64 2 -; GFX8-NEXT: [[ARG1_3:%.*]] = extractelement <4 x i16> [[ARG1]], i64 3 -; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> -; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> +; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> poison, <2 x i32> +; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> poison, <2 x i32> ; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) -; GFX8-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]]) -; GFX8-NEXT: [[ADD_3:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_3]], i16 [[ARG1_3]]) -; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <4 x i32> -; GFX8-NEXT: [[INS_2:%.*]] = insertelement <4 x i16> [[TMP3]], i16 [[ADD_2]], i64 2 -; GFX8-NEXT: [[INS_3:%.*]] = insertelement <4 x i16> [[INS_2]], i16 [[ADD_3]], i64 3 -; GFX8-NEXT: ret <4 x i16> [[INS_3]] +; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> +; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> +; GFX8-NEXT: [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]]) +; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> +; GFX8-NEXT: ret <4 x i16> [[INS_31]] ; bb: %arg0.0 = extractelement <4 x i16> %arg0, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll index e62749c4c71f1..34bc2b338df5c 100644 --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll @@ -291,19 +291,14 @@ define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) { ; ; GFX8-LABEL: @uadd_sat_v4i16( ; GFX8-NEXT: bb: -; GFX8-NEXT: [[ARG0_2:%.*]] = extractelement <4 x i16> [[ARG0:%.*]], i64 2 -; GFX8-NEXT: [[ARG0_3:%.*]] = extractelement <4 x i16> [[ARG0]], i64 3 -; GFX8-NEXT: [[ARG1_2:%.*]] = extractelement <4 x i16> [[ARG1:%.*]], i64 2 -; GFX8-NEXT: [[ARG1_3:%.*]] = extractelement <4 x i16> [[ARG1]], i64 3 -; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> -; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> +; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> poison, <2 x i32> +; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> poison, <2 x i32> ; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) -; GFX8-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]]) -; GFX8-NEXT: [[ADD_3:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_3]], i16 [[ARG1_3]]) -; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <4 x i32> -; GFX8-NEXT: [[INS_2:%.*]] = insertelement <4 x i16> [[TMP3]], i16 [[ADD_2]], i64 2 -; GFX8-NEXT: [[INS_3:%.*]] = insertelement <4 x i16> [[INS_2]], i16 [[ADD_3]], i64 3 -; GFX8-NEXT: ret <4 x i16> [[INS_3]] +; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> +; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> +; GFX8-NEXT: [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]]) +; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> +; GFX8-NEXT: ret <4 x i16> [[INS_31]] ; bb: %arg0.0 = extractelement <4 x i16> %arg0, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/crash_extract_subvector_cost.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/crash_extract_subvector_cost.ll index 0a020c855cc22..e474bab2ad965 100644 --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/crash_extract_subvector_cost.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/crash_extract_subvector_cost.ll @@ -4,15 +4,10 @@ define <2 x i16> @uadd_sat_v9i16_combine_vi16(<9 x i16> %arg0, <9 x i16> %arg1) { ; CHECK-LABEL: @uadd_sat_v9i16_combine_vi16( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[ARG0_1:%.*]] = extractelement <9 x i16> undef, i64 7 -; CHECK-NEXT: [[ARG0_2:%.*]] = extractelement <9 x i16> [[ARG0:%.*]], i64 8 -; CHECK-NEXT: [[ARG1_1:%.*]] = extractelement <9 x i16> [[ARG1:%.*]], i64 7 -; CHECK-NEXT: [[ARG1_2:%.*]] = extractelement <9 x i16> [[ARG1]], i64 8 -; CHECK-NEXT: [[ADD_1:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]]) -; CHECK-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]]) -; CHECK-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> undef, i16 [[ADD_1]], i64 0 -; CHECK-NEXT: [[INS_2:%.*]] = insertelement <2 x i16> [[INS_1]], i16 [[ADD_2]], i64 1 -; CHECK-NEXT: ret <2 x i16> [[INS_2]] +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x i16> [[ARG0:%.*]], <9 x i16> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <9 x i16> [[ARG1:%.*]], <9 x i16> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +; CHECK-NEXT: ret <2 x i16> [[TMP2]] ; bb: %arg0.1 = extractelement <9 x i16> undef, i64 7 diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll index 46980b33e4018..3b63c1e35610f 100644 --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll @@ -4,23 +4,20 @@ define <4 x half> @phis(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) { ; CHECK-LABEL: @phis( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x half> [[IN1:%.*]], i64 2 -; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x half> [[IN1]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1:%.*]], <4 x half> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> ; CHECK-NEXT: br i1 [[CMP1:%.*]], label [[BB1:%.*]], label [[BB0:%.*]] ; CHECK: bb0: -; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x half> [[IN2:%.*]], i64 2 -; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x half> [[IN2]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x half> [[IN2:%.*]], <4 x half> poison, <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> ; CHECK-NEXT: br label [[BB1]] ; CHECK: bb1: -; CHECK-NEXT: [[C2:%.*]] = phi half [ [[A2]], [[ENTRY:%.*]] ], [ [[B2]], [[BB0]] ] -; CHECK-NEXT: [[C3:%.*]] = phi half [ [[A3]], [[ENTRY]] ], [ [[B3]], [[BB0]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY]] ], [ [[TMP1]], [[BB0]] ] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x half> [[TMP2]], <2 x half> poison, <4 x i32> -; CHECK-NEXT: [[O2:%.*]] = insertelement <4 x half> [[TMP3]], half [[C2]], i64 2 -; CHECK-NEXT: [[O3:%.*]] = insertelement <4 x half> [[O2]], half [[C3]], i64 3 -; CHECK-NEXT: ret <4 x half> [[O3]] +; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2]], [[BB0]] ] +; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x half> [ [[TMP1]], [[ENTRY]] ], [ [[TMP3]], [[BB0]] ] +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x half> [[TMP5]], <2 x half> poison, <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> [[TMP5]], <4 x i32> +; CHECK-NEXT: ret <4 x half> [[TMP8]] ; entry: %a0 = extractelement <4 x half> %in1, i64 0 @@ -52,23 +49,20 @@ bb1: define <4 x half> @phis_reverse(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) { ; CHECK-LABEL: @phis_reverse( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x half> [[IN1:%.*]], i64 2 -; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x half> [[IN1]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1:%.*]], <4 x half> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> ; CHECK-NEXT: br i1 [[CMP1:%.*]], label [[BB1:%.*]], label [[BB0:%.*]] ; CHECK: bb0: -; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x half> [[IN2:%.*]], i64 2 -; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x half> [[IN2]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x half> [[IN2:%.*]], <4 x half> poison, <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> ; CHECK-NEXT: br label [[BB1]] ; CHECK: bb1: -; CHECK-NEXT: [[C3:%.*]] = phi half [ [[A3]], [[ENTRY:%.*]] ], [ [[B3]], [[BB0]] ] -; CHECK-NEXT: [[C2:%.*]] = phi half [ [[A2]], [[ENTRY]] ], [ [[B2]], [[BB0]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY]] ], [ [[TMP1]], [[BB0]] ] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x half> [[TMP2]], <2 x half> poison, <4 x i32> -; CHECK-NEXT: [[O2:%.*]] = insertelement <4 x half> [[TMP3]], half [[C2]], i64 2 -; CHECK-NEXT: [[O3:%.*]] = insertelement <4 x half> [[O2]], half [[C3]], i64 3 -; CHECK-NEXT: ret <4 x half> [[O3]] +; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2]], [[BB0]] ] +; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x half> [ [[TMP1]], [[ENTRY]] ], [ [[TMP3]], [[BB0]] ] +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x half> [[TMP5]], <2 x half> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> poison, <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x half> [[TMP6]], <4 x half> [[TMP7]], <4 x i32> +; CHECK-NEXT: ret <4 x half> [[TMP8]] ; entry: %a0 = extractelement <4 x half> %in1, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll index 059e4c38b519b..9608608a18098 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll @@ -155,13 +155,11 @@ define <4 x float> @exp_4x(ptr %a) { ; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]]) ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]]) -; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 -; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]]) -; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 -; CHECK-NEXT: ret <4 x float> [[VECINS_3]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[VECINS_31]] ; ; DEFAULT-LABEL: define <4 x float> @exp_4x ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { @@ -173,13 +171,11 @@ define <4 x float> @exp_4x(ptr %a) { ; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]]) ; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]]) -; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 -; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]]) -; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 -; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; DEFAULT-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]]) +; DEFAULT-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; DEFAULT-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; DEFAULT-NEXT: ret <4 x float> [[VECINS_31]] ; entry: %0 = load <4 x float>, ptr %a, align 16 @@ -212,13 +208,11 @@ define <4 x float> @int_exp_4x(ptr %a) { ; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]]) ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_2]]) -; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 -; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_3]]) -; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 -; CHECK-NEXT: ret <4 x float> [[VECINS_3]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[VECINS_31]] ; ; DEFAULT-LABEL: define <4 x float> @int_exp_4x ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { @@ -230,13 +224,11 @@ define <4 x float> @int_exp_4x(ptr %a) { ; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]]) ; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_2]]) -; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 -; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_3]]) -; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 -; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; DEFAULT-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]]) +; DEFAULT-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; DEFAULT-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; DEFAULT-NEXT: ret <4 x float> [[VECINS_31]] ; entry: %0 = load <4 x float>, ptr %a, align 16 @@ -269,13 +261,11 @@ define <4 x float> @log_4x(ptr %a) { ; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]]) ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]]) -; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 -; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]]) -; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 -; CHECK-NEXT: ret <4 x float> [[VECINS_3]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[VECINS_31]] ; ; DEFAULT-LABEL: define <4 x float> @log_4x ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { @@ -287,13 +277,11 @@ define <4 x float> @log_4x(ptr %a) { ; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]]) ; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]]) -; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 -; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]]) -; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 -; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; DEFAULT-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]]) +; DEFAULT-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; DEFAULT-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; DEFAULT-NEXT: ret <4 x float> [[VECINS_31]] ; entry: %0 = load <4 x float>, ptr %a, align 16 @@ -326,13 +314,11 @@ define <4 x float> @int_log_4x(ptr %a) { ; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_1]]) ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_2]]) -; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 -; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_3]]) -; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 -; CHECK-NEXT: ret <4 x float> [[VECINS_3]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[VECINS_31]] ; ; DEFAULT-LABEL: define <4 x float> @int_log_4x ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { @@ -344,13 +330,11 @@ define <4 x float> @int_log_4x(ptr %a) { ; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_1]]) ; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_2]]) -; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 -; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_3]]) -; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 -; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; DEFAULT-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]]) +; DEFAULT-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; DEFAULT-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; DEFAULT-NEXT: ret <4 x float> [[VECINS_31]] ; entry: %0 = load <4 x float>, ptr %a, align 16 @@ -383,13 +367,11 @@ define <4 x float> @sin_4x(ptr %a) { ; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]]) ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @sinf(float [[VECEXT_2]]) -; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 -; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @sinf(float [[VECEXT_3]]) -; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 -; CHECK-NEXT: ret <4 x float> [[VECINS_3]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[VECINS_31]] ; ; DEFAULT-LABEL: define <4 x float> @sin_4x ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { @@ -401,13 +383,11 @@ define <4 x float> @sin_4x(ptr %a) { ; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]]) ; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @sinf(float [[VECEXT_2]]) -; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 -; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @sinf(float [[VECEXT_3]]) -; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 -; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; DEFAULT-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]]) +; DEFAULT-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; DEFAULT-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; DEFAULT-NEXT: ret <4 x float> [[VECINS_31]] ; entry: %0 = load <4 x float>, ptr %a, align 16 @@ -440,13 +420,11 @@ define <4 x float> @int_sin_4x(ptr %a) { ; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]]) ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_2]]) -; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 -; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]]) -; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 -; CHECK-NEXT: ret <4 x float> [[VECINS_3]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[VECINS_31]] ; ; DEFAULT-LABEL: define <4 x float> @int_sin_4x ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { @@ -458,13 +436,11 @@ define <4 x float> @int_sin_4x(ptr %a) { ; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]]) ; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_2]]) -; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 -; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]]) -; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 -; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; DEFAULT-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]]) +; DEFAULT-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; DEFAULT-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; DEFAULT-NEXT: ret <4 x float> [[VECINS_31]] ; entry: %0 = load <4 x float>, ptr %a, align 16 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll index 6c21cc1cfc5be..da6f4e70c7c6f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll @@ -2,9 +2,9 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=SLM ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX define <8 x float> @ceil_floor(<8 x float> %a) { ; SSE-LABEL: @ceil_floor( @@ -51,47 +51,24 @@ define <8 x float> @ceil_floor(<8 x float> %a) { ; ; AVX-LABEL: @ceil_floor( ; AVX-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i64 0 -; AVX-NEXT: [[A1:%.*]] = extractelement <8 x float> [[A]], i64 1 -; AVX-NEXT: [[A2:%.*]] = extractelement <8 x float> [[A]], i64 2 ; AVX-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i64 3 ; AVX-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]]) -; AVX-NEXT: [[AB1:%.*]] = call float @llvm.floor.f32(float [[A1]]) -; AVX-NEXT: [[AB2:%.*]] = call float @llvm.floor.f32(float [[A2]]) +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; AVX-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]]) ; AVX-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]]) -; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> -; AVX-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP1]]) -; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> -; AVX-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP3]]) +; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; AVX-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]]) +; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; AVX-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) ; AVX-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i64 0 -; AVX-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[AB1]], i64 1 -; AVX-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[AB2]], i64 2 -; AVX-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[AB3]], i64 3 -; AVX-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> -; AVX-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP5]], <8 x i32> -; AVX-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> -; AVX-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP6]], <8 x i32> +; AVX-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> +; AVX-NEXT: [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> +; AVX-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R23]], float [[AB3]], i64 3 +; AVX-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> +; AVX-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> +; AVX-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> +; AVX-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP9]], <8 x i32> ; AVX-NEXT: ret <8 x float> [[R71]] -; -; AVX2-LABEL: @ceil_floor( -; AVX2-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i64 0 -; AVX2-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i64 3 -; AVX2-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]]) -; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> -; AVX2-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]]) -; AVX2-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]]) -; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> -; AVX2-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]]) -; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> -; AVX2-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) -; AVX2-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i64 0 -; AVX2-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> -; AVX2-NEXT: [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> -; AVX2-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R23]], float [[AB3]], i64 3 -; AVX2-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> -; AVX2-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> -; AVX2-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> -; AVX2-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP9]], <8 x i32> -; AVX2-NEXT: ret <8 x float> [[R71]] ; %a0 = extractelement <8 x float> %a, i32 0 %a1 = extractelement <8 x float> %a, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll index 435c677c3afbc..3b84e70d847ff 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll @@ -2,9 +2,9 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=SLM ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX define <8 x float> @ceil_floor(<8 x float> %a) { ; SSE-LABEL: @ceil_floor( @@ -51,47 +51,24 @@ define <8 x float> @ceil_floor(<8 x float> %a) { ; ; AVX-LABEL: @ceil_floor( ; AVX-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i64 0 -; AVX-NEXT: [[A1:%.*]] = extractelement <8 x float> [[A]], i64 1 -; AVX-NEXT: [[A2:%.*]] = extractelement <8 x float> [[A]], i64 2 ; AVX-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i64 3 ; AVX-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]]) -; AVX-NEXT: [[AB1:%.*]] = call float @llvm.floor.f32(float [[A1]]) -; AVX-NEXT: [[AB2:%.*]] = call float @llvm.floor.f32(float [[A2]]) +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; AVX-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]]) ; AVX-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]]) -; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> -; AVX-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP1]]) -; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> -; AVX-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP3]]) +; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; AVX-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]]) +; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; AVX-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) ; AVX-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[AB0]], i64 0 -; AVX-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[AB1]], i64 1 -; AVX-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[AB2]], i64 2 -; AVX-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[AB3]], i64 3 -; AVX-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> -; AVX-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP5]], <8 x i32> -; AVX-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> -; AVX-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP6]], <8 x i32> +; AVX-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> +; AVX-NEXT: [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> +; AVX-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R23]], float [[AB3]], i64 3 +; AVX-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> +; AVX-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> +; AVX-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> +; AVX-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP9]], <8 x i32> ; AVX-NEXT: ret <8 x float> [[R71]] -; -; AVX2-LABEL: @ceil_floor( -; AVX2-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i64 0 -; AVX2-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i64 3 -; AVX2-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]]) -; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> -; AVX2-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]]) -; AVX2-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]]) -; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> -; AVX2-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]]) -; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> -; AVX2-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) -; AVX2-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[AB0]], i64 0 -; AVX2-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> -; AVX2-NEXT: [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> -; AVX2-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R23]], float [[AB3]], i64 3 -; AVX2-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> -; AVX2-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> -; AVX2-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> -; AVX2-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP9]], <8 x i32> -; AVX2-NEXT: ret <8 x float> [[R71]] ; %a0 = extractelement <8 x float> %a, i32 0 %a1 = extractelement <8 x float> %a, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll index cf9b974a724cf..5fcd339f333e8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll @@ -167,42 +167,24 @@ define void @test_v4f32_v2f32_store(<4 x float> %f, ptr %p){ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) { ; SSE-LABEL: @test_v4f64( -; SSE-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 -; SSE-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 -; SSE-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A]], i64 2 -; SSE-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i64 3 -; SSE-NEXT: [[B0:%.*]] = extractelement <4 x double> [[B:%.*]], i64 0 -; SSE-NEXT: [[B1:%.*]] = extractelement <4 x double> [[B]], i64 1 -; SSE-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B]], i64 2 -; SSE-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 -; SSE-NEXT: [[R0:%.*]] = fadd double [[A0]], [[A1]] -; SSE-NEXT: [[R1:%.*]] = fadd double [[B0]], [[B1]] -; SSE-NEXT: [[R2:%.*]] = fadd double [[A2]], [[A3]] -; SSE-NEXT: [[R3:%.*]] = fadd double [[B2]], [[B3]] -; SSE-NEXT: [[R00:%.*]] = insertelement <4 x double> poison, double [[R0]], i64 0 -; SSE-NEXT: [[R01:%.*]] = insertelement <4 x double> [[R00]], double [[R1]], i64 1 -; SSE-NEXT: [[R02:%.*]] = insertelement <4 x double> [[R01]], double [[R2]], i64 2 -; SSE-NEXT: [[R03:%.*]] = insertelement <4 x double> [[R02]], double [[R3]], i64 3 -; SSE-NEXT: ret <4 x double> [[R03]] +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; SSE-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; SSE-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]] +; SSE-NEXT: [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> +; SSE-NEXT: ret <4 x double> [[R031]] ; ; SLM-LABEL: @test_v4f64( -; SLM-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 -; SLM-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 -; SLM-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A]], i64 2 -; SLM-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i64 3 -; SLM-NEXT: [[B0:%.*]] = extractelement <4 x double> [[B:%.*]], i64 0 -; SLM-NEXT: [[B1:%.*]] = extractelement <4 x double> [[B]], i64 1 -; SLM-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B]], i64 2 -; SLM-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 -; SLM-NEXT: [[R0:%.*]] = fadd double [[A0]], [[A1]] -; SLM-NEXT: [[R1:%.*]] = fadd double [[B0]], [[B1]] -; SLM-NEXT: [[R2:%.*]] = fadd double [[A2]], [[A3]] -; SLM-NEXT: [[R3:%.*]] = fadd double [[B2]], [[B3]] -; SLM-NEXT: [[R00:%.*]] = insertelement <4 x double> poison, double [[R0]], i64 0 -; SLM-NEXT: [[R01:%.*]] = insertelement <4 x double> [[R00]], double [[R1]], i64 1 -; SLM-NEXT: [[R02:%.*]] = insertelement <4 x double> [[R01]], double [[R2]], i64 2 -; SLM-NEXT: [[R03:%.*]] = insertelement <4 x double> [[R02]], double [[R3]], i64 3 -; SLM-NEXT: ret <4 x double> [[R03]] +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> +; SLM-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; SLM-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; SLM-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; SLM-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]] +; SLM-NEXT: [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> +; SLM-NEXT: ret <4 x double> [[R031]] ; ; AVX-LABEL: @test_v4f64( ; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> @@ -231,29 +213,16 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) { ; PR50392 define <4 x double> @test_v4f64_partial_swizzle(<4 x double> %a, <4 x double> %b) { -; SSE-LABEL: @test_v4f64_partial_swizzle( -; SSE-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 -; SSE-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 -; SSE-NEXT: [[R0:%.*]] = fadd double [[A0]], [[A1]] -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> -; SSE-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] -; SSE-NEXT: [[R00:%.*]] = insertelement <4 x double> poison, double [[R0]], i64 0 -; SSE-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> -; SSE-NEXT: [[R031:%.*]] = shufflevector <4 x double> [[R00]], <4 x double> [[TMP4]], <4 x i32> -; SSE-NEXT: ret <4 x double> [[R031]] -; -; SLM-LABEL: @test_v4f64_partial_swizzle( -; SLM-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 -; SLM-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 -; SLM-NEXT: [[R0:%.*]] = fadd double [[A0]], [[A1]] -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> -; SLM-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> -; SLM-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] -; SLM-NEXT: [[R00:%.*]] = insertelement <4 x double> poison, double [[R0]], i64 0 -; SLM-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> -; SLM-NEXT: [[R031:%.*]] = shufflevector <4 x double> [[R00]], <4 x double> [[TMP4]], <4 x i32> -; SLM-NEXT: ret <4 x double> [[R031]] +; CHECK-LABEL: @test_v4f64_partial_swizzle( +; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B:%.*]], i64 2 +; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B]], <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[R3:%.*]] = fadd double [[B2]], [[B3]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[R03:%.*]] = insertelement <4 x double> [[TMP4]], double [[R3]], i64 3 +; CHECK-NEXT: ret <4 x double> [[R03]] ; %a0 = extractelement <4 x double> %a, i64 0 %a1 = extractelement <4 x double> %a, i64 1 @@ -278,23 +247,13 @@ define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) { ; SSE-NEXT: ret <8 x float> [[TMP3]] ; ; SLM-LABEL: @test_v8f32( -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP3:%.*]] = fadd <2 x float> [[TMP1]], [[TMP2]] -; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[B:%.*]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP6:%.*]] = fadd <2 x float> [[TMP4]], [[TMP5]] -; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP9:%.*]] = fadd <2 x float> [[TMP7]], [[TMP8]] -; SLM-NEXT: [[TMP10:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP11:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP12:%.*]] = fadd <2 x float> [[TMP10]], [[TMP11]] -; SLM-NEXT: [[R033:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP6]], <8 x i32> -; SLM-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> poison, <8 x i32> -; SLM-NEXT: [[R052:%.*]] = shufflevector <8 x float> [[R033]], <8 x float> [[TMP13]], <8 x i32> -; SLM-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP12]], <2 x float> poison, <8 x i32> -; SLM-NEXT: [[R071:%.*]] = shufflevector <8 x float> [[R052]], <8 x float> [[TMP14]], <8 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <4 x i32> +; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> +; SLM-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] +; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> +; SLM-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP4]], [[TMP5]] +; SLM-NEXT: [[R071:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> ; SLM-NEXT: ret <8 x float> [[R071]] ; ; AVX-LABEL: @test_v8f32( @@ -408,48 +367,14 @@ define <8 x i32> @test_v8i32(<8 x i32> %a, <8 x i32> %b) { define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) { ; SSE-LABEL: @test_v16i16( -; SSE-NEXT: [[B0:%.*]] = extractelement <16 x i16> [[B:%.*]], i64 0 -; SSE-NEXT: [[B1:%.*]] = extractelement <16 x i16> [[B]], i64 1 -; SSE-NEXT: [[B2:%.*]] = extractelement <16 x i16> [[B]], i64 2 -; SSE-NEXT: [[B3:%.*]] = extractelement <16 x i16> [[B]], i64 3 -; SSE-NEXT: [[B4:%.*]] = extractelement <16 x i16> [[B]], i64 4 -; SSE-NEXT: [[B5:%.*]] = extractelement <16 x i16> [[B]], i64 5 -; SSE-NEXT: [[B6:%.*]] = extractelement <16 x i16> [[B]], i64 6 -; SSE-NEXT: [[B7:%.*]] = extractelement <16 x i16> [[B]], i64 7 -; SSE-NEXT: [[B8:%.*]] = extractelement <16 x i16> [[B]], i64 8 -; SSE-NEXT: [[B9:%.*]] = extractelement <16 x i16> [[B]], i64 9 -; SSE-NEXT: [[B10:%.*]] = extractelement <16 x i16> [[B]], i64 10 -; SSE-NEXT: [[B11:%.*]] = extractelement <16 x i16> [[B]], i64 11 -; SSE-NEXT: [[B12:%.*]] = extractelement <16 x i16> [[B]], i64 12 -; SSE-NEXT: [[B13:%.*]] = extractelement <16 x i16> [[B]], i64 13 -; SSE-NEXT: [[B14:%.*]] = extractelement <16 x i16> [[B]], i64 14 -; SSE-NEXT: [[B15:%.*]] = extractelement <16 x i16> [[B]], i64 15 -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> poison, <4 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <4 x i32> -; SSE-NEXT: [[TMP3:%.*]] = add <4 x i16> [[TMP1]], [[TMP2]] -; SSE-NEXT: [[R4:%.*]] = add i16 [[B0]], [[B1]] -; SSE-NEXT: [[R5:%.*]] = add i16 [[B2]], [[B3]] -; SSE-NEXT: [[R6:%.*]] = add i16 [[B4]], [[B5]] -; SSE-NEXT: [[R7:%.*]] = add i16 [[B6]], [[B7]] -; SSE-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <4 x i32> -; SSE-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <4 x i32> -; SSE-NEXT: [[TMP6:%.*]] = add <4 x i16> [[TMP4]], [[TMP5]] -; SSE-NEXT: [[R12:%.*]] = add i16 [[B8]], [[B9]] -; SSE-NEXT: [[R13:%.*]] = add i16 [[B10]], [[B11]] -; SSE-NEXT: [[R14:%.*]] = add i16 [[B12]], [[B13]] -; SSE-NEXT: [[R15:%.*]] = add i16 [[B14]], [[B15]] -; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <16 x i32> -; SSE-NEXT: [[RV4:%.*]] = insertelement <16 x i16> [[TMP7]], i16 [[R4]], i64 4 -; SSE-NEXT: [[RV5:%.*]] = insertelement <16 x i16> [[RV4]], i16 [[R5]], i64 5 -; SSE-NEXT: [[RV6:%.*]] = insertelement <16 x i16> [[RV5]], i16 [[R6]], i64 6 -; SSE-NEXT: [[RV7:%.*]] = insertelement <16 x i16> [[RV6]], i16 [[R7]], i64 7 -; SSE-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> poison, <16 x i32> -; SSE-NEXT: [[RV111:%.*]] = shufflevector <16 x i16> [[RV7]], <16 x i16> [[TMP8]], <16 x i32> -; SSE-NEXT: [[RV12:%.*]] = insertelement <16 x i16> [[RV111]], i16 [[R12]], i64 12 -; SSE-NEXT: [[RV13:%.*]] = insertelement <16 x i16> [[RV12]], i16 [[R13]], i64 13 -; SSE-NEXT: [[RV14:%.*]] = insertelement <16 x i16> [[RV13]], i16 [[R14]], i64 14 -; SSE-NEXT: [[RV15:%.*]] = insertelement <16 x i16> [[RV14]], i16 [[R15]], i64 15 -; SSE-NEXT: ret <16 x i16> [[RV15]] +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> +; SSE-NEXT: [[TMP3:%.*]] = add <8 x i16> [[TMP1]], [[TMP2]] +; SSE-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> +; SSE-NEXT: [[TMP6:%.*]] = add <8 x i16> [[TMP4]], [[TMP5]] +; SSE-NEXT: [[RV151:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> +; SSE-NEXT: ret <16 x i16> [[RV151]] ; ; SLM-LABEL: @test_v16i16( ; SLM-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll b/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll index ee60ffe39cc16..c3e03973cdac3 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll @@ -167,42 +167,24 @@ define void @test_v4f32_v2f32_store(<4 x float> %f, ptr %p){ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) { ; SSE-LABEL: @test_v4f64( -; SSE-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 -; SSE-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 -; SSE-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A]], i64 2 -; SSE-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i64 3 -; SSE-NEXT: [[B0:%.*]] = extractelement <4 x double> [[B:%.*]], i64 0 -; SSE-NEXT: [[B1:%.*]] = extractelement <4 x double> [[B]], i64 1 -; SSE-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B]], i64 2 -; SSE-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 -; SSE-NEXT: [[R0:%.*]] = fadd double [[A0]], [[A1]] -; SSE-NEXT: [[R1:%.*]] = fadd double [[B0]], [[B1]] -; SSE-NEXT: [[R2:%.*]] = fadd double [[A2]], [[A3]] -; SSE-NEXT: [[R3:%.*]] = fadd double [[B2]], [[B3]] -; SSE-NEXT: [[R00:%.*]] = insertelement <4 x double> undef, double [[R0]], i64 0 -; SSE-NEXT: [[R01:%.*]] = insertelement <4 x double> [[R00]], double [[R1]], i64 1 -; SSE-NEXT: [[R02:%.*]] = insertelement <4 x double> [[R01]], double [[R2]], i64 2 -; SSE-NEXT: [[R03:%.*]] = insertelement <4 x double> [[R02]], double [[R3]], i64 3 -; SSE-NEXT: ret <4 x double> [[R03]] +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; SSE-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; SSE-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]] +; SSE-NEXT: [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> +; SSE-NEXT: ret <4 x double> [[R031]] ; ; SLM-LABEL: @test_v4f64( -; SLM-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 -; SLM-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 -; SLM-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A]], i64 2 -; SLM-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i64 3 -; SLM-NEXT: [[B0:%.*]] = extractelement <4 x double> [[B:%.*]], i64 0 -; SLM-NEXT: [[B1:%.*]] = extractelement <4 x double> [[B]], i64 1 -; SLM-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B]], i64 2 -; SLM-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 -; SLM-NEXT: [[R0:%.*]] = fadd double [[A0]], [[A1]] -; SLM-NEXT: [[R1:%.*]] = fadd double [[B0]], [[B1]] -; SLM-NEXT: [[R2:%.*]] = fadd double [[A2]], [[A3]] -; SLM-NEXT: [[R3:%.*]] = fadd double [[B2]], [[B3]] -; SLM-NEXT: [[R00:%.*]] = insertelement <4 x double> undef, double [[R0]], i64 0 -; SLM-NEXT: [[R01:%.*]] = insertelement <4 x double> [[R00]], double [[R1]], i64 1 -; SLM-NEXT: [[R02:%.*]] = insertelement <4 x double> [[R01]], double [[R2]], i64 2 -; SLM-NEXT: [[R03:%.*]] = insertelement <4 x double> [[R02]], double [[R3]], i64 3 -; SLM-NEXT: ret <4 x double> [[R03]] +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> +; SLM-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; SLM-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; SLM-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; SLM-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]] +; SLM-NEXT: [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> +; SLM-NEXT: ret <4 x double> [[R031]] ; ; AVX-LABEL: @test_v4f64( ; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> @@ -231,29 +213,16 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) { ; PR50392 define <4 x double> @test_v4f64_partial_swizzle(<4 x double> %a, <4 x double> %b) { -; SSE-LABEL: @test_v4f64_partial_swizzle( -; SSE-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 -; SSE-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 -; SSE-NEXT: [[R0:%.*]] = fadd double [[A0]], [[A1]] -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> -; SSE-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] -; SSE-NEXT: [[R00:%.*]] = insertelement <4 x double> undef, double [[R0]], i64 0 -; SSE-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> -; SSE-NEXT: [[R031:%.*]] = shufflevector <4 x double> [[R00]], <4 x double> [[TMP4]], <4 x i32> -; SSE-NEXT: ret <4 x double> [[R031]] -; -; SLM-LABEL: @test_v4f64_partial_swizzle( -; SLM-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 -; SLM-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 -; SLM-NEXT: [[R0:%.*]] = fadd double [[A0]], [[A1]] -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> -; SLM-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> -; SLM-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] -; SLM-NEXT: [[R00:%.*]] = insertelement <4 x double> undef, double [[R0]], i64 0 -; SLM-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> -; SLM-NEXT: [[R031:%.*]] = shufflevector <4 x double> [[R00]], <4 x double> [[TMP4]], <4 x i32> -; SLM-NEXT: ret <4 x double> [[R031]] +; CHECK-LABEL: @test_v4f64_partial_swizzle( +; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B:%.*]], i64 2 +; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B]], <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[R3:%.*]] = fadd double [[B2]], [[B3]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> undef, <4 x i32> +; CHECK-NEXT: [[R03:%.*]] = insertelement <4 x double> [[TMP4]], double [[R3]], i64 3 +; CHECK-NEXT: ret <4 x double> [[R03]] ; %a0 = extractelement <4 x double> %a, i64 0 %a1 = extractelement <4 x double> %a, i64 1 @@ -278,23 +247,13 @@ define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) { ; SSE-NEXT: ret <8 x float> [[TMP3]] ; ; SLM-LABEL: @test_v8f32( -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP3:%.*]] = fadd <2 x float> [[TMP1]], [[TMP2]] -; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[B:%.*]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP6:%.*]] = fadd <2 x float> [[TMP4]], [[TMP5]] -; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP9:%.*]] = fadd <2 x float> [[TMP7]], [[TMP8]] -; SLM-NEXT: [[TMP10:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP11:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP12:%.*]] = fadd <2 x float> [[TMP10]], [[TMP11]] -; SLM-NEXT: [[R033:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP6]], <8 x i32> -; SLM-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> poison, <8 x i32> -; SLM-NEXT: [[R052:%.*]] = shufflevector <8 x float> [[R033]], <8 x float> [[TMP13]], <8 x i32> -; SLM-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP12]], <2 x float> poison, <8 x i32> -; SLM-NEXT: [[R071:%.*]] = shufflevector <8 x float> [[R052]], <8 x float> [[TMP14]], <8 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <4 x i32> +; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> +; SLM-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] +; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> +; SLM-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP4]], [[TMP5]] +; SLM-NEXT: [[R071:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> ; SLM-NEXT: ret <8 x float> [[R071]] ; ; AVX-LABEL: @test_v8f32( @@ -408,48 +367,14 @@ define <8 x i32> @test_v8i32(<8 x i32> %a, <8 x i32> %b) { define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) { ; SSE-LABEL: @test_v16i16( -; SSE-NEXT: [[B0:%.*]] = extractelement <16 x i16> [[B:%.*]], i64 0 -; SSE-NEXT: [[B1:%.*]] = extractelement <16 x i16> [[B]], i64 1 -; SSE-NEXT: [[B2:%.*]] = extractelement <16 x i16> [[B]], i64 2 -; SSE-NEXT: [[B3:%.*]] = extractelement <16 x i16> [[B]], i64 3 -; SSE-NEXT: [[B4:%.*]] = extractelement <16 x i16> [[B]], i64 4 -; SSE-NEXT: [[B5:%.*]] = extractelement <16 x i16> [[B]], i64 5 -; SSE-NEXT: [[B6:%.*]] = extractelement <16 x i16> [[B]], i64 6 -; SSE-NEXT: [[B7:%.*]] = extractelement <16 x i16> [[B]], i64 7 -; SSE-NEXT: [[B8:%.*]] = extractelement <16 x i16> [[B]], i64 8 -; SSE-NEXT: [[B9:%.*]] = extractelement <16 x i16> [[B]], i64 9 -; SSE-NEXT: [[B10:%.*]] = extractelement <16 x i16> [[B]], i64 10 -; SSE-NEXT: [[B11:%.*]] = extractelement <16 x i16> [[B]], i64 11 -; SSE-NEXT: [[B12:%.*]] = extractelement <16 x i16> [[B]], i64 12 -; SSE-NEXT: [[B13:%.*]] = extractelement <16 x i16> [[B]], i64 13 -; SSE-NEXT: [[B14:%.*]] = extractelement <16 x i16> [[B]], i64 14 -; SSE-NEXT: [[B15:%.*]] = extractelement <16 x i16> [[B]], i64 15 -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> poison, <4 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <4 x i32> -; SSE-NEXT: [[TMP3:%.*]] = add <4 x i16> [[TMP1]], [[TMP2]] -; SSE-NEXT: [[R4:%.*]] = add i16 [[B0]], [[B1]] -; SSE-NEXT: [[R5:%.*]] = add i16 [[B2]], [[B3]] -; SSE-NEXT: [[R6:%.*]] = add i16 [[B4]], [[B5]] -; SSE-NEXT: [[R7:%.*]] = add i16 [[B6]], [[B7]] -; SSE-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <4 x i32> -; SSE-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <4 x i32> -; SSE-NEXT: [[TMP6:%.*]] = add <4 x i16> [[TMP4]], [[TMP5]] -; SSE-NEXT: [[R12:%.*]] = add i16 [[B8]], [[B9]] -; SSE-NEXT: [[R13:%.*]] = add i16 [[B10]], [[B11]] -; SSE-NEXT: [[R14:%.*]] = add i16 [[B12]], [[B13]] -; SSE-NEXT: [[R15:%.*]] = add i16 [[B14]], [[B15]] -; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> undef, <16 x i32> -; SSE-NEXT: [[RV4:%.*]] = insertelement <16 x i16> [[TMP7]], i16 [[R4]], i64 4 -; SSE-NEXT: [[RV5:%.*]] = insertelement <16 x i16> [[RV4]], i16 [[R5]], i64 5 -; SSE-NEXT: [[RV6:%.*]] = insertelement <16 x i16> [[RV5]], i16 [[R6]], i64 6 -; SSE-NEXT: [[RV7:%.*]] = insertelement <16 x i16> [[RV6]], i16 [[R7]], i64 7 -; SSE-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> poison, <16 x i32> -; SSE-NEXT: [[RV111:%.*]] = shufflevector <16 x i16> [[RV7]], <16 x i16> [[TMP8]], <16 x i32> -; SSE-NEXT: [[RV12:%.*]] = insertelement <16 x i16> [[RV111]], i16 [[R12]], i64 12 -; SSE-NEXT: [[RV13:%.*]] = insertelement <16 x i16> [[RV12]], i16 [[R13]], i64 13 -; SSE-NEXT: [[RV14:%.*]] = insertelement <16 x i16> [[RV13]], i16 [[R14]], i64 14 -; SSE-NEXT: [[RV15:%.*]] = insertelement <16 x i16> [[RV14]], i16 [[R15]], i64 15 -; SSE-NEXT: ret <16 x i16> [[RV15]] +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> +; SSE-NEXT: [[TMP3:%.*]] = add <8 x i16> [[TMP1]], [[TMP2]] +; SSE-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> +; SSE-NEXT: [[TMP6:%.*]] = add <8 x i16> [[TMP4]], [[TMP5]] +; SSE-NEXT: [[RV151:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> +; SSE-NEXT: ret <16 x i16> [[RV151]] ; ; SLM-LABEL: @test_v16i16( ; SLM-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll index fa305da9730fe..4822d52086e27 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll @@ -146,42 +146,24 @@ define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) { define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) { ; SSE-LABEL: @test_v4f64( -; SSE-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 -; SSE-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 -; SSE-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A]], i64 2 -; SSE-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i64 3 -; SSE-NEXT: [[B0:%.*]] = extractelement <4 x double> [[B:%.*]], i64 0 -; SSE-NEXT: [[B1:%.*]] = extractelement <4 x double> [[B]], i64 1 -; SSE-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B]], i64 2 -; SSE-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 -; SSE-NEXT: [[R0:%.*]] = fsub double [[A0]], [[A1]] -; SSE-NEXT: [[R1:%.*]] = fsub double [[B0]], [[B1]] -; SSE-NEXT: [[R2:%.*]] = fsub double [[A2]], [[A3]] -; SSE-NEXT: [[R3:%.*]] = fsub double [[B2]], [[B3]] -; SSE-NEXT: [[R00:%.*]] = insertelement <4 x double> poison, double [[R0]], i64 0 -; SSE-NEXT: [[R01:%.*]] = insertelement <4 x double> [[R00]], double [[R1]], i64 1 -; SSE-NEXT: [[R02:%.*]] = insertelement <4 x double> [[R01]], double [[R2]], i64 2 -; SSE-NEXT: [[R03:%.*]] = insertelement <4 x double> [[R02]], double [[R3]], i64 3 -; SSE-NEXT: ret <4 x double> [[R03]] +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; SSE-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]] +; SSE-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; SSE-NEXT: [[TMP6:%.*]] = fsub <2 x double> [[TMP4]], [[TMP5]] +; SSE-NEXT: [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> +; SSE-NEXT: ret <4 x double> [[R031]] ; ; SLM-LABEL: @test_v4f64( -; SLM-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 -; SLM-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 -; SLM-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A]], i64 2 -; SLM-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i64 3 -; SLM-NEXT: [[B0:%.*]] = extractelement <4 x double> [[B:%.*]], i64 0 -; SLM-NEXT: [[B1:%.*]] = extractelement <4 x double> [[B]], i64 1 -; SLM-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B]], i64 2 -; SLM-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 -; SLM-NEXT: [[R0:%.*]] = fsub double [[A0]], [[A1]] -; SLM-NEXT: [[R1:%.*]] = fsub double [[B0]], [[B1]] -; SLM-NEXT: [[R2:%.*]] = fsub double [[A2]], [[A3]] -; SLM-NEXT: [[R3:%.*]] = fsub double [[B2]], [[B3]] -; SLM-NEXT: [[R00:%.*]] = insertelement <4 x double> poison, double [[R0]], i64 0 -; SLM-NEXT: [[R01:%.*]] = insertelement <4 x double> [[R00]], double [[R1]], i64 1 -; SLM-NEXT: [[R02:%.*]] = insertelement <4 x double> [[R01]], double [[R2]], i64 2 -; SLM-NEXT: [[R03:%.*]] = insertelement <4 x double> [[R02]], double [[R3]], i64 3 -; SLM-NEXT: ret <4 x double> [[R03]] +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> +; SLM-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; SLM-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]] +; SLM-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; SLM-NEXT: [[TMP6:%.*]] = fsub <2 x double> [[TMP4]], [[TMP5]] +; SLM-NEXT: [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> +; SLM-NEXT: ret <4 x double> [[R031]] ; ; AVX-LABEL: @test_v4f64( ; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> @@ -216,23 +198,13 @@ define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) { ; SSE-NEXT: ret <8 x float> [[TMP3]] ; ; SLM-LABEL: @test_v8f32( -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP3:%.*]] = fsub <2 x float> [[TMP1]], [[TMP2]] -; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[B:%.*]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP6:%.*]] = fsub <2 x float> [[TMP4]], [[TMP5]] -; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP9:%.*]] = fsub <2 x float> [[TMP7]], [[TMP8]] -; SLM-NEXT: [[TMP10:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP11:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP12:%.*]] = fsub <2 x float> [[TMP10]], [[TMP11]] -; SLM-NEXT: [[R033:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP6]], <8 x i32> -; SLM-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> poison, <8 x i32> -; SLM-NEXT: [[R052:%.*]] = shufflevector <8 x float> [[R033]], <8 x float> [[TMP13]], <8 x i32> -; SLM-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP12]], <2 x float> poison, <8 x i32> -; SLM-NEXT: [[R071:%.*]] = shufflevector <8 x float> [[R052]], <8 x float> [[TMP14]], <8 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <4 x i32> +; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> +; SLM-NEXT: [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]] +; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> +; SLM-NEXT: [[TMP6:%.*]] = fsub <4 x float> [[TMP4]], [[TMP5]] +; SLM-NEXT: [[R071:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> ; SLM-NEXT: ret <8 x float> [[R071]] ; ; AVX-LABEL: @test_v8f32( @@ -346,48 +318,14 @@ define <8 x i32> @test_v8i32(<8 x i32> %a, <8 x i32> %b) { define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) { ; SSE-LABEL: @test_v16i16( -; SSE-NEXT: [[B0:%.*]] = extractelement <16 x i16> [[B:%.*]], i64 0 -; SSE-NEXT: [[B1:%.*]] = extractelement <16 x i16> [[B]], i64 1 -; SSE-NEXT: [[B2:%.*]] = extractelement <16 x i16> [[B]], i64 2 -; SSE-NEXT: [[B3:%.*]] = extractelement <16 x i16> [[B]], i64 3 -; SSE-NEXT: [[B4:%.*]] = extractelement <16 x i16> [[B]], i64 4 -; SSE-NEXT: [[B5:%.*]] = extractelement <16 x i16> [[B]], i64 5 -; SSE-NEXT: [[B6:%.*]] = extractelement <16 x i16> [[B]], i64 6 -; SSE-NEXT: [[B7:%.*]] = extractelement <16 x i16> [[B]], i64 7 -; SSE-NEXT: [[B8:%.*]] = extractelement <16 x i16> [[B]], i64 8 -; SSE-NEXT: [[B9:%.*]] = extractelement <16 x i16> [[B]], i64 9 -; SSE-NEXT: [[B10:%.*]] = extractelement <16 x i16> [[B]], i64 10 -; SSE-NEXT: [[B11:%.*]] = extractelement <16 x i16> [[B]], i64 11 -; SSE-NEXT: [[B12:%.*]] = extractelement <16 x i16> [[B]], i64 12 -; SSE-NEXT: [[B13:%.*]] = extractelement <16 x i16> [[B]], i64 13 -; SSE-NEXT: [[B14:%.*]] = extractelement <16 x i16> [[B]], i64 14 -; SSE-NEXT: [[B15:%.*]] = extractelement <16 x i16> [[B]], i64 15 -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> poison, <4 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <4 x i32> -; SSE-NEXT: [[TMP3:%.*]] = sub <4 x i16> [[TMP1]], [[TMP2]] -; SSE-NEXT: [[R4:%.*]] = sub i16 [[B0]], [[B1]] -; SSE-NEXT: [[R5:%.*]] = sub i16 [[B2]], [[B3]] -; SSE-NEXT: [[R6:%.*]] = sub i16 [[B4]], [[B5]] -; SSE-NEXT: [[R7:%.*]] = sub i16 [[B6]], [[B7]] -; SSE-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <4 x i32> -; SSE-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <4 x i32> -; SSE-NEXT: [[TMP6:%.*]] = sub <4 x i16> [[TMP4]], [[TMP5]] -; SSE-NEXT: [[R12:%.*]] = sub i16 [[B8]], [[B9]] -; SSE-NEXT: [[R13:%.*]] = sub i16 [[B10]], [[B11]] -; SSE-NEXT: [[R14:%.*]] = sub i16 [[B12]], [[B13]] -; SSE-NEXT: [[R15:%.*]] = sub i16 [[B14]], [[B15]] -; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <16 x i32> -; SSE-NEXT: [[RV4:%.*]] = insertelement <16 x i16> [[TMP7]], i16 [[R4]], i64 4 -; SSE-NEXT: [[RV5:%.*]] = insertelement <16 x i16> [[RV4]], i16 [[R5]], i64 5 -; SSE-NEXT: [[RV6:%.*]] = insertelement <16 x i16> [[RV5]], i16 [[R6]], i64 6 -; SSE-NEXT: [[RV7:%.*]] = insertelement <16 x i16> [[RV6]], i16 [[R7]], i64 7 -; SSE-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> poison, <16 x i32> -; SSE-NEXT: [[RV111:%.*]] = shufflevector <16 x i16> [[RV7]], <16 x i16> [[TMP8]], <16 x i32> -; SSE-NEXT: [[RV12:%.*]] = insertelement <16 x i16> [[RV111]], i16 [[R12]], i64 12 -; SSE-NEXT: [[RV13:%.*]] = insertelement <16 x i16> [[RV12]], i16 [[R13]], i64 13 -; SSE-NEXT: [[RV14:%.*]] = insertelement <16 x i16> [[RV13]], i16 [[R14]], i64 14 -; SSE-NEXT: [[RV15:%.*]] = insertelement <16 x i16> [[RV14]], i16 [[R15]], i64 15 -; SSE-NEXT: ret <16 x i16> [[RV15]] +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> +; SSE-NEXT: [[TMP3:%.*]] = sub <8 x i16> [[TMP1]], [[TMP2]] +; SSE-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> +; SSE-NEXT: [[TMP6:%.*]] = sub <8 x i16> [[TMP4]], [[TMP5]] +; SSE-NEXT: [[RV151:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> +; SSE-NEXT: ret <16 x i16> [[RV151]] ; ; SLM-LABEL: @test_v16i16( ; SLM-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll b/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll index 27c2fb7b66b2e..38be89c0901ec 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll @@ -146,42 +146,24 @@ define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) { define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) { ; SSE-LABEL: @test_v4f64( -; SSE-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 -; SSE-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 -; SSE-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A]], i64 2 -; SSE-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i64 3 -; SSE-NEXT: [[B0:%.*]] = extractelement <4 x double> [[B:%.*]], i64 0 -; SSE-NEXT: [[B1:%.*]] = extractelement <4 x double> [[B]], i64 1 -; SSE-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B]], i64 2 -; SSE-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 -; SSE-NEXT: [[R0:%.*]] = fsub double [[A0]], [[A1]] -; SSE-NEXT: [[R1:%.*]] = fsub double [[B0]], [[B1]] -; SSE-NEXT: [[R2:%.*]] = fsub double [[A2]], [[A3]] -; SSE-NEXT: [[R3:%.*]] = fsub double [[B2]], [[B3]] -; SSE-NEXT: [[R00:%.*]] = insertelement <4 x double> undef, double [[R0]], i64 0 -; SSE-NEXT: [[R01:%.*]] = insertelement <4 x double> [[R00]], double [[R1]], i64 1 -; SSE-NEXT: [[R02:%.*]] = insertelement <4 x double> [[R01]], double [[R2]], i64 2 -; SSE-NEXT: [[R03:%.*]] = insertelement <4 x double> [[R02]], double [[R3]], i64 3 -; SSE-NEXT: ret <4 x double> [[R03]] +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; SSE-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]] +; SSE-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; SSE-NEXT: [[TMP6:%.*]] = fsub <2 x double> [[TMP4]], [[TMP5]] +; SSE-NEXT: [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> +; SSE-NEXT: ret <4 x double> [[R031]] ; ; SLM-LABEL: @test_v4f64( -; SLM-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 -; SLM-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 -; SLM-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A]], i64 2 -; SLM-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i64 3 -; SLM-NEXT: [[B0:%.*]] = extractelement <4 x double> [[B:%.*]], i64 0 -; SLM-NEXT: [[B1:%.*]] = extractelement <4 x double> [[B]], i64 1 -; SLM-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B]], i64 2 -; SLM-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 -; SLM-NEXT: [[R0:%.*]] = fsub double [[A0]], [[A1]] -; SLM-NEXT: [[R1:%.*]] = fsub double [[B0]], [[B1]] -; SLM-NEXT: [[R2:%.*]] = fsub double [[A2]], [[A3]] -; SLM-NEXT: [[R3:%.*]] = fsub double [[B2]], [[B3]] -; SLM-NEXT: [[R00:%.*]] = insertelement <4 x double> undef, double [[R0]], i64 0 -; SLM-NEXT: [[R01:%.*]] = insertelement <4 x double> [[R00]], double [[R1]], i64 1 -; SLM-NEXT: [[R02:%.*]] = insertelement <4 x double> [[R01]], double [[R2]], i64 2 -; SLM-NEXT: [[R03:%.*]] = insertelement <4 x double> [[R02]], double [[R3]], i64 3 -; SLM-NEXT: ret <4 x double> [[R03]] +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> +; SLM-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; SLM-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]] +; SLM-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; SLM-NEXT: [[TMP6:%.*]] = fsub <2 x double> [[TMP4]], [[TMP5]] +; SLM-NEXT: [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> +; SLM-NEXT: ret <4 x double> [[R031]] ; ; AVX-LABEL: @test_v4f64( ; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> @@ -216,23 +198,13 @@ define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) { ; SSE-NEXT: ret <8 x float> [[TMP3]] ; ; SLM-LABEL: @test_v8f32( -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP3:%.*]] = fsub <2 x float> [[TMP1]], [[TMP2]] -; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[B:%.*]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP6:%.*]] = fsub <2 x float> [[TMP4]], [[TMP5]] -; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP9:%.*]] = fsub <2 x float> [[TMP7]], [[TMP8]] -; SLM-NEXT: [[TMP10:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP11:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> -; SLM-NEXT: [[TMP12:%.*]] = fsub <2 x float> [[TMP10]], [[TMP11]] -; SLM-NEXT: [[R033:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP6]], <8 x i32> -; SLM-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> poison, <8 x i32> -; SLM-NEXT: [[R052:%.*]] = shufflevector <8 x float> [[R033]], <8 x float> [[TMP13]], <8 x i32> -; SLM-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP12]], <2 x float> poison, <8 x i32> -; SLM-NEXT: [[R071:%.*]] = shufflevector <8 x float> [[R052]], <8 x float> [[TMP14]], <8 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <4 x i32> +; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> +; SLM-NEXT: [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]] +; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> +; SLM-NEXT: [[TMP6:%.*]] = fsub <4 x float> [[TMP4]], [[TMP5]] +; SLM-NEXT: [[R071:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> ; SLM-NEXT: ret <8 x float> [[R071]] ; ; AVX-LABEL: @test_v8f32( @@ -346,48 +318,14 @@ define <8 x i32> @test_v8i32(<8 x i32> %a, <8 x i32> %b) { define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) { ; SSE-LABEL: @test_v16i16( -; SSE-NEXT: [[B0:%.*]] = extractelement <16 x i16> [[B:%.*]], i64 0 -; SSE-NEXT: [[B1:%.*]] = extractelement <16 x i16> [[B]], i64 1 -; SSE-NEXT: [[B2:%.*]] = extractelement <16 x i16> [[B]], i64 2 -; SSE-NEXT: [[B3:%.*]] = extractelement <16 x i16> [[B]], i64 3 -; SSE-NEXT: [[B4:%.*]] = extractelement <16 x i16> [[B]], i64 4 -; SSE-NEXT: [[B5:%.*]] = extractelement <16 x i16> [[B]], i64 5 -; SSE-NEXT: [[B6:%.*]] = extractelement <16 x i16> [[B]], i64 6 -; SSE-NEXT: [[B7:%.*]] = extractelement <16 x i16> [[B]], i64 7 -; SSE-NEXT: [[B8:%.*]] = extractelement <16 x i16> [[B]], i64 8 -; SSE-NEXT: [[B9:%.*]] = extractelement <16 x i16> [[B]], i64 9 -; SSE-NEXT: [[B10:%.*]] = extractelement <16 x i16> [[B]], i64 10 -; SSE-NEXT: [[B11:%.*]] = extractelement <16 x i16> [[B]], i64 11 -; SSE-NEXT: [[B12:%.*]] = extractelement <16 x i16> [[B]], i64 12 -; SSE-NEXT: [[B13:%.*]] = extractelement <16 x i16> [[B]], i64 13 -; SSE-NEXT: [[B14:%.*]] = extractelement <16 x i16> [[B]], i64 14 -; SSE-NEXT: [[B15:%.*]] = extractelement <16 x i16> [[B]], i64 15 -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> poison, <4 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <4 x i32> -; SSE-NEXT: [[TMP3:%.*]] = sub <4 x i16> [[TMP1]], [[TMP2]] -; SSE-NEXT: [[R4:%.*]] = sub i16 [[B0]], [[B1]] -; SSE-NEXT: [[R5:%.*]] = sub i16 [[B2]], [[B3]] -; SSE-NEXT: [[R6:%.*]] = sub i16 [[B4]], [[B5]] -; SSE-NEXT: [[R7:%.*]] = sub i16 [[B6]], [[B7]] -; SSE-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <4 x i32> -; SSE-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <4 x i32> -; SSE-NEXT: [[TMP6:%.*]] = sub <4 x i16> [[TMP4]], [[TMP5]] -; SSE-NEXT: [[R12:%.*]] = sub i16 [[B8]], [[B9]] -; SSE-NEXT: [[R13:%.*]] = sub i16 [[B10]], [[B11]] -; SSE-NEXT: [[R14:%.*]] = sub i16 [[B12]], [[B13]] -; SSE-NEXT: [[R15:%.*]] = sub i16 [[B14]], [[B15]] -; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> undef, <16 x i32> -; SSE-NEXT: [[RV4:%.*]] = insertelement <16 x i16> [[TMP7]], i16 [[R4]], i64 4 -; SSE-NEXT: [[RV5:%.*]] = insertelement <16 x i16> [[RV4]], i16 [[R5]], i64 5 -; SSE-NEXT: [[RV6:%.*]] = insertelement <16 x i16> [[RV5]], i16 [[R6]], i64 6 -; SSE-NEXT: [[RV7:%.*]] = insertelement <16 x i16> [[RV6]], i16 [[R7]], i64 7 -; SSE-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> poison, <16 x i32> -; SSE-NEXT: [[RV111:%.*]] = shufflevector <16 x i16> [[RV7]], <16 x i16> [[TMP8]], <16 x i32> -; SSE-NEXT: [[RV12:%.*]] = insertelement <16 x i16> [[RV111]], i16 [[R12]], i64 12 -; SSE-NEXT: [[RV13:%.*]] = insertelement <16 x i16> [[RV12]], i16 [[R13]], i64 13 -; SSE-NEXT: [[RV14:%.*]] = insertelement <16 x i16> [[RV13]], i16 [[R14]], i64 14 -; SSE-NEXT: [[RV15:%.*]] = insertelement <16 x i16> [[RV14]], i16 [[R15]], i64 15 -; SSE-NEXT: ret <16 x i16> [[RV15]] +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> +; SSE-NEXT: [[TMP3:%.*]] = sub <8 x i16> [[TMP1]], [[TMP2]] +; SSE-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> +; SSE-NEXT: [[TMP6:%.*]] = sub <8 x i16> [[TMP4]], [[TMP5]] +; SSE-NEXT: [[RV151:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> +; SSE-NEXT: ret <16 x i16> [[RV151]] ; ; SLM-LABEL: @test_v16i16( ; SLM-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-transpose.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-transpose.ll index 7cbb8261d126b..6c7d5e6324ca6 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-transpose.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-transpose.ll @@ -28,13 +28,15 @@ define i32 @reduce_and4(i32 %acc, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, < ; ; SSE42-LABEL: @reduce_and4( ; SSE42-NEXT: entry: -; SSE42-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> -; SSE42-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> -; SSE42-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]]) -; SSE42-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP0]]) -; SSE42-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP2]], [[TMP3]] -; SSE42-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[ACC:%.*]] -; SSE42-NEXT: ret i32 [[OP_RDX1]] +; SSE42-NEXT: [[TMP0:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[V4:%.*]]) +; SSE42-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[V3:%.*]]) +; SSE42-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP0]], [[TMP1]] +; SSE42-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[V2:%.*]]) +; SSE42-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[TMP2]] +; SSE42-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[V1:%.*]]) +; SSE42-NEXT: [[OP_RDX2:%.*]] = and i32 [[OP_RDX1]], [[TMP3]] +; SSE42-NEXT: [[OP_RDX3:%.*]] = and i32 [[OP_RDX2]], [[ACC:%.*]] +; SSE42-NEXT: ret i32 [[OP_RDX3]] ; ; AVX-LABEL: @reduce_and4( ; AVX-NEXT: entry: @@ -101,13 +103,15 @@ define i32 @reduce_and4_transpose(i32 %acc, <4 x i32> %v1, <4 x i32> %v2, <4 x i ; SSE2-NEXT: ret i32 [[OP_RDX1]] ; ; SSE42-LABEL: @reduce_and4_transpose( -; SSE42-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> -; SSE42-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> -; SSE42-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP2]]) -; SSE42-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]]) -; SSE42-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP3]], [[TMP4]] -; SSE42-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[ACC:%.*]] -; SSE42-NEXT: ret i32 [[OP_RDX1]] +; SSE42-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[V4:%.*]]) +; SSE42-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[V3:%.*]]) +; SSE42-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP1]], [[TMP2]] +; SSE42-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[V2:%.*]]) +; SSE42-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[TMP3]] +; SSE42-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[V1:%.*]]) +; SSE42-NEXT: [[OP_RDX2:%.*]] = and i32 [[OP_RDX1]], [[TMP4]] +; SSE42-NEXT: [[OP_RDX3:%.*]] = and i32 [[OP_RDX2]], [[ACC:%.*]] +; SSE42-NEXT: ret i32 [[OP_RDX3]] ; ; AVX-LABEL: @reduce_and4_transpose( ; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32>