diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index e4aaa1e1b594a..6c6e8386e4b58 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -5707,6 +5707,31 @@ let Predicates = [UseSSE41, OptForSize] in {
             (ROUNDSDmi addr:$src1, timm:$src2)>;
 }
 
+multiclass scalar_unary_math_patterns_with_immediate<
+    SDPatternOperator OpNode, string OpcPrefix, SDNode Move, ValueType VT> {
+  let Predicates = [UseSSE41] in {
+    def : Pat<(VT(Move VT:$dst, (scalar_to_vector(OpNode
+                   (extractelt VT:$src, (i64 0)),
+                   i32:$imm)))),
+              (!cast<Instruction>(OpcPrefix#ri_Int) VT:$dst, VT:$src,
+               i32:$imm)>;
+  }
+
+  // Repeat for AVX versions of the instructions.
+  let Predicates = [UseAVX] in {
+    def : Pat<(VT(Move VT:$dst, (scalar_to_vector(OpNode
+                   (extractelt VT:$src, (i64 0)),
+                   i32:$imm)))),
+              (!cast<Instruction>("V"#OpcPrefix#ri_Int) VT:$dst, VT:$src,
+               i32:$imm)>;
+  }
+}
+
+defm : scalar_unary_math_patterns_with_immediate<X86any_VRndScale, "ROUNDSS",
+                                                 X86Movss, v4f32>;
+defm : scalar_unary_math_patterns_with_immediate<X86any_VRndScale, "ROUNDSD",
+                                                 X86Movsd, v2f64>;
+
 //===----------------------------------------------------------------------===//
 // SSE4.1 - Packed Bit Test
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/X86/vec_floor.ll b/llvm/test/CodeGen/X86/vec_floor.ll
index 7f4ed3394d10d..ffe493cdac1a8 100644
--- a/llvm/test/CodeGen/X86/vec_floor.ll
+++ b/llvm/test/CodeGen/X86/vec_floor.ll
@@ -821,14 +821,13 @@ define <4 x float> @const_trunc_v4f32() {
 define <4 x float> @floor_ss(<4 x float> %x, <4 x float> %y) nounwind {
 ; SSE41-LABEL: floor_ss:
 ; SSE41:       ## %bb.0:
-; SSE41-NEXT:    roundss $9, %xmm0, %xmm0
-; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; SSE41-NEXT:    roundss $9, %xmm0, %xmm1
+; SSE41-NEXT:    movaps %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: floor_ss:
 ; AVX:       ## %bb.0:
-; AVX-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm0
-; AVX-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT:    vroundss $9, %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; AVX512-LABEL: floor_ss:
@@ -846,14 +845,13 @@ declare float @llvm.floor.f32(float %s)
 define <2 x double> @floor_sd(<2 x double> %x, <2 x double> %y) nounwind {
 ; SSE41-LABEL: floor_sd:
 ; SSE41:       ## %bb.0:
-; SSE41-NEXT:    roundsd $9, %xmm0, %xmm0
-; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT:    roundsd $9, %xmm0, %xmm1
+; SSE41-NEXT:    movapd %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: floor_sd:
 ; AVX:       ## %bb.0:
-; AVX-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm0
-; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT:    vroundsd $9, %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; AVX512-LABEL: floor_sd:
@@ -1811,14 +1809,13 @@ define <2 x double> @floor_maskz_sd_mask8(<2 x double> %x, <2 x double> %y) noun
 define <4 x float> @ceil_ss(<4 x float> %x, <4 x float> %y) nounwind {
 ; SSE41-LABEL: ceil_ss:
 ; SSE41:       ## %bb.0:
-; SSE41-NEXT:    roundss $10, %xmm0, %xmm0
-; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; SSE41-NEXT:    roundss $10, %xmm0, %xmm1
+; SSE41-NEXT:    movaps %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: ceil_ss:
 ; AVX:       ## %bb.0:
-; AVX-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm0
-; AVX-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT:    vroundss $10, %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; AVX512-LABEL: ceil_ss:
@@ -1836,14 +1833,13 @@ declare float @llvm.ceil.f32(float %s)
 define <2 x double> @ceil_sd(<2 x double> %x, <2 x double> %y) nounwind {
 ; SSE41-LABEL: ceil_sd:
 ; SSE41:       ## %bb.0:
-; SSE41-NEXT:    roundsd $10, %xmm0, %xmm0
-; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT:    roundsd $10, %xmm0, %xmm1
+; SSE41-NEXT:    movapd %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: ceil_sd:
 ; AVX:       ## %bb.0:
-; AVX-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm0
-; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT:    vroundsd $10, %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; AVX512-LABEL: ceil_sd:
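
For reference, the scalar tests updated above reduce to IR of the following
shape (a minimal sketch based on floor_ss in vec_floor.ll; the value names and
index type here are illustrative):

  define <4 x float> @floor_ss(<4 x float> %x, <4 x float> %y) nounwind {
    ; Round element 0 of %x and insert the result into %y.
    %s = extractelement <4 x float> %x, i64 0
    %f = call float @llvm.floor.f32(float %s)
    %r = insertelement <4 x float> %y, float %f, i64 0
    ret <4 x float> %r
  }

Previously the rounded scalar and its reinsertion into %y were selected
separately, producing roundss plus a blendps (or a vmovss on AVX). The added
patterns match the movss/movsd node wrapping the rounded element and fold the
pair into a single (v)roundss/(v)roundsd, as the updated CHECK lines show.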