diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 35ea6aa68bbc3c..74a6baba374ca0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3383,6 +3383,8 @@ bool TargetLowering::SimplifyDemandedVectorElts(
     break;
   }
   case ISD::MUL:
+  case ISD::MULHU:
+  case ISD::MULHS:
   case ISD::AND: {
     SDValue Op0 = Op.getOperand(0);
     SDValue Op1 = Op.getOperand(1);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
index d9e2a1c409eb15..c2f5860bbaa4da 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
@@ -766,18 +766,11 @@ define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) {
 define i32 @extractelt_udiv_v4i32(<4 x i32> %x) {
 ; RV32NOM-LABEL: extractelt_udiv_v4i32:
 ; RV32NOM: # %bb.0:
-; RV32NOM-NEXT: li a0, 1
 ; RV32NOM-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; RV32NOM-NEXT: vmv.s.x v9, a0
-; RV32NOM-NEXT: vmv.v.i v10, 0
-; RV32NOM-NEXT: vsetvli zero, zero, e32, m1, tu, mu
-; RV32NOM-NEXT: vslideup.vi v10, v9, 3
-; RV32NOM-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; RV32NOM-NEXT: lui a0, %hi(.LCPI39_0)
-; RV32NOM-NEXT: addi a0, a0, %lo(.LCPI39_0)
-; RV32NOM-NEXT: vle32.v v9, (a0)
-; RV32NOM-NEXT: vsrl.vv v8, v8, v10
-; RV32NOM-NEXT: vmulhu.vv v8, v8, v9
+; RV32NOM-NEXT: vsrl.vi v8, v8, 0
+; RV32NOM-NEXT: lui a0, 322639
+; RV32NOM-NEXT: addi a0, a0, -945
+; RV32NOM-NEXT: vmulhu.vx v8, v8, a0
 ; RV32NOM-NEXT: vsetivli zero, 1, e32, m1, ta, mu
 ; RV32NOM-NEXT: vslidedown.vi v8, v8, 2
 ; RV32NOM-NEXT: vmv.x.s a0, v8
@@ -797,18 +790,11 @@ define i32 @extractelt_udiv_v4i32(<4 x i32> %x) {
 ;
 ; RV64-LABEL: extractelt_udiv_v4i32:
 ; RV64: # %bb.0:
-; RV64-NEXT: li a0, 1
 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vmv.v.i v10, 0
-; RV64-NEXT: vsetvli zero, zero, e32, m1, tu, mu
-; RV64-NEXT: vslideup.vi v10, v9, 3
-; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; RV64-NEXT: lui a0, %hi(.LCPI39_0)
-; RV64-NEXT: addi a0, a0, %lo(.LCPI39_0)
-; RV64-NEXT: vle32.v v9, (a0)
-; RV64-NEXT: vsrl.vv v8, v8, v10
-; RV64-NEXT: vmulhu.vv v8, v8, v9
+; RV64-NEXT: vsrl.vi v8, v8, 0
+; RV64-NEXT: lui a0, 322639
+; RV64-NEXT: addiw a0, a0, -945
+; RV64-NEXT: vmulhu.vx v8, v8, a0
 ; RV64-NEXT: vsrl.vi v8, v8, 2
 ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, mu
 ; RV64-NEXT: vslidedown.vi v8, v8, 2
diff --git a/llvm/test/CodeGen/X86/combine-sdiv.ll b/llvm/test/CodeGen/X86/combine-sdiv.ll
index b60c1d3f1317d0..504cf244348ed7 100644
--- a/llvm/test/CodeGen/X86/combine-sdiv.ll
+++ b/llvm/test/CodeGen/X86/combine-sdiv.ll
@@ -2678,7 +2678,7 @@ define <8 x i16> @combine_vec_sdiv_nonuniform5(<8 x i16> %x) {
 ; SSE41-NEXT: pmullw %xmm0, %xmm1
 ; SSE41-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE41-NEXT: paddw %xmm1, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <256,16384,4096,u,u,32768,512,256>
+; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <256,16384,4096,u,u,u,512,256>
 ; SSE41-NEXT: pmulhw %xmm0, %xmm1
 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3,4],xmm1[5,6,7]
 ; SSE41-NEXT: movdqa %xmm0, %xmm2
diff --git a/llvm/test/CodeGen/X86/combine-udiv.ll b/llvm/test/CodeGen/X86/combine-udiv.ll
index 8d1aff586c7c94..0de0fbdf5da081 100644
--- a/llvm/test/CodeGen/X86/combine-udiv.ll
+++ b/llvm/test/CodeGen/X86/combine-udiv.ll
@@ -688,18 +688,20 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
 define <8 x i16> @pr38477(<8 x i16> %a0) {
 ; SSE2-LABEL: pr38477:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,4957,57457,4103,16385,35545,2048,2115]
-; SSE2-NEXT: pmulhuw %xmm0, %xmm2
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,4957,57457,4103,16385,35545,2048,2115]
+; SSE2-NEXT: pmulhuw %xmm0, %xmm1
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: psubw %xmm1, %xmm2
+; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE2-NEXT: paddw %xmm1, %xmm2
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,65535,0,65535]
+; SSE2-NEXT: pandn %xmm2, %xmm1
+; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE2-NEXT: por %xmm1, %xmm2
 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,65535,65535,65535]
+; SSE2-NEXT: pand %xmm1, %xmm2
 ; SSE2-NEXT: pandn %xmm0, %xmm1
-; SSE2-NEXT: psubw %xmm2, %xmm0
-; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT: paddw %xmm2, %xmm0
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535,65535,65535,0,65535]
-; SSE2-NEXT: pandn %xmm0, %xmm2
-; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE2-NEXT: por %xmm2, %xmm1
-; SSE2-NEXT: por %xmm0, %xmm1
 ; SSE2-NEXT: movdqa %xmm1, %xmm0
 ; SSE2-NEXT: retq
 ;
diff --git a/llvm/test/CodeGen/X86/pmulh.ll b/llvm/test/CodeGen/X86/pmulh.ll
index cac800de1a6f52..442d853cf5e369 100644
--- a/llvm/test/CodeGen/X86/pmulh.ll
+++ b/llvm/test/CodeGen/X86/pmulh.ll
@@ -55,10 +55,9 @@ define <4 x i16> @and_mulhuw_v4i16(<4 x i64> %a, <4 x i64> %b) {
 ;
 ; AVX2-LABEL: and_mulhuw_v4i16:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3],ymm0[4],ymm2[5,6,7],ymm0[8],ymm2[9,10,11],ymm0[12],ymm2[13,14,15]
-; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3],ymm1[4],ymm2[5,6,7],ymm1[8],ymm2[9,10,11],ymm1[12],ymm2[13,14,15]
 ; AVX2-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7],ymm0[8],ymm1[9,10,11],ymm0[12],ymm1[13,14,15]
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
@@ -67,9 +66,6 @@ define <4 x i16> @and_mulhuw_v4i16(<4 x i64> %a, <4 x i64> %b) {
 ;
 ; AVX512-LABEL: and_mulhuw_v4i16:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3],ymm0[4],ymm2[5,6,7],ymm0[8],ymm2[9,10,11],ymm0[12],ymm2[13,14,15]
-; AVX512-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3],ymm1[4],ymm2[5,6,7],ymm1[8],ymm2[9,10,11],ymm1[12],ymm2[13,14,15]
 ; AVX512-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT: vpmovqw %zmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
index f455e4b88c7158..5b6aac24991982 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
@@ -1398,7 +1398,7 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
 ;
 ; SSE41-LABEL: constant_shift_v8i16:
 ; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 =
+; SSE41-NEXT: movdqa {{.*#+}} xmm1 =
 ; SSE41-NEXT: pmulhw %xmm0, %xmm1
 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
 ; SSE41-NEXT: psraw $1, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
index 1ba47a7adbbe78..f50ea52c948d97 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
@@ -1790,7 +1790,7 @@ define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind {
 ;
 ; SSE41-LABEL: constant_shift_v4i16:
 ; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 =
+; SSE41-NEXT: movdqa {{.*#+}} xmm1 =
 ; SSE41-NEXT: pmulhw %xmm0, %xmm1
 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
 ; SSE41-NEXT: psraw $1, %xmm0
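
Note (added context, not part of the patch): ISD::MULHU and ISD::MULHS compute the high half of a widening multiply independently in each vector lane, so each result element depends only on the matching element of the two operands. That is why they can safely share the existing ISD::MUL/ISD::AND path in SimplifyDemandedVectorElts, and why the test diffs above drop the blends, splat materialization, and constant-pool loads that only fed undemanded lanes. Below is a minimal standalone C++ sketch of that per-lane property; the helper names mulhu16/mulhs16 are illustrative only and are not LLVM APIs.

// Sketch only: scalar models of one i16 lane of MULHU/MULHS.
#include <cstdint>
#include <cstdio>

// High 16 bits of the 16x16->32 unsigned multiply (one lane of MULHU).
static uint16_t mulhu16(uint16_t A, uint16_t B) {
  return static_cast<uint16_t>((static_cast<uint32_t>(A) * B) >> 16);
}

// High 16 bits of the 16x16->32 signed multiply (one lane of MULHS).
static int16_t mulhs16(int16_t A, int16_t B) {
  return static_cast<int16_t>((static_cast<int32_t>(A) * B) >> 16);
}

int main() {
  uint16_t X[4] = {1, 40000, 7, 65535};
  uint16_t Y[4] = {9, 50000, 3, 65535};
  // If only lane 1 of the vector result is demanded, the other lanes of X and
  // Y never influence it, so instructions that merely define those lanes can
  // be dropped -- the justification for treating MULHU/MULHS like MUL in
  // SimplifyDemandedVectorElts.
  uint16_t DemandedLane = mulhu16(X[1], Y[1]);
  printf("mulhu lane 1 = %u\n", static_cast<unsigned>(DemandedLane));
  printf("mulhs(-3, 20000) = %d\n", static_cast<int>(mulhs16(-3, 20000)));
  return 0;
}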