diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 962f0d98e3be9..8bb9541bfe102 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3524,6 +3524,10 @@ bool TargetLowering::SimplifyDemandedVectorElts( } [[fallthrough]]; } + case ISD::AVGCEILS: + case ISD::AVGCEILU: + case ISD::AVGFLOORS: + case ISD::AVGFLOORU: case ISD::OR: case ISD::XOR: case ISD::SUB: diff --git a/llvm/test/CodeGen/AArch64/hadd-combine.ll b/llvm/test/CodeGen/AArch64/hadd-combine.ll index 491bf40ea4aab..c0f76784eb375 100644 --- a/llvm/test/CodeGen/AArch64/hadd-combine.ll +++ b/llvm/test/CodeGen/AArch64/hadd-combine.ll @@ -903,6 +903,58 @@ define <8 x i16> @shadd_fixedwidth_v8i16(<8 x i16> %a0, <8 x i16> %a1) { ret <8 x i16> %res } +define <8 x i16> @shadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) { +; CHECK-LABEL: shadd_demandedelts: +; CHECK: // %bb.0: +; CHECK-NEXT: dup v0.8h, v0.h[0] +; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h +; CHECK-NEXT: dup v0.8h, v0.h[0] +; CHECK-NEXT: ret + %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer + %op = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %s0, <8 x i16> %a1) + %r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer + ret <8 x i16> %r0 +} + +define <8 x i16> @srhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) { +; CHECK-LABEL: srhadd_demandedelts: +; CHECK: // %bb.0: +; CHECK-NEXT: dup v0.8h, v0.h[0] +; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h +; CHECK-NEXT: dup v0.8h, v0.h[0] +; CHECK-NEXT: ret + %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer + %op = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %s0, <8 x i16> %a1) + %r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer + ret <8 x i16> %r0 +} + +define <8 x i16> @uhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) { +; CHECK-LABEL: uhadd_demandedelts: 
+; CHECK: // %bb.0: +; CHECK-NEXT: dup v0.8h, v0.h[0] +; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h +; CHECK-NEXT: dup v0.8h, v0.h[0] +; CHECK-NEXT: ret + %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer + %op = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %s0, <8 x i16> %a1) + %r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer + ret <8 x i16> %r0 +} + +define <8 x i16> @urhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) { +; CHECK-LABEL: urhadd_demandedelts: +; CHECK: // %bb.0: +; CHECK-NEXT: dup v0.8h, v0.h[0] +; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h +; CHECK-NEXT: dup v0.8h, v0.h[0] +; CHECK-NEXT: ret + %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer + %op = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %s0, <8 x i16> %a1) + %r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer + ret <8 x i16> %r0 +} + declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>) declare <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16>, <4 x i16>) declare <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32>, <2 x i32>) diff --git a/llvm/test/CodeGen/X86/combine-pavg.ll b/llvm/test/CodeGen/X86/combine-pavg.ll index 7a8ddf5178d3d..cb2d426a52b4b 100644 --- a/llvm/test/CodeGen/X86/combine-pavg.ll +++ b/llvm/test/CodeGen/X86/combine-pavg.ll @@ -84,25 +84,22 @@ define <16 x i8> @combine_pavgw_knownbits(<8 x i16> %a0, <8 x i16> %a1, <8 x i16 define <8 x i16>
@combine_pavgw_demandedelts(<8 x i16> %a0, <8 x i16> %a1) { ; SSE-LABEL: combine_pavgw_demandedelts: ; SSE: # %bb.0: -; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] -; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,12,13,12,13] ; SSE-NEXT: pavgw %xmm1, %xmm0 +; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE-NEXT: retq ; ; AVX1-LABEL: combine_pavgw_demandedelts: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] -; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,12,13,12,13] ; AVX1-NEXT: vpavgw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; AVX1-NEXT: retq ; ; AVX2-LABEL: combine_pavgw_demandedelts: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1 -; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0 ; AVX2-NEXT: vpavgw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0 ; AVX2-NEXT: retq %s0 = shufflevector <8 x i16> %a0, <8 x i16> poison, <8 x i32> %avg = tail call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %s0, <8 x i16> %a1)