diff --git a/llvm/test/CodeGen/AArch64/sve2-hadd.ll b/llvm/test/CodeGen/AArch64/sve2-hadd.ll new file mode 100644 index 0000000000000..2d494c43d4ce7 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2-hadd.ll @@ -0,0 +1,672 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple aarch64-none-eabi -mattr=+sve2 -o - | FileCheck %s + +define @hadds_v2i32( %s0, %s1) { +; CHECK-LABEL: hadds_v2i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sxtw z0.d, p0/m, z0.d +; CHECK-NEXT: adr z0.d, [z0.d, z1.d, sxtw] +; CHECK-NEXT: lsr z0.d, z0.d, #1 +; CHECK-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %m = add %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @haddu_v2i32( %s0, %s1) { +; CHECK-LABEL: haddu_v2i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: and z0.d, z0.d, #0xffffffff +; CHECK-NEXT: adr z0.d, [z0.d, z1.d, uxtw] +; CHECK-NEXT: lsr z0.d, z0.d, #1 +; CHECK-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %m = add %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @hadds_v4i32( %s0, %s1) { +; CHECK-LABEL: hadds_v4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sunpkhi z2.d, z0.s +; CHECK-NEXT: sunpklo z0.d, z0.s +; CHECK-NEXT: sunpkhi z3.d, z1.s +; CHECK-NEXT: sunpklo z1.d, z1.s +; CHECK-NEXT: add z0.d, z0.d, z1.d +; CHECK-NEXT: add z1.d, z2.d, z3.d +; CHECK-NEXT: lsr z1.d, z1.d, #1 +; CHECK-NEXT: lsr z0.d, z0.d, #1 +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %m = add %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @haddu_v4i32( %s0, %s1) { +; CHECK-LABEL: haddu_v4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: uunpkhi z2.d, z0.s +; CHECK-NEXT: uunpklo z0.d, z0.s +; CHECK-NEXT: uunpkhi z3.d, z1.s +; CHECK-NEXT: uunpklo z1.d, z1.s +; CHECK-NEXT: add z0.d, z0.d, z1.d +; CHECK-NEXT: add z1.d, z2.d, z3.d +; CHECK-NEXT: lsr z1.d, z1.d, #1 +; CHECK-NEXT: lsr z0.d, z0.d, #1 +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %m = add %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @hadds_v2i16( %s0, %s1) { +; CHECK-LABEL: hadds_v2i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sxth z0.d, p0/m, z0.d +; CHECK-NEXT: sxth z1.d, p0/m, z1.d +; CHECK-NEXT: add z0.d, z0.d, z1.d +; CHECK-NEXT: and z0.d, z0.d, #0xffffffff +; CHECK-NEXT: lsr z0.d, z0.d, #1 +; CHECK-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %m = add %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @haddu_v2i16( %s0, %s1) { +; CHECK-LABEL: haddu_v2i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: and z0.d, z0.d, #0xffff +; CHECK-NEXT: and z1.d, z1.d, #0xffff +; CHECK-NEXT: add z0.d, z0.d, z1.d +; CHECK-NEXT: lsr z0.d, z0.d, #1 +; CHECK-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %m = add %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @hadds_v4i16( %s0, %s1) { +; CHECK-LABEL: hadds_v4i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sxth z0.s, p0/m, z0.s +; CHECK-NEXT: sxth z1.s, p0/m, z1.s +; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: lsr z0.s, z0.s, #1 +; CHECK-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %m = add %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @haddu_v4i16( %s0, %s1) { +; CHECK-LABEL: haddu_v4i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: and z0.s, z0.s, #0xffff +; CHECK-NEXT: and z1.s, z1.s, #0xffff +; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: lsr z0.s, z0.s, #1 +; CHECK-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %m = add %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @hadds_v8i16( %s0, %s1) { +; CHECK-LABEL: hadds_v8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sunpkhi z2.s, z0.h +; CHECK-NEXT: sunpklo z0.s, z0.h +; CHECK-NEXT: sunpkhi z3.s, z1.h +; CHECK-NEXT: sunpklo z1.s, z1.h +; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: add z1.s, z2.s, z3.s +; CHECK-NEXT: lsr z1.s, z1.s, #1 +; CHECK-NEXT: lsr z0.s, z0.s, #1 +; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h +; CHECK-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %m = add %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @haddu_v8i16( %s0, %s1) { +; CHECK-LABEL: haddu_v8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: uunpkhi z2.s, z0.h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpkhi z3.s, z1.h +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: add z1.s, z2.s, z3.s +; CHECK-NEXT: lsr z1.s, z1.s, #1 +; CHECK-NEXT: lsr z0.s, z0.s, #1 +; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h +; CHECK-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %m = add %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @hadds_v4i8( %s0, %s1) { +; CHECK-LABEL: hadds_v4i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sxtb z0.s, p0/m, z0.s +; CHECK-NEXT: sxtb z1.s, p0/m, z1.s +; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: and z0.s, z0.s, #0xffff +; CHECK-NEXT: lsr z0.s, z0.s, #1 +; CHECK-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %m = add %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @haddu_v4i8( %s0, %s1) { +; CHECK-LABEL: haddu_v4i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: and z0.s, z0.s, #0xff +; CHECK-NEXT: and z1.s, z1.s, #0xff +; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: lsr z0.s, z0.s, #1 +; CHECK-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %m = add %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @hadds_v8i8( %s0, %s1) { +; CHECK-LABEL: hadds_v8i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: sxtb z0.h, p0/m, z0.h +; CHECK-NEXT: sxtb z1.h, p0/m, z1.h +; CHECK-NEXT: add z0.h, z0.h, z1.h +; CHECK-NEXT: lsr z0.h, z0.h, #1 +; CHECK-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %m = add %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @haddu_v8i8( %s0, %s1) { +; CHECK-LABEL: haddu_v8i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: and z0.h, z0.h, #0xff +; CHECK-NEXT: and z1.h, z1.h, #0xff +; CHECK-NEXT: add z0.h, z0.h, z1.h +; CHECK-NEXT: lsr z0.h, z0.h, #1 +; CHECK-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %m = add %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @hadds_v16i8( %s0, %s1) { +; CHECK-LABEL: hadds_v16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sunpkhi z2.h, z0.b +; CHECK-NEXT: sunpklo z0.h, z0.b +; CHECK-NEXT: sunpkhi z3.h, z1.b +; CHECK-NEXT: sunpklo z1.h, z1.b +; CHECK-NEXT: add z0.h, z0.h, z1.h +; CHECK-NEXT: add z1.h, z2.h, z3.h +; CHECK-NEXT: lsr z1.h, z1.h, #1 +; CHECK-NEXT: lsr z0.h, z0.h, #1 +; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b +; CHECK-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %m = add %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @haddu_v16i8( %s0, %s1) { +; CHECK-LABEL: haddu_v16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: uunpkhi z2.h, z0.b +; CHECK-NEXT: uunpklo z0.h, z0.b +; CHECK-NEXT: uunpkhi z3.h, z1.b +; CHECK-NEXT: uunpklo z1.h, z1.b +; CHECK-NEXT: add z0.h, z0.h, z1.h +; CHECK-NEXT: add z1.h, z2.h, z3.h +; CHECK-NEXT: lsr z1.h, z1.h, #1 +; CHECK-NEXT: lsr z0.h, z0.h, #1 +; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b +; CHECK-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %m = add %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @rhadds_v2i32( %s0, %s1) { +; CHECK-LABEL: rhadds_v2i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sxtw z0.d, p0/m, z0.d +; CHECK-NEXT: sxtw z1.d, p0/m, z1.d +; CHECK-NEXT: eor z0.d, z0.d, z2.d +; CHECK-NEXT: sub z0.d, z1.d, z0.d +; CHECK-NEXT: lsr z0.d, z0.d, #1 +; CHECK-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %add = add %s0s, %s1s + %add2 = add %add, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) + %s = lshr %add2, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhaddu_v2i32( %s0, %s1) { +; CHECK-LABEL: rhaddu_v2i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff +; CHECK-NEXT: and z0.d, z0.d, #0xffffffff +; CHECK-NEXT: and z1.d, z1.d, #0xffffffff +; CHECK-NEXT: eor z0.d, z0.d, z2.d +; CHECK-NEXT: sub z0.d, z1.d, z0.d +; CHECK-NEXT: lsr z0.d, z0.d, #1 +; CHECK-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %add = add %s0s, %s1s + %add2 = add %add, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) + %s = lshr %add2, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhadds_v4i32( %s0, %s1) { +; CHECK-LABEL: rhadds_v4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sunpkhi z3.d, z0.s +; CHECK-NEXT: sunpklo z0.d, z0.s +; CHECK-NEXT: sunpkhi z4.d, z1.s +; CHECK-NEXT: sunpklo z1.d, z1.s +; CHECK-NEXT: eor z0.d, z0.d, z2.d +; CHECK-NEXT: eor z2.d, z3.d, z2.d +; CHECK-NEXT: sub z0.d, z1.d, z0.d +; CHECK-NEXT: sub z1.d, z4.d, z2.d +; CHECK-NEXT: lsr z0.d, z0.d, #1 +; CHECK-NEXT: lsr z1.d, z1.d, #1 +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %add = add %s0s, %s1s + %add2 = add %add, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) + %s = lshr %add2, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhaddu_v4i32( %s0, %s1) { +; CHECK-LABEL: rhaddu_v4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff +; CHECK-NEXT: uunpkhi z3.d, z0.s +; CHECK-NEXT: uunpklo z0.d, z0.s +; CHECK-NEXT: uunpkhi z4.d, z1.s +; CHECK-NEXT: uunpklo z1.d, z1.s +; CHECK-NEXT: eor z0.d, z0.d, z2.d +; CHECK-NEXT: eor z2.d, z3.d, z2.d +; CHECK-NEXT: sub z0.d, z1.d, z0.d +; CHECK-NEXT: sub z1.d, z4.d, z2.d +; CHECK-NEXT: lsr z0.d, z0.d, #1 +; CHECK-NEXT: lsr z1.d, z1.d, #1 +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %add = add %s0s, %s1s + %add2 = add %add, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) + %s = lshr %add2, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhadds_v2i16( %s0, %s1) { +; CHECK-LABEL: rhadds_v2i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sxth z0.d, p0/m, z0.d +; CHECK-NEXT: sxth z1.d, p0/m, z1.d +; CHECK-NEXT: eor z0.d, z0.d, z2.d +; CHECK-NEXT: sub z0.d, z1.d, z0.d +; CHECK-NEXT: and z0.d, z0.d, #0xffffffff +; CHECK-NEXT: lsr z0.d, z0.d, #1 +; CHECK-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %add = add %s0s, %s1s + %add2 = add %add, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %s = lshr %add2, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhaddu_v2i16( %s0, %s1) { +; CHECK-LABEL: rhaddu_v2i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff +; CHECK-NEXT: and z0.d, z0.d, #0xffff +; CHECK-NEXT: and z1.d, z1.d, #0xffff +; CHECK-NEXT: eor z0.d, z0.d, z2.d +; CHECK-NEXT: sub z0.d, z1.d, z0.d +; CHECK-NEXT: lsr z0.d, z0.d, #1 +; CHECK-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %add = add %s0s, %s1s + %add2 = add %add, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %s = lshr %add2, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhadds_v4i16( %s0, %s1) { +; CHECK-LABEL: rhadds_v4i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sxth z0.s, p0/m, z0.s +; CHECK-NEXT: sxth z1.s, p0/m, z1.s +; CHECK-NEXT: eor z0.d, z0.d, z2.d +; CHECK-NEXT: sub z0.s, z1.s, z0.s +; CHECK-NEXT: lsr z0.s, z0.s, #1 +; CHECK-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %add = add %s0s, %s1s + %add2 = add %add, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %s = lshr %add2, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhaddu_v4i16( %s0, %s1) { +; CHECK-LABEL: rhaddu_v4i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: and z0.s, z0.s, #0xffff +; CHECK-NEXT: and z1.s, z1.s, #0xffff +; CHECK-NEXT: eor z0.d, z0.d, z2.d +; CHECK-NEXT: sub z0.s, z1.s, z0.s +; CHECK-NEXT: lsr z0.s, z0.s, #1 +; CHECK-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %add = add %s0s, %s1s + %add2 = add %add, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %s = lshr %add2, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhadds_v8i16( %s0, %s1) { +; CHECK-LABEL: rhadds_v8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sunpkhi z3.s, z0.h +; CHECK-NEXT: sunpklo z0.s, z0.h +; CHECK-NEXT: sunpkhi z4.s, z1.h +; CHECK-NEXT: sunpklo z1.s, z1.h +; CHECK-NEXT: eor z0.d, z0.d, z2.d +; CHECK-NEXT: eor z2.d, z3.d, z2.d +; CHECK-NEXT: sub z0.s, z1.s, z0.s +; CHECK-NEXT: sub z1.s, z4.s, z2.s +; CHECK-NEXT: lsr z0.s, z0.s, #1 +; CHECK-NEXT: lsr z1.s, z1.s, #1 +; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h +; CHECK-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %add = add %s0s, %s1s + %add2 = add %add, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %s = lshr %add2, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhaddu_v8i16( %s0, %s1) { +; CHECK-LABEL: rhaddu_v8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: uunpkhi z3.s, z0.h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpkhi z4.s, z1.h +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: eor z0.d, z0.d, z2.d +; CHECK-NEXT: eor z2.d, z3.d, z2.d +; CHECK-NEXT: sub z0.s, z1.s, z0.s +; CHECK-NEXT: sub z1.s, z4.s, z2.s +; CHECK-NEXT: lsr z0.s, z0.s, #1 +; CHECK-NEXT: lsr z1.s, z1.s, #1 +; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h +; CHECK-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %add = add %s0s, %s1s + %add2 = add %add, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %s = lshr %add2, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhadds_v4i8( %s0, %s1) { +; CHECK-LABEL: rhadds_v4i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sxtb z0.s, p0/m, z0.s +; CHECK-NEXT: sxtb z1.s, p0/m, z1.s +; CHECK-NEXT: eor z0.d, z0.d, z2.d +; CHECK-NEXT: sub z0.s, z1.s, z0.s +; CHECK-NEXT: and z0.s, z0.s, #0xffff +; CHECK-NEXT: lsr z0.s, z0.s, #1 +; CHECK-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %add = add %s0s, %s1s + %add2 = add %add, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %s = lshr %add2, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhaddu_v4i8( %s0, %s1) { +; CHECK-LABEL: rhaddu_v4i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: and z0.s, z0.s, #0xff +; CHECK-NEXT: and z1.s, z1.s, #0xff +; CHECK-NEXT: eor z0.d, z0.d, z2.d +; CHECK-NEXT: sub z0.s, z1.s, z0.s +; CHECK-NEXT: lsr z0.s, z0.s, #1 +; CHECK-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %add = add %s0s, %s1s + %add2 = add %add, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %s = lshr %add2, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhadds_v8i8( %s0, %s1) { +; CHECK-LABEL: rhadds_v8i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z2.h, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sxtb z0.h, p0/m, z0.h +; CHECK-NEXT: sxtb z1.h, p0/m, z1.h +; CHECK-NEXT: eor z0.d, z0.d, z2.d +; CHECK-NEXT: sub z0.h, z1.h, z0.h +; CHECK-NEXT: lsr z0.h, z0.h, #1 +; CHECK-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %add = add %s0s, %s1s + %add2 = add %add, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %s = lshr %add2, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhaddu_v8i8( %s0, %s1) { +; CHECK-LABEL: rhaddu_v8i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z2.h, #-1 // =0xffffffffffffffff +; CHECK-NEXT: and z0.h, z0.h, #0xff +; CHECK-NEXT: and z1.h, z1.h, #0xff +; CHECK-NEXT: eor z0.d, z0.d, z2.d +; CHECK-NEXT: sub z0.h, z1.h, z0.h +; CHECK-NEXT: lsr z0.h, z0.h, #1 +; CHECK-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %add = add %s0s, %s1s + %add2 = add %add, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %s = lshr %add2, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhadds_v16i8( %s0, %s1) { +; CHECK-LABEL: rhadds_v16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z2.h, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sunpkhi z3.h, z0.b +; CHECK-NEXT: sunpklo z0.h, z0.b +; CHECK-NEXT: sunpkhi z4.h, z1.b +; CHECK-NEXT: sunpklo z1.h, z1.b +; CHECK-NEXT: eor z0.d, z0.d, z2.d +; CHECK-NEXT: eor z2.d, z3.d, z2.d +; CHECK-NEXT: sub z0.h, z1.h, z0.h +; CHECK-NEXT: sub z1.h, z4.h, z2.h +; CHECK-NEXT: lsr z0.h, z0.h, #1 +; CHECK-NEXT: lsr z1.h, z1.h, #1 +; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b +; CHECK-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %add = add %s0s, %s1s + %add2 = add %add, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %s = lshr %add2, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhaddu_v16i8( %s0, %s1) { +; CHECK-LABEL: rhaddu_v16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z2.h, #-1 // =0xffffffffffffffff +; CHECK-NEXT: uunpkhi z3.h, z0.b +; CHECK-NEXT: uunpklo z0.h, z0.b +; CHECK-NEXT: uunpkhi z4.h, z1.b +; CHECK-NEXT: uunpklo z1.h, z1.b +; CHECK-NEXT: eor z0.d, z0.d, z2.d +; CHECK-NEXT: eor z2.d, z3.d, z2.d +; CHECK-NEXT: sub z0.h, z1.h, z0.h +; CHECK-NEXT: sub z1.h, z4.h, z2.h +; CHECK-NEXT: lsr z0.h, z0.h, #1 +; CHECK-NEXT: lsr z1.h, z1.h, #1 +; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b +; CHECK-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %add = add %s0s, %s1s + %add2 = add %add, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %s = lshr %add2, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +}