Adds llvm/test/CodeGen/AArch64/neon-rshrn.ll, an llc/FileCheck test for the
AArch64 backend.

Every function rshrn_<type>_<N> performs the same three steps on a vector whose
elements are twice as wide as the result elements:
  1. add a splat of 2^(N-1),
  2. logical shift right by a splat of N,
  3. truncate each element to half its width.

The autogenerated CHECK lines record the code llc emits for each N:
  * N below the narrow element width:  add + shrn/shrn2
  * N equal to the narrow element width: addhn/addhn2
  * N above the narrow element width:  add + ushr + uzp1

The CHECK lines are produced by utils/update_llc_test_checks.py and should be
regenerated with that script instead of being edited by hand.

diff --git a/llvm/test/CodeGen/AArch64/neon-rshrn.ll b/llvm/test/CodeGen/AArch64/neon-rshrn.ll
new file mode 100644
index 0000000000000..94056d363060f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/neon-rshrn.ll
@@ -0,0 +1,976 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple aarch64-none-eabi -o - | FileCheck %s
+
+define <16 x i8> @rshrn_v16i16_1(<16 x i16> %a) {
+; CHECK-LABEL: rshrn_v16i16_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.8h, #1
+; CHECK-NEXT:    add v0.8h, v0.8h, v2.8h
+; CHECK-NEXT:    add v1.8h, v1.8h, v2.8h
+; CHECK-NEXT:    shrn v0.8b, v0.8h, #1
+; CHECK-NEXT:    shrn2 v0.16b, v1.8h, #1
+; CHECK-NEXT:    ret
+entry:
+  %b = add <16 x i16> %a, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %s = lshr <16 x i16> %b, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %m = trunc <16 x i16> %s to <16 x i8>
+  ret <16 x i8> %m
+}
+
+define <16 x i8> @rshrn_v16i16_2(<16 x i16> %a) {
+; CHECK-LABEL: rshrn_v16i16_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.8h, #2
+; CHECK-NEXT:    add v0.8h, v0.8h, v2.8h
+; CHECK-NEXT:    add v1.8h, v1.8h, v2.8h
+; CHECK-NEXT:    shrn v0.8b, v0.8h, #2
+; CHECK-NEXT:    shrn2 v0.16b, v1.8h, #2
+; CHECK-NEXT:    ret
+entry:
+  %b = add <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+  %s = lshr <16 x i16> %b, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+  %m = trunc <16 x i16> %s to <16 x i8>
+  ret <16 x i8> %m
+}
+
+define <16 x i8> @rshrn_v16i16_3(<16 x i16> %a) {
+; CHECK-LABEL: rshrn_v16i16_3:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.8h, #4
+; CHECK-NEXT:    add v0.8h, v0.8h, v2.8h
+; CHECK-NEXT:    add v1.8h, v1.8h, v2.8h
+; CHECK-NEXT:    shrn v0.8b, v0.8h, #3
+; CHECK-NEXT:    shrn2 v0.16b, v1.8h, #3
+; CHECK-NEXT:    ret
+entry:
+  %b = add <16 x i16> %a, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
+  %s = lshr <16 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  %m = trunc <16 x i16> %s to <16 x i8>
+  ret <16 x i8> %m
+}
+
+define <16 x i8> @rshrn_v16i16_4(<16 x i16> %a) {
+; CHECK-LABEL: rshrn_v16i16_4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.8h, #8
+; CHECK-NEXT:    add v0.8h, v0.8h, v2.8h
+; CHECK-NEXT:    add v1.8h, v1.8h, v2.8h
+; CHECK-NEXT:    shrn v0.8b, v0.8h, #4
+; CHECK-NEXT:    shrn2 v0.16b, v1.8h, #4
+; CHECK-NEXT:    ret
+entry:
+  %b = add <16 x i16> %a, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %s = lshr <16 x i16> %b, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
+  %m = trunc <16 x i16> %s to <16 x i8>
+  ret <16 x i8> %m
+}
+
+define <16 x i8> @rshrn_v16i16_5(<16 x i16> %a) {
+; CHECK-LABEL: rshrn_v16i16_5:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.8h, #16
+; CHECK-NEXT:    add v0.8h, v0.8h, v2.8h
+; CHECK-NEXT:    add v1.8h, v1.8h, v2.8h
+; CHECK-NEXT:    shrn v0.8b, v0.8h, #5
+; CHECK-NEXT:    shrn2 v0.16b, v1.8h, #5
+; CHECK-NEXT:    ret
+entry:
+  %b = add <16 x i16> %a, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+  %s = lshr <16 x i16> %b, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+  %m = trunc <16 x i16> %s to <16 x i8>
+  ret <16 x i8> %m
+}
+
+define <16 x i8> @rshrn_v16i16_6(<16 x i16> %a) {
+; CHECK-LABEL: rshrn_v16i16_6:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.8h, #32
+; CHECK-NEXT:    add v0.8h, v0.8h, v2.8h
+; CHECK-NEXT:    add v1.8h, v1.8h, v2.8h
+; CHECK-NEXT:    shrn v0.8b, v0.8h, #6
+; CHECK-NEXT:    shrn2 v0.16b, v1.8h, #6
+; CHECK-NEXT:    ret
+entry:
+  %b = add <16 x i16> %a, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
+  %s = lshr <16 x i16> %b, <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>
+  %m = trunc <16 x i16> %s to <16 x i8>
+  ret <16 x i8> %m
+}
+
+define <16 x i8> @rshrn_v16i16_7(<16 x i16> %a) {
+; CHECK-LABEL: rshrn_v16i16_7:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.8h, #64
+; CHECK-NEXT:    add v0.8h, v0.8h, v2.8h
+; CHECK-NEXT:    add v1.8h, v1.8h, v2.8h
+; CHECK-NEXT:    shrn v0.8b, v0.8h, #7
+; CHECK-NEXT:    shrn2 v0.16b, v1.8h, #7
+; CHECK-NEXT:    ret
+entry:
+  %b = add <16 x i16> %a, <i16 64, i16 64, i16 64, i16 64, i16 64, i16 64, i16 64, i16 64, i16 64, i16 64, i16 64, i16 64, i16 64, i16 64, i16 64, i16 64>
+  %s = lshr <16 x i16> %b, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+  %m = trunc <16 x i16> %s to <16 x i8>
+  ret <16 x i8> %m
+}
+
+define <16 x i8> @rshrn_v16i16_8(<16 x i16> %a) {
+; CHECK-LABEL: rshrn_v16i16_8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.8h, #128
+; CHECK-NEXT:    addhn v0.8b, v0.8h, v2.8h
+; CHECK-NEXT:    addhn2 v0.16b, v1.8h, v2.8h
+; CHECK-NEXT:    ret
+entry:
+  %b = add <16 x i16> %a, <i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128>
+  %s = lshr <16 x i16> %b, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %m = trunc <16 x i16> %s to <16 x i8>
+  ret <16 x i8> %m
+}
+
+define <16 x i8> @rshrn_v16i16_9(<16 x i16> %a) {
+; CHECK-LABEL: rshrn_v16i16_9:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.8h, #1, lsl #8
+; CHECK-NEXT:    add v0.8h, v0.8h, v2.8h
+; CHECK-NEXT:    add v1.8h, v1.8h, v2.8h
+; CHECK-NEXT:    ushr v0.8h, v0.8h, #9
+; CHECK-NEXT:    ushr v1.8h, v1.8h, #9
+; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    ret
+entry:
+  %b = add <16 x i16> %a, <i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256>
+  %s = lshr <16 x i16> %b, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
+  %m = trunc <16 x i16> %s to <16 x i8>
+  ret <16 x i8> %m
+}
+
+define <8 x i16> @rshrn_v8i32_1(<8 x i32> %a) {
+; CHECK-LABEL: rshrn_v8i32_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.4s, #1
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    shrn v0.4h, v0.4s, #1
+; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #1
+; CHECK-NEXT:    ret
+entry:
+  %b = add <8 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %s = lshr <8 x i32> %b, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %m = trunc <8 x i32> %s to <8 x i16>
+  ret <8 x i16> %m
+}
+
+define <8 x i16> @rshrn_v8i32_2(<8 x i32> %a) {
+; CHECK-LABEL: rshrn_v8i32_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.4s, #2
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    shrn v0.4h, v0.4s, #2
+; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #2
+; CHECK-NEXT:    ret
+entry:
+  %b = add <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  %s = lshr <8 x i32> %b, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  %m = trunc <8 x i32> %s to <8 x i16>
+  ret <8 x i16> %m
+}
+
+define <8 x i16> @rshrn_v8i32_3(<8 x i32> %a) {
+; CHECK-LABEL: rshrn_v8i32_3:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.4s, #4
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    shrn v0.4h, v0.4s, #3
+; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #3
+; CHECK-NEXT:    ret
+entry:
+  %b = add <8 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
+  %s = lshr <8 x i32> %b, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+  %m = trunc <8 x i32> %s to <8 x i16>
+  ret <8 x i16> %m
+}
+
+define <8 x i16> @rshrn_v8i32_4(<8 x i32> %a) {
+; CHECK-LABEL: rshrn_v8i32_4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.4s, #8
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    shrn v0.4h, v0.4s, #4
+; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #4
+; CHECK-NEXT:    ret
+entry:
+  %b = add <8 x i32> %a, <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+  %s = lshr <8 x i32> %b, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
+  %m = trunc <8 x i32> %s to <8 x i16>
+  ret <8 x i16> %m
+}
+
+define <8 x i16> @rshrn_v8i32_5(<8 x i32> %a) {
+; CHECK-LABEL: rshrn_v8i32_5:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.4s, #16
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    shrn v0.4h, v0.4s, #5
+; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #5
+; CHECK-NEXT:    ret
+entry:
+  %b = add <8 x i32> %a, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+  %s = lshr <8 x i32> %b, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+  %m = trunc <8 x i32> %s to <8 x i16>
+  ret <8 x i16> %m
+}
+
+define <8 x i16> @rshrn_v8i32_6(<8 x i32> %a) {
+; CHECK-LABEL: rshrn_v8i32_6:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.4s, #32
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    shrn v0.4h, v0.4s, #6
+; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #6
+; CHECK-NEXT:    ret
+entry:
+  %b = add <8 x i32> %a, <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
+  %s = lshr <8 x i32> %b, <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
+  %m = trunc <8 x i32> %s to <8 x i16>
+  ret <8 x i16> %m
+}
+
+define <8 x i16> @rshrn_v8i32_7(<8 x i32> %a) {
+; CHECK-LABEL: rshrn_v8i32_7:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.4s, #64
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    shrn v0.4h, v0.4s, #7
+; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #7
+; CHECK-NEXT:    ret
+entry:
+  %b = add <8 x i32> %a, <i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64>
+  %s = lshr <8 x i32> %b, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+  %m = trunc <8 x i32> %s to <8 x i16>
+  ret <8 x i16> %m
+}
+
+define <8 x i16> @rshrn_v8i32_8(<8 x i32> %a) {
+; CHECK-LABEL: rshrn_v8i32_8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.4s, #128
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    shrn v0.4h, v0.4s, #8
+; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #8
+; CHECK-NEXT:    ret
+entry:
+  %b = add <8 x i32> %a, <i32 128, i32 128, i32 128, i32 128, i32 128, i32 128, i32 128, i32 128>
+  %s = lshr <8 x i32> %b, <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+  %m = trunc <8 x i32> %s to <8 x i16>
+  ret <8 x i16> %m
+}
+
+define <8 x i16> @rshrn_v8i32_9(<8 x i32> %a) {
+; CHECK-LABEL: rshrn_v8i32_9:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.4s, #1, lsl #8
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    shrn v0.4h, v0.4s, #9
+; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #9
+; CHECK-NEXT:    ret
+entry:
+  %b = add <8 x i32> %a, <i32 256, i32 256, i32 256, i32 256, i32 256, i32 256, i32 256, i32 256>
+  %s = lshr <8 x i32> %b, <i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
+  %m = trunc <8 x i32> %s to <8 x i16>
+  ret <8 x i16> %m
+}
+
+define <8 x i16> @rshrn_v8i32_10(<8 x i32> %a) {
+; CHECK-LABEL: rshrn_v8i32_10:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.4s, #2, lsl #8
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    shrn v0.4h, v0.4s, #10
+; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #10
+; CHECK-NEXT:    ret
+entry:
+  %b = add <8 x i32> %a, <i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512>
+  %s = lshr <8 x i32> %b, <i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>
+  %m = trunc <8 x i32> %s to <8 x i16>
+  ret <8 x i16> %m
+}
+
+define <8 x i16> @rshrn_v8i32_11(<8 x i32> %a) {
+; CHECK-LABEL: rshrn_v8i32_11:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.4s, #4, lsl #8
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    shrn v0.4h, v0.4s, #11
+; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #11
+; CHECK-NEXT:    ret
+entry:
+  %b = add <8 x i32> %a, <i32 1024, i32 1024, i32 1024, i32 1024, i32 1024, i32 1024, i32 1024, i32 1024>
+  %s = lshr <8 x i32> %b, <i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11>
+  %m = trunc <8 x i32> %s to <8 x i16>
+  ret <8 x i16> %m
+}
+
+define <8 x i16> @rshrn_v8i32_12(<8 x i32> %a) {
+; CHECK-LABEL: rshrn_v8i32_12:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.4s, #8, lsl #8
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    shrn v0.4h, v0.4s, #12
+; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #12
+; CHECK-NEXT:    ret
+entry:
+  %b = add <8 x i32> %a, <i32 2048, i32 2048, i32 2048, i32 2048, i32 2048, i32 2048, i32 2048, i32 2048>
+  %s = lshr <8 x i32> %b, <i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
+  %m = trunc <8 x i32> %s to <8 x i16>
+  ret <8 x i16> %m
+}
+
+define <8 x i16> @rshrn_v8i32_13(<8 x i32> %a) {
+; CHECK-LABEL: rshrn_v8i32_13:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.4s, #16, lsl #8
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    shrn v0.4h, v0.4s, #13
+; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #13
+; CHECK-NEXT:    ret
+entry:
+  %b = add <8 x i32> %a, <i32 4096, i32 4096, i32 4096, i32 4096, i32 4096, i32 4096, i32 4096, i32 4096>
+  %s = lshr <8 x i32> %b, <i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13>
+  %m = trunc <8 x i32> %s to <8 x i16>
+  ret <8 x i16> %m
+}
+
+define <8 x i16> @rshrn_v8i32_14(<8 x i32> %a) {
+; CHECK-LABEL: rshrn_v8i32_14:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.4s, #32, lsl #8
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    shrn v0.4h, v0.4s, #14
+; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #14
+; CHECK-NEXT:    ret
+entry:
+  %b = add <8 x i32> %a, <i32 8192, i32 8192, i32 8192, i32 8192, i32 8192, i32 8192, i32 8192, i32 8192>
+  %s = lshr <8 x i32> %b, <i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14>
+  %m = trunc <8 x i32> %s to <8 x i16>
+  ret <8 x i16> %m
+}
+
+define <8 x i16> @rshrn_v8i32_15(<8 x i32> %a) {
+; CHECK-LABEL: rshrn_v8i32_15:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.4s, #64, lsl #8
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    shrn v0.4h, v0.4s, #15
+; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #15
+; CHECK-NEXT:    ret
+entry:
+  %b = add <8 x i32> %a, <i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384>
+  %s = lshr <8 x i32> %b, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+  %m = trunc <8 x i32> %s to <8 x i16>
+  ret <8 x i16> %m
+}
+
+define <8 x i16> @rshrn_v8i32_16(<8 x i32> %a) {
+; CHECK-LABEL: rshrn_v8i32_16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.4s, #128, lsl #8
+; CHECK-NEXT:    addhn v0.4h, v0.4s, v2.4s
+; CHECK-NEXT:    addhn2 v0.8h, v1.4s, v2.4s
+; CHECK-NEXT:    ret
+entry:
+  %b = add <8 x i32> %a, <i32 32768, i32 32768, i32 32768, i32 32768, i32 32768, i32 32768, i32 32768, i32 32768>
+  %s = lshr <8 x i32> %b, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+  %m = trunc <8 x i32> %s to <8 x i16>
+  ret <8 x i16> %m
+}
+
+define <8 x i16> @rshrn_v8i32_17(<8 x i32> %a) {
+; CHECK-LABEL: rshrn_v8i32_17:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.4s, #1, lsl #16
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    ushr v0.4s, v0.4s, #17
+; CHECK-NEXT:    ushr v1.4s, v1.4s, #17
+; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+entry:
+  %b = add <8 x i32> %a, <i32 65536, i32 65536, i32 65536, i32 65536, i32 65536, i32 65536, i32 65536, i32 65536>
+  %s = lshr <8 x i32> %b, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
+  %m = trunc <8 x i32> %s to <8 x i16>
+  ret <8 x i16> %m
+}
+
+define <4 x i32> @rshrn_v4i64_1(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    shrn v0.2s, v0.2d, #1
+; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #1
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+  %s = lshr <4 x i64> %b, <i64 1, i64 1, i64 1, i64 1>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_2(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #2
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    shrn v0.2s, v0.2d, #2
+; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #2
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
+  %s = lshr <4 x i64> %b, <i64 2, i64 2, i64 2, i64 2>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_3(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_3:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #4
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    shrn v0.2s, v0.2d, #3
+; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #3
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 4, i64 4, i64 4, i64 4>
+  %s = lshr <4 x i64> %b, <i64 3, i64 3, i64 3, i64 3>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_4(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #8
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    shrn v0.2s, v0.2d, #4
+; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #4
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 8, i64 8, i64 8, i64 8>
+  %s = lshr <4 x i64> %b, <i64 4, i64 4, i64 4, i64 4>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_5(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_5:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #16
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    shrn v0.2s, v0.2d, #5
+; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #5
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 16, i64 16, i64 16, i64 16>
+  %s = lshr <4 x i64> %b, <i64 5, i64 5, i64 5, i64 5>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_6(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_6:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #32
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    shrn v0.2s, v0.2d, #6
+; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #6
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 32, i64 32, i64 32, i64 32>
+  %s = lshr <4 x i64> %b, <i64 6, i64 6, i64 6, i64 6>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_7(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_7:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #64
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    shrn v0.2s, v0.2d, #7
+; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #7
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 64, i64 64, i64 64, i64 64>
+  %s = lshr <4 x i64> %b, <i64 7, i64 7, i64 7, i64 7>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_8(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #128
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    shrn v0.2s, v0.2d, #8
+; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #8
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 128, i64 128, i64 128, i64 128>
+  %s = lshr <4 x i64> %b, <i64 8, i64 8, i64 8, i64 8>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_9(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_9:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #256
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    shrn v0.2s, v0.2d, #9
+; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #9
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 256, i64 256, i64 256, i64 256>
+  %s = lshr <4 x i64> %b, <i64 9, i64 9, i64 9, i64 9>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_10(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_10:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #512
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    shrn v0.2s, v0.2d, #10
+; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #10
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 512, i64 512, i64 512, i64 512>
+  %s = lshr <4 x i64> %b, <i64 10, i64 10, i64 10, i64 10>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_11(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_11:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #1024
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    shrn v0.2s, v0.2d, #11
+; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #11
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 1024, i64 1024, i64 1024, i64 1024>
+  %s = lshr <4 x i64> %b, <i64 11, i64 11, i64 11, i64 11>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_12(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_12:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #2048
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    shrn v0.2s, v0.2d, #12
+; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #12
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 2048, i64 2048, i64 2048, i64 2048>
+  %s = lshr <4 x i64> %b, <i64 12, i64 12, i64 12, i64 12>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_13(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_13:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #4096
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    shrn v0.2s, v0.2d, #13
+; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #13
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 4096, i64 4096, i64 4096, i64 4096>
+  %s = lshr <4 x i64> %b, <i64 13, i64 13, i64 13, i64 13>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_14(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_14:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #8192
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    shrn v0.2s, v0.2d, #14
+; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #14
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 8192, i64 8192, i64 8192, i64 8192>
+  %s = lshr <4 x i64> %b, <i64 14, i64 14, i64 14, i64 14>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_15(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_15:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #16384
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    shrn v0.2s, v0.2d, #15
+; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #15
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 16384, i64 16384, i64 16384, i64 16384>
+  %s = lshr <4 x i64> %b, <i64 15, i64 15, i64 15, i64 15>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_16(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #32768
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    shrn v0.2s, v0.2d, #16
+; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #16
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 32768, i64 32768, i64 32768, i64 32768>
+  %s = lshr <4 x i64> %b, <i64 16, i64 16, i64 16, i64 16>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_17(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_17:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #65536
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    shrn v0.2s, v0.2d, #17
+; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #17
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 65536, i64 65536, i64 65536, i64 65536>
+  %s = lshr <4 x i64> %b, <i64 17, i64 17, i64 17, i64 17>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_18(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_18:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #131072
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    shrn v0.2s, v0.2d, #18
+; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #18
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 131072, i64 131072, i64 131072, i64 131072>
+  %s = lshr <4 x i64> %b, <i64 18, i64 18, i64 18, i64 18>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_19(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_19:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #262144
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    shrn v0.2s, v0.2d, #19
+; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #19
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 262144, i64 262144, i64 262144, i64 262144>
+  %s = lshr <4 x i64> %b, <i64 19, i64 19, i64 19, i64 19>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_20(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_20:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #524288
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    shrn v0.2s, v0.2d, #20
+; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #20
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 524288, i64 524288, i64 524288, i64 524288>
+  %s = lshr <4 x i64> %b, <i64 20, i64 20, i64 20, i64 20>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_21(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_21:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #1048576
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    shrn v0.2s, v0.2d, #21
+; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #21
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 1048576, i64 1048576, i64 1048576, i64 1048576>
+  %s = lshr <4 x i64> %b, <i64 21, i64 21, i64 21, i64 21>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_22(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_22:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #2097152
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    shrn v0.2s, v0.2d, #22
+; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #22
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 2097152, i64 2097152, i64 2097152, i64 2097152>
+  %s = lshr <4 x i64> %b, <i64 22, i64 22, i64 22, i64 22>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_23(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_23:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #4194304
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    shrn v0.2s, v0.2d, #23
+; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #23
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 4194304, i64 4194304, i64 4194304, i64 4194304>
+  %s = lshr <4 x i64> %b, <i64 23, i64 23, i64 23, i64 23>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_24(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_24:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #8388608
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    shrn v0.2s, v0.2d, #24
+; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #24
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 8388608, i64 8388608, i64 8388608, i64 8388608>
+  %s = lshr <4 x i64> %b, <i64 24, i64 24, i64 24, i64 24>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_25(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_25:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #16777216
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    shrn v0.2s, v0.2d, #25
+; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #25
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 16777216, i64 16777216, i64 16777216, i64 16777216>
+  %s = lshr <4 x i64> %b, <i64 25, i64 25, i64 25, i64 25>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_26(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_26:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #33554432
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    shrn v0.2s, v0.2d, #26
+; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #26
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 33554432, i64 33554432, i64 33554432, i64 33554432>
+  %s = lshr <4 x i64> %b, <i64 26, i64 26, i64 26, i64 26>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_27(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_27:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #67108864
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    shrn v0.2s, v0.2d, #27
+; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #27
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 67108864, i64 67108864, i64 67108864, i64 67108864>
+  %s = lshr <4 x i64> %b, <i64 27, i64 27, i64 27, i64 27>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_28(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_28:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #134217728
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    shrn v0.2s, v0.2d, #28
+; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #28
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 134217728, i64 134217728, i64 134217728, i64 134217728>
+  %s = lshr <4 x i64> %b, <i64 28, i64 28, i64 28, i64 28>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_29(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_29:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #268435456
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    shrn v0.2s, v0.2d, #29
+; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #29
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 268435456, i64 268435456, i64 268435456, i64 268435456>
+  %s = lshr <4 x i64> %b, <i64 29, i64 29, i64 29, i64 29>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_30(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_30:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #536870912
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    shrn v0.2s, v0.2d, #30
+; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #30
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 536870912, i64 536870912, i64 536870912, i64 536870912>
+  %s = lshr <4 x i64> %b, <i64 30, i64 30, i64 30, i64 30>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_31(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_31:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #1073741824
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    shrn v0.2s, v0.2d, #31
+; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #31
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 1073741824, i64 1073741824, i64 1073741824, i64 1073741824>
+  %s = lshr <4 x i64> %b, <i64 31, i64 31, i64 31, i64 31>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_32(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #-2147483648
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    addhn v0.2s, v0.2d, v2.2d
+; CHECK-NEXT:    addhn2 v0.4s, v1.2d, v2.2d
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 2147483648, i64 2147483648, i64 2147483648, i64 2147483648>
+  %s = lshr <4 x i64> %b, <i64 32, i64 32, i64 32, i64 32>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}
+
+define <4 x i32> @rshrn_v4i64_33(<4 x i64> %a) {
+; CHECK-LABEL: rshrn_v4i64_33:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x8, #4294967296
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    ushr v0.2d, v0.2d, #33
+; CHECK-NEXT:    ushr v1.2d, v1.2d, #33
+; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+entry:
+  %b = add <4 x i64> %a, <i64 4294967296, i64 4294967296, i64 4294967296, i64 4294967296>
+  %s = lshr <4 x i64> %b, <i64 33, i64 33, i64 33, i64 33>
+  %m = trunc <4 x i64> %s to <4 x i32>
+  ret <4 x i32> %m
+}