diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 51a839c34598d..13677e5a47eaf 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1612,27 +1612,40 @@ def int_aarch64_sve_adrd : AdvSIMD_2VectorArg_Intrinsic;
 //

 def int_aarch64_sve_add : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_add_u : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_sub : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_sub_u : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_subr : AdvSIMD_Pred2VectorArg_Intrinsic;

 def int_aarch64_sve_pmul : AdvSIMD_2VectorArg_Intrinsic;

 def int_aarch64_sve_mul : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_mul_u : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_mul_lane : AdvSIMD_2VectorArgIndexed_Intrinsic;
 def int_aarch64_sve_smulh : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_smulh_u : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_umulh : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_umulh_u : AdvSIMD_Pred2VectorArg_Intrinsic;

 def int_aarch64_sve_sdiv : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_sdiv_u : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_udiv : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_udiv_u : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_sdivr : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_udivr : AdvSIMD_Pred2VectorArg_Intrinsic;

 def int_aarch64_sve_smax : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_smax_u : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_umax : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_umax_u : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_smin : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_smin_u : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_umin : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_umin_u : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_sabd : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_sabd_u : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_uabd : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_uabd_u : AdvSIMD_Pred2VectorArg_Intrinsic;

 def int_aarch64_sve_mad : AdvSIMD_Pred3VectorArg_Intrinsic;
 def int_aarch64_sve_msb : AdvSIMD_Pred3VectorArg_Intrinsic;
@@ -1670,12 +1683,15 @@ def int_aarch64_sve_uqsub_x : AdvSIMD_2VectorArg_Intrinsic;

 // Shifts
 def int_aarch64_sve_asr : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_asr_u : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_asr_wide : AdvSIMD_SVE_ShiftWide_Intrinsic;
 def int_aarch64_sve_asrd : AdvSIMD_SVE_ShiftByImm_Intrinsic;
 def int_aarch64_sve_insr : AdvSIMD_SVE_INSR_Intrinsic;
 def int_aarch64_sve_lsl : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_lsl_u : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_lsl_wide : AdvSIMD_SVE_ShiftWide_Intrinsic;
 def int_aarch64_sve_lsr : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_lsr_u : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_lsr_wide : AdvSIMD_SVE_ShiftWide_Intrinsic;

 //
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e1a162b22c63f..95c9db7c35906 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18143,24 +18143,54 @@ static SDValue performIntrinsicCombine(SDNode *N,
     return LowerSVEIntrinsicEXT(N, DAG);
   case Intrinsic::aarch64_sve_mul:
     return convertMergedOpToPredOp(N, AArch64ISD::MUL_PRED, DAG);
+  case Intrinsic::aarch64_sve_mul_u:
+    return DAG.getNode(AArch64ISD::MUL_PRED, SDLoc(N), N->getValueType(0),
+                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
   case Intrinsic::aarch64_sve_smulh:
     return convertMergedOpToPredOp(N, AArch64ISD::MULHS_PRED, DAG);
+  case Intrinsic::aarch64_sve_smulh_u:
+    return DAG.getNode(AArch64ISD::MULHS_PRED, SDLoc(N), N->getValueType(0),
+                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
   case Intrinsic::aarch64_sve_umulh:
     return convertMergedOpToPredOp(N, AArch64ISD::MULHU_PRED, DAG);
+  case Intrinsic::aarch64_sve_umulh_u:
+    return DAG.getNode(AArch64ISD::MULHU_PRED, SDLoc(N), N->getValueType(0),
+                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
   case Intrinsic::aarch64_sve_smin:
     return convertMergedOpToPredOp(N, AArch64ISD::SMIN_PRED, DAG);
+  case Intrinsic::aarch64_sve_smin_u:
+    return DAG.getNode(AArch64ISD::SMIN_PRED, SDLoc(N), N->getValueType(0),
+                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
   case Intrinsic::aarch64_sve_umin:
     return convertMergedOpToPredOp(N, AArch64ISD::UMIN_PRED, DAG);
+  case Intrinsic::aarch64_sve_umin_u:
+    return DAG.getNode(AArch64ISD::UMIN_PRED, SDLoc(N), N->getValueType(0),
+                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
   case Intrinsic::aarch64_sve_smax:
     return convertMergedOpToPredOp(N, AArch64ISD::SMAX_PRED, DAG);
+  case Intrinsic::aarch64_sve_smax_u:
+    return DAG.getNode(AArch64ISD::SMAX_PRED, SDLoc(N), N->getValueType(0),
+                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
   case Intrinsic::aarch64_sve_umax:
     return convertMergedOpToPredOp(N, AArch64ISD::UMAX_PRED, DAG);
+  case Intrinsic::aarch64_sve_umax_u:
+    return DAG.getNode(AArch64ISD::UMAX_PRED, SDLoc(N), N->getValueType(0),
+                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
   case Intrinsic::aarch64_sve_lsl:
     return convertMergedOpToPredOp(N, AArch64ISD::SHL_PRED, DAG);
+  case Intrinsic::aarch64_sve_lsl_u:
+    return DAG.getNode(AArch64ISD::SHL_PRED, SDLoc(N), N->getValueType(0),
+                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
   case Intrinsic::aarch64_sve_lsr:
     return convertMergedOpToPredOp(N, AArch64ISD::SRL_PRED, DAG);
+  case Intrinsic::aarch64_sve_lsr_u:
+    return DAG.getNode(AArch64ISD::SRL_PRED, SDLoc(N), N->getValueType(0),
+                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
   case Intrinsic::aarch64_sve_asr:
     return convertMergedOpToPredOp(N, AArch64ISD::SRA_PRED, DAG);
+  case Intrinsic::aarch64_sve_asr_u:
+    return DAG.getNode(AArch64ISD::SRA_PRED, SDLoc(N), N->getValueType(0),
+                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
   case Intrinsic::aarch64_sve_fadd:
     return convertMergedOpToPredOp(N, AArch64ISD::FADD_PRED, DAG);
   case Intrinsic::aarch64_sve_fsub:
@@ -18169,8 +18199,14 @@ static SDValue performIntrinsicCombine(SDNode *N,
     return convertMergedOpToPredOp(N, AArch64ISD::FMUL_PRED, DAG);
   case Intrinsic::aarch64_sve_add:
     return convertMergedOpToPredOp(N, ISD::ADD, DAG, true);
+  case Intrinsic::aarch64_sve_add_u:
+    return DAG.getNode(ISD::ADD, SDLoc(N), N->getValueType(0), N->getOperand(2),
+                       N->getOperand(3));
   case Intrinsic::aarch64_sve_sub:
     return convertMergedOpToPredOp(N, ISD::SUB, DAG, true);
+  case Intrinsic::aarch64_sve_sub_u:
+    return DAG.getNode(ISD::SUB, SDLoc(N), N->getValueType(0), N->getOperand(2),
+                       N->getOperand(3));
   case Intrinsic::aarch64_sve_subr:
     return convertMergedOpToPredOp(N, ISD::SUB, DAG, true, true);
   case Intrinsic::aarch64_sve_and:
@@ -18183,8 +18219,20 @@ static SDValue performIntrinsicCombine(SDNode *N,
     return
convertMergedOpToPredOp(N, ISD::OR, DAG, true); case Intrinsic::aarch64_sve_sabd: return convertMergedOpToPredOp(N, ISD::ABDS, DAG, true); + case Intrinsic::aarch64_sve_sabd_u: + return DAG.getNode(ISD::ABDS, SDLoc(N), N->getValueType(0), + N->getOperand(2), N->getOperand(3)); case Intrinsic::aarch64_sve_uabd: return convertMergedOpToPredOp(N, ISD::ABDU, DAG, true); + case Intrinsic::aarch64_sve_uabd_u: + return DAG.getNode(ISD::ABDU, SDLoc(N), N->getValueType(0), + N->getOperand(2), N->getOperand(3)); + case Intrinsic::aarch64_sve_sdiv_u: + return DAG.getNode(AArch64ISD::SDIV_PRED, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2), N->getOperand(3)); + case Intrinsic::aarch64_sve_udiv_u: + return DAG.getNode(AArch64ISD::UDIV_PRED, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2), N->getOperand(3)); case Intrinsic::aarch64_sve_sqadd: return convertMergedOpToPredOp(N, ISD::SADDSAT, DAG, true); case Intrinsic::aarch64_sve_sqsub: diff --git a/llvm/test/CodeGen/AArch64/sve-aba.ll b/llvm/test/CodeGen/AArch64/sve-aba.ll index f3953c98bd668..b1298e097ab6c 100644 --- a/llvm/test/CodeGen/AArch64/sve-aba.ll +++ b/llvm/test/CodeGen/AArch64/sve-aba.ll @@ -47,6 +47,16 @@ define @saba_b_from_sabd( %a, %3 } +define @saba_b_from_sabd_u( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: saba_b_from_sabd_u: +; CHECK: // %bb.0: +; CHECK-NEXT: saba z0.b, z1.b, z2.b +; CHECK-NEXT: ret + %1 = call @llvm.aarch64.sve.sabd.u.nxv16i8( %pg, %b, %c) + %2 = add %1, %a + ret %2 +} + define @saba_h( %a, %b, %c) #0 { ; CHECK-LABEL: saba_h: ; CHECK: // %bb.0: @@ -88,6 +98,16 @@ define @saba_h_from_sabd( %a, %3 } +define @saba_h_from_sabd_u( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: saba_h_from_sabd_u: +; CHECK: // %bb.0: +; CHECK-NEXT: saba z0.h, z1.h, z2.h +; CHECK-NEXT: ret + %1 = call @llvm.aarch64.sve.sabd.u.nxv8i16( %pg, %b, %c) + %2 = add %1, %a + ret %2 +} + define @saba_s( %a, %b, %c) #0 { ; CHECK-LABEL: saba_s: ; CHECK: // %bb.0: @@ -129,6 +149,16 @@ define @saba_s_from_sabd( %a, %3 } +define @saba_s_from_sabd_u( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: saba_s_from_sabd_u: +; CHECK: // %bb.0: +; CHECK-NEXT: saba z0.s, z1.s, z2.s +; CHECK-NEXT: ret + %1 = call @llvm.aarch64.sve.sabd.u.nxv4i32( %pg, %b, %c) + %2 = add %1, %a + ret %2 +} + define @saba_d( %a, %b, %c) #0 { ; CHECK-LABEL: saba_d: ; CHECK: // %bb.0: @@ -170,6 +200,16 @@ define @saba_d_from_sabd( %a, %3 } +define @saba_d_from_sabd_u( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: saba_d_from_sabd_u: +; CHECK: // %bb.0: +; CHECK-NEXT: saba z0.d, z1.d, z2.d +; CHECK-NEXT: ret + %1 = call @llvm.aarch64.sve.sabd.u.nxv2i64( %pg, %b, %c) + %2 = add %1, %a + ret %2 +} + ; ; UABA ; @@ -214,6 +254,16 @@ define @uaba_b_from_uabd( %a, %3 } +define @uaba_b_from_uabd_u( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: uaba_b_from_uabd_u: +; CHECK: // %bb.0: +; CHECK-NEXT: uaba z0.b, z1.b, z2.b +; CHECK-NEXT: ret + %1 = call @llvm.aarch64.sve.uabd.u.nxv16i8( %pg, %b, %c) + %2 = add %1, %a + ret %2 +} + define @uaba_h( %a, %b, %c) #0 { ; CHECK-LABEL: uaba_h: ; CHECK: // %bb.0: @@ -254,6 +304,16 @@ define @uaba_h_from_uabd( %a, %3 } +define @uaba_h_from_uabd_u( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: uaba_h_from_uabd_u: +; CHECK: // %bb.0: +; CHECK-NEXT: uaba z0.h, z1.h, z2.h +; CHECK-NEXT: ret + %1 = call @llvm.aarch64.sve.uabd.u.nxv8i16( %pg, %b, %c) + %2 = add %1, %a + ret %2 +} + define @uaba_s( %a, %b, %c) #0 { ; CHECK-LABEL: uaba_s: ; CHECK: // %bb.0: @@ -294,6 +354,16 @@ define @uaba_s_from_uabd( %a, %3 } +define @uaba_s_from_uabd_u( %pg, %a, %b, %c) #0 { +; 
CHECK-LABEL: uaba_s_from_uabd_u: +; CHECK: // %bb.0: +; CHECK-NEXT: uaba z0.s, z1.s, z2.s +; CHECK-NEXT: ret + %1 = call @llvm.aarch64.sve.uabd.u.nxv4i32( %pg, %b, %c) + %2 = add %1, %a + ret %2 +} + define @uaba_d( %a, %b, %c) #0 { ; CHECK-LABEL: uaba_d: ; CHECK: // %bb.0: @@ -334,6 +404,16 @@ define @uaba_d_from_uabd( %a, %3 } +define @uaba_d_from_uabd_u( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: uaba_d_from_uabd_u: +; CHECK: // %bb.0: +; CHECK-NEXT: uaba z0.d, z1.d, z2.d +; CHECK-NEXT: ret + %1 = call @llvm.aarch64.sve.uabd.u.nxv2i64( %pg, %b, %c) + %2 = add %1, %a + ret %2 +} + ; A variant of uaba_s but with the add operands switched. define @uaba_s_commutative( %a, %b, %c) #0 { ; CHECK-LABEL: uaba_s_commutative: @@ -368,9 +448,19 @@ declare @llvm.aarch64.sve.sabd.nxv8i16(, @llvm.aarch64.sve.sabd.nxv4i32(, , ) declare @llvm.aarch64.sve.sabd.nxv2i64(, , ) +declare @llvm.aarch64.sve.sabd.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.sabd.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.sabd.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.sabd.u.nxv2i64(, , ) + declare @llvm.aarch64.sve.uabd.nxv16i8(, , ) declare @llvm.aarch64.sve.uabd.nxv8i16(, , ) declare @llvm.aarch64.sve.uabd.nxv4i32(, , ) declare @llvm.aarch64.sve.uabd.nxv2i64(, , ) +declare @llvm.aarch64.sve.uabd.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.uabd.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.uabd.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.uabd.u.nxv2i64(, , ) + attributes #0 = { "target-features"="+neon,+sve,+sve2" } diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-undef.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-undef.ll new file mode 100644 index 0000000000000..4645b1f640b46 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-undef.ll @@ -0,0 +1,1222 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mattr=+sve < %s | FileCheck %s +; RUN: llc -mattr=+sve2 < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +; +; ADD +; + +define @add_i8( %pg, %a, %b) { +; CHECK-LABEL: add_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: add z0.b, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.add.u.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @add_i16( %pg, %a, %b) { +; CHECK-LABEL: add_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: add z0.h, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.add.u.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @add_i32( %pg, %a, %b) { +; CHECK-LABEL: add_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.add.u.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @add_i64( %pg, %a, %b) { +; CHECK-LABEL: add_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: add z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.add.u.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; ADD (immediate) +; + +define @add_imm_i8( %pg, %a) { +; CHECK-LABEL: add_imm_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: add z0.b, z0.b, #3 // =0x3 +; CHECK-NEXT: ret + %imm = insertelement undef, i8 3, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.add.u.nxv16i8( %pg, + %a, + %imm.splat) + ret %out +} + +define @add_imm_i16( %pg, %a) { +; CHECK-LABEL: add_imm_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: add z0.h, z0.h, #4 // =0x4 +; CHECK-NEXT: ret + %imm = insertelement undef, i16 4, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.add.u.nxv8i16( %pg, + %a, + %imm.splat) + ret %out +} + 
+define @add_imm_i32( %pg, %a) { +; CHECK-LABEL: add_imm_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add z0.s, z0.s, #5 // =0x5 +; CHECK-NEXT: ret + %imm = insertelement undef, i32 5, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.add.u.nxv4i32( %pg, + %a, + %imm.splat) + ret %out +} + +define @add_imm_i64( %pg, %a) { +; CHECK-LABEL: add_imm_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: add z0.d, z0.d, #6 // =0x6 +; CHECK-NEXT: ret + %imm = insertelement undef, i64 6, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.add.u.nxv2i64( %pg, + %a, + %imm.splat) + ret %out +} + +; +; MUL +; + +define @mul_i8( %pg, %a, %b) { +; CHECK-LABEL: mul_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: mul z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mul.u.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @mul_i16( %pg, %a, %b) { +; CHECK-LABEL: mul_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mul.u.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @mul_i32( %pg, %a, %b) { +; CHECK-LABEL: mul_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mul.u.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @mul_i64( %pg, %a, %b) { +; CHECK-LABEL: mul_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mul.u.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; MUL (immediate) +; + +define @mul_imm_i8( %pg, %a) { +; CHECK-LABEL: mul_imm_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.b, #3 // =0x3 +; CHECK-NEXT: mul z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %imm = insertelement undef, i8 3, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.mul.u.nxv16i8( %pg, + %a, + %imm.splat) + ret %out +} + +define @mul_imm_i16( %pg, %a) { +; CHECK-LABEL: mul_imm_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.h, #4 // =0x4 +; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %imm = insertelement undef, i16 4, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.mul.u.nxv8i16( %pg, + %a, + %imm.splat) + ret %out +} + +define @mul_imm_i32( %pg, %a) { +; CHECK-LABEL: mul_imm_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.s, #5 // =0x5 +; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %imm = insertelement undef, i32 5, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.mul.u.nxv4i32( %pg, + %a, + %imm.splat) + ret %out +} + +define @mul_imm_i64( %pg, %a) { +; CHECK-LABEL: mul_imm_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.d, #6 // =0x6 +; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %imm = insertelement undef, i64 6, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.mul.u.nxv2i64( %pg, + %a, + %imm.splat) + ret %out +} + +; +; SABD +; + +define @sabd_i8( %pg, %a, %b) { +; CHECK-LABEL: sabd_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: sabd z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sabd.u.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @sabd_i16( %pg, %a, %b) { +; CHECK-LABEL: sabd_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: sabd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sabd.u.nxv8i16( %pg, + %a, + %b) + ret 
%out +} + +define @sabd_i32( %pg, %a, %b) { +; CHECK-LABEL: sabd_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sabd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sabd.u.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @sabd_i64( %pg, %a, %b) { +; CHECK-LABEL: sabd_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sabd z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sabd.u.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; SDIV +; + +define @sdiv_i32( %pg, %a, %b) { +; CHECK-LABEL: sdiv_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sdiv.u.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @sdiv_i64( %pg, %a, %b) { +; CHECK-LABEL: sdiv_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: sdiv z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sdiv.u.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; SDIVR +; + +define @sdivr_i32( %pg, %a, %b) { +; CHECK-LABEL: sdivr_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sdivr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sdiv.u.nxv4i32( %pg, + %b, + %a) + ret %out +} + +define @sdivr_i64( %pg, %a, %b) { +; CHECK-LABEL: sdivr_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: sdivr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sdiv.u.nxv2i64( %pg, + %b, + %a) + ret %out +} + +; +; SMAX +; + +define @smax_i8( %pg, %a, %b) { +; CHECK-LABEL: smax_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: smax z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smax.u.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @smax_i16( %pg, %a, %b) { +; CHECK-LABEL: smax_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smax.u.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @smax_i32( %pg, %a, %b) { +; CHECK-LABEL: smax_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smax.u.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @smax_i64( %pg, %a, %b) { +; CHECK-LABEL: smax_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smax.u.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; SMAX (immediate) +; + +define @smax_imm_i8( %pg, %a) { +; CHECK-LABEL: smax_imm_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.b, #3 // =0x3 +; CHECK-NEXT: smax z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %imm = insertelement undef, i8 3, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.smax.u.nxv16i8( %pg, + %a, + %imm.splat) + ret %out +} + +define @smax_imm_i16( %pg, %a) { +; CHECK-LABEL: smax_imm_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.h, #4 // =0x4 +; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %imm = insertelement undef, i16 4, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.smax.u.nxv8i16( %pg, + %a, + %imm.splat) + ret %out +} + +define @smax_imm_i32( %pg, %a) { +; CHECK-LABEL: smax_imm_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.s, #5 // =0x5 +; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %imm = insertelement undef, i32 5, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.smax.u.nxv4i32( %pg, + %a, + %imm.splat) + ret %out +} + +define @smax_imm_i64( %pg, %a) { +; CHECK-LABEL: smax_imm_i64: +; 
CHECK: // %bb.0: +; CHECK-NEXT: mov z1.d, #6 // =0x6 +; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %imm = insertelement undef, i64 6, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.smax.u.nxv2i64( %pg, + %a, + %imm.splat) + ret %out +} + +; +; SMIN +; + +define @smin_i8( %pg, %a, %b) { +; CHECK-LABEL: smin_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: smin z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smin.u.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @smin_i16( %pg, %a, %b) { +; CHECK-LABEL: smin_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smin.u.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @smin_i32( %pg, %a, %b) { +; CHECK-LABEL: smin_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smin.u.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @smin_i64( %pg, %a, %b) { +; CHECK-LABEL: smin_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smin.u.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; SMIN (immediate) +; + +define @smin_imm_i8( %pg, %a) { +; CHECK-LABEL: smin_imm_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.b, #3 // =0x3 +; CHECK-NEXT: smin z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %imm = insertelement undef, i8 3, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.smin.u.nxv16i8( %pg, + %a, + %imm.splat) + ret %out +} + +define @smin_imm_i16( %pg, %a) { +; CHECK-LABEL: smin_imm_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.h, #4 // =0x4 +; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %imm = insertelement undef, i16 4, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.smin.u.nxv8i16( %pg, + %a, + %imm.splat) + ret %out +} + +define @smin_imm_i32( %pg, %a) { +; CHECK-LABEL: smin_imm_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.s, #5 // =0x5 +; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %imm = insertelement undef, i32 5, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.smin.u.nxv4i32( %pg, + %a, + %imm.splat) + ret %out +} + +define @smin_imm_i64( %pg, %a) { +; CHECK-LABEL: smin_imm_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.d, #6 // =0x6 +; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %imm = insertelement undef, i64 6, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.smin.u.nxv2i64( %pg, + %a, + %imm.splat) + ret %out +} + +; +; SMULH +; + +define @smulh_i8( %pg, %a, %b) { +; CHECK-LABEL: smulh_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: smulh z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smulh.u.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @smulh_i16( %pg, %a, %b) { +; CHECK-LABEL: smulh_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: smulh z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smulh.u.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @smulh_i32( %pg, %a, %b) { +; CHECK-LABEL: smulh_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: smulh z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smulh.u.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @smulh_i64( %pg, %a, %b) { +; CHECK-LABEL: smulh_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: smulh z0.d, p0/m, z0.d, z1.d +; 
CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smulh.u.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; SUB +; + +define @sub_i8( %pg, %a, %b) { +; CHECK-LABEL: sub_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sub z0.b, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sub.u.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @sub_i16( %pg, %a, %b) { +; CHECK-LABEL: sub_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sub z0.h, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sub.u.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @sub_i32( %pg, %a, %b) { +; CHECK-LABEL: sub_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sub z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sub.u.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @sub_i64( %pg, %a, %b) { +; CHECK-LABEL: sub_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: sub z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sub.u.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; SUB (immediate) +; + +define @sub_imm_i8( %pg, %a) { +; CHECK-LABEL: sub_imm_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sub z0.b, z0.b, #3 // =0x3 +; CHECK-NEXT: ret + %imm = insertelement undef, i8 3, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.sub.u.nxv16i8( %pg, + %a, + %imm.splat) + ret %out +} + +define @sub_imm_i16( %pg, %a) { +; CHECK-LABEL: sub_imm_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sub z0.h, z0.h, #4 // =0x4 +; CHECK-NEXT: ret + %imm = insertelement undef, i16 4, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.sub.u.nxv8i16( %pg, + %a, + %imm.splat) + ret %out +} + +define @sub_imm_i32( %pg, %a) { +; CHECK-LABEL: sub_imm_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sub z0.s, z0.s, #5 // =0x5 +; CHECK-NEXT: ret + %imm = insertelement undef, i32 5, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.sub.u.nxv4i32( %pg, + %a, + %imm.splat) + ret %out +} + +define @sub_imm_i64( %pg, %a) { +; CHECK-LABEL: sub_imm_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: sub z0.d, z0.d, #6 // =0x6 +; CHECK-NEXT: ret + %imm = insertelement undef, i64 6, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.sub.u.nxv2i64( %pg, + %a, + %imm.splat) + ret %out +} + +; +; SUBR +; + +define @subr_i8( %pg, %a, %b) { +; CHECK-LABEL: subr_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sub z0.b, z1.b, z0.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sub.u.nxv16i8( %pg, + %b, + %a) + ret %out +} + +define @subr_i16( %pg, %a, %b) { +; CHECK-LABEL: subr_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sub z0.h, z1.h, z0.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sub.u.nxv8i16( %pg, + %b, + %a) + ret %out +} + +define @subr_i32( %pg, %a, %b) { +; CHECK-LABEL: subr_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sub z0.s, z1.s, z0.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sub.u.nxv4i32( %pg, + %b, + %a) + ret %out +} + +define @subr_i64( %pg, %a, %b) { +; CHECK-LABEL: subr_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: sub z0.d, z1.d, z0.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sub.u.nxv2i64( %pg, + %b, + %a) + ret %out +} + +; +; SUBR (immediate) +; + +define @subr_imm_i8( %pg, %a) { +; CHECK-LABEL: subr_imm_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: subr z0.b, z0.b, #3 // =0x3 +; CHECK-NEXT: ret + %imm = insertelement undef, i8 3, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.sub.u.nxv16i8( %pg, + %imm.splat, + %a) + ret %out +} + 
+define @subr_imm_i16( %pg, %a) { +; CHECK-LABEL: subr_imm_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: subr z0.h, z0.h, #4 // =0x4 +; CHECK-NEXT: ret + %imm = insertelement undef, i16 4, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.sub.u.nxv8i16( %pg, + %imm.splat, + %a) + ret %out +} + +define @subr_imm_i32( %pg, %a) { +; CHECK-LABEL: subr_imm_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: subr z0.s, z0.s, #5 // =0x5 +; CHECK-NEXT: ret + %imm = insertelement undef, i32 5, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.sub.u.nxv4i32( %pg, + %imm.splat, + %a) + ret %out +} + +define @subr_imm_i64( %pg, %a) { +; CHECK-LABEL: subr_imm_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: subr z0.d, z0.d, #6 // =0x6 +; CHECK-NEXT: ret + %imm = insertelement undef, i64 6, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.sub.u.nxv2i64( %pg, + %imm.splat, + %a) + ret %out +} + +; +; UABD +; + +define @uabd_i8( %pg, %a, %b) { +; CHECK-LABEL: uabd_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: uabd z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uabd.u.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @uabd_i16( %pg, %a, %b) { +; CHECK-LABEL: uabd_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: uabd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uabd.u.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @uabd_i32( %pg, %a, %b) { +; CHECK-LABEL: uabd_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: uabd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uabd.u.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @uabd_i64( %pg, %a, %b) { +; CHECK-LABEL: uabd_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uabd z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uabd.u.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; UDIV +; + +define @udiv_i32( %pg, %a, %b) { +; CHECK-LABEL: udiv_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.udiv.u.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @udiv_i64( %pg, %a, %b) { +; CHECK-LABEL: udiv_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: udiv z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.udiv.u.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; UDIVR +; + +define @udivr_i32( %pg, %a, %b) { +; CHECK-LABEL: udivr_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: udivr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.udiv.u.nxv4i32( %pg, + %b, + %a) + ret %out +} + +define @udivr_i64( %pg, %a, %b) { +; CHECK-LABEL: udivr_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: udivr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.udiv.u.nxv2i64( %pg, + %b, + %a) + ret %out +} + +; +; UMAX +; + +define @umax_i8( %pg, %a, %b) { +; CHECK-LABEL: umax_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: umax z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umax.u.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @umax_i16( %pg, %a, %b) { +; CHECK-LABEL: umax_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umax.u.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @umax_i32( %pg, %a, %b) { +; CHECK-LABEL: umax_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out 
= call @llvm.aarch64.sve.umax.u.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @umax_i64( %pg, %a, %b) { +; CHECK-LABEL: umax_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umax.u.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; UMAX (immediate) +; + +define @umax_imm_i8( %pg, %a) { +; CHECK-LABEL: umax_imm_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.b, #3 // =0x3 +; CHECK-NEXT: umax z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %imm = insertelement undef, i8 3, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.umax.u.nxv16i8( %pg, + %a, + %imm.splat) + ret %out +} + +define @umax_imm_i16( %pg, %a) { +; CHECK-LABEL: umax_imm_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.h, #4 // =0x4 +; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %imm = insertelement undef, i16 4, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.umax.u.nxv8i16( %pg, + %a, + %imm.splat) + ret %out +} + +define @umax_imm_i32( %pg, %a) { +; CHECK-LABEL: umax_imm_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.s, #5 // =0x5 +; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %imm = insertelement undef, i32 5, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.umax.u.nxv4i32( %pg, + %a, + %imm.splat) + ret %out +} + +define @umax_imm_i64( %pg, %a) { +; CHECK-LABEL: umax_imm_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.d, #6 // =0x6 +; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %imm = insertelement undef, i64 6, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.umax.u.nxv2i64( %pg, + %a, + %imm.splat) + ret %out +} + +; +; UMIN +; + +define @umin_i8( %pg, %a, %b) { +; CHECK-LABEL: umin_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: umin z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umin.u.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @umin_i16( %pg, %a, %b) { +; CHECK-LABEL: umin_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umin.u.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @umin_i32( %pg, %a, %b) { +; CHECK-LABEL: umin_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umin.u.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @umin_i64( %pg, %a, %b) { +; CHECK-LABEL: umin_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umin.u.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; UMIN (immediate) +; + +define @umin_imm_i8( %pg, %a) { +; CHECK-LABEL: umin_imm_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.b, #3 // =0x3 +; CHECK-NEXT: umin z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %imm = insertelement undef, i8 3, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.umin.u.nxv16i8( %pg, + %a, + %imm.splat) + ret %out +} + +define @umin_imm_i16( %pg, %a) { +; CHECK-LABEL: umin_imm_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.h, #4 // =0x4 +; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %imm = insertelement undef, i16 4, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.umin.u.nxv8i16( %pg, + %a, + %imm.splat) + ret %out +} + +define @umin_imm_i32( %pg, %a) { +; CHECK-LABEL: umin_imm_i32: +; 
CHECK: // %bb.0: +; CHECK-NEXT: mov z1.s, #5 // =0x5 +; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %imm = insertelement undef, i32 5, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.umin.u.nxv4i32( %pg, + %a, + %imm.splat) + ret %out +} + +define @umin_imm_i64( %pg, %a) { +; CHECK-LABEL: umin_imm_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.d, #6 // =0x6 +; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %imm = insertelement undef, i64 6, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.umin.u.nxv2i64( %pg, + %a, + %imm.splat) + ret %out +} + +; +; UMULH +; + +define @umulh_i8( %pg, %a, %b) { +; CHECK-LABEL: umulh_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: umulh z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umulh.u.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @umulh_i16( %pg, %a, %b) { +; CHECK-LABEL: umulh_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: umulh z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umulh.u.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @umulh_i32( %pg, %a, %b) { +; CHECK-LABEL: umulh_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umulh.u.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @umulh_i64( %pg, %a, %b) { +; CHECK-LABEL: umulh_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: umulh z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umulh.u.nxv2i64( %pg, + %a, + %b) + ret %out +} + +declare @llvm.aarch64.sve.add.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.add.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.add.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.add.u.nxv2i64(, , ) + +declare @llvm.aarch64.sve.mul.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.mul.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.mul.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.mul.u.nxv2i64(, , ) + +declare @llvm.aarch64.sve.sabd.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.sabd.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.sabd.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.sabd.u.nxv2i64(, , ) + +declare @llvm.aarch64.sve.sdiv.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.sdiv.u.nxv2i64(, , ) + +declare @llvm.aarch64.sve.smax.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.smax.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.smax.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.smax.u.nxv2i64(, , ) + +declare @llvm.aarch64.sve.smin.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.smin.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.smin.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.smin.u.nxv2i64(, , ) + +declare @llvm.aarch64.sve.smulh.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.smulh.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.smulh.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.smulh.u.nxv2i64(, , ) + +declare @llvm.aarch64.sve.sub.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.sub.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.sub.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.sub.u.nxv2i64(, , ) + +declare @llvm.aarch64.sve.uabd.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.uabd.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.uabd.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.uabd.u.nxv2i64(, , ) + +declare @llvm.aarch64.sve.udiv.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.udiv.u.nxv2i64(, , ) + +declare @llvm.aarch64.sve.umax.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.umax.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.umax.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.umax.u.nxv2i64(, , ) + +declare 
@llvm.aarch64.sve.umin.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.umin.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.umin.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.umin.u.nxv2i64(, , ) + +declare @llvm.aarch64.sve.umulh.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.umulh.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.umulh.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.umulh.u.nxv2i64(, , ) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts-undef.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts-undef.ll new file mode 100644 index 0000000000000..6f06fe8912f65 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts-undef.ll @@ -0,0 +1,331 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mattr=+sve < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +; +; ASR +; + +define @asr_i8( %pg, %a, %b) { +; CHECK-LABEL: asr_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: asr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.asr.u.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @asr_i16( %pg, %a, %b) { +; CHECK-LABEL: asr_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.asr.u.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @asr_i32( %pg, %a, %b) { +; CHECK-LABEL: asr_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.asr.u.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @asr_i64( %pg, %a, %b) { +; CHECK-LABEL: asr_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.asr.u.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; ASR (immediate) +; + +define @asr_imm_i8( %pg, %a) { +; CHECK-LABEL: asr_imm_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: asr z0.b, p0/m, z0.b, #3 +; CHECK-NEXT: ret + %imm = insertelement undef, i8 3, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.asr.u.nxv16i8( %pg, + %a, + %imm.splat) + ret %out +} + +define @asr_imm_i16( %pg, %a) { +; CHECK-LABEL: asr_imm_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: asr z0.h, p0/m, z0.h, #4 +; CHECK-NEXT: ret + %imm = insertelement undef, i16 4, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.asr.u.nxv8i16( %pg, + %a, + %imm.splat) + ret %out +} + +define @asr_imm_i32( %pg, %a) { +; CHECK-LABEL: asr_imm_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: asr z0.s, p0/m, z0.s, #5 +; CHECK-NEXT: ret + %imm = insertelement undef, i32 5, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.asr.u.nxv4i32( %pg, + %a, + %imm.splat) + ret %out +} + +define @asr_imm_i64( %pg, %a) { +; CHECK-LABEL: asr_imm_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: asr z0.d, p0/m, z0.d, #6 +; CHECK-NEXT: ret + %imm = insertelement undef, i64 6, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.asr.u.nxv2i64( %pg, + %a, + %imm.splat) + ret %out +} + +; +; LSL +; + +define @lsl_i8( %pg, %a, %b) { +; CHECK-LABEL: lsl_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsl.u.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @lsl_i16( %pg, %a, %b) { +; CHECK-LABEL: lsl_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsl.u.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @lsl_i32( %pg, %a, %b) { +; 
CHECK-LABEL: lsl_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsl.u.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @lsl_i64( %pg, %a, %b) { +; CHECK-LABEL: lsl_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsl.u.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; LSL (immediate) +; + +define @lsl_imm_i8( %pg, %a) { +; CHECK-LABEL: lsl_imm_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #7 +; CHECK-NEXT: ret + %imm = insertelement undef, i8 7, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsl.u.nxv16i8( %pg, + %a, + %imm.splat) + ret %out +} + +define @lsl_imm_i16( %pg, %a) { +; CHECK-LABEL: lsl_imm_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #8 +; CHECK-NEXT: ret + %imm = insertelement undef, i16 8, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsl.u.nxv8i16( %pg, + %a, + %imm.splat) + ret %out +} + +define @lsl_imm_i32( %pg, %a) { +; CHECK-LABEL: lsl_imm_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #9 +; CHECK-NEXT: ret + %imm = insertelement undef, i32 9, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsl.u.nxv4i32( %pg, + %a, + %imm.splat) + ret %out +} + +define @lsl_imm_i64( %pg, %a) { +; CHECK-LABEL: lsl_imm_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #10 +; CHECK-NEXT: ret + %imm = insertelement undef, i64 10, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsl.u.nxv2i64( %pg, + %a, + %imm.splat) + ret %out +} + +; +; LSR +; + +define @lsr_i8( %pg, %a, %b) { +; CHECK-LABEL: lsr_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsr.u.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @lsr_i16( %pg, %a, %b) { +; CHECK-LABEL: lsr_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsr.u.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @lsr_i32( %pg, %a, %b) { +; CHECK-LABEL: lsr_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsr.u.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @lsr_i64( %pg, %a, %b) { +; CHECK-LABEL: lsr_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsr.u.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; LSR (immediate) +; + +define @lsr_imm_i8( %pg, %a) { +; CHECK-LABEL: lsr_imm_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr z0.b, p0/m, z0.b, #8 +; CHECK-NEXT: ret + %imm = insertelement undef, i8 8, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsr.u.nxv16i8( %pg, + %a, + %imm.splat) + ret %out +} + +define @lsr_imm_i16( %pg, %a) { +; CHECK-LABEL: lsr_imm_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr z0.h, p0/m, z0.h, #12 +; CHECK-NEXT: ret + %imm = insertelement undef, i16 12, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsr.u.nxv8i16( %pg, + %a, + %imm.splat) + ret %out +} + +define @lsr_imm_i32( %pg, %a) { +; CHECK-LABEL: lsr_imm_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #13 +; CHECK-NEXT: ret + %imm = insertelement undef, i32 13, i32 0 + %imm.splat = shufflevector %imm, undef, 
zeroinitializer + %out = call @llvm.aarch64.sve.lsr.u.nxv4i32( %pg, + %a, + %imm.splat) + ret %out +} + +define @lsr_imm_i64( %pg, %a) { +; CHECK-LABEL: lsr_imm_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr z0.d, p0/m, z0.d, #14 +; CHECK-NEXT: ret + %imm = insertelement undef, i64 14, i32 0 + %imm.splat = shufflevector %imm, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsr.u.nxv2i64( %pg, + %a, + %imm.splat) + ret %out +} + +declare @llvm.aarch64.sve.asr.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.asr.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.asr.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.asr.u.nxv2i64(, , ) + +declare @llvm.aarch64.sve.lsl.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.lsl.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.lsl.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.lsl.u.nxv2i64(, , ) + +declare @llvm.aarch64.sve.lsr.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.lsr.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.lsr.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.lsr.u.nxv2i64(, , ) diff --git a/llvm/test/CodeGen/AArch64/sve2-sra.ll b/llvm/test/CodeGen/AArch64/sve2-sra.ll index 38779461b2b97..9751004fa96e3 100644 --- a/llvm/test/CodeGen/AArch64/sve2-sra.ll +++ b/llvm/test/CodeGen/AArch64/sve2-sra.ll @@ -105,6 +105,58 @@ define @usra_intr_i64( %a, %add } +define @usra_intr_u_i8( %pg, %a, %b) #0 { +; CHECK-LABEL: usra_intr_u_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr z1.b, p0/m, z1.b, #1 +; CHECK-NEXT: add z0.b, z0.b, z1.b +; CHECK-NEXT: ret + %ins = insertelement poison, i8 1, i32 0 + %splat = shufflevector %ins, poison, zeroinitializer + %shift = call @llvm.aarch64.sve.lsr.u.nxv16i8( %pg, %b, %splat) + %add = add %a, %shift + ret %add +} + +define @usra_intr_u_i16( %pg, %a, %b) #0 { +; CHECK-LABEL: usra_intr_u_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr z1.h, p0/m, z1.h, #2 +; CHECK-NEXT: add z0.h, z0.h, z1.h +; CHECK-NEXT: ret + %ins = insertelement poison, i16 2, i32 0 + %splat = shufflevector %ins, poison, zeroinitializer + %shift = call @llvm.aarch64.sve.lsr.u.nxv8i16( %pg, %b, %splat) + %add = add %a, %shift + ret %add +} + +define @usra_intr_u_i32( %pg, %a, %b) #0 { +; CHECK-LABEL: usra_intr_u_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr z1.s, p0/m, z1.s, #3 +; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %ins = insertelement poison, i32 3, i32 0 + %splat = shufflevector %ins, poison, zeroinitializer + %shift = call @llvm.aarch64.sve.lsr.u.nxv4i32( %pg, %b, %splat) + %add = add %a, %shift + ret %add +} + +define @usra_intr_u_i64( %pg, %a, %b) #0 { +; CHECK-LABEL: usra_intr_u_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr z1.d, p0/m, z1.d, #4 +; CHECK-NEXT: add z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %ins = insertelement poison, i64 4, i32 0 + %splat = shufflevector %ins, poison, zeroinitializer + %shift = call @llvm.aarch64.sve.lsr.u.nxv2i64( %pg, %b, %splat) + %add = add %a, %shift + ret %add +} + ; SSRA define @ssra_i8( %a, %b) #0 { @@ -207,6 +259,57 @@ define @ssra_intr_i64( %a, %add } +define @ssra_intr_u_i8( %pg, %a, %b) #0 { +; CHECK-LABEL: ssra_intr_u_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: asr z1.b, p0/m, z1.b, #1 +; CHECK-NEXT: add z0.b, z0.b, z1.b +; CHECK-NEXT: ret + %ins = insertelement poison, i8 1, i32 0 + %splat = shufflevector %ins, poison, zeroinitializer + %shift = call @llvm.aarch64.sve.asr.u.nxv16i8( %pg, %b, %splat) + %add = add %a, %shift + ret %add +} + +define @ssra_intr_u_i16( %pg, %a, %b) #0 { +; CHECK-LABEL: ssra_intr_u_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: asr z1.h, p0/m, z1.h, #2 +; CHECK-NEXT: add z0.h, z0.h, z1.h +; CHECK-NEXT: ret + %ins = 
insertelement poison, i16 2, i32 0 + %splat = shufflevector %ins, poison, zeroinitializer + %shift = call @llvm.aarch64.sve.asr.u.nxv8i16( %pg, %b, %splat) + %add = add %a, %shift + ret %add +} + +define @ssra_intr_u_i32( %pg, %a, %b) #0 { +; CHECK-LABEL: ssra_intr_u_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: asr z1.s, p0/m, z1.s, #3 +; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %ins = insertelement poison, i32 3, i32 0 + %splat = shufflevector %ins, poison, zeroinitializer + %shift = call @llvm.aarch64.sve.asr.u.nxv4i32( %pg, %b, %splat) + %add = add %a, %shift + ret %add +} + +define @ssra_intr_u_i64( %pg, %a, %b) #0 { +; CHECK-LABEL: ssra_intr_u_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: asr z1.d, p0/m, z1.d, #4 +; CHECK-NEXT: add z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %ins = insertelement poison, i64 4, i32 0 + %splat = shufflevector %ins, poison, zeroinitializer + %shift = call @llvm.aarch64.sve.asr.u.nxv2i64( %pg, %b, %splat) + %add = add %a, %shift + ret %add +} declare @llvm.aarch64.sve.ptrue.nxv16i1(i32 immarg) declare @llvm.aarch64.sve.ptrue.nxv8i1(i32 immarg) @@ -218,9 +321,19 @@ declare @llvm.aarch64.sve.lsr.nxv8i16(, @llvm.aarch64.sve.lsr.nxv4i32(, , ) declare @llvm.aarch64.sve.lsr.nxv2i64(, , ) +declare @llvm.aarch64.sve.lsr.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.lsr.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.lsr.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.lsr.u.nxv2i64(, , ) + declare @llvm.aarch64.sve.asr.nxv16i8(, , ) declare @llvm.aarch64.sve.asr.nxv8i16(, , ) declare @llvm.aarch64.sve.asr.nxv4i32(, , ) declare @llvm.aarch64.sve.asr.nxv2i64(, , ) +declare @llvm.aarch64.sve.asr.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.asr.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.asr.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.asr.u.nxv2i64(, , ) + attributes #0 = { "target-features"="+sve,+sve2" }
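
Note (illustrative, not part of the patch): the new _u intrinsics carry the governing predicate as operand 1 but, as the unpredicated expansions in the tests above imply, leave the inactive result lanes undefined. That is what lets the combines in performIntrinsicCombine either forward the predicate directly to the *_PRED nodes or, for the ADD/SUB/ABDS/ABDU cases, drop it entirely. Below is a minimal IR sketch mirroring the new tests, with the scalable-vector types written out in full; the function name @add_u_example is made up for illustration only.

; Inactive lanes of %r are undefined, so codegen is free to ignore %pg and
; select the unpredicated form the tests check for: add z0.b, z0.b, z1.b
define <vscale x 16 x i8> @add_u_example(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
  %r = call <vscale x 16 x i8> @llvm.aarch64.sve.add.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %r
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.add.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)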