diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index ba1cba2a8c4d51..f5c202910859d1 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1053,12 +1053,31 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
                 LLVMVectorOfBitcastsToInt<0>],
                [IntrNoMem]>;
 
+  class SVE2_1VectorArg_Long_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMSubdivide2VectorType<0>,
+                 llvm_i32_ty],
+                [IntrNoMem, ImmArg<1>]>;
+
   class SVE2_2VectorArg_Long_Intrinsic
     : Intrinsic<[llvm_anyvector_ty],
                 [LLVMSubdivide2VectorType<0>,
                  LLVMSubdivide2VectorType<0>],
                 [IntrNoMem]>;
 
+  class SVE2_2VectorArgIndexed_Long_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMSubdivide2VectorType<0>,
+                 LLVMSubdivide2VectorType<0>,
+                 llvm_i32_ty],
+                [IntrNoMem, ImmArg<2>]>;
+
+  class SVE2_2VectorArg_Wide_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMMatchType<0>,
+                 LLVMSubdivide2VectorType<0>],
+                [IntrNoMem]>;
+
   class SVE2_2VectorArg_Pred_Long_Intrinsic
     : Intrinsic<[llvm_anyvector_ty],
                 [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
@@ -1766,28 +1785,34 @@ def int_aarch64_sve_usra : AdvSIMD_2VectorArgIndexed_Intrinsic;
 // SVE2 - Widening DSP operations
 //
 
-def int_aarch64_sve_sabalb : SVE2_3VectorArg_Long_Intrinsic;
-def int_aarch64_sve_sabalt : SVE2_3VectorArg_Long_Intrinsic;
-def int_aarch64_sve_sabdlb : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_sabdlt : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_saddlb : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_saddlt : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_smullb : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_smullt : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_sqdmullb : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_sqdmullt : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_ssublb : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_ssublt : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_uabalb : SVE2_3VectorArg_Long_Intrinsic;
-def int_aarch64_sve_uabalt : SVE2_3VectorArg_Long_Intrinsic;
-def int_aarch64_sve_uabdlb : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_uabdlt : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_uaddlb : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_uaddlt : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_umullb : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_umullt : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_usublb : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_usublt : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_sabalb   : SVE2_3VectorArg_Long_Intrinsic;
+def int_aarch64_sve_sabalt   : SVE2_3VectorArg_Long_Intrinsic;
+def int_aarch64_sve_sabdlb   : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_sabdlt   : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_saddlb   : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_saddlt   : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_saddwb   : SVE2_2VectorArg_Wide_Intrinsic;
+def int_aarch64_sve_saddwt   : SVE2_2VectorArg_Wide_Intrinsic;
+def int_aarch64_sve_sshllb   : SVE2_1VectorArg_Long_Intrinsic;
+def int_aarch64_sve_sshllt   : SVE2_1VectorArg_Long_Intrinsic;
+def int_aarch64_sve_ssublb   : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_ssublt   : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_ssubwb   : SVE2_2VectorArg_Wide_Intrinsic;
+def int_aarch64_sve_ssubwt   : SVE2_2VectorArg_Wide_Intrinsic;
+def int_aarch64_sve_uabalb   : SVE2_3VectorArg_Long_Intrinsic;
+def int_aarch64_sve_uabalt   : SVE2_3VectorArg_Long_Intrinsic;
+def int_aarch64_sve_uabdlb   : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_uabdlt   : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_uaddlb   : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_uaddlt   : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_uaddwb   : SVE2_2VectorArg_Wide_Intrinsic;
+def int_aarch64_sve_uaddwt   : SVE2_2VectorArg_Wide_Intrinsic;
+def int_aarch64_sve_ushllb   : SVE2_1VectorArg_Long_Intrinsic;
+def int_aarch64_sve_ushllt   : SVE2_1VectorArg_Long_Intrinsic;
+def int_aarch64_sve_usublb   : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_usublt   : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_usubwb   : SVE2_2VectorArg_Wide_Intrinsic;
+def int_aarch64_sve_usubwt   : SVE2_2VectorArg_Wide_Intrinsic;
 
 //
 // SVE2 - Non-widening pairwise arithmetic
@@ -1933,10 +1958,16 @@ def int_aarch64_sve_smlslb_lane : SVE2_3VectorArg_Indexed_Intrinsic;
 def int_aarch64_sve_smlslt_lane   : SVE2_3VectorArg_Indexed_Intrinsic;
 def int_aarch64_sve_umlslb_lane   : SVE2_3VectorArg_Indexed_Intrinsic;
 def int_aarch64_sve_umlslt_lane   : SVE2_3VectorArg_Indexed_Intrinsic;
+def int_aarch64_sve_smullb_lane   : SVE2_2VectorArgIndexed_Long_Intrinsic;
+def int_aarch64_sve_smullt_lane   : SVE2_2VectorArgIndexed_Long_Intrinsic;
+def int_aarch64_sve_umullb_lane   : SVE2_2VectorArgIndexed_Long_Intrinsic;
+def int_aarch64_sve_umullt_lane   : SVE2_2VectorArgIndexed_Long_Intrinsic;
 def int_aarch64_sve_sqdmlalb_lane : SVE2_3VectorArg_Indexed_Intrinsic;
 def int_aarch64_sve_sqdmlalt_lane : SVE2_3VectorArg_Indexed_Intrinsic;
 def int_aarch64_sve_sqdmlslb_lane : SVE2_3VectorArg_Indexed_Intrinsic;
 def int_aarch64_sve_sqdmlslt_lane : SVE2_3VectorArg_Indexed_Intrinsic;
+def int_aarch64_sve_sqdmullb_lane : SVE2_2VectorArgIndexed_Long_Intrinsic;
+def int_aarch64_sve_sqdmullt_lane : SVE2_2VectorArgIndexed_Long_Intrinsic;
 
 // SVE2 MLA Unpredicated.
def int_aarch64_sve_smlalb : SVE2_3VectorArg_Long_Intrinsic; @@ -1947,11 +1978,17 @@ def int_aarch64_sve_smlslb : SVE2_3VectorArg_Long_Intrinsic; def int_aarch64_sve_smlslt : SVE2_3VectorArg_Long_Intrinsic; def int_aarch64_sve_umlslb : SVE2_3VectorArg_Long_Intrinsic; def int_aarch64_sve_umlslt : SVE2_3VectorArg_Long_Intrinsic; +def int_aarch64_sve_smullb : SVE2_2VectorArg_Long_Intrinsic; +def int_aarch64_sve_smullt : SVE2_2VectorArg_Long_Intrinsic; +def int_aarch64_sve_umullb : SVE2_2VectorArg_Long_Intrinsic; +def int_aarch64_sve_umullt : SVE2_2VectorArg_Long_Intrinsic; def int_aarch64_sve_sqdmlalb : SVE2_3VectorArg_Long_Intrinsic; def int_aarch64_sve_sqdmlalt : SVE2_3VectorArg_Long_Intrinsic; def int_aarch64_sve_sqdmlslb : SVE2_3VectorArg_Long_Intrinsic; def int_aarch64_sve_sqdmlslt : SVE2_3VectorArg_Long_Intrinsic; +def int_aarch64_sve_sqdmullb : SVE2_2VectorArg_Long_Intrinsic; +def int_aarch64_sve_sqdmullt : SVE2_2VectorArg_Long_Intrinsic; def int_aarch64_sve_sqdmlalbt : SVE2_3VectorArg_Long_Intrinsic; def int_aarch64_sve_sqdmlslbt : SVE2_3VectorArg_Long_Intrinsic; diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index e188fa4e2fce59..1fa7b827941ac2 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1475,14 +1475,14 @@ let Predicates = [HasSVE2] in { defm SQRDCMLAH_ZZZ : sve2_int_cmla<0b1, "sqrdcmlah", int_aarch64_sve_sqrdcmlah_x>; // SVE2 integer multiply long (indexed) - defm SMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b000, "smullb">; - defm SMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b001, "smullt">; - defm UMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b010, "umullb">; - defm UMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b011, "umullt">; + defm SMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b000, "smullb", int_aarch64_sve_smullb_lane>; + defm SMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b001, "smullt", int_aarch64_sve_smullt_lane>; + defm UMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b010, "umullb", int_aarch64_sve_umullb_lane>; + defm UMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b011, "umullt", int_aarch64_sve_umullt_lane>; // SVE2 saturating multiply (indexed) - defm SQDMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b100, "sqdmullb">; - defm SQDMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b101, "sqdmullt">; + defm SQDMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b100, "sqdmullb", int_aarch64_sve_sqdmullb_lane>; + defm SQDMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b101, "sqdmullt", int_aarch64_sve_sqdmullt_lane>; // SVE2 integer multiply-add long (indexed) defm SMLALB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1000, "smlalb", int_aarch64_sve_smlalb_lane>; @@ -1593,14 +1593,14 @@ let Predicates = [HasSVE2] in { defm UABDLT_ZZZ : sve2_wide_int_arith_long<0b01111, "uabdlt", int_aarch64_sve_uabdlt>; // SVE2 integer add/subtract wide - defm SADDWB_ZZZ : sve2_wide_int_arith_wide<0b000, "saddwb">; - defm SADDWT_ZZZ : sve2_wide_int_arith_wide<0b001, "saddwt">; - defm UADDWB_ZZZ : sve2_wide_int_arith_wide<0b010, "uaddwb">; - defm UADDWT_ZZZ : sve2_wide_int_arith_wide<0b011, "uaddwt">; - defm SSUBWB_ZZZ : sve2_wide_int_arith_wide<0b100, "ssubwb">; - defm SSUBWT_ZZZ : sve2_wide_int_arith_wide<0b101, "ssubwt">; - defm USUBWB_ZZZ : sve2_wide_int_arith_wide<0b110, "usubwb">; - defm USUBWT_ZZZ : sve2_wide_int_arith_wide<0b111, "usubwt">; + defm SADDWB_ZZZ : sve2_wide_int_arith_wide<0b000, "saddwb", int_aarch64_sve_saddwb>; + defm 
SADDWT_ZZZ : sve2_wide_int_arith_wide<0b001, "saddwt", int_aarch64_sve_saddwt>; + defm UADDWB_ZZZ : sve2_wide_int_arith_wide<0b010, "uaddwb", int_aarch64_sve_uaddwb>; + defm UADDWT_ZZZ : sve2_wide_int_arith_wide<0b011, "uaddwt", int_aarch64_sve_uaddwt>; + defm SSUBWB_ZZZ : sve2_wide_int_arith_wide<0b100, "ssubwb", int_aarch64_sve_ssubwb>; + defm SSUBWT_ZZZ : sve2_wide_int_arith_wide<0b101, "ssubwt", int_aarch64_sve_ssubwt>; + defm USUBWB_ZZZ : sve2_wide_int_arith_wide<0b110, "usubwb", int_aarch64_sve_usubwb>; + defm USUBWT_ZZZ : sve2_wide_int_arith_wide<0b111, "usubwt", int_aarch64_sve_usubwt>; // SVE2 integer multiply long defm SQDMULLB_ZZZ : sve2_wide_int_arith_long<0b11000, "sqdmullb", int_aarch64_sve_sqdmullb>; @@ -1693,10 +1693,10 @@ let Predicates = [HasSVE2] in { defm EORTB_ZZZ : sve2_bitwise_xor_interleaved<0b1, "eortb">; // SVE2 bitwise shift left long - defm SSHLLB_ZZI : sve2_bitwise_shift_left_long<0b00, "sshllb">; - defm SSHLLT_ZZI : sve2_bitwise_shift_left_long<0b01, "sshllt">; - defm USHLLB_ZZI : sve2_bitwise_shift_left_long<0b10, "ushllb">; - defm USHLLT_ZZI : sve2_bitwise_shift_left_long<0b11, "ushllt">; + defm SSHLLB_ZZI : sve2_bitwise_shift_left_long<0b00, "sshllb", int_aarch64_sve_sshllb>; + defm SSHLLT_ZZI : sve2_bitwise_shift_left_long<0b01, "sshllt", int_aarch64_sve_sshllt>; + defm USHLLB_ZZI : sve2_bitwise_shift_left_long<0b10, "ushllb", int_aarch64_sve_ushllb>; + defm USHLLT_ZZI : sve2_bitwise_shift_left_long<0b11, "ushllt", int_aarch64_sve_ushllt>; // SVE2 integer add/subtract interleaved long defm SADDLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b00, "saddlbt", int_aarch64_sve_saddlbt>; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index fe388c48b6ee9e..c2d4b1b5f5337a 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -2731,9 +2731,10 @@ multiclass sve2_int_mul_by_indexed_elem opc, string asm, def : SVE_3_Op_Imm_Pat(NAME # _D)>; } -multiclass sve2_int_mul_long_by_indexed_elem opc, string asm> { +multiclass sve2_int_mul_long_by_indexed_elem opc, string asm, + SDPatternOperator op> { def _S : sve2_int_mul_by_indexed_elem<0b10, { opc{2-1}, ?, opc{0} }, asm, - ZPR32, ZPR16, ZPR3b16, VectorIndexH> { + ZPR32, ZPR16, ZPR3b16, VectorIndexH32b> { bits<3> Zm; bits<3> iop; let Inst{20-19} = iop{2-1}; @@ -2741,13 +2742,16 @@ multiclass sve2_int_mul_long_by_indexed_elem opc, string asm> { let Inst{11} = iop{0}; } def _D : sve2_int_mul_by_indexed_elem<0b11, { opc{2-1}, ?, opc{0} }, asm, - ZPR64, ZPR32, ZPR4b32, VectorIndexS> { + ZPR64, ZPR32, ZPR4b32, VectorIndexS32b> { bits<4> Zm; bits<2> iop; let Inst{20} = iop{1}; let Inst{19-16} = Zm; let Inst{11} = iop{0}; } + + def : SVE_3_Op_Imm_Pat(NAME # _S)>; + def : SVE_3_Op_Imm_Pat(NAME # _D)>; } //===----------------------------------------------------------------------===// @@ -2894,10 +2898,15 @@ multiclass sve2_wide_int_arith_long opc, string asm, def : SVE_2_Op_Pat(NAME # _D)>; } -multiclass sve2_wide_int_arith_wide opc, string asm> { +multiclass sve2_wide_int_arith_wide opc, string asm, + SDPatternOperator op> { def _H : sve2_wide_int_arith<0b01, { 0b10, opc }, asm, ZPR16, ZPR16, ZPR8>; def _S : sve2_wide_int_arith<0b10, { 0b10, opc }, asm, ZPR32, ZPR32, ZPR16>; def _D : sve2_wide_int_arith<0b11, { 0b10, opc }, asm, ZPR64, ZPR64, ZPR32>; + + def : SVE_2_Op_Pat(NAME # _H)>; + def : SVE_2_Op_Pat(NAME # _S)>; + def : SVE_2_Op_Pat(NAME # _D)>; } multiclass sve2_pmul_long opc, string asm> { @@ -2992,7 +3001,8 @@ class 
sve2_bitwise_shift_left_long tsz8_64, bits<2> opc, string asm, let Inst{4-0} = Zd; } -multiclass sve2_bitwise_shift_left_long opc, string asm> { +multiclass sve2_bitwise_shift_left_long opc, string asm, + SDPatternOperator op> { def _H : sve2_bitwise_shift_left_long<{0,0,1}, opc, asm, ZPR16, ZPR8, vecshiftL8>; def _S : sve2_bitwise_shift_left_long<{0,1,?}, opc, asm, @@ -3003,6 +3013,9 @@ multiclass sve2_bitwise_shift_left_long opc, string asm> { ZPR64, ZPR32, vecshiftL32> { let Inst{20-19} = imm{4-3}; } + def : SVE_2_Op_Imm_Pat(NAME # _H)>; + def : SVE_2_Op_Imm_Pat(NAME # _S)>; + def : SVE_2_Op_Imm_Pat(NAME # _D)>; } //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll index 0c98614b7c41c0..b90c19e02dd496 100644 --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll @@ -192,6 +192,69 @@ define @saddlt_s( %a, %b ret %out } +; +; SADDWB +; + +define @saddwb_b( %a, %b) { +; CHECK-LABEL: saddwb_b: +; CHECK: saddwb z0.h, z0.h, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.saddwb.nxv8i16( %a, + %b) + ret %out +} + +define @saddwb_h( %a, %b) { +; CHECK-LABEL: saddwb_h: +; CHECK: saddwb z0.s, z0.s, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.saddwb.nxv4i32( %a, + %b) + ret %out +} + +define @saddwb_s( %a, %b) { +; CHECK-LABEL: saddwb_s: +; CHECK: saddwb z0.d, z0.d, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.saddwb.nxv2i64( %a, + %b) + ret %out +} + +; +; SADDWT +; + +define @saddwt_b( %a, %b) { +; CHECK-LABEL: saddwt_b: +; CHECK: saddwt z0.h, z0.h, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.saddwt.nxv8i16( %a, + %b) + ret %out +} + +define @saddwt_h( %a, %b) { +; CHECK-LABEL: saddwt_h: +; CHECK: saddwt z0.s, z0.s, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.saddwt.nxv4i32( %a, + %b) + ret %out +} + +define @saddwt_s( %a, %b) { +; CHECK-LABEL: saddwt_s: +; CHECK: saddwt z0.d, z0.d, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.saddwt.nxv2i64( %a, + %b) + ret %out +} + + ; ; SMULLB (Vectors) ; @@ -223,6 +286,30 @@ define @smullb_s( %a, %b ret %out } +; +; SMULLB (Indexed) +; + +define @smullb_lane_h( %a, %b) { +; CHECK-LABEL: smullb_lane_h: +; CHECK: smullb z0.s, z0.h, z1.h[4] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smullb.lane.nxv4i32( %a, + %b, + i32 4) + ret %out +} + +define @smullb_lane_s( %a, %b) { +; CHECK-LABEL: smullb_lane_s: +; CHECK: smullb z0.d, z0.s, z1.s[3] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smullb.lane.nxv2i64( %a, + %b, + i32 3) + ret %out +} + ; ; SMULLT (Vectors) ; @@ -254,6 +341,30 @@ define @smullt_s( %a, %b ret %out } +; +; SMULLT (Indexed) +; + +define @smullt_lane_h( %a, %b) { +; CHECK-LABEL: smullt_lane_h: +; CHECK: smullt z0.s, z0.h, z1.h[5] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smullt.lane.nxv4i32( %a, + %b, + i32 5) + ret %out +} + +define @smullt_lane_s( %a, %b) { +; CHECK-LABEL: smullt_lane_s: +; CHECK: smullt z0.d, z0.s, z1.s[2] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smullt.lane.nxv2i64( %a, + %b, + i32 2) + ret %out +} + ; ; SQDMULLB (Vectors) ; @@ -285,6 +396,30 @@ define @sqdmullb_s( %a, ret %out } +; +; SQDMULLB (Indexed) +; + +define @sqdmullb_lane_h( %a, %b) { +; CHECK-LABEL: sqdmullb_lane_h: +; CHECK: sqdmullb z0.s, z0.h, z1.h[2] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqdmullb.lane.nxv4i32( %a, + %b, + 
i32 2) + ret %out +} + +define @sqdmullb_lane_s( %a, %b) { +; CHECK-LABEL: sqdmullb_lane_s: +; CHECK: sqdmullb z0.d, z0.s, z1.s[1] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqdmullb.lane.nxv2i64( %a, + %b, + i32 1) + ret %out +} + ; ; SQDMULLT (Vectors) ; @@ -316,6 +451,30 @@ define @sqdmullt_s( %a, ret %out } +; +; SQDMULLT (Indexed) +; + +define @sqdmullt_lane_h( %a, %b) { +; CHECK-LABEL: sqdmullt_lane_h: +; CHECK: sqdmullt z0.s, z0.h, z1.h[3] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqdmullt.lane.nxv4i32( %a, + %b, + i32 3) + ret %out +} + +define @sqdmullt_lane_s( %a, %b) { +; CHECK-LABEL: sqdmullt_lane_s: +; CHECK: sqdmullt z0.d, z0.s, z1.s[0] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqdmullt.lane.nxv2i64( %a, + %b, + i32 0) + ret %out +} + ; ; SSUBLB ; @@ -347,6 +506,62 @@ define @ssublb_s( %a, %b ret %out } +; +; SSHLLB +; + +define @sshllb_b( %a) { +; CHECK-LABEL: sshllb_b: +; CHECK: sshllb z0.h, z0.b, #0 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sshllb.nxv8i16( %a, i32 0) + ret %out +} + +define @sshllb_h( %a) { +; CHECK-LABEL: sshllb_h: +; CHECK: sshllb z0.s, z0.h, #1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sshllb.nxv4i32( %a, i32 1) + ret %out +} + +define @sshllb_s( %a) { +; CHECK-LABEL: sshllb_s: +; CHECK: sshllb z0.d, z0.s, #2 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sshllb.nxv2i64( %a, i32 2) + ret %out +} + +; +; SSHLLT +; + +define @sshllt_b( %a) { +; CHECK-LABEL: sshllt_b: +; CHECK: sshllt z0.h, z0.b, #3 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sshllt.nxv8i16( %a, i32 3) + ret %out +} + +define @sshllt_h( %a) { +; CHECK-LABEL: sshllt_h: +; CHECK: sshllt z0.s, z0.h, #4 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sshllt.nxv4i32( %a, i32 4) + ret %out +} + +define @sshllt_s( %a) { +; CHECK-LABEL: sshllt_s: +; CHECK: sshllt z0.d, z0.s, #5 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sshllt.nxv2i64( %a, i32 5) + ret %out +} + ; ; SSUBLT ; @@ -378,6 +593,68 @@ define @ssublt_s( %a, %b ret %out } +; +; SSUBWB +; + +define @ssubwb_b( %a, %b) { +; CHECK-LABEL: ssubwb_b: +; CHECK: ssubwb z0.h, z0.h, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ssubwb.nxv8i16( %a, + %b) + ret %out +} + +define @ssubwb_h( %a, %b) { +; CHECK-LABEL: ssubwb_h: +; CHECK: ssubwb z0.s, z0.s, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ssubwb.nxv4i32( %a, + %b) + ret %out +} + +define @ssubwb_s( %a, %b) { +; CHECK-LABEL: ssubwb_s: +; CHECK: ssubwb z0.d, z0.d, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ssubwb.nxv2i64( %a, + %b) + ret %out +} + +; +; SSUBWT +; + +define @ssubwt_b( %a, %b) { +; CHECK-LABEL: ssubwt_b: +; CHECK: ssubwt z0.h, z0.h, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ssubwt.nxv8i16( %a, + %b) + ret %out +} + +define @ssubwt_h( %a, %b) { +; CHECK-LABEL: ssubwt_h: +; CHECK: ssubwt z0.s, z0.s, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ssubwt.nxv4i32( %a, + %b) + ret %out +} + +define @ssubwt_s( %a, %b) { +; CHECK-LABEL: ssubwt_s: +; CHECK: ssubwt z0.d, z0.d, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ssubwt.nxv2i64( %a, + %b) + ret %out +} + ; ; UABALB ; @@ -570,6 +847,68 @@ define @uaddlt_s( %a, %b ret %out } +; +; UADDWB +; + +define @uaddwb_b( %a, %b) { +; CHECK-LABEL: uaddwb_b: +; CHECK: uaddwb z0.h, z0.h, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uaddwb.nxv8i16( %a, + %b) + ret %out +} + +define @uaddwb_h( %a, %b) { +; CHECK-LABEL: uaddwb_h: +; CHECK: uaddwb z0.s, z0.s, z1.h +; CHECK-NEXT: ret + %out = call 
@llvm.aarch64.sve.uaddwb.nxv4i32( %a, + %b) + ret %out +} + +define @uaddwb_s( %a, %b) { +; CHECK-LABEL: uaddwb_s: +; CHECK: uaddwb z0.d, z0.d, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uaddwb.nxv2i64( %a, + %b) + ret %out +} + +; +; UADDWT +; + +define @uaddwt_b( %a, %b) { +; CHECK-LABEL: uaddwt_b: +; CHECK: uaddwt z0.h, z0.h, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uaddwt.nxv8i16( %a, + %b) + ret %out +} + +define @uaddwt_h( %a, %b) { +; CHECK-LABEL: uaddwt_h: +; CHECK: uaddwt z0.s, z0.s, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uaddwt.nxv4i32( %a, + %b) + ret %out +} + +define @uaddwt_s( %a, %b) { +; CHECK-LABEL: uaddwt_s: +; CHECK: uaddwt z0.d, z0.d, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uaddwt.nxv2i64( %a, + %b) + ret %out +} + ; ; UMULLB (Vectors) ; @@ -601,6 +940,31 @@ define @umullb_s( %a, %b ret %out } +; +; UMULLB (Indexed) +; + +define @umullb_lane_h( %a, %b) { +; CHECK-LABEL: umullb_lane_h: +; CHECK: umullb z0.s, z0.h, z1.h[0] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umullb.lane.nxv4i32( %a, + %b, + i32 0) + ret %out +} + + +define @umullb_lane_s( %a, %b) { +; CHECK-LABEL: umullb_lane_s: +; CHECK: umullb z0.d, z0.s, z1.s[3] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umullb.lane.nxv2i64( %a, + %b, + i32 3) + ret %out +} + ; ; UMULLT (Vectors) ; @@ -632,6 +996,86 @@ define @umullt_s( %a, %b ret %out } +; +; UMULLT (Indexed) +; + +define @umullt_lane_h( %a, %b) { +; CHECK-LABEL: umullt_lane_h: +; CHECK: umullt z0.s, z0.h, z1.h[1] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umullt.lane.nxv4i32( %a, + %b, + i32 1) + ret %out +} + +define @umullt_lane_s( %a, %b) { +; CHECK-LABEL: umullt_lane_s: +; CHECK: umullt z0.d, z0.s, z1.s[2] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umullt.lane.nxv2i64( %a, + %b, + i32 2) + ret %out +} + +; +; USHLLB +; + +define @ushllb_b( %a) { +; CHECK-LABEL: ushllb_b: +; CHECK: ushllb z0.h, z0.b, #6 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ushllb.nxv8i16( %a, i32 6) + ret %out +} + +define @ushllb_h( %a) { +; CHECK-LABEL: ushllb_h: +; CHECK: ushllb z0.s, z0.h, #7 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ushllb.nxv4i32( %a, i32 7) + ret %out +} + +define @ushllb_s( %a) { +; CHECK-LABEL: ushllb_s: +; CHECK: ushllb z0.d, z0.s, #8 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ushllb.nxv2i64( %a, i32 8) + ret %out +} + +; +; USHLLT +; + +define @ushllt_b( %a) { +; CHECK-LABEL: ushllt_b: +; CHECK: ushllt z0.h, z0.b, #7 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ushllt.nxv8i16( %a, i32 7) + ret %out +} + +define @ushllt_h( %a) { +; CHECK-LABEL: ushllt_h: +; CHECK: ushllt z0.s, z0.h, #15 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ushllt.nxv4i32( %a, i32 15) + ret %out +} + +define @ushllt_s( %a) { +; CHECK-LABEL: ushllt_s: +; CHECK: ushllt z0.d, z0.s, #31 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ushllt.nxv2i64( %a, i32 31) + ret %out +} + ; ; USUBLB ; @@ -694,6 +1138,68 @@ define @usublt_s( %a, %b ret %out } +; +; USUBWB +; + +define @usubwb_b( %a, %b) { +; CHECK-LABEL: usubwb_b: +; CHECK: usubwb z0.h, z0.h, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.usubwb.nxv8i16( %a, + %b) + ret %out +} + +define @usubwb_h( %a, %b) { +; CHECK-LABEL: usubwb_h: +; CHECK: usubwb z0.s, z0.s, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.usubwb.nxv4i32( %a, + %b) + ret %out +} + +define @usubwb_s( %a, %b) { +; CHECK-LABEL: usubwb_s: +; CHECK: usubwb z0.d, z0.d, z1.s +; CHECK-NEXT: ret + %out = call 
@llvm.aarch64.sve.usubwb.nxv2i64( %a, + %b) + ret %out +} + +; +; USUBWT +; + +define @usubwt_b( %a, %b) { +; CHECK-LABEL: usubwt_b: +; CHECK: usubwt z0.h, z0.h, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.usubwt.nxv8i16( %a, + %b) + ret %out +} + +define @usubwt_h( %a, %b) { +; CHECK-LABEL: usubwt_h: +; CHECK: usubwt z0.s, z0.s, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.usubwt.nxv4i32( %a, + %b) + ret %out +} + +define @usubwt_s( %a, %b) { +; CHECK-LABEL: usubwt_s: +; CHECK: usubwt z0.d, z0.d, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.usubwt.nxv2i64( %a, + %b) + ret %out +} + declare @llvm.aarch64.sve.sabalb.nxv8i16(, , ) declare @llvm.aarch64.sve.sabalb.nxv4i32(, , ) declare @llvm.aarch64.sve.sabalb.nxv2i64(, , ) @@ -718,22 +1224,50 @@ declare @llvm.aarch64.sve.saddlt.nxv8i16(, declare @llvm.aarch64.sve.saddlt.nxv4i32(, ) declare @llvm.aarch64.sve.saddlt.nxv2i64(, ) +declare @llvm.aarch64.sve.saddwb.nxv8i16(, ) +declare @llvm.aarch64.sve.saddwb.nxv4i32(, ) +declare @llvm.aarch64.sve.saddwb.nxv2i64(, ) + +declare @llvm.aarch64.sve.saddwt.nxv8i16(, ) +declare @llvm.aarch64.sve.saddwt.nxv4i32(, ) +declare @llvm.aarch64.sve.saddwt.nxv2i64(, ) + declare @llvm.aarch64.sve.smullb.nxv8i16(, ) declare @llvm.aarch64.sve.smullb.nxv4i32(, ) declare @llvm.aarch64.sve.smullb.nxv2i64(, ) +declare @llvm.aarch64.sve.smullb.lane.nxv4i32(, , i32) +declare @llvm.aarch64.sve.smullb.lane.nxv2i64(, , i32) + declare @llvm.aarch64.sve.smullt.nxv8i16(, ) declare @llvm.aarch64.sve.smullt.nxv4i32(, ) declare @llvm.aarch64.sve.smullt.nxv2i64(, ) +declare @llvm.aarch64.sve.smullt.lane.nxv4i32(, , i32) +declare @llvm.aarch64.sve.smullt.lane.nxv2i64(, , i32) + declare @llvm.aarch64.sve.sqdmullb.nxv8i16(, ) declare @llvm.aarch64.sve.sqdmullb.nxv4i32(, ) declare @llvm.aarch64.sve.sqdmullb.nxv2i64(, ) +declare @llvm.aarch64.sve.sqdmullb.lane.nxv4i32(, , i32) +declare @llvm.aarch64.sve.sqdmullb.lane.nxv2i64(, , i32) + declare @llvm.aarch64.sve.sqdmullt.nxv8i16(, ) declare @llvm.aarch64.sve.sqdmullt.nxv4i32(, ) declare @llvm.aarch64.sve.sqdmullt.nxv2i64(, ) +declare @llvm.aarch64.sve.sqdmullt.lane.nxv4i32(, , i32) +declare @llvm.aarch64.sve.sqdmullt.lane.nxv2i64(, , i32) + +declare @llvm.aarch64.sve.sshllb.nxv8i16(, i32) +declare @llvm.aarch64.sve.sshllb.nxv4i32(, i32) +declare @llvm.aarch64.sve.sshllb.nxv2i64(, i32) + +declare @llvm.aarch64.sve.sshllt.nxv8i16(, i32) +declare @llvm.aarch64.sve.sshllt.nxv4i32(, i32) +declare @llvm.aarch64.sve.sshllt.nxv2i64(, i32) + declare @llvm.aarch64.sve.ssublb.nxv8i16(, ) declare @llvm.aarch64.sve.ssublb.nxv4i32(, ) declare @llvm.aarch64.sve.ssublb.nxv2i64(, ) @@ -742,6 +1276,14 @@ declare @llvm.aarch64.sve.ssublt.nxv8i16(, declare @llvm.aarch64.sve.ssublt.nxv4i32(, ) declare @llvm.aarch64.sve.ssublt.nxv2i64(, ) +declare @llvm.aarch64.sve.ssubwb.nxv8i16(, ) +declare @llvm.aarch64.sve.ssubwb.nxv4i32(, ) +declare @llvm.aarch64.sve.ssubwb.nxv2i64(, ) + +declare @llvm.aarch64.sve.ssubwt.nxv8i16(, ) +declare @llvm.aarch64.sve.ssubwt.nxv4i32(, ) +declare @llvm.aarch64.sve.ssubwt.nxv2i64(, ) + declare @llvm.aarch64.sve.uabalb.nxv8i16(, , ) declare @llvm.aarch64.sve.uabalb.nxv4i32(, , ) declare @llvm.aarch64.sve.uabalb.nxv2i64(, , ) @@ -766,14 +1308,36 @@ declare @llvm.aarch64.sve.uaddlt.nxv8i16(, declare @llvm.aarch64.sve.uaddlt.nxv4i32(, ) declare @llvm.aarch64.sve.uaddlt.nxv2i64(, ) +declare @llvm.aarch64.sve.uaddwb.nxv8i16(, ) +declare @llvm.aarch64.sve.uaddwb.nxv4i32(, ) +declare @llvm.aarch64.sve.uaddwb.nxv2i64(, ) + +declare 
@llvm.aarch64.sve.uaddwt.nxv8i16(, ) +declare @llvm.aarch64.sve.uaddwt.nxv4i32(, ) +declare @llvm.aarch64.sve.uaddwt.nxv2i64(, ) + declare @llvm.aarch64.sve.umullb.nxv8i16(, ) declare @llvm.aarch64.sve.umullb.nxv4i32(, ) declare @llvm.aarch64.sve.umullb.nxv2i64(, ) +declare @llvm.aarch64.sve.umullb.lane.nxv4i32(, , i32) +declare @llvm.aarch64.sve.umullb.lane.nxv2i64(, , i32) + declare @llvm.aarch64.sve.umullt.nxv8i16(, ) declare @llvm.aarch64.sve.umullt.nxv4i32(, ) declare @llvm.aarch64.sve.umullt.nxv2i64(, ) +declare @llvm.aarch64.sve.umullt.lane.nxv4i32(, , i32) +declare @llvm.aarch64.sve.umullt.lane.nxv2i64(, , i32) + +declare @llvm.aarch64.sve.ushllb.nxv8i16(, i32) +declare @llvm.aarch64.sve.ushllb.nxv4i32(, i32) +declare @llvm.aarch64.sve.ushllb.nxv2i64(, i32) + +declare @llvm.aarch64.sve.ushllt.nxv8i16(, i32) +declare @llvm.aarch64.sve.ushllt.nxv4i32(, i32) +declare @llvm.aarch64.sve.ushllt.nxv2i64(, i32) + declare @llvm.aarch64.sve.usublb.nxv8i16(, ) declare @llvm.aarch64.sve.usublb.nxv4i32(, ) declare @llvm.aarch64.sve.usublb.nxv2i64(, ) @@ -781,3 +1345,11 @@ declare @llvm.aarch64.sve.usublb.nxv2i64(, declare @llvm.aarch64.sve.usublt.nxv8i16(, ) declare @llvm.aarch64.sve.usublt.nxv4i32(, ) declare @llvm.aarch64.sve.usublt.nxv2i64(, ) + +declare @llvm.aarch64.sve.usubwb.nxv8i16(, ) +declare @llvm.aarch64.sve.usubwb.nxv4i32(, ) +declare @llvm.aarch64.sve.usubwb.nxv2i64(, ) + +declare @llvm.aarch64.sve.usubwt.nxv8i16(, ) +declare @llvm.aarch64.sve.usubwt.nxv4i32(, ) +declare @llvm.aarch64.sve.usubwt.nxv2i64(, )
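
For reference, below is a minimal, self-contained IR sketch (function names are illustrative, not from the patch) showing one intrinsic of each new shape and how it is expected to be selected. The operand types follow from the TableGen classes added above: LLVMMatchType<0> keeps the full-width type, LLVMSubdivide2VectorType<0> halves the element width (so nxv8i16 pairs with nxv16i8), and for the sshll*/*_lane intrinsics the trailing i32 is an immediate (ImmArg) carrying the shift amount or lane index.

; Illustrative sketch only; the expected instruction for each call is noted in
; the comment above it, mirroring the CHECK lines in sve2-intrinsics-widening-dsp.ll.

define <vscale x 8 x i16> @example_saddwb(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
; expected: saddwb z0.h, z0.h, z1.b
  %r = call <vscale x 8 x i16> @llvm.aarch64.sve.saddwb.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %r
}

define <vscale x 4 x i32> @example_sshllb(<vscale x 8 x i16> %a) {
; expected: sshllb z0.s, z0.h, #1
  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sshllb.nxv4i32(<vscale x 8 x i16> %a, i32 1)
  ret <vscale x 4 x i32> %r
}

define <vscale x 2 x i64> @example_smullb_lane(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; expected: smullb z0.d, z0.s, z1.s[3]
  %r = call <vscale x 2 x i64> @llvm.aarch64.sve.smullb.lane.nxv2i64(<vscale x 4 x i32> %a,
                                                                     <vscale x 4 x i32> %b,
                                                                     i32 3)
  ret <vscale x 2 x i64> %r
}

declare <vscale x 8 x i16> @llvm.aarch64.sve.saddwb.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sshllb.nxv4i32(<vscale x 8 x i16>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.smullb.lane.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)

Because the shift amount and lane index are ImmArg operands, they must be compile-time constants in IR; the restricted lane range (z1.h[0-7] for the .s form, z1.s[0-3] for the .d form) falls out of the ZPR3b16/ZPR4b32 register classes and the VectorIndexH32b/VectorIndexS32b operands used in the indexed multiclasses.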