diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index bcadd65fc781e..ca3eb91d3c08d 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1280,8 +1280,26 @@ instCombineSVEVectorBinOp(InstCombiner &IC, IntrinsicInst &II) {
   return IC.replaceInstUsesWith(II, BinOp);
 }
 
+// Canonicalise operations that take an all active predicate (e.g. sve.add ->
+// sve.add_u).
+static std::optional<Instruction *> instCombineSVEAllActive(IntrinsicInst &II,
+                                                            Intrinsic::ID IID) {
+  auto *OpPredicate = II.getOperand(0);
+  if (!match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
+                              m_ConstantInt<AArch64SVEPredPattern::all>())))
+    return std::nullopt;
+
+  auto *Mod = II.getModule();
+  auto *NewDecl = Intrinsic::getDeclaration(Mod, IID, {II.getType()});
+  II.setCalledFunction(NewDecl);
+
+  return &II;
+}
+
 static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC,
                                                             IntrinsicInst &II) {
+  if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_add_u))
+    return II_U;
   if (auto MLA = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
                                                    Intrinsic::aarch64_sve_mla>(
           IC, II, true))
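The rewrite this helper performs is purely a call-target swap: when the governing predicate is ptrue with pattern 31 (all lanes active), the result of the merging intrinsic is fully defined by its active lanes, so the call can be retargeted at the undef-on-inactive (_u) declaration with its operands left untouched. A minimal before/after sketch in IR, using the smax case that the new tests below exercise (%pg, %a, %b, %r are illustrative names only):

; Before: merging form guarded by an all-active predicate.
%pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)

; After: same operands, callee swapped to the _u variant.
%pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)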
@@ -1295,6 +1313,8 @@ static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC,
 
 static std::optional<Instruction *>
 instCombineSVEVectorFAdd(InstCombiner &IC, IntrinsicInst &II) {
+  if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fadd_u))
+    return II_U;
   if (auto FMLA =
           instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
                                             Intrinsic::aarch64_sve_fmla>(IC, II,
@@ -1335,6 +1355,8 @@ instCombineSVEVectorFAddU(InstCombiner &IC, IntrinsicInst &II) {
 
 static std::optional<Instruction *>
 instCombineSVEVectorFSub(InstCombiner &IC, IntrinsicInst &II) {
+  if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fsub_u))
+    return II_U;
   if (auto FMLS =
           instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
                                             Intrinsic::aarch64_sve_fmls>(IC, II,
@@ -1375,6 +1397,8 @@ instCombineSVEVectorFSubU(InstCombiner &IC, IntrinsicInst &II) {
 
 static std::optional<Instruction *> instCombineSVEVectorSub(InstCombiner &IC,
                                                             IntrinsicInst &II) {
+  if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sub_u))
+    return II_U;
   if (auto MLS = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
                                                    Intrinsic::aarch64_sve_mls>(
           IC, II, true))
@@ -1383,11 +1407,17 @@ static std::optional<Instruction *> instCombineSVEVectorSub(InstCombiner &IC,
 }
 
 static std::optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
-                                                            IntrinsicInst &II) {
+                                                            IntrinsicInst &II,
+                                                            Intrinsic::ID IID) {
   auto *OpPredicate = II.getOperand(0);
   auto *OpMultiplicand = II.getOperand(1);
   auto *OpMultiplier = II.getOperand(2);
 
+  // Canonicalise a non _u intrinsic only.
+  if (II.getIntrinsicID() != IID)
+    if (auto II_U = instCombineSVEAllActive(II, IID))
+      return II_U;
+
   // Return true if a given instruction is a unit splat value, false otherwise.
   auto IsUnitSplat = [](auto *I) {
     auto *SplatValue = getSplatValue(I);
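For context on the IsUnitSplat guard above: it feeds an existing fold in instCombineSVEVectorMul that replaces a multiply by a splat of one with the multiplicand itself, which is valid for any predicate because inactive lanes of the merging form already pass the multiplicand through. A sketch of the input shape, under the assumption that the splat arrives as the insertelement/shufflevector form that getSplatValue recognises (this file canonicalises sve.dup.x into that form earlier):

; mul pg, %a, splat(1) is expected to fold to plain %a.
%pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%ins = insertelement <vscale x 8 x i16> poison, i16 1, i64 0
%ones = shufflevector <vscale x 8 x i16> %ins, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
%r = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %ones)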
@@ -1750,31 +1780,92 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
   case Intrinsic::aarch64_sve_ptest_first:
   case Intrinsic::aarch64_sve_ptest_last:
     return instCombineSVEPTest(IC, II);
-  case Intrinsic::aarch64_sve_mul:
-  case Intrinsic::aarch64_sve_mul_u:
-  case Intrinsic::aarch64_sve_fmul:
-  case Intrinsic::aarch64_sve_fmul_u:
-    return instCombineSVEVectorMul(IC, II);
+  case Intrinsic::aarch64_sve_fabd:
+    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fabd_u);
   case Intrinsic::aarch64_sve_fadd:
     return instCombineSVEVectorFAdd(IC, II);
   case Intrinsic::aarch64_sve_fadd_u:
     return instCombineSVEVectorFAddU(IC, II);
+  case Intrinsic::aarch64_sve_fdiv:
+    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fdiv_u);
+  case Intrinsic::aarch64_sve_fmax:
+    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmax_u);
+  case Intrinsic::aarch64_sve_fmaxnm:
+    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmaxnm_u);
+  case Intrinsic::aarch64_sve_fmin:
+    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmin_u);
+  case Intrinsic::aarch64_sve_fminnm:
+    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fminnm_u);
+  case Intrinsic::aarch64_sve_fmla:
+    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmla_u);
+  case Intrinsic::aarch64_sve_fmls:
+    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmls_u);
+  case Intrinsic::aarch64_sve_fmul:
+  case Intrinsic::aarch64_sve_fmul_u:
+    return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_fmul_u);
+  case Intrinsic::aarch64_sve_fmulx:
+    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmulx_u);
+  case Intrinsic::aarch64_sve_fnmla:
+    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fnmla_u);
+  case Intrinsic::aarch64_sve_fnmls:
+    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fnmls_u);
+  case Intrinsic::aarch64_sve_fsub:
+    return instCombineSVEVectorFSub(IC, II);
+  case Intrinsic::aarch64_sve_fsub_u:
+    return instCombineSVEVectorFSubU(IC, II);
   case Intrinsic::aarch64_sve_add:
     return instCombineSVEVectorAdd(IC, II);
   case Intrinsic::aarch64_sve_add_u:
     return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
                                              Intrinsic::aarch64_sve_mla_u>(
         IC, II, true);
-  case Intrinsic::aarch64_sve_fsub:
-    return instCombineSVEVectorFSub(IC, II);
-  case Intrinsic::aarch64_sve_fsub_u:
-    return instCombineSVEVectorFSubU(IC, II);
+  case Intrinsic::aarch64_sve_mla:
+    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_mla_u);
+  case Intrinsic::aarch64_sve_mls:
+    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_mls_u);
+  case Intrinsic::aarch64_sve_mul:
+  case Intrinsic::aarch64_sve_mul_u:
+    return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_mul_u);
+  case Intrinsic::aarch64_sve_sabd:
+    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sabd_u);
+  case Intrinsic::aarch64_sve_smax:
+    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smax_u);
+  case Intrinsic::aarch64_sve_smin:
+    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smin_u);
+  case Intrinsic::aarch64_sve_smulh:
+    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smulh_u);
   case Intrinsic::aarch64_sve_sub:
     return instCombineSVEVectorSub(IC, II);
   case Intrinsic::aarch64_sve_sub_u:
     return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
                                              Intrinsic::aarch64_sve_mls_u>(
         IC, II, true);
+  case Intrinsic::aarch64_sve_uabd:
+    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_uabd_u);
+  case Intrinsic::aarch64_sve_umax:
+    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umax_u);
+  case Intrinsic::aarch64_sve_umin:
+    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umin_u);
+  case Intrinsic::aarch64_sve_umulh:
+    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umulh_u);
+  case Intrinsic::aarch64_sve_asr:
+    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_asr_u);
+  case Intrinsic::aarch64_sve_lsl:
+    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_lsl_u);
+  case Intrinsic::aarch64_sve_lsr:
+    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_lsr_u);
+  case Intrinsic::aarch64_sve_and:
+    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_and_u);
+  case Intrinsic::aarch64_sve_bic:
+    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_bic_u);
+  case Intrinsic::aarch64_sve_eor:
+    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_eor_u);
+  case Intrinsic::aarch64_sve_orr:
+    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_orr_u);
+  case Intrinsic::aarch64_sve_sqsub:
+    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sqsub_u);
+  case Intrinsic::aarch64_sve_uqsub:
+    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_uqsub_u);
   case Intrinsic::aarch64_sve_tbl:
     return instCombineSVETBL(IC, II);
   case Intrinsic::aarch64_sve_uunpkhi:
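The canonicalisation is deliberately conservative about the predicate: only a literal ptrue with pattern AArch64SVEPredPattern::all (encoding 31) is matched. The no_replace tests below use pattern 5 instead, which requests a fixed vector length, so the merging semantics must be kept. A sketch of the shape that is left alone (names are illustrative):

; Predicate is not provably all active: no _u rewrite happens.
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
%r = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)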
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-strictfp.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-strictfp.ll
index 91b47410e8cb3..f6f60d6d64e72 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-strictfp.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-strictfp.ll
@@ -9,7 +9,7 @@ define <vscale x 2 x double> @replace_fadd_intrinsic_double_strictfp(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
 ; CHECK-LABEL: @replace_fadd_intrinsic_double_strictfp(
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) #[[ATTR2:[0-9]+]]
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]]
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fadd.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]]
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 ;
   %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) #1
   %2 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #1
@@ -23,7 +23,7 @@ define <vscale x 2 x double> @call_replace_fadd_intrinsic_double_strictfp(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
 ; CHECK-LABEL: @call_replace_fadd_intrinsic_double_strictfp(
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) #[[ATTR2]]
-; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fadd.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]]
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 ;
   %1 = call <vscale x 2 x double> @replace_fadd_intrinsic_double_strictfp(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #1
@@ -36,7 +36,7 @@ define <vscale x 2 x double> @replace_fmul_intrinsic_double_strictfp(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
 ; CHECK-LABEL: @replace_fmul_intrinsic_double_strictfp(
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) #[[ATTR2]]
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]]
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]]
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 ;
   %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) #1
   %2 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #1
@@ -50,7 +50,7 @@ define <vscale x 2 x double> @call_replace_fmul_intrinsic_double_strictfp(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
 ; CHECK-LABEL: @call_replace_fmul_intrinsic_double_strictfp(
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) #[[ATTR2]]
-; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]]
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 ;
   %1 = call <vscale x 2 x double> @replace_fmul_intrinsic_double_strictfp(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #1
@@ -63,7 +63,7 @@ define <vscale x 2 x double> @replace_fsub_intrinsic_double_strictfp(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
 ; CHECK-LABEL: @replace_fsub_intrinsic_double_strictfp(
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) #[[ATTR2]]
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]]
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]]
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 ;
   %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) #1
   %2 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #1
@@ -77,7 +77,7 @@ define <vscale x 2 x double> @call_replace_fsub_intrinsic_double_strictfp(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
 ; CHECK-LABEL: @call_replace_fsub_intrinsic_double_strictfp(
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) #[[ATTR2]]
-; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]]
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 ;
   %1 = call <vscale x 2 x double> @replace_fsub_intrinsic_double_strictfp(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #1
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-combine-to-u-forms.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-combine-to-u-forms.ll
new file mode 100644
index 0000000000000..60b2efe27168c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-combine-to-u-forms.ll
@@ -0,0 +1,2142 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
+
+; Replace SVE merging intrinsics with their equivalent undef (_u) variants when they take an all active predicate.
+
+; Float arithmetic
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fabd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fabd_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fabd_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fabd.u.nxv8f16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fabd.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %2
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fabd_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fabd_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fabd.u.nxv4f32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %2
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fabd_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fabd_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fabd.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %2
+}
+
+define <vscale x 2 x double> @no_replace_fabd_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @no_replace_fabd_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %2
+}
+
+; The aarch64_sve_fadd intrinsic combines to the LLVM fadd instruction.
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fadd_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fadd_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast <vscale x 8 x half> [[A]], [[B]]
+; CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %2
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fadd_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fadd_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast <vscale x 4 x float> [[A]], [[B]]
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %2
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fadd_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fadd_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast <vscale x 2 x double> [[A]], [[B]]
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %2
+}
+
+define <vscale x 2 x double> @no_replace_fadd_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @no_replace_fadd_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %2
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fdiv.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fdiv_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fdiv_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fdiv.u.nxv8f16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fdiv.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %2
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fdiv_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fdiv_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fdiv.u.nxv4f32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %2
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fdiv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fdiv_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fdiv_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fdiv.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
+;
+  %1 = tail
call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fdiv.nxv2f64( %1, %a, %b) + ret %2 +} + +define @no_replace_fdiv_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_fdiv_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fdiv.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call fast @llvm.aarch64.sve.fdiv.nxv2f64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmax.nxv8f16(, , ) +define @replace_fmax_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmax_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmax.u.nxv8f16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmax.nxv8f16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmax.nxv4f32(, , ) +define @replace_fmax_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmax_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmax.u.nxv4f32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmax.nxv4f32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmax.nxv2f64(, , ) +define @replace_fmax_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmax_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmax.u.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmax.nxv2f64( %1, %a, %b) + ret %2 +} + +define @no_replace_fmax_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_fmax_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmax.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call fast @llvm.aarch64.sve.fmax.nxv2f64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmaxnm.nxv8f16(, , ) +define @replace_fmaxnm_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmaxnm_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmaxnm.u.nxv8f16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmaxnm.nxv8f16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmaxnm.nxv4f32(, , ) +define @replace_fmaxnm_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmaxnm_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], 
[[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmaxnm.u.nxv4f32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmaxnm.nxv4f32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmaxnm.nxv2f64(, , ) +define @replace_fmaxnm_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmaxnm_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmaxnm.u.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmaxnm.nxv2f64( %1, %a, %b) + ret %2 +} + +define @no_replace_fmaxnm_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_fmaxnm_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmaxnm.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call fast @llvm.aarch64.sve.fmaxnm.nxv2f64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmin.nxv8f16(, , ) +define @replace_fmin_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmin_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmin.u.nxv8f16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmin.nxv8f16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmin.nxv4f32(, , ) +define @replace_fmin_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmin_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmin.u.nxv4f32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmin.nxv4f32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmin.nxv2f64(, , ) +define @replace_fmin_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmin_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmin.u.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmin.nxv2f64( %1, %a, %b) + ret %2 +} + +define @no_replace_fmin_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_fmin_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmin.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = 
tail call fast @llvm.aarch64.sve.fmin.nxv2f64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fminnm.nxv8f16(, , ) +define @replace_fminnm_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fminnm_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fminnm.u.nxv8f16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fminnm.nxv8f16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fminnm.nxv4f32(, , ) +define @replace_fminnm_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fminnm_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fminnm.u.nxv4f32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fminnm.nxv4f32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fminnm.nxv2f64(, , ) +define @replace_fminnm_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fminnm_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fminnm.u.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fminnm.nxv2f64( %1, %a, %b) + ret %2 +} + +define @no_replace_fminnm_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_fminnm_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fminnm.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call fast @llvm.aarch64.sve.fminnm.nxv2f64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmla.nxv8f16(, , , ) +define @replace_fmla_intrinsic_half( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmla_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmla.u.nxv8f16( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmla.nxv8f16( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fmla.nxv4f32(, , , ) +define @replace_fmla_intrinsic_float( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmla_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmla.u.nxv4f32( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmla.nxv4f32( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fmla.nxv2f64(, , , ) +define @replace_fmla_intrinsic_double( %a, %b, %c) #0 { +; 
CHECK-LABEL: define @replace_fmla_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmla.u.nxv2f64( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmla.nxv2f64( %1, %a, %b, %c) + ret %2 +} + +define @no_replace_fmla_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @no_replace_fmla_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmla.nxv2f64( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call fast @llvm.aarch64.sve.fmla.nxv2f64( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fmls.nxv8f16(, , , ) +define @replace_fmls_intrinsic_half( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmls_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmls.u.nxv8f16( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmls.nxv8f16( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fmls.nxv4f32(, , , ) +define @replace_fmls_intrinsic_float( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmls_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmls.u.nxv4f32( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmls.nxv4f32( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fmls.nxv2f64(, , , ) +define @replace_fmls_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmls_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmls.u.nxv2f64( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmls.nxv2f64( %1, %a, %b, %c) + ret %2 +} + +define @no_replace_fmls_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @no_replace_fmls_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmls.nxv2f64( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call fast @llvm.aarch64.sve.fmls.nxv2f64( %1, %a, %b, %c) + ret %2 +} + +; aarch64_sve_fmul intrinsic combines to a LLVM instruction fmul. 
+ +declare @llvm.aarch64.sve.fmul.nxv8f16(, , ) +define @replace_fmul_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmul_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast [[A]], [[B]] +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmul.nxv4f32(, , ) +define @replace_fmul_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmul_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast [[A]], [[B]] +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmul.nxv4f32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmul.nxv2f64(, , ) +define @replace_fmul_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmul_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast [[A]], [[B]] +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmul.nxv2f64( %1, %a, %b) + ret %2 +} + +define @no_replace_fmul_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_fmul_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmul.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call fast @llvm.aarch64.sve.fmul.nxv2f64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmulx.nxv8f16(, , ) +define @replace_fmulx_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmulx_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmulx.u.nxv8f16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmulx.nxv8f16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmulx.nxv4f32(, , ) +define @replace_fmulx_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmulx_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmulx.u.nxv4f32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmulx.nxv4f32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmulx.nxv2f64(, , ) +define @replace_fmulx_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmulx_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmulx.u.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmulx.nxv2f64( %1, %a, %b) + ret %2 +} + +define @no_replace_fmulx_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define 
@no_replace_fmulx_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmulx.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call fast @llvm.aarch64.sve.fmulx.nxv2f64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fnmla.nxv8f16(, , , ) +define @replace_fnmla_intrinsic_half( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmla_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fnmla.u.nxv8f16( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fnmla.nxv8f16( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fnmla.nxv4f32(, , , ) +define @replace_fnmla_intrinsic_float( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmla_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fnmla.u.nxv4f32( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fnmla.nxv4f32( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fnmla.nxv2f64(, , , ) +define @replace_fnmla_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmla_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fnmla.u.nxv2f64( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fnmla.nxv2f64( %1, %a, %b, %c) + ret %2 +} + +define @no_replace_fnmla_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @no_replace_fnmla_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fnmla.nxv2f64( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call fast @llvm.aarch64.sve.fnmla.nxv2f64( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fnmls.nxv8f16(, , , ) +define @replace_fnmls_intrinsic_half( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmls_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fnmls.u.nxv8f16( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fnmls.nxv8f16( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fnmls.nxv4f32(, , , ) +define @replace_fnmls_intrinsic_float( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmls_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: 
[[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fnmls.u.nxv4f32( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fnmls.nxv4f32( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fnmls.nxv2f64(, , , ) +define @replace_fnmls_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmls_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fnmls.u.nxv2f64( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fnmls.nxv2f64( %1, %a, %b, %c) + ret %2 +} + +define @no_replace_fnmls_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @no_replace_fnmls_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fnmls.nxv2f64( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call fast @llvm.aarch64.sve.fnmls.nxv2f64( %1, %a, %b, %c) + ret %2 +} + +; aarch64_sve_fsub intrinsic combines to a LLVM instruction fsub. + +declare @llvm.aarch64.sve.fsub.nxv8f16(, , ) +define @replace_fsub_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fsub_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = fsub fast [[A]], [[B]] +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fsub.nxv8f16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fsub.nxv4f32(, , ) +define @replace_fsub_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fsub_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = fsub fast [[A]], [[B]] +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fsub.nxv4f32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fsub.nxv2f64(, , ) +define @replace_fsub_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fsub_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = fsub fast [[A]], [[B]] +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fsub.nxv2f64( %1, %a, %b) + ret %2 +} + +define @no_replace_fsub_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_fsub_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fsub.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call fast @llvm.aarch64.sve.fsub.nxv2f64( %1, %a, %b) + ret %2 +} + +; Integer arithmetic + +declare @llvm.aarch64.sve.add.nxv16i8(, , ) +define @replace_add_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_add_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: 
[[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.u.nxv16i8( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %2 = tail call @llvm.aarch64.sve.add.nxv16i8( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.add.nxv8i16(, , ) +define @replace_add_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_add_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.u.nxv8i16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call @llvm.aarch64.sve.add.nxv8i16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.add.nxv4i32(, , ) +define @replace_add_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_add_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.u.nxv4i32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call @llvm.aarch64.sve.add.nxv4i32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.add.nxv2i64(, , ) +define @replace_add_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_add_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.u.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call @llvm.aarch64.sve.add.nxv2i64( %1, %a, %b) + ret %2 +} + +define @no_replace_add_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_add_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call @llvm.aarch64.sve.add.nxv2i64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.mla.nxv16i8(, , , ) +define @replace_mla_intrinsic_i8( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mla_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.mla.u.nxv16i8( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %2 = tail call @llvm.aarch64.sve.mla.nxv16i8( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.mla.nxv8i16(, , , ) +define @replace_mla_intrinsic_i16( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mla_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.mla.u.nxv8i16( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call @llvm.aarch64.sve.mla.nxv8i16( %1, %a, %b, %c) + ret %2 +} + +declare 
@llvm.aarch64.sve.mla.nxv4i32(, , , ) +define @replace_mla_intrinsic_i32( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mla_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.mla.u.nxv4i32( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call @llvm.aarch64.sve.mla.nxv4i32( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.mla.nxv2i64(, , , ) +define @replace_mla_intrinsic_i64( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mla_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.mla.u.nxv2i64( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call @llvm.aarch64.sve.mla.nxv2i64( %1, %a, %b, %c) + ret %2 +} + +define @no_replace_mla_intrinsic_i64( %a, %b, %c) #0 { +; CHECK-LABEL: define @no_replace_mla_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.mla.nxv2i64( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call @llvm.aarch64.sve.mla.nxv2i64( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.mls.nxv16i8(, , , ) +define @replace_mls_intrinsic_i8( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mls_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.mls.u.nxv16i8( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %2 = tail call @llvm.aarch64.sve.mls.nxv16i8( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.mls.nxv8i16(, , , ) +define @replace_mls_intrinsic_i16( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mls_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.mls.u.nxv8i16( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call @llvm.aarch64.sve.mls.nxv8i16( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.mls.nxv4i32(, , , ) +define @replace_mls_intrinsic_i32( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mls_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.mls.u.nxv4i32( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call @llvm.aarch64.sve.mls.nxv4i32( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.mls.nxv2i64(, , , ) +define @replace_mls_intrinsic_i64( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mls_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = 
tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.mls.u.nxv2i64( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call @llvm.aarch64.sve.mls.nxv2i64( %1, %a, %b, %c) + ret %2 +} + +define @no_replace_mls_intrinsic_i64( %a, %b, %c) #0 { +; CHECK-LABEL: define @no_replace_mls_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.mls.nxv2i64( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call @llvm.aarch64.sve.mls.nxv2i64( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.mul.nxv16i8(, , ) +define @replace_mul_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_mul_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.mul.u.nxv16i8( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %2 = tail call @llvm.aarch64.sve.mul.nxv16i8( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.mul.nxv8i16(, , ) +define @replace_mul_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_mul_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.mul.u.nxv8i16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call @llvm.aarch64.sve.mul.nxv8i16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.mul.nxv4i32(, , ) +define @replace_mul_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_mul_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.mul.u.nxv4i32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call @llvm.aarch64.sve.mul.nxv4i32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.mul.nxv2i64(, , ) +define @replace_mul_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_mul_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.mul.u.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call @llvm.aarch64.sve.mul.nxv2i64( %1, %a, %b) + ret %2 +} + +define @no_replace_mul_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_mul_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.mul.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call @llvm.aarch64.sve.mul.nxv2i64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.sabd.nxv16i8(, , ) +define @replace_sabd_intrinsic_i8( %a, %b) #0 { +; 
CHECK-LABEL: define <vscale x 16 x i8> @replace_sabd_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %2
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_sabd_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_sabd_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %2
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_sabd_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_sabd_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_sabd_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_sabd_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @no_replace_sabd_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_sabd_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.smax.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_smax_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_smax_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smax.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smax.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %2
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_smax_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_smax_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smax.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %2
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_smax_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_smax_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_smax_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_smax_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smax.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @no_replace_smax_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_smax_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.smin.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_smin_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_smin_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smin.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smin.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %2
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_smin_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_smin_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smin.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %2
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_smin_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_smin_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_smin_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_smin_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smin.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @no_replace_smin_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_smin_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_smulh_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_smulh_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %2
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_smulh_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_smulh_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %2
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_smulh_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_smulh_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_smulh_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_smulh_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @no_replace_smulh_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_smulh_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_sub_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_sub_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %2
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_sub_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_sub_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %2
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_sub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_sub_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_sub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_sub_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @no_replace_sub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_sub_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uabd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_uabd_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_uabd_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %2
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uabd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_uabd_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_uabd_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %2
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_uabd_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_uabd_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_uabd_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_uabd_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @no_replace_uabd_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_uabd_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.umax.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_umax_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_umax_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umax.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umax.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %2
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_umax_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_umax_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umax.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %2
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_umax_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_umax_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_umax_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_umax_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umax.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @no_replace_umax_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_umax_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.umin.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_umin_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_umin_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umin.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umin.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %2
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_umin_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_umin_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umin.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %2
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_umin_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_umin_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_umin_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_umin_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umin.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @no_replace_umin_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_umin_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_umulh_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_umulh_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %2
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_umulh_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_umulh_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %2
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_umulh_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_umulh_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_umulh_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_umulh_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @no_replace_umulh_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_umulh_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+; Shifts
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_asr_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_asr_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asr.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %2
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_asr_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_asr_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.asr.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %2
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_asr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_asr_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asr.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_asr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_asr_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asr.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @no_replace_asr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_asr_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_lsl_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_lsl_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %2
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_lsl_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_lsl_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %2
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_lsl_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_lsl_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_lsl_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_lsl_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @no_replace_lsl_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_lsl_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_lsr_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_lsr_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %2
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_lsr_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_lsr_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %2
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_lsr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_lsr_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_lsr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_lsr_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @no_replace_lsr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_lsr_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+; Logical operations
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_and_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_and_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.and.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %2
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_and_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_and_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %2
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_and_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_and_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_and_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_and_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @no_replace_and_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_and_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_bic_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_bic_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bic.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %2
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_bic_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_bic_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bic.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %2
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_bic_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_bic_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bic.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_bic_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_bic_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @no_replace_bic_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_bic_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_eor_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_eor_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.eor.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %2
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_eor_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_eor_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.eor.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %2
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_eor_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_eor_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_eor_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_eor_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @no_replace_eor_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_eor_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_orr_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_orr_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.orr.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %2
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_orr_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_orr_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.orr.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %2
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_orr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_orr_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_orr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_orr_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @no_replace_orr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_orr_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+; SVE2 - Uniform DSP operations
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_sqsub_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_sqsub_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %2
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_sqsub_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_sqsub_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %2
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_sqsub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_sqsub_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_sqsub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_sqsub_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @no_replace_sqsub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_sqsub_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_uqsub_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_uqsub_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %2
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_uqsub_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_uqsub_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %2
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_uqsub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_uqsub_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_uqsub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_uqsub_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @no_replace_uqsub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_uqsub_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+attributes #0 = { "target-features"="+sve,+sve2" }