diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-muladdsub.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-muladdsub.ll index 4201b27a6beba..a1c6a37933254 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-muladdsub.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-muladdsub.ll @@ -3,226 +3,442 @@ target triple = "aarch64-unknown-linux-gnu" -define @combine_fmla( %p, %a, %b, %c) #0 { -; CHECK-LABEL: @combine_fmla( -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[P:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = call fast @llvm.aarch64.sve.fmla.nxv8f16( [[TMP1]], [[C:%.*]], [[A:%.*]], [[B:%.*]]) +; fadd(a, fmul(b, c)) -> fmla(a, b, c) +define @combine_fmuladd_1( %p, %a, %b, %c) #0 { +; CHECK-LABEL: @combine_fmuladd_1( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.aarch64.sve.fmla.nxv8f16( [[P:%.*]], [[A:%.*]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( %p, %b, %c) + %2 = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( %p, %a, %1) + ret %2 +} + +; TODO: Test highlights an invalid combine! +; fadd(a, fmul_u(b, c)) -> fmla(a, b, c) +define @combine_fmuladd_2( %p, %a, %b, %c) #0 { +; CHECK-LABEL: @combine_fmuladd_2( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.aarch64.sve.fmla.u.nxv8f16( [[P:%.*]], [[A:%.*]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmul.u.nxv8f16( %p, %b, %c) + %2 = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( %p, %a, %1) + ret %2 +} + +; fadd_u(a, fmul_u(b, c)) -> fmla_u(a, b, c) +define @combine_fmuladd_3( %p, %a, %b, %c) #0 { +; CHECK-LABEL: @combine_fmuladd_3( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.aarch64.sve.fmla.u.nxv8f16( [[P:%.*]], [[A:%.*]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmul.u.nxv8f16( %p, %b, %c) + %2 = tail call fast @llvm.aarch64.sve.fadd.u.nxv8f16( %p, %a, %1) + ret %2 +} + +; TODO: Missing combine! +; fadd_u(a, fmul(b, c)) -> fmla_u(a, b, c) +define @combine_fmuladd_4( %p, %a, %b, %c) #0 { +; CHECK-LABEL: @combine_fmuladd_4( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.aarch64.sve.fmla.nxv8f16( [[P:%.*]], [[A:%.*]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( %p, %b, %c) + %2 = tail call fast @llvm.aarch64.sve.fadd.u.nxv8f16( %p, %a, %1) + ret %2 +} + +; fadd(fmul(b, c), a) -> fmad(b, c, a) +define @combine_fmuladd_5( %p, %a, %b, %c) #0 { +; CHECK-LABEL: @combine_fmuladd_5( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.aarch64.sve.fmad.nxv8f16( [[P:%.*]], [[B:%.*]], [[C:%.*]], [[A:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( %p, %b, %c) + %2 = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( %p, %1, %a) + ret %2 +} + +; TODO: Missing combine! 
+; fadd(fmul_u(b, c), a) -> fmla_u(a, b, c) +define @combine_fmuladd_6( %p, %a, %b, %c) #0 { +; CHECK-LABEL: @combine_fmuladd_6( +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmul.u.nxv8f16( [[P:%.*]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( [[P]], [[TMP1]], [[A:%.*]]) ; CHECK-NEXT: ret [[TMP2]] ; - %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %p) - %2 = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( %1, %a, %b) - %3 = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( %1, %c, %2) - ret %3 + %1 = tail call fast @llvm.aarch64.sve.fmul.u.nxv8f16( %p, %b, %c) + %2 = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( %p, %1, %a) + ret %2 } -define @combine_fmla_u( %p, %a, %b, %c) #0 { -; CHECK-LABEL: @combine_fmla_u( -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[P:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = call fast @llvm.aarch64.sve.fmla.u.nxv8f16( [[TMP1]], [[C:%.*]], [[A:%.*]], [[B:%.*]]) +; TODO: Missing combine! +; fadd_u(fmul_u(b, c), a) -> fmla_u(a, b, c) +define @combine_fmuladd_7( %p, %a, %b, %c) #0 { +; CHECK-LABEL: @combine_fmuladd_7( +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmul.u.nxv8f16( [[P:%.*]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fadd.u.nxv8f16( [[P]], [[TMP1]], [[A:%.*]]) ; CHECK-NEXT: ret [[TMP2]] ; - %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %p) - %2 = tail call fast @llvm.aarch64.sve.fmul.u.nxv8f16( %1, %a, %b) - %3 = tail call fast @llvm.aarch64.sve.fadd.u.nxv8f16( %1, %c, %2) - ret %3 + %1 = tail call fast @llvm.aarch64.sve.fmul.u.nxv8f16( %p, %b, %c) + %2 = tail call fast @llvm.aarch64.sve.fadd.u.nxv8f16( %p, %1, %a) + ret %2 +} + +; TODO: Missing combine! +; fadd_u(fmul(b, c), a) -> fmla_u(a, b, c) +define @combine_fmuladd_8( %p, %a, %b, %c) #0 { +; CHECK-LABEL: @combine_fmuladd_8( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.aarch64.sve.fmad.nxv8f16( [[P:%.*]], [[B:%.*]], [[C:%.*]], [[A:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( %p, %b, %c) + %2 = tail call fast @llvm.aarch64.sve.fadd.u.nxv8f16( %p, %1, %a) + ret %2 +} + +; fsub(a, fmul(b, c)) -> fmls(a, b, c) +define @combine_fmulsub_1( %p, %a, %b, %c) #0 { +; CHECK-LABEL: @combine_fmulsub_1( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.aarch64.sve.fmls.nxv8f16( [[P:%.*]], [[A:%.*]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( %p, %b, %c) + %2 = tail call fast @llvm.aarch64.sve.fsub.nxv8f16( %p, %a, %1) + ret %2 +} + +; TODO: Test highlights an invalid combine! 
+; fsub(a, fmul_u(b, c)) -> fmls(a, b, c) +define @combine_fmulsub_2( %p, %a, %b, %c) #0 { +; CHECK-LABEL: @combine_fmulsub_2( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.aarch64.sve.fmls.u.nxv8f16( [[P:%.*]], [[A:%.*]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmul.u.nxv8f16( %p, %b, %c) + %2 = tail call fast @llvm.aarch64.sve.fsub.nxv8f16( %p, %a, %1) + ret %2 } -define @combine_mla_i8( %p, %a, %b, %c) #0 { -; CHECK-LABEL: @combine_mla_i8( -; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.aarch64.sve.mla.nxv16i8( [[P:%.*]], [[C:%.*]], [[A:%.*]], [[B:%.*]]) +; fsub_u(a, fmul_u(b, c)) -> fmls_u(a, b, c) +define @combine_fmulsub_3( %p, %a, %b, %c) #0 { +; CHECK-LABEL: @combine_fmulsub_3( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.aarch64.sve.fmls.u.nxv8f16( [[P:%.*]], [[A:%.*]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmul.u.nxv8f16( %p, %b, %c) + %2 = tail call fast @llvm.aarch64.sve.fsub.u.nxv8f16( %p, %a, %1) + ret %2 +} + +; TODO: Missing combine! +; fsub_u(a, fmul(b, c)) -> fmls_u(a, b, c) +define @combine_fmulsub_4( %p, %a, %b, %c) #0 { +; CHECK-LABEL: @combine_fmulsub_4( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.aarch64.sve.fmls.nxv8f16( [[P:%.*]], [[A:%.*]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( %p, %b, %c) + %2 = tail call fast @llvm.aarch64.sve.fsub.u.nxv8f16( %p, %a, %1) + ret %2 +} + +; fsub(mul(b, c), a) -> fnmsb(b, c, a) +define @combine_fmulsub_5( %p, %a, %b, %c) #0 { +; CHECK-LABEL: @combine_fmulsub_5( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.aarch64.sve.fnmsb.nxv8f16( [[P:%.*]], [[B:%.*]], [[C:%.*]], [[A:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( %p, %b, %c) + %2 = tail call fast @llvm.aarch64.sve.fsub.nxv8f16( %p, %1, %a) + ret %2 +} + +; TODO: Missing combine! +; fsub(fmul_u(b, c), a) -> fnmls_u(a, b, c) +define @combine_fmulsub_6( %p, %a, %b, %c) #0 { +; CHECK-LABEL: @combine_fmulsub_6( +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmul.u.nxv8f16( [[P:%.*]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fsub.nxv8f16( [[P]], [[TMP1]], [[A:%.*]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call fast @llvm.aarch64.sve.fmul.u.nxv8f16( %p, %b, %c) + %2 = tail call fast @llvm.aarch64.sve.fsub.nxv8f16( %p, %1, %a) + ret %2 +} + +; TODO: Missing combine! +; fsub_u(fmul_u(b, c), a) -> fnmls_u(a, b, c) +define @combine_fmulsub_7( %p, %a, %b, %c) #0 { +; CHECK-LABEL: @combine_fmulsub_7( +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmul.u.nxv8f16( [[P:%.*]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fsub.u.nxv8f16( [[P]], [[TMP1]], [[A:%.*]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call fast @llvm.aarch64.sve.fmul.u.nxv8f16( %p, %b, %c) + %2 = tail call fast @llvm.aarch64.sve.fsub.u.nxv8f16( %p, %1, %a) + ret %2 +} + +; TODO: Missing combine! 
+; fsub_u(fmul(b, c), a) -> fnmls_u(a, b, c) +define @combine_fmulsub_8( %p, %a, %b, %c) #0 { +; CHECK-LABEL: @combine_fmulsub_8( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.aarch64.sve.fnmsb.nxv8f16( [[P:%.*]], [[B:%.*]], [[C:%.*]], [[A:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( %p, %b, %c) + %2 = tail call fast @llvm.aarch64.sve.fsub.u.nxv8f16( %p, %1, %a) + ret %2 +} + +; add(a, mul(b, c)) -> mla(a, b, c) +define @combine_muladd_1( %p, %a, %b, %c) #0 { +; CHECK-LABEL: @combine_muladd_1( +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.aarch64.sve.mla.nxv16i8( [[P:%.*]], [[A:%.*]], [[B:%.*]], [[C:%.*]]) ; CHECK-NEXT: ret [[TMP1]] ; - %1 = tail call @llvm.aarch64.sve.mul.nxv16i8( %p, %a, %b) - %2 = tail call @llvm.aarch64.sve.add.nxv16i8( %p, %c, %1) + %1 = tail call @llvm.aarch64.sve.mul.nxv16i8( %p, %b, %c) + %2 = tail call @llvm.aarch64.sve.add.nxv16i8( %p, %a, %1) ret %2 } -define @combine_mla_i8_u( %p, %a, %b, %c) #0 { -; CHECK-LABEL: @combine_mla_i8_u( -; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.aarch64.sve.mla.u.nxv16i8( [[P:%.*]], [[C:%.*]], [[A:%.*]], [[B:%.*]]) +; TODO: Missing combine! +; add(a, mul_u(b, c)) -> mla(a, b, c) +define @combine_muladd_2( %p, %a, %b, %c) #0 { +; CHECK-LABEL: @combine_muladd_2( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.u.nxv16i8( [[P:%.*]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.nxv16i8( [[P]], [[A:%.*]], [[TMP1]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.mul.u.nxv16i8( %p, %b, %c) + %2 = tail call @llvm.aarch64.sve.add.nxv16i8( %p, %a, %1) + ret %2 +} + +; add_u(a, mul_u(b, c)) -> mla_u(a, b, c) +define @combine_muladd_3( %p, %a, %b, %c) #0 { +; CHECK-LABEL: @combine_muladd_3( +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.aarch64.sve.mla.u.nxv16i8( [[P:%.*]], [[A:%.*]], [[B:%.*]], [[C:%.*]]) ; CHECK-NEXT: ret [[TMP1]] ; - %1 = tail call @llvm.aarch64.sve.mul.u.nxv16i8( %p, %a, %b) - %2 = tail call @llvm.aarch64.sve.add.u.nxv16i8( %p, %c, %1) + %1 = tail call @llvm.aarch64.sve.mul.u.nxv16i8( %p, %b, %c) + %2 = tail call @llvm.aarch64.sve.add.u.nxv16i8( %p, %a, %1) ret %2 } -define @combine_fmad( %p, %a, %b, %c) #0 { -; CHECK-LABEL: @combine_fmad( -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[P:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = call fast @llvm.aarch64.sve.fmad.nxv8f16( [[TMP1]], [[A:%.*]], [[B:%.*]], [[C:%.*]]) -; CHECK-NEXT: ret [[TMP2]] +; TODO: Missing combine! 
+; add_u(a, mul(b, c)) -> mla_u(a, b, c)
+define @combine_muladd_4( %p, %a, %b, %c) #0 {
+; CHECK-LABEL: @combine_muladd_4(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.nxv16i8( [[P:%.*]], [[B:%.*]], [[C:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.u.nxv16i8( [[P]], [[A:%.*]], [[TMP1]])
+; CHECK-NEXT:    ret [[TMP2]]
 ;
-  %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %p)
-  %2 = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( %1, %a, %b)
-  %3 = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( %1, %2, %c)
-  ret %3
+  %1 = tail call @llvm.aarch64.sve.mul.nxv16i8( %p, %b, %c)
+  %2 = tail call @llvm.aarch64.sve.add.u.nxv16i8( %p, %a, %1)
+  ret %2
 }
 
-define @combine_mad_i8( %p, %a, %b, %c) #0 {
-; CHECK-LABEL: @combine_mad_i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = call @llvm.aarch64.sve.mad.nxv16i8( [[P:%.*]], [[A:%.*]], [[B:%.*]], [[C:%.*]])
+; add(mul(b, c), a) -> mad(b, c, a)
+define @combine_muladd_5( %p, %a, %b, %c) #0 {
+; CHECK-LABEL: @combine_muladd_5(
+; CHECK-NEXT:    [[TMP1:%.*]] = call @llvm.aarch64.sve.mad.nxv16i8( [[P:%.*]], [[B:%.*]], [[C:%.*]], [[A:%.*]])
 ; CHECK-NEXT:    ret [[TMP1]]
 ;
-  %1 = tail call @llvm.aarch64.sve.mul.nxv16i8( %p, %a, %b)
-  %2 = tail call @llvm.aarch64.sve.add.nxv16i8( %p, %1, %c)
+  %1 = tail call @llvm.aarch64.sve.mul.nxv16i8( %p, %b, %c)
+  %2 = tail call @llvm.aarch64.sve.add.nxv16i8( %p, %1, %a)
   ret %2
 }
 
-define @combine_fmls( %p, %a, %b, %c) #0 {
-; CHECK-LABEL: @combine_fmls(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[P:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = call fast @llvm.aarch64.sve.fmls.nxv8f16( [[TMP1]], [[C:%.*]], [[A:%.*]], [[B:%.*]])
-; CHECK-NEXT:    ret [[TMP2]]
+; TODO: Missing combine!
+; add(mul_u(b, c), a) -> mla_u(a, b, c)
+define @combine_muladd_6( %p, %a, %b, %c) #0 {
+; CHECK-LABEL: @combine_muladd_6(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.u.nxv16i8( [[P:%.*]], [[B:%.*]], [[C:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.nxv16i8( [[P]], [[TMP1]], [[A:%.*]])
+; CHECK-NEXT:    ret [[TMP2]]
 ;
-  %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %p)
-  %2 = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( %1, %a, %b)
-  %3 = tail call fast @llvm.aarch64.sve.fsub.nxv8f16( %1, %c, %2)
-  ret %3
+  %1 = tail call @llvm.aarch64.sve.mul.u.nxv16i8( %p, %b, %c)
+  %2 = tail call @llvm.aarch64.sve.add.nxv16i8( %p, %1, %a)
+  ret %2
 }
 
-define @combine_fmls_u( %p, %a, %b, %c) #0 {
-; CHECK-LABEL: @combine_fmls_u(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[P:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = call fast @llvm.aarch64.sve.fmls.u.nxv8f16( [[TMP1]], [[C:%.*]], [[A:%.*]], [[B:%.*]])
-; CHECK-NEXT:    ret [[TMP2]]
+; TODO: Missing combine!
+; add_u(mul_u(b, c), a) -> mla_u(a, b, c)
+define @combine_muladd_7( %p, %a, %b, %c) #0 {
+; CHECK-LABEL: @combine_muladd_7(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.u.nxv16i8( [[P:%.*]], [[B:%.*]], [[C:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.u.nxv16i8( [[P]], [[TMP1]], [[A:%.*]])
+; CHECK-NEXT:    ret [[TMP2]]
 ;
-  %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %p)
-  %2 = tail call fast @llvm.aarch64.sve.fmul.u.nxv8f16( %1, %a, %b)
-  %3 = tail call fast @llvm.aarch64.sve.fsub.u.nxv8f16( %1, %c, %2)
-  ret %3
+  %1 = tail call @llvm.aarch64.sve.mul.u.nxv16i8( %p, %b, %c)
+  %2 = tail call @llvm.aarch64.sve.add.u.nxv16i8( %p, %1, %a)
+  ret %2
+}
+
+; TODO: Missing combine!
+; add_u(mul(b, c), a) -> mla_u(a, b, c) +define @combine_muladd_8( %p, %a, %b, %c) #0 { +; CHECK-LABEL: @combine_muladd_8( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.nxv16i8( [[P:%.*]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.u.nxv16i8( [[P]], [[TMP1]], [[A:%.*]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.mul.nxv16i8( %p, %b, %c) + %2 = tail call @llvm.aarch64.sve.add.u.nxv16i8( %p, %1, %a) + ret %2 } -define @combine_mls_i8( %p, %a, %b, %c) #0 { -; CHECK-LABEL: @combine_mls_i8( -; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.aarch64.sve.mls.nxv16i8( [[P:%.*]], [[C:%.*]], [[A:%.*]], [[B:%.*]]) +; sub(a, mul(b, c)) -> mls(a, b, c) +define @combine_mulsub_1( %p, %a, %b, %c) #0 { +; CHECK-LABEL: @combine_mulsub_1( +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.aarch64.sve.mls.nxv16i8( [[P:%.*]], [[A:%.*]], [[B:%.*]], [[C:%.*]]) ; CHECK-NEXT: ret [[TMP1]] ; - %1 = tail call @llvm.aarch64.sve.mul.nxv16i8( %p, %a, %b) - %2 = tail call @llvm.aarch64.sve.sub.nxv16i8( %p, %c, %1) + %1 = tail call @llvm.aarch64.sve.mul.nxv16i8( %p, %b, %c) + %2 = tail call @llvm.aarch64.sve.sub.nxv16i8( %p, %a, %1) ret %2 } -define @combine_mls_i8_u( %p, %a, %b, %c) #0 { -; CHECK-LABEL: @combine_mls_i8_u( -; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.aarch64.sve.mls.u.nxv16i8( [[P:%.*]], [[C:%.*]], [[A:%.*]], [[B:%.*]]) +; TODO: Missing combine! +; sub(a, mul_u(b, c)) -> mls(a, b, c) +define @combine_mulsub_2( %p, %a, %b, %c) #0 { +; CHECK-LABEL: @combine_mulsub_2( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.u.nxv16i8( [[P:%.*]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.sub.nxv16i8( [[P]], [[A:%.*]], [[TMP1]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.mul.u.nxv16i8( %p, %b, %c) + %2 = tail call @llvm.aarch64.sve.sub.nxv16i8( %p, %a, %1) + ret %2 +} + +; sub_u(a, mul_u(b, c)) -> mls_u(a, b, c) +define @combine_mulsub_3( %p, %a, %b, %c) #0 { +; CHECK-LABEL: @combine_mulsub_3( +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.aarch64.sve.mls.u.nxv16i8( [[P:%.*]], [[A:%.*]], [[B:%.*]], [[C:%.*]]) ; CHECK-NEXT: ret [[TMP1]] ; - %1 = tail call @llvm.aarch64.sve.mul.u.nxv16i8( %p, %a, %b) - %2 = tail call @llvm.aarch64.sve.sub.u.nxv16i8( %p, %c, %1) + %1 = tail call @llvm.aarch64.sve.mul.u.nxv16i8( %p, %b, %c) + %2 = tail call @llvm.aarch64.sve.sub.u.nxv16i8( %p, %a, %1) ret %2 } -define @combine_fnmsb( %p, %a, %b, %c) #0 { -; CHECK-LABEL: @combine_fnmsb( -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[P:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = call fast @llvm.aarch64.sve.fnmsb.nxv8f16( [[TMP1]], [[A:%.*]], [[B:%.*]], [[C:%.*]]) -; CHECK-NEXT: ret [[TMP2]] +; TODO: Missing combine! 
+; sub_u(a, mul(b, c)) -> mls_u(a, b, c) +define @combine_mulsub_4( %p, %a, %b, %c) #0 { +; CHECK-LABEL: @combine_mulsub_4( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.nxv16i8( [[P:%.*]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.sub.u.nxv16i8( [[P]], [[A:%.*]], [[TMP1]]) +; CHECK-NEXT: ret [[TMP2]] ; - %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %p) - %2 = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( %1, %a, %b) - %3 = tail call fast @llvm.aarch64.sve.fsub.nxv8f16( %1, %2, %c) - ret %3 + %1 = tail call @llvm.aarch64.sve.mul.nxv16i8( %p, %b, %c) + %2 = tail call @llvm.aarch64.sve.sub.u.nxv16i8( %p, %a, %1) + ret %2 } -; No integer variant of fnmsb exists; Do not combine -define @neg_combine_nmsb_i8( %p, %a, %b, %c) #0 { -; CHECK-LABEL: @neg_combine_nmsb_i8( -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.nxv16i8( [[P:%.*]], [[A:%.*]], [[B:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.sub.nxv16i8( [[P]], [[TMP1]], [[C:%.*]]) +; sub(mul(b, c), a) -> sub(mul(b, c), a) +define @combine_mulsub_5( %p, %a, %b, %c) #0 { +; CHECK-LABEL: @combine_mulsub_5( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.nxv16i8( [[P:%.*]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.sub.nxv16i8( [[P]], [[TMP1]], [[A:%.*]]) ; CHECK-NEXT: ret [[TMP2]] ; - %1 = tail call @llvm.aarch64.sve.mul.nxv16i8( %p, %a, %b) - %2 = tail call @llvm.aarch64.sve.sub.nxv16i8( %p, %1, %c) + %1 = tail call @llvm.aarch64.sve.mul.nxv16i8( %p, %b, %c) + %2 = tail call @llvm.aarch64.sve.sub.nxv16i8( %p, %1, %a) ret %2 } -define @combine_fmla_contract_flag_only( %p, %a, %b, %c) #0 { -; CHECK-LABEL: @combine_fmla_contract_flag_only( -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[P:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = call contract @llvm.aarch64.sve.fmla.nxv8f16( [[TMP1]], [[C:%.*]], [[A:%.*]], [[B:%.*]]) -; CHECK-NEXT: ret [[TMP2]] +; TODO: Missing combine! 
+; sub(mul_u(b, c), a) -> sub_u(mul_u(b, c), a) +define @combine_mulsub_6( %p, %a, %b, %c) #0 { +; CHECK-LABEL: @combine_mulsub_6( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.u.nxv16i8( [[P:%.*]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.sub.nxv16i8( [[P]], [[TMP1]], [[A:%.*]]) +; CHECK-NEXT: ret [[TMP2]] ; - %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %p) - %2 = tail call contract @llvm.aarch64.sve.fmul.nxv8f16( %1, %a, %b) - %3 = tail call contract @llvm.aarch64.sve.fadd.nxv8f16( %1, %c, %2) - ret %3 + %1 = tail call @llvm.aarch64.sve.mul.u.nxv16i8( %p, %b, %c) + %2 = tail call @llvm.aarch64.sve.sub.nxv16i8( %p, %1, %a) + ret %2 } -define @neg_combine_fmla_no_flags( %p, %a, %b, %c) #0 { -; CHECK-LABEL: @neg_combine_fmla_no_flags( -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[P:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.fmul.nxv8f16( [[TMP1]], [[A:%.*]], [[B:%.*]]) -; CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.fadd.nxv8f16( [[TMP1]], [[C:%.*]], [[TMP2]]) -; CHECK-NEXT: ret [[TMP3]] +; sub_u(mul_u(b, c), a) -> sub_u(mul_u(b, c), a) +define @combine_mulsub_7( %p, %a, %b, %c) #0 { +; CHECK-LABEL: @combine_mulsub_7( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.u.nxv16i8( [[P:%.*]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.sub.u.nxv16i8( [[P]], [[TMP1]], [[A:%.*]]) +; CHECK-NEXT: ret [[TMP2]] ; - %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %p) - %2 = tail call @llvm.aarch64.sve.fmul.nxv8f16( %1, %a, %b) - %3 = tail call @llvm.aarch64.sve.fadd.nxv8f16( %1, %c, %2) - ret %3 + %1 = tail call @llvm.aarch64.sve.mul.u.nxv16i8( %p, %b, %c) + %2 = tail call @llvm.aarch64.sve.sub.u.nxv16i8( %p, %1, %a) + ret %2 } -define @neg_combine_fmla_neq_pred( %p, %a, %b, %c) #0 { -; CHECK-LABEL: @neg_combine_fmla_neq_pred( -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[P:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 5) -; CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[TMP2]]) -; CHECK-NEXT: [[TMP4:%.*]] = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( [[TMP1]], [[A:%.*]], [[B:%.*]]) -; CHECK-NEXT: [[TMP5:%.*]] = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( [[TMP3]], [[C:%.*]], [[TMP4]]) -; CHECK-NEXT: ret [[TMP5]] +; TODO: Missing combine! 
+; sub_u(mul(b, c), a) -> sub_u(mul_u(b, c), a) +define @combine_mulsub_8( %p, %a, %b, %c) #0 { +; CHECK-LABEL: @combine_mulsub_8( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.nxv16i8( [[P:%.*]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.sub.u.nxv16i8( [[P]], [[TMP1]], [[A:%.*]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.mul.nxv16i8( %p, %b, %c) + %2 = tail call @llvm.aarch64.sve.sub.u.nxv16i8( %p, %1, %a) + ret %2 +} + +define @neg_combine_fmuladd_neq_pred( %p1, %p2, %a, %b, %c) #0 { +; CHECK-LABEL: @neg_combine_fmuladd_neq_pred( +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( [[P1:%.*]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( [[P2:%.*]], [[A:%.*]], [[TMP1]]) +; CHECK-NEXT: ret [[TMP2]] ; ; ret %9 - %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %p) - %2 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 5) - %3 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %2) - %4 = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( %1, %a, %b) - %5 = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( %3, %c, %4) - ret %5 -} - -define @neg_combine_fmla_two_fmul_uses( %p, %a, %b, %c) #0 { -; CHECK-LABEL: @neg_combine_fmla_two_fmul_uses( -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[P:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( [[TMP1]], [[A:%.*]], [[B:%.*]]) -; CHECK-NEXT: [[TMP3:%.*]] = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( [[TMP1]], [[C:%.*]], [[TMP2]]) -; CHECK-NEXT: [[TMP4:%.*]] = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( [[TMP1]], [[TMP3]], [[TMP2]]) -; CHECK-NEXT: ret [[TMP4]] + %1 = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( %p1, %b, %c) + %2 = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( %p2, %a, %1) + ret %2 +} + +define @neg_combine_fmuladd_two_fmul_uses( %p, %a, %b, %c) #0 { +; CHECK-LABEL: @neg_combine_fmuladd_two_fmul_uses( +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( [[P:%.*]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( [[P]], [[A:%.*]], [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( [[P]], [[TMP2]], [[TMP1]]) +; CHECK-NEXT: ret [[TMP3]] ; ; ret %8 - %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %p) - %2 = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( %1, %a, %b) - %3 = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( %1, %c, %2) - %4 = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( %1, %3, %2) - ret %4 -} - -define @neg_combine_fmla_neq_flags( %p, %a, %b, %c) #0 { -; CHECK-LABEL: @neg_combine_fmla_neq_flags( -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[P:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = tail call reassoc nnan contract @llvm.aarch64.sve.fmul.nxv8f16( [[TMP1]], [[B:%.*]], [[C:%.*]]) -; CHECK-NEXT: [[TMP3:%.*]] = tail call reassoc contract @llvm.aarch64.sve.fadd.nxv8f16( [[TMP1]], [[A:%.*]], [[TMP2]]) -; CHECK-NEXT: ret [[TMP3]] + %1 = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( %p, %b, %c) + %2 = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( %p, %a, %1) + %3 = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( %p, %2, %1) + ret %3 +} + +define @neg_combine_fmuladd_neq_flags( %p, %a, %b, %c) #0 { +; CHECK-LABEL: @neg_combine_fmuladd_neq_flags( +; CHECK-NEXT: [[TMP1:%.*]] = tail call reassoc nnan contract 
@llvm.aarch64.sve.fmul.nxv8f16( [[P:%.*]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call reassoc contract @llvm.aarch64.sve.fadd.nxv8f16( [[P]], [[A:%.*]], [[TMP1]]) +; CHECK-NEXT: ret [[TMP2]] ; ; ret %7 - %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %p) - %2 = tail call reassoc nnan contract @llvm.aarch64.sve.fmul.nxv8f16( %1, %b, %c) - %3 = tail call reassoc contract @llvm.aarch64.sve.fadd.nxv8f16( %1, %a, %2) - ret %3 + %1 = tail call reassoc nnan contract @llvm.aarch64.sve.fmul.nxv8f16( %p, %b, %c) + %2 = tail call reassoc contract @llvm.aarch64.sve.fadd.nxv8f16( %p, %a, %1) + ret %2 } -declare @llvm.aarch64.sve.ptrue.nxv16i1(i32) -declare @llvm.aarch64.sve.convert.from.svbool.nxv8i1() -declare @llvm.aarch64.sve.convert.from.svbool.nxv4i1() -declare @llvm.aarch64.sve.convert.from.svbool.nxv2i1() declare @llvm.aarch64.sve.fmul.nxv8f16(, , ) declare @llvm.aarch64.sve.fadd.nxv8f16(, , ) declare @llvm.aarch64.sve.fsub.nxv8f16(, , ) + declare @llvm.aarch64.sve.fmul.u.nxv8f16(, , ) declare @llvm.aarch64.sve.fadd.u.nxv8f16(, , ) declare @llvm.aarch64.sve.fsub.u.nxv8f16(, , ) + declare @llvm.aarch64.sve.mul.nxv16i8(, , ) declare @llvm.aarch64.sve.add.nxv16i8(, , ) declare @llvm.aarch64.sve.sub.nxv16i8(, , ) + declare @llvm.aarch64.sve.mul.u.nxv16i8(, , ) declare @llvm.aarch64.sve.add.u.nxv16i8(, , ) declare @llvm.aarch64.sve.sub.u.nxv16i8(, , )