diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 1e30735b7a56a..98239f3cfe90b 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1012,6 +1012,36 @@ let Predicates = [HasSVE_or_SME] in {
   defm SEL_ZPZZ    : sve_int_sel_vvv<"sel", vselect>;
   defm SPLICE_ZPZ  : sve_int_perm_splice<"splice", AArch64splice>;
+
+  // mul x (splat -1) -> neg x
+  def : Pat<(nxv16i8 (AArch64mul_m1 nxv16i1:$Op1, nxv16i8:$Op2, (nxv16i8 (splat_vector (i32 -1))))),
+            (NEG_ZPmZ_B $Op2, $Op1, $Op2)>;
+  def : Pat<(nxv8i16 (AArch64mul_m1 nxv8i1:$Op1, nxv8i16:$Op2, (nxv8i16 (splat_vector (i32 -1))))),
+            (NEG_ZPmZ_H $Op2, $Op1, $Op2)>;
+  def : Pat<(nxv4i32 (AArch64mul_m1 nxv4i1:$Op1, nxv4i32:$Op2, (nxv4i32 (splat_vector (i32 -1))))),
+            (NEG_ZPmZ_S $Op2, $Op1, $Op2)>;
+  def : Pat<(nxv2i64 (AArch64mul_m1 nxv2i1:$Op1, nxv2i64:$Op2, (nxv2i64 (splat_vector (i64 -1))))),
+            (NEG_ZPmZ_D $Op2, $Op1, $Op2)>;
+
+  let AddedComplexity = 5 in {
+    def : Pat<(nxv16i8 (AArch64mul_p nxv16i1:$Op1, nxv16i8:$Op2, (nxv16i8 (splat_vector (i32 -1))))),
+              (NEG_ZPmZ_B_UNDEF $Op2, $Op1, $Op2)>;
+    def : Pat<(nxv8i16 (AArch64mul_p nxv8i1:$Op1, nxv8i16:$Op2, (nxv8i16 (splat_vector (i32 -1))))),
+              (NEG_ZPmZ_H_UNDEF $Op2, $Op1, $Op2)>;
+    def : Pat<(nxv4i32 (AArch64mul_p nxv4i1:$Op1, nxv4i32:$Op2, (nxv4i32 (splat_vector (i32 -1))))),
+              (NEG_ZPmZ_S_UNDEF $Op2, $Op1, $Op2)>;
+    def : Pat<(nxv2i64 (AArch64mul_p nxv2i1:$Op1, nxv2i64:$Op2, (nxv2i64 (splat_vector (i64 -1))))),
+              (NEG_ZPmZ_D_UNDEF $Op2, $Op1, $Op2)>;
+  }
+
+  def : Pat<(nxv16i8 (AArch64mul_m1 nxv16i1:$Op1, (nxv16i8 (splat_vector (i32 -1))), nxv16i8:$Op2)),
+            (NEG_ZPmZ_B (DUP_ZI_B -1, 0), $Op1, $Op2)>;
+  def : Pat<(nxv8i16 (AArch64mul_m1 nxv8i1:$Op1, (nxv8i16 (splat_vector (i32 -1))), nxv8i16:$Op2)),
+            (NEG_ZPmZ_H (DUP_ZI_H -1, 0), $Op1, $Op2)>;
+  def : Pat<(nxv4i32 (AArch64mul_m1 nxv4i1:$Op1, (nxv4i32 (splat_vector (i32 -1))), nxv4i32:$Op2)),
+            (NEG_ZPmZ_S (DUP_ZI_S -1, 0), $Op1, $Op2)>;
+  def : Pat<(nxv2i64 (AArch64mul_m1 nxv2i1:$Op1, (nxv2i64 (splat_vector (i64 -1))), nxv2i64:$Op2)),
+            (NEG_ZPmZ_D (DUP_ZI_D -1, 0), $Op1, $Op2)>;
 } // End HasSVE_or_SME
 
 // COMPACT - word and doubleword
diff --git a/llvm/test/CodeGen/AArch64/sve-int-mul-neg.ll b/llvm/test/CodeGen/AArch64/sve-int-mul-neg.ll
new file mode 100644
index 0000000000000..a1065bcf7d7da
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-int-mul-neg.ll
@@ -0,0 +1,131 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -mattr=+sve < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Muls with (-1) as an operand should fold to neg.
+define <vscale x 16 x i8> @mul_neg_fold_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: mul_neg_fold_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg z0.b, p0/m, z0.b
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> splat(i8 -1))
+  ret <vscale x 16 x i8> %1
+}
+
+define <vscale x 8 x i16> @mul_neg_fold_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: mul_neg_fold_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg z0.h, p0/m, z0.h
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> splat(i16 -1))
+  ret <vscale x 8 x i16> %1
+}
+
+define <vscale x 4 x i32> @mul_neg_fold_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: mul_neg_fold_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg z0.s, p0/m, z0.s
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> splat(i32 -1))
+  ret <vscale x 4 x i32> %1
+}
+
+define <vscale x 2 x i64> @mul_neg_fold_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: mul_neg_fold_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg z0.d, p0/m, z0.d
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> splat(i64 -1))
+  ret <vscale x 2 x i64> %1
+}
+
+define <vscale x 16 x i8> @mul_neg_fold_u_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: mul_neg_fold_u_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg z0.b, p0/m, z0.b
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 16 x i8> @llvm.aarch64.sve.mul.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> splat(i8 -1))
+  ret <vscale x 16 x i8> %1
+}
+
+define <vscale x 8 x i16> @mul_neg_fold_u_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: mul_neg_fold_u_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg z0.h, p0/m, z0.h
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> splat(i16 -1))
+  ret <vscale x 8 x i16> %1
+}
+
+define <vscale x 4 x i32> @mul_neg_fold_u_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: mul_neg_fold_u_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg z0.s, p0/m, z0.s
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> splat(i32 -1))
+  ret <vscale x 4 x i32> %1
+}
+
+define <vscale x 2 x i64> @mul_neg_fold_u_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: mul_neg_fold_u_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg z0.d, p0/m, z0.d
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> splat(i64 -1))
+  ret <vscale x 2 x i64> %1
+}
+
+define <vscale x 16 x i8> @mul_neg_fold_different_argument_order_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: mul_neg_fold_different_argument_order_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.b, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    neg z1.b, p0/m, z0.b
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat(i8 -1), <vscale x 16 x i8> %a)
+  ret <vscale x 16 x i8> %1
+}
+
+define <vscale x 8 x i16> @mul_neg_fold_different_argument_order_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: mul_neg_fold_different_argument_order_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.h, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    neg z1.h, p0/m, z0.h
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat(i16 -1), <vscale x 8 x i16> %a)
+  ret <vscale x 8 x i16> %1
+}
+
+define <vscale x 4 x i32> @mul_neg_fold_different_argument_order_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: mul_neg_fold_different_argument_order_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.s, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    neg z1.s, p0/m, z0.s
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 -1), <vscale x 4 x i32> %a)
+  ret <vscale x 4 x i32> %1
+}
+
+define <vscale x 2 x i64> @mul_neg_fold_different_argument_order_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: mul_neg_fold_different_argument_order_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.d, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    neg z1.d, p0/m, z0.d
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat(i64 -1), <vscale x 2 x i64> %a)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.mul.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
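
For reference, a minimal ACLE-level C sketch of how these patterns are expected to be reached from user code, assuming clang's usual lowering of svmul_s32_m/svmul_s32_x and svdup_n_s32 to the llvm.aarch64.sve.mul(.u) intrinsics with a splat constant; the function names and build flags below are illustrative only, not taken from the patch:

  // Sketch: SVE multiply by a splat of -1, merging and "don't care" forms.
  // Assumed build: clang -O2 --target=aarch64-linux-gnu -march=armv8-a+sve -S neg_fold.c
  #include <arm_sve.h>

  // Merging multiply; expected to lower to @llvm.aarch64.sve.mul.nxv4i32,
  // which the new AArch64mul_m1 patterns should now select as
  // "neg z0.s, p0/m, z0.s" instead of a mov #-1 plus a predicated mul.
  svint32_t negate_active_m(svbool_t pg, svint32_t a) {
    return svmul_s32_m(pg, a, svdup_n_s32(-1));
  }

  // "Don't care" multiply; expected to lower to @llvm.aarch64.sve.mul.u.nxv4i32,
  // matched by the AArch64mul_p patterns and selected as the NEG_ZPmZ_S_UNDEF form.
  svint32_t negate_active_x(svbool_t pg, svint32_t a) {
    return svmul_s32_x(pg, a, svdup_n_s32(-1));
  }

The reversed-operand patterns additionally cover the case where the splat is the first multiplicand; there the merge input is the -1 splat itself, which is why the mul_neg_fold_different_argument_order_* checks still expect a mov of #-1 alongside the neg.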