diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index 97d5e28963234..bb1822314f713 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -30,6 +30,11 @@
 // Altivec transformation functions and pattern fragments.
 //
 
+// fneg is not legal, and is desugared as an xor.
+def desugared_fneg : PatFrag<(ops node:$x), (v4f32 (bitconvert (xor (bitconvert $x),
+                            (int_ppc_altivec_vslw (bitconvert (v16i8 immAllOnesV)),
+                                                  (bitconvert (v16i8 immAllOnesV))))))>;
+
 def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
                               (vector_shuffle node:$lhs, node:$rhs), [{
   return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG);
@@ -468,11 +473,12 @@ def VMADDFP : VAForm_1<46, (outs vrrc:$RT), (ins vrrc:$RA, vrrc:$RC, vrrc:$RB),
                        [(set v4f32:$RT,
                             (fma v4f32:$RA, v4f32:$RC, v4f32:$RB))]>;
-// FIXME: The fma+fneg pattern won't match because fneg is not legal.
+// fneg is not legal, hence we have to match on the desugared version.
 def VNMSUBFP: VAForm_1<47, (outs vrrc:$RT), (ins vrrc:$RA, vrrc:$RC, vrrc:$RB),
                        "vnmsubfp $RT, $RA, $RC, $RB", IIC_VecFP,
-                       [(set v4f32:$RT, (fneg (fma v4f32:$RA, v4f32:$RC,
-                                               (fneg v4f32:$RB))))]>;
+                       [(set v4f32:$RT, (desugared_fneg (fma v4f32:$RA, v4f32:$RC,
+                                               (desugared_fneg v4f32:$RB))))]>;
+
 let hasSideEffects = 1 in {
   def VMHADDSHS  : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs,
                                v8i16>;
   def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs,
@@ -893,6 +899,13 @@ def : Pat<(mul v8i16:$vA, v8i16:$vB), (VMLADDUHM $vA, $vB, (v8i16(V_SET0H)))>;
 // Add
 def : Pat<(add (mul v8i16:$vA, v8i16:$vB), v8i16:$vC),
           (VMLADDUHM $vA, $vB, $vC)>;
+
+// Fused negated multiply-subtract
+def : Pat<(v4f32 (desugared_fneg
+                      (int_ppc_altivec_vmaddfp v4f32:$RA, v4f32:$RC,
+                                               (desugared_fneg v4f32:$RB)))),
+          (VNMSUBFP $RA, $RC, $RB)>;
+
 // Saturating adds/subtracts.
 def : Pat<(v16i8 (saddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDSBS $vA, $vB))>;
 def : Pat<(v16i8 (uaddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDUBS $vA, $vB))>;
diff --git a/llvm/test/CodeGen/PowerPC/vec-nmsub.ll b/llvm/test/CodeGen/PowerPC/vec-nmsub.ll
new file mode 100644
index 0000000000000..8f4ac972346b3
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vec-nmsub.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs < %s -mcpu=pwr5 -mtriple=ppc32-- -mattr=+altivec | FileCheck %s
+
+define dso_local <4 x float> @intrinsic(<4 x float> noundef %a, <4 x float> noundef %b, <4 x float> noundef %c) local_unnamed_addr {
+; CHECK-LABEL: intrinsic:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vnmsubfp 2, 2, 3, 4
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call <4 x float> @llvm.ppc.altivec.vnmsubfp(<4 x float> %a, <4 x float> %b, <4 x float> %c)
+  ret <4 x float> %0
+}
+
+define <4 x float> @manual_llvm_fma(<4 x float> %a, <4 x float> %b, <4 x float> %c) unnamed_addr {
+; CHECK-LABEL: manual_llvm_fma:
+; CHECK:       # %bb.0: # %start
+; CHECK-NEXT:    vnmsubfp 2, 2, 3, 4
+; CHECK-NEXT:    blr
+start:
+  %0 = fneg <4 x float> %c
+  %1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %0)
+  %2 = fneg <4 x float> %1
+  ret <4 x float> %2
+}
+
+define dso_local <4 x float> @manual_vmaddfp(<4 x float> noundef %a, <4 x float> noundef %b, <4 x float> noundef %c) local_unnamed_addr {
+; CHECK-LABEL: manual_vmaddfp:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vnmsubfp 2, 2, 3, 4
+; CHECK-NEXT:    blr
+entry:
+  %fneg.i3 = fneg <4 x float> %c
+  %0 = tail call <4 x float> @llvm.ppc.altivec.vmaddfp(<4 x float> %a, <4 x float> %b, <4 x float> %fneg.i3)
+  %fneg.i = fneg <4 x float> %0
+  ret <4 x float> %fneg.i
+}
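
Why the PatFrag has this shape: with only AltiVec, fneg on v4f32 is not legal, so the legalizer flips the sign bit instead. It builds the 0x80000000 splat without a constant-pool load by shifting an all-ones vector left by itself with vslw (the low 5 bits of each all-ones word give a shift amount of 31, and -1 << 31 is 0x80000000), then xors the operand with that mask; desugared_fneg matches exactly this xor. A minimal IR sketch of an input that reaches instruction selection in the desugared form (the function name fneg_v4f32 is illustrative, not part of the patch):

define <4 x float> @fneg_v4f32(<4 x float> %x) {
  ; Not a legal v4f32 operation on AltiVec-only subtargets; expanded to
  ; xor(x, vslw(all-ones, all-ones)) before instruction selection, which
  ; is the shape desugared_fneg matches.
  %neg = fneg <4 x float> %x
  ret <4 x float> %neg
}

This is what lets the new VNMSUBFP pattern, which computes -(a*c - b), fire on fneg(fma(a, c, fneg(b))), as exercised by manual_llvm_fma in the new test.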