-
Notifications
You must be signed in to change notification settings - Fork 14.9k
powerpc: recognize vmnsub
in older ppc versions
#155465
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-powerpc Author: Folkert de Vries (folkertdev) Changesfixes #129432 Turns out that fixme was (at least) 12 years old. There is a bunch of code related to fma, but most of it seems to be for older ppc versions? At least, with ppc5 I never end up there. We run into this missing optimization in https://github.com/rust-lang/stdarch, with this optimization we can use one fewer intrinsic and in general improve support for powerpc via using the cross-platform intrinsics. the DAG here is e.g.
Full diff: https://github.com/llvm/llvm-project/pull/155465.diff 2 Files Affected:
diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index 79fe12e8e4b49..c2254b0ef178c 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -30,6 +30,11 @@
// Altivec transformation functions and pattern fragments.
//
+// fneg is not legal, and desugared as an xor.
+def desugared_fneg : PatFrag<(ops node:$x), (v4f32 (bitconvert (xor (bitconvert $x),
+ (int_ppc_altivec_vslw (bitconvert (v16i8 immAllOnesV)),
+ (bitconvert (v16i8 immAllOnesV))))))>;
+
def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG);
@@ -461,11 +466,12 @@ def VMADDFP : VAForm_1<46, (outs vrrc:$RT), (ins vrrc:$RA, vrrc:$RC, vrrc:$RB),
[(set v4f32:$RT,
(fma v4f32:$RA, v4f32:$RC, v4f32:$RB))]>;
-// FIXME: The fma+fneg pattern won't match because fneg is not legal.
+// fneg is not legal, hence we have to match on the desugared version.
def VNMSUBFP: VAForm_1<47, (outs vrrc:$RT), (ins vrrc:$RA, vrrc:$RC, vrrc:$RB),
"vnmsubfp $RT, $RA, $RC, $RB", IIC_VecFP,
- [(set v4f32:$RT, (fneg (fma v4f32:$RA, v4f32:$RC,
- (fneg v4f32:$RB))))]>;
+ [(set v4f32:$RT, (desugared_fneg (fma v4f32:$RA, v4f32:$RC,
+ (desugared_fneg v4f32:$RB))))]>;
+
let hasSideEffects = 1 in {
def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>;
def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs,
@@ -886,6 +892,13 @@ def : Pat<(mul v8i16:$vA, v8i16:$vB), (VMLADDUHM $vA, $vB, (v8i16(V_SET0H)))>;
// Add
def : Pat<(add (mul v8i16:$vA, v8i16:$vB), v8i16:$vC), (VMLADDUHM $vA, $vB, $vC)>;
+
+// Fused negated multiply-subtract
+def : Pat<(v4f32 (desugared_fneg
+ (int_ppc_altivec_vmaddfp v4f32:$RA, v4f32:$RC,
+ (desugared_fneg v4f32:$RB)))),
+ (VNMSUBFP $RA, $RC, $RB)>;
+
// Saturating adds/subtracts.
def : Pat<(v16i8 (saddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDSBS $vA, $vB))>;
def : Pat<(v16i8 (uaddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDUBS $vA, $vB))>;
diff --git a/llvm/test/CodeGen/PowerPC/vec-nmsub.ll b/llvm/test/CodeGen/PowerPC/vec-nmsub.ll
new file mode 100644
index 0000000000000..8f4ac972346b3
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vec-nmsub.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs < %s -mcpu=pwr5 -mtriple=ppc32-- -mattr=+altivec | FileCheck %s
+
+define dso_local <4 x float> @intrinsic(<4 x float> noundef %a, <4 x float> noundef %b, <4 x float> noundef %c) local_unnamed_addr {
+; CHECK-LABEL: intrinsic:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vnmsubfp 2, 2, 3, 4
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call <4 x float> @llvm.ppc.altivec.vnmsubfp(<4 x float> %a, <4 x float> %b, <4 x float> %c)
+ ret <4 x float> %0
+}
+
+define <4 x float> @manual_llvm_fma(<4 x float> %a, <4 x float> %b, <4 x float> %c) unnamed_addr {
+; CHECK-LABEL: manual_llvm_fma:
+; CHECK: # %bb.0: # %start
+; CHECK-NEXT: vnmsubfp 2, 2, 3, 4
+; CHECK-NEXT: blr
+start:
+ %0 = fneg <4 x float> %c
+ %1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %0)
+ %2 = fneg <4 x float> %1
+ ret <4 x float> %2
+}
+
+define dso_local <4 x float> @manual_vmaddfp(<4 x float> noundef %a, <4 x float> noundef %b, <4 x float> noundef %c) local_unnamed_addr {
+; CHECK-LABEL: manual_vmaddfp:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vnmsubfp 2, 2, 3, 4
+; CHECK-NEXT: blr
+entry:
+ %fneg.i3 = fneg <4 x float> %c
+ %0 = tail call <4 x float> @llvm.ppc.altivec.vmaddfp(<4 x float> %a, <4 x float> %b, <4 x float> %fneg.i3)
+ %fneg.i = fneg <4 x float> %0
+ ret <4 x float> %fneg.i
+}
|
@daltenty can you point the right reviewer at this PR? |
@amy-kwan any change you could review this or recommend someone who could? (I've seen you do some stuff in the rust repo) |
41c005c
to
311627f
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
I'm still not entirely sure of the LLVM review process, I'm guessing we're waiting for a second approval? In any case, I cannot merge this PR, someone else will have to do that. |
fixes #129432
Turns out that fixme was (at least) 12 years old.
There is a bunch of code related to fma, but most of it seems to be for older ppc versions? At least, with ppc5 I never end up there. We run into this missing optimization in https://github.com/rust-lang/stdarch, with this optimization we can use one fewer intrinsic and in general improve support for powerpc via using the cross-platform intrinsics.
the DAG here is e.g.