diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 386315b3c4312..53e77ba84e92b 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -798,6 +798,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FMUL, MVT::f128, Legal); setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal); setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand); + setOperationAction(ISD::FMA, MVT::f128, Legal); } } @@ -13752,6 +13753,8 @@ bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { case MVT::f32: case MVT::f64: return true; + case MVT::f128: + return (EnableQuadPrecision && Subtarget.hasP9Vector()); default: break; } diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index d21f6b8d07b71..04a13e204ac20 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -2382,6 +2382,18 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { list<dag> pattern> : X_VT5_VA5_VB5<opcode, xo, opc, pattern>, isDOT; + // [PO VRT VRA VRB XO /] + class X_VT5_VA5_VB5_FMA<bits<6> opcode, bits<10> xo, string opc, + list<dag> pattern> + : XForm_1<opcode, xo, (outs vrrc:$vT), (ins vrrc:$vTi, vrrc:$vA, vrrc:$vB), + !strconcat(opc, " $vT, $vA, $vB"), IIC_VecFP, pattern>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">; + + // [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /] + class X_VT5_VA5_VB5_FMA_Ro<bits<6> opcode, bits<10> xo, string opc, + list<dag> pattern> + : X_VT5_VA5_VB5_FMA<opcode, xo, opc, pattern>, isDOT; + //===--------------------------------------------------------------------===// // Quad-Precision Scalar Move Instructions: @@ -2424,14 +2436,30 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { def XSSQRTQPO : X_VT5_XO5_VB5_Ro<63, 27, 804, "xssqrtqpo", []>; // (Negative) Multiply-{Add/Subtract} - def XSMADDQP : X_VT5_VA5_VB5 <63, 388, "xsmaddqp" , []>; - def XSMADDQPO : X_VT5_VA5_VB5_Ro<63, 388, "xsmaddqpo" , []>; - def XSMSUBQP : X_VT5_VA5_VB5 <63, 420, "xsmsubqp" , []>; - def XSMSUBQPO :
X_VT5_VA5_VB5_Ro<63, 420, "xsmsubqpo" , []>; - def XSNMADDQP : X_VT5_VA5_VB5 <63, 452, "xsnmaddqp" , []>; - def XSNMADDQPO: X_VT5_VA5_VB5_Ro<63, 452, "xsnmaddqpo", []>; - def XSNMSUBQP : X_VT5_VA5_VB5 <63, 484, "xsnmsubqp" , []>; - def XSNMSUBQPO: X_VT5_VA5_VB5_Ro<63, 484, "xsnmsubqpo", []>; + def XSMADDQP : X_VT5_VA5_VB5_FMA <63, 388, "xsmaddqp", + [(set f128:$vT, + (fma f128:$vA, f128:$vB, + f128:$vTi))]>; + def XSMADDQPO : X_VT5_VA5_VB5_FMA_Ro<63, 388, "xsmaddqpo" , []>; + def XSMSUBQP : X_VT5_VA5_VB5_FMA <63, 420, "xsmsubqp" , + [(set f128:$vT, + (fma f128:$vA, f128:$vB, + (fneg f128:$vTi)))]>; + def XSMSUBQPO : X_VT5_VA5_VB5_FMA_Ro<63, 420, "xsmsubqpo" , []>; + def XSNMADDQP : X_VT5_VA5_VB5_FMA <63, 452, "xsnmaddqp", + [(set f128:$vT, + (fneg (fma f128:$vA, f128:$vB, + f128:$vTi)))]>; + def XSNMADDQPO: X_VT5_VA5_VB5_FMA_Ro<63, 452, "xsnmaddqpo", []>; + def XSNMSUBQP : X_VT5_VA5_VB5_FMA <63, 484, "xsnmsubqp", + [(set f128:$vT, + (fneg (fma f128:$vA, f128:$vB, + (fneg f128:$vTi))))]>; + def XSNMSUBQPO: X_VT5_VA5_VB5_FMA_Ro<63, 484, "xsnmsubqpo", []>; + + // Additional fnmsub patterns: -a*c + b == -(a*c - b) + def : Pat<(fma (fneg f128:$A), f128:$C, f128:$B), (XSNMSUBQP $B, $C, $A)>; + def : Pat<(fma f128:$A, (fneg f128:$C), f128:$B), (XSNMSUBQP $B, $C, $A)>; //===--------------------------------------------------------------------===// // Quad/Double-Precision Compare Instructions: diff --git a/llvm/test/CodeGen/PowerPC/f128-fma.ll b/llvm/test/CodeGen/PowerPC/f128-fma.ll new file mode 100644 index 0000000000000..891b926c37c68 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/f128-fma.ll @@ -0,0 +1,203 @@ +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \ +; RUN: -enable-ppc-quad-precision -ppc-vsr-nums-as-vr < %s | FileCheck %s + +define void @qpFmadd(fp128* nocapture readonly %a, fp128* nocapture %b, + fp128* nocapture readonly %c, fp128* nocapture %res) { +entry: + %0 = load fp128, fp128* %a, align 16 + %1 = load fp128, fp128* %b, align 16 + %2 = load 
fp128, fp128* %c, align 16 + %madd = tail call fp128 @llvm.fmuladd.f128(fp128 %0, fp128 %1, fp128 %2) + store fp128 %madd, fp128* %res, align 16 + ret void +; CHECK-LABEL: qpFmadd +; CHECK-NOT: bl fmal +; CHECK-DAG: lxv [[REG3:[0-9]+]], 0(3) +; CHECK-DAG: lxv [[REG4:[0-9]+]], 0(4) +; CHECK-DAG: lxv [[REG5:[0-9]+]], 0(5) +; CHECK: xsmaddqp [[REG5]], [[REG3]], [[REG4]] +; CHECK-NEXT: stxv [[REG5]], 0(6) +; CHECK-NEXT: blr +} +declare fp128 @llvm.fmuladd.f128(fp128, fp128, fp128) + +; Function Attrs: norecurse nounwind +define void @qpFmadd_02(fp128* nocapture readonly %a, + fp128* nocapture readonly %b, + fp128* nocapture readonly %c, fp128* nocapture %res) { +entry: + %0 = load fp128, fp128* %a, align 16 + %1 = load fp128, fp128* %b, align 16 + %2 = load fp128, fp128* %c, align 16 + %mul = fmul contract fp128 %1, %2 + %add = fadd contract fp128 %0, %mul + store fp128 %add, fp128* %res, align 16 + ret void +; CHECK-LABEL: qpFmadd_02 +; CHECK-NOT: bl __multf3 +; CHECK-DAG: lxv [[REG3:[0-9]+]], 0(3) +; CHECK-DAG: lxv [[REG4:[0-9]+]], 0(4) +; CHECK-DAG: lxv [[REG5:[0-9]+]], 0(5) +; CHECK: xsmaddqp [[REG3]], [[REG4]], [[REG5]] +; CHECK-NEXT: stxv [[REG3]], 0(6) +; CHECK-NEXT: blr +} + +; Function Attrs: norecurse nounwind +define void @qpFmadd_03(fp128* nocapture readonly %a, + fp128* nocapture readonly %b, + fp128* nocapture readonly %c, fp128* nocapture %res) { +entry: + %0 = load fp128, fp128* %a, align 16 + %1 = load fp128, fp128* %b, align 16 + %mul = fmul contract fp128 %0, %1 + %2 = load fp128, fp128* %c, align 16 + %add = fadd contract fp128 %mul, %2 + store fp128 %add, fp128* %res, align 16 + ret void +; CHECK-LABEL: qpFmadd_03 +; CHECK-NOT: bl __multf3 +; CHECK-DAG: lxv [[REG3:[0-9]+]], 0(3) +; CHECK-DAG: lxv [[REG4:[0-9]+]], 0(4) +; CHECK-DAG: lxv [[REG5:[0-9]+]], 0(5) +; CHECK: xsmaddqp [[REG5]], [[REG3]], [[REG4]] +; CHECK-NEXT: stxv [[REG5]], 0(6) +; CHECK-NEXT: blr +} + +; Function Attrs: norecurse nounwind +define void @qpFnmadd(fp128* nocapture readonly 
%a, + fp128* nocapture readonly %b, + fp128* nocapture readonly %c, fp128* nocapture %res) { +entry: + %0 = load fp128, fp128* %a, align 16 + %1 = load fp128, fp128* %b, align 16 + %2 = load fp128, fp128* %c, align 16 + %mul = fmul contract fp128 %1, %2 + %add = fadd contract fp128 %0, %mul + %sub = fsub fp128 0xL00000000000000008000000000000000, %add + store fp128 %sub, fp128* %res, align 16 + ret void +; CHECK-LABEL: qpFnmadd +; CHECK-NOT: bl __multf3 +; CHECK-DAG: lxv [[REG3:[0-9]+]], 0(3) +; CHECK-DAG: lxv [[REG4:[0-9]+]], 0(4) +; CHECK-DAG: lxv [[REG5:[0-9]+]], 0(5) +; CHECK: xsnmaddqp [[REG3]], [[REG4]], [[REG5]] +; CHECK-NEXT: stxv [[REG3]], 0(6) +; CHECK-NEXT: blr +} + +; Function Attrs: norecurse nounwind +define void @qpFnmadd_02(fp128* nocapture readonly %a, + fp128* nocapture readonly %b, + fp128* nocapture readonly %c, fp128* nocapture %res) { +entry: + %0 = load fp128, fp128* %a, align 16 + %1 = load fp128, fp128* %b, align 16 + %mul = fmul contract fp128 %0, %1 + %2 = load fp128, fp128* %c, align 16 + %add = fadd contract fp128 %mul, %2 + %sub = fsub fp128 0xL00000000000000008000000000000000, %add + store fp128 %sub, fp128* %res, align 16 + ret void +; CHECK-LABEL: qpFnmadd_02 +; CHECK-NOT: bl __multf3 +; CHECK-DAG: lxv [[REG3:[0-9]+]], 0(3) +; CHECK-DAG: lxv [[REG4:[0-9]+]], 0(4) +; CHECK-DAG: lxv [[REG5:[0-9]+]], 0(5) +; CHECK: xsnmaddqp [[REG5]], [[REG3]], [[REG4]] +; CHECK-NEXT: stxv [[REG5]], 0(6) +; CHECK-NEXT: blr +} + +; Function Attrs: norecurse nounwind +define void @qpFmsub(fp128* nocapture readonly %a, + fp128* nocapture readonly %b, + fp128* nocapture readonly %c, fp128* nocapture %res) { +entry: + %0 = load fp128, fp128* %a, align 16 + %1 = load fp128, fp128* %b, align 16 + %2 = load fp128, fp128* %c, align 16 + %mul = fmul contract fp128 %1, %2 + %sub = fsub contract fp128 %0, %mul + store fp128 %sub, fp128* %res, align 16 + ret void +; CHECK-LABEL: qpFmsub +; CHECK-NOT: bl __multf3 +; CHECK-DAG: lxv [[REG3:[0-9]+]], 0(3) +; CHECK-DAG: 
lxv [[REG4:[0-9]+]], 0(4) +; CHECK-DAG: lxv [[REG5:[0-9]+]], 0(5) +; CHECK: xsnmsubqp [[REG3]], [[REG5]], [[REG4]] +; CHECK-NEXT: stxv [[REG3]], 0(6) +; CHECK-NEXT: blr +} + +; Function Attrs: norecurse nounwind +define void @qpFmsub_02(fp128* nocapture readonly %a, + fp128* nocapture readonly %b, + fp128* nocapture readonly %c, fp128* nocapture %res) { +entry: + %0 = load fp128, fp128* %a, align 16 + %1 = load fp128, fp128* %b, align 16 + %mul = fmul contract fp128 %0, %1 + %2 = load fp128, fp128* %c, align 16 + %sub = fsub contract fp128 %mul, %2 + store fp128 %sub, fp128* %res, align 16 + ret void +; CHECK-LABEL: qpFmsub_02 +; CHECK-NOT: bl __multf3 +; CHECK-DAG: lxv [[REG3:[0-9]+]], 0(3) +; CHECK-DAG: lxv [[REG4:[0-9]+]], 0(4) +; CHECK-DAG: lxv [[REG5:[0-9]+]], 0(5) +; CHECK: xsmsubqp [[REG5]], [[REG3]], [[REG4]] +; CHECK-NEXT: stxv [[REG5]], 0(6) +; CHECK-NEXT: blr +} + +; Function Attrs: norecurse nounwind +define void @qpFnmsub(fp128* nocapture readonly %a, + fp128* nocapture readonly %b, + fp128* nocapture readonly %c, fp128* nocapture %res) { +entry: + %0 = load fp128, fp128* %a, align 16 + %1 = load fp128, fp128* %b, align 16 + %2 = load fp128, fp128* %c, align 16 + %mul = fmul contract fp128 %1, %2 + %sub = fsub contract fp128 %0, %mul + %sub1 = fsub fp128 0xL00000000000000008000000000000000, %sub + store fp128 %sub1, fp128* %res, align 16 + ret void +; CHECK-LABEL: qpFnmsub +; CHECK-NOT: bl __multf3 +; CHECK-DAG: lxv [[REG3:[0-9]+]], 0(3) +; CHECK-DAG: lxv [[REG4:[0-9]+]], 0(4) +; CHECK-DAG: lxv [[REG5:[0-9]+]], 0(5) +; CHECK: xsnegqp [[REG4]], [[REG4]] +; CHECK: xsnmaddqp [[REG3]], [[REG4]], [[REG5]] +; CHECK-NEXT: stxv [[REG3]], 0(6) +; CHECK-NEXT: blr +} + +; Function Attrs: norecurse nounwind +define void @qpFnmsub_02(fp128* nocapture readonly %a, + fp128* nocapture readonly %b, + fp128* nocapture readonly %c, fp128* nocapture %res) { +entry: + %0 = load fp128, fp128* %a, align 16 + %1 = load fp128, fp128* %b, align 16 + %mul = fmul contract fp128 
%0, %1 + %2 = load fp128, fp128* %c, align 16 + %sub = fsub contract fp128 %mul, %2 + %sub1 = fsub fp128 0xL00000000000000008000000000000000, %sub + store fp128 %sub1, fp128* %res, align 16 + ret void +; CHECK-LABEL: qpFnmsub_02 +; CHECK-NOT: bl __multf3 +; CHECK-DAG: lxv [[REG3:[0-9]+]], 0(3) +; CHECK-DAG: lxv [[REG4:[0-9]+]], 0(4) +; CHECK-DAG: lxv [[REG5:[0-9]+]], 0(5) +; CHECK: xsnmsubqp [[REG5]], [[REG3]], [[REG4]] +; CHECK-NEXT: stxv [[REG5]], 0(6) +; CHECK-NEXT: blr +}