diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index 5eeabf4c33b76..3e4f189637e0a 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -5919,6 +5919,8 @@ def mcrbits : Flag<["-"], "mcrbits">, Group, "(the enablement of CR-bit tracking support) is the default for " "POWER8 and above, as well as for all other CPUs when " "optimization is applied (-O2 and above).">; +def mfloat16 : Flag<["-"], "mfloat16">, Group, + HelpText<"Enable half-precision floating point (experimental).">; def mno_crbits : Flag<["-"], "mno-crbits">, Group; def minvariant_function_descriptors : Flag<["-"], "minvariant-function-descriptors">, Group; diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index c9a41df806aff..f999372f31a94 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -82,6 +82,8 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector &Features, HasQuadwordAtomics = true; } else if (Feature == "+longcall") { UseLongCalls = true; + } else if (Feature == "+float16") { + HasFloat16 = true; } // TODO: Finish this list and add an assert that we've handled them // all. @@ -504,6 +506,7 @@ static bool ppcUserFeaturesCheck(DiagnosticsEngine &Diags, Found |= FindVSXSubfeature("+crypto", "-mcrypto", "-msoft-float"); Found |= FindVSXSubfeature("+power10-vector", "-mpower10-vector", "-msoft-float"); + Found |= FindVSXSubfeature("+float16", "-mfloat16", "-msoft-float"); } if (Found) return false; @@ -555,6 +558,14 @@ bool PPCTargetInfo::initFeatureMap( return false; } + if (!(ArchDefs & ArchDefinePwr8)) { + if (llvm::is_contained(FeaturesVec, "+float16")) { + // Reject -mfloat16 on pre-Power8 CPUs. + Diags.Report(diag::err_opt_not_valid_with_opt) << "-mfloat16" << CPU; + return false; + } + } + if (!(ArchDefs & ArchDefinePwr10)) { if (llvm::is_contained(FeaturesVec, "+mma")) { // MMA operations are not available pre-Power10. 
diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h index a9f49aa3aebe1..df263eac0d6de 100644 --- a/clang/lib/Basic/Targets/PPC.h +++ b/clang/lib/Basic/Targets/PPC.h @@ -73,6 +73,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo { bool HasPCRelativeMemops = false; bool HasQuadwordAtomics = false; bool UseLongCalls = false; + bool HasFloat16 = false; protected: std::string ABI; @@ -362,6 +363,10 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo { bool hasBitIntType() const override { return true; } + // Returns true when _Float16 is available as a native type. Requires the + // explicit opt-in (-mfloat16). + bool hasFloat16Type() const override { return HasFloat16; } + bool isSPRegName(StringRef RegName) const override { return RegName == "r1" || RegName == "x1"; } diff --git a/clang/lib/Driver/ToolChains/Arch/PPC.cpp b/clang/lib/Driver/ToolChains/Arch/PPC.cpp index 17051980f34fb..f06645038cfca 100644 --- a/clang/lib/Driver/ToolChains/Arch/PPC.cpp +++ b/clang/lib/Driver/ToolChains/Arch/PPC.cpp @@ -77,6 +77,11 @@ void ppc::getPPCTargetFeatures(const Driver &D, const llvm::Triple &Triple, D.Diag(diag::err_opt_not_valid_on_target) << "-maix-shared-lib-tls-model-opt"; + // The _Float16 datatype is supported through the -mfloat16 flag. + if (Args.hasArg(options::OPT_mfloat16)) { + Features.push_back("+float16"); + } + // The integrated assembler counts as a "modern AIX assembler" for the // purposes of the modern-aix-as. 
if (Args.hasFlag(options::OPT_fintegrated_as, options::OPT_fno_integrated_as, diff --git a/clang/test/CodeGen/PowerPC/half-float16-ppc.c b/clang/test/CodeGen/PowerPC/half-float16-ppc.c new file mode 100644 index 0000000000000..45b336d03f7db --- /dev/null +++ b/clang/test/CodeGen/PowerPC/half-float16-ppc.c @@ -0,0 +1,154 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 -triple powerpc-unknown-unknown -O2 -emit-llvm %s -o - \ +// RUN: -target-cpu pwr8 -target-feature +float16 | FileCheck %s + +typedef __INT8_TYPE__ int8_t; +typedef __INT16_TYPE__ int16_t; +typedef __INT32_TYPE__ int32_t; +typedef __INT64_TYPE__ int64_t; +typedef __UINT8_TYPE__ uint8_t; +typedef __UINT16_TYPE__ uint16_t; +typedef __UINT32_TYPE__ uint32_t; +typedef __UINT64_TYPE__ uint64_t; + +// ============================================ +// Arithmetic Operations +// ============================================ +// CHECK-LABEL: define dso_local noundef half @c_add( +// CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[UNPROMOTION:%.*]] = fadd half [[A]], [[B]] +// CHECK-NEXT: ret half [[UNPROMOTION]] +// +_Float16 c_add(_Float16 a, _Float16 b) { return a + b; } + + +// CHECK-LABEL: define dso_local noundef half @c_sub( +// CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[UNPROMOTION:%.*]] = fsub half [[A]], [[B]] +// CHECK-NEXT: ret half [[UNPROMOTION]] +// +_Float16 c_sub(_Float16 a, _Float16 b) { return a - b; } + +// CHECK-LABEL: define dso_local noundef half @c_mul( +// CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[UNPROMOTION:%.*]] = fmul half [[A]], [[B]] +// CHECK-NEXT: ret half [[UNPROMOTION]] +// +_Float16 c_mul(_Float16 a, _Float16 b) { 
return a * b; } + +// CHECK-LABEL: define dso_local noundef half @c_div( +// CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[UNPROMOTION:%.*]] = fdiv half [[A]], [[B]] +// CHECK-NEXT: ret half [[UNPROMOTION]] +// +_Float16 c_div(_Float16 a, _Float16 b) { return a / b; } + +// ============================================ +// Unary Operations +// ============================================ + +// CHECK-LABEL: define dso_local noundef half @c_neg( +// CHECK-SAME: half noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[UNPROMOTION:%.*]] = fneg half [[A]] +// CHECK-NEXT: ret half [[UNPROMOTION]] +// +_Float16 c_neg(_Float16 a) { return -a; } + +// ============================================ +// Compare Operations +// ============================================ + +// CHECK-LABEL: define dso_local range(i32 0, 2) i32 @c_eq( +// CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CMP:%.*]] = fcmp oeq half [[A]], [[B]] +// CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +// CHECK-NEXT: ret i32 [[CONV]] +// +int c_eq(_Float16 a, _Float16 b){ return a == b; } + +// CHECK-LABEL: define dso_local range(i32 0, 2) i32 @c_ne( +// CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CMP:%.*]] = fcmp une half [[A]], [[B]] +// CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +// CHECK-NEXT: ret i32 [[CONV]] +// +int c_ne(_Float16 a, _Float16 b){ return a != b; } + +// CHECK-LABEL: define dso_local range(i32 0, 2) i32 @c_lt( +// CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CMP:%.*]] = fcmp olt half [[A]], [[B]] +// CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +// 
CHECK-NEXT: ret i32 [[CONV]] +// +int c_lt(_Float16 a, _Float16 b){ return a < b; } + +// CHECK-LABEL: define dso_local range(i32 0, 2) i32 @c_le( +// CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CMP:%.*]] = fcmp ole half [[A]], [[B]] +// CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +// CHECK-NEXT: ret i32 [[CONV]] +// +int c_le(_Float16 a, _Float16 b){ return a <= b; } + +// CHECK-LABEL: define dso_local range(i32 0, 2) i32 @c_gt( +// CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CMP:%.*]] = fcmp ogt half [[A]], [[B]] +// CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +// CHECK-NEXT: ret i32 [[CONV]] +// +int c_gt(_Float16 a, _Float16 b){ return a > b; } + +// CHECK-LABEL: define dso_local range(i32 0, 2) i32 @c_ge( +// CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CMP:%.*]] = fcmp oge half [[A]], [[B]] +// CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +// CHECK-NEXT: ret i32 [[CONV]] +// +int c_ge(_Float16 a, _Float16 b){ return a >= b; } + +// ============================================ +// Conversion Operations: half <-> float/double +// ============================================ + +// CHECK-LABEL: define dso_local noundef float @to_f32( +// CHECK-SAME: half noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CONV:%.*]] = fpext half [[X]] to float +// CHECK-NEXT: ret float [[CONV]] +// +float to_f32(_Float16 x){ return (float)x; } + +// CHECK-LABEL: define dso_local noundef double @to_f64( +// CHECK-SAME: half noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CONV:%.*]] = fpext half [[X]] to double +// CHECK-NEXT: ret double [[CONV]] +// +double to_f64(_Float16 x){ return 
(double)x; } + +// CHECK-LABEL: define dso_local noundef half @from_f32( +// CHECK-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CONV:%.*]] = fptrunc float [[X]] to half +// CHECK-NEXT: ret half [[CONV]] +// +_Float16 from_f32(float x){ return (_Float16)x; } + +// CHECK-LABEL: define dso_local noundef half @from_f64( +// CHECK-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CONV:%.*]] = fptrunc double [[X]] to half +// CHECK-NEXT: ret half [[CONV]] +// +_Float16 from_f64(double x){ return (_Float16)x; } diff --git a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp index 3370dedbecb16..fb6eccfe043b5 100644 --- a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp +++ b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp @@ -34,7 +34,8 @@ PPCRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC, LLT Ty) const { switch (RC.getID()) { case PPC::VSFRCRegClassID: - case PPC::SPILLTOVSRRC_and_VSFRCRegClassID: + case PPC::VHFRCRegClassID: + case PPC::SPILLTOVSRRC_and_VHFRCRegClassID: case PPC::SPILLTOVSRRC_and_VFRCRegClassID: case PPC::SPILLTOVSRRC_and_F4RCRegClassID: case PPC::F8RCRegClassID: diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index 7b4bae60f7e74..43982c4c5c5fb 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -71,6 +71,10 @@ def FeatureModernAIXAs "AIX system assembler is modern enough to support new mnes">; def FeatureHardFloat : SubtargetFeature<"hard-float", "HasHardFloat", "true", "Enable floating-point instructions">; +def FeatureFloat16 : + SubtargetFeature<"float16", + "HasFloat16", "true", + "Enable half-precision floating point support (experimental)">; // Specifies that we are in 64-bit mode or that we should use 64-bit registers // in 32-bit mode when possible. 
Requires Feature64Bit to be enabled. @@ -405,6 +409,7 @@ def HasOnlySwappingMemOps : Predicate<"!Subtarget->hasP9Vector()">; def NoP10Vector : Predicate<"!Subtarget->hasP10Vector()">; def HasP10Vector : Predicate<"Subtarget->hasP10Vector()">; def HasFutureVector : Predicate<"Subtarget->hasFutureVector()">; +def HasFloat16 : Predicate<"Subtarget->hasFloat16()">; // Predicates used to differenciate between different ISAs. def IsISA2_06 : Predicate<"Subtarget->isISA2_06()">; diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.td b/llvm/lib/Target/PowerPC/PPCCallingConv.td index 5d4fe06ebdddd..961da883748dc 100644 --- a/llvm/lib/Target/PowerPC/PPCCallingConv.td +++ b/llvm/lib/Target/PowerPC/PPCCallingConv.td @@ -75,6 +75,9 @@ def RetCC_PPC : CallingConv<[ CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>, CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>, + // Return f16 in FPR as 16 bit value. + CCIfType<[f16], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, + // Floating point types returned as "direct" go into F1 .. F8; note that // only the ELFv2 ABI fully utilizes all these registers. CCIfNotSubtarget<"hasSPE()", @@ -125,8 +128,8 @@ def CC_PPC64_ELF : CallingConv<[ CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>, // Handle fp types and shadow the corresponding registers as necessary. - CCIfType<[f32, f64], CCIfNotVarArg>>, - CCIfType<[f32, f64], + CCIfType<[f16, f32, f64], CCIfNotVarArg>>, + CCIfType<[f16, f32, f64], CCIfNotVarArg>>, @@ -161,7 +164,7 @@ def CC_PPC64_ELF_FIS : CallingConv<[ CCIfType<[i16], CCPromoteToType>, CCIfType<[i32], CCPromoteToType>, CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>, - CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>> + CCIfType<[f16, f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>> ]>; // Simple return-value convention for 64-bit ELF PowerPC fast isel. 
@@ -178,6 +181,7 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[ CCIfType<[i32], CCPromoteToType>, CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>, CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>, + CCIfType<[f16], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, CCIfType<[f128], @@ -218,6 +222,9 @@ def CC_PPC32_SVR4_Common : CallingConv<[ // The first 8 integer arguments are passed in integer registers. CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, + // Pass f16 in FPRs. + CCIfType<[f16], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, + // Make sure the i64 words from a long double are either both passed in // registers or both passed on the stack. CCIfType<[f64], CCIfSplit>>, diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 407093fd2b849..b20c6e25fca5e 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -261,6 +261,68 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setTruncStoreAction(MVT::f32, MVT::f16, Expand); } + // Only active when -mfloat16 is passed and hard float is enabled. + // This block intentionally overrides some actions set above + // because when f16 is a first-class type we handle load/store + // directly rather than through extending loads. + if (Subtarget.hasFloat16() && Subtarget.hasHardFloat()) { + // Make f16 a legal type. + addRegisterClass(MVT::f16, &PPC::VHFRCRegClass); + + // PowerPC has no native f16 arithmetic instructions. All arithmetic, + // comparisons, rounding, transcendentals, and min/max must be promoted + // to f32 for computation. On P8 this means libcalls (__extendhfsf2 / + // __truncsfhf2); on P9 this means xscvhpdp/xsaddsp.../xscvdphp sequences. 
+ static const unsigned F16PromoteOps[] = { + ISD::FMINNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM, ISD::FMINIMUMNUM, + ISD::FMAXIMUM, ISD::FMINIMUM, ISD::FADD, ISD::FSUB, + ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT, + ISD::FREM, ISD::FPOW, ISD::FLOG, ISD::FLOG2, + ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FEXP10, + ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, + ISD::FNEARBYINT, ISD::FROUND, ISD::FROUNDEVEN, ISD::FCANONICALIZE, + ISD::FSIN, ISD::FCOS, ISD::SETCC, ISD::SELECT_CC, + ISD::SELECT}; + + // Promote all the arithmetic operations defined above to f32. + setOperationAction(F16PromoteOps, MVT::f16, Promote); + + setOperationAction(ISD::LOAD, MVT::f16, Legal); + setOperationAction(ISD::STORE, MVT::f16, Legal); + + // Legal handling for bit manipulation. + setOperationAction(ISD::FABS, MVT::f16, Legal); + setOperationAction(ISD::FNEG, MVT::f16, Legal); + setOperationAction(ISD::FCOPYSIGN, MVT::f16, Legal); + + // Expand constant FP. + setOperationAction(ISD::ConstantFP, MVT::f16, Expand); + + // Expand extending loads and truncating stores. + for (MVT VT : {MVT::f32, MVT::f64}) { + setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand); + setTruncStoreAction(VT, MVT::f16, Expand); + } + + if (Subtarget.hasP9Vector()) { + // P9+: Hardware support for conversions. 
+ setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal); + setOperationAction(ISD::FP_EXTEND, MVT::f64, Legal); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal); + setOperationAction(ISD::FP_ROUND, MVT::f16, Legal); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal); + } else { + // P8: Conversions via libcalls + setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand); + setOperationAction(ISD::FP_EXTEND, MVT::f64, Expand); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Expand); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Expand); + setOperationAction(ISD::FP_ROUND, MVT::f16, Expand); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Expand); + } + } + setTruncStoreAction(MVT::f64, MVT::f32, Expand); // PowerPC has pre-inc load and store's. @@ -1675,6 +1737,12 @@ bool PPCTargetLowering::hasSPE() const { return Subtarget.hasSPE(); } +/// Tell the ABI lowering infrastructure to use FPRs for _Float16 parameters +/// and return values rather than GPRs. Active only when -mfloat16 is enabled. +bool PPCTargetLowering::useFPRegsForHalfType() const { + return Subtarget.hasFloat16() && Subtarget.hasHardFloat(); +} + bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const { return VT.isScalarInteger(); } @@ -4630,6 +4698,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4( ArgOffset += 8; break; + case MVT::f16: case MVT::f32: case MVT::f64: // These can be scalar arguments or elements of a float array type @@ -6497,6 +6566,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4( if (!IsFastCall) ArgOffset += PtrByteSize; break; + case MVT::f16: case MVT::f32: case MVT::f64: { // These can be scalar arguments or elements of a float array type @@ -6843,6 +6913,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, return false; } + case MVT::f16: case MVT::f32: case MVT::f64: { // Parameter save area (PSA) is reserved even if the float passes in fpr. 
@@ -6986,10 +7057,9 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, } // So far, this function is only used by LowerFormalArguments_AIX() -static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT, - bool IsPPC64, - bool HasP8Vector, - bool HasVSX) { +static const TargetRegisterClass * +getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, + bool HasVSX) { assert((IsPPC64 || SVT != MVT::i64) && "i64 should have been split for 32-bit codegen."); @@ -7000,6 +7070,10 @@ static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT, case MVT::i32: case MVT::i64: return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; + case MVT::f16: + if (HasP8Vector) + return &PPC::VHFRCRegClass; + llvm_unreachable("f16 requires Power8 or later"); case MVT::f32: return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass; case MVT::f64: @@ -7144,8 +7218,9 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX( continue; if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) { - const TargetRegisterClass *RegClass = getRegClassForSVT( - LocVT.SimpleTy, IsPPC64, Subtarget.hasP8Vector(), Subtarget.hasVSX()); + const TargetRegisterClass *RegClass = + getRegClassForSVT(LocVT.SimpleTy, IsPPC64, Subtarget.hasP8Vector(), + Subtarget.hasVSX()); // On PPC64, debugger assumes extended 8-byte values are stored from GPR. MVT SaveVT = RegClass == &PPC::G8RCRegClass ? 
MVT::i64 : LocVT; const Register VReg = MF.addLiveIn(VA.getLocReg(), RegClass); @@ -7260,6 +7335,9 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX( switch (VA.getValVT().SimpleTy) { default: report_fatal_error("Unhandled value type for argument."); + case MVT::f16: + FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint); + break; case MVT::f32: FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint); break; @@ -7367,10 +7445,10 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX( if (VA.isRegLoc() && !VA.needsCustom()) { MVT::SimpleValueType SVT = ValVT.SimpleTy; - Register VReg = - MF.addLiveIn(VA.getLocReg(), - getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(), - Subtarget.hasVSX())); + Register VReg = MF.addLiveIn( + VA.getLocReg(), + getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(), + Subtarget.hasVSX())); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT); if (ValVT.isScalarInteger() && (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) { diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 1778da5aba2fd..841aad46cffbd 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -214,6 +214,11 @@ namespace llvm { bool hasSPE() const; + // Override to tell the ABI framework that _Float16 passes through FPRs + // when -mfloat16 is active. Without this, the generic lowering would + // try to pass f16 values in integer registers. 
+ bool useFPRegsForHalfType() const override; + MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { return MVT::i32; } diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 1f6bccaf127b8..c8261703c2131 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -4063,6 +4063,85 @@ def : Pat<(i32 (fp_to_f16 f32:$A)), (i32 (MFVSRWZ (XSCVDPHP (COPY_TO_REGCLASS $A, VSFRC))))>; def : Pat<(i32 (fp_to_f16 f64:$A)), (i32 (MFVSRWZ (XSCVDPHP $A)))>; +// P9 has dedicated halfword VSX memory ops and scalar conversion instructions. +let Predicates = [HasP9Vector] in { + // Load halfword into VSX half container (no conversion on load). + def : Pat<(f16 (load ForceXForm:$src)), + (COPY_TO_REGCLASS (LXSIHZX ForceXForm:$src), VHFRC)>; + + // Store half container using STXSIHX (stores low 16 bits as halfword). + def : Pat<(store f16:$src, ForceXForm:$dst), + (STXSIHX (COPY_TO_REGCLASS $src, VHFRC), ForceXForm:$dst)>; + + // FP_ROUND to f16 container. + def : Pat<(f16 (fpround f64:$src)), + (COPY_TO_REGCLASS (XSCVDPHP $src), VHFRC)>; + + def : Pat<(f16 (fpround f32:$src)), + (COPY_TO_REGCLASS + (XSCVDPHP (COPY_TO_REGCLASS $src, VSFRC)), // treat as f64 container for the op + VHFRC)>; + + // FP_EXTEND from f16 container -> f64. + def : Pat<(f64 (fpextend f16:$src)), + (XSCVHPDP (COPY_TO_REGCLASS $src, VHFRC))>; + + // FP_EXTEND from f16 container -> f32. + def : Pat<(f32 (fpextend f16:$src)), + (COPY_TO_REGCLASS + (XSCVHPDP (COPY_TO_REGCLASS $src, VHFRC)), + VSSRC)>; + + // fabs, fneg, fcopysign: intentionally handled via bit manipulation on ALL + // hardware including P9+. The round-trip through f64 via XSCVHPDP/XSCVDPHP + // does not preserve NaN payloads (signalling NaN -> quiet NaN, payload bits + // corrupted). These operations must be pure bit moves. See llvm/llvm-project#97981. +} + +// P8 has no halfword VSX memory ops. Load/store goes through GPR roundtrip. 
+// These patterns also fire on P9 (P9 satisfies HasP8Vector), providing +// the fabs/fneg/fcopysign patterns for all hardware. +let Predicates = [HasP8Vector] in { + // Store f16: Move VSX half container -> GPR, then Store Halfword. + def : Pat<(store f16:$src, xoaddr:$dst), + (STHX (MFVSRWZ (COPY_TO_REGCLASS $src, VHFRC)), xoaddr:$dst)>; + def : Pat<(store f16:$src, iaddrX16:$dst), + (STH (MFVSRWZ (COPY_TO_REGCLASS $src, VHFRC)), iaddrX16:$dst)>; + + // Load f16: Load Halfword -> GPR, then Move GPR -> VSX half container. + def : Pat<(f16 (load xoaddr:$src)), + (COPY_TO_REGCLASS (MTVSRWZ (LHZX xoaddr:$src)), VHFRC)>; + def : Pat<(f16 (load iaddrX16:$src)), + (COPY_TO_REGCLASS (MTVSRWZ (LHZ iaddrX16:$src)), VHFRC)>; + + // fabs: clear sign bit (AND with 0x7FFF in the 16-bit lane). + def : Pat<(fabs f16:$src), + (COPY_TO_REGCLASS + (MTVSRWZ + (RLWINM (MFVSRWZ (COPY_TO_REGCLASS $src, VHFRC)), + 0, 17, 31)), + VHFRC)>; + + // fneg: flip sign bit (XOR with 0x8000). + def : Pat<(fneg f16:$src), + (COPY_TO_REGCLASS + (MTVSRWZ + (XORI (MFVSRWZ (COPY_TO_REGCLASS $src, VHFRC)), + 32768)), + VHFRC)>; + + // fcopysign: (mag & 0x7FFF) | (sign & 0x8000). + def : Pat<(fcopysign f16:$mag, f16:$sign), + (COPY_TO_REGCLASS + (MTVSRWZ + (OR + (RLWINM (MFVSRWZ (COPY_TO_REGCLASS $mag, VHFRC)), + 0, 17, 31), + (RLWINM (MFVSRWZ (COPY_TO_REGCLASS $sign, VHFRC)), + 0, 16, 16))), + VHFRC)>; +} + // Vector sign extensions def : Pat<(f64 (PPCVexts f64:$A, 1)), (f64 (COPY_TO_REGCLASS (VEXTSB2Ds $A), VSFRC))>; diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td index 90c7be4297935..1b2e78a0b658a 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td @@ -408,6 +408,13 @@ def SPILLTOVSRRC : PPCRegisterClass<[i64, f64], 64, // Register class for single precision scalars in VSX registers def VSSRC : PPCRegisterClass<[f32], 32, (add VSFRC)>; +// PowerPC has no f16 arithmetic instructions. 
F16 values must use VSX registers +// to utilize the VSX conversion instructions. Suppress pressure set generation +// to preserve VSSRC pressure index. +let GeneratePressureSet = 0 in { + def VHFRC : PPCRegisterClass<[f16], 16, (add VSFRC)>; +} + def CRBITRC : PPCRegisterClassWithSize<[i1], 32, (add CR2LT, CR2GT, CR2EQ, CR2UN, CR3LT, CR3GT, CR3EQ, CR3UN, diff --git a/llvm/test/CodeGen/PowerPC/f16-aix-psa.ll b/llvm/test/CodeGen/PowerPC/f16-aix-psa.ll new file mode 100644 index 0000000000000..c5dc260586913 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/f16-aix-psa.ll @@ -0,0 +1,96 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix -mcpu=pwr9 \ +; RUN: -mattr=+float16,+vsx,+power9-vector < %s | FileCheck %s --check-prefix=P9-AIX-64 +; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix -mcpu=pwr9 \ +; RUN: -mattr=+float16,+vsx,+power9-vector < %s | FileCheck %s --check-prefix=P9-AIX-32 + +; 14th f16 argument goes to stack. +; The first 13 f16 args exhaust F1-F13. The 14th must come from the PSA. +; PSA starts at SP+48 on PPC64 AIX. With 8-byte slots, the 14th arg is at +; SP+48+(13*8)=SP+152. This confirms each preceding slot was 8 bytes wide. +define half @test_14th_arg( +; P9-AIX-64-LABEL: test_14th_arg: +; P9-AIX-64: # %bb.0: +; P9-AIX-64-NEXT: addi 3, 1, 152 +; P9-AIX-64-NEXT: lxsihzx 1, 0, 3 +; P9-AIX-64-NEXT: blr +; +; P9-AIX-32-LABEL: test_14th_arg: +; P9-AIX-32: # %bb.0: +; P9-AIX-32-NEXT: addi 3, 1, 76 +; P9-AIX-32-NEXT: lxsihzx 1, 0, 3 +; P9-AIX-32-NEXT: blr + half %a0, half %a1, half %a2, half %a3, + half %a4, half %a5, half %a6, half %a7, + half %a8, half %a9, half %a10, half %a11, + half %a12, half %a13) { + ret half %a13 +} + +; Mixed f16 and i64 arguments. +; Verifies GPR shadow is consumed for each f16 argument. 
+; Expected register layout on PPC64 AIX: +; %f0 (half) -> F1, GPR shadow consumes X3 +; %i0 (i64) -> X4 +; %f1 (half) -> F2, GPR shadow consumes X5 +; %i1 (i64) -> X6 +; If GPR shadow is missing, %i1 would land in X4 instead of X6. +; The return value %i1 must come from X6 (mr 3, 6). +define i64 @test_mixed_args(half %f0, i64 %i0, half %f1, i64 %i1) { +; P9-AIX-64-LABEL: test_mixed_args: +; P9-AIX-64: # %bb.0: +; P9-AIX-64-NEXT: mr 3, 6 +; P9-AIX-64-NEXT: blr +; +; P9-AIX-32-LABEL: test_mixed_args: +; P9-AIX-32: # %bb.0: +; P9-AIX-32-NEXT: mr 4, 8 +; P9-AIX-32-NEXT: mr 3, 7 +; P9-AIX-32-NEXT: blr + ret i64 %i1 +} + +; Vararg function receiving f16 arguments. +; Verifies GPR shadow slots are saved to PSA for vararg access. +; X4-X10 must be saved starting at SP+56 (immediately after %count slot). +; The first vararg f16 value must be loaded via lxsihzx from SP+56. +declare void @llvm.va_start(ptr) +declare void @llvm.va_end(ptr) + +define half @test_vararg_f16(i32 %count, ...) { +; P9-AIX-64-LABEL: test_vararg_f16: +; P9-AIX-64: # %bb.0: # %entry +; P9-AIX-64-NEXT: addi 3, 1, 58 +; P9-AIX-64-NEXT: std 4, 56(1) +; P9-AIX-64-NEXT: std 5, 64(1) +; P9-AIX-64-NEXT: std 6, 72(1) +; P9-AIX-64-NEXT: std 7, 80(1) +; P9-AIX-64-NEXT: std 8, 88(1) +; P9-AIX-64-NEXT: std 9, 96(1) +; P9-AIX-64-NEXT: std 10, 104(1) +; P9-AIX-64-NEXT: std 3, -8(1) +; P9-AIX-64-NEXT: addi 3, 1, 56 +; P9-AIX-64-NEXT: lxsihzx 1, 0, 3 +; P9-AIX-64-NEXT: blr +; +; P9-AIX-32-LABEL: test_vararg_f16: +; P9-AIX-32: # %bb.0: # %entry +; P9-AIX-32-NEXT: addi 3, 1, 30 +; P9-AIX-32-NEXT: stw 4, 28(1) +; P9-AIX-32-NEXT: stw 5, 32(1) +; P9-AIX-32-NEXT: stw 6, 36(1) +; P9-AIX-32-NEXT: stw 7, 40(1) +; P9-AIX-32-NEXT: stw 8, 44(1) +; P9-AIX-32-NEXT: stw 9, 48(1) +; P9-AIX-32-NEXT: stw 10, 52(1) +; P9-AIX-32-NEXT: stw 3, -4(1) +; P9-AIX-32-NEXT: addi 3, 1, 28 +; P9-AIX-32-NEXT: lxsihzx 1, 0, 3 +; P9-AIX-32-NEXT: blr +entry: + %va = alloca ptr + call void @llvm.va_start(ptr %va) + %val = va_arg ptr %va, half + call 
void @llvm.va_end(ptr %va) + ret half %val +} diff --git a/llvm/test/CodeGen/PowerPC/half-float16-ppc.ll b/llvm/test/CodeGen/PowerPC/half-float16-ppc.ll new file mode 100644 index 0000000000000..61ef84e5c9fd0 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/half-float16-ppc.ll @@ -0,0 +1,4198 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix -mcpu=pwr8 \ +; RUN: -mattr=+float16 < %s | FileCheck %s --check-prefix=P8-AIX-32 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix -mcpu=pwr8 \ +; RUN: -mattr=+float16 < %s | FileCheck %s --check-prefix=P8-AIX-64 +; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix -mcpu=pwr9 \ +; RUN: -mattr=+float16,+vsx,+power9-vector < %s | FileCheck %s --check-prefixes=P9,P9-AIX-32 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix -mcpu=pwr9 \ +; RUN: -mattr=+float16,+vsx,+power9-vector < %s | FileCheck %s --check-prefixes=P9,P9-AIX-64 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-linux-gnu -mcpu=pwr8 \ +; RUN: -mattr=+float16 < %s | FileCheck %s --check-prefix=P8-LINUX-64 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-linux-gnu -mcpu=pwr9 \ +; RUN: -mattr=+float16,+vsx,+power9-vector < %s | FileCheck %s --check-prefixes=P9,P9-LINUX-64 + +; ======================================================================================== +; Load/Store Operations +; ISD::LOAD, ISD::STORE +; ======================================================================================== + +define half @load_half(ptr %p) nounwind { +; P8-AIX-32-LABEL: load_half: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: lhzx 3, 0, 3 +; P8-AIX-32-NEXT: mtfprwz 1, 3 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: load_half: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: lhzx 3, 0, 3 +; P8-AIX-64-NEXT: mtfprwz 1, 3 +; P8-AIX-64-NEXT: blr +; +; P9-LABEL: load_half: +; P9: # %bb.0: # %entry +; P9-NEXT: lxsihzx 1, 0, 3 
+; P9-NEXT: blr +; +; P8-LINUX-64-LABEL: load_half: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: lhzx 3, 0, 3 +; P8-LINUX-64-NEXT: mtfprwz 1, 3 +; P8-LINUX-64-NEXT: blr +entry: + %v = load half, ptr %p + ret half %v +} + +define void @store_half(ptr %p, half %v) nounwind { +; P8-AIX-32-LABEL: store_half: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mffprwz 4, 1 +; P8-AIX-32-NEXT: sthx 4, 0, 3 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: store_half: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mffprwz 4, 1 +; P8-AIX-64-NEXT: sthx 4, 0, 3 +; P8-AIX-64-NEXT: blr +; +; P9-LABEL: store_half: +; P9: # %bb.0: # %entry +; P9-NEXT: stxsihx 1, 0, 3 +; P9-NEXT: blr +; +; P8-LINUX-64-LABEL: store_half: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mffprwz 4, 1 +; P8-LINUX-64-NEXT: sthx 4, 0, 3 +; P8-LINUX-64-NEXT: blr +entry: + store half %v, ptr %p + ret void +} + +; ======================================================================================== +; Basic Arithmetic Operations +; ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV +; ======================================================================================== + +define half @op_add(half %a, half %b) nounwind { +; P8-AIX-32-LABEL: op_add: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -80(1) +; P8-AIX-32-NEXT: stw 0, 88(1) +; P8-AIX-32-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: fmr 31, 1 +; P8-AIX-32-NEXT: fmr 1, 2 +; P8-AIX-32-NEXT: stfd 30, 64(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fmr 30, 1 +; P8-AIX-32-NEXT: fmr 1, 31 +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: xsaddsp 1, 1, 30 +; P8-AIX-32-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: addi 1, 1, 80 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; 
P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: op_add: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -128(1) +; P8-AIX-64-NEXT: std 0, 144(1) +; P8-AIX-64-NEXT: stfd 31, 120(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: fmr 31, 1 +; P8-AIX-64-NEXT: fmr 1, 2 +; P8-AIX-64-NEXT: stfd 30, 112(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fmr 30, 1 +; P8-AIX-64-NEXT: fmr 1, 31 +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: xsaddsp 1, 1, 30 +; P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: lfd 31, 120(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: lfd 30, 112(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: addi 1, 1, 128 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-LABEL: op_add: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvhpdp 0, 2 +; P9-NEXT: xscvhpdp 1, 1 +; P9-NEXT: xsaddsp 0, 1, 0 +; P9-NEXT: xscvdphp 1, 0 +; P9-NEXT: blr +; +; P8-LINUX-64-LABEL: op_add: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stfd 30, -16(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stfd 31, -8(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stdu 1, -48(1) +; P8-LINUX-64-NEXT: fmr 31, 1 +; P8-LINUX-64-NEXT: fmr 1, 2 +; P8-LINUX-64-NEXT: std 0, 64(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fmr 30, 1 +; P8-LINUX-64-NEXT: fmr 1, 31 +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: xsaddsp 1, 1, 30 +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 48 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: lfd 31, -8(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: lfd 30, -16(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +entry: + %unpromotion = fadd half %a, %b + ret half %unpromotion +} + 
+define half @op_sub(half %a, half %b) nounwind { +; P8-AIX-32-LABEL: op_sub: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -80(1) +; P8-AIX-32-NEXT: stw 0, 88(1) +; P8-AIX-32-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: fmr 31, 1 +; P8-AIX-32-NEXT: fmr 1, 2 +; P8-AIX-32-NEXT: stfd 30, 64(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fmr 30, 1 +; P8-AIX-32-NEXT: fmr 1, 31 +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: xssubsp 1, 1, 30 +; P8-AIX-32-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: addi 1, 1, 80 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: op_sub: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -128(1) +; P8-AIX-64-NEXT: std 0, 144(1) +; P8-AIX-64-NEXT: stfd 31, 120(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: fmr 31, 1 +; P8-AIX-64-NEXT: fmr 1, 2 +; P8-AIX-64-NEXT: stfd 30, 112(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fmr 30, 1 +; P8-AIX-64-NEXT: fmr 1, 31 +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: xssubsp 1, 1, 30 +; P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: lfd 31, 120(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: lfd 30, 112(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: addi 1, 1, 128 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-LABEL: op_sub: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvhpdp 0, 2 +; P9-NEXT: xscvhpdp 1, 1 +; P9-NEXT: xssubsp 0, 1, 0 +; P9-NEXT: xscvdphp 1, 0 +; P9-NEXT: blr +; +; P8-LINUX-64-LABEL: op_sub: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mflr 0 +; 
P8-LINUX-64-NEXT: stfd 30, -16(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stfd 31, -8(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stdu 1, -48(1) +; P8-LINUX-64-NEXT: fmr 31, 1 +; P8-LINUX-64-NEXT: fmr 1, 2 +; P8-LINUX-64-NEXT: std 0, 64(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fmr 30, 1 +; P8-LINUX-64-NEXT: fmr 1, 31 +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: xssubsp 1, 1, 30 +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 48 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: lfd 31, -8(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: lfd 30, -16(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +entry: + %unpromotion = fsub half %a, %b + ret half %unpromotion +} + +define half @op_mul(half %a, half %b) nounwind { +; P8-AIX-32-LABEL: op_mul: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -80(1) +; P8-AIX-32-NEXT: stw 0, 88(1) +; P8-AIX-32-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: fmr 31, 1 +; P8-AIX-32-NEXT: fmr 1, 2 +; P8-AIX-32-NEXT: stfd 30, 64(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fmr 30, 1 +; P8-AIX-32-NEXT: fmr 1, 31 +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: xsmulsp 1, 1, 30 +; P8-AIX-32-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: addi 1, 1, 80 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: op_mul: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -128(1) +; P8-AIX-64-NEXT: std 0, 144(1) +; P8-AIX-64-NEXT: stfd 31, 120(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: fmr 31, 1 +; P8-AIX-64-NEXT: fmr 1, 2 +; 
P8-AIX-64-NEXT: stfd 30, 112(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fmr 30, 1 +; P8-AIX-64-NEXT: fmr 1, 31 +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: xsmulsp 1, 1, 30 +; P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: lfd 31, 120(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: lfd 30, 112(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: addi 1, 1, 128 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-LABEL: op_mul: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvhpdp 0, 2 +; P9-NEXT: xscvhpdp 1, 1 +; P9-NEXT: xsmulsp 0, 1, 0 +; P9-NEXT: xscvdphp 1, 0 +; P9-NEXT: blr +; +; P8-LINUX-64-LABEL: op_mul: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stfd 30, -16(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stfd 31, -8(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stdu 1, -48(1) +; P8-LINUX-64-NEXT: fmr 31, 1 +; P8-LINUX-64-NEXT: fmr 1, 2 +; P8-LINUX-64-NEXT: std 0, 64(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fmr 30, 1 +; P8-LINUX-64-NEXT: fmr 1, 31 +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: xsmulsp 1, 1, 30 +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 48 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: lfd 31, -8(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: lfd 30, -16(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +entry: + %unpromotion = fmul half %a, %b + ret half %unpromotion +} + +define half @op_div(half %a, half %b) nounwind { +; P8-AIX-32-LABEL: op_div: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -80(1) +; P8-AIX-32-NEXT: stw 0, 88(1) +; P8-AIX-32-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: fmr 31, 1 +; P8-AIX-32-NEXT: fmr 1, 2 +; 
P8-AIX-32-NEXT: stfd 30, 64(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fmr 30, 1 +; P8-AIX-32-NEXT: fmr 1, 31 +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: xsdivsp 1, 1, 30 +; P8-AIX-32-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: addi 1, 1, 80 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: op_div: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -128(1) +; P8-AIX-64-NEXT: std 0, 144(1) +; P8-AIX-64-NEXT: stfd 31, 120(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: fmr 31, 1 +; P8-AIX-64-NEXT: fmr 1, 2 +; P8-AIX-64-NEXT: stfd 30, 112(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fmr 30, 1 +; P8-AIX-64-NEXT: fmr 1, 31 +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: xsdivsp 1, 1, 30 +; P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: lfd 31, 120(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: lfd 30, 112(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: addi 1, 1, 128 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-LABEL: op_div: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvhpdp 0, 2 +; P9-NEXT: xscvhpdp 1, 1 +; P9-NEXT: xsdivsp 0, 1, 0 +; P9-NEXT: xscvdphp 1, 0 +; P9-NEXT: blr +; +; P8-LINUX-64-LABEL: op_div: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stfd 30, -16(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stfd 31, -8(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stdu 1, -48(1) +; P8-LINUX-64-NEXT: fmr 31, 1 +; P8-LINUX-64-NEXT: fmr 1, 2 +; P8-LINUX-64-NEXT: std 0, 64(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; 
P8-LINUX-64-NEXT: fmr 30, 1 +; P8-LINUX-64-NEXT: fmr 1, 31 +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: xsdivsp 1, 1, 30 +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 48 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: lfd 31, -8(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: lfd 30, -16(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +entry: + %unpromotion = fdiv half %a, %b + ret half %unpromotion +} + +; ======================================================================================== +; Advanced Arithmetic Operations +; ISD::FMA (promoted to f64), ISD::FSQRT, ISD::FREM +; ======================================================================================== + +define half @test_fma(half %a, half %b, half %c) nounwind { +; P8-AIX-32-LABEL: test_fma: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -80(1) +; P8-AIX-32-NEXT: stw 0, 88(1) +; P8-AIX-32-NEXT: stfd 29, 56(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: stfd 30, 64(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: fmr 30, 2 +; P8-AIX-32-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: fmr 31, 3 +; P8-AIX-32-NEXT: bl .__extendhfdf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fmr 29, 1 +; P8-AIX-32-NEXT: fmr 1, 30 +; P8-AIX-32-NEXT: bl .__extendhfdf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fmr 30, 1 +; P8-AIX-32-NEXT: fmr 1, 31 +; P8-AIX-32-NEXT: bl .__extendhfdf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: xsmaddadp 1, 29, 30 +; P8-AIX-32-NEXT: bl .__truncdfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: lfd 29, 56(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: addi 1, 1, 80 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: test_fma: +; P8-AIX-64: # %bb.0: # 
%entry +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -144(1) +; P8-AIX-64-NEXT: std 0, 160(1) +; P8-AIX-64-NEXT: stfd 29, 120(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: stfd 30, 128(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: fmr 30, 2 +; P8-AIX-64-NEXT: stfd 31, 136(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: fmr 31, 3 +; P8-AIX-64-NEXT: bl .__extendhfdf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fmr 29, 1 +; P8-AIX-64-NEXT: fmr 1, 30 +; P8-AIX-64-NEXT: bl .__extendhfdf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fmr 30, 1 +; P8-AIX-64-NEXT: fmr 1, 31 +; P8-AIX-64-NEXT: bl .__extendhfdf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: xsmaddadp 1, 29, 30 +; P8-AIX-64-NEXT: bl .__truncdfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: lfd 31, 136(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: lfd 30, 128(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: lfd 29, 120(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: addi 1, 1, 144 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-LABEL: test_fma: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvhpdp 0, 2 +; P9-NEXT: xscvhpdp 1, 1 +; P9-NEXT: xscvhpdp 2, 3 +; P9-NEXT: xsmaddadp 2, 1, 0 +; P9-NEXT: xscvdphp 1, 2 +; P9-NEXT: blr +; +; P8-LINUX-64-LABEL: test_fma: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stfd 29, -24(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stfd 30, -16(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stfd 31, -8(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stdu 1, -64(1) +; P8-LINUX-64-NEXT: std 0, 80(1) +; P8-LINUX-64-NEXT: fmr 31, 3 +; P8-LINUX-64-NEXT: fmr 30, 2 +; P8-LINUX-64-NEXT: bl __extendhfdf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fmr 29, 1 +; P8-LINUX-64-NEXT: fmr 1, 30 +; P8-LINUX-64-NEXT: bl __extendhfdf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fmr 30, 1 +; P8-LINUX-64-NEXT: fmr 1, 31 +; P8-LINUX-64-NEXT: bl __extendhfdf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: xsmaddadp 1, 29, 30 +; 
P8-LINUX-64-NEXT: bl __truncdfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 64 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: lfd 31, -8(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: lfd 30, -16(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: lfd 29, -24(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: blr +entry: + %r = call half @llvm.fma.f16(half %a, half %b, half %c) + ret half %r +} + +define half @test_sqrt(half %a) { +; P8-AIX-32-LABEL: test_sqrt: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -64(1) +; P8-AIX-32-NEXT: stw 0, 72(1) +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: xssqrtsp 1, 1 +; P8-AIX-32-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: addi 1, 1, 64 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: test_sqrt: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -112(1) +; P8-AIX-64-NEXT: std 0, 128(1) +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: xssqrtsp 1, 1 +; P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: addi 1, 1, 112 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-LABEL: test_sqrt: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvhpdp 0, 1 +; P9-NEXT: xssqrtsp 0, 0 +; P9-NEXT: xscvdphp 1, 0 +; P9-NEXT: blr +; +; P8-LINUX-64-LABEL: test_sqrt: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stdu 1, -32(1) +; P8-LINUX-64-NEXT: std 0, 48(1) +; P8-LINUX-64-NEXT: .cfi_def_cfa_offset 32 +; P8-LINUX-64-NEXT: .cfi_offset lr, 16 +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: xssqrtsp 1, 1 +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 32 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: mtlr 0 +; 
P8-LINUX-64-NEXT: blr +entry: + %r = call half @llvm.sqrt.f16(half %a) + ret half %r +} + +define half @test_frem(half %a, half %b) nounwind { +; P8-AIX-32-LABEL: test_frem: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -80(1) +; P8-AIX-32-NEXT: stw 0, 88(1) +; P8-AIX-32-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: fmr 31, 1 +; P8-AIX-32-NEXT: fmr 1, 2 +; P8-AIX-32-NEXT: stfd 30, 64(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fmr 30, 1 +; P8-AIX-32-NEXT: fmr 1, 31 +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fmr 2, 30 +; P8-AIX-32-NEXT: bl .fmodf[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: addi 1, 1, 80 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: test_frem: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -128(1) +; P8-AIX-64-NEXT: std 0, 144(1) +; P8-AIX-64-NEXT: stfd 31, 120(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: fmr 31, 1 +; P8-AIX-64-NEXT: fmr 1, 2 +; P8-AIX-64-NEXT: stfd 30, 112(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fmr 30, 1 +; P8-AIX-64-NEXT: fmr 1, 31 +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fmr 2, 30 +; P8-AIX-64-NEXT: bl .fmodf[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: lfd 31, 120(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: lfd 30, 112(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: addi 1, 1, 128 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-AIX-32-LABEL: test_frem: +; P9-AIX-32: # %bb.0: # %entry +; 
P9-AIX-32-NEXT: mflr 0 +; P9-AIX-32-NEXT: stwu 1, -64(1) +; P9-AIX-32-NEXT: stw 0, 72(1) +; P9-AIX-32-NEXT: xscvhpdp 1, 1 +; P9-AIX-32-NEXT: xscvhpdp 2, 2 +; P9-AIX-32-NEXT: bl .fmodf[PR] +; P9-AIX-32-NEXT: nop +; P9-AIX-32-NEXT: xscvdphp 1, 1 +; P9-AIX-32-NEXT: addi 1, 1, 64 +; P9-AIX-32-NEXT: lwz 0, 8(1) +; P9-AIX-32-NEXT: mtlr 0 +; P9-AIX-32-NEXT: blr +; +; P9-AIX-64-LABEL: test_frem: +; P9-AIX-64: # %bb.0: # %entry +; P9-AIX-64-NEXT: mflr 0 +; P9-AIX-64-NEXT: stdu 1, -112(1) +; P9-AIX-64-NEXT: std 0, 128(1) +; P9-AIX-64-NEXT: xscvhpdp 1, 1 +; P9-AIX-64-NEXT: xscvhpdp 2, 2 +; P9-AIX-64-NEXT: bl .fmodf[PR] +; P9-AIX-64-NEXT: nop +; P9-AIX-64-NEXT: xscvdphp 1, 1 +; P9-AIX-64-NEXT: addi 1, 1, 112 +; P9-AIX-64-NEXT: ld 0, 16(1) +; P9-AIX-64-NEXT: mtlr 0 +; P9-AIX-64-NEXT: blr +; +; P8-LINUX-64-LABEL: test_frem: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stfd 30, -16(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stfd 31, -8(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stdu 1, -48(1) +; P8-LINUX-64-NEXT: fmr 31, 1 +; P8-LINUX-64-NEXT: fmr 1, 2 +; P8-LINUX-64-NEXT: std 0, 64(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fmr 30, 1 +; P8-LINUX-64-NEXT: fmr 1, 31 +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fmr 2, 30 +; P8-LINUX-64-NEXT: bl fmodf +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 48 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: lfd 31, -8(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: lfd 30, -16(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +; +; P9-LINUX-64-LABEL: test_frem: +; P9-LINUX-64: # %bb.0: # %entry +; P9-LINUX-64-NEXT: mflr 0 +; P9-LINUX-64-NEXT: stdu 1, -32(1) +; P9-LINUX-64-NEXT: std 0, 48(1) +; P9-LINUX-64-NEXT: xscvhpdp 1, 1 +; P9-LINUX-64-NEXT: xscvhpdp 2, 2 +; P9-LINUX-64-NEXT: bl fmodf +; P9-LINUX-64-NEXT: nop 
+; P9-LINUX-64-NEXT: xscvdphp 1, 1 +; P9-LINUX-64-NEXT: addi 1, 1, 32 +; P9-LINUX-64-NEXT: ld 0, 16(1) +; P9-LINUX-64-NEXT: mtlr 0 +; P9-LINUX-64-NEXT: blr +entry: + %r = frem half %a, %b + ret half %r +} + +; ======================================================================================== +; Comparison Operations +; ISD::SETCC +; ======================================================================================== + +define zeroext i1 @op_eq(half %a, half %b) nounwind { +; P8-AIX-32-LABEL: op_eq: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -80(1) +; P8-AIX-32-NEXT: stw 0, 88(1) +; P8-AIX-32-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: fmr 31, 1 +; P8-AIX-32-NEXT: fmr 1, 2 +; P8-AIX-32-NEXT: stfd 30, 64(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fmr 30, 1 +; P8-AIX-32-NEXT: fmr 1, 31 +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fcmpu 0, 1, 30 +; P8-AIX-32-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: li 3, 0 +; P8-AIX-32-NEXT: li 4, 1 +; P8-AIX-32-NEXT: iseleq 3, 4, 3 +; P8-AIX-32-NEXT: addi 1, 1, 80 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: op_eq: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -128(1) +; P8-AIX-64-NEXT: std 0, 144(1) +; P8-AIX-64-NEXT: stfd 31, 120(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: fmr 31, 1 +; P8-AIX-64-NEXT: fmr 1, 2 +; P8-AIX-64-NEXT: stfd 30, 112(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fmr 30, 1 +; P8-AIX-64-NEXT: fmr 1, 31 +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fcmpu 0, 1, 30 +; P8-AIX-64-NEXT: lfd 31, 120(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: lfd 30, 112(1) # 8-byte Folded 
Reload +; P8-AIX-64-NEXT: li 3, 0 +; P8-AIX-64-NEXT: li 4, 1 +; P8-AIX-64-NEXT: iseleq 3, 4, 3 +; P8-AIX-64-NEXT: addi 1, 1, 128 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-LABEL: op_eq: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvhpdp 0, 2 +; P9-NEXT: xscvhpdp 1, 1 +; P9-NEXT: li 3, 0 +; P9-NEXT: li 4, 1 +; P9-NEXT: fcmpu 0, 1, 0 +; P9-NEXT: iseleq 3, 4, 3 +; P9-NEXT: blr +; +; P8-LINUX-64-LABEL: op_eq: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stfd 30, -16(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stfd 31, -8(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stdu 1, -48(1) +; P8-LINUX-64-NEXT: fmr 31, 1 +; P8-LINUX-64-NEXT: fmr 1, 2 +; P8-LINUX-64-NEXT: std 0, 64(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fmr 30, 1 +; P8-LINUX-64-NEXT: fmr 1, 31 +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fcmpu 0, 1, 30 +; P8-LINUX-64-NEXT: li 3, 0 +; P8-LINUX-64-NEXT: li 4, 1 +; P8-LINUX-64-NEXT: iseleq 3, 4, 3 +; P8-LINUX-64-NEXT: addi 1, 1, 48 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: lfd 31, -8(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: lfd 30, -16(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +entry: + %cmp = fcmp oeq half %a, %b + ret i1 %cmp +} + +define zeroext i1 @op_ne(half %a, half %b) nounwind { +; P8-AIX-32-LABEL: op_ne: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -80(1) +; P8-AIX-32-NEXT: stw 0, 88(1) +; P8-AIX-32-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: fmr 31, 1 +; P8-AIX-32-NEXT: fmr 1, 2 +; P8-AIX-32-NEXT: stfd 30, 64(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fmr 30, 1 +; P8-AIX-32-NEXT: fmr 1, 31 +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fcmpu 0, 1, 30 +; P8-AIX-32-NEXT: lfd 31, 
72(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: li 3, 1 +; P8-AIX-32-NEXT: iseleq 3, 0, 3 +; P8-AIX-32-NEXT: addi 1, 1, 80 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: op_ne: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -128(1) +; P8-AIX-64-NEXT: std 0, 144(1) +; P8-AIX-64-NEXT: stfd 31, 120(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: fmr 31, 1 +; P8-AIX-64-NEXT: fmr 1, 2 +; P8-AIX-64-NEXT: stfd 30, 112(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fmr 30, 1 +; P8-AIX-64-NEXT: fmr 1, 31 +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fcmpu 0, 1, 30 +; P8-AIX-64-NEXT: lfd 31, 120(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: lfd 30, 112(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: li 3, 1 +; P8-AIX-64-NEXT: iseleq 3, 0, 3 +; P8-AIX-64-NEXT: addi 1, 1, 128 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-LABEL: op_ne: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvhpdp 0, 2 +; P9-NEXT: xscvhpdp 1, 1 +; P9-NEXT: li 3, 1 +; P9-NEXT: fcmpu 0, 1, 0 +; P9-NEXT: iseleq 3, 0, 3 +; P9-NEXT: blr +; +; P8-LINUX-64-LABEL: op_ne: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stfd 30, -16(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stfd 31, -8(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stdu 1, -48(1) +; P8-LINUX-64-NEXT: fmr 31, 1 +; P8-LINUX-64-NEXT: fmr 1, 2 +; P8-LINUX-64-NEXT: std 0, 64(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fmr 30, 1 +; P8-LINUX-64-NEXT: fmr 1, 31 +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fcmpu 0, 1, 30 +; P8-LINUX-64-NEXT: li 3, 1 +; P8-LINUX-64-NEXT: iseleq 3, 0, 3 +; P8-LINUX-64-NEXT: addi 1, 1, 48 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; 
P8-LINUX-64-NEXT: lfd 31, -8(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: lfd 30, -16(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +entry: + %cmp = fcmp une half %a, %b + ret i1 %cmp +} + +define zeroext i1 @op_lt(half %a, half %b) nounwind { +; P8-AIX-32-LABEL: op_lt: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -80(1) +; P8-AIX-32-NEXT: stw 0, 88(1) +; P8-AIX-32-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: fmr 31, 1 +; P8-AIX-32-NEXT: fmr 1, 2 +; P8-AIX-32-NEXT: stfd 30, 64(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fmr 30, 1 +; P8-AIX-32-NEXT: fmr 1, 31 +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fcmpu 0, 1, 30 +; P8-AIX-32-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: li 3, 0 +; P8-AIX-32-NEXT: li 4, 1 +; P8-AIX-32-NEXT: isellt 3, 4, 3 +; P8-AIX-32-NEXT: addi 1, 1, 80 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: op_lt: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -128(1) +; P8-AIX-64-NEXT: std 0, 144(1) +; P8-AIX-64-NEXT: stfd 31, 120(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: fmr 31, 1 +; P8-AIX-64-NEXT: fmr 1, 2 +; P8-AIX-64-NEXT: stfd 30, 112(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fmr 30, 1 +; P8-AIX-64-NEXT: fmr 1, 31 +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fcmpu 0, 1, 30 +; P8-AIX-64-NEXT: lfd 31, 120(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: lfd 30, 112(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: li 3, 0 +; P8-AIX-64-NEXT: li 4, 1 +; P8-AIX-64-NEXT: isellt 3, 4, 3 +; P8-AIX-64-NEXT: addi 1, 1, 128 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; 
+; P9-LABEL: op_lt: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvhpdp 0, 2 +; P9-NEXT: xscvhpdp 1, 1 +; P9-NEXT: li 3, 0 +; P9-NEXT: li 4, 1 +; P9-NEXT: fcmpu 0, 1, 0 +; P9-NEXT: isellt 3, 4, 3 +; P9-NEXT: blr +; +; P8-LINUX-64-LABEL: op_lt: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stfd 30, -16(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stfd 31, -8(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stdu 1, -48(1) +; P8-LINUX-64-NEXT: fmr 31, 1 +; P8-LINUX-64-NEXT: fmr 1, 2 +; P8-LINUX-64-NEXT: std 0, 64(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fmr 30, 1 +; P8-LINUX-64-NEXT: fmr 1, 31 +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fcmpu 0, 1, 30 +; P8-LINUX-64-NEXT: li 3, 0 +; P8-LINUX-64-NEXT: li 4, 1 +; P8-LINUX-64-NEXT: isellt 3, 4, 3 +; P8-LINUX-64-NEXT: addi 1, 1, 48 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: lfd 31, -8(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: lfd 30, -16(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +entry: + %cmp = fcmp olt half %a, %b + ret i1 %cmp +} + +define zeroext i1 @op_le(half %a, half %b) nounwind { +; P8-AIX-32-LABEL: op_le: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -80(1) +; P8-AIX-32-NEXT: stw 0, 88(1) +; P8-AIX-32-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: fmr 31, 1 +; P8-AIX-32-NEXT: fmr 1, 2 +; P8-AIX-32-NEXT: stfd 30, 64(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fmr 30, 1 +; P8-AIX-32-NEXT: fmr 1, 31 +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fcmpu 0, 1, 30 +; P8-AIX-32-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: li 3, 1 +; P8-AIX-32-NEXT: cror 20, 3, 1 +; P8-AIX-32-NEXT: isel 3, 0, 3, 20 +; P8-AIX-32-NEXT: addi 1, 1, 80 +; 
P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: op_le: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -128(1) +; P8-AIX-64-NEXT: std 0, 144(1) +; P8-AIX-64-NEXT: stfd 31, 120(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: fmr 31, 1 +; P8-AIX-64-NEXT: fmr 1, 2 +; P8-AIX-64-NEXT: stfd 30, 112(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fmr 30, 1 +; P8-AIX-64-NEXT: fmr 1, 31 +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fcmpu 0, 1, 30 +; P8-AIX-64-NEXT: lfd 31, 120(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: lfd 30, 112(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: li 3, 1 +; P8-AIX-64-NEXT: cror 20, 3, 1 +; P8-AIX-64-NEXT: isel 3, 0, 3, 20 +; P8-AIX-64-NEXT: addi 1, 1, 128 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-LABEL: op_le: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvhpdp 0, 2 +; P9-NEXT: xscvhpdp 1, 1 +; P9-NEXT: li 3, 1 +; P9-NEXT: fcmpu 0, 1, 0 +; P9-NEXT: cror 20, 3, 1 +; P9-NEXT: isel 3, 0, 3, 20 +; P9-NEXT: blr +; +; P8-LINUX-64-LABEL: op_le: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stfd 30, -16(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stfd 31, -8(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stdu 1, -48(1) +; P8-LINUX-64-NEXT: fmr 31, 1 +; P8-LINUX-64-NEXT: fmr 1, 2 +; P8-LINUX-64-NEXT: std 0, 64(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fmr 30, 1 +; P8-LINUX-64-NEXT: fmr 1, 31 +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fcmpu 0, 1, 30 +; P8-LINUX-64-NEXT: li 3, 1 +; P8-LINUX-64-NEXT: cror 20, 3, 1 +; P8-LINUX-64-NEXT: isel 3, 0, 3, 20 +; P8-LINUX-64-NEXT: addi 1, 1, 48 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: lfd 31, -8(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: lfd 30, -16(1) # 
8-byte Folded Reload +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +entry: + %cmp = fcmp ole half %a, %b + ret i1 %cmp +} + +define zeroext i1 @op_gt(half %a, half %b) nounwind { +; P8-AIX-32-LABEL: op_gt: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -80(1) +; P8-AIX-32-NEXT: stw 0, 88(1) +; P8-AIX-32-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: fmr 31, 1 +; P8-AIX-32-NEXT: fmr 1, 2 +; P8-AIX-32-NEXT: stfd 30, 64(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fmr 30, 1 +; P8-AIX-32-NEXT: fmr 1, 31 +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fcmpu 0, 1, 30 +; P8-AIX-32-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: li 3, 0 +; P8-AIX-32-NEXT: li 4, 1 +; P8-AIX-32-NEXT: iselgt 3, 4, 3 +; P8-AIX-32-NEXT: addi 1, 1, 80 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: op_gt: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -128(1) +; P8-AIX-64-NEXT: std 0, 144(1) +; P8-AIX-64-NEXT: stfd 31, 120(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: fmr 31, 1 +; P8-AIX-64-NEXT: fmr 1, 2 +; P8-AIX-64-NEXT: stfd 30, 112(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fmr 30, 1 +; P8-AIX-64-NEXT: fmr 1, 31 +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fcmpu 0, 1, 30 +; P8-AIX-64-NEXT: lfd 31, 120(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: lfd 30, 112(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: li 3, 0 +; P8-AIX-64-NEXT: li 4, 1 +; P8-AIX-64-NEXT: iselgt 3, 4, 3 +; P8-AIX-64-NEXT: addi 1, 1, 128 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-LABEL: op_gt: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvhpdp 0, 2 +; P9-NEXT: xscvhpdp 1, 
1 +; P9-NEXT: li 3, 0 +; P9-NEXT: li 4, 1 +; P9-NEXT: fcmpu 0, 1, 0 +; P9-NEXT: iselgt 3, 4, 3 +; P9-NEXT: blr +; +; P8-LINUX-64-LABEL: op_gt: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stfd 30, -16(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stfd 31, -8(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stdu 1, -48(1) +; P8-LINUX-64-NEXT: fmr 31, 1 +; P8-LINUX-64-NEXT: fmr 1, 2 +; P8-LINUX-64-NEXT: std 0, 64(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fmr 30, 1 +; P8-LINUX-64-NEXT: fmr 1, 31 +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fcmpu 0, 1, 30 +; P8-LINUX-64-NEXT: li 3, 0 +; P8-LINUX-64-NEXT: li 4, 1 +; P8-LINUX-64-NEXT: iselgt 3, 4, 3 +; P8-LINUX-64-NEXT: addi 1, 1, 48 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: lfd 31, -8(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: lfd 30, -16(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +entry: + %cmp = fcmp ogt half %a, %b + ret i1 %cmp +} + +define zeroext i1 @op_ge(half %a, half %b) nounwind { +; P8-AIX-32-LABEL: op_ge: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -80(1) +; P8-AIX-32-NEXT: stw 0, 88(1) +; P8-AIX-32-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: fmr 31, 1 +; P8-AIX-32-NEXT: fmr 1, 2 +; P8-AIX-32-NEXT: stfd 30, 64(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fmr 30, 1 +; P8-AIX-32-NEXT: fmr 1, 31 +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fcmpu 0, 1, 30 +; P8-AIX-32-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: li 3, 1 +; P8-AIX-32-NEXT: cror 20, 3, 0 +; P8-AIX-32-NEXT: isel 3, 0, 3, 20 +; P8-AIX-32-NEXT: addi 1, 1, 80 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; 
P8-AIX-64-LABEL: op_ge: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -128(1) +; P8-AIX-64-NEXT: std 0, 144(1) +; P8-AIX-64-NEXT: stfd 31, 120(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: fmr 31, 1 +; P8-AIX-64-NEXT: fmr 1, 2 +; P8-AIX-64-NEXT: stfd 30, 112(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fmr 30, 1 +; P8-AIX-64-NEXT: fmr 1, 31 +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fcmpu 0, 1, 30 +; P8-AIX-64-NEXT: lfd 31, 120(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: lfd 30, 112(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: li 3, 1 +; P8-AIX-64-NEXT: cror 20, 3, 0 +; P8-AIX-64-NEXT: isel 3, 0, 3, 20 +; P8-AIX-64-NEXT: addi 1, 1, 128 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-LABEL: op_ge: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvhpdp 0, 2 +; P9-NEXT: xscvhpdp 1, 1 +; P9-NEXT: li 3, 1 +; P9-NEXT: fcmpu 0, 1, 0 +; P9-NEXT: cror 20, 3, 0 +; P9-NEXT: isel 3, 0, 3, 20 +; P9-NEXT: blr +; +; P8-LINUX-64-LABEL: op_ge: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stfd 30, -16(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stfd 31, -8(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stdu 1, -48(1) +; P8-LINUX-64-NEXT: fmr 31, 1 +; P8-LINUX-64-NEXT: fmr 1, 2 +; P8-LINUX-64-NEXT: std 0, 64(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fmr 30, 1 +; P8-LINUX-64-NEXT: fmr 1, 31 +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fcmpu 0, 1, 30 +; P8-LINUX-64-NEXT: li 3, 1 +; P8-LINUX-64-NEXT: cror 20, 3, 0 +; P8-LINUX-64-NEXT: isel 3, 0, 3, 20 +; P8-LINUX-64-NEXT: addi 1, 1, 48 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: lfd 31, -8(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: lfd 30, -16(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +entry: + 
%cmp = fcmp oge half %a, %b + ret i1 %cmp +} + +; ======================================================================================== +; Control Flow Operations +; ISD::SELECT, ISD::SELECT_CC +; ======================================================================================== + +define half @op_select(i1 %cond, half %a, half %b) nounwind { +; P8-AIX-32-LABEL: op_select: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mfcr 12 +; P8-AIX-32-NEXT: stw 12, 4(1) +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -80(1) +; P8-AIX-32-NEXT: stw 0, 88(1) +; P8-AIX-32-NEXT: stfd 30, 64(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: fmr 30, 1 +; P8-AIX-32-NEXT: fmr 1, 2 +; P8-AIX-32-NEXT: andi. 3, 3, 1 +; P8-AIX-32-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: crmove 8, 1 +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fmr 31, 1 +; P8-AIX-32-NEXT: fmr 1, 30 +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: bc 12, 8, L..BB15_2 +; P8-AIX-32-NEXT: # %bb.1: # %entry +; P8-AIX-32-NEXT: fmr 1, 31 +; P8-AIX-32-NEXT: L..BB15_2: # %entry +; P8-AIX-32-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: addi 1, 1, 80 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: lwz 12, 4(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: mtocrf 32, 12 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: op_select: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mfcr 12 +; P8-AIX-64-NEXT: stw 12, 8(1) +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -128(1) +; P8-AIX-64-NEXT: std 0, 144(1) +; P8-AIX-64-NEXT: stfd 30, 112(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: fmr 30, 1 +; P8-AIX-64-NEXT: fmr 1, 2 +; P8-AIX-64-NEXT: andi. 
3, 3, 1 +; P8-AIX-64-NEXT: stfd 31, 120(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: crmove 8, 1 +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fmr 31, 1 +; P8-AIX-64-NEXT: fmr 1, 30 +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: bc 12, 8, L..BB15_2 +; P8-AIX-64-NEXT: # %bb.1: # %entry +; P8-AIX-64-NEXT: fmr 1, 31 +; P8-AIX-64-NEXT: L..BB15_2: # %entry +; P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: lfd 31, 120(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: lfd 30, 112(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: addi 1, 1, 128 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: lwz 12, 8(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: mtocrf 32, 12 +; P8-AIX-64-NEXT: blr +; +; P9-AIX-32-LABEL: op_select: +; P9-AIX-32: # %bb.0: # %entry +; P9-AIX-32-NEXT: xscvhpdp 2, 2 +; P9-AIX-32-NEXT: xscvhpdp 0, 1 +; P9-AIX-32-NEXT: andi. 3, 3, 1 +; P9-AIX-32-NEXT: bc 12, 1, L..BB15_2 +; P9-AIX-32-NEXT: # %bb.1: # %entry +; P9-AIX-32-NEXT: fmr 0, 2 +; P9-AIX-32-NEXT: L..BB15_2: # %entry +; P9-AIX-32-NEXT: xscvdphp 1, 0 +; P9-AIX-32-NEXT: blr +; +; P9-AIX-64-LABEL: op_select: +; P9-AIX-64: # %bb.0: # %entry +; P9-AIX-64-NEXT: xscvhpdp 2, 2 +; P9-AIX-64-NEXT: xscvhpdp 0, 1 +; P9-AIX-64-NEXT: andi. 3, 3, 1 +; P9-AIX-64-NEXT: bc 12, 1, L..BB15_2 +; P9-AIX-64-NEXT: # %bb.1: # %entry +; P9-AIX-64-NEXT: fmr 0, 2 +; P9-AIX-64-NEXT: L..BB15_2: # %entry +; P9-AIX-64-NEXT: xscvdphp 1, 0 +; P9-AIX-64-NEXT: blr +; +; P8-LINUX-64-LABEL: op_select: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mfocrf 12, 32 +; P8-LINUX-64-NEXT: stw 12, 8(1) +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stdu 1, -48(1) +; P8-LINUX-64-NEXT: std 0, 64(1) +; P8-LINUX-64-NEXT: stfd 30, 32(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: fmr 30, 1 +; P8-LINUX-64-NEXT: fmr 1, 2 +; P8-LINUX-64-NEXT: andi. 
3, 3, 1 +; P8-LINUX-64-NEXT: stfd 31, 40(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: crmove 8, 1 +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fmr 31, 1 +; P8-LINUX-64-NEXT: fmr 1, 30 +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: bc 12, 8, .LBB15_2 +; P8-LINUX-64-NEXT: # %bb.1: # %entry +; P8-LINUX-64-NEXT: fmr 1, 31 +; P8-LINUX-64-NEXT: .LBB15_2: # %entry +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: lfd 31, 40(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: lfd 30, 32(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: addi 1, 1, 48 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: lwz 12, 8(1) +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: mtocrf 32, 12 +; P8-LINUX-64-NEXT: blr +; +; P9-LINUX-64-LABEL: op_select: +; P9-LINUX-64: # %bb.0: # %entry +; P9-LINUX-64-NEXT: xscvhpdp 2, 2 +; P9-LINUX-64-NEXT: xscvhpdp 0, 1 +; P9-LINUX-64-NEXT: andi. 3, 3, 1 +; P9-LINUX-64-NEXT: bc 12, 1, .LBB15_2 +; P9-LINUX-64-NEXT: # %bb.1: # %entry +; P9-LINUX-64-NEXT: fmr 0, 2 +; P9-LINUX-64-NEXT: .LBB15_2: # %entry +; P9-LINUX-64-NEXT: xscvdphp 1, 0 +; P9-LINUX-64-NEXT: blr +entry: + %r = select i1 %cond, half %a, half %b + ret half %r +} + +define half @op_select_cc(half %a, half %b, half %c, half %d) nounwind { +; P8-AIX-32-LABEL: op_select_cc: +; P8-AIX-32: # %bb.0: +; P8-AIX-32-NEXT: mfcr 12 +; P8-AIX-32-NEXT: stw 12, 4(1) +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -96(1) +; P8-AIX-32-NEXT: stw 0, 104(1) +; P8-AIX-32-NEXT: stfd 29, 72(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: fmr 29, 1 +; P8-AIX-32-NEXT: fmr 1, 2 +; P8-AIX-32-NEXT: stfd 28, 64(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: stfd 30, 80(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: fmr 30, 3 +; P8-AIX-32-NEXT: stfd 31, 88(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: fmr 31, 4 +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fmr 28, 1 +; P8-AIX-32-NEXT: fmr 1, 
29 +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fcmpu 2, 1, 28 +; P8-AIX-32-NEXT: fmr 1, 31 +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fmr 31, 1 +; P8-AIX-32-NEXT: fmr 1, 30 +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: bc 12, 8, L..BB16_2 +; P8-AIX-32-NEXT: # %bb.1: +; P8-AIX-32-NEXT: fmr 1, 31 +; P8-AIX-32-NEXT: L..BB16_2: +; P8-AIX-32-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: lfd 31, 88(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: lfd 30, 80(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: lfd 29, 72(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: lfd 28, 64(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: addi 1, 1, 96 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: lwz 12, 4(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: mtocrf 32, 12 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: op_select_cc: +; P8-AIX-64: # %bb.0: +; P8-AIX-64-NEXT: mfcr 12 +; P8-AIX-64-NEXT: stw 12, 8(1) +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -144(1) +; P8-AIX-64-NEXT: std 0, 160(1) +; P8-AIX-64-NEXT: stfd 29, 120(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: fmr 29, 1 +; P8-AIX-64-NEXT: fmr 1, 2 +; P8-AIX-64-NEXT: stfd 28, 112(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: stfd 30, 128(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: fmr 30, 3 +; P8-AIX-64-NEXT: stfd 31, 136(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: fmr 31, 4 +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fmr 28, 1 +; P8-AIX-64-NEXT: fmr 1, 29 +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fcmpu 2, 1, 28 +; P8-AIX-64-NEXT: fmr 1, 31 +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fmr 31, 1 +; P8-AIX-64-NEXT: fmr 1, 30 +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: bc 12, 8, L..BB16_2 +; P8-AIX-64-NEXT: # %bb.1: +; P8-AIX-64-NEXT: fmr 1, 31 +; 
P8-AIX-64-NEXT: L..BB16_2: +; P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: lfd 31, 136(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: lfd 30, 128(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: lfd 29, 120(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: lfd 28, 112(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: addi 1, 1, 144 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: lwz 12, 8(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: mtocrf 32, 12 +; P8-AIX-64-NEXT: blr +; +; P9-AIX-32-LABEL: op_select_cc: +; P9-AIX-32: # %bb.0: +; P9-AIX-32-NEXT: xscvhpdp 0, 2 +; P9-AIX-32-NEXT: xscvhpdp 1, 1 +; P9-AIX-32-NEXT: fcmpu 0, 1, 0 +; P9-AIX-32-NEXT: xscvhpdp 1, 4 +; P9-AIX-32-NEXT: xscvhpdp 0, 3 +; P9-AIX-32-NEXT: bc 12, 0, L..BB16_2 +; P9-AIX-32-NEXT: # %bb.1: +; P9-AIX-32-NEXT: fmr 0, 1 +; P9-AIX-32-NEXT: L..BB16_2: +; P9-AIX-32-NEXT: xscvdphp 1, 0 +; P9-AIX-32-NEXT: blr +; +; P9-AIX-64-LABEL: op_select_cc: +; P9-AIX-64: # %bb.0: +; P9-AIX-64-NEXT: xscvhpdp 0, 2 +; P9-AIX-64-NEXT: xscvhpdp 1, 1 +; P9-AIX-64-NEXT: fcmpu 0, 1, 0 +; P9-AIX-64-NEXT: xscvhpdp 1, 4 +; P9-AIX-64-NEXT: xscvhpdp 0, 3 +; P9-AIX-64-NEXT: bc 12, 0, L..BB16_2 +; P9-AIX-64-NEXT: # %bb.1: +; P9-AIX-64-NEXT: fmr 0, 1 +; P9-AIX-64-NEXT: L..BB16_2: +; P9-AIX-64-NEXT: xscvdphp 1, 0 +; P9-AIX-64-NEXT: blr +; +; P8-LINUX-64-LABEL: op_select_cc: +; P8-LINUX-64: # %bb.0: +; P8-LINUX-64-NEXT: mfocrf 12, 32 +; P8-LINUX-64-NEXT: stw 12, 8(1) +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stdu 1, -64(1) +; P8-LINUX-64-NEXT: std 0, 80(1) +; P8-LINUX-64-NEXT: stfd 29, 40(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: fmr 29, 1 +; P8-LINUX-64-NEXT: fmr 1, 2 +; P8-LINUX-64-NEXT: stfd 28, 32(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stfd 30, 48(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: fmr 30, 3 +; P8-LINUX-64-NEXT: stfd 31, 56(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: fmr 31, 4 +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fmr 28, 1 +; 
P8-LINUX-64-NEXT: fmr 1, 29 +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fcmpu 2, 1, 28 +; P8-LINUX-64-NEXT: fmr 1, 31 +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fmr 31, 1 +; P8-LINUX-64-NEXT: fmr 1, 30 +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: bc 12, 8, .LBB16_2 +; P8-LINUX-64-NEXT: # %bb.1: +; P8-LINUX-64-NEXT: fmr 1, 31 +; P8-LINUX-64-NEXT: .LBB16_2: +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: lfd 31, 56(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: lfd 30, 48(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: lfd 29, 40(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: lfd 28, 32(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: addi 1, 1, 64 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: lwz 12, 8(1) +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: mtocrf 32, 12 +; P8-LINUX-64-NEXT: blr +; +; P9-LINUX-64-LABEL: op_select_cc: +; P9-LINUX-64: # %bb.0: +; P9-LINUX-64-NEXT: xscvhpdp 0, 2 +; P9-LINUX-64-NEXT: xscvhpdp 1, 1 +; P9-LINUX-64-NEXT: fcmpu 0, 1, 0 +; P9-LINUX-64-NEXT: xscvhpdp 1, 4 +; P9-LINUX-64-NEXT: xscvhpdp 0, 3 +; P9-LINUX-64-NEXT: bc 12, 0, .LBB16_2 +; P9-LINUX-64-NEXT: # %bb.1: +; P9-LINUX-64-NEXT: fmr 0, 1 +; P9-LINUX-64-NEXT: .LBB16_2: +; P9-LINUX-64-NEXT: xscvdphp 1, 0 +; P9-LINUX-64-NEXT: blr + %cmp = fcmp olt half %a, %b + %r = select i1 %cmp, half %c, half %d + ret half %r +} + +; ======================================================================================== +; Min/Max Operations +; ISD::FMINNUM/FMAXNUM (non-NaN), ISD::FMINIMUM/FMAXIMUM (IEEE 754-2019, NaN-propagating) +; ======================================================================================== + +define half @op_fmin(half %a, half %b) nounwind { +; P8-AIX-32-LABEL: op_fmin: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -80(1) +; P8-AIX-32-NEXT: stw 0, 88(1) +; 
P8-AIX-32-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: fmr 31, 1 +; P8-AIX-32-NEXT: fmr 1, 2 +; P8-AIX-32-NEXT: stfd 30, 64(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fmr 30, 1 +; P8-AIX-32-NEXT: fmr 1, 31 +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: xsmindp 1, 1, 30 +; P8-AIX-32-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: addi 1, 1, 80 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: op_fmin: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -128(1) +; P8-AIX-64-NEXT: std 0, 144(1) +; P8-AIX-64-NEXT: stfd 31, 120(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: fmr 31, 1 +; P8-AIX-64-NEXT: fmr 1, 2 +; P8-AIX-64-NEXT: stfd 30, 112(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fmr 30, 1 +; P8-AIX-64-NEXT: fmr 1, 31 +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: xsmindp 1, 1, 30 +; P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: lfd 31, 120(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: lfd 30, 112(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: addi 1, 1, 128 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-LABEL: op_fmin: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvhpdp 0, 2 +; P9-NEXT: xscvhpdp 1, 1 +; P9-NEXT: xsmindp 0, 1, 0 +; P9-NEXT: xscvdphp 1, 0 +; P9-NEXT: blr +; +; P8-LINUX-64-LABEL: op_fmin: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stfd 30, -16(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stfd 31, -8(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stdu 1, -48(1) +; P8-LINUX-64-NEXT: fmr 31, 1 +; P8-LINUX-64-NEXT: fmr 1, 2 
+; P8-LINUX-64-NEXT: std 0, 64(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fmr 30, 1 +; P8-LINUX-64-NEXT: fmr 1, 31 +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: xsmindp 1, 1, 30 +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 48 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: lfd 31, -8(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: lfd 30, -16(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +entry: + %0 = tail call half @llvm.minnum.f16(half %a, half %b) + ret half %0 +} + +define half @op_fmax(half %a, half %b) nounwind { +; P8-AIX-32-LABEL: op_fmax: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -80(1) +; P8-AIX-32-NEXT: stw 0, 88(1) +; P8-AIX-32-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: fmr 31, 1 +; P8-AIX-32-NEXT: fmr 1, 2 +; P8-AIX-32-NEXT: stfd 30, 64(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fmr 30, 1 +; P8-AIX-32-NEXT: fmr 1, 31 +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: xsmaxdp 1, 1, 30 +; P8-AIX-32-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: addi 1, 1, 80 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: op_fmax: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -128(1) +; P8-AIX-64-NEXT: std 0, 144(1) +; P8-AIX-64-NEXT: stfd 31, 120(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: fmr 31, 1 +; P8-AIX-64-NEXT: fmr 1, 2 +; P8-AIX-64-NEXT: stfd 30, 112(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fmr 30, 1 +; P8-AIX-64-NEXT: fmr 1, 31 +; P8-AIX-64-NEXT: bl 
.__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: xsmaxdp 1, 1, 30 +; P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: lfd 31, 120(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: lfd 30, 112(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: addi 1, 1, 128 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-LABEL: op_fmax: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvhpdp 0, 2 +; P9-NEXT: xscvhpdp 1, 1 +; P9-NEXT: xsmaxdp 0, 1, 0 +; P9-NEXT: xscvdphp 1, 0 +; P9-NEXT: blr +; +; P8-LINUX-64-LABEL: op_fmax: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stfd 30, -16(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stfd 31, -8(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stdu 1, -48(1) +; P8-LINUX-64-NEXT: fmr 31, 1 +; P8-LINUX-64-NEXT: fmr 1, 2 +; P8-LINUX-64-NEXT: std 0, 64(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fmr 30, 1 +; P8-LINUX-64-NEXT: fmr 1, 31 +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: xsmaxdp 1, 1, 30 +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 48 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: lfd 31, -8(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: lfd 30, -16(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +entry: + %0 = tail call half @llvm.maxnum.f16(half %a, half %b) + ret half %0 +} + +define half @test_minimum(half %a, half %b) nounwind { +; P8-AIX-32-LABEL: test_minimum: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -80(1) +; P8-AIX-32-NEXT: stw 0, 88(1) +; P8-AIX-32-NEXT: stfd 30, 64(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: fmr 30, 1 +; P8-AIX-32-NEXT: fmr 1, 2 +; P8-AIX-32-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fmr 31, 1 +; P8-AIX-32-NEXT: fmr 1, 30 
+; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fcmpu 0, 1, 31 +; P8-AIX-32-NEXT: bc 12, 3, L..BB19_2 +; P8-AIX-32-NEXT: # %bb.1: # %entry +; P8-AIX-32-NEXT: xsmindp 1, 1, 31 +; P8-AIX-32-NEXT: b L..BB19_3 +; P8-AIX-32-NEXT: L..BB19_2: +; P8-AIX-32-NEXT: lwz 3, L..C0(2) # %const.0 +; P8-AIX-32-NEXT: lfs 1, 0(3) +; P8-AIX-32-NEXT: L..BB19_3: # %entry +; P8-AIX-32-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: addi 1, 1, 80 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: test_minimum: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -128(1) +; P8-AIX-64-NEXT: std 0, 144(1) +; P8-AIX-64-NEXT: stfd 30, 112(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: fmr 30, 1 +; P8-AIX-64-NEXT: fmr 1, 2 +; P8-AIX-64-NEXT: stfd 31, 120(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fmr 31, 1 +; P8-AIX-64-NEXT: fmr 1, 30 +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fcmpu 0, 1, 31 +; P8-AIX-64-NEXT: bc 12, 3, L..BB19_2 +; P8-AIX-64-NEXT: # %bb.1: # %entry +; P8-AIX-64-NEXT: xsmindp 1, 1, 31 +; P8-AIX-64-NEXT: b L..BB19_3 +; P8-AIX-64-NEXT: L..BB19_2: +; P8-AIX-64-NEXT: ld 3, L..C0(2) # %const.0 +; P8-AIX-64-NEXT: lfs 1, 0(3) +; P8-AIX-64-NEXT: L..BB19_3: # %entry +; P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: lfd 31, 120(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: lfd 30, 112(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: addi 1, 1, 128 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-AIX-32-LABEL: test_minimum: +; P9-AIX-32: # %bb.0: # %entry +; P9-AIX-32-NEXT: xscvhpdp 0, 2 +; P9-AIX-32-NEXT: xscvhpdp 1, 1 +; P9-AIX-32-NEXT: fcmpu 0, 1, 0 +; P9-AIX-32-NEXT: bc 
12, 3, L..BB19_2 +; P9-AIX-32-NEXT: # %bb.1: # %entry +; P9-AIX-32-NEXT: xsmindp 0, 1, 0 +; P9-AIX-32-NEXT: xscvdphp 1, 0 +; P9-AIX-32-NEXT: blr +; P9-AIX-32-NEXT: L..BB19_2: +; P9-AIX-32-NEXT: lwz 3, L..C0(2) # %const.0 +; P9-AIX-32-NEXT: lfs 0, 0(3) +; P9-AIX-32-NEXT: xscvdphp 1, 0 +; P9-AIX-32-NEXT: blr +; +; P9-AIX-64-LABEL: test_minimum: +; P9-AIX-64: # %bb.0: # %entry +; P9-AIX-64-NEXT: xscvhpdp 0, 2 +; P9-AIX-64-NEXT: xscvhpdp 1, 1 +; P9-AIX-64-NEXT: fcmpu 0, 1, 0 +; P9-AIX-64-NEXT: bc 12, 3, L..BB19_2 +; P9-AIX-64-NEXT: # %bb.1: # %entry +; P9-AIX-64-NEXT: xsmindp 0, 1, 0 +; P9-AIX-64-NEXT: xscvdphp 1, 0 +; P9-AIX-64-NEXT: blr +; P9-AIX-64-NEXT: L..BB19_2: +; P9-AIX-64-NEXT: ld 3, L..C0(2) # %const.0 +; P9-AIX-64-NEXT: lfs 0, 0(3) +; P9-AIX-64-NEXT: xscvdphp 1, 0 +; P9-AIX-64-NEXT: blr +; +; P8-LINUX-64-LABEL: test_minimum: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stfd 30, -16(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stfd 31, -8(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stdu 1, -48(1) +; P8-LINUX-64-NEXT: fmr 30, 1 +; P8-LINUX-64-NEXT: fmr 1, 2 +; P8-LINUX-64-NEXT: std 0, 64(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fmr 31, 1 +; P8-LINUX-64-NEXT: fmr 1, 30 +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fcmpu 0, 1, 31 +; P8-LINUX-64-NEXT: bc 12, 3, .LBB19_2 +; P8-LINUX-64-NEXT: # %bb.1: # %entry +; P8-LINUX-64-NEXT: xsmindp 1, 1, 31 +; P8-LINUX-64-NEXT: b .LBB19_3 +; P8-LINUX-64-NEXT: .LBB19_2: +; P8-LINUX-64-NEXT: addis 3, 2, .LCPI19_0@toc@ha +; P8-LINUX-64-NEXT: lfs 1, .LCPI19_0@toc@l(3) +; P8-LINUX-64-NEXT: .LBB19_3: # %entry +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 48 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: lfd 31, -8(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: lfd 30, -16(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +; 
+; P9-LINUX-64-LABEL: test_minimum: +; P9-LINUX-64: # %bb.0: # %entry +; P9-LINUX-64-NEXT: xscvhpdp 0, 2 +; P9-LINUX-64-NEXT: xscvhpdp 1, 1 +; P9-LINUX-64-NEXT: fcmpu 0, 1, 0 +; P9-LINUX-64-NEXT: bc 12, 3, .LBB19_2 +; P9-LINUX-64-NEXT: # %bb.1: # %entry +; P9-LINUX-64-NEXT: xsmindp 0, 1, 0 +; P9-LINUX-64-NEXT: xscvdphp 1, 0 +; P9-LINUX-64-NEXT: blr +; P9-LINUX-64-NEXT: .LBB19_2: +; P9-LINUX-64-NEXT: addis 3, 2, .LCPI19_0@toc@ha +; P9-LINUX-64-NEXT: lfs 0, .LCPI19_0@toc@l(3) +; P9-LINUX-64-NEXT: xscvdphp 1, 0 +; P9-LINUX-64-NEXT: blr +entry: + %r = call half @llvm.minimum.f16(half %a, half %b) + ret half %r +} + +define half @test_maximum(half %a, half %b) nounwind { +; P8-AIX-32-LABEL: test_maximum: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -80(1) +; P8-AIX-32-NEXT: stw 0, 88(1) +; P8-AIX-32-NEXT: stfd 30, 64(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: fmr 30, 1 +; P8-AIX-32-NEXT: fmr 1, 2 +; P8-AIX-32-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fmr 31, 1 +; P8-AIX-32-NEXT: fmr 1, 30 +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fcmpu 0, 1, 31 +; P8-AIX-32-NEXT: bc 12, 3, L..BB20_2 +; P8-AIX-32-NEXT: # %bb.1: # %entry +; P8-AIX-32-NEXT: xsmaxdp 1, 1, 31 +; P8-AIX-32-NEXT: b L..BB20_3 +; P8-AIX-32-NEXT: L..BB20_2: +; P8-AIX-32-NEXT: lwz 3, L..C1(2) # %const.0 +; P8-AIX-32-NEXT: lfs 1, 0(3) +; P8-AIX-32-NEXT: L..BB20_3: # %entry +; P8-AIX-32-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: addi 1, 1, 80 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: test_maximum: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -128(1) +; P8-AIX-64-NEXT: std 0, 144(1) +; P8-AIX-64-NEXT: stfd 30, 112(1) # 
8-byte Folded Spill +; P8-AIX-64-NEXT: fmr 30, 1 +; P8-AIX-64-NEXT: fmr 1, 2 +; P8-AIX-64-NEXT: stfd 31, 120(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fmr 31, 1 +; P8-AIX-64-NEXT: fmr 1, 30 +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fcmpu 0, 1, 31 +; P8-AIX-64-NEXT: bc 12, 3, L..BB20_2 +; P8-AIX-64-NEXT: # %bb.1: # %entry +; P8-AIX-64-NEXT: xsmaxdp 1, 1, 31 +; P8-AIX-64-NEXT: b L..BB20_3 +; P8-AIX-64-NEXT: L..BB20_2: +; P8-AIX-64-NEXT: ld 3, L..C1(2) # %const.0 +; P8-AIX-64-NEXT: lfs 1, 0(3) +; P8-AIX-64-NEXT: L..BB20_3: # %entry +; P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: lfd 31, 120(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: lfd 30, 112(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: addi 1, 1, 128 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-AIX-32-LABEL: test_maximum: +; P9-AIX-32: # %bb.0: # %entry +; P9-AIX-32-NEXT: xscvhpdp 0, 2 +; P9-AIX-32-NEXT: xscvhpdp 1, 1 +; P9-AIX-32-NEXT: fcmpu 0, 1, 0 +; P9-AIX-32-NEXT: bc 12, 3, L..BB20_2 +; P9-AIX-32-NEXT: # %bb.1: # %entry +; P9-AIX-32-NEXT: xsmaxdp 0, 1, 0 +; P9-AIX-32-NEXT: xscvdphp 1, 0 +; P9-AIX-32-NEXT: blr +; P9-AIX-32-NEXT: L..BB20_2: +; P9-AIX-32-NEXT: lwz 3, L..C1(2) # %const.0 +; P9-AIX-32-NEXT: lfs 0, 0(3) +; P9-AIX-32-NEXT: xscvdphp 1, 0 +; P9-AIX-32-NEXT: blr +; +; P9-AIX-64-LABEL: test_maximum: +; P9-AIX-64: # %bb.0: # %entry +; P9-AIX-64-NEXT: xscvhpdp 0, 2 +; P9-AIX-64-NEXT: xscvhpdp 1, 1 +; P9-AIX-64-NEXT: fcmpu 0, 1, 0 +; P9-AIX-64-NEXT: bc 12, 3, L..BB20_2 +; P9-AIX-64-NEXT: # %bb.1: # %entry +; P9-AIX-64-NEXT: xsmaxdp 0, 1, 0 +; P9-AIX-64-NEXT: xscvdphp 1, 0 +; P9-AIX-64-NEXT: blr +; P9-AIX-64-NEXT: L..BB20_2: +; P9-AIX-64-NEXT: ld 3, L..C1(2) # %const.0 +; P9-AIX-64-NEXT: lfs 0, 0(3) +; P9-AIX-64-NEXT: xscvdphp 1, 0 +; P9-AIX-64-NEXT: blr +; +; P8-LINUX-64-LABEL: test_maximum: +; P8-LINUX-64: # %bb.0: # 
%entry +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stfd 30, -16(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stfd 31, -8(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stdu 1, -48(1) +; P8-LINUX-64-NEXT: fmr 30, 1 +; P8-LINUX-64-NEXT: fmr 1, 2 +; P8-LINUX-64-NEXT: std 0, 64(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fmr 31, 1 +; P8-LINUX-64-NEXT: fmr 1, 30 +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fcmpu 0, 1, 31 +; P8-LINUX-64-NEXT: bc 12, 3, .LBB20_2 +; P8-LINUX-64-NEXT: # %bb.1: # %entry +; P8-LINUX-64-NEXT: xsmaxdp 1, 1, 31 +; P8-LINUX-64-NEXT: b .LBB20_3 +; P8-LINUX-64-NEXT: .LBB20_2: +; P8-LINUX-64-NEXT: addis 3, 2, .LCPI20_0@toc@ha +; P8-LINUX-64-NEXT: lfs 1, .LCPI20_0@toc@l(3) +; P8-LINUX-64-NEXT: .LBB20_3: # %entry +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 48 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: lfd 31, -8(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: lfd 30, -16(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +; +; P9-LINUX-64-LABEL: test_maximum: +; P9-LINUX-64: # %bb.0: # %entry +; P9-LINUX-64-NEXT: xscvhpdp 0, 2 +; P9-LINUX-64-NEXT: xscvhpdp 1, 1 +; P9-LINUX-64-NEXT: fcmpu 0, 1, 0 +; P9-LINUX-64-NEXT: bc 12, 3, .LBB20_2 +; P9-LINUX-64-NEXT: # %bb.1: # %entry +; P9-LINUX-64-NEXT: xsmaxdp 0, 1, 0 +; P9-LINUX-64-NEXT: xscvdphp 1, 0 +; P9-LINUX-64-NEXT: blr +; P9-LINUX-64-NEXT: .LBB20_2: +; P9-LINUX-64-NEXT: addis 3, 2, .LCPI20_0@toc@ha +; P9-LINUX-64-NEXT: lfs 0, .LCPI20_0@toc@l(3) +; P9-LINUX-64-NEXT: xscvdphp 1, 0 +; P9-LINUX-64-NEXT: blr +entry: + %r = call half @llvm.maximum.f16(half %a, half %b) + ret half %r +} + +; ======================================================================================== +; Rounding Operations +; ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FROUND, ISD::FRINT, ISD::FNEARBYINT, ISD::FROUNDEVEN +; 
======================================================================================== + +define half @op_ceil(half %a) nounwind { +; P8-AIX-32-LABEL: op_ceil: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -64(1) +; P8-AIX-32-NEXT: stw 0, 72(1) +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: xsrdpip 1, 1 +; P8-AIX-32-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: addi 1, 1, 64 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: op_ceil: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -112(1) +; P8-AIX-64-NEXT: std 0, 128(1) +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: xsrdpip 1, 1 +; P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: addi 1, 1, 112 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-LABEL: op_ceil: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvhpdp 0, 1 +; P9-NEXT: xsrdpip 0, 0 +; P9-NEXT: xscvdphp 1, 0 +; P9-NEXT: blr +; +; P8-LINUX-64-LABEL: op_ceil: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stdu 1, -32(1) +; P8-LINUX-64-NEXT: std 0, 48(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: xsrdpip 1, 1 +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 32 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +entry: + %0 = tail call half @llvm.ceil.f16(half %a) + ret half %0 +} + +define half @op_floor(half %a) nounwind { +; P8-AIX-32-LABEL: op_floor: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -64(1) +; P8-AIX-32-NEXT: stw 0, 72(1) +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: xsrdpim 1, 1 +; P8-AIX-32-NEXT: bl .__truncsfhf2[PR] +; 
P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: addi 1, 1, 64 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: op_floor: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -112(1) +; P8-AIX-64-NEXT: std 0, 128(1) +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: xsrdpim 1, 1 +; P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: addi 1, 1, 112 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-LABEL: op_floor: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvhpdp 0, 1 +; P9-NEXT: xsrdpim 0, 0 +; P9-NEXT: xscvdphp 1, 0 +; P9-NEXT: blr +; +; P8-LINUX-64-LABEL: op_floor: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stdu 1, -32(1) +; P8-LINUX-64-NEXT: std 0, 48(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: xsrdpim 1, 1 +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 32 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +entry: + %0 = tail call half @llvm.floor.f16(half %a) + ret half %0 +} + +define half @op_trunc(half %a) nounwind { +; P8-AIX-32-LABEL: op_trunc: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -64(1) +; P8-AIX-32-NEXT: stw 0, 72(1) +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: xsrdpiz 1, 1 +; P8-AIX-32-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: addi 1, 1, 64 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: op_trunc: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -112(1) +; P8-AIX-64-NEXT: std 0, 128(1) +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: xsrdpiz 1, 1 +; P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; 
P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: addi 1, 1, 112 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-LABEL: op_trunc: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvhpdp 0, 1 +; P9-NEXT: xsrdpiz 0, 0 +; P9-NEXT: xscvdphp 1, 0 +; P9-NEXT: blr +; +; P8-LINUX-64-LABEL: op_trunc: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stdu 1, -32(1) +; P8-LINUX-64-NEXT: std 0, 48(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: xsrdpiz 1, 1 +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 32 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +entry: + %0 = tail call half @llvm.trunc.f16(half %a) + ret half %0 +} + +define half @op_round(half %a) nounwind { +; P8-AIX-32-LABEL: op_round: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -64(1) +; P8-AIX-32-NEXT: stw 0, 72(1) +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: xsrdpi 1, 1 +; P8-AIX-32-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: addi 1, 1, 64 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: op_round: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -112(1) +; P8-AIX-64-NEXT: std 0, 128(1) +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: xsrdpi 1, 1 +; P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: addi 1, 1, 112 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-LABEL: op_round: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvhpdp 0, 1 +; P9-NEXT: xsrdpi 0, 0 +; P9-NEXT: xscvdphp 1, 0 +; P9-NEXT: blr +; +; P8-LINUX-64-LABEL: op_round: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stdu 1, -32(1) +; P8-LINUX-64-NEXT: std 
0, 48(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: xsrdpi 1, 1 +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 32 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +entry: + %0 = tail call half @llvm.round.f16(half %a) + ret half %0 +} + +define half @op_rint(half %a) nounwind { +; P8-AIX-32-LABEL: op_rint: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -64(1) +; P8-AIX-32-NEXT: stw 0, 72(1) +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: xsrdpic 1, 1 +; P8-AIX-32-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: addi 1, 1, 64 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: op_rint: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -112(1) +; P8-AIX-64-NEXT: std 0, 128(1) +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: xsrdpic 1, 1 +; P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: addi 1, 1, 112 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-LABEL: op_rint: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvhpdp 0, 1 +; P9-NEXT: xsrdpic 0, 0 +; P9-NEXT: xscvdphp 1, 0 +; P9-NEXT: blr +; +; P8-LINUX-64-LABEL: op_rint: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stdu 1, -32(1) +; P8-LINUX-64-NEXT: std 0, 48(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: xsrdpic 1, 1 +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 32 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +entry: + %0 = tail call half @llvm.rint.f16(half %a) + ret half %0 +} + +define half @op_nearbyint(half %a) nounwind { +; P8-AIX-32-LABEL: 
op_nearbyint: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -64(1) +; P8-AIX-32-NEXT: stw 0, 72(1) +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: bl .nearbyintf[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: addi 1, 1, 64 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: op_nearbyint: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -112(1) +; P8-AIX-64-NEXT: std 0, 128(1) +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: bl .nearbyintf[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: addi 1, 1, 112 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-AIX-32-LABEL: op_nearbyint: +; P9-AIX-32: # %bb.0: # %entry +; P9-AIX-32-NEXT: mflr 0 +; P9-AIX-32-NEXT: stwu 1, -64(1) +; P9-AIX-32-NEXT: stw 0, 72(1) +; P9-AIX-32-NEXT: xscvhpdp 1, 1 +; P9-AIX-32-NEXT: bl .nearbyintf[PR] +; P9-AIX-32-NEXT: nop +; P9-AIX-32-NEXT: xscvdphp 1, 1 +; P9-AIX-32-NEXT: addi 1, 1, 64 +; P9-AIX-32-NEXT: lwz 0, 8(1) +; P9-AIX-32-NEXT: mtlr 0 +; P9-AIX-32-NEXT: blr +; +; P9-AIX-64-LABEL: op_nearbyint: +; P9-AIX-64: # %bb.0: # %entry +; P9-AIX-64-NEXT: mflr 0 +; P9-AIX-64-NEXT: stdu 1, -112(1) +; P9-AIX-64-NEXT: std 0, 128(1) +; P9-AIX-64-NEXT: xscvhpdp 1, 1 +; P9-AIX-64-NEXT: bl .nearbyintf[PR] +; P9-AIX-64-NEXT: nop +; P9-AIX-64-NEXT: xscvdphp 1, 1 +; P9-AIX-64-NEXT: addi 1, 1, 112 +; P9-AIX-64-NEXT: ld 0, 16(1) +; P9-AIX-64-NEXT: mtlr 0 +; P9-AIX-64-NEXT: blr +; +; P8-LINUX-64-LABEL: op_nearbyint: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stdu 1, -32(1) +; P8-LINUX-64-NEXT: std 0, 48(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: bl nearbyintf +; P8-LINUX-64-NEXT: 
nop +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 32 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +; +; P9-LINUX-64-LABEL: op_nearbyint: +; P9-LINUX-64: # %bb.0: # %entry +; P9-LINUX-64-NEXT: mflr 0 +; P9-LINUX-64-NEXT: stdu 1, -32(1) +; P9-LINUX-64-NEXT: std 0, 48(1) +; P9-LINUX-64-NEXT: xscvhpdp 1, 1 +; P9-LINUX-64-NEXT: bl nearbyintf +; P9-LINUX-64-NEXT: nop +; P9-LINUX-64-NEXT: xscvdphp 1, 1 +; P9-LINUX-64-NEXT: addi 1, 1, 32 +; P9-LINUX-64-NEXT: ld 0, 16(1) +; P9-LINUX-64-NEXT: mtlr 0 +; P9-LINUX-64-NEXT: blr +entry: + %0 = tail call half @llvm.nearbyint.f16(half %a) + ret half %0 +} + +define half @test_roundeven(half %a) nounwind { +; P8-AIX-32-LABEL: test_roundeven: +; P8-AIX-32: # %bb.0: +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -64(1) +; P8-AIX-32-NEXT: stw 0, 72(1) +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: bl .roundevenf[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: addi 1, 1, 64 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: test_roundeven: +; P8-AIX-64: # %bb.0: +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -112(1) +; P8-AIX-64-NEXT: std 0, 128(1) +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: bl .roundevenf[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: addi 1, 1, 112 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-AIX-32-LABEL: test_roundeven: +; P9-AIX-32: # %bb.0: +; P9-AIX-32-NEXT: mflr 0 +; P9-AIX-32-NEXT: stwu 1, -64(1) +; P9-AIX-32-NEXT: stw 0, 72(1) +; P9-AIX-32-NEXT: xscvhpdp 1, 1 +; P9-AIX-32-NEXT: bl .roundevenf[PR] +; P9-AIX-32-NEXT: nop +; P9-AIX-32-NEXT: xscvdphp 1, 1 +; P9-AIX-32-NEXT: addi 1, 1, 64 +; P9-AIX-32-NEXT: lwz 0, 8(1) +; 
P9-AIX-32-NEXT: mtlr 0 +; P9-AIX-32-NEXT: blr +; +; P9-AIX-64-LABEL: test_roundeven: +; P9-AIX-64: # %bb.0: +; P9-AIX-64-NEXT: mflr 0 +; P9-AIX-64-NEXT: stdu 1, -112(1) +; P9-AIX-64-NEXT: std 0, 128(1) +; P9-AIX-64-NEXT: xscvhpdp 1, 1 +; P9-AIX-64-NEXT: bl .roundevenf[PR] +; P9-AIX-64-NEXT: nop +; P9-AIX-64-NEXT: xscvdphp 1, 1 +; P9-AIX-64-NEXT: addi 1, 1, 112 +; P9-AIX-64-NEXT: ld 0, 16(1) +; P9-AIX-64-NEXT: mtlr 0 +; P9-AIX-64-NEXT: blr +; +; P8-LINUX-64-LABEL: test_roundeven: +; P8-LINUX-64: # %bb.0: +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stdu 1, -32(1) +; P8-LINUX-64-NEXT: std 0, 48(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: bl roundevenf +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 32 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +; +; P9-LINUX-64-LABEL: test_roundeven: +; P9-LINUX-64: # %bb.0: +; P9-LINUX-64-NEXT: mflr 0 +; P9-LINUX-64-NEXT: stdu 1, -32(1) +; P9-LINUX-64-NEXT: std 0, 48(1) +; P9-LINUX-64-NEXT: xscvhpdp 1, 1 +; P9-LINUX-64-NEXT: bl roundevenf +; P9-LINUX-64-NEXT: nop +; P9-LINUX-64-NEXT: xscvdphp 1, 1 +; P9-LINUX-64-NEXT: addi 1, 1, 32 +; P9-LINUX-64-NEXT: ld 0, 16(1) +; P9-LINUX-64-NEXT: mtlr 0 +; P9-LINUX-64-NEXT: blr + %r = call half @llvm.roundeven.f16(half %a) + ret half %r +} + +; ======================================================================================== +; Bit Manipulation Operations +; ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN +; ======================================================================================== + +define half @test_fabs(half %a) nounwind { +; P8-AIX-32-LABEL: test_fabs: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mffprwz 3, 1 +; P8-AIX-32-NEXT: clrlwi 3, 3, 17 +; P8-AIX-32-NEXT: mtfprwz 1, 3 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: test_fabs: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mffprwz 3, 1 +; P8-AIX-64-NEXT: 
clrlwi 3, 3, 17 +; P8-AIX-64-NEXT: mtfprwz 1, 3 +; P8-AIX-64-NEXT: blr +; +; P9-LABEL: test_fabs: +; P9: # %bb.0: # %entry +; P9-NEXT: mffprwz 3, 1 +; P9-NEXT: clrlwi 3, 3, 17 +; P9-NEXT: mtfprwz 1, 3 +; P9-NEXT: blr +; +; P8-LINUX-64-LABEL: test_fabs: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mffprwz 3, 1 +; P8-LINUX-64-NEXT: clrlwi 3, 3, 17 +; P8-LINUX-64-NEXT: mtfprwz 1, 3 +; P8-LINUX-64-NEXT: blr +entry: + %r = call half @llvm.fabs.f16(half %a) + ret half %r +} + +define half @test_fneg(half %a) nounwind { +; P8-AIX-32-LABEL: test_fneg: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mffprwz 3, 1 +; P8-AIX-32-NEXT: xori 3, 3, 32768 +; P8-AIX-32-NEXT: mtfprwz 1, 3 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: test_fneg: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mffprwz 3, 1 +; P8-AIX-64-NEXT: xori 3, 3, 32768 +; P8-AIX-64-NEXT: mtfprwz 1, 3 +; P8-AIX-64-NEXT: blr +; +; P9-LABEL: test_fneg: +; P9: # %bb.0: # %entry +; P9-NEXT: mffprwz 3, 1 +; P9-NEXT: xori 3, 3, 32768 +; P9-NEXT: mtfprwz 1, 3 +; P9-NEXT: blr +; +; P8-LINUX-64-LABEL: test_fneg: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mffprwz 3, 1 +; P8-LINUX-64-NEXT: xori 3, 3, 32768 +; P8-LINUX-64-NEXT: mtfprwz 1, 3 +; P8-LINUX-64-NEXT: blr +entry: + %r = fneg half %a + ret half %r +} + +define half @test_fcopysign(half %mag, half %sign) nounwind { +; P8-AIX-32-LABEL: test_fcopysign: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mffprwz 3, 2 +; P8-AIX-32-NEXT: mffprwz 4, 1 +; P8-AIX-32-NEXT: rlwinm 3, 3, 0, 16, 16 +; P8-AIX-32-NEXT: clrlwi 4, 4, 17 +; P8-AIX-32-NEXT: or 3, 4, 3 +; P8-AIX-32-NEXT: mtfprwz 1, 3 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: test_fcopysign: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mffprwz 3, 2 +; P8-AIX-64-NEXT: mffprwz 4, 1 +; P8-AIX-64-NEXT: rlwinm 3, 3, 0, 16, 16 +; P8-AIX-64-NEXT: clrlwi 4, 4, 17 +; P8-AIX-64-NEXT: or 3, 4, 3 +; P8-AIX-64-NEXT: mtfprwz 1, 3 +; P8-AIX-64-NEXT: blr +; +; P9-LABEL: test_fcopysign: +; P9: # 
%bb.0: # %entry +; P9-NEXT: mffprwz 3, 2 +; P9-NEXT: mffprwz 4, 1 +; P9-NEXT: rlwinm 3, 3, 0, 16, 16 +; P9-NEXT: clrlwi 4, 4, 17 +; P9-NEXT: or 3, 4, 3 +; P9-NEXT: mtfprwz 1, 3 +; P9-NEXT: blr +; +; P8-LINUX-64-LABEL: test_fcopysign: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mffprwz 3, 2 +; P8-LINUX-64-NEXT: mffprwz 4, 1 +; P8-LINUX-64-NEXT: rlwinm 3, 3, 0, 16, 16 +; P8-LINUX-64-NEXT: clrlwi 4, 4, 17 +; P8-LINUX-64-NEXT: or 3, 4, 3 +; P8-LINUX-64-NEXT: mtfprwz 1, 3 +; P8-LINUX-64-NEXT: blr +entry: + %r = call half @llvm.copysign.f16(half %mag, half %sign) + ret half %r +} + +; ======================================================================================== +; Special Operations +; ISD::FCANONICALIZE - Canonicalize NaN representation +; ======================================================================================== + +define half @test_canonicalize(half %a) nounwind { +; P8-AIX-32-LABEL: test_canonicalize: +; P8-AIX-32: # %bb.0: +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -64(1) +; P8-AIX-32-NEXT: stw 0, 72(1) +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: xsmaxdp 1, 1, 1 +; P8-AIX-32-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: addi 1, 1, 64 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: test_canonicalize: +; P8-AIX-64: # %bb.0: +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -112(1) +; P8-AIX-64-NEXT: std 0, 128(1) +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: xsmaxdp 1, 1, 1 +; P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: addi 1, 1, 112 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-LABEL: test_canonicalize: +; P9: # %bb.0: +; P9-NEXT: xscvhpdp 0, 1 +; P9-NEXT: xsmaxdp 0, 0, 0 +; P9-NEXT: xscvdphp 1, 0 +; P9-NEXT: blr +; +; P8-LINUX-64-LABEL: test_canonicalize: +; P8-LINUX-64: # %bb.0: +; 
P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stdu 1, -32(1) +; P8-LINUX-64-NEXT: std 0, 48(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: xsmaxdp 1, 1, 1 +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 32 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr + %r = call half @llvm.canonicalize.f16(half %a) + ret half %r +} + +; ======================================================================================== +; Transcendental Functions - Power +; ISD::FPOW , calls libm powf() +; ======================================================================================== + +define half @test_pow(half %a, half %b) nounwind { +; P8-AIX-32-LABEL: test_pow: +; P8-AIX-32: # %bb.0: +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -80(1) +; P8-AIX-32-NEXT: stw 0, 88(1) +; P8-AIX-32-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: fmr 31, 1 +; P8-AIX-32-NEXT: fmr 1, 2 +; P8-AIX-32-NEXT: stfd 30, 64(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fmr 30, 1 +; P8-AIX-32-NEXT: fmr 1, 31 +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fmr 2, 30 +; P8-AIX-32-NEXT: bl .powf[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: addi 1, 1, 80 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: test_pow: +; P8-AIX-64: # %bb.0: +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -128(1) +; P8-AIX-64-NEXT: std 0, 144(1) +; P8-AIX-64-NEXT: stfd 31, 120(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: fmr 31, 1 +; P8-AIX-64-NEXT: fmr 1, 2 +; P8-AIX-64-NEXT: stfd 30, 112(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; 
P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fmr 30, 1 +; P8-AIX-64-NEXT: fmr 1, 31 +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fmr 2, 30 +; P8-AIX-64-NEXT: bl .powf[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: lfd 31, 120(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: lfd 30, 112(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: addi 1, 1, 128 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-AIX-32-LABEL: test_pow: +; P9-AIX-32: # %bb.0: +; P9-AIX-32-NEXT: mflr 0 +; P9-AIX-32-NEXT: stwu 1, -64(1) +; P9-AIX-32-NEXT: stw 0, 72(1) +; P9-AIX-32-NEXT: xscvhpdp 1, 1 +; P9-AIX-32-NEXT: xscvhpdp 2, 2 +; P9-AIX-32-NEXT: bl .powf[PR] +; P9-AIX-32-NEXT: nop +; P9-AIX-32-NEXT: xscvdphp 1, 1 +; P9-AIX-32-NEXT: addi 1, 1, 64 +; P9-AIX-32-NEXT: lwz 0, 8(1) +; P9-AIX-32-NEXT: mtlr 0 +; P9-AIX-32-NEXT: blr +; +; P9-AIX-64-LABEL: test_pow: +; P9-AIX-64: # %bb.0: +; P9-AIX-64-NEXT: mflr 0 +; P9-AIX-64-NEXT: stdu 1, -112(1) +; P9-AIX-64-NEXT: std 0, 128(1) +; P9-AIX-64-NEXT: xscvhpdp 1, 1 +; P9-AIX-64-NEXT: xscvhpdp 2, 2 +; P9-AIX-64-NEXT: bl .powf[PR] +; P9-AIX-64-NEXT: nop +; P9-AIX-64-NEXT: xscvdphp 1, 1 +; P9-AIX-64-NEXT: addi 1, 1, 112 +; P9-AIX-64-NEXT: ld 0, 16(1) +; P9-AIX-64-NEXT: mtlr 0 +; P9-AIX-64-NEXT: blr +; +; P8-LINUX-64-LABEL: test_pow: +; P8-LINUX-64: # %bb.0: +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stfd 30, -16(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stfd 31, -8(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stdu 1, -48(1) +; P8-LINUX-64-NEXT: fmr 31, 1 +; P8-LINUX-64-NEXT: fmr 1, 2 +; P8-LINUX-64-NEXT: std 0, 64(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fmr 30, 1 +; P8-LINUX-64-NEXT: fmr 1, 31 +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fmr 2, 30 +; P8-LINUX-64-NEXT: bl powf +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; 
P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 48 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: lfd 31, -8(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: lfd 30, -16(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +; +; P9-LINUX-64-LABEL: test_pow: +; P9-LINUX-64: # %bb.0: +; P9-LINUX-64-NEXT: mflr 0 +; P9-LINUX-64-NEXT: stdu 1, -32(1) +; P9-LINUX-64-NEXT: std 0, 48(1) +; P9-LINUX-64-NEXT: xscvhpdp 1, 1 +; P9-LINUX-64-NEXT: xscvhpdp 2, 2 +; P9-LINUX-64-NEXT: bl powf +; P9-LINUX-64-NEXT: nop +; P9-LINUX-64-NEXT: xscvdphp 1, 1 +; P9-LINUX-64-NEXT: addi 1, 1, 32 +; P9-LINUX-64-NEXT: ld 0, 16(1) +; P9-LINUX-64-NEXT: mtlr 0 +; P9-LINUX-64-NEXT: blr + %r = call half @llvm.pow.f16(half %a, half %b) + ret half %r +} + +; ======================================================================================== +; Transcendental Functions - Logarithms +; ISD::FLOG, ISD::FLOG2, ISD::FLOG10 , calls libm +; ======================================================================================== + +define half @test_log(half %a) nounwind { +; P8-AIX-32-LABEL: test_log: +; P8-AIX-32: # %bb.0: +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -64(1) +; P8-AIX-32-NEXT: stw 0, 72(1) +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: bl .logf[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: addi 1, 1, 64 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: test_log: +; P8-AIX-64: # %bb.0: +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -112(1) +; P8-AIX-64-NEXT: std 0, 128(1) +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: bl .logf[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: addi 1, 1, 112 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-AIX-32-LABEL: 
test_log: +; P9-AIX-32: # %bb.0: +; P9-AIX-32-NEXT: mflr 0 +; P9-AIX-32-NEXT: stwu 1, -64(1) +; P9-AIX-32-NEXT: stw 0, 72(1) +; P9-AIX-32-NEXT: xscvhpdp 1, 1 +; P9-AIX-32-NEXT: bl .logf[PR] +; P9-AIX-32-NEXT: nop +; P9-AIX-32-NEXT: xscvdphp 1, 1 +; P9-AIX-32-NEXT: addi 1, 1, 64 +; P9-AIX-32-NEXT: lwz 0, 8(1) +; P9-AIX-32-NEXT: mtlr 0 +; P9-AIX-32-NEXT: blr +; +; P9-AIX-64-LABEL: test_log: +; P9-AIX-64: # %bb.0: +; P9-AIX-64-NEXT: mflr 0 +; P9-AIX-64-NEXT: stdu 1, -112(1) +; P9-AIX-64-NEXT: std 0, 128(1) +; P9-AIX-64-NEXT: xscvhpdp 1, 1 +; P9-AIX-64-NEXT: bl .logf[PR] +; P9-AIX-64-NEXT: nop +; P9-AIX-64-NEXT: xscvdphp 1, 1 +; P9-AIX-64-NEXT: addi 1, 1, 112 +; P9-AIX-64-NEXT: ld 0, 16(1) +; P9-AIX-64-NEXT: mtlr 0 +; P9-AIX-64-NEXT: blr +; +; P8-LINUX-64-LABEL: test_log: +; P8-LINUX-64: # %bb.0: +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stdu 1, -32(1) +; P8-LINUX-64-NEXT: std 0, 48(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: bl logf +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 32 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +; +; P9-LINUX-64-LABEL: test_log: +; P9-LINUX-64: # %bb.0: +; P9-LINUX-64-NEXT: mflr 0 +; P9-LINUX-64-NEXT: stdu 1, -32(1) +; P9-LINUX-64-NEXT: std 0, 48(1) +; P9-LINUX-64-NEXT: xscvhpdp 1, 1 +; P9-LINUX-64-NEXT: bl logf +; P9-LINUX-64-NEXT: nop +; P9-LINUX-64-NEXT: xscvdphp 1, 1 +; P9-LINUX-64-NEXT: addi 1, 1, 32 +; P9-LINUX-64-NEXT: ld 0, 16(1) +; P9-LINUX-64-NEXT: mtlr 0 +; P9-LINUX-64-NEXT: blr + %r = call half @llvm.log.f16(half %a) + ret half %r +} + +define half @test_log2(half %a) nounwind { +; P8-AIX-32-LABEL: test_log2: +; P8-AIX-32: # %bb.0: +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -64(1) +; P8-AIX-32-NEXT: stw 0, 72(1) +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: bl .log2f[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: bl 
.__truncsfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: addi 1, 1, 64 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: test_log2: +; P8-AIX-64: # %bb.0: +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -112(1) +; P8-AIX-64-NEXT: std 0, 128(1) +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: bl .log2f[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: addi 1, 1, 112 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-AIX-32-LABEL: test_log2: +; P9-AIX-32: # %bb.0: +; P9-AIX-32-NEXT: mflr 0 +; P9-AIX-32-NEXT: stwu 1, -64(1) +; P9-AIX-32-NEXT: stw 0, 72(1) +; P9-AIX-32-NEXT: xscvhpdp 1, 1 +; P9-AIX-32-NEXT: bl .log2f[PR] +; P9-AIX-32-NEXT: nop +; P9-AIX-32-NEXT: xscvdphp 1, 1 +; P9-AIX-32-NEXT: addi 1, 1, 64 +; P9-AIX-32-NEXT: lwz 0, 8(1) +; P9-AIX-32-NEXT: mtlr 0 +; P9-AIX-32-NEXT: blr +; +; P9-AIX-64-LABEL: test_log2: +; P9-AIX-64: # %bb.0: +; P9-AIX-64-NEXT: mflr 0 +; P9-AIX-64-NEXT: stdu 1, -112(1) +; P9-AIX-64-NEXT: std 0, 128(1) +; P9-AIX-64-NEXT: xscvhpdp 1, 1 +; P9-AIX-64-NEXT: bl .log2f[PR] +; P9-AIX-64-NEXT: nop +; P9-AIX-64-NEXT: xscvdphp 1, 1 +; P9-AIX-64-NEXT: addi 1, 1, 112 +; P9-AIX-64-NEXT: ld 0, 16(1) +; P9-AIX-64-NEXT: mtlr 0 +; P9-AIX-64-NEXT: blr +; +; P8-LINUX-64-LABEL: test_log2: +; P8-LINUX-64: # %bb.0: +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stdu 1, -32(1) +; P8-LINUX-64-NEXT: std 0, 48(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: bl log2f +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 32 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +; +; P9-LINUX-64-LABEL: test_log2: +; P9-LINUX-64: # %bb.0: +; P9-LINUX-64-NEXT: mflr 0 +; P9-LINUX-64-NEXT: stdu 1, -32(1) +; P9-LINUX-64-NEXT: std 0, 48(1) +; 
P9-LINUX-64-NEXT: xscvhpdp 1, 1 +; P9-LINUX-64-NEXT: bl log2f +; P9-LINUX-64-NEXT: nop +; P9-LINUX-64-NEXT: xscvdphp 1, 1 +; P9-LINUX-64-NEXT: addi 1, 1, 32 +; P9-LINUX-64-NEXT: ld 0, 16(1) +; P9-LINUX-64-NEXT: mtlr 0 +; P9-LINUX-64-NEXT: blr + %r = call half @llvm.log2.f16(half %a) + ret half %r +} + +define half @test_log10(half %a) nounwind { +; P8-AIX-32-LABEL: test_log10: +; P8-AIX-32: # %bb.0: +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -64(1) +; P8-AIX-32-NEXT: stw 0, 72(1) +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: bl .log10f[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: addi 1, 1, 64 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: test_log10: +; P8-AIX-64: # %bb.0: +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -112(1) +; P8-AIX-64-NEXT: std 0, 128(1) +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: bl .log10f[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: addi 1, 1, 112 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-AIX-32-LABEL: test_log10: +; P9-AIX-32: # %bb.0: +; P9-AIX-32-NEXT: mflr 0 +; P9-AIX-32-NEXT: stwu 1, -64(1) +; P9-AIX-32-NEXT: stw 0, 72(1) +; P9-AIX-32-NEXT: xscvhpdp 1, 1 +; P9-AIX-32-NEXT: bl .log10f[PR] +; P9-AIX-32-NEXT: nop +; P9-AIX-32-NEXT: xscvdphp 1, 1 +; P9-AIX-32-NEXT: addi 1, 1, 64 +; P9-AIX-32-NEXT: lwz 0, 8(1) +; P9-AIX-32-NEXT: mtlr 0 +; P9-AIX-32-NEXT: blr +; +; P9-AIX-64-LABEL: test_log10: +; P9-AIX-64: # %bb.0: +; P9-AIX-64-NEXT: mflr 0 +; P9-AIX-64-NEXT: stdu 1, -112(1) +; P9-AIX-64-NEXT: std 0, 128(1) +; P9-AIX-64-NEXT: xscvhpdp 1, 1 +; P9-AIX-64-NEXT: bl .log10f[PR] +; P9-AIX-64-NEXT: nop +; P9-AIX-64-NEXT: xscvdphp 1, 1 +; P9-AIX-64-NEXT: addi 1, 1, 112 +; P9-AIX-64-NEXT: ld 0, 16(1) +; P9-AIX-64-NEXT: mtlr 0 +; 
P9-AIX-64-NEXT: blr +; +; P8-LINUX-64-LABEL: test_log10: +; P8-LINUX-64: # %bb.0: +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stdu 1, -32(1) +; P8-LINUX-64-NEXT: std 0, 48(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: bl log10f +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 32 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +; +; P9-LINUX-64-LABEL: test_log10: +; P9-LINUX-64: # %bb.0: +; P9-LINUX-64-NEXT: mflr 0 +; P9-LINUX-64-NEXT: stdu 1, -32(1) +; P9-LINUX-64-NEXT: std 0, 48(1) +; P9-LINUX-64-NEXT: xscvhpdp 1, 1 +; P9-LINUX-64-NEXT: bl log10f +; P9-LINUX-64-NEXT: nop +; P9-LINUX-64-NEXT: xscvdphp 1, 1 +; P9-LINUX-64-NEXT: addi 1, 1, 32 +; P9-LINUX-64-NEXT: ld 0, 16(1) +; P9-LINUX-64-NEXT: mtlr 0 +; P9-LINUX-64-NEXT: blr + %r = call half @llvm.log10.f16(half %a) + ret half %r +} + +; ======================================================================================== +; Transcendental Functions - Exponentials +; ISD::FEXP, ISD::FEXP2, ISD::FEXP10 , calls libm +; ======================================================================================== + +define half @test_exp(half %a) nounwind { +; P8-AIX-32-LABEL: test_exp: +; P8-AIX-32: # %bb.0: +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -64(1) +; P8-AIX-32-NEXT: stw 0, 72(1) +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: bl .expf[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: addi 1, 1, 64 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: test_exp: +; P8-AIX-64: # %bb.0: +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -112(1) +; P8-AIX-64-NEXT: std 0, 128(1) +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: bl .expf[PR] +; P8-AIX-64-NEXT: nop +; 
P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: addi 1, 1, 112 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-AIX-32-LABEL: test_exp: +; P9-AIX-32: # %bb.0: +; P9-AIX-32-NEXT: mflr 0 +; P9-AIX-32-NEXT: stwu 1, -64(1) +; P9-AIX-32-NEXT: stw 0, 72(1) +; P9-AIX-32-NEXT: xscvhpdp 1, 1 +; P9-AIX-32-NEXT: bl .expf[PR] +; P9-AIX-32-NEXT: nop +; P9-AIX-32-NEXT: xscvdphp 1, 1 +; P9-AIX-32-NEXT: addi 1, 1, 64 +; P9-AIX-32-NEXT: lwz 0, 8(1) +; P9-AIX-32-NEXT: mtlr 0 +; P9-AIX-32-NEXT: blr +; +; P9-AIX-64-LABEL: test_exp: +; P9-AIX-64: # %bb.0: +; P9-AIX-64-NEXT: mflr 0 +; P9-AIX-64-NEXT: stdu 1, -112(1) +; P9-AIX-64-NEXT: std 0, 128(1) +; P9-AIX-64-NEXT: xscvhpdp 1, 1 +; P9-AIX-64-NEXT: bl .expf[PR] +; P9-AIX-64-NEXT: nop +; P9-AIX-64-NEXT: xscvdphp 1, 1 +; P9-AIX-64-NEXT: addi 1, 1, 112 +; P9-AIX-64-NEXT: ld 0, 16(1) +; P9-AIX-64-NEXT: mtlr 0 +; P9-AIX-64-NEXT: blr +; +; P8-LINUX-64-LABEL: test_exp: +; P8-LINUX-64: # %bb.0: +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stdu 1, -32(1) +; P8-LINUX-64-NEXT: std 0, 48(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: bl expf +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 32 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +; +; P9-LINUX-64-LABEL: test_exp: +; P9-LINUX-64: # %bb.0: +; P9-LINUX-64-NEXT: mflr 0 +; P9-LINUX-64-NEXT: stdu 1, -32(1) +; P9-LINUX-64-NEXT: std 0, 48(1) +; P9-LINUX-64-NEXT: xscvhpdp 1, 1 +; P9-LINUX-64-NEXT: bl expf +; P9-LINUX-64-NEXT: nop +; P9-LINUX-64-NEXT: xscvdphp 1, 1 +; P9-LINUX-64-NEXT: addi 1, 1, 32 +; P9-LINUX-64-NEXT: ld 0, 16(1) +; P9-LINUX-64-NEXT: mtlr 0 +; P9-LINUX-64-NEXT: blr + %r = call half @llvm.exp.f16(half %a) + ret half %r +} + +define half @test_exp2(half %a) nounwind { +; P8-AIX-32-LABEL: test_exp2: +; P8-AIX-32: # %bb.0: +; P8-AIX-32-NEXT: mflr 0 +; 
P8-AIX-32-NEXT: stwu 1, -64(1) +; P8-AIX-32-NEXT: stw 0, 72(1) +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: bl .exp2f[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: addi 1, 1, 64 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: test_exp2: +; P8-AIX-64: # %bb.0: +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -112(1) +; P8-AIX-64-NEXT: std 0, 128(1) +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: bl .exp2f[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: addi 1, 1, 112 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-AIX-32-LABEL: test_exp2: +; P9-AIX-32: # %bb.0: +; P9-AIX-32-NEXT: mflr 0 +; P9-AIX-32-NEXT: stwu 1, -64(1) +; P9-AIX-32-NEXT: stw 0, 72(1) +; P9-AIX-32-NEXT: xscvhpdp 1, 1 +; P9-AIX-32-NEXT: bl .exp2f[PR] +; P9-AIX-32-NEXT: nop +; P9-AIX-32-NEXT: xscvdphp 1, 1 +; P9-AIX-32-NEXT: addi 1, 1, 64 +; P9-AIX-32-NEXT: lwz 0, 8(1) +; P9-AIX-32-NEXT: mtlr 0 +; P9-AIX-32-NEXT: blr +; +; P9-AIX-64-LABEL: test_exp2: +; P9-AIX-64: # %bb.0: +; P9-AIX-64-NEXT: mflr 0 +; P9-AIX-64-NEXT: stdu 1, -112(1) +; P9-AIX-64-NEXT: std 0, 128(1) +; P9-AIX-64-NEXT: xscvhpdp 1, 1 +; P9-AIX-64-NEXT: bl .exp2f[PR] +; P9-AIX-64-NEXT: nop +; P9-AIX-64-NEXT: xscvdphp 1, 1 +; P9-AIX-64-NEXT: addi 1, 1, 112 +; P9-AIX-64-NEXT: ld 0, 16(1) +; P9-AIX-64-NEXT: mtlr 0 +; P9-AIX-64-NEXT: blr +; +; P8-LINUX-64-LABEL: test_exp2: +; P8-LINUX-64: # %bb.0: +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stdu 1, -32(1) +; P8-LINUX-64-NEXT: std 0, 48(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: bl exp2f +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 32 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; 
P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +; +; P9-LINUX-64-LABEL: test_exp2: +; P9-LINUX-64: # %bb.0: +; P9-LINUX-64-NEXT: mflr 0 +; P9-LINUX-64-NEXT: stdu 1, -32(1) +; P9-LINUX-64-NEXT: std 0, 48(1) +; P9-LINUX-64-NEXT: xscvhpdp 1, 1 +; P9-LINUX-64-NEXT: bl exp2f +; P9-LINUX-64-NEXT: nop +; P9-LINUX-64-NEXT: xscvdphp 1, 1 +; P9-LINUX-64-NEXT: addi 1, 1, 32 +; P9-LINUX-64-NEXT: ld 0, 16(1) +; P9-LINUX-64-NEXT: mtlr 0 +; P9-LINUX-64-NEXT: blr + %r = call half @llvm.exp2.f16(half %a) + ret half %r +} + +define half @test_exp10(half %a) nounwind { +; P8-AIX-32-LABEL: test_exp10: +; P8-AIX-32: # %bb.0: +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -64(1) +; P8-AIX-32-NEXT: stw 0, 72(1) +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: bl .exp10f[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: addi 1, 1, 64 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: test_exp10: +; P8-AIX-64: # %bb.0: +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -112(1) +; P8-AIX-64-NEXT: std 0, 128(1) +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: bl .exp10f[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: addi 1, 1, 112 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-AIX-32-LABEL: test_exp10: +; P9-AIX-32: # %bb.0: +; P9-AIX-32-NEXT: mflr 0 +; P9-AIX-32-NEXT: stwu 1, -64(1) +; P9-AIX-32-NEXT: stw 0, 72(1) +; P9-AIX-32-NEXT: xscvhpdp 1, 1 +; P9-AIX-32-NEXT: bl .exp10f[PR] +; P9-AIX-32-NEXT: nop +; P9-AIX-32-NEXT: xscvdphp 1, 1 +; P9-AIX-32-NEXT: addi 1, 1, 64 +; P9-AIX-32-NEXT: lwz 0, 8(1) +; P9-AIX-32-NEXT: mtlr 0 +; P9-AIX-32-NEXT: blr +; +; P9-AIX-64-LABEL: test_exp10: +; P9-AIX-64: # %bb.0: +; P9-AIX-64-NEXT: mflr 0 +; P9-AIX-64-NEXT: stdu 1, -112(1) +; P9-AIX-64-NEXT: std 0, 128(1) +; 
P9-AIX-64-NEXT: xscvhpdp 1, 1 +; P9-AIX-64-NEXT: bl .exp10f[PR] +; P9-AIX-64-NEXT: nop +; P9-AIX-64-NEXT: xscvdphp 1, 1 +; P9-AIX-64-NEXT: addi 1, 1, 112 +; P9-AIX-64-NEXT: ld 0, 16(1) +; P9-AIX-64-NEXT: mtlr 0 +; P9-AIX-64-NEXT: blr +; +; P8-LINUX-64-LABEL: test_exp10: +; P8-LINUX-64: # %bb.0: +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stdu 1, -32(1) +; P8-LINUX-64-NEXT: std 0, 48(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: bl exp10f +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 32 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +; +; P9-LINUX-64-LABEL: test_exp10: +; P9-LINUX-64: # %bb.0: +; P9-LINUX-64-NEXT: mflr 0 +; P9-LINUX-64-NEXT: stdu 1, -32(1) +; P9-LINUX-64-NEXT: std 0, 48(1) +; P9-LINUX-64-NEXT: xscvhpdp 1, 1 +; P9-LINUX-64-NEXT: bl exp10f +; P9-LINUX-64-NEXT: nop +; P9-LINUX-64-NEXT: xscvdphp 1, 1 +; P9-LINUX-64-NEXT: addi 1, 1, 32 +; P9-LINUX-64-NEXT: ld 0, 16(1) +; P9-LINUX-64-NEXT: mtlr 0 +; P9-LINUX-64-NEXT: blr + %r = call half @llvm.exp10.f16(half %a) + ret half %r +} + +; ======================================================================================== +; Transcendental Functions - Trigonometric +; ISD::FSIN, ISD::FCOS , calls libm +; ======================================================================================== + +define half @test_sin(half %a) nounwind { +; P8-AIX-32-LABEL: test_sin: +; P8-AIX-32: # %bb.0: +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -64(1) +; P8-AIX-32-NEXT: stw 0, 72(1) +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: bl .sinf[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: addi 1, 1, 64 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: test_sin: +; P8-AIX-64: # %bb.0: +; P8-AIX-64-NEXT: mflr 0 +; 
P8-AIX-64-NEXT: stdu 1, -112(1) +; P8-AIX-64-NEXT: std 0, 128(1) +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: bl .sinf[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: addi 1, 1, 112 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-AIX-32-LABEL: test_sin: +; P9-AIX-32: # %bb.0: +; P9-AIX-32-NEXT: mflr 0 +; P9-AIX-32-NEXT: stwu 1, -64(1) +; P9-AIX-32-NEXT: stw 0, 72(1) +; P9-AIX-32-NEXT: xscvhpdp 1, 1 +; P9-AIX-32-NEXT: bl .sinf[PR] +; P9-AIX-32-NEXT: nop +; P9-AIX-32-NEXT: xscvdphp 1, 1 +; P9-AIX-32-NEXT: addi 1, 1, 64 +; P9-AIX-32-NEXT: lwz 0, 8(1) +; P9-AIX-32-NEXT: mtlr 0 +; P9-AIX-32-NEXT: blr +; +; P9-AIX-64-LABEL: test_sin: +; P9-AIX-64: # %bb.0: +; P9-AIX-64-NEXT: mflr 0 +; P9-AIX-64-NEXT: stdu 1, -112(1) +; P9-AIX-64-NEXT: std 0, 128(1) +; P9-AIX-64-NEXT: xscvhpdp 1, 1 +; P9-AIX-64-NEXT: bl .sinf[PR] +; P9-AIX-64-NEXT: nop +; P9-AIX-64-NEXT: xscvdphp 1, 1 +; P9-AIX-64-NEXT: addi 1, 1, 112 +; P9-AIX-64-NEXT: ld 0, 16(1) +; P9-AIX-64-NEXT: mtlr 0 +; P9-AIX-64-NEXT: blr +; +; P8-LINUX-64-LABEL: test_sin: +; P8-LINUX-64: # %bb.0: +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stdu 1, -32(1) +; P8-LINUX-64-NEXT: std 0, 48(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: bl sinf +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 32 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +; +; P9-LINUX-64-LABEL: test_sin: +; P9-LINUX-64: # %bb.0: +; P9-LINUX-64-NEXT: mflr 0 +; P9-LINUX-64-NEXT: stdu 1, -32(1) +; P9-LINUX-64-NEXT: std 0, 48(1) +; P9-LINUX-64-NEXT: xscvhpdp 1, 1 +; P9-LINUX-64-NEXT: bl sinf +; P9-LINUX-64-NEXT: nop +; P9-LINUX-64-NEXT: xscvdphp 1, 1 +; P9-LINUX-64-NEXT: addi 1, 1, 32 +; P9-LINUX-64-NEXT: ld 0, 16(1) +; P9-LINUX-64-NEXT: mtlr 0 +; P9-LINUX-64-NEXT: blr + %r = 
call half @llvm.sin.f16(half %a) + ret half %r +} + +define half @test_cos(half %a) nounwind { +; P8-AIX-32-LABEL: test_cos: +; P8-AIX-32: # %bb.0: +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -64(1) +; P8-AIX-32-NEXT: stw 0, 72(1) +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: bl .cosf[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: addi 1, 1, 64 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: test_cos: +; P8-AIX-64: # %bb.0: +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -112(1) +; P8-AIX-64-NEXT: std 0, 128(1) +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: bl .cosf[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: addi 1, 1, 112 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-AIX-32-LABEL: test_cos: +; P9-AIX-32: # %bb.0: +; P9-AIX-32-NEXT: mflr 0 +; P9-AIX-32-NEXT: stwu 1, -64(1) +; P9-AIX-32-NEXT: stw 0, 72(1) +; P9-AIX-32-NEXT: xscvhpdp 1, 1 +; P9-AIX-32-NEXT: bl .cosf[PR] +; P9-AIX-32-NEXT: nop +; P9-AIX-32-NEXT: xscvdphp 1, 1 +; P9-AIX-32-NEXT: addi 1, 1, 64 +; P9-AIX-32-NEXT: lwz 0, 8(1) +; P9-AIX-32-NEXT: mtlr 0 +; P9-AIX-32-NEXT: blr +; +; P9-AIX-64-LABEL: test_cos: +; P9-AIX-64: # %bb.0: +; P9-AIX-64-NEXT: mflr 0 +; P9-AIX-64-NEXT: stdu 1, -112(1) +; P9-AIX-64-NEXT: std 0, 128(1) +; P9-AIX-64-NEXT: xscvhpdp 1, 1 +; P9-AIX-64-NEXT: bl .cosf[PR] +; P9-AIX-64-NEXT: nop +; P9-AIX-64-NEXT: xscvdphp 1, 1 +; P9-AIX-64-NEXT: addi 1, 1, 112 +; P9-AIX-64-NEXT: ld 0, 16(1) +; P9-AIX-64-NEXT: mtlr 0 +; P9-AIX-64-NEXT: blr +; +; P8-LINUX-64-LABEL: test_cos: +; P8-LINUX-64: # %bb.0: +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stdu 1, -32(1) +; P8-LINUX-64-NEXT: std 0, 48(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: bl cosf 
+; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 32 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +; +; P9-LINUX-64-LABEL: test_cos: +; P9-LINUX-64: # %bb.0: +; P9-LINUX-64-NEXT: mflr 0 +; P9-LINUX-64-NEXT: stdu 1, -32(1) +; P9-LINUX-64-NEXT: std 0, 48(1) +; P9-LINUX-64-NEXT: xscvhpdp 1, 1 +; P9-LINUX-64-NEXT: bl cosf +; P9-LINUX-64-NEXT: nop +; P9-LINUX-64-NEXT: xscvdphp 1, 1 +; P9-LINUX-64-NEXT: addi 1, 1, 32 +; P9-LINUX-64-NEXT: ld 0, 16(1) +; P9-LINUX-64-NEXT: mtlr 0 +; P9-LINUX-64-NEXT: blr + %r = call half @llvm.cos.f16(half %a) + ret half %r +} + +; ======================================================================================== +; Type Conversion Operations +; ISD::FP_EXTEND (f16->f32/f64), ISD::FP_ROUND (f32/f64->f16) +; P9+: Hardware (xscvhpdp/xscvdphp), P8: Library calls +; ======================================================================================== + +define float @op_f16_to_f32(half %h) nounwind { +; P8-AIX-32-LABEL: op_f16_to_f32: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -64(1) +; P8-AIX-32-NEXT: stw 0, 72(1) +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: addi 1, 1, 64 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: op_f16_to_f32: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -112(1) +; P8-AIX-64-NEXT: std 0, 128(1) +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: addi 1, 1, 112 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-LABEL: op_f16_to_f32: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvhpdp 1, 1 +; P9-NEXT: blr +; +; P8-LINUX-64-LABEL: op_f16_to_f32: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stdu 1, -32(1) +; P8-LINUX-64-NEXT: std 0, 
48(1) +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 32 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +entry: + %conv = fpext half %h to float + ret float %conv +} + +define double @op_f16_to_f64(half %h) nounwind { +; P8-AIX-32-LABEL: op_f16_to_f64: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -64(1) +; P8-AIX-32-NEXT: stw 0, 72(1) +; P8-AIX-32-NEXT: bl .__extendhfdf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: addi 1, 1, 64 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: op_f16_to_f64: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -112(1) +; P8-AIX-64-NEXT: std 0, 128(1) +; P8-AIX-64-NEXT: bl .__extendhfdf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: addi 1, 1, 112 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-LABEL: op_f16_to_f64: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvhpdp 1, 1 +; P9-NEXT: blr +; +; P8-LINUX-64-LABEL: op_f16_to_f64: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stdu 1, -32(1) +; P8-LINUX-64-NEXT: std 0, 48(1) +; P8-LINUX-64-NEXT: bl __extendhfdf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 32 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +entry: + %conv = fpext half %h to double + ret double %conv +} + +define half @op_f32_to_f16(float %f) nounwind { +; P8-AIX-32-LABEL: op_f32_to_f16: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -64(1) +; P8-AIX-32-NEXT: stw 0, 72(1) +; P8-AIX-32-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: addi 1, 1, 64 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: op_f32_to_f16: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mflr 0 +; 
P8-AIX-64-NEXT: stdu 1, -112(1) +; P8-AIX-64-NEXT: std 0, 128(1) +; P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: addi 1, 1, 112 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-LABEL: op_f32_to_f16: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvdphp 1, 1 +; P9-NEXT: blr +; +; P8-LINUX-64-LABEL: op_f32_to_f16: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stdu 1, -32(1) +; P8-LINUX-64-NEXT: std 0, 48(1) +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 32 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +entry: + %conv = fptrunc float %f to half + ret half %conv +} + +define half @op_f64_to_f16(double %d) nounwind { +; P8-AIX-32-LABEL: op_f64_to_f16: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -64(1) +; P8-AIX-32-NEXT: stw 0, 72(1) +; P8-AIX-32-NEXT: bl .__truncdfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: addi 1, 1, 64 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: op_f64_to_f16: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -112(1) +; P8-AIX-64-NEXT: std 0, 128(1) +; P8-AIX-64-NEXT: bl .__truncdfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: addi 1, 1, 112 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-LABEL: op_f64_to_f16: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvdphp 1, 1 +; P9-NEXT: blr +; +; P8-LINUX-64-LABEL: op_f64_to_f16: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stdu 1, -32(1) +; P8-LINUX-64-NEXT: std 0, 48(1) +; P8-LINUX-64-NEXT: bl __truncdfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 32 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: blr +entry: + %conv = fptrunc double %d to half + ret half %conv 
+} + +; ======================================================================================== +; Bitcast Operations +; Bitcast between f16 and i16 (no conversion, reinterpret bits) +; Test that bitcast between i16 and half is lossless when -mfloat16 is active. +; This verifies the fix for the miscompilation described in llvm/llvm-project#97981, +; where passing/returning half used lossy float conversion functions (__gnu_h2f_ieee, +; __gnu_f2h_ieee) that silenced signalling NaNs and corrupted NaN payloads. +; ======================================================================================== + +define half @to_half(i16 %bits) nounwind { +; P8-AIX-32-LABEL: to_half: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: sth 3, -2(1) +; P8-AIX-32-NEXT: lhz 3, -2(1) +; P8-AIX-32-NEXT: mtfprwz 1, 3 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: to_half: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: sth 3, -2(1) +; P8-AIX-64-NEXT: lhz 3, -2(1) +; P8-AIX-64-NEXT: mtfprwz 1, 3 +; P8-AIX-64-NEXT: blr +; +; P9-LABEL: to_half: +; P9: # %bb.0: # %entry +; P9-NEXT: sth 3, -2(1) +; P9-NEXT: addi 3, 1, -2 +; P9-NEXT: lxsihzx 1, 0, 3 +; P9-NEXT: blr +; +; P8-LINUX-64-LABEL: to_half: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: sth 3, -2(1) +; P8-LINUX-64-NEXT: lhz 3, -2(1) +; P8-LINUX-64-NEXT: mtfprwz 1, 3 +; P8-LINUX-64-NEXT: blr +entry: + %f = bitcast i16 %bits to half + ret half %f +} + +define i16 @from_half(half %f) nounwind { +; P8-AIX-32-LABEL: from_half: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mffprwz 3, 1 +; P8-AIX-32-NEXT: sth 3, -2(1) +; P8-AIX-32-NEXT: lhz 3, -2(1) +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: from_half: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mffprwz 3, 1 +; P8-AIX-64-NEXT: sth 3, -2(1) +; P8-AIX-64-NEXT: lhz 3, -2(1) +; P8-AIX-64-NEXT: blr +; +; P9-LABEL: from_half: +; P9: # %bb.0: # %entry +; P9-NEXT: addi 3, 1, -2 +; P9-NEXT: stxsihx 1, 0, 3 +; P9-NEXT: lhz 3, -2(1) +; P9-NEXT: blr +; +; P8-LINUX-64-LABEL: 
from_half: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mffprwz 3, 1 +; P8-LINUX-64-NEXT: sth 3, -2(1) +; P8-LINUX-64-NEXT: lhz 3, -2(1) +; P8-LINUX-64-NEXT: blr +entry: + %bits = bitcast half %f to i16 + ret i16 %bits +} + +; ======================================================================================== +; Edge Cases and Correctness Tests +; llvm/llvm-project#97975 +; ======================================================================================== +; Test that consecutive half operations round intermediate results back to f16 +; precision between each operation when -mfloat16 is active. +; This verifies the fix for the miscompilation described in llvm/llvm-project#97975, +; where backends kept intermediate results at f32 precision across consecutive +; half operations, causing incorrect results. For example: +; 65504.0 + 65504.0 + (-65504.0) should produce infinity at f16 precision +; but produces 65504.0 if the intermediate is kept at f32 precision. +; ======================================================================================== + +define half @consecutive_add(half %a, half %b, half %c) nounwind { +; P8-AIX-32-LABEL: consecutive_add: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: mflr 0 +; P8-AIX-32-NEXT: stwu 1, -80(1) +; P8-AIX-32-NEXT: stw 0, 88(1) +; P8-AIX-32-NEXT: stfd 30, 64(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: fmr 30, 1 +; P8-AIX-32-NEXT: fmr 1, 2 +; P8-AIX-32-NEXT: stfd 29, 56(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; P8-AIX-32-NEXT: fmr 31, 3 +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fmr 29, 1 +; P8-AIX-32-NEXT: fmr 1, 30 +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: xsaddsp 1, 1, 29 +; P8-AIX-32-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: fmr 30, 1 +; P8-AIX-32-NEXT: fmr 1, 31 +; P8-AIX-32-NEXT: bl 
.__extendhfsf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: xsaddsp 1, 30, 1 +; P8-AIX-32-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: lfd 29, 56(1) # 8-byte Folded Reload +; P8-AIX-32-NEXT: addi 1, 1, 80 +; P8-AIX-32-NEXT: lwz 0, 8(1) +; P8-AIX-32-NEXT: mtlr 0 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: consecutive_add: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mflr 0 +; P8-AIX-64-NEXT: stdu 1, -144(1) +; P8-AIX-64-NEXT: std 0, 160(1) +; P8-AIX-64-NEXT: stfd 30, 128(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: fmr 30, 1 +; P8-AIX-64-NEXT: fmr 1, 2 +; P8-AIX-64-NEXT: stfd 29, 120(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: stfd 31, 136(1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: fmr 31, 3 +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fmr 29, 1 +; P8-AIX-64-NEXT: fmr 1, 30 +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: xsaddsp 1, 1, 29 +; P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: fmr 30, 1 +; P8-AIX-64-NEXT: fmr 1, 31 +; P8-AIX-64-NEXT: bl .__extendhfsf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: xsaddsp 1, 30, 1 +; P8-AIX-64-NEXT: bl .__truncsfhf2[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: lfd 31, 136(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: lfd 30, 128(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: lfd 29, 120(1) # 8-byte Folded Reload +; P8-AIX-64-NEXT: addi 1, 1, 144 +; P8-AIX-64-NEXT: ld 0, 16(1) +; P8-AIX-64-NEXT: mtlr 0 +; P8-AIX-64-NEXT: blr +; +; P9-LABEL: consecutive_add: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvhpdp 0, 2 +; P9-NEXT: xscvhpdp 1, 1 +; P9-NEXT: xsaddsp 0, 1, 0 +; P9-NEXT: xscvdphp 0, 0 +; P9-NEXT: xscvhpdp 0, 0 +; P9-NEXT: xscvhpdp 1, 3 +; P9-NEXT: xsaddsp 0, 0, 1 +; P9-NEXT: xscvdphp 1, 0 +; P9-NEXT: blr +; +; 
P8-LINUX-64-LABEL: consecutive_add: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: mflr 0 +; P8-LINUX-64-NEXT: stfd 29, -24(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stfd 30, -16(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stfd 31, -8(1) # 8-byte Folded Spill +; P8-LINUX-64-NEXT: stdu 1, -64(1) +; P8-LINUX-64-NEXT: fmr 30, 1 +; P8-LINUX-64-NEXT: fmr 1, 2 +; P8-LINUX-64-NEXT: std 0, 80(1) +; P8-LINUX-64-NEXT: fmr 31, 3 +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fmr 29, 1 +; P8-LINUX-64-NEXT: fmr 1, 30 +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: xsaddsp 1, 1, 29 +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: fmr 30, 1 +; P8-LINUX-64-NEXT: fmr 1, 31 +; P8-LINUX-64-NEXT: bl __extendhfsf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: xsaddsp 1, 30, 1 +; P8-LINUX-64-NEXT: bl __truncsfhf2 +; P8-LINUX-64-NEXT: nop +; P8-LINUX-64-NEXT: addi 1, 1, 64 +; P8-LINUX-64-NEXT: ld 0, 16(1) +; P8-LINUX-64-NEXT: lfd 31, -8(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: lfd 30, -16(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: mtlr 0 +; P8-LINUX-64-NEXT: lfd 29, -24(1) # 8-byte Folded Reload +; P8-LINUX-64-NEXT: blr +entry: + %d = fadd half %a, %b + %e = fadd half %d, %c + ret half %e +} + +; ======================================================================================== +; ConstantFP - Expand path, materialized as integer +; ======================================================================================== + +; 0xH3C00 = 1.0 in IEEE 754 f16 encoding +define half @test_constant_fp() nounwind { +; P8-AIX-32-LABEL: test_constant_fp: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: lwz 3, L..C2(2) # %const.0 +; P8-AIX-32-NEXT: lhzx 3, 0, 3 +; P8-AIX-32-NEXT: mtfprwz 1, 3 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: test_constant_fp: +; P8-AIX-64: # %bb.0: # %entry +; 
P8-AIX-64-NEXT: ld 3, L..C2(2) # %const.0 +; P8-AIX-64-NEXT: lhzx 3, 0, 3 +; P8-AIX-64-NEXT: mtfprwz 1, 3 +; P8-AIX-64-NEXT: blr +; +; P9-AIX-32-LABEL: test_constant_fp: +; P9-AIX-32: # %bb.0: # %entry +; P9-AIX-32-NEXT: lwz 3, L..C2(2) # %const.0 +; P9-AIX-32-NEXT: lxsihzx 1, 0, 3 +; P9-AIX-32-NEXT: blr +; +; P9-AIX-64-LABEL: test_constant_fp: +; P9-AIX-64: # %bb.0: # %entry +; P9-AIX-64-NEXT: ld 3, L..C2(2) # %const.0 +; P9-AIX-64-NEXT: lxsihzx 1, 0, 3 +; P9-AIX-64-NEXT: blr +; +; P8-LINUX-64-LABEL: test_constant_fp: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: addis 3, 2, .LCPI48_0@toc@ha +; P8-LINUX-64-NEXT: lhz 3, .LCPI48_0@toc@l(3) +; P8-LINUX-64-NEXT: mtfprwz 1, 3 +; P8-LINUX-64-NEXT: blr +; +; P9-LINUX-64-LABEL: test_constant_fp: +; P9-LINUX-64: # %bb.0: # %entry +; P9-LINUX-64-NEXT: addis 3, 2, .LCPI48_0@toc@ha +; P9-LINUX-64-NEXT: addi 3, 3, .LCPI48_0@toc@l +; P9-LINUX-64-NEXT: lxsihzx 1, 0, 3 +; P9-LINUX-64-NEXT: blr +entry: + ret half 0xH3C00 +} + +define half @test_constant_zero() nounwind { +; P8-AIX-32-LABEL: test_constant_zero: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: lwz 3, L..C3(2) # %const.0 +; P8-AIX-32-NEXT: lhzx 3, 0, 3 +; P8-AIX-32-NEXT: mtfprwz 1, 3 +; P8-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: test_constant_zero: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: ld 3, L..C3(2) # %const.0 +; P8-AIX-64-NEXT: lhzx 3, 0, 3 +; P8-AIX-64-NEXT: mtfprwz 1, 3 +; P8-AIX-64-NEXT: blr +; +; P9-AIX-32-LABEL: test_constant_zero: +; P9-AIX-32: # %bb.0: # %entry +; P9-AIX-32-NEXT: lwz 3, L..C3(2) # %const.0 +; P9-AIX-32-NEXT: lxsihzx 1, 0, 3 +; P9-AIX-32-NEXT: blr +; +; P9-AIX-64-LABEL: test_constant_zero: +; P9-AIX-64: # %bb.0: # %entry +; P9-AIX-64-NEXT: ld 3, L..C3(2) # %const.0 +; P9-AIX-64-NEXT: lxsihzx 1, 0, 3 +; P9-AIX-64-NEXT: blr +; +; P8-LINUX-64-LABEL: test_constant_zero: +; P8-LINUX-64: # %bb.0: # %entry +; P8-LINUX-64-NEXT: addis 3, 2, .LCPI49_0@toc@ha +; P8-LINUX-64-NEXT: lhz 3, .LCPI49_0@toc@l(3) +; P8-LINUX-64-NEXT: 
mtfprwz 1, 3 +; P8-LINUX-64-NEXT: blr +; +; P9-LINUX-64-LABEL: test_constant_zero: +; P9-LINUX-64: # %bb.0: # %entry +; P9-LINUX-64-NEXT: addis 3, 2, .LCPI49_0@toc@ha +; P9-LINUX-64-NEXT: addi 3, 3, .LCPI49_0@toc@l +; P9-LINUX-64-NEXT: lxsihzx 1, 0, 3 +; P9-LINUX-64-NEXT: blr +entry: + ret half 0.0 +} + +; ======================================================================================== +; Intrinsic Declarations +; ======================================================================================== + +declare half @llvm.canonicalize.f16(half) +declare half @llvm.ceil.f16(half) +declare half @llvm.copysign.f16(half, half) +declare half @llvm.cos.f16(half) +declare half @llvm.exp.f16(half) +declare half @llvm.exp10.f16(half) +declare half @llvm.exp2.f16(half) +declare half @llvm.fabs.f16(half) +declare half @llvm.floor.f16(half) +declare half @llvm.fma.f16(half, half, half) +declare half @llvm.log.f16(half) +declare half @llvm.log10.f16(half) +declare half @llvm.log2.f16(half) +declare half @llvm.maximum.f16(half, half) +declare half @llvm.maxnum.f16(half, half) +declare half @llvm.minimum.f16(half, half) +declare half @llvm.minnum.f16(half, half) +declare half @llvm.nearbyint.f16(half) +declare half @llvm.pow.f16(half, half) +declare half @llvm.rint.f16(half) +declare half @llvm.round.f16(half) +declare half @llvm.roundeven.f16(half) +declare half @llvm.sin.f16(half) +declare half @llvm.sqrt.f16(half) +declare half @llvm.trunc.f16(half) diff --git a/llvm/test/CodeGen/PowerPC/shrink-wrap.mir b/llvm/test/CodeGen/PowerPC/shrink-wrap.mir index 4981c81291c9e..3e70b3051b80d 100644 --- a/llvm/test/CodeGen/PowerPC/shrink-wrap.mir +++ b/llvm/test/CodeGen/PowerPC/shrink-wrap.mir @@ -88,7 +88,7 @@ body: | ; CHECK-NEXT: successors: %bb.4(0x7c000000), %bb.5(0x04000000) ; CHECK-NEXT: liveins: $r4, $x3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM &"add $0, $1, $2", attdialect, regdef:GPRC, def renamable $r4, reguse:GPRC, renamable $r3, reguse:GPRC, killed renamable 
$r4, clobber, implicit-def dead early-clobber $r14, clobber, implicit-def dead early-clobber $r15, clobber, implicit-def dead early-clobber $r16, clobber, implicit-def dead early-clobber $r17, clobber, implicit-def dead early-clobber $r18, clobber, implicit-def dead early-clobber $r19, clobber, implicit-def dead early-clobber $r20, clobber, implicit-def dead early-clobber $r21, clobber, implicit-def dead early-clobber $r22, clobber, implicit-def dead early-clobber $r23, clobber, implicit-def dead early-clobber $r24, clobber, implicit-def dead early-clobber $r25, clobber, implicit-def dead early-clobber $r26, clobber, implicit-def dead early-clobber $r27, clobber, implicit-def dead early-clobber $r28, clobber, implicit-def dead early-clobber $r29, clobber, implicit-def dead early-clobber $r30, clobber, implicit-def dead early-clobber $r31 + ; CHECK-NEXT: INLINEASM &"add $0, $1, $2", attdialect, regdef:VSSRC, def renamable $r4, reguse:VSSRC, renamable $r3, reguse:VSSRC, killed renamable $r4, clobber, implicit-def dead early-clobber $r14, clobber, implicit-def dead early-clobber $r15, clobber, implicit-def dead early-clobber $r16, clobber, implicit-def dead early-clobber $r17, clobber, implicit-def dead early-clobber $r18, clobber, implicit-def dead early-clobber $r19, clobber, implicit-def dead early-clobber $r20, clobber, implicit-def dead early-clobber $r21, clobber, implicit-def dead early-clobber $r22, clobber, implicit-def dead early-clobber $r23, clobber, implicit-def dead early-clobber $r24, clobber, implicit-def dead early-clobber $r25, clobber, implicit-def dead early-clobber $r26, clobber, implicit-def dead early-clobber $r27, clobber, implicit-def dead early-clobber $r28, clobber, implicit-def dead early-clobber $r29, clobber, implicit-def dead early-clobber $r30, clobber, implicit-def dead early-clobber $r31 ; CHECK-NEXT: BDNZ8 %bb.4, implicit-def dead $ctr8, implicit $ctr8 ; CHECK-NEXT: B %bb.5 ; CHECK-NEXT: {{ $}}