diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp index f9a38cf21efed..489b308b30d47 100644 --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -224,12 +224,11 @@ static size_t findLastNonVersionCharacter(StringRef Ext) { } namespace { -struct LessExtName { - bool operator()(const RISCVSupportedExtension &LHS, StringRef RHS) { - return StringRef(LHS.Name) < RHS; - } - bool operator()(StringRef LHS, const RISCVSupportedExtension &RHS) { - return LHS < StringRef(RHS.Name); +struct FindByName { + FindByName(StringRef Ext) : Ext(Ext){}; + StringRef Ext; + bool operator()(const RISCVSupportedExtension &ExtInfo) { + return ExtInfo.Name == Ext; } }; } // namespace @@ -240,12 +239,12 @@ findDefaultVersion(StringRef ExtName) { // TODO: We might set default version based on profile or ISA spec. for (auto &ExtInfo : {ArrayRef(SupportedExtensions), ArrayRef(SupportedExperimentalExtensions)}) { - auto I = llvm::lower_bound(ExtInfo, ExtName, LessExtName()); + auto ExtensionInfoIterator = llvm::find_if(ExtInfo, FindByName(ExtName)); - if (I == ExtInfo.end() || I->Name != ExtName) + if (ExtensionInfoIterator == ExtInfo.end()) { continue; - - return I->Version; + } + return ExtensionInfoIterator->Version; } return std::nullopt; } @@ -280,50 +279,37 @@ static StringRef getExtensionType(StringRef Ext) { static std::optional isExperimentalExtension(StringRef Ext) { - auto I = - llvm::lower_bound(SupportedExperimentalExtensions, Ext, LessExtName()); - if (I == std::end(SupportedExperimentalExtensions) || I->Name != Ext) + auto ExtIterator = + llvm::find_if(SupportedExperimentalExtensions, FindByName(Ext)); + if (ExtIterator == std::end(SupportedExperimentalExtensions)) return std::nullopt; - return I->Version; + return ExtIterator->Version; } bool RISCVISAInfo::isSupportedExtensionFeature(StringRef Ext) { bool IsExperimental = stripExperimentalPrefix(Ext); - ArrayRef ExtInfo = - IsExperimental ? ArrayRef(SupportedExperimentalExtensions) - : ArrayRef(SupportedExtensions); - - auto I = llvm::lower_bound(ExtInfo, Ext, LessExtName()); - return I != ExtInfo.end() && I->Name == Ext; + if (IsExperimental) + return llvm::any_of(SupportedExperimentalExtensions, FindByName(Ext)); + else + return llvm::any_of(SupportedExtensions, FindByName(Ext)); } bool RISCVISAInfo::isSupportedExtension(StringRef Ext) { verifyTables(); - - for (auto ExtInfo : {ArrayRef(SupportedExtensions), - ArrayRef(SupportedExperimentalExtensions)}) { - auto I = llvm::lower_bound(ExtInfo, Ext, LessExtName()); - if (I != ExtInfo.end() && I->Name == Ext) - return true; - } - - return false; + return llvm::any_of(SupportedExtensions, FindByName(Ext)) || + llvm::any_of(SupportedExperimentalExtensions, FindByName(Ext)); } bool RISCVISAInfo::isSupportedExtension(StringRef Ext, unsigned MajorVersion, unsigned MinorVersion) { - for (auto ExtInfo : {ArrayRef(SupportedExtensions), - ArrayRef(SupportedExperimentalExtensions)}) { - auto Range = - std::equal_range(ExtInfo.begin(), ExtInfo.end(), Ext, LessExtName()); - for (auto I = Range.first, E = Range.second; I != E; ++I) - if (I->Version.Major == MajorVersion && I->Version.Minor == MinorVersion) - return true; - } - - return false; + auto FindByNameAndVersion = [=](const RISCVSupportedExtension &ExtInfo) { + return ExtInfo.Name == Ext && (MajorVersion == ExtInfo.Version.Major) && + (MinorVersion == ExtInfo.Version.Minor); + }; + return llvm::any_of(SupportedExtensions, FindByNameAndVersion) || + llvm::any_of(SupportedExperimentalExtensions, FindByNameAndVersion); } bool RISCVISAInfo::hasExtension(StringRef Ext) const { @@ -563,12 +549,11 @@ RISCVISAInfo::parseFeatures(unsigned XLen, ? ArrayRef(SupportedExperimentalExtensions) : ArrayRef(SupportedExtensions); auto ExtensionInfoIterator = - llvm::lower_bound(ExtensionInfos, ExtName, LessExtName()); + llvm::find_if(ExtensionInfos, FindByName(ExtName)); // Not all features is related to ISA extension, like `relax` or // `save-restore`, skip those feature. - if (ExtensionInfoIterator == ExtensionInfos.end() || - ExtensionInfoIterator->Name != ExtName) + if (ExtensionInfoIterator == ExtensionInfos.end()) continue; if (Add) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index ef9e96b6cca4c..d09295134c9bf 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -427,9 +427,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); setTruncStoreAction(MVT::f32, MVT::f16, Expand); setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom); - setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom); - setOperationAction(ISD::FP_TO_BF16, MVT::f32, - Subtarget.isSoftFPABI() ? LibCall : Custom); if (Subtarget.hasStdExtZfa()) setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); @@ -464,9 +461,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); setTruncStoreAction(MVT::f64, MVT::f16, Expand); setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom); - setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom); - setOperationAction(ISD::FP_TO_BF16, MVT::f64, - Subtarget.isSoftFPABI() ? LibCall : Custom); } if (Subtarget.is64Bit()) { @@ -4929,35 +4923,6 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::FP_TO_SINT_SAT: case ISD::FP_TO_UINT_SAT: return lowerFP_TO_INT_SAT(Op, DAG, Subtarget); - case ISD::FP_TO_BF16: { - // Custom lower to ensure the libcall return is passed in an FPR on hard - // float ABIs. - assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization"); - SDLoc DL(Op); - MakeLibCallOptions CallOptions; - RTLIB::Libcall LC = - RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16); - SDValue Res = - makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first; - if (Subtarget.is64Bit()) - return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res); - return DAG.getBitcast(MVT::i32, Res); - } - case ISD::BF16_TO_FP: { - assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization"); - MVT VT = Op.getSimpleValueType(); - SDLoc DL(Op); - Op = DAG.getNode( - ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0), - DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL)); - SDValue Res = Subtarget.is64Bit() - ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op) - : DAG.getBitcast(MVT::f32, Op); - // fp_extend if the target VT is bigger than f32. - if (VT != MVT::f32) - return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res); - return Res; - } case ISD::FTRUNC: case ISD::FCEIL: case ISD::FFLOOR: @@ -16588,10 +16553,9 @@ bool RISCVTargetLowering::splitValueIntoRegisterParts( unsigned NumParts, MVT PartVT, std::optional CC) const { bool IsABIRegCopy = CC.has_value(); EVT ValueVT = Val.getValueType(); - if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) && - PartVT == MVT::f32) { - // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float - // nan, and cast to f32. + if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) { + // Cast the f16 to i16, extend to i32, pad with ones to make a float nan, + // and cast to f32. Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val); Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val); Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val, @@ -16642,14 +16606,13 @@ SDValue RISCVTargetLowering::joinRegisterPartsIntoValue( SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional CC) const { bool IsABIRegCopy = CC.has_value(); - if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) && - PartVT == MVT::f32) { + if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) { SDValue Val = Parts[0]; - // Cast the f32 to i32, truncate to i16, and cast back to [b]f16. + // Cast the f32 to i32, truncate to i16, and cast back to f16. Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val); Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val); - Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + Val = DAG.getNode(ISD::BITCAST, DL, MVT::f16, Val); return Val; } diff --git a/llvm/test/CodeGen/RISCV/bfloat.ll b/llvm/test/CodeGen/RISCV/bfloat.ll index c95d61fd6baab..4589e799b30c9 100644 --- a/llvm/test/CodeGen/RISCV/bfloat.ll +++ b/llvm/test/CodeGen/RISCV/bfloat.ll @@ -1,10 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32I-ILP32 ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64I-LP64 -; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi=ilp32 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32ID-ILP32 -; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi=lp64 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64ID-LP64 -; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32ID-ILP32D -; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64ID-LP64D + +; TODO: Enable codegen for hard float. define bfloat @float_to_bfloat(float %a) nounwind { ; RV32I-ILP32-LABEL: float_to_bfloat: @@ -24,54 +22,6 @@ define bfloat @float_to_bfloat(float %a) nounwind { ; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-LP64-NEXT: addi sp, sp, 16 ; RV64I-LP64-NEXT: ret -; -; RV32ID-ILP32-LABEL: float_to_bfloat: -; RV32ID-ILP32: # %bb.0: -; RV32ID-ILP32-NEXT: addi sp, sp, -16 -; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32ID-ILP32-NEXT: call __truncsfbf2@plt -; RV32ID-ILP32-NEXT: lui a1, 1048560 -; RV32ID-ILP32-NEXT: or a0, a0, a1 -; RV32ID-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32ID-ILP32-NEXT: addi sp, sp, 16 -; RV32ID-ILP32-NEXT: ret -; -; RV64ID-LP64-LABEL: float_to_bfloat: -; RV64ID-LP64: # %bb.0: -; RV64ID-LP64-NEXT: addi sp, sp, -16 -; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64ID-LP64-NEXT: call __truncsfbf2@plt -; RV64ID-LP64-NEXT: lui a1, 1048560 -; RV64ID-LP64-NEXT: or a0, a0, a1 -; RV64ID-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64ID-LP64-NEXT: addi sp, sp, 16 -; RV64ID-LP64-NEXT: ret -; -; RV32ID-ILP32D-LABEL: float_to_bfloat: -; RV32ID-ILP32D: # %bb.0: -; RV32ID-ILP32D-NEXT: addi sp, sp, -16 -; RV32ID-ILP32D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32ID-ILP32D-NEXT: call __truncsfbf2@plt -; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0 -; RV32ID-ILP32D-NEXT: lui a1, 1048560 -; RV32ID-ILP32D-NEXT: or a0, a0, a1 -; RV32ID-ILP32D-NEXT: fmv.w.x fa0, a0 -; RV32ID-ILP32D-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32ID-ILP32D-NEXT: addi sp, sp, 16 -; RV32ID-ILP32D-NEXT: ret -; -; RV64ID-LP64D-LABEL: float_to_bfloat: -; RV64ID-LP64D: # %bb.0: -; RV64ID-LP64D-NEXT: addi sp, sp, -16 -; RV64ID-LP64D-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64ID-LP64D-NEXT: call __truncsfbf2@plt -; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0 -; RV64ID-LP64D-NEXT: lui a1, 1048560 -; RV64ID-LP64D-NEXT: or a0, a0, a1 -; RV64ID-LP64D-NEXT: fmv.w.x fa0, a0 -; RV64ID-LP64D-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64ID-LP64D-NEXT: addi sp, sp, 16 -; RV64ID-LP64D-NEXT: ret %1 = fptrunc float %a to bfloat ret bfloat %1 } @@ -94,54 +44,6 @@ define bfloat @double_to_bfloat(double %a) nounwind { ; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-LP64-NEXT: addi sp, sp, 16 ; RV64I-LP64-NEXT: ret -; -; RV32ID-ILP32-LABEL: double_to_bfloat: -; RV32ID-ILP32: # %bb.0: -; RV32ID-ILP32-NEXT: addi sp, sp, -16 -; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32ID-ILP32-NEXT: call __truncdfbf2@plt -; RV32ID-ILP32-NEXT: lui a1, 1048560 -; RV32ID-ILP32-NEXT: or a0, a0, a1 -; RV32ID-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32ID-ILP32-NEXT: addi sp, sp, 16 -; RV32ID-ILP32-NEXT: ret -; -; RV64ID-LP64-LABEL: double_to_bfloat: -; RV64ID-LP64: # %bb.0: -; RV64ID-LP64-NEXT: addi sp, sp, -16 -; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64ID-LP64-NEXT: call __truncdfbf2@plt -; RV64ID-LP64-NEXT: lui a1, 1048560 -; RV64ID-LP64-NEXT: or a0, a0, a1 -; RV64ID-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64ID-LP64-NEXT: addi sp, sp, 16 -; RV64ID-LP64-NEXT: ret -; -; RV32ID-ILP32D-LABEL: double_to_bfloat: -; RV32ID-ILP32D: # %bb.0: -; RV32ID-ILP32D-NEXT: addi sp, sp, -16 -; RV32ID-ILP32D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32ID-ILP32D-NEXT: call __truncdfbf2@plt -; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0 -; RV32ID-ILP32D-NEXT: lui a1, 1048560 -; RV32ID-ILP32D-NEXT: or a0, a0, a1 -; RV32ID-ILP32D-NEXT: fmv.w.x fa0, a0 -; RV32ID-ILP32D-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32ID-ILP32D-NEXT: addi sp, sp, 16 -; RV32ID-ILP32D-NEXT: ret -; -; RV64ID-LP64D-LABEL: double_to_bfloat: -; RV64ID-LP64D: # %bb.0: -; RV64ID-LP64D-NEXT: addi sp, sp, -16 -; RV64ID-LP64D-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64ID-LP64D-NEXT: call __truncdfbf2@plt -; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0 -; RV64ID-LP64D-NEXT: lui a1, 1048560 -; RV64ID-LP64D-NEXT: or a0, a0, a1 -; RV64ID-LP64D-NEXT: fmv.w.x fa0, a0 -; RV64ID-LP64D-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64ID-LP64D-NEXT: addi sp, sp, 16 -; RV64ID-LP64D-NEXT: ret %1 = fptrunc double %a to bfloat ret bfloat %1 } @@ -156,34 +58,6 @@ define float @bfloat_to_float(bfloat %a) nounwind { ; RV64I-LP64: # %bb.0: ; RV64I-LP64-NEXT: slliw a0, a0, 16 ; RV64I-LP64-NEXT: ret -; -; RV32ID-ILP32-LABEL: bfloat_to_float: -; RV32ID-ILP32: # %bb.0: -; RV32ID-ILP32-NEXT: slli a0, a0, 16 -; RV32ID-ILP32-NEXT: ret -; -; RV64ID-LP64-LABEL: bfloat_to_float: -; RV64ID-LP64: # %bb.0: -; RV64ID-LP64-NEXT: slli a0, a0, 48 -; RV64ID-LP64-NEXT: srli a0, a0, 48 -; RV64ID-LP64-NEXT: slli a0, a0, 16 -; RV64ID-LP64-NEXT: ret -; -; RV32ID-ILP32D-LABEL: bfloat_to_float: -; RV32ID-ILP32D: # %bb.0: -; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0 -; RV32ID-ILP32D-NEXT: slli a0, a0, 16 -; RV32ID-ILP32D-NEXT: fmv.w.x fa0, a0 -; RV32ID-ILP32D-NEXT: ret -; -; RV64ID-LP64D-LABEL: bfloat_to_float: -; RV64ID-LP64D: # %bb.0: -; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0 -; RV64ID-LP64D-NEXT: slli a0, a0, 48 -; RV64ID-LP64D-NEXT: srli a0, a0, 48 -; RV64ID-LP64D-NEXT: slli a0, a0, 16 -; RV64ID-LP64D-NEXT: fmv.w.x fa0, a0 -; RV64ID-LP64D-NEXT: ret %1 = fpext bfloat %a to float ret float %1 } @@ -208,46 +82,6 @@ define double @bfloat_to_double(bfloat %a) nounwind { ; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-LP64-NEXT: addi sp, sp, 16 ; RV64I-LP64-NEXT: ret -; -; RV32ID-ILP32-LABEL: bfloat_to_double: -; RV32ID-ILP32: # %bb.0: -; RV32ID-ILP32-NEXT: addi sp, sp, -16 -; RV32ID-ILP32-NEXT: slli a0, a0, 16 -; RV32ID-ILP32-NEXT: fmv.w.x fa5, a0 -; RV32ID-ILP32-NEXT: fcvt.d.s fa5, fa5 -; RV32ID-ILP32-NEXT: fsd fa5, 8(sp) -; RV32ID-ILP32-NEXT: lw a0, 8(sp) -; RV32ID-ILP32-NEXT: lw a1, 12(sp) -; RV32ID-ILP32-NEXT: addi sp, sp, 16 -; RV32ID-ILP32-NEXT: ret -; -; RV64ID-LP64-LABEL: bfloat_to_double: -; RV64ID-LP64: # %bb.0: -; RV64ID-LP64-NEXT: slli a0, a0, 48 -; RV64ID-LP64-NEXT: srli a0, a0, 48 -; RV64ID-LP64-NEXT: slli a0, a0, 16 -; RV64ID-LP64-NEXT: fmv.w.x fa5, a0 -; RV64ID-LP64-NEXT: fcvt.d.s fa5, fa5 -; RV64ID-LP64-NEXT: fmv.x.d a0, fa5 -; RV64ID-LP64-NEXT: ret -; -; RV32ID-ILP32D-LABEL: bfloat_to_double: -; RV32ID-ILP32D: # %bb.0: -; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0 -; RV32ID-ILP32D-NEXT: slli a0, a0, 16 -; RV32ID-ILP32D-NEXT: fmv.w.x fa5, a0 -; RV32ID-ILP32D-NEXT: fcvt.d.s fa0, fa5 -; RV32ID-ILP32D-NEXT: ret -; -; RV64ID-LP64D-LABEL: bfloat_to_double: -; RV64ID-LP64D: # %bb.0: -; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0 -; RV64ID-LP64D-NEXT: slli a0, a0, 48 -; RV64ID-LP64D-NEXT: srli a0, a0, 48 -; RV64ID-LP64D-NEXT: slli a0, a0, 16 -; RV64ID-LP64D-NEXT: fmv.w.x fa5, a0 -; RV64ID-LP64D-NEXT: fcvt.d.s fa0, fa5 -; RV64ID-LP64D-NEXT: ret %1 = fpext bfloat %a to double ret double %1 } @@ -260,32 +94,6 @@ define bfloat @i16_to_bfloat(i16 %a) nounwind { ; RV64I-LP64-LABEL: i16_to_bfloat: ; RV64I-LP64: # %bb.0: ; RV64I-LP64-NEXT: ret -; -; RV32ID-ILP32-LABEL: i16_to_bfloat: -; RV32ID-ILP32: # %bb.0: -; RV32ID-ILP32-NEXT: lui a1, 1048560 -; RV32ID-ILP32-NEXT: or a0, a0, a1 -; RV32ID-ILP32-NEXT: ret -; -; RV64ID-LP64-LABEL: i16_to_bfloat: -; RV64ID-LP64: # %bb.0: -; RV64ID-LP64-NEXT: lui a1, 1048560 -; RV64ID-LP64-NEXT: or a0, a0, a1 -; RV64ID-LP64-NEXT: ret -; -; RV32ID-ILP32D-LABEL: i16_to_bfloat: -; RV32ID-ILP32D: # %bb.0: -; RV32ID-ILP32D-NEXT: lui a1, 1048560 -; RV32ID-ILP32D-NEXT: or a0, a0, a1 -; RV32ID-ILP32D-NEXT: fmv.w.x fa0, a0 -; RV32ID-ILP32D-NEXT: ret -; -; RV64ID-LP64D-LABEL: i16_to_bfloat: -; RV64ID-LP64D: # %bb.0: -; RV64ID-LP64D-NEXT: lui a1, 1048560 -; RV64ID-LP64D-NEXT: or a0, a0, a1 -; RV64ID-LP64D-NEXT: fmv.w.x fa0, a0 -; RV64ID-LP64D-NEXT: ret %1 = bitcast i16 %a to bfloat ret bfloat %1 } @@ -298,24 +106,6 @@ define i16 @bfloat_to_i16(bfloat %a) nounwind { ; RV64I-LP64-LABEL: bfloat_to_i16: ; RV64I-LP64: # %bb.0: ; RV64I-LP64-NEXT: ret -; -; RV32ID-ILP32-LABEL: bfloat_to_i16: -; RV32ID-ILP32: # %bb.0: -; RV32ID-ILP32-NEXT: ret -; -; RV64ID-LP64-LABEL: bfloat_to_i16: -; RV64ID-LP64: # %bb.0: -; RV64ID-LP64-NEXT: ret -; -; RV32ID-ILP32D-LABEL: bfloat_to_i16: -; RV32ID-ILP32D: # %bb.0: -; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0 -; RV32ID-ILP32D-NEXT: ret -; -; RV64ID-LP64D-LABEL: bfloat_to_i16: -; RV64ID-LP64D: # %bb.0: -; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0 -; RV64ID-LP64D-NEXT: ret %1 = bitcast bfloat %a to i16 ret i16 %1 } @@ -344,88 +134,6 @@ define bfloat @bfloat_add(bfloat %a, bfloat %b) nounwind { ; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-LP64-NEXT: addi sp, sp, 16 ; RV64I-LP64-NEXT: ret -; -; RV32ID-ILP32-LABEL: bfloat_add: -; RV32ID-ILP32: # %bb.0: -; RV32ID-ILP32-NEXT: addi sp, sp, -16 -; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32ID-ILP32-NEXT: slli a1, a1, 16 -; RV32ID-ILP32-NEXT: fmv.w.x fa5, a1 -; RV32ID-ILP32-NEXT: slli a0, a0, 16 -; RV32ID-ILP32-NEXT: fmv.w.x fa4, a0 -; RV32ID-ILP32-NEXT: fadd.s fa5, fa4, fa5 -; RV32ID-ILP32-NEXT: fmv.x.w a0, fa5 -; RV32ID-ILP32-NEXT: call __truncsfbf2@plt -; RV32ID-ILP32-NEXT: lui a1, 1048560 -; RV32ID-ILP32-NEXT: or a0, a0, a1 -; RV32ID-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32ID-ILP32-NEXT: addi sp, sp, 16 -; RV32ID-ILP32-NEXT: ret -; -; RV64ID-LP64-LABEL: bfloat_add: -; RV64ID-LP64: # %bb.0: -; RV64ID-LP64-NEXT: addi sp, sp, -16 -; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64ID-LP64-NEXT: lui a2, 16 -; RV64ID-LP64-NEXT: addiw a2, a2, -1 -; RV64ID-LP64-NEXT: and a0, a0, a2 -; RV64ID-LP64-NEXT: and a1, a1, a2 -; RV64ID-LP64-NEXT: slli a1, a1, 16 -; RV64ID-LP64-NEXT: fmv.w.x fa5, a1 -; RV64ID-LP64-NEXT: slli a0, a0, 16 -; RV64ID-LP64-NEXT: fmv.w.x fa4, a0 -; RV64ID-LP64-NEXT: fadd.s fa5, fa4, fa5 -; RV64ID-LP64-NEXT: fmv.x.w a0, fa5 -; RV64ID-LP64-NEXT: call __truncsfbf2@plt -; RV64ID-LP64-NEXT: lui a1, 1048560 -; RV64ID-LP64-NEXT: or a0, a0, a1 -; RV64ID-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64ID-LP64-NEXT: addi sp, sp, 16 -; RV64ID-LP64-NEXT: ret -; -; RV32ID-ILP32D-LABEL: bfloat_add: -; RV32ID-ILP32D: # %bb.0: -; RV32ID-ILP32D-NEXT: addi sp, sp, -16 -; RV32ID-ILP32D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0 -; RV32ID-ILP32D-NEXT: fmv.x.w a1, fa1 -; RV32ID-ILP32D-NEXT: slli a1, a1, 16 -; RV32ID-ILP32D-NEXT: fmv.w.x fa5, a1 -; RV32ID-ILP32D-NEXT: slli a0, a0, 16 -; RV32ID-ILP32D-NEXT: fmv.w.x fa4, a0 -; RV32ID-ILP32D-NEXT: fadd.s fa0, fa4, fa5 -; RV32ID-ILP32D-NEXT: call __truncsfbf2@plt -; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0 -; RV32ID-ILP32D-NEXT: lui a1, 1048560 -; RV32ID-ILP32D-NEXT: or a0, a0, a1 -; RV32ID-ILP32D-NEXT: fmv.w.x fa0, a0 -; RV32ID-ILP32D-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32ID-ILP32D-NEXT: addi sp, sp, 16 -; RV32ID-ILP32D-NEXT: ret -; -; RV64ID-LP64D-LABEL: bfloat_add: -; RV64ID-LP64D: # %bb.0: -; RV64ID-LP64D-NEXT: addi sp, sp, -16 -; RV64ID-LP64D-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0 -; RV64ID-LP64D-NEXT: lui a1, 16 -; RV64ID-LP64D-NEXT: addiw a1, a1, -1 -; RV64ID-LP64D-NEXT: and a0, a0, a1 -; RV64ID-LP64D-NEXT: fmv.x.w a2, fa1 -; RV64ID-LP64D-NEXT: and a1, a2, a1 -; RV64ID-LP64D-NEXT: slli a1, a1, 16 -; RV64ID-LP64D-NEXT: fmv.w.x fa5, a1 -; RV64ID-LP64D-NEXT: slli a0, a0, 16 -; RV64ID-LP64D-NEXT: fmv.w.x fa4, a0 -; RV64ID-LP64D-NEXT: fadd.s fa0, fa4, fa5 -; RV64ID-LP64D-NEXT: call __truncsfbf2@plt -; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0 -; RV64ID-LP64D-NEXT: lui a1, 1048560 -; RV64ID-LP64D-NEXT: or a0, a0, a1 -; RV64ID-LP64D-NEXT: fmv.w.x fa0, a0 -; RV64ID-LP64D-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64ID-LP64D-NEXT: addi sp, sp, 16 -; RV64ID-LP64D-NEXT: ret %1 = fadd bfloat %a, %b ret bfloat %1 } @@ -458,84 +166,6 @@ define bfloat @bfloat_load(ptr %a) nounwind { ; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-LP64-NEXT: addi sp, sp, 16 ; RV64I-LP64-NEXT: ret -; -; RV32ID-ILP32-LABEL: bfloat_load: -; RV32ID-ILP32: # %bb.0: -; RV32ID-ILP32-NEXT: addi sp, sp, -16 -; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32ID-ILP32-NEXT: lhu a1, 6(a0) -; RV32ID-ILP32-NEXT: lhu a0, 0(a0) -; RV32ID-ILP32-NEXT: slli a1, a1, 16 -; RV32ID-ILP32-NEXT: fmv.w.x fa5, a1 -; RV32ID-ILP32-NEXT: slli a0, a0, 16 -; RV32ID-ILP32-NEXT: fmv.w.x fa4, a0 -; RV32ID-ILP32-NEXT: fadd.s fa5, fa4, fa5 -; RV32ID-ILP32-NEXT: fmv.x.w a0, fa5 -; RV32ID-ILP32-NEXT: call __truncsfbf2@plt -; RV32ID-ILP32-NEXT: lui a1, 1048560 -; RV32ID-ILP32-NEXT: or a0, a0, a1 -; RV32ID-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32ID-ILP32-NEXT: addi sp, sp, 16 -; RV32ID-ILP32-NEXT: ret -; -; RV64ID-LP64-LABEL: bfloat_load: -; RV64ID-LP64: # %bb.0: -; RV64ID-LP64-NEXT: addi sp, sp, -16 -; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64ID-LP64-NEXT: lhu a1, 6(a0) -; RV64ID-LP64-NEXT: lhu a0, 0(a0) -; RV64ID-LP64-NEXT: slli a1, a1, 16 -; RV64ID-LP64-NEXT: fmv.w.x fa5, a1 -; RV64ID-LP64-NEXT: slli a0, a0, 16 -; RV64ID-LP64-NEXT: fmv.w.x fa4, a0 -; RV64ID-LP64-NEXT: fadd.s fa5, fa4, fa5 -; RV64ID-LP64-NEXT: fmv.x.w a0, fa5 -; RV64ID-LP64-NEXT: call __truncsfbf2@plt -; RV64ID-LP64-NEXT: lui a1, 1048560 -; RV64ID-LP64-NEXT: or a0, a0, a1 -; RV64ID-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64ID-LP64-NEXT: addi sp, sp, 16 -; RV64ID-LP64-NEXT: ret -; -; RV32ID-ILP32D-LABEL: bfloat_load: -; RV32ID-ILP32D: # %bb.0: -; RV32ID-ILP32D-NEXT: addi sp, sp, -16 -; RV32ID-ILP32D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32ID-ILP32D-NEXT: lhu a1, 6(a0) -; RV32ID-ILP32D-NEXT: lhu a0, 0(a0) -; RV32ID-ILP32D-NEXT: slli a1, a1, 16 -; RV32ID-ILP32D-NEXT: fmv.w.x fa5, a1 -; RV32ID-ILP32D-NEXT: slli a0, a0, 16 -; RV32ID-ILP32D-NEXT: fmv.w.x fa4, a0 -; RV32ID-ILP32D-NEXT: fadd.s fa0, fa4, fa5 -; RV32ID-ILP32D-NEXT: call __truncsfbf2@plt -; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0 -; RV32ID-ILP32D-NEXT: lui a1, 1048560 -; RV32ID-ILP32D-NEXT: or a0, a0, a1 -; RV32ID-ILP32D-NEXT: fmv.w.x fa0, a0 -; RV32ID-ILP32D-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32ID-ILP32D-NEXT: addi sp, sp, 16 -; RV32ID-ILP32D-NEXT: ret -; -; RV64ID-LP64D-LABEL: bfloat_load: -; RV64ID-LP64D: # %bb.0: -; RV64ID-LP64D-NEXT: addi sp, sp, -16 -; RV64ID-LP64D-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64ID-LP64D-NEXT: lhu a1, 6(a0) -; RV64ID-LP64D-NEXT: lhu a0, 0(a0) -; RV64ID-LP64D-NEXT: slli a1, a1, 16 -; RV64ID-LP64D-NEXT: fmv.w.x fa5, a1 -; RV64ID-LP64D-NEXT: slli a0, a0, 16 -; RV64ID-LP64D-NEXT: fmv.w.x fa4, a0 -; RV64ID-LP64D-NEXT: fadd.s fa0, fa4, fa5 -; RV64ID-LP64D-NEXT: call __truncsfbf2@plt -; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0 -; RV64ID-LP64D-NEXT: lui a1, 1048560 -; RV64ID-LP64D-NEXT: or a0, a0, a1 -; RV64ID-LP64D-NEXT: fmv.w.x fa0, a0 -; RV64ID-LP64D-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64ID-LP64D-NEXT: addi sp, sp, 16 -; RV64ID-LP64D-NEXT: ret %1 = load bfloat, ptr %a %2 = getelementptr bfloat, ptr %a, i32 3 %3 = load bfloat, ptr %2 @@ -577,98 +207,6 @@ define void @bfloat_store(ptr %a, bfloat %b, bfloat %c) nounwind { ; RV64I-LP64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; RV64I-LP64-NEXT: addi sp, sp, 16 ; RV64I-LP64-NEXT: ret -; -; RV32ID-ILP32-LABEL: bfloat_store: -; RV32ID-ILP32: # %bb.0: -; RV32ID-ILP32-NEXT: addi sp, sp, -16 -; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32ID-ILP32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32ID-ILP32-NEXT: mv s0, a0 -; RV32ID-ILP32-NEXT: slli a2, a2, 16 -; RV32ID-ILP32-NEXT: fmv.w.x fa5, a2 -; RV32ID-ILP32-NEXT: slli a1, a1, 16 -; RV32ID-ILP32-NEXT: fmv.w.x fa4, a1 -; RV32ID-ILP32-NEXT: fadd.s fa5, fa4, fa5 -; RV32ID-ILP32-NEXT: fmv.x.w a0, fa5 -; RV32ID-ILP32-NEXT: call __truncsfbf2@plt -; RV32ID-ILP32-NEXT: sh a0, 0(s0) -; RV32ID-ILP32-NEXT: sh a0, 16(s0) -; RV32ID-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32ID-ILP32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32ID-ILP32-NEXT: addi sp, sp, 16 -; RV32ID-ILP32-NEXT: ret -; -; RV64ID-LP64-LABEL: bfloat_store: -; RV64ID-LP64: # %bb.0: -; RV64ID-LP64-NEXT: addi sp, sp, -16 -; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64ID-LP64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64ID-LP64-NEXT: mv s0, a0 -; RV64ID-LP64-NEXT: lui a0, 16 -; RV64ID-LP64-NEXT: addiw a0, a0, -1 -; RV64ID-LP64-NEXT: and a1, a1, a0 -; RV64ID-LP64-NEXT: and a0, a2, a0 -; RV64ID-LP64-NEXT: slli a0, a0, 16 -; RV64ID-LP64-NEXT: fmv.w.x fa5, a0 -; RV64ID-LP64-NEXT: slli a1, a1, 16 -; RV64ID-LP64-NEXT: fmv.w.x fa4, a1 -; RV64ID-LP64-NEXT: fadd.s fa5, fa4, fa5 -; RV64ID-LP64-NEXT: fmv.x.w a0, fa5 -; RV64ID-LP64-NEXT: call __truncsfbf2@plt -; RV64ID-LP64-NEXT: sh a0, 0(s0) -; RV64ID-LP64-NEXT: sh a0, 16(s0) -; RV64ID-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64ID-LP64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload -; RV64ID-LP64-NEXT: addi sp, sp, 16 -; RV64ID-LP64-NEXT: ret -; -; RV32ID-ILP32D-LABEL: bfloat_store: -; RV32ID-ILP32D: # %bb.0: -; RV32ID-ILP32D-NEXT: addi sp, sp, -16 -; RV32ID-ILP32D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32ID-ILP32D-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32ID-ILP32D-NEXT: mv s0, a0 -; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0 -; RV32ID-ILP32D-NEXT: fmv.x.w a1, fa1 -; RV32ID-ILP32D-NEXT: slli a1, a1, 16 -; RV32ID-ILP32D-NEXT: fmv.w.x fa5, a1 -; RV32ID-ILP32D-NEXT: slli a0, a0, 16 -; RV32ID-ILP32D-NEXT: fmv.w.x fa4, a0 -; RV32ID-ILP32D-NEXT: fadd.s fa0, fa4, fa5 -; RV32ID-ILP32D-NEXT: call __truncsfbf2@plt -; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0 -; RV32ID-ILP32D-NEXT: sh a0, 0(s0) -; RV32ID-ILP32D-NEXT: sh a0, 16(s0) -; RV32ID-ILP32D-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32ID-ILP32D-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32ID-ILP32D-NEXT: addi sp, sp, 16 -; RV32ID-ILP32D-NEXT: ret -; -; RV64ID-LP64D-LABEL: bfloat_store: -; RV64ID-LP64D: # %bb.0: -; RV64ID-LP64D-NEXT: addi sp, sp, -16 -; RV64ID-LP64D-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64ID-LP64D-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64ID-LP64D-NEXT: mv s0, a0 -; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0 -; RV64ID-LP64D-NEXT: lui a1, 16 -; RV64ID-LP64D-NEXT: addiw a1, a1, -1 -; RV64ID-LP64D-NEXT: and a0, a0, a1 -; RV64ID-LP64D-NEXT: fmv.x.w a2, fa1 -; RV64ID-LP64D-NEXT: and a1, a2, a1 -; RV64ID-LP64D-NEXT: slli a1, a1, 16 -; RV64ID-LP64D-NEXT: fmv.w.x fa5, a1 -; RV64ID-LP64D-NEXT: slli a0, a0, 16 -; RV64ID-LP64D-NEXT: fmv.w.x fa4, a0 -; RV64ID-LP64D-NEXT: fadd.s fa0, fa4, fa5 -; RV64ID-LP64D-NEXT: call __truncsfbf2@plt -; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0 -; RV64ID-LP64D-NEXT: sh a0, 0(s0) -; RV64ID-LP64D-NEXT: sh a0, 16(s0) -; RV64ID-LP64D-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64ID-LP64D-NEXT: ld s0, 0(sp) # 8-byte Folded Reload -; RV64ID-LP64D-NEXT: addi sp, sp, 16 -; RV64ID-LP64D-NEXT: ret %1 = fadd bfloat %b, %c store bfloat %1, ptr %a %2 = getelementptr bfloat, ptr %a, i32 8