diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 213311b96df74..6628e8f265fe4 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -325,6 +325,9 @@ BUILTIN(__builtin_amdgcn_read_exec_hi, "Ui", "nc") BUILTIN(__builtin_amdgcn_endpgm, "v", "nr") +BUILTIN(__builtin_amdgcn_get_fpenv, "WUi", "n") +BUILTIN(__builtin_amdgcn_set_fpenv, "vWUi", "n") + //===----------------------------------------------------------------------===// // R600-NI only builtins. //===----------------------------------------------------------------------===// diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index cc30665a4eeee..20c3575793915 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18439,6 +18439,17 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::amdgcn_global_load_tr, {ArgTy}); return Builder.CreateCall(F, {Addr}); } + case AMDGPU::BI__builtin_amdgcn_get_fpenv: { + Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv, + {llvm::Type::getInt64Ty(getLLVMContext())}); + return Builder.CreateCall(F); + } + case AMDGPU::BI__builtin_amdgcn_set_fpenv: { + Function *F = CGM.getIntrinsic(Intrinsic::set_fpenv, + {llvm::Type::getInt64Ty(getLLVMContext())}); + llvm::Value *Env = EmitScalarExpr(E->getArg(0)); + return Builder.CreateCall(F, {Env}); + } case AMDGPU::BI__builtin_amdgcn_read_exec: return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false); case AMDGPU::BI__builtin_amdgcn_read_exec_lo: diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 561764edd0810..82a7c16106f6c 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -5321,6 +5321,9 @@ bool Sema::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, // position of memory order and scope arguments in the builtin unsigned OrderIndex, 
ScopeIndex; switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_get_fpenv: + case AMDGPU::BI__builtin_amdgcn_set_fpenv: + return false; case AMDGPU::BI__builtin_amdgcn_atomic_inc32: case AMDGPU::BI__builtin_amdgcn_atomic_inc64: case AMDGPU::BI__builtin_amdgcn_atomic_dec32: diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index 7d9010ee9067d..8a4533633706b 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -839,6 +839,18 @@ unsigned test_wavefrontsize() { return __builtin_amdgcn_wavefrontsize(); } +// CHECK-LABEL: test_get_fpenv( +unsigned long test_get_fpenv() { + // CHECK: call i64 @llvm.get.fpenv.i64() + return __builtin_amdgcn_get_fpenv(); +} + +// CHECK-LABEL: test_set_fpenv( +void test_set_fpenv(unsigned long env) { + // CHECK: call void @llvm.set.fpenv.i64(i64 %[[ENV:.+]]) + __builtin_amdgcn_set_fpenv(env); +} + // CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024} // CHECK-DAG: [[$WS_RANGE]] = !{i16 1, i16 1025} // CHECK-DAG: attributes #[[$NOUNWIND_READONLY]] = { convergent mustprogress nocallback nofree nounwind willreturn memory(none) } diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 0c588c8495862..ed41be4b08dee 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -1151,6 +1151,13 @@ The AMDGPU backend implements the following LLVM IR intrinsics. register do not exactly match the FLT_ROUNDS values, so a conversion is performed. + :ref:`llvm.get.fpenv<int_get_fpenv>` Returns the current value of the AMDGPU floating point environment. + This stores information related to the current rounding mode, + denormalization mode, enabled traps, and floating point exceptions. + The format is a 64-bit concatenation of the MODE and TRAPSTS registers. + + :ref:`llvm.set.fpenv<int_set_fpenv>` Sets the floating point environment to the specified state.
+ llvm.amdgcn.wave.reduce.umin Performs an arithmetic unsigned min reduction on the unsigned values provided by each lane in the wavefront. Intrinsic takes a hint for reduction strategy using second operand diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 0e3f6f8daabab..e0f301aa06dc1 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -26432,6 +26432,7 @@ similar to C library function 'fesetround', however this intrinsic does not return any value and uses platform-independent representation of IEEE rounding modes. +.. _int_get_fpenv: '``llvm.get.fpenv``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -26455,6 +26456,7 @@ Semantics: The '``llvm.get.fpenv``' intrinsic reads the current floating-point environment and returns it as an integer value. +.. _int_set_fpenv: '``llvm.set.fpenv``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 8ce6ee5cebb26..56abe7fe40270 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -70,6 +70,8 @@ Changes to the AArch64 Backend Changes to the AMDGPU Backend ----------------------------- +* Implemented the ``llvm.get.fpenv`` and ``llvm.set.fpenv`` intrinsics. 
+ Changes to the ARM Backend -------------------------- diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 4c3b983f2960d..0029c51231f28 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -905,6 +905,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, getActionDefinitionsBuilder(G_STACKRESTORE) .legalFor({PrivatePtr}); + getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV}).customFor({S64}); + getActionDefinitionsBuilder(G_GLOBAL_VALUE) .customIf(typeIsNot(0, PrivatePtr)); @@ -2128,6 +2130,10 @@ bool AMDGPULegalizerInfo::legalizeCustom( return legalizeFPTruncRound(MI, B); case TargetOpcode::G_STACKSAVE: return legalizeStackSave(MI, B); + case TargetOpcode::G_GET_FPENV: + return legalizeGetFPEnv(MI, MRI, B); + case TargetOpcode::G_SET_FPENV: + return legalizeSetFPEnv(MI, MRI, B); default: return false; } @@ -6940,6 +6946,57 @@ bool AMDGPULegalizerInfo::legalizeWaveID(MachineInstr &MI, return true; } +// Hwreg fields that together form the 64-bit FP environment value. +static constexpr unsigned FPEnvModeBitField = + AMDGPU::Hwreg::HwregEncoding::encode(AMDGPU::Hwreg::ID_MODE, 0, 23); + +static constexpr unsigned FPEnvTrapBitField = + AMDGPU::Hwreg::HwregEncoding::encode(AMDGPU::Hwreg::ID_TRAPSTS, 0, 5); + +/// Legalize an s64 G_GET_FPENV into two s_getreg reads (MODE, TRAPSTS) merged +/// into the result, MODE first. Returns false for any other result type. +bool AMDGPULegalizerInfo::legalizeGetFPEnv(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + Register Dst = MI.getOperand(0).getReg(); + if (MRI.getType(Dst) != S64) + return false; + + auto ModeReg = + B.buildIntrinsic(Intrinsic::amdgcn_s_getreg, {S32}, + /*HasSideEffects=*/true, /*isConvergent=*/false) + .addImm(FPEnvModeBitField); + auto TrapReg = + B.buildIntrinsic(Intrinsic::amdgcn_s_getreg, {S32}, + /*HasSideEffects=*/true, /*isConvergent=*/false) + .addImm(FPEnvTrapBitField); + B.buildMergeLikeInstr(Dst, {ModeReg, TrapReg}); + MI.eraseFromParent(); + return true; +} + +/// Legalize an s64 G_SET_FPENV by unmerging the value into two s32 halves and +/// writing them to MODE (first half) and TRAPSTS (second half) with s_setreg. +bool AMDGPULegalizerInfo::legalizeSetFPEnv(MachineInstr &MI, + MachineRegisterInfo 
&MRI, + MachineIRBuilder &B) const { + Register Src = MI.getOperand(0).getReg(); + if (MRI.getType(Src) != S64) + return false; + + auto Unmerge = B.buildUnmerge({S32, S32}, MI.getOperand(0)); + B.buildIntrinsic(Intrinsic::amdgcn_s_setreg, ArrayRef<DstOp>(), + /*HasSideEffects=*/true, /*isConvergent=*/false) + .addImm(static_cast<int16_t>(FPEnvModeBitField)) + .addReg(Unmerge.getReg(0)); + B.buildIntrinsic(Intrinsic::amdgcn_s_setreg, ArrayRef<DstOp>(), + /*HasSideEffects=*/true, /*isConvergent=*/false) + .addImm(static_cast<int16_t>(FPEnvTrapBitField)) + .addReg(Unmerge.getReg(1)); + MI.eraseFromParent(); + return true; +} + bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const { MachineIRBuilder &B = Helper.MIRBuilder; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index ecbe42681c669..9661646fffc9d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -214,6 +214,11 @@ class AMDGPULegalizerInfo final : public LegalizerInfo { bool legalizeStackSave(MachineInstr &MI, MachineIRBuilder &B) const; bool legalizeWaveID(MachineInstr &MI, MachineIRBuilder &B) const; + bool legalizeGetFPEnv(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; + bool legalizeSetFPEnv(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; + bool legalizeImageIntrinsic( MachineInstr &MI, MachineIRBuilder &B, GISelChangeObserver &Observer, diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 5f6ce12ae1fba..bf1463f93df6c 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -877,6 +877,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::STACKSAVE, MVT::Other, Custom); setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom); + setOperationAction(ISD::GET_FPENV, MVT::i64, Custom); + 
setOperationAction(ISD::SET_FPENV, MVT::i64, Custom); // TODO: Could move this to custom lowering, could benefit from combines on // extract of relevant bits. @@ -4081,6 +4083,76 @@ SDValue SITargetLowering::lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::BF16_TO_FP, SL, DstVT, BitCast); } +/// Lower an i64 GET_FPENV to two s_getreg reads (MODE, TRAPSTS) packed as a +/// v2i32 and bitcast to i64; any other result type is returned unchanged. +SDValue SITargetLowering::lowerGET_FPENV(SDValue Op, SelectionDAG &DAG) const { + SDLoc SL(Op); + if (Op.getValueType() != MVT::i64) + return Op; + + uint32_t ModeHwReg = + AMDGPU::Hwreg::HwregEncoding::encode(AMDGPU::Hwreg::ID_MODE, 0, 23); + SDValue ModeHwRegImm = DAG.getTargetConstant(ModeHwReg, SL, MVT::i32); + uint32_t TrapHwReg = + AMDGPU::Hwreg::HwregEncoding::encode(AMDGPU::Hwreg::ID_TRAPSTS, 0, 5); + SDValue TrapHwRegImm = DAG.getTargetConstant(TrapHwReg, SL, MVT::i32); + + SDVTList VTList = DAG.getVTList(MVT::i32, MVT::Other); + SDValue IntrinID = + DAG.getTargetConstant(Intrinsic::amdgcn_s_getreg, SL, MVT::i32); + SDValue GetModeReg = DAG.getNode(ISD::INTRINSIC_W_CHAIN, SL, VTList, + Op.getOperand(0), IntrinID, ModeHwRegImm); + SDValue GetTrapReg = DAG.getNode(ISD::INTRINSIC_W_CHAIN, SL, VTList, + Op.getOperand(0), IntrinID, TrapHwRegImm); + SDValue TokenReg = + DAG.getNode(ISD::TokenFactor, SL, MVT::Other, GetModeReg.getValue(1), + GetTrapReg.getValue(1)); + + SDValue Halves = + DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, GetModeReg, GetTrapReg); + SDValue Result = DAG.getNode(ISD::BITCAST, SL, MVT::i64, Halves); + + return DAG.getMergeValues({Result, TokenReg}, SL); +} + +/// Lower an i64 SET_FPENV: split the value into two i32 halves, pass each +/// through readfirstlane, and write them to MODE and TRAPSTS with s_setreg. +SDValue SITargetLowering::lowerSET_FPENV(SDValue Op, SelectionDAG &DAG) const { + SDLoc SL(Op); + if (Op.getOperand(1).getValueType() != MVT::i64) + return Op; + + SDValue Input = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op.getOperand(1)); + SDValue NewModeReg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Input, + DAG.getConstant(0, SL, MVT::i32)); + SDValue NewTrapReg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Input, + DAG.getConstant(1, SL, 
MVT::i32)); + + SDValue ReadFirstLaneID = + DAG.getTargetConstant(Intrinsic::amdgcn_readfirstlane, SL, MVT::i32); + NewModeReg = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SL, MVT::i32, + ReadFirstLaneID, NewModeReg); + NewTrapReg = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SL, MVT::i32, + ReadFirstLaneID, NewTrapReg); + + unsigned ModeHwReg = + AMDGPU::Hwreg::HwregEncoding::encode(AMDGPU::Hwreg::ID_MODE, 0, 23); + SDValue ModeHwRegImm = DAG.getTargetConstant(ModeHwReg, SL, MVT::i32); + unsigned TrapHwReg = + AMDGPU::Hwreg::HwregEncoding::encode(AMDGPU::Hwreg::ID_TRAPSTS, 0, 5); + SDValue TrapHwRegImm = DAG.getTargetConstant(TrapHwReg, SL, MVT::i32); + + SDValue IntrinID = + DAG.getTargetConstant(Intrinsic::amdgcn_s_setreg, SL, MVT::i32); + SDValue SetModeReg = + DAG.getNode(ISD::INTRINSIC_VOID, SL, MVT::Other, Op.getOperand(0), + IntrinID, ModeHwRegImm, NewModeReg); + SDValue SetTrapReg = + DAG.getNode(ISD::INTRINSIC_VOID, SL, MVT::Other, Op.getOperand(0), + IntrinID, TrapHwRegImm, NewTrapReg); + return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, SetTrapReg, SetModeReg); +} + Register SITargetLowering::getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const { Register Reg = StringSwitch<Register>(RegName) @@ -5681,6 +5753,10 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FP_EXTEND: case ISD::STRICT_FP_EXTEND: return lowerFP_EXTEND(Op, DAG); + case ISD::GET_FPENV: + return lowerGET_FPENV(Op, DAG); + case ISD::SET_FPENV: + return lowerSET_FPENV(Op, DAG); } return SDValue(); } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index fc90a208fa0b3..a20442e3737ee 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -425,6 +425,8 @@ class SITargetLowering final : public AMDGPUTargetLowering { SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue 
lowerGET_FPENV(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSET_FPENV(SDValue Op, SelectionDAG &DAG) const; Register getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const override; diff --git a/llvm/test/CodeGen/AMDGPU/fpenv.ll b/llvm/test/CodeGen/AMDGPU/fpenv.ll new file mode 100644 index 0000000000000..80d5c9ad0cebf --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fpenv.ll @@ -0,0 +1,374 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn -global-isel=0 -mcpu=tahiti < %s | FileCheck -check-prefixes=GFX6-SDAG %s +; RUN: llc -mtriple=amdgcn -global-isel=1 -mcpu=tahiti < %s | FileCheck -check-prefixes=GFX6-ISEL %s +; RUN: llc -mtriple=amdgcn -global-isel=0 -mcpu=fiji < %s | FileCheck -check-prefixes=GFX8-SDAG %s +; RUN: llc -mtriple=amdgcn -global-isel=1 -mcpu=fiji < %s | FileCheck -check-prefixes=GFX8-ISEL %s +; RUN: llc -mtriple=amdgcn -global-isel=0 -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9-SDAG %s +; RUN: llc -mtriple=amdgcn -global-isel=1 -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9-ISEL %s +; RUN: llc -mtriple=amdgcn -global-isel=0 -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10-SDAG %s +; RUN: llc -mtriple=amdgcn -global-isel=1 -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10-ISEL %s +; RUN: llc -mtriple=amdgcn -amdgpu-enable-delay-alu=0 -global-isel=0 -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11-SDAG %s +; RUN: llc -mtriple=amdgcn -amdgpu-enable-delay-alu=0 -global-isel=1 -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11-ISEL %s + +declare i64 @llvm.get.fpenv.i64() + +declare void @llvm.set.fpenv.i64(i64) + +define i64 @get_fpenv() { +; GFX6-SDAG-LABEL: get_fpenv: +; GFX6-SDAG: ; %bb.0: ; %entry +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_getreg_b32 s4, hwreg(HW_REG_TRAPSTS, 0, 5) +; GFX6-SDAG-NEXT: s_getreg_b32 s5, hwreg(HW_REG_MODE, 0, 23) +; GFX6-SDAG-NEXT: v_mov_b32_e32 v0, s5 +; GFX6-SDAG-NEXT: 
v_mov_b32_e32 v1, s4 +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-ISEL-LABEL: get_fpenv: +; GFX6-ISEL: ; %bb.0: ; %entry +; GFX6-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-ISEL-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 23) +; GFX6-ISEL-NEXT: s_getreg_b32 s5, hwreg(HW_REG_TRAPSTS, 0, 5) +; GFX6-ISEL-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-ISEL-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-ISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: get_fpenv: +; GFX8-SDAG: ; %bb.0: ; %entry +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: s_getreg_b32 s4, hwreg(HW_REG_TRAPSTS, 0, 5) +; GFX8-SDAG-NEXT: s_getreg_b32 s5, hwreg(HW_REG_MODE, 0, 23) +; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, s5 +; GFX8-SDAG-NEXT: v_mov_b32_e32 v1, s4 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-ISEL-LABEL: get_fpenv: +; GFX8-ISEL: ; %bb.0: ; %entry +; GFX8-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-ISEL-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 23) +; GFX8-ISEL-NEXT: s_getreg_b32 s5, hwreg(HW_REG_TRAPSTS, 0, 5) +; GFX8-ISEL-NEXT: v_mov_b32_e32 v0, s4 +; GFX8-ISEL-NEXT: v_mov_b32_e32 v1, s5 +; GFX8-ISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: get_fpenv: +; GFX9-SDAG: ; %bb.0: ; %entry +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_getreg_b32 s4, hwreg(HW_REG_TRAPSTS, 0, 5) +; GFX9-SDAG-NEXT: s_getreg_b32 s5, hwreg(HW_REG_MODE, 0, 23) +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s5 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-ISEL-LABEL: get_fpenv: +; GFX9-ISEL: ; %bb.0: ; %entry +; GFX9-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-ISEL-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 23) +; GFX9-ISEL-NEXT: s_getreg_b32 s5, hwreg(HW_REG_TRAPSTS, 0, 5) +; GFX9-ISEL-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-ISEL-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-ISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: get_fpenv: +; GFX10-SDAG: ; %bb.0: ; %entry +; 
GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_getreg_b32 s4, hwreg(HW_REG_TRAPSTS, 0, 5) +; GFX10-SDAG-NEXT: s_getreg_b32 s5, hwreg(HW_REG_MODE, 0, 23) +; GFX10-SDAG-NEXT: v_mov_b32_e32 v1, s4 +; GFX10-SDAG-NEXT: v_mov_b32_e32 v0, s5 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-ISEL-LABEL: get_fpenv: +; GFX10-ISEL: ; %bb.0: ; %entry +; GFX10-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-ISEL-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 23) +; GFX10-ISEL-NEXT: s_getreg_b32 s5, hwreg(HW_REG_TRAPSTS, 0, 5) +; GFX10-ISEL-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-ISEL-NEXT: v_mov_b32_e32 v1, s5 +; GFX10-ISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: get_fpenv: +; GFX11-SDAG: ; %bb.0: ; %entry +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_getreg_b32 s0, hwreg(HW_REG_TRAPSTS, 0, 5) +; GFX11-SDAG-NEXT: s_getreg_b32 s1, hwreg(HW_REG_MODE, 0, 23) +; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, s0 :: v_dual_mov_b32 v0, s1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-ISEL-LABEL: get_fpenv: +; GFX11-ISEL: ; %bb.0: ; %entry +; GFX11-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-ISEL-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 23) +; GFX11-ISEL-NEXT: s_getreg_b32 s1, hwreg(HW_REG_TRAPSTS, 0, 5) +; GFX11-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-ISEL-NEXT: s_setpc_b64 s[30:31] +entry: + %0 = call i64 @llvm.get.fpenv.i64() + ret i64 %0 +} + +define void @set_fpenv(i64 %env) { +; GFX6-SDAG-LABEL: set_fpenv: +; GFX6-SDAG: ; %bb.0: ; %entry +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: v_readfirstlane_b32 s4, v0 +; GFX6-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 23), s4 +; GFX6-SDAG-NEXT: v_readfirstlane_b32 s4, v1 +; GFX6-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_TRAPSTS, 0, 5), s4 +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-ISEL-LABEL: set_fpenv: +; GFX6-ISEL: ; %bb.0: ; %entry +; GFX6-ISEL-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX6-ISEL-NEXT: v_readfirstlane_b32 s4, v0 +; GFX6-ISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 23), s4 +; GFX6-ISEL-NEXT: v_readfirstlane_b32 s4, v1 +; GFX6-ISEL-NEXT: s_setreg_b32 hwreg(HW_REG_TRAPSTS, 0, 5), s4 +; GFX6-ISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: set_fpenv: +; GFX8-SDAG: ; %bb.0: ; %entry +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_readfirstlane_b32 s4, v0 +; GFX8-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 23), s4 +; GFX8-SDAG-NEXT: v_readfirstlane_b32 s4, v1 +; GFX8-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_TRAPSTS, 0, 5), s4 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-ISEL-LABEL: set_fpenv: +; GFX8-ISEL: ; %bb.0: ; %entry +; GFX8-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-ISEL-NEXT: v_readfirstlane_b32 s4, v0 +; GFX8-ISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 23), s4 +; GFX8-ISEL-NEXT: v_readfirstlane_b32 s4, v1 +; GFX8-ISEL-NEXT: s_setreg_b32 hwreg(HW_REG_TRAPSTS, 0, 5), s4 +; GFX8-ISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: set_fpenv: +; GFX9-SDAG: ; %bb.0: ; %entry +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_readfirstlane_b32 s4, v0 +; GFX9-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 23), s4 +; GFX9-SDAG-NEXT: v_readfirstlane_b32 s4, v1 +; GFX9-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_TRAPSTS, 0, 5), s4 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-ISEL-LABEL: set_fpenv: +; GFX9-ISEL: ; %bb.0: ; %entry +; GFX9-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-ISEL-NEXT: v_readfirstlane_b32 s4, v0 +; GFX9-ISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 23), s4 +; GFX9-ISEL-NEXT: v_readfirstlane_b32 s4, v1 +; GFX9-ISEL-NEXT: s_setreg_b32 hwreg(HW_REG_TRAPSTS, 0, 5), s4 +; GFX9-ISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: set_fpenv: +; GFX10-SDAG: ; %bb.0: ; %entry +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_readfirstlane_b32 s4, v0 +; 
GFX10-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 23), s4 +; GFX10-SDAG-NEXT: v_readfirstlane_b32 s4, v1 +; GFX10-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_TRAPSTS, 0, 5), s4 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-ISEL-LABEL: set_fpenv: +; GFX10-ISEL: ; %bb.0: ; %entry +; GFX10-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-ISEL-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-ISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 23), s4 +; GFX10-ISEL-NEXT: v_readfirstlane_b32 s4, v1 +; GFX10-ISEL-NEXT: s_setreg_b32 hwreg(HW_REG_TRAPSTS, 0, 5), s4 +; GFX10-ISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: set_fpenv: +; GFX11-SDAG: ; %bb.0: ; %entry +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 23), s0 +; GFX11-SDAG-NEXT: v_readfirstlane_b32 s0, v1 +; GFX11-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_TRAPSTS, 0, 5), s0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-ISEL-LABEL: set_fpenv: +; GFX11-ISEL: ; %bb.0: ; %entry +; GFX11-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-ISEL-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-ISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 23), s0 +; GFX11-ISEL-NEXT: v_readfirstlane_b32 s0, v1 +; GFX11-ISEL-NEXT: s_setreg_b32 hwreg(HW_REG_TRAPSTS, 0, 5), s0 +; GFX11-ISEL-NEXT: s_setpc_b64 s[30:31] +entry: + call void @llvm.set.fpenv.i64(i64 %env) + ret void +} + +define void @set_fpenv_constant() { +; GFX6-SDAG-LABEL: set_fpenv_constant: +; GFX6-SDAG: ; %bb.0: ; %entry +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 23), 0 +; GFX6-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 0, 5), 0 +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-ISEL-LABEL: set_fpenv_constant: +; GFX6-ISEL: ; %bb.0: ; %entry +; GFX6-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-ISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 23), 0 +; 
GFX6-ISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 0, 5), 0 +; GFX6-ISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: set_fpenv_constant: +; GFX8-SDAG: ; %bb.0: ; %entry +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 23), 0 +; GFX8-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 0, 5), 0 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-ISEL-LABEL: set_fpenv_constant: +; GFX8-ISEL: ; %bb.0: ; %entry +; GFX8-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-ISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 23), 0 +; GFX8-ISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 0, 5), 0 +; GFX8-ISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: set_fpenv_constant: +; GFX9-SDAG: ; %bb.0: ; %entry +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 23), 0 +; GFX9-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 0, 5), 0 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-ISEL-LABEL: set_fpenv_constant: +; GFX9-ISEL: ; %bb.0: ; %entry +; GFX9-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-ISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 23), 0 +; GFX9-ISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 0, 5), 0 +; GFX9-ISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: set_fpenv_constant: +; GFX10-SDAG: ; %bb.0: ; %entry +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 23), 0 +; GFX10-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 0, 5), 0 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-ISEL-LABEL: set_fpenv_constant: +; GFX10-ISEL: ; %bb.0: ; %entry +; GFX10-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-ISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 23), 0 +; GFX10-ISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 0, 5), 0 +; GFX10-ISEL-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX11-SDAG-LABEL: set_fpenv_constant: +; GFX11-SDAG: ; %bb.0: ; %entry +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 23), 0 +; GFX11-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 0, 5), 0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-ISEL-LABEL: set_fpenv_constant: +; GFX11-ISEL: ; %bb.0: ; %entry +; GFX11-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-ISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 23), 0 +; GFX11-ISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 0, 5), 0 +; GFX11-ISEL-NEXT: s_setpc_b64 s[30:31] +entry: + call void @llvm.set.fpenv.i64(i64 0) + ret void +} + +define void @get_set_fpenv() { +; GFX6-SDAG-LABEL: get_set_fpenv: +; GFX6-SDAG: ; %bb.0: ; %entry +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_getreg_b32 s4, hwreg(HW_REG_TRAPSTS, 0, 5) +; GFX6-SDAG-NEXT: s_getreg_b32 s5, hwreg(HW_REG_MODE, 0, 23) +; GFX6-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 23), s5 +; GFX6-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_TRAPSTS, 0, 5), s4 +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-ISEL-LABEL: get_set_fpenv: +; GFX6-ISEL: ; %bb.0: ; %entry +; GFX6-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-ISEL-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 23) +; GFX6-ISEL-NEXT: s_getreg_b32 s5, hwreg(HW_REG_TRAPSTS, 0, 5) +; GFX6-ISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 23), s4 +; GFX6-ISEL-NEXT: s_setreg_b32 hwreg(HW_REG_TRAPSTS, 0, 5), s5 +; GFX6-ISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: get_set_fpenv: +; GFX8-SDAG: ; %bb.0: ; %entry +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: s_getreg_b32 s4, hwreg(HW_REG_TRAPSTS, 0, 5) +; GFX8-SDAG-NEXT: s_getreg_b32 s5, hwreg(HW_REG_MODE, 0, 23) +; GFX8-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 23), s5 +; GFX8-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_TRAPSTS, 0, 5), s4 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX8-ISEL-LABEL: get_set_fpenv: +; GFX8-ISEL: ; %bb.0: ; %entry +; GFX8-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-ISEL-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 23) +; GFX8-ISEL-NEXT: s_getreg_b32 s5, hwreg(HW_REG_TRAPSTS, 0, 5) +; GFX8-ISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 23), s4 +; GFX8-ISEL-NEXT: s_setreg_b32 hwreg(HW_REG_TRAPSTS, 0, 5), s5 +; GFX8-ISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: get_set_fpenv: +; GFX9-SDAG: ; %bb.0: ; %entry +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_getreg_b32 s4, hwreg(HW_REG_TRAPSTS, 0, 5) +; GFX9-SDAG-NEXT: s_getreg_b32 s5, hwreg(HW_REG_MODE, 0, 23) +; GFX9-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 23), s5 +; GFX9-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_TRAPSTS, 0, 5), s4 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-ISEL-LABEL: get_set_fpenv: +; GFX9-ISEL: ; %bb.0: ; %entry +; GFX9-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-ISEL-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 23) +; GFX9-ISEL-NEXT: s_getreg_b32 s5, hwreg(HW_REG_TRAPSTS, 0, 5) +; GFX9-ISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 23), s4 +; GFX9-ISEL-NEXT: s_setreg_b32 hwreg(HW_REG_TRAPSTS, 0, 5), s5 +; GFX9-ISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: get_set_fpenv: +; GFX10-SDAG: ; %bb.0: ; %entry +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_getreg_b32 s4, hwreg(HW_REG_TRAPSTS, 0, 5) +; GFX10-SDAG-NEXT: s_getreg_b32 s5, hwreg(HW_REG_MODE, 0, 23) +; GFX10-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 23), s5 +; GFX10-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_TRAPSTS, 0, 5), s4 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-ISEL-LABEL: get_set_fpenv: +; GFX10-ISEL: ; %bb.0: ; %entry +; GFX10-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-ISEL-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 23) +; GFX10-ISEL-NEXT: s_getreg_b32 s5, hwreg(HW_REG_TRAPSTS, 0, 5) +; GFX10-ISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 
0, 23), s4 +; GFX10-ISEL-NEXT: s_setreg_b32 hwreg(HW_REG_TRAPSTS, 0, 5), s5 +; GFX10-ISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: get_set_fpenv: +; GFX11-SDAG: ; %bb.0: ; %entry +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_getreg_b32 s0, hwreg(HW_REG_TRAPSTS, 0, 5) +; GFX11-SDAG-NEXT: s_getreg_b32 s1, hwreg(HW_REG_MODE, 0, 23) +; GFX11-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 23), s1 +; GFX11-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_TRAPSTS, 0, 5), s0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-ISEL-LABEL: get_set_fpenv: +; GFX11-ISEL: ; %bb.0: ; %entry +; GFX11-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-ISEL-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 23) +; GFX11-ISEL-NEXT: s_getreg_b32 s1, hwreg(HW_REG_TRAPSTS, 0, 5) +; GFX11-ISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 23), s0 +; GFX11-ISEL-NEXT: s_setreg_b32 hwreg(HW_REG_TRAPSTS, 0, 5), s1 +; GFX11-ISEL-NEXT: s_setpc_b64 s[30:31] +entry: + %0 = call i64 @llvm.get.fpenv.i64() + call void @llvm.set.fpenv.i64(i64 %0) + ret void +}