-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64] Lowering of fpmode intrinsics in DAG #80611
Conversation
@llvm/pr-subscribers-backend-aarch64 Author: Serge Pavlov (spavloff) ChangesLLVM intrinsics Full diff: https://github.com/llvm/llvm-project/pull/80611.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index b59f8d7306046..3cb7761331df1 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -798,6 +798,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
+ setOperationAction(ISD::GET_FPMODE, MVT::i32, Custom);
+ setOperationAction(ISD::SET_FPMODE, MVT::i32, Custom);
+ setOperationAction(ISD::RESET_FPMODE, MVT::Other, Custom);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
if (!Subtarget->hasLSE() && !Subtarget->outlineAtomics()) {
@@ -4669,6 +4672,65 @@ SDValue AArch64TargetLowering::LowerSET_ROUNDING(SDValue Op,
return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
}
+SDValue AArch64TargetLowering::LowerGET_FPMODE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue Chain = Op->getOperand(0);
+
+ // Get current value of FPCR.
+ SDValue Ops[] = {
+ Chain, DAG.getTargetConstant(Intrinsic::aarch64_get_fpcr, DL, MVT::i64)};
+ SDValue FPCR =
+ DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i64, MVT::Other}, Ops);
+ Chain = FPCR.getValue(1);
+ FPCR = FPCR.getValue(0);
+
+ // Truncate FPCR to 32 bits.
+ SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPCR);
+
+ return DAG.getMergeValues({Result, Chain}, DL);
+}
+
+SDValue AArch64TargetLowering::LowerSET_FPMODE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue Chain = Op->getOperand(0);
+ SDValue Mode = Op->getOperand(1);
+
+ // Extend the specified value to 64 bits.
+ SDValue FPCR = DAG.getZExtOrTrunc(Mode, DL, MVT::i64);
+
+ // Set new value of FPCR.
+ SDValue Ops2[] = {
+ Chain, DAG.getConstant(Intrinsic::aarch64_set_fpcr, DL, MVT::i64), FPCR};
+ return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
+}
+
+SDValue AArch64TargetLowering::LowerRESET_FPMODE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue Chain = Op->getOperand(0);
+
+ // Get current value of FPCR.
+ SDValue Ops[] = {
+ Chain, DAG.getTargetConstant(Intrinsic::aarch64_get_fpcr, DL, MVT::i64)};
+ SDValue FPCR =
+ DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i64, MVT::Other}, Ops);
+ Chain = FPCR.getValue(1);
+ FPCR = FPCR.getValue(0);
+
+ // Clear bits that are not reserved.
+ SDValue FPSCRMasked = DAG.getNode(
+ ISD::AND, DL, MVT::i64, FPCR,
+ DAG.getConstant(AArch64::ReservedFPControlBits, DL, MVT::i64));
+
+ // Set new value of FPCR.
+ SDValue Ops2[] = {Chain,
+ DAG.getConstant(Intrinsic::aarch64_set_fpcr, DL, MVT::i64),
+ FPSCRMasked};
+ return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
+}
+
static unsigned selectUmullSmull(SDValue &N0, SDValue &N1, SelectionDAG &DAG,
SDLoc DL, bool &IsMLA) {
bool IsN0SExt = isSignExtended(N0, DAG);
@@ -6297,6 +6359,12 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerGET_ROUNDING(Op, DAG);
case ISD::SET_ROUNDING:
return LowerSET_ROUNDING(Op, DAG);
+ case ISD::GET_FPMODE:
+ return LowerGET_FPMODE(Op, DAG);
+ case ISD::SET_FPMODE:
+ return LowerSET_FPMODE(Op, DAG);
+ case ISD::RESET_FPMODE:
+ return LowerRESET_FPMODE(Op, DAG);
case ISD::MUL:
return LowerMUL(Op, DAG);
case ISD::MULHS:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 436b21fd13463..205f6ad4d7c53 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -523,6 +523,9 @@ enum Rounding {
// Bit position of rounding mode bits in FPCR.
const unsigned RoundingBitsPos = 22;
+// Reserved bits should be preserved when modifying FPCR.
+const uint64_t ReservedFPControlBits = 0xfe0fe0f8;
+
// Registers used to pass function arguments.
ArrayRef<MCPhysReg> getGPRArgRegs();
ArrayRef<MCPhysReg> getFPRArgRegs();
@@ -1120,6 +1123,9 @@ class AArch64TargetLowering : public TargetLowering {
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerRESET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/AArch64/fpmode.ll b/llvm/test/CodeGen/AArch64/fpmode.ll
index ebfb0696a95ad..9ea5d081d1470 100644
--- a/llvm/test/CodeGen/AArch64/fpmode.ll
+++ b/llvm/test/CodeGen/AArch64/fpmode.ll
@@ -6,17 +6,14 @@ declare i32 @llvm.get.fpmode.i32()
declare void @llvm.set.fpmode.i32(i32 %fpmode)
declare void @llvm.reset.fpmode()
-define i32 @func_get_fpmode_soft() #0 {
-; DAG-LABEL: func_get_fpmode_soft:
+define i32 @func_get_fpmode() #0 {
+; DAG-LABEL: func_get_fpmode:
; DAG: // %bb.0: // %entry
-; DAG-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; DAG-NEXT: add x0, sp, #12
-; DAG-NEXT: bl fegetmode
-; DAG-NEXT: ldr w0, [sp, #12]
-; DAG-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; DAG-NEXT: mrs x0, FPCR
+; DAG-NEXT: // kill: def $w0 killed $w0 killed $x0
; DAG-NEXT: ret
;
-; GIS-LABEL: func_get_fpmode_soft:
+; GIS-LABEL: func_get_fpmode:
; GIS: // %bb.0: // %entry
; GIS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; GIS-NEXT: add x0, sp, #12
@@ -29,17 +26,14 @@ entry:
ret i32 %fpmode
}
-define void @func_set_fpmode_soft(i32 %fpmode) #0 {
-; DAG-LABEL: func_set_fpmode_soft:
+define void @func_set_fpmode(i32 %fpmode) #0 {
+; DAG-LABEL: func_set_fpmode:
; DAG: // %bb.0: // %entry
-; DAG-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; DAG-NEXT: str w0, [sp, #12]
-; DAG-NEXT: add x0, sp, #12
-; DAG-NEXT: bl fesetmode
-; DAG-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; DAG-NEXT: mov w8, w0
+; DAG-NEXT: msr FPCR, x8
; DAG-NEXT: ret
;
-; GIS-LABEL: func_set_fpmode_soft:
+; GIS-LABEL: func_set_fpmode:
; GIS: // %bb.0: // %entry
; GIS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; GIS-NEXT: str w0, [sp, #12]
@@ -52,16 +46,17 @@ entry:
ret void
}
-define void @func_reset_fpmode_soft() #0 {
-; DAG-LABEL: func_reset_fpmode_soft:
+define void @func_reset_fpmode() #0 {
+; DAG-LABEL: func_reset_fpmode:
; DAG: // %bb.0: // %entry
-; DAG-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; DAG-NEXT: mov x0, #-1 // =0xffffffffffffffff
-; DAG-NEXT: bl fesetmode
-; DAG-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; DAG-NEXT: mov w9, #57592 // =0xe0f8
+; DAG-NEXT: mrs x8, FPCR
+; DAG-NEXT: movk w9, #65039, lsl #16
+; DAG-NEXT: and x8, x8, x9
+; DAG-NEXT: msr FPCR, x8
; DAG-NEXT: ret
;
-; GIS-LABEL: func_reset_fpmode_soft:
+; GIS-LABEL: func_reset_fpmode:
; GIS: // %bb.0: // %entry
; GIS-NEXT: mov x0, #-1 // =0xffffffffffffffff
; GIS-NEXT: b fesetmode
@@ -70,4 +65,4 @@ entry:
ret void
}
-attributes #0 = { nounwind "use-soft-float"="true" }
+attributes #0 = { nounwind }
|
Ping. |
1 similar comment
Ping. |
Ping. |
Ping. |
LLVM intrinsics `get_fpmode`, `set_fpmode` and `reset_fpmode` operate control modes, the bits of FP environment that affect FP operations. On AArch64 these bits are in FPCR. The lowering implemented to produce code close to that of GLIBC.
Ping. |
LGTM. |
Thanks! |
LLVM intrinsics
get_fpmode
,set_fpmode
andreset_fpmode
operate control modes, the bits of FP environment that affect FP operations. On AArch64 these bits are in FPCR. The lowering implemented to produce code close to that of GLIBC.