[AArch64] Lowering of fpmode intrinsics in DAG (#80611)

LLVM intrinsics `get_fpmode`, `set_fpmode` and `reset_fpmode` operate control modes, the bits of FP environment that affect FP operations. On AArch64 these bits are in FPCR. The lowering implemented to produce code close to that of GLIBC.
llvm · Apr 27, 2024 · 9e30c96 · 9e30c96
1 parent b2c9f7d
commit 9e30c96
Show file tree

Hide file tree

Showing 3 changed files with 93 additions and 24 deletions.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -842,6 +842,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
 
   setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
   setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
+  setOperationAction(ISD::GET_FPMODE, MVT::i32, Custom);
+  setOperationAction(ISD::SET_FPMODE, MVT::i32, Custom);
+  setOperationAction(ISD::RESET_FPMODE, MVT::Other, Custom);
 
   setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
   if (!Subtarget->hasLSE() && !Subtarget->outlineAtomics()) {
@@ -4870,6 +4873,65 @@ SDValue AArch64TargetLowering::LowerSET_ROUNDING(SDValue Op,
   return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
 }
 
+SDValue AArch64TargetLowering::LowerGET_FPMODE(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  SDValue Chain = Op->getOperand(0);
+
+  // Get current value of FPCR.
+  SDValue Ops[] = {
+      Chain, DAG.getTargetConstant(Intrinsic::aarch64_get_fpcr, DL, MVT::i64)};
+  SDValue FPCR =
+      DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i64, MVT::Other}, Ops);
+  Chain = FPCR.getValue(1);
+  FPCR = FPCR.getValue(0);
+
+  // Truncate FPCR to 32 bits.
+  SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPCR);
+
+  return DAG.getMergeValues({Result, Chain}, DL);
+}
+
+SDValue AArch64TargetLowering::LowerSET_FPMODE(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  SDValue Chain = Op->getOperand(0);
+  SDValue Mode = Op->getOperand(1);
+
+  // Extend the specified value to 64 bits.
+  SDValue FPCR = DAG.getZExtOrTrunc(Mode, DL, MVT::i64);
+
+  // Set new value of FPCR.
+  SDValue Ops2[] = {
+      Chain, DAG.getConstant(Intrinsic::aarch64_set_fpcr, DL, MVT::i64), FPCR};
+  return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
+}
+
+SDValue AArch64TargetLowering::LowerRESET_FPMODE(SDValue Op,
+                                                 SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  SDValue Chain = Op->getOperand(0);
+
+  // Get current value of FPCR.
+  SDValue Ops[] = {
+      Chain, DAG.getTargetConstant(Intrinsic::aarch64_get_fpcr, DL, MVT::i64)};
+  SDValue FPCR =
+      DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i64, MVT::Other}, Ops);
+  Chain = FPCR.getValue(1);
+  FPCR = FPCR.getValue(0);
+
+  // Clear bits that are not reserved.
+  SDValue FPSCRMasked = DAG.getNode(
+      ISD::AND, DL, MVT::i64, FPCR,
+      DAG.getConstant(AArch64::ReservedFPControlBits, DL, MVT::i64));
+
+  // Set new value of FPCR.
+  SDValue Ops2[] = {Chain,
+                    DAG.getConstant(Intrinsic::aarch64_set_fpcr, DL, MVT::i64),
+                    FPSCRMasked};
+  return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
+}
+
 static unsigned selectUmullSmull(SDValue &N0, SDValue &N1, SelectionDAG &DAG,
                                  SDLoc DL, bool &IsMLA) {
   bool IsN0SExt = isSignExtended(N0, DAG);
@@ -6484,6 +6546,12 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
     return LowerGET_ROUNDING(Op, DAG);
   case ISD::SET_ROUNDING:
     return LowerSET_ROUNDING(Op, DAG);
+  case ISD::GET_FPMODE:
+    return LowerGET_FPMODE(Op, DAG);
+  case ISD::SET_FPMODE:
+    return LowerSET_FPMODE(Op, DAG);
+  case ISD::RESET_FPMODE:
+    return LowerRESET_FPMODE(Op, DAG);
   case ISD::MUL:
     return LowerMUL(Op, DAG);
   case ISD::MULHS:

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -523,6 +523,9 @@ enum Rounding {
 // Bit position of rounding mode bits in FPCR.
 const unsigned RoundingBitsPos = 22;
 
+// Reserved bits should be preserved when modifying FPCR.
+const uint64_t ReservedFPControlBits = 0xfffffffff80040f8;
+
 // Registers used to pass function arguments.
 ArrayRef<MCPhysReg> getGPRArgRegs();
 ArrayRef<MCPhysReg> getFPRArgRegs();
@@ -1128,6 +1131,9 @@ class AArch64TargetLowering : public TargetLowering {
   SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerGET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerSET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerRESET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;

diff --git a/llvm/test/CodeGen/AArch64/fpmode.ll b/llvm/test/CodeGen/AArch64/fpmode.ll
@@ -6,17 +6,14 @@ declare i32 @llvm.get.fpmode.i32()
 declare void @llvm.set.fpmode.i32(i32 %fpmode)
 declare void @llvm.reset.fpmode()
 
-define i32 @func_get_fpmode_soft() #0 {
-; DAG-LABEL: func_get_fpmode_soft:
+define i32 @func_get_fpmode() #0 {
+; DAG-LABEL: func_get_fpmode:
 ; DAG:       // %bb.0: // %entry
-; DAG-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; DAG-NEXT:    add x0, sp, #12
-; DAG-NEXT:    bl fegetmode
-; DAG-NEXT:    ldr w0, [sp, #12]
-; DAG-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; DAG-NEXT:    mrs x0, FPCR
+; DAG-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; DAG-NEXT:    ret
 ;
-; GIS-LABEL: func_get_fpmode_soft:
+; GIS-LABEL: func_get_fpmode:
 ; GIS:       // %bb.0: // %entry
 ; GIS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; GIS-NEXT:    add x0, sp, #12
@@ -29,17 +26,14 @@ entry:
   ret i32 %fpmode
 }
 
-define void @func_set_fpmode_soft(i32 %fpmode) #0 {
-; DAG-LABEL: func_set_fpmode_soft:
+define void @func_set_fpmode(i32 %fpmode) #0 {
+; DAG-LABEL: func_set_fpmode:
 ; DAG:       // %bb.0: // %entry
-; DAG-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; DAG-NEXT:    str w0, [sp, #12]
-; DAG-NEXT:    add x0, sp, #12
-; DAG-NEXT:    bl fesetmode
-; DAG-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; DAG-NEXT:    mov w8, w0
+; DAG-NEXT:    msr FPCR, x8
 ; DAG-NEXT:    ret
 ;
-; GIS-LABEL: func_set_fpmode_soft:
+; GIS-LABEL: func_set_fpmode:
 ; GIS:       // %bb.0: // %entry
 ; GIS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; GIS-NEXT:    str w0, [sp, #12]
@@ -52,16 +46,17 @@ entry:
   ret void
 }
 
-define void @func_reset_fpmode_soft() #0 {
-; DAG-LABEL: func_reset_fpmode_soft:
+define void @func_reset_fpmode() #0 {
+; DAG-LABEL: func_reset_fpmode:
 ; DAG:       // %bb.0: // %entry
-; DAG-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; DAG-NEXT:    mov x0, #-1 // =0xffffffffffffffff
-; DAG-NEXT:    bl fesetmode
-; DAG-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; DAG-NEXT:    mov x9, #-48904 // =0xffffffffffff40f8
+; DAG-NEXT:    mrs x8, FPCR
+; DAG-NEXT:    movk x9, #63488, lsl #16
+; DAG-NEXT:    and x8, x8, x9
+; DAG-NEXT:    msr FPCR, x8
 ; DAG-NEXT:    ret
 ;
-; GIS-LABEL: func_reset_fpmode_soft:
+; GIS-LABEL: func_reset_fpmode:
 ; GIS:       // %bb.0: // %entry
 ; GIS-NEXT:    mov x0, #-1 // =0xffffffffffffffff
 ; GIS-NEXT:    b fesetmode
@@ -70,4 +65,4 @@ entry:
   ret void
 }
 
-attributes #0 = { nounwind "use-soft-float"="true" }
+attributes #0 = { nounwind }