[RISCV] Support register allocation for GHC when f/d is not specified…

… in the architecture

This patch supports register allocation for floating-point types in the GHC calling convention when `zfinx` and `zdinx` are specified in the architecture.

Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D155910
llvm · Jul 24, 2023 · 78d91df · 78d91df
1 parent d6675b6
commit 78d91df
Show file tree

Hide file tree

Showing 2 changed files with 97 additions and 9 deletions.
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -15444,25 +15444,28 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
 bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
                          CCValAssign::LocInfo LocInfo,
                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
-
   if (ArgFlags.isNest()) {
     report_fatal_error(
         "Attribute 'nest' is not supported in GHC calling convention");
   }
 
+  static const MCPhysReg GPRList[] = {
+      RISCV::X9,  RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
+      RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
+
   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
     // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
     //                        s1    s2  s3  s4  s5  s6  s7  s8  s9  s10 s11
-    static const MCPhysReg GPRList[] = {
-        RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
-        RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
     if (unsigned Reg = State.AllocateReg(GPRList)) {
       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
       return false;
     }
   }
 
-  if (LocVT == MVT::f32) {
+  const RISCVSubtarget &Subtarget =
+      State.getMachineFunction().getSubtarget<RISCVSubtarget>();
+
+  if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
     // Pass in STG registers: F1, ..., F6
     //                        fs0 ... fs5
     static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
@@ -15474,7 +15477,7 @@ bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
     }
   }
 
-  if (LocVT == MVT::f64) {
+  if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
     // Pass in STG registers: D1, ..., D6
     //                        fs6 ... fs11
     static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
@@ -15486,6 +15489,15 @@ bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
     }
   }
 
+  if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
+      (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
+       Subtarget.is64Bit())) {
+    if (unsigned Reg = State.AllocateReg(GPRList)) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+      return false;
+    }
+  }
+
   report_fatal_error("No registers left in GHC calling convention");
   return true;
 }
@@ -15505,9 +15517,9 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
   case CallingConv::Fast:
     break;
   case CallingConv::GHC:
-    if (!Subtarget.hasStdExtF() || !Subtarget.hasStdExtD())
-      report_fatal_error(
-        "GHC calling convention requires the F and D instruction set extensions");
+    if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
+      report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
+                         "(Zdinx/D) instruction set extensions");
   }
 
   const Function &Func = MF.getFunction();

diff --git a/llvm/test/CodeGen/RISCV/ghccc-without-f-reg.ll b/llvm/test/CodeGen/RISCV/ghccc-without-f-reg.ll
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=riscv64 -mattr=+zfinx,+zdinx < %s | FileCheck %s
+
+; Check the GHC call convention works for zfinx, zdinx (rv64)
+
+@f1 = external global float
+@f2 = external global float
+@f3 = external global float
+@f4 = external global float
+@f5 = external global float
+@f6 = external global float
+
+define ghccc void @caller_float() nounwind {
+; CHECK-LABEL: caller_float:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui a0, %hi(f6)
+; CHECK-NEXT:    lw s6, %lo(f6)(a0)
+; CHECK-NEXT:    lui a0, %hi(f5)
+; CHECK-NEXT:    lw s5, %lo(f5)(a0)
+; CHECK-NEXT:    lui a0, %hi(f4)
+; CHECK-NEXT:    lw s4, %lo(f4)(a0)
+; CHECK-NEXT:    lui a0, %hi(f3)
+; CHECK-NEXT:    lw s3, %lo(f3)(a0)
+; CHECK-NEXT:    lui a0, %hi(f2)
+; CHECK-NEXT:    lw s2, %lo(f2)(a0)
+; CHECK-NEXT:    lui a0, %hi(f1)
+; CHECK-NEXT:    lw s1, %lo(f1)(a0)
+; CHECK-NEXT:    tail callee_float@plt
+entry:
+  %0  = load float, ptr @f6
+  %1  = load float, ptr @f5
+  %2  = load float, ptr @f4
+  %3  = load float, ptr @f3
+  %4 = load float, ptr @f2
+  %5 = load float, ptr @f1
+  tail call ghccc void @callee_float(float %5, float %4, float %3, float %2, float %1, float %0) nounwind
+  ret void
+}
+
+declare ghccc void @callee_float(float, float, float, float, float, float)
+
+@d1 = external global double
+@d2 = external global double
+@d3 = external global double
+@d4 = external global double
+@d5 = external global double
+@d6 = external global double
+
+define ghccc void @caller_double() nounwind {
+; CHECK-LABEL: caller_double:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui a0, %hi(d6)
+; CHECK-NEXT:    ld s6, %lo(d6)(a0)
+; CHECK-NEXT:    lui a0, %hi(d5)
+; CHECK-NEXT:    ld s5, %lo(d5)(a0)
+; CHECK-NEXT:    lui a0, %hi(d4)
+; CHECK-NEXT:    ld s4, %lo(d4)(a0)
+; CHECK-NEXT:    lui a0, %hi(d3)
+; CHECK-NEXT:    ld s3, %lo(d3)(a0)
+; CHECK-NEXT:    lui a0, %hi(d2)
+; CHECK-NEXT:    ld s2, %lo(d2)(a0)
+; CHECK-NEXT:    lui a0, %hi(d1)
+; CHECK-NEXT:    ld s1, %lo(d1)(a0)
+; CHECK-NEXT:    tail callee_double@plt
+entry:
+  %0  = load double, ptr @d6
+  %1  = load double, ptr @d5
+  %2  = load double, ptr @d4
+  %3  = load double, ptr @d3
+  %4 = load double, ptr @d2
+  %5 = load double, ptr @d1
+  tail call ghccc void @callee_double(double %5, double %4, double %3, double %2, double %1, double %0) nounwind
+  ret void
+}
+
+declare ghccc void @callee_double(double, double, double, double, double, double)