openjdk · VladimirKempik · Sep 2, 2022
diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp
@@ -133,7 +133,7 @@
     // Entry frames
     // n.b. these values are determined by the layout defined in
     // stubGenerator for the Java call stub
-    entry_frame_after_call_words                     =  22,
+    entry_frame_after_call_words                     =  34,
     entry_frame_call_wrapper_offset                  = -10,
 
     // we don't need a save area

diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad
@@ -8936,7 +8936,7 @@ instruct cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl)
   effect(USE lbl);
 
   ins_cost(XFER_COST + BRANCH_COST);
-  format %{ "float_b$cmp $op1, $op2 \t#@cmpF_branch"%}
+  format %{ "float_b$cmp $op1, $op2, $lbl \t#@cmpF_branch"%}
 
   ins_encode %{
     __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label));
@@ -8953,7 +8953,7 @@ instruct cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl)
   effect(USE lbl);
 
   ins_cost(XFER_COST + BRANCH_COST);
-  format %{ "float_b$cmp $op1, $op2\t#@cmpF_loop"%}
+  format %{ "float_b$cmp $op1, $op2, $lbl\t#@cmpF_loop"%}
 
   ins_encode %{
     __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label));
@@ -8971,7 +8971,7 @@ instruct cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl)
   effect(USE lbl);
 
   ins_cost(XFER_COST + BRANCH_COST);
-  format %{ "double_b$cmp $op1, $op2\t#@cmpD_branch"%}
+  format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_branch"%}
 
   ins_encode %{
     __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg),
@@ -8989,7 +8989,7 @@ instruct cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl)
   effect(USE lbl);
 
   ins_cost(XFER_COST + BRANCH_COST);
-  format %{ "double_b$cmp $op1, $op2\t#@cmpD_loop"%}
+  format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_loop"%}
 
   ins_encode %{
     __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg),
@@ -9264,7 +9264,7 @@ instruct far_cmpFlag_branch(cmpOp cmp, rFlagsReg cr, label lbl) %{
   effect(USE lbl);
 
   ins_cost(BRANCH_COST);
-  format %{ "far_b$cmp $cr, zr, L\t#@far_cmpFlag_branch"%}
+  format %{ "far_b$cmp $cr, zr, $lbl\t#@far_cmpFlag_branch"%}
 
   ins_encode %{
     __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label), /* is_far */ true);
@@ -9473,7 +9473,7 @@ instruct far_cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl)
   effect(USE lbl);
 
   ins_cost(XFER_COST + BRANCH_COST * 2);
-  format %{ "far_float_b$cmp $op1, $op2\t#@far_cmpF_branch"%}
+  format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_branch"%}
 
   ins_encode %{
     __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg),
@@ -9489,7 +9489,7 @@ instruct far_cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl)
   effect(USE lbl);
 
   ins_cost(XFER_COST + BRANCH_COST * 2);
-  format %{ "far_float_b$cmp $op1, $op2\t#@far_cmpF_loop"%}
+  format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_loop"%}
 
   ins_encode %{
     __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg),
@@ -9506,7 +9506,7 @@ instruct far_cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl)
   effect(USE lbl);
 
   ins_cost(XFER_COST + BRANCH_COST * 2);
-  format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_branch"%}
+  format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_branch"%}
 
   ins_encode %{
     __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg),
@@ -9522,7 +9522,7 @@ instruct far_cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl)
   effect(USE lbl);
 
   ins_cost(XFER_COST + BRANCH_COST * 2);
-  format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_loop"%}
+  format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_loop"%}
 
   ins_encode %{
     __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg),

diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
@@ -119,16 +119,28 @@ class StubGenerator: public StubCodeGenerator {
   // we don't need to save x6-x7 and x28-x31 which both C and Java treat as
   // volatile
   //
-  // we save x18-x27 which Java uses as temporary registers and C
-  // expects to be callee-save
+  // we save x9, x18-x27, f8-f9, and f18-f27, which C expects to be
+  // callee-save but Java uses as temporary registers
   //
   // so the stub frame looks like this when we enter Java code
   //
   //     [ return_from_Java     ] <--- sp
   //     [ argument word n      ]
   //      ...
-  // -22 [ argument word 1      ]
-  // -21 [ saved x27            ] <--- sp_after_call
+  // -34 [ argument word 1      ]
+  // -33 [ saved f27            ] <--- sp_after_call
+  // -32 [ saved f26            ]
+  // -31 [ saved f25            ]
+  // -30 [ saved f24            ]
+  // -29 [ saved f23            ]
+  // -28 [ saved f22            ]
+  // -27 [ saved f21            ]
+  // -26 [ saved f20            ]
+  // -25 [ saved f19            ]
+  // -24 [ saved f18            ]
+  // -23 [ saved f9             ]
+  // -22 [ saved f8             ]
+  // -21 [ saved x27            ]
   // -20 [ saved x26            ]
   // -19 [ saved x25            ]
   // -18 [ saved x24            ]
@@ -153,7 +165,20 @@ class StubGenerator: public StubCodeGenerator {
 
   // Call stub stack layout word offsets from fp
   enum call_stub_layout {
-    sp_after_call_off  = -21,
+    sp_after_call_off  = -33,
+
+    f27_off            = -33,
+    f26_off            = -32,
+    f25_off            = -31,
+    f24_off            = -30,
+    f23_off            = -29,
+    f22_off            = -28,
+    f21_off            = -27,
+    f20_off            = -26,
+    f19_off            = -25,
+    f18_off            = -24,
+    f9_off             = -23,
+    f8_off             = -22,
 
     x27_off            = -21,
     x26_off            = -20,
@@ -199,6 +224,19 @@ class StubGenerator: public StubCodeGenerator {
 
     const Address thread        (fp, thread_off         * wordSize);
 
+    const Address f27_save      (fp, f27_off            * wordSize);
+    const Address f26_save      (fp, f26_off            * wordSize);
+    const Address f25_save      (fp, f25_off            * wordSize);
+    const Address f24_save      (fp, f24_off            * wordSize);
+    const Address f23_save      (fp, f23_off            * wordSize);
+    const Address f22_save      (fp, f22_off            * wordSize);
+    const Address f21_save      (fp, f21_off            * wordSize);
+    const Address f20_save      (fp, f20_off            * wordSize);
+    const Address f19_save      (fp, f19_off            * wordSize);
+    const Address f18_save      (fp, f18_off            * wordSize);
+    const Address f9_save       (fp, f9_off             * wordSize);
+    const Address f8_save       (fp, f8_off             * wordSize);
+
     const Address x27_save      (fp, x27_off            * wordSize);
     const Address x26_save      (fp, x26_off            * wordSize);
     const Address x25_save      (fp, x25_off            * wordSize);
@@ -245,6 +283,19 @@ class StubGenerator: public StubCodeGenerator {
     __ sd(x26, x26_save);
     __ sd(x27, x27_save);
 
+    __ fsd(f8,  f8_save);
+    __ fsd(f9,  f9_save);
+    __ fsd(f18, f18_save);
+    __ fsd(f19, f19_save);
+    __ fsd(f20, f20_save);
+    __ fsd(f21, f21_save);
+    __ fsd(f22, f22_save);
+    __ fsd(f23, f23_save);
+    __ fsd(f24, f24_save);
+    __ fsd(f25, f25_save);
+    __ fsd(f26, f26_save);
+    __ fsd(f27, f27_save);
+
     // install Java thread in global register now we have saved
     // whatever value it held
     __ mv(xthread, c_rarg7);
@@ -336,6 +387,19 @@ class StubGenerator: public StubCodeGenerator {
 #endif
 
     // restore callee-save registers
+    __ fld(f27, f27_save);
+    __ fld(f26, f26_save);
+    __ fld(f25, f25_save);
+    __ fld(f24, f24_save);
+    __ fld(f23, f23_save);
+    __ fld(f22, f22_save);
+    __ fld(f21, f21_save);
+    __ fld(f20, f20_save);
+    __ fld(f19, f19_save);
+    __ fld(f18, f18_save);
+    __ fld(f9,  f9_save);
+    __ fld(f8,  f8_save);
+
     __ ld(x27, x27_save);
     __ ld(x26, x26_save);
     __ ld(x25, x25_save);

diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp
@@ -40,15 +40,15 @@ void VMRegImpl::set_regName() {
   FloatRegister freg = ::as_FloatRegister(0);
   for ( ; i < ConcreteRegisterImpl::max_fpr ; ) {
     for (int j = 0 ; j < FloatRegisterImpl::max_slots_per_register ; j++) {
-      regName[i++] = reg->name();
+      regName[i++] = freg->name();
     }
     freg = freg->successor();
   }
 
   VectorRegister vreg = ::as_VectorRegister(0);
   for ( ; i < ConcreteRegisterImpl::max_vpr ; ) {
     for (int j = 0 ; j < VectorRegisterImpl::max_slots_per_register ; j++) {
-      regName[i++] = reg->name();
+      regName[i++] = vreg->name();
     }
     vreg = vreg->successor();
   }