Skip to content

Commit 5183d8a

Browse files
committed
8260355: AArch64: deoptimization stub should save vector registers
Reviewed-by: vlivanov, aph
1 parent 5d8204b commit 5183d8a

File tree

12 files changed

+182
-75
lines changed

12 files changed

+182
-75
lines changed
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/*
2+
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
3+
* Copyright (c) 2021, Arm Limited. All rights reserved.
4+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5+
*
6+
* This code is free software; you can redistribute it and/or modify it
7+
* under the terms of the GNU General Public License version 2 only, as
8+
* published by the Free Software Foundation.
9+
*
10+
* This code is distributed in the hope that it will be useful, but WITHOUT
11+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13+
* version 2 for more details (a copy is included in the LICENSE file that
14+
* accompanied this code).
15+
*
16+
* You should have received a copy of the GNU General Public License version
17+
* 2 along with this work; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19+
*
20+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21+
* or visit www.oracle.com if you need additional information or have any
22+
* questions.
23+
*/
24+
25+
#include "precompiled.hpp"
26+
#include "runtime/registerMap.hpp"
27+
#include "vmreg_aarch64.inline.hpp"
28+
29+
address RegisterMap::pd_location(VMReg base_reg, int slot_idx) const {
30+
if (base_reg->is_FloatRegister()) {
31+
// Not all physical slots of an SVE register have corresponding
32+
// VMRegs. However they are always saved to the stack in a
33+
// contiguous region of memory so we can calculate the address of
34+
// the upper slots by offsetting from the base address.
35+
assert(base_reg->is_concrete(), "must pass base reg");
36+
int base_reg_enc = (base_reg->value() - ConcreteRegisterImpl::max_gpr) /
37+
FloatRegisterImpl::max_slots_per_register;
38+
intptr_t offset_in_bytes = slot_idx * VMRegImpl::stack_slot_size;
39+
address base_location = location(base_reg);
40+
if (base_location != NULL) {
41+
return base_location + offset_in_bytes;
42+
} else {
43+
return NULL;
44+
}
45+
} else {
46+
return location(base_reg->next(slot_idx));
47+
}
48+
}

src/hotspot/cpu/aarch64/registerMap_aarch64.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 1998, 2021, Oracle and/or its affiliates. All rights reserved.
33
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
44
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
55
*
@@ -32,8 +32,8 @@
3232
private:
3333
// This is the hook for finding a register in a "well-known" location,
3434
// such as a register block of a predetermined format.
35-
// Since there is none, we just return NULL.
36-
address pd_location(VMReg reg) const {return NULL;}
35+
address pd_location(VMReg reg) const { return NULL; }
36+
address pd_location(VMReg base_reg, int slot_idx) const;
3737

3838
// no PD state to clear or copy:
3939
void pd_clear() {}

src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp

Lines changed: 72 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -85,26 +85,24 @@ class SimpleRuntimeFrame {
8585

8686
// FIXME -- this is used by C1
8787
class RegisterSaver {
88+
const bool _save_vectors;
8889
public:
89-
static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false);
90-
static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);
90+
RegisterSaver(bool save_vectors) : _save_vectors(save_vectors) {}
91+
92+
OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words);
93+
void restore_live_registers(MacroAssembler* masm);
9194

9295
// Offsets into the register save area
9396
// Used by deoptimization when it is managing result register
9497
// values on its own
9598

96-
static int r0_offset_in_bytes(void) { return (32 + r0->encoding()) * wordSize; }
97-
static int reg_offset_in_bytes(Register r) { return r0_offset_in_bytes() + r->encoding() * wordSize; }
98-
static int rmethod_offset_in_bytes(void) { return reg_offset_in_bytes(rmethod); }
99-
static int rscratch1_offset_in_bytes(void) { return (32 + rscratch1->encoding()) * wordSize; }
100-
static int v0_offset_in_bytes(void) { return 0; }
101-
static int return_offset_in_bytes(void) { return (32 /* floats*/ + 31 /* gregs*/) * wordSize; }
102-
103-
// During deoptimization only the result registers need to be restored,
104-
// all the other values have already been extracted.
105-
static void restore_result_registers(MacroAssembler* masm);
99+
int reg_offset_in_bytes(Register r);
100+
int r0_offset_in_bytes() { return reg_offset_in_bytes(r0); }
101+
int rscratch1_offset_in_bytes() { return reg_offset_in_bytes(rscratch1); }
102+
int v0_offset_in_bytes(void) { return 0; }
106103

107-
// Capture info about frame layout
104+
// Capture info about frame layout
105+
// Note this is only correct when not saving full vectors.
108106
enum layout {
109107
fpu_state_off = 0,
110108
fpu_state_end = fpu_state_off + FPUStateSizeInWords - 1,
@@ -119,7 +117,31 @@ class RegisterSaver {
119117

120118
};
121119

122-
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
120+
int RegisterSaver::reg_offset_in_bytes(Register r) {
121+
// The integer registers are located above the floating point
122+
// registers in the stack frame pushed by save_live_registers() so the
123+
// offset depends on whether we are saving full vectors, and whether
124+
// those vectors are NEON or SVE.
125+
126+
int slots_per_vect = FloatRegisterImpl::save_slots_per_register;
127+
128+
#if COMPILER2_OR_JVMCI
129+
if (_save_vectors) {
130+
slots_per_vect = FloatRegisterImpl::slots_per_neon_register;
131+
132+
#ifdef COMPILER2
133+
if (Matcher::supports_scalable_vector()) {
134+
slots_per_vect = Matcher::scalable_vector_reg_size(T_FLOAT);
135+
}
136+
#endif
137+
}
138+
#endif
139+
140+
int r0_offset = (slots_per_vect * FloatRegisterImpl::number_of_registers) * BytesPerInt;
141+
return r0_offset + r->encoding() * wordSize;
142+
}
143+
144+
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {
123145
bool use_sve = false;
124146
int sve_vector_size_in_bytes = 0;
125147
int sve_vector_size_in_slots = 0;
@@ -131,7 +153,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
131153
#endif
132154

133155
#if COMPILER2_OR_JVMCI
134-
if (save_vectors) {
156+
if (_save_vectors) {
135157
int vect_words = 0;
136158
int extra_save_slots_per_register = 0;
137159
// Save upper half of vector registers
@@ -145,7 +167,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
145167
additional_frame_words += vect_words;
146168
}
147169
#else
148-
assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
170+
assert(!_save_vectors, "vectors are generated only by C2 and JVMCI");
149171
#endif
150172

151173
int frame_size_in_bytes = align_up(additional_frame_words * wordSize +
@@ -160,7 +182,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
160182

161183
// Save Integer and Float registers.
162184
__ enter();
163-
__ push_CPU_state(save_vectors, use_sve, sve_vector_size_in_bytes);
185+
__ push_CPU_state(_save_vectors, use_sve, sve_vector_size_in_bytes);
164186

165187
// Set an oopmap for the call site. This oopmap will map all
166188
// oop-registers and debug-info registers as callee-saved. This
@@ -185,7 +207,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
185207
for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
186208
FloatRegister r = as_FloatRegister(i);
187209
int sp_offset = 0;
188-
if (save_vectors) {
210+
if (_save_vectors) {
189211
sp_offset = use_sve ? (sve_vector_size_in_slots * i) :
190212
(FloatRegisterImpl::slots_per_neon_register * i);
191213
} else {
@@ -198,37 +220,20 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
198220
return oop_map;
199221
}
200222

201-
void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
223+
void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
202224
#ifdef COMPILER2
203-
__ pop_CPU_state(restore_vectors, Matcher::supports_scalable_vector(),
225+
__ pop_CPU_state(_save_vectors, Matcher::supports_scalable_vector(),
204226
Matcher::scalable_vector_reg_size(T_BYTE));
205227
#else
206228
#if !INCLUDE_JVMCI
207-
assert(!restore_vectors, "vectors are generated only by C2 and JVMCI");
229+
assert(!_save_vectors, "vectors are generated only by C2 and JVMCI");
208230
#endif
209-
__ pop_CPU_state(restore_vectors);
231+
__ pop_CPU_state(_save_vectors);
210232
#endif
211233
__ leave();
212234

213235
}
214236

215-
void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
216-
217-
// Just restore result register. Only used by deoptimization. By
218-
// now any callee save register that needs to be restored to a c2
219-
// caller of the deoptee has been extracted into the vframeArray
220-
// and will be stuffed into the c2i adapter we create for later
221-
// restoration so only result registers need to be restored here.
222-
223-
// Restore fp result register
224-
__ ldrd(v0, Address(sp, v0_offset_in_bytes()));
225-
// Restore integer result register
226-
__ ldr(r0, Address(sp, r0_offset_in_bytes()));
227-
228-
// Pop all of the register save are off the stack
229-
__ add(sp, sp, align_up(return_offset_in_bytes(), 16));
230-
}
231-
232237
// Is vector's size (in bytes) bigger than a size saved by default?
233238
// 8 bytes vector registers are saved by default on AArch64.
234239
bool SharedRuntime::is_wide_vector(int size) {
@@ -2164,6 +2169,7 @@ void SharedRuntime::generate_deopt_blob() {
21642169
int frame_size_in_words;
21652170
OopMap* map = NULL;
21662171
OopMapSet *oop_maps = new OopMapSet();
2172+
RegisterSaver reg_save(COMPILER2_OR_JVMCI != 0);
21672173

21682174
// -------------
21692175
// This code enters when returning to a de-optimized nmethod. A return
@@ -2201,7 +2207,7 @@ void SharedRuntime::generate_deopt_blob() {
22012207
// Prolog for non exception case!
22022208

22032209
// Save everything in sight.
2204-
map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2210+
map = reg_save.save_live_registers(masm, 0, &frame_size_in_words);
22052211

22062212
// Normal deoptimization. Save exec mode for unpack_frames.
22072213
__ movw(rcpool, Deoptimization::Unpack_deopt); // callee-saved
@@ -2219,7 +2225,7 @@ void SharedRuntime::generate_deopt_blob() {
22192225
// return address is the pc that describes which bci to re-execute at
22202226

22212227
// No need to update map as each call to save_live_registers will produce identical oopmap
2222-
(void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2228+
(void) reg_save.save_live_registers(masm, 0, &frame_size_in_words);
22232229

22242230
__ movw(rcpool, Deoptimization::Unpack_reexecute); // callee-saved
22252231
__ b(cont);
@@ -2238,7 +2244,7 @@ void SharedRuntime::generate_deopt_blob() {
22382244
uncommon_trap_offset = __ pc() - start;
22392245

22402246
// Save everything in sight.
2241-
RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2247+
reg_save.save_live_registers(masm, 0, &frame_size_in_words);
22422248
// fetch_unroll_info needs to call last_java_frame()
22432249
Label retaddr;
22442250
__ set_last_Java_frame(sp, noreg, retaddr, rscratch1);
@@ -2295,7 +2301,7 @@ void SharedRuntime::generate_deopt_blob() {
22952301
// This is a somewhat fragile mechanism.
22962302

22972303
// Save everything in sight.
2298-
map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2304+
map = reg_save.save_live_registers(masm, 0, &frame_size_in_words);
22992305

23002306
// Now it is safe to overwrite any register
23012307

@@ -2376,7 +2382,7 @@ void SharedRuntime::generate_deopt_blob() {
23762382
__ verify_oop(r0);
23772383

23782384
// Overwrite the result registers with the exception results.
2379-
__ str(r0, Address(sp, RegisterSaver::r0_offset_in_bytes()));
2385+
__ str(r0, Address(sp, reg_save.r0_offset_in_bytes()));
23802386
// I think this is useless
23812387
// __ str(r3, Address(sp, RegisterSaver::r3_offset_in_bytes()));
23822388

@@ -2385,7 +2391,14 @@ void SharedRuntime::generate_deopt_blob() {
23852391
// Only register save data is on the stack.
23862392
// Now restore the result registers. Everything else is either dead
23872393
// or captured in the vframeArray.
2388-
RegisterSaver::restore_result_registers(masm);
2394+
2395+
// Restore fp result register
2396+
__ ldrd(v0, Address(sp, reg_save.v0_offset_in_bytes()));
2397+
// Restore integer result register
2398+
__ ldr(r0, Address(sp, reg_save.r0_offset_in_bytes()));
2399+
2400+
// Pop all of the register save area off the stack
2401+
__ add(sp, sp, frame_size_in_words * wordSize);
23892402

23902403
// All of the register save area has been popped off the stack. Only the
23912404
// return address remains.
@@ -2466,8 +2479,8 @@ void SharedRuntime::generate_deopt_blob() {
24662479
__ sub(sp, sp, (frame_size_in_words - 2) * wordSize);
24672480

24682481
// Restore frame locals after moving the frame
2469-
__ strd(v0, Address(sp, RegisterSaver::v0_offset_in_bytes()));
2470-
__ str(r0, Address(sp, RegisterSaver::r0_offset_in_bytes()));
2482+
__ strd(v0, Address(sp, reg_save.v0_offset_in_bytes()));
2483+
__ str(r0, Address(sp, reg_save.r0_offset_in_bytes()));
24712484

24722485
// Call C code. Need thread but NOT official VM entry
24732486
// crud. We cannot block on this call, no GC can happen. Call should
@@ -2494,8 +2507,8 @@ void SharedRuntime::generate_deopt_blob() {
24942507
__ reset_last_Java_frame(true);
24952508

24962509
// Collect return values
2497-
__ ldrd(v0, Address(sp, RegisterSaver::v0_offset_in_bytes()));
2498-
__ ldr(r0, Address(sp, RegisterSaver::r0_offset_in_bytes()));
2510+
__ ldrd(v0, Address(sp, reg_save.v0_offset_in_bytes()));
2511+
__ ldr(r0, Address(sp, reg_save.r0_offset_in_bytes()));
24992512
// I think this is useless (throwing pc?)
25002513
// __ ldr(r3, Address(sp, RegisterSaver::r3_offset_in_bytes()));
25012514

@@ -2741,10 +2754,10 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
27412754
address call_pc = NULL;
27422755
int frame_size_in_words;
27432756
bool cause_return = (poll_type == POLL_AT_RETURN);
2744-
bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP);
2757+
RegisterSaver reg_save(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */);
27452758

27462759
// Save Integer and Float registers.
2747-
map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_vectors);
2760+
map = reg_save.save_live_registers(masm, 0, &frame_size_in_words);
27482761

27492762
// The following is basically a call_VM. However, we need the precise
27502763
// address of the call in order to generate an oopmap. Hence, we do all the
@@ -2789,7 +2802,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
27892802

27902803
// Exception pending
27912804

2792-
RegisterSaver::restore_live_registers(masm, save_vectors);
2805+
reg_save.restore_live_registers(masm);
27932806

27942807
__ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
27952808

@@ -2821,7 +2834,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
28212834

28222835
__ bind(no_adjust);
28232836
// Normal exit, restore registers and exit.
2824-
RegisterSaver::restore_live_registers(masm, save_vectors);
2837+
reg_save.restore_live_registers(masm);
28252838

28262839
__ ret(lr);
28272840

@@ -2855,13 +2868,14 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha
28552868
MacroAssembler* masm = new MacroAssembler(&buffer);
28562869

28572870
int frame_size_in_words;
2871+
RegisterSaver reg_save(false /* save_vectors */);
28582872

28592873
OopMapSet *oop_maps = new OopMapSet();
28602874
OopMap* map = NULL;
28612875

28622876
int start = __ offset();
28632877

2864-
map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2878+
map = reg_save.save_live_registers(masm, 0, &frame_size_in_words);
28652879

28662880
int frame_complete = __ offset();
28672881

@@ -2893,11 +2907,11 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha
28932907

28942908
// get the returned Method*
28952909
__ get_vm_result_2(rmethod, rthread);
2896-
__ str(rmethod, Address(sp, RegisterSaver::reg_offset_in_bytes(rmethod)));
2910+
__ str(rmethod, Address(sp, reg_save.reg_offset_in_bytes(rmethod)));
28972911

28982912
// r0 is where we want to jump, overwrite rscratch1 which is saved and scratch
2899-
__ str(r0, Address(sp, RegisterSaver::rscratch1_offset_in_bytes()));
2900-
RegisterSaver::restore_live_registers(masm);
2913+
__ str(r0, Address(sp, reg_save.rscratch1_offset_in_bytes()));
2914+
reg_save.restore_live_registers(masm);
29012915

29022916
// We are back to the original state on entry and ready to go.
29032917

@@ -2907,7 +2921,7 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha
29072921

29082922
__ bind(pending);
29092923

2910-
RegisterSaver::restore_live_registers(masm);
2924+
reg_save.restore_live_registers(masm);
29112925

29122926
// exception pending => remove activation and forward to exception handler
29132927

0 commit comments

Comments
 (0)