Skip to content

Commit ab509f1

Browse files
committed
8337654: Relocate uncommon trap stub from SharedRuntime to OptoRuntime
Reviewed-by: kvn, vlivanov, fyang
1 parent 1348ece commit ab509f1

22 files changed

+1378
-1371
lines changed

src/hotspot/cpu/aarch64/runtime_aarch64.cpp

Lines changed: 343 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,349 @@
3636
#include "runtime/vframeArray.hpp"
3737
#include "utilities/globalDefinitions.hpp"
3838
#include "vmreg_aarch64.inline.hpp"
39+
40+
// Shared description of the small stack frame used by the runtime stubs
// generated in this file: a saved rfp followed by the return address,
// with every offset expressed in 32-bit (jint) compiler stack slots.
class SimpleRuntimeFrame {
41+
42+
public:
43+
44+
// Most of the runtime stubs have this simple frame layout.
45+
// This class exists to make the layout shared in one place.
46+
// Offsets are for compiler stack slots, which are jints.
47+
enum layout {
48+
// The frame sender code expects that rfp will be in the "natural" place and
49+
// will override any oopMap setting for it. We must therefore force the layout
50+
// so that it agrees with the frame sender code.
51+
// we don't expect any arg reg save area so aarch64 asserts that
52+
// frame::arg_reg_save_area_bytes == 0
53+
// Each saved value occupies two consecutive jint slots (off / off2).
rfp_off = 0,
54+
rfp_off2,
55+
return_off, return_off2,
56+
// Total number of jint slots in the frame (evaluates to 4 here).
framesize
57+
};
58+
};
59+
60+
// Shorthand used by the generators below: "__ insn(...)" expands to
// "masm->insn(...)", so a local named masm must be in scope wherever it is used.
#define __ masm->
61+
62+
//------------------------------generate_uncommon_trap_blob--------------------
// Emits the uncommon trap stub into a fresh CodeBuffer and publishes it in
// _uncommon_trap_blob. The generated code, in order:
//   1. pushes a small self-frame (rfp, lr) and calls Deoptimization::uncommon_trap,
//      which returns an UnrollBlock* in r0;
//   2. pops the self-frame and the deoptimized frame;
//   3. pushes one skeletal interpreter frame per entry of the UnrollBlock's
//      frame size / frame pc arrays;
//   4. re-pushes a self-frame, calls Deoptimization::unpack_frames, pops the
//      self-frame again and returns through lr.
63+
void OptoRuntime::generate_uncommon_trap_blob() {
64+
// Allocate space for the code
65+
ResourceMark rm;
66+
// Setup code generation tools
67+
CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
68+
MacroAssembler* masm = new MacroAssembler(&buffer);
69+
70+
assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
71+
72+
address start = __ pc();
73+
74+
// Push self-frame. We get here with a return address in LR
75+
// and sp should be 16 byte aligned
76+
// push rfp and retaddr by hand
77+
__ protect_return_address();
78+
__ stp(rfp, lr, Address(__ pre(sp, -2 * wordSize)));
79+
// we don't expect an arg reg save area
80+
#ifndef PRODUCT
81+
assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
82+
#endif
83+
// compiler left unloaded_class_index in j_rarg0 move to where the
84+
// runtime expects it.
85+
if (c_rarg1 != j_rarg0) {
86+
__ movw(c_rarg1, j_rarg0);
87+
}
88+
89+
// we need to set the past SP to the stack pointer of the stub frame
90+
// and the pc to the address where this runtime call will return
91+
// (although actually any pc in this code blob will do).
92+
Label retaddr;
93+
__ set_last_Java_frame(sp, noreg, retaddr, rscratch1);
94+
95+
// Call C code. Need thread but NOT official VM entry
96+
// crud. We cannot block on this call, no GC can happen. Call should
97+
// capture callee-saved registers as well as return values.
98+
//
99+
// UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index);
100+
//
101+
// n.b. 2 gp args, 0 fp args, integral return type
// NOTE(review): three gp argument registers are actually loaded for this call
// (c_rarg0 = thread, c_rarg1 = unloaded_class_index, c_rarg2 = Unpack_uncommon_trap),
// so the prototype above looks out of date -- confirm against the declaration
// of Deoptimization::uncommon_trap.
102+
103+
__ mov(c_rarg0, rthread);
104+
__ movw(c_rarg2, (unsigned)Deoptimization::Unpack_uncommon_trap);
105+
__ lea(rscratch1,
106+
RuntimeAddress(CAST_FROM_FN_PTR(address,
107+
Deoptimization::uncommon_trap)));
108+
__ blr(rscratch1);
109+
__ bind(retaddr);
110+
111+
// Set an oopmap for the call site
112+
OopMapSet* oop_maps = new OopMapSet();
113+
OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0);
114+
115+
// location of rfp is known implicitly by the frame sender code
116+
117+
oop_maps->add_gc_map(__ pc() - start, map);
118+
119+
__ reset_last_Java_frame(false);
120+
121+
// move UnrollBlock* into r4
122+
__ mov(r4, r0);
123+
124+
#ifdef ASSERT
125+
// Debug-only check: the returned UnrollBlock must carry Unpack_uncommon_trap
// as its unpack kind, otherwise stop with a diagnostic message.
{ Label L;
126+
__ ldrw(rscratch1, Address(r4, Deoptimization::UnrollBlock::unpack_kind_offset()));
127+
__ cmpw(rscratch1, (unsigned)Deoptimization::Unpack_uncommon_trap);
128+
__ br(Assembler::EQ, L);
129+
__ stop("OptoRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap");
130+
__ bind(L);
131+
}
132+
#endif
133+
134+
// Pop all the frames we must move/replace.
135+
//
136+
// Frame picture (youngest to oldest)
137+
// 1: self-frame (no frame link)
138+
// 2: deopting frame (no frame link)
139+
// 3: caller of deopting frame (could be compiled/interpreted).
140+
141+
// Pop self-frame. We have no frame, and must rely only on r4 (the UnrollBlock*,
// copied from r0 above) and sp.
142+
__ add(sp, sp, (SimpleRuntimeFrame::framesize) << LogBytesPerInt); // Epilog!
143+
144+
// Pop deoptimized frame (int)
145+
__ ldrw(r2, Address(r4,
146+
Deoptimization::UnrollBlock::
147+
size_of_deoptimized_frame_offset()));
148+
// Skip all of the deoptimized frame except its last two words, then pop
// those two words with ldp: the first is restored into rfp, the second is
// discarded by loading it into zr.
__ sub(r2, r2, 2 * wordSize);
149+
__ add(sp, sp, r2);
150+
__ ldp(rfp, zr, __ post(sp, 2 * wordSize));
151+
152+
#ifdef ASSERT
153+
// Compilers generate code that bang the stack by as much as the
154+
// interpreter would need. So this stack banging should never
155+
// trigger a fault. Verify that it does not on non product builds.
156+
__ ldrw(r1, Address(r4,
157+
Deoptimization::UnrollBlock::
158+
total_frame_sizes_offset()));
159+
__ bang_stack_size(r1, r2);
160+
#endif
161+
162+
// Load address of array of frame pcs into r2 (address*)
163+
__ ldr(r2, Address(r4,
164+
Deoptimization::UnrollBlock::frame_pcs_offset()));
165+
166+
// Load address of array of frame sizes into r5 (intptr_t*)
167+
__ ldr(r5, Address(r4,
168+
Deoptimization::UnrollBlock::
169+
frame_sizes_offset()));
170+
171+
// Counter
172+
__ ldrw(r3, Address(r4,
173+
Deoptimization::UnrollBlock::
174+
number_of_frames_offset())); // (int)
175+
176+
// Now adjust the caller's stack to make up for the extra locals but
177+
// record the original sp so that we can save it in the skeletal
178+
// interpreter frame and the stack walking of interpreter_sender
179+
// will get the unextended sp value and not the "real" sp value.
180+
181+
const Register sender_sp = r8;
182+
183+
__ mov(sender_sp, sp);
184+
__ ldrw(r1, Address(r4,
185+
Deoptimization::UnrollBlock::
186+
caller_adjustment_offset())); // (int)
187+
__ sub(sp, sp, r1);
188+
189+
// Push interpreter frames in a loop
// Per iteration: r5 walks the frame size array, r2 walks the frame pc array,
// r3 counts the remaining frames and sender_sp carries the previous frame's sp.
190+
Label loop;
191+
__ bind(loop);
192+
__ ldr(r1, Address(r5, 0)); // Load frame size
193+
__ sub(r1, r1, 2 * wordSize); // We'll push pc and rfp by hand
194+
__ ldr(lr, Address(r2, 0)); // Save return address
195+
__ enter(); // and old rfp & set new rfp
196+
__ sub(sp, sp, r1); // Prolog
197+
__ str(sender_sp, Address(rfp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable
198+
// This value is corrected by layout_activation_impl
199+
__ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize));
200+
__ mov(sender_sp, sp); // Pass sender_sp to next frame
201+
__ add(r5, r5, wordSize); // Bump array pointer (sizes)
202+
__ add(r2, r2, wordSize); // Bump array pointer (pcs)
203+
__ subsw(r3, r3, 1); // Decrement counter
204+
__ br(Assembler::GT, loop);
205+
__ ldr(lr, Address(r2, 0)); // save final return address
206+
// Re-push self-frame
207+
__ enter(); // & old rfp & set new rfp
208+
209+
// Use rfp because the frames look interpreted now
210+
// Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP.
211+
// Don't need the precise return PC here, just precise enough to point into this code blob.
212+
address the_pc = __ pc();
213+
__ set_last_Java_frame(sp, rfp, the_pc, rscratch1);
214+
215+
// Call C code. Need thread but NOT official VM entry
216+
// crud. We cannot block on this call, no GC can happen. Call should
217+
// restore return values to their stack-slots with the new SP.
218+
// Thread is passed in c_rarg0 (copied from rthread below).
219+
//
220+
// BasicType unpack_frames(JavaThread* thread, int exec_mode);
221+
//
222+
// n.b. 2 gp args, 0 fp args, integral return type
223+
224+
// sp should already be aligned
225+
__ mov(c_rarg0, rthread);
226+
__ movw(c_rarg1, (unsigned)Deoptimization::Unpack_uncommon_trap);
227+
__ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
228+
__ blr(rscratch1);
229+
230+
// Set an oopmap for the call site
231+
// Use the same PC we used for the last java frame
232+
oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0));
233+
234+
// Clear fp AND pc
235+
__ reset_last_Java_frame(true);
236+
237+
// Pop self-frame.
238+
__ leave(); // Epilog
239+
240+
// Jump to interpreter
241+
__ ret(lr);
242+
243+
// Make sure all code is generated
244+
masm->flush();
245+
246+
// SimpleRuntimeFrame::framesize counts jint slots; ">> 1" halves it
// (presumably converting to 64-bit words -- confirm against UncommonTrapBlob::create).
_uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps,
247+
SimpleRuntimeFrame::framesize >> 1);
248+
}
249+
250+
//------------------------------generate_exception_blob---------------------------
251+
// creates exception blob at the end
252+
// Using exception blob, this code is jumped from a compiled method.
253+
// (see emit_exception_handler in aarch64.ad file)
254+
//
255+
// Given an exception pc at a call we call into the runtime for the
256+
// handler in this method. This handler might merely restore state
257+
// (i.e. callee save registers) unwind the frame and jump to the
258+
// exception handler for the nmethod if there is no Java level handler
259+
// for the nmethod.
260+
//
261+
// This code is entered with a jmp.
262+
//
263+
// Arguments:
264+
// r0: exception oop
265+
// r3: exception pc
266+
//
267+
// Results:
268+
// r0: exception oop
269+
// r3: exception pc in caller or ???
270+
// destination: exception handler of caller
271+
//
// NOTE(review): in the code below the exception pc is reloaded into r4 and the
// handler address is kept in r8 for the final branch, while r3 receives the
// saved return address popped from the stub frame. The "Results" list above
// may therefore be out of date -- confirm against the handlers this stub jumps to.
//
272+
// Note: the exception pc MUST be at a call (precise debug information)
273+
// Registers r0, r3, r2, r4, r5, r8-r11 are not callee saved.
274+
//
275+
276+
void OptoRuntime::generate_exception_blob() {
277+
assert(!OptoRuntime::is_callee_saved_register(R3_num), "");
278+
assert(!OptoRuntime::is_callee_saved_register(R0_num), "");
279+
assert(!OptoRuntime::is_callee_saved_register(R2_num), "");
280+
281+
assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
282+
283+
// Allocate space for the code
284+
ResourceMark rm;
285+
// Setup code generation tools
286+
CodeBuffer buffer("exception_blob", 2048, 1024);
287+
MacroAssembler* masm = new MacroAssembler(&buffer);
288+
289+
// TODO check various assumptions made here
290+
//
291+
// make sure we do so before running this
292+
293+
address start = __ pc();
294+
295+
// push rfp and retaddr by hand
296+
// Exception pc is 'return address' for stack walker
297+
__ protect_return_address();
298+
__ stp(rfp, lr, Address(__ pre(sp, -2 * wordSize)));
299+
// there are no callee save registers and we don't expect an
300+
// arg reg save area
301+
#ifndef PRODUCT
302+
assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
303+
#endif
304+
// Store exception in Thread object. We cannot pass any arguments to the
305+
// handle_exception call, since we do not want to make any assumption
306+
// about the size of the frame where the exception happened in.
307+
__ str(r0, Address(rthread, JavaThread::exception_oop_offset()));
308+
__ str(r3, Address(rthread, JavaThread::exception_pc_offset()));
309+
310+
// This call does all the hard work. It checks if an exception handler
311+
// exists in the method.
312+
// If so, it returns the handler address.
313+
// If not, it prepares for stack-unwinding, restoring the callee-save
314+
// registers of the frame being removed.
315+
//
316+
// address OptoRuntime::handle_exception_C(JavaThread* thread)
317+
//
318+
// n.b. 1 gp arg, 0 fp args, integral return type
319+
320+
// the stack should always be aligned
321+
address the_pc = __ pc();
322+
__ set_last_Java_frame(sp, noreg, the_pc, rscratch1);
323+
__ mov(c_rarg0, rthread);
324+
__ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)));
325+
__ blr(rscratch1);
326+
// handle_exception_C is a special VM call which does not require an explicit
327+
// instruction sync afterwards.
328+
329+
// May jump to SVE compiled code
330+
__ reinitialize_ptrue();
331+
332+
// Set an oopmap for the call site. This oopmap will only be used if we
333+
// are unwinding the stack. Hence, all locations will be dead.
334+
// Callee-saved registers will be the same as the frame above (i.e.,
335+
// handle_exception_stub), since they were restored when we got the
336+
// exception.
337+
338+
OopMapSet* oop_maps = new OopMapSet();
339+
340+
oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0));
341+
342+
__ reset_last_Java_frame(false);
343+
344+
// Restore callee-saved registers
345+
346+
// rfp is an implicitly saved callee saved register (i.e. the calling
347+
// convention will save/restore it in prolog/epilog) Other than that
348+
// there are no callee save registers now that adapter frames are gone.
349+
// and we don't expect an arg reg save area
350+
// Pop the stub frame: rfp is restored and the saved return address lands in r3.
__ ldp(rfp, r3, Address(__ post(sp, 2 * wordSize)));
351+
__ authenticate_return_address(r3);
352+
353+
// r0: exception handler
354+
355+
// We have a handler in r0 (could be deopt blob).
356+
__ mov(r8, r0);
357+
358+
// Get the exception oop
359+
__ ldr(r0, Address(rthread, JavaThread::exception_oop_offset()));
360+
// Get the exception pc in case we are deoptimized
361+
__ ldr(r4, Address(rthread, JavaThread::exception_pc_offset()));
362+
#ifdef ASSERT
363+
__ str(zr, Address(rthread, JavaThread::exception_handler_pc_offset()));
364+
__ str(zr, Address(rthread, JavaThread::exception_pc_offset()));
39365
#endif
366+
// Clear the exception oop so GC no longer processes it as a root.
367+
__ str(zr, Address(rthread, JavaThread::exception_oop_offset()));
368+
369+
// r0: exception oop
370+
// r8: exception handler
371+
// r4: exception pc
372+
// Jump to handler
373+
374+
__ br(r8);
375+
376+
// Make sure all code is generated
377+
masm->flush();
378+
379+
// Set exception blob
// SimpleRuntimeFrame::framesize counts jint slots; ">> 1" halves it
// (presumably converting to 64-bit words -- confirm against ExceptionBlob::create).
380+
_exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
381+
}
382+
#endif // COMPILER2
40383

41384

0 commit comments

Comments
 (0)