Skip to content

Commit 78b8015

Browse files
pchilano, Alan Bateman, Andrew Haley, RealFYang, coleenp
committed
8338383: Implement JEP 491: Synchronize Virtual Threads without Pinning
Co-authored-by: Patricio Chilano Mateo <pchilanomate@openjdk.org>
Co-authored-by: Alan Bateman <alanb@openjdk.org>
Co-authored-by: Andrew Haley <aph@openjdk.org>
Co-authored-by: Fei Yang <fyang@openjdk.org>
Co-authored-by: Coleen Phillimore <coleenp@openjdk.org>
Co-authored-by: Richard Reingruber <rrich@openjdk.org>
Co-authored-by: Martin Doerr <mdoerr@openjdk.org>
Reviewed-by: aboldtch, dholmes, coleenp, fbredberg, dlong, sspitsyn
1 parent 8a2a75e commit 78b8015

File tree

246 files changed

+8283
-2743
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

246 files changed

+8283
-2743
lines changed

src/hotspot/cpu/aarch64/aarch64.ad

+3-4
Original file line numberDiff line numberDiff line change
@@ -1648,8 +1648,8 @@ int MachCallRuntimeNode::ret_addr_offset() {
16481648
// for real runtime callouts it will be six instructions
16491649
// see aarch64_enc_java_to_runtime
16501650
// adr(rscratch2, retaddr)
1651+
// str(rscratch2, Address(rthread, JavaThread::last_Java_pc_offset()));
16511652
// lea(rscratch1, RuntimeAddress(addr)
1652-
// stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
16531653
// blr(rscratch1)
16541654
CodeBlob *cb = CodeCache::find_blob(_entry_point);
16551655
if (cb) {
@@ -3696,14 +3696,13 @@ encode %{
36963696
__ post_call_nop();
36973697
} else {
36983698
Label retaddr;
3699+
// Make the anchor frame walkable
36993700
__ adr(rscratch2, retaddr);
3701+
__ str(rscratch2, Address(rthread, JavaThread::last_Java_pc_offset()));
37003702
__ lea(rscratch1, RuntimeAddress(entry));
3701-
// Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
3702-
__ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
37033703
__ blr(rscratch1);
37043704
__ bind(retaddr);
37053705
__ post_call_nop();
3706-
__ add(sp, sp, 2 * wordSize);
37073706
}
37083707
if (Compile::current()->max_vector_size() > 0) {
37093708
__ reinitialize_ptrue();

src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,8 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr
119119
cbnz(hdr, slow_case);
120120
// done
121121
bind(done);
122+
inc_held_monitor_count(rscratch1);
122123
}
123-
increment(Address(rthread, JavaThread::held_monitor_count_offset()));
124124
return null_check_offset;
125125
}
126126

@@ -159,8 +159,8 @@ void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_
159159
}
160160
// done
161161
bind(done);
162+
dec_held_monitor_count(rscratch1);
162163
}
163-
decrement(Address(rthread, JavaThread::held_monitor_count_offset()));
164164
}
165165

166166

src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp

+27-10
Original file line numberDiff line numberDiff line change
@@ -160,16 +160,15 @@ int StubAssembler::call_RT(Register oop_result1, Register metadata_result, addre
160160
}
161161

162162
enum return_state_t {
163-
does_not_return, requires_return
163+
does_not_return, requires_return, requires_pop_epilogue_return
164164
};
165165

166-
167166
// Implementation of StubFrame
168167

169168
class StubFrame: public StackObj {
170169
private:
171170
StubAssembler* _sasm;
172-
bool _return_state;
171+
return_state_t _return_state;
173172

174173
public:
175174
StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state=requires_return);
@@ -183,8 +182,17 @@ void StubAssembler::prologue(const char* name, bool must_gc_arguments) {
183182
enter();
184183
}
185184

186-
void StubAssembler::epilogue() {
187-
leave();
185+
void StubAssembler::epilogue(bool use_pop) {
186+
// Avoid using a leave instruction when this frame may
187+
// have been frozen, since the current value of rfp
188+
// restored from the stub would be invalid. We still
189+
// must restore the rfp value saved on enter though.
190+
if (use_pop) {
191+
ldp(rfp, lr, Address(post(sp, 2 * wordSize)));
192+
authenticate_return_address();
193+
} else {
194+
leave();
195+
}
188196
ret(lr);
189197
}
190198

@@ -203,10 +211,10 @@ void StubFrame::load_argument(int offset_in_words, Register reg) {
203211
}
204212

205213
StubFrame::~StubFrame() {
206-
if (_return_state == requires_return) {
207-
__ epilogue();
208-
} else {
214+
if (_return_state == does_not_return) {
209215
__ should_not_reach_here();
216+
} else {
217+
__ epilogue(_return_state == requires_pop_epilogue_return);
210218
}
211219
}
212220

@@ -252,7 +260,7 @@ static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) {
252260

253261
for (int i = 0; i < FrameMap::nof_cpu_regs; i++) {
254262
Register r = as_Register(i);
255-
if (i <= 18 && i != rscratch1->encoding() && i != rscratch2->encoding()) {
263+
if (r == rthread || (i <= 18 && i != rscratch1->encoding() && i != rscratch2->encoding())) {
256264
int sp_offset = cpu_reg_save_offsets[i];
257265
oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
258266
r->as_VMReg());
@@ -337,6 +345,15 @@ void Runtime1::initialize_pd() {
337345
}
338346
}
339347

348+
// return: offset in 64-bit words.
349+
uint Runtime1::runtime_blob_current_thread_offset(frame f) {
350+
CodeBlob* cb = f.cb();
351+
assert(cb == Runtime1::blob_for(C1StubId::monitorenter_id) ||
352+
cb == Runtime1::blob_for(C1StubId::monitorenter_nofpu_id), "must be");
353+
assert(cb != nullptr && cb->is_runtime_stub(), "invalid frame");
354+
int offset = cpu_reg_save_offsets[rthread->encoding()];
355+
return offset / 2; // SP offsets are in halfwords
356+
}
340357

341358
// target: the entry point of the method that creates and posts the exception oop
342359
// has_argument: true if the exception needs arguments (passed in rscratch1 and rscratch2)
@@ -868,7 +885,7 @@ OopMapSet* Runtime1::generate_code_for(C1StubId id, StubAssembler* sasm) {
868885
// fall through
869886
case C1StubId::monitorenter_id:
870887
{
871-
StubFrame f(sasm, "monitorenter", dont_gc_arguments);
888+
StubFrame f(sasm, "monitorenter", dont_gc_arguments, requires_pop_epilogue_return);
872889
OopMap* map = save_live_registers(sasm, save_fpu_registers);
873890

874891
// Called with store_parameter and not C abi

src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp

+16-15
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ void C2_MacroAssembler::fast_lock(Register objectReg, Register boxReg, Register
153153
Label count, no_count;
154154

155155
assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_lock_lightweight");
156-
assert_different_registers(oop, box, tmp, disp_hdr);
156+
assert_different_registers(oop, box, tmp, disp_hdr, rscratch2);
157157

158158
// Load markWord from object into displaced_header.
159159
ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
@@ -206,12 +206,10 @@ void C2_MacroAssembler::fast_lock(Register objectReg, Register boxReg, Register
206206
// Handle existing monitor.
207207
bind(object_has_monitor);
208208

209-
// The object's monitor m is unlocked iff m->owner == nullptr,
210-
// otherwise m->owner may contain a thread or a stack address.
211-
//
212-
// Try to CAS m->owner from null to current thread.
209+
// Try to CAS owner (no owner => current thread's _lock_id).
210+
ldr(rscratch2, Address(rthread, JavaThread::lock_id_offset()));
213211
add(tmp, disp_hdr, (in_bytes(ObjectMonitor::owner_offset())-markWord::monitor_value));
214-
cmpxchg(tmp, zr, rthread, Assembler::xword, /*acquire*/ true,
212+
cmpxchg(tmp, zr, rscratch2, Assembler::xword, /*acquire*/ true,
215213
/*release*/ true, /*weak*/ false, tmp3Reg); // Sets flags for result
216214

217215
// Store a non-null value into the box to avoid looking like a re-entrant
@@ -223,7 +221,7 @@ void C2_MacroAssembler::fast_lock(Register objectReg, Register boxReg, Register
223221

224222
br(Assembler::EQ, cont); // CAS success means locking succeeded
225223

226-
cmp(tmp3Reg, rthread);
224+
cmp(tmp3Reg, rscratch2);
227225
br(Assembler::NE, cont); // Check for recursive locking
228226

229227
// Recursive lock case
@@ -236,7 +234,9 @@ void C2_MacroAssembler::fast_lock(Register objectReg, Register boxReg, Register
236234
br(Assembler::NE, no_count);
237235

238236
bind(count);
239-
increment(Address(rthread, JavaThread::held_monitor_count_offset()));
237+
if (LockingMode == LM_LEGACY) {
238+
inc_held_monitor_count(rscratch1);
239+
}
240240

241241
bind(no_count);
242242
}
@@ -343,15 +343,17 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, Registe
343343
br(Assembler::NE, no_count);
344344

345345
bind(count);
346-
decrement(Address(rthread, JavaThread::held_monitor_count_offset()));
346+
if (LockingMode == LM_LEGACY) {
347+
dec_held_monitor_count(rscratch1);
348+
}
347349

348350
bind(no_count);
349351
}
350352

351353
void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Register t1,
352354
Register t2, Register t3) {
353355
assert(LockingMode == LM_LIGHTWEIGHT, "must be");
354-
assert_different_registers(obj, box, t1, t2, t3);
356+
assert_different_registers(obj, box, t1, t2, t3, rscratch2);
355357

356358
// Handle inflated monitor.
357359
Label inflated;
@@ -467,13 +469,14 @@ void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Regist
467469
// Compute owner address.
468470
lea(t2_owner_addr, owner_address);
469471

470-
// CAS owner (null => current thread).
471-
cmpxchg(t2_owner_addr, zr, rthread, Assembler::xword, /*acquire*/ true,
472+
// Try to CAS owner (no owner => current thread's _lock_id).
473+
ldr(rscratch2, Address(rthread, JavaThread::lock_id_offset()));
474+
cmpxchg(t2_owner_addr, zr, rscratch2, Assembler::xword, /*acquire*/ true,
472475
/*release*/ false, /*weak*/ false, t3_owner);
473476
br(Assembler::EQ, monitor_locked);
474477

475478
// Check if recursive.
476-
cmp(t3_owner, rthread);
479+
cmp(t3_owner, rscratch2);
477480
br(Assembler::NE, slow_path);
478481

479482
// Recursive.
@@ -486,7 +489,6 @@ void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Regist
486489
}
487490

488491
bind(locked);
489-
increment(Address(rthread, JavaThread::held_monitor_count_offset()));
490492

491493
#ifdef ASSERT
492494
// Check that locked label is reached with Flags == EQ.
@@ -655,7 +657,6 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register box, Regi
655657
}
656658

657659
bind(unlocked);
658-
decrement(Address(rthread, JavaThread::held_monitor_count_offset()));
659660
cmp(zr, zr); // Set Flags to EQ => fast path
660661

661662
#ifdef ASSERT

src/hotspot/cpu/aarch64/continuationFreezeThaw_aarch64.inline.hpp

+35-7
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -129,6 +129,11 @@ void FreezeBase::adjust_interpreted_frame_unextended_sp(frame& f) {
129129
}
130130
}
131131

132+
inline void FreezeBase::prepare_freeze_interpreted_top_frame(frame& f) {
133+
assert(f.interpreter_frame_last_sp() == nullptr, "should be null for top frame");
134+
f.interpreter_frame_set_last_sp(f.unextended_sp());
135+
}
136+
132137
inline void FreezeBase::relativize_interpreted_frame_metadata(const frame& f, const frame& hf) {
133138
assert(hf.fp() == hf.unextended_sp() + (f.fp() - f.unextended_sp()), "");
134139
assert((f.at(frame::interpreter_frame_last_sp_offset) != 0)
@@ -149,10 +154,16 @@ inline void FreezeBase::relativize_interpreted_frame_metadata(const frame& f, co
149154
// extended_sp is already relativized by TemplateInterpreterGenerator::generate_normal_entry or
150155
// AbstractInterpreter::layout_activation
151156

157+
// The interpreter native wrapper code adds space in the stack equal to size_of_parameters()
158+
// after the fixed part of the frame. For wait0 this is equal to 3 words (this + long parameter).
159+
// We adjust by this size since otherwise the saved last sp will be less than the extended_sp.
160+
DEBUG_ONLY(Method* m = hf.interpreter_frame_method();)
161+
DEBUG_ONLY(int extra_space = m->is_object_wait0() ? m->size_of_parameters() : 0;)
162+
152163
assert((hf.fp() - hf.unextended_sp()) == (f.fp() - f.unextended_sp()), "");
153164
assert(hf.unextended_sp() == (intptr_t*)hf.at(frame::interpreter_frame_last_sp_offset), "");
154165
assert(hf.unextended_sp() <= (intptr_t*)hf.at(frame::interpreter_frame_initial_sp_offset), "");
155-
assert(hf.unextended_sp() > (intptr_t*)hf.at(frame::interpreter_frame_extended_sp_offset), "");
166+
assert(hf.unextended_sp() + extra_space > (intptr_t*)hf.at(frame::interpreter_frame_extended_sp_offset), "");
156167
assert(hf.fp() > (intptr_t*)hf.at(frame::interpreter_frame_initial_sp_offset), "");
157168
assert(hf.fp() <= (intptr_t*)hf.at(frame::interpreter_frame_locals_offset), "");
158169
}
@@ -213,7 +224,6 @@ template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame&
213224
// If caller is interpreted it already made room for the callee arguments
214225
int overlap = caller.is_interpreted_frame() ? ContinuationHelper::InterpretedFrame::stack_argsize(hf) : 0;
215226
const int fsize = (int)(ContinuationHelper::InterpretedFrame::frame_bottom(hf) - hf.unextended_sp() - overlap);
216-
const int locals = hf.interpreter_frame_method()->max_locals();
217227
intptr_t* frame_sp = caller.unextended_sp() - fsize;
218228
intptr_t* fp = frame_sp + (hf.fp() - heap_sp);
219229
if ((intptr_t)fp % frame::frame_alignment != 0) {
@@ -235,7 +245,7 @@ template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame&
235245
int fsize = FKind::size(hf);
236246
intptr_t* frame_sp = caller.unextended_sp() - fsize;
237247
if (bottom || caller.is_interpreted_frame()) {
238-
int argsize = hf.compiled_frame_stack_argsize();
248+
int argsize = FKind::stack_argsize(hf);
239249

240250
fsize += argsize;
241251
frame_sp -= argsize;
@@ -252,8 +262,8 @@ template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame&
252262
// we need to recreate a "real" frame pointer, pointing into the stack
253263
fp = frame_sp + FKind::size(hf) - frame::sender_sp_offset;
254264
} else {
255-
fp = FKind::stub
256-
? frame_sp + fsize - frame::sender_sp_offset // on AArch64, this value is used for the safepoint stub
265+
fp = FKind::stub || FKind::native
266+
? frame_sp + fsize - frame::sender_sp_offset // fp always points to the address below the pushed return pc. We need correct address.
257267
: *(intptr_t**)(hf.sp() - frame::sender_sp_offset); // we need to re-read fp because it may be an oop and we might have fixed the frame.
258268
}
259269
return frame(frame_sp, frame_sp, fp, hf.pc(), hf.cb(), hf.oop_map(), false); // TODO PERF : this computes deopt state; is it necessary?
@@ -277,6 +287,22 @@ inline void ThawBase::patch_pd(frame& f, const frame& caller) {
277287
patch_callee_link(caller, caller.fp());
278288
}
279289

290+
inline void ThawBase::patch_pd(frame& f, intptr_t* caller_sp) {
291+
intptr_t* fp = caller_sp - frame::sender_sp_offset;
292+
patch_callee_link(f, fp);
293+
}
294+
295+
inline intptr_t* ThawBase::push_cleanup_continuation() {
296+
frame enterSpecial = new_entry_frame();
297+
intptr_t* sp = enterSpecial.sp();
298+
299+
sp[-1] = (intptr_t)ContinuationEntry::cleanup_pc();
300+
sp[-2] = (intptr_t)enterSpecial.fp();
301+
302+
log_develop_trace(continuations, preempt)("push_cleanup_continuation initial sp: " INTPTR_FORMAT " final sp: " INTPTR_FORMAT, p2i(sp + 2 * frame::metadata_words), p2i(sp));
303+
return sp;
304+
}
305+
280306
inline void ThawBase::derelativize_interpreted_frame_metadata(const frame& hf, const frame& f) {
281307
// Make sure that last_sp is kept relativized.
282308
assert((intptr_t*)f.at_relative(frame::interpreter_frame_last_sp_offset) == f.unextended_sp(), "");
@@ -285,7 +311,9 @@ inline void ThawBase::derelativize_interpreted_frame_metadata(const frame& hf, c
285311
assert(f.at_absolute(frame::interpreter_frame_monitor_block_top_offset) <= frame::interpreter_frame_initial_sp_offset, "");
286312

287313
// Make sure that extended_sp is kept relativized.
288-
assert((intptr_t*)f.at_relative(frame::interpreter_frame_extended_sp_offset) < f.unextended_sp(), "");
314+
DEBUG_ONLY(Method* m = hf.interpreter_frame_method();)
315+
DEBUG_ONLY(int extra_space = m->is_object_wait0() ? m->size_of_parameters() : 0;) // see comment in relativize_interpreted_frame_metadata()
316+
assert((intptr_t*)f.at_relative(frame::interpreter_frame_extended_sp_offset) < f.unextended_sp() + extra_space, "");
289317
}
290318

291319
#endif // CPU_AARCH64_CONTINUATIONFREEZETHAW_AARCH64_INLINE_HPP

src/hotspot/cpu/aarch64/continuationHelper_aarch64.inline.hpp

+18-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2022, 2023, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -40,6 +40,22 @@ static inline intptr_t** link_address(const frame& f) {
4040
: (intptr_t**)(f.unextended_sp() + f.cb()->frame_size() - frame::sender_sp_offset);
4141
}
4242

43+
static inline void patch_return_pc_with_preempt_stub(frame& f) {
44+
if (f.is_runtime_frame()) {
45+
// Unlike x86 we don't know where in the callee frame the return pc is
46+
// saved so we can't patch the return from the VM call back to Java.
47+
// Instead, we will patch the return from the runtime stub back to the
48+
// compiled method so that the target returns to the preempt cleanup stub.
49+
intptr_t* caller_sp = f.sp() + f.cb()->frame_size();
50+
caller_sp[-1] = (intptr_t)StubRoutines::cont_preempt_stub();
51+
} else {
52+
// The target will check for preemption once it returns to the interpreter
53+
// or the native wrapper code and will manually jump to the preempt stub.
54+
JavaThread *thread = JavaThread::current();
55+
thread->set_preempt_alternate_return(StubRoutines::cont_preempt_stub());
56+
}
57+
}
58+
4359
inline int ContinuationHelper::frame_align_words(int size) {
4460
#ifdef _LP64
4561
return size & 1;
@@ -83,12 +99,12 @@ inline void ContinuationHelper::set_anchor_to_entry_pd(JavaFrameAnchor* anchor,
8399
anchor->set_last_Java_fp(entry->entry_fp());
84100
}
85101

86-
#ifdef ASSERT
87102
inline void ContinuationHelper::set_anchor_pd(JavaFrameAnchor* anchor, intptr_t* sp) {
88103
intptr_t* fp = *(intptr_t**)(sp - frame::sender_sp_offset);
89104
anchor->set_last_Java_fp(fp);
90105
}
91106

107+
#ifdef ASSERT
92108
inline bool ContinuationHelper::Frame::assert_frame_laid_out(frame f) {
93109
intptr_t* sp = f.sp();
94110
address pc = ContinuationHelper::return_address_at(

0 commit comments

Comments
 (0)