Skip to content

Commit

Permalink
JFR intrinsics for Loom
Browse files Browse the repository at this point in the history
  • Loading branch information
mgronlun committed Feb 27, 2020
1 parent 438f020 commit d2f8c47
Show file tree
Hide file tree
Showing 52 changed files with 788 additions and 444 deletions.
81 changes: 74 additions & 7 deletions src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -50,6 +50,9 @@
#if INCLUDE_ZGC
#include "gc/z/zThreadLocalData.hpp"
#endif
#if INCLUDE_JFR
#include "jfr/support/jfrIntrinsics.hpp"
#endif

// Declaration and definition of StubGenerator (no .hpp file).
// For a more detailed description of the stub routine structure
Expand Down Expand Up @@ -1682,7 +1685,7 @@ class StubGenerator: public StubCodeGenerator {
// i.e. orig to == Address(end_to, qword_count, Address::times_8, 8)

// Copy in multi-bytes chunks

if (UseUnalignedLoadStores) {
if (align) { // align target
NearLabel L_aligned_128, L_aligned_256, L_aligned_512;
Expand Down Expand Up @@ -1925,7 +1928,7 @@ class StubGenerator: public StubCodeGenerator {
__ align(OptoLoopAlignment);
__ BIND(L_loop_avx512);
if (prefetchnt) {
__ prefetchnta(Address(from, qword_count, Address::times_8, -prefetch_distance));
__ prefetchnta(Address(from, qword_count, Address::times_8, -prefetch_distance));
}
__ evmovdqa(xmm0, Address(from, qword_count, Address::times_8, 0), Assembler::AVX_512bit, nt);
__ evmovdqul(Address(to, qword_count, Address::times_8, 0), xmm0, Assembler::AVX_512bit);
Expand All @@ -1936,7 +1939,7 @@ class StubGenerator: public StubCodeGenerator {

__ bind(L_loop_avx2);
if (prefetchnt) {
__ prefetchnta(Address(from, qword_count, Address::times_8, -prefetch_distance));
__ prefetchnta(Address(from, qword_count, Address::times_8, -prefetch_distance));
}
__ vmovdqa(xmm0, Address(from, qword_count, Address::times_8, 32), nt);
__ vmovdqu(Address(to, qword_count, Address::times_8, 32), xmm0);
Expand All @@ -1954,7 +1957,7 @@ class StubGenerator: public StubCodeGenerator {
__ align(OptoLoopAlignment);
__ BIND(L_loop);
if (prefetchnt) {
__ prefetchnta(Address(from, qword_count, Address::times_8, -prefetch_distance));
__ prefetchnta(Address(from, qword_count, Address::times_8, -prefetch_distance));
}
if (UseAVX == 2) {
__ vmovdqa(xmm0, Address(from, qword_count, Address::times_8, 32), nt);
Expand Down Expand Up @@ -1997,7 +2000,7 @@ class StubGenerator: public StubCodeGenerator {
__ align(OptoLoopAlignment);
__ BIND(L_loop);
if (prefetchnt) {
__ prefetchnta(Address(from, qword_count, Address::times_8, -prefetch_distance));
__ prefetchnta(Address(from, qword_count, Address::times_8, -prefetch_distance));
}
__ movq(rax, Address(from, qword_count, Address::times_8, 24));
__ movq(Address(to, qword_count, Address::times_8, 24), rax);
Expand Down Expand Up @@ -2026,7 +2029,7 @@ class StubGenerator: public StubCodeGenerator {
// Copy trailing qwords
__ BIND(L_copy_8_bytes);
if (nt) {
__ prefetchnta(Address(from, qword_count, Address::times_8, -8));
__ prefetchnta(Address(from, qword_count, Address::times_8, -8));
}
__ movq(rax, Address(from, qword_count, Address::times_8, -8));
__ movq(Address(to, qword_count, Address::times_8, -8), rax);
Expand Down Expand Up @@ -6978,6 +6981,67 @@ RuntimeStub* generate_cont_doYield() {
return start;
}

#if INCLUDE_JFR

// Emits code that stores a pc and an sp into the thread's Java frame anchor
// fields (last_Java_pc and last_Java_sp), addressed relative to `thread`.
//   _masm  - assembler the instructions are emitted into.
//   thread - register used as the base for the anchor field addresses.
// The emitted code overwrites c_rarg0 and c_rarg2, which are used as scratch.
static void jfr_set_last_java_frame(MacroAssembler* _masm, Register thread) {
Register last_java_pc = c_rarg0;
Register last_java_sp = c_rarg2;
// pc to record is the word currently at the top of the stack
// (presumably the return address of the call into the stub - TODO confirm).
__ movptr(last_java_pc, Address(rsp, 0));
// sp to record is one word above the current stack pointer.
__ lea(last_java_sp, Address(rsp, wordSize));
__ vzeroupper();
Address anchor_java_pc(thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
__ movptr(anchor_java_pc, last_java_pc);
__ movptr(Address(thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}

// Emits the common entry sequence for the JFR stubs: records the last Java
// frame in the thread's anchor, then hands `thread` to the upcoming runtime
// call (pushed on the stack for non-LP64, moved into c_rarg0 for LP64).
static void jfr_prologue(MacroAssembler* _masm, Register thread) {
jfr_set_last_java_frame(_masm, thread);
NOT_LP64(__ push(thread));
// c_rarg0 was used as scratch by jfr_set_last_java_frame, so it is loaded
// with the thread only after that call.
LP64_ONLY(__ movptr(c_rarg0, thread));
}

// Emits the common exit sequence for the JFR stubs.
// The handle in rax is dereferenced here using the correct load constructs:
// the last Java frame is reset, and if rax is non-zero the value at
// Address(rax, 0) is loaded into rax through the BarrierSetAssembler with
// ACCESS_READ | IN_NATIVE decorators. A zero rax is left unchanged.
static void jfr_epilogue(MacroAssembler* _masm, Register thread) {
// NOTE(review): jfr_prologue pushes `thread` on non-LP64 while this pops into
// rdi - confirm the two are meant to pair up.
NOT_LP64(__ pop(rdi));
__ reset_last_Java_frame(false);
Label null_jobject;
// Skip the dereference when the returned handle is zero.
__ testq(rax, rax);
__ jcc(Assembler::zero, null_jobject);
DecoratorSet decorators = ACCESS_READ | IN_NATIVE;
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
// rax is both the address base and the destination; c_rarg1 and thread are
// passed as the trailing register arguments of load_at.
bs->load_at(_masm, decorators, T_OBJECT, rax, Address(rax, 0), c_rarg1, thread);
__ bind(null_jobject);
}

// For c2: c_rarg0 is junk, c_rarg1 is the thread id. Call to runtime to write a checkpoint.
// Runtime will return a jobject handle to the event writer. The handle is dereferenced and the return value
// is the event writer oop.
// Generates the "jfr_write_checkpoint" stub and returns its entry address
// (the assembler pc captured before any instruction is emitted).
address generate_jfr_write_checkpoint() {
StubCodeMark mark(this, "jfr_write_checkpoint", "JFR C2 support for Virtual Threads");
address start = __ pc();
Register thread = get_thread();
// Prologue records the last Java frame and places the thread in c_rarg0 (LP64).
jfr_prologue(_masm, thread);
// Leaf call into the JFR runtime; 2 is passed as the argument count.
__ call_VM_leaf(CAST_FROM_FN_PTR(address, JFR_WRITE_CHECKPOINT_FUNCTION), 2);
// Epilogue resets the frame anchor and dereferences the handle in rax.
jfr_epilogue(_masm, thread);
__ ret(0);
return start;
}

// For c1: call the corresponding runtime routine, it returns a jobject handle to the event writer.
// The handle is dereferenced and the return value is the event writer oop.
// Generates the "jfr_get_event_writer" stub and returns its entry address
// (the assembler pc captured before any instruction is emitted).
address generate_jfr_get_event_writer() {
StubCodeMark mark(this, "jfr_get_event_writer", "JFR C1 support for Virtual Threads");
address start = __ pc();
Register thread = get_thread();
// Prologue records the last Java frame and places the thread in c_rarg0 (LP64).
jfr_prologue(_masm, thread);
// Leaf call into the JFR runtime; 1 is passed as the argument count.
__ call_VM_leaf(CAST_FROM_FN_PTR(address, JFR_GET_EVENT_WRITER_FUNCTION), 1);
// Epilogue resets the frame anchor and dereferences the handle in rax.
jfr_epilogue(_masm, thread);
__ ret(0);
return start;
}

#endif // INCLUDE_JFR

#undef __
#define __ masm->

Expand Down Expand Up @@ -7228,6 +7292,9 @@ RuntimeStub* generate_cont_doYield() {
StubRoutines::_cont_jump = generate_cont_jump();
StubRoutines::_cont_getSP = generate_cont_getSP();
StubRoutines::_cont_getPC = generate_cont_getPC();

JFR_ONLY(StubRoutines::_jfr_write_checkpoint = generate_jfr_write_checkpoint();)
JFR_ONLY(StubRoutines::_jfr_get_event_writer = generate_jfr_get_event_writer();)
}

void generate_all() {
Expand Down
5 changes: 1 addition & 4 deletions src/hotspot/share/c1/c1_Compiler.cpp
Expand Up @@ -229,10 +229,7 @@ bool Compiler::is_intrinsic_supported(const methodHandle& method) {
case vmIntrinsics::_Continuation_runLevel:
#ifdef JFR_HAVE_INTRINSICS
case vmIntrinsics::_counterTime:
//case vmIntrinsics::_getEventWriter:
#if defined(_LP64) || !defined(TRACE_ID_SHIFT)
case vmIntrinsics::_getClassId:
#endif
case vmIntrinsics::_getEventWriter:
#endif
break;
default:
Expand Down
60 changes: 10 additions & 50 deletions src/hotspot/share/c1/c1_LIRGenerator.cpp
Expand Up @@ -2952,53 +2952,18 @@ void LIRGenerator::do_IfOp(IfOp* x) {
}

#ifdef JFR_HAVE_INTRINSICS
// Emits LIR for the class-id intrinsic: loads the klass referenced by the
// argument (at java_lang_Class::klass_offset_in_bytes()), reads the 64-bit
// value at KLASS_TRACE_ID_OFFSET, sets its lowest bit and stores it back, and
// produces the (optionally masked and shifted) id as the intrinsic's result.
void LIRGenerator::do_ClassIDIntrinsic(Intrinsic* x) {
CodeEmitInfo* info = state_for(x);
// NOTE(review): info2 is not referenced again within this function.
CodeEmitInfo* info2 = new CodeEmitInfo(info); // Clone for the second null check

assert(info != NULL, "must have info");
LIRItem arg(x->argument_at(0), this);

arg.load_item();
LIR_Opr klass = new_register(T_METADATA);
// Load the klass pointer out of the argument object; `info` is attached to this load.
__ move(new LIR_Address(arg.result(), java_lang_Class::klass_offset_in_bytes(), T_ADDRESS), klass, info);
LIR_Opr id = new_register(T_LONG);
ByteSize offset = KLASS_TRACE_ID_OFFSET;
LIR_Address* trace_id_addr = new LIR_Address(klass, in_bytes(offset), T_LONG);

// Read-modify-write of the trace id: set bit 0 and write the value back.
__ move(trace_id_addr, id);
__ logical_or(id, LIR_OprFact::longConst(0x01l), id);
__ store(id, trace_id_addr);

// The adjustments below apply only to the returned value, not the stored one.
#ifdef TRACE_ID_META_BITS
__ logical_and(id, LIR_OprFact::longConst(~TRACE_ID_META_BITS), id);
#endif
#ifdef TRACE_ID_SHIFT
__ unsigned_shift_right(id, TRACE_ID_SHIFT, id);
#endif

__ move(id, rlock_result(x));
}

/*
void LIRGenerator::do_getEventWriter(Intrinsic* x) {
LabelObj* L_end = new LabelObj();
LIR_Address* jobj_addr = new LIR_Address(getThreadPointer(),
in_bytes(THREAD_LOCAL_WRITER_OFFSET_JFR),
T_OBJECT);
LabelObj* L_NULL = new LabelObj();
BasicTypeList signature(0);
CallingConvention* cc = frame_map()->c_calling_convention(&signature);
LIR_Opr reg = result_register_for(x->type());
address entry = StubRoutines::jfr_get_event_writer();
CodeEmitInfo* info = state_for(x, x->state());
__ call_runtime(entry, getThreadTemp(), reg, cc->args(), info);
LIR_Opr result = rlock_result(x);
__ move_wide(jobj_addr, result);
__ cmp(lir_cond_equal, result, LIR_OprFact::oopConst(NULL));
__ branch(lir_cond_equal, T_OBJECT, L_end->label());
LIR_Opr jobj = new_register(T_OBJECT);
__ move(result, jobj);
access_load(IN_NATIVE, T_OBJECT, LIR_OprFact::address(new LIR_Address(jobj, T_OBJECT)), result);
__ branch_destination(L_end->label());
__ move(reg, result);
}
*/

#endif

Expand Down Expand Up @@ -3028,17 +2993,12 @@ void LIRGenerator::do_Intrinsic(Intrinsic* x) {
}

#ifdef JFR_HAVE_INTRINSICS
case vmIntrinsics::_getClassId:
do_ClassIDIntrinsic(x);
case vmIntrinsics::_counterTime:
do_RuntimeCall(CAST_FROM_FN_PTR(address, JFR_TIME_FUNCTION), x);
break;
/*
case vmIntrinsics::_getEventWriter:
do_getEventWriter(x);
break;
*/
case vmIntrinsics::_counterTime:
do_RuntimeCall(CAST_FROM_FN_PTR(address, JFR_TIME_FUNCTION), x);
break;
#endif

case vmIntrinsics::_currentTimeMillis:
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/share/c1/c1_LIRGenerator.hpp
Expand Up @@ -479,7 +479,7 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure {

#ifdef JFR_HAVE_INTRINSICS
void do_ClassIDIntrinsic(Intrinsic* x);
// void do_getEventWriter(Intrinsic* x);
void do_getEventWriter(Intrinsic* x);
#endif

void do_RuntimeCall(address routine, Intrinsic* x);
Expand Down
1 change: 1 addition & 0 deletions src/hotspot/share/c1/c1_Runtime1.cpp
Expand Up @@ -333,6 +333,7 @@ const char* Runtime1::name_for_address(address entry) {
FUNCTION_CASE(entry, trace_block_entry);
#ifdef JFR_HAVE_INTRINSICS
FUNCTION_CASE(entry, JFR_TIME_FUNCTION);
FUNCTION_CASE(entry, StubRoutines::jfr_get_event_writer());
#endif
FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32());
FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32C());
Expand Down
13 changes: 8 additions & 5 deletions src/hotspot/share/classfile/javaClasses.cpp
Expand Up @@ -1900,9 +1900,15 @@ java_lang_Thread::ThreadStatus java_lang_Thread::get_thread_status(oop java_thre
return java_lang_Thread_FieldHolder::get_thread_status(holder);
}


// Returns the thread id held in the tid field of the given java.lang.Thread
// oop, restricted to its low 48 bits.
//   java_thread - the java.lang.Thread object to read from.
jlong java_lang_Thread::thread_id(oop java_thread) {
  // The 16 most significant bits can be used for tracing
  // so these bits are excluded using a mask.
  // (A leading unmasked return previously made this masking unreachable.)
  static const jlong tid_mask = (((jlong)1) << 48) - 1;
  return java_thread->long_field(_tid_offset) & tid_mask;
}

// Returns the offset of the tid field (_tid_offset) wrapped as a ByteSize.
ByteSize java_lang_Thread::thread_id_offset() {
return in_ByteSize(_tid_offset);
}

oop java_lang_Thread::continuation(oop java_thread) {
Expand Down Expand Up @@ -2032,7 +2038,6 @@ int java_lang_VirtualThread::static_notify_jvmti_events_offset = 0;
int java_lang_VirtualThread::_carrierThread_offset = 0;
int java_lang_VirtualThread::_continuation_offset = 0;
int java_lang_VirtualThread::_state_offset = 0;
int java_lang_VirtualThread::_jfrTraceId_offset = 0;

#define VTHREAD_FIELDS_DO(macro) \
macro(static_notify_jvmti_events_offset, k, "notifyJvmtiEvents", bool_signature, true); \
Expand All @@ -2045,7 +2050,6 @@ static jboolean vthread_notify_jvmti_events = JNI_FALSE;
// Computes the field offsets for java.lang.VirtualThread: looks up the
// VirtualThread klass and applies the offset-computing macros to the field
// lists VTHREAD_FIELDS_DO and VTHREAD_INJECTED_FIELDS.
void java_lang_VirtualThread::compute_offsets() {
// `k` is referenced by the VTHREAD_FIELDS_DO entries.
InstanceKlass* k = SystemDictionary::VirtualThread_klass();
VTHREAD_FIELDS_DO(FIELD_COMPUTE_OFFSET);
VTHREAD_INJECTED_FIELDS(INJECTED_FIELD_COMPUTE_OFFSET);
}

void java_lang_VirtualThread::init_static_notify_jvmti_events() {
Expand Down Expand Up @@ -2103,7 +2107,6 @@ java_lang_Thread::ThreadStatus java_lang_VirtualThread::map_state_to_thread_stat
#if INCLUDE_CDS
// Applies the offset-serializing macros to the java.lang.VirtualThread field
// lists (VTHREAD_FIELDS_DO and VTHREAD_INJECTED_FIELDS), passing closure `f`.
// Only compiled when INCLUDE_CDS is set.
void java_lang_VirtualThread::serialize_offsets(SerializeClosure* f) {
VTHREAD_FIELDS_DO(FIELD_SERIALIZE_OFFSET);
VTHREAD_INJECTED_FIELDS(INJECTED_FIELD_SERIALIZE_OFFSET);
}
#endif

Expand Down
12 changes: 4 additions & 8 deletions src/hotspot/share/classfile/javaClasses.hpp
Expand Up @@ -376,6 +376,7 @@ class java_lang_Class : AllStatic {
// Interface to java.lang.Thread objects

class java_lang_Thread : AllStatic {
friend class java_lang_VirtualThread;
private:
// Note that for this class the layout changed between JDK1.2 and JDK1.3,
// so we compute the offsets at startup rather than hard-wiring them.
Expand Down Expand Up @@ -431,6 +432,7 @@ class java_lang_Thread : AllStatic {
static jlong stackSize(oop java_thread);
// Thread ID
static jlong thread_id(oop java_thread);
static ByteSize thread_id_offset();
// Continuation
static oop continuation(oop java_thread);
static void set_continuation(oop java_thread, oop continuation);
Expand Down Expand Up @@ -576,17 +578,12 @@ class java_lang_ThreadGroup : AllStatic {

// Interface to java.lang.VirtualThread objects

#define VTHREAD_INJECTED_FIELDS(macro) \
macro(java_lang_VirtualThread, jfrTraceId, long_signature, false)

class java_lang_VirtualThread : AllStatic {
private:
static int static_notify_jvmti_events_offset;
static int _carrierThread_offset;
static int _continuation_offset;
static int _state_offset;
// keep in sync with java.lang.VirtualThread
static int _jfrTraceId_offset;

public:
enum {
Expand All @@ -600,6 +597,7 @@ class java_lang_VirtualThread : AllStatic {
WALKINGSTACK = 51,
TERMINATED = 99,
};

static void compute_offsets();
static void serialize_offsets(SerializeClosure* f) NOT_CDS_RETURN;

Expand All @@ -615,7 +613,6 @@ class java_lang_VirtualThread : AllStatic {
static java_lang_Thread::ThreadStatus map_state_to_thread_status(jshort state);
static void set_notify_jvmti_events(jboolean enable);
static void init_static_notify_jvmti_events();
static jlong jfrTraceId(oop vthread);
static jlong set_jfrTraceId(oop vthread, jlong id);
};

Expand Down Expand Up @@ -1944,8 +1941,7 @@ class InjectedField {
MEMBERNAME_INJECTED_FIELDS(macro) \
CALLSITECONTEXT_INJECTED_FIELDS(macro) \
STACKFRAMEINFO_INJECTED_FIELDS(macro) \
MODULE_INJECTED_FIELDS(macro) \
VTHREAD_INJECTED_FIELDS(macro)
MODULE_INJECTED_FIELDS(macro)

// Interface to hard-coded offset checking

Expand Down
6 changes: 1 addition & 5 deletions src/hotspot/share/classfile/javaClasses.inline.hpp
Expand Up @@ -292,12 +292,8 @@ inline bool java_lang_Continuation::done(oop ref) {
return ref->bool_field(_done_offset);
}

// Reads and returns the long field at _jfrTraceId_offset of the given object.
inline jlong java_lang_VirtualThread::jfrTraceId(oop ref) {
return ref->long_field(_jfrTraceId_offset);
}

// Stores `id` into the given object and returns `id` unchanged.
// The value is written to the long field at _jfrTraceId_offset and to the
// long field at java_lang_Thread::_tid_offset.
// NOTE(review): two stores of the same id to different offsets - confirm
// whether the write to _jfrTraceId_offset is still intended.
inline jlong java_lang_VirtualThread::set_jfrTraceId(oop ref, jlong id) {
ref->long_field_put(_jfrTraceId_offset, id);
ref->long_field_put(java_lang_Thread::_tid_offset, id);
return id;
}

Expand Down
1 change: 0 additions & 1 deletion src/hotspot/share/classfile/vmSymbols.cpp
Expand Up @@ -390,7 +390,6 @@ bool vmIntrinsics::can_trap(vmIntrinsics::ID id) {
switch(id) {
#ifdef JFR_HAVE_INTRINSICS
case vmIntrinsics::_counterTime:
case vmIntrinsics::_getClassId:
#endif
case vmIntrinsics::_currentTimeMillis:
case vmIntrinsics::_nanoTime:
Expand Down

0 comments on commit d2f8c47

Please sign in to comment.