Skip to content

Commit

Permalink
8283327: Add methods to save/restore registers when calling into the …
Browse files Browse the repository at this point in the history
…VM from C1/interpreter barrier code

Reviewed-by: eosterlund, dlong
  • Loading branch information
Thomas Schatzl committed Mar 21, 2022
1 parent fd93015 commit eb4849e
Show file tree
Hide file tree
Showing 9 changed files with 417 additions and 151 deletions.
6 changes: 3 additions & 3 deletions src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
Expand Up @@ -2543,7 +2543,7 @@ void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[])
fatal("DEBUG MESSAGE: %s", msg);
}

RegSet MacroAssembler::call_clobbered_registers() {
RegSet MacroAssembler::call_clobbered_gp_registers() {
RegSet regs = RegSet::range(r0, r17) - RegSet::of(rscratch1, rscratch2);
#ifndef R18_RESERVED
regs += r18_tls;
Expand All @@ -2553,7 +2553,7 @@ RegSet MacroAssembler::call_clobbered_registers() {

void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) {
int step = 4 * wordSize;
push(call_clobbered_registers() - exclude, sp);
push(call_clobbered_gp_registers() - exclude, sp);
sub(sp, sp, step);
mov(rscratch1, -step);
// Push v0-v7, v16-v31.
Expand All @@ -2575,7 +2575,7 @@ void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) {

reinitialize_ptrue();

pop(call_clobbered_registers() - exclude, sp);
pop(call_clobbered_gp_registers() - exclude, sp);
}

void MacroAssembler::push_CPU_state(bool save_vectors, bool use_sve,
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
Expand Up @@ -468,7 +468,7 @@ class MacroAssembler: public Assembler {
// Set-based convenience overload: forwards the set's bit mask to the
// push_fp(bits, stack) overload, and emits nothing when the set is empty.
void push_fp(FloatRegSet regs, Register stack) {
  if (regs.bits() != 0) {
    push_fp(regs.bits(), stack);
  }
}
// Set-based convenience overload: forwards the set's bit mask to the
// pop_fp(bits, stack) overload, and emits nothing when the set is empty.
void pop_fp(FloatRegSet regs, Register stack) {
  if (regs.bits() != 0) {
    pop_fp(regs.bits(), stack);
  }
}

static RegSet call_clobbered_registers();
static RegSet call_clobbered_gp_registers();

// Set-based convenience overload: forwards the set's bit mask to the
// push_p(bits, stack) overload, and emits nothing when the set is empty.
void push_p(PRegSet regs, Register stack) {
  if (regs.bits() != 0) {
    push_p(regs.bits(), stack);
  }
}
// Set-based convenience overload: forwards the set's bit mask to the
// pop_p(bits, stack) overload, and emits nothing when the set is empty.
void pop_p(PRegSet regs, Register stack) {
  if (regs.bits() != 0) {
    pop_p(regs.bits(), stack);
  }
}
Expand Down
105 changes: 0 additions & 105 deletions src/hotspot/cpu/aarch64/register_aarch64.hpp
Expand Up @@ -314,115 +314,10 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl {
static const int max_pr;
};

template <class RegImpl = Register> class RegSetIterator;

// A set of registers, stored as a 32-bit mask in which bit N is set when the
// register whose encoding() is N belongs to the set.
template <class RegImpl>
class AbstractRegSet {
  uint32_t _bitset;

  // Wraps an already computed mask. Private: outside code builds sets from
  // registers via the public constructors, of() and range().
  AbstractRegSet(uint32_t bitset) : _bitset(bitset) { }

public:

  // The empty set.
  AbstractRegSet() : _bitset(0) { }

  // The set containing only r1. Intentionally not explicit: the set operators
  // and of() below rely on the implicit register -> set conversion.
  //
  // The shift is performed on an unsigned 32-bit one so that encoding 31 does
  // not shift a signed int into its sign bit. A register that reports
  // !is_valid() contributes no bit at all rather than being used as a shift
  // count.
  AbstractRegSet(RegImpl r1)
    : _bitset(r1->is_valid() ? (uint32_t(1) << r1->encoding()) : uint32_t(0)) { }

  // Set union.
  AbstractRegSet operator+(const AbstractRegSet aSet) const {
    AbstractRegSet result(_bitset | aSet._bitset);
    return result;
  }

  // Set difference: members of this set that are not in aSet.
  AbstractRegSet operator-(const AbstractRegSet aSet) const {
    AbstractRegSet result(_bitset & ~aSet._bitset);
    return result;
  }

  // In-place union.
  AbstractRegSet &operator+=(const AbstractRegSet aSet) {
    *this = *this + aSet;
    return *this;
  }

  // In-place difference.
  AbstractRegSet &operator-=(const AbstractRegSet aSet) {
    *this = *this - aSet;
    return *this;
  }

  // Factories for sets of one to four registers.
  static AbstractRegSet of(RegImpl r1) {
    return AbstractRegSet(r1);
  }

  static AbstractRegSet of(RegImpl r1, RegImpl r2) {
    return of(r1) + r2;
  }

  static AbstractRegSet of(RegImpl r1, RegImpl r2, RegImpl r3) {
    return of(r1, r2) + r3;
  }

  static AbstractRegSet of(RegImpl r1, RegImpl r2, RegImpl r3, RegImpl r4) {
    return of(r1, r2, r3) + r4;
  }

  // The set of registers whose encodings lie in [start, end], both inclusive.
  static AbstractRegSet range(RegImpl start, RegImpl end) {
    uint32_t bits = ~uint32_t(0);
    // Clear every bit below start's encoding.
    bits <<= start->encoding();
    // Push the bits above end's encoding out of the top of the word, then
    // shift back so that only the requested range remains set.
    bits <<= 31 - end->encoding();
    bits >>= 31 - end->encoding();

    return AbstractRegSet(bits);
  }

  // The raw mask.
  uint32_t bits() const { return _bitset; }

private:

  // Only declared here; the definition lives outside the class body.
  // NOTE(review): presumably yields the lowest-numbered member - confirm
  // against the specializations.
  RegImpl first();

public:

  // The iterator needs access to first().
  friend class RegSetIterator<RegImpl>;

  // Iterator over the members of this set (defined outside the class body).
  RegSetIterator<RegImpl> begin();
};

// Concrete register-set types, one per register kind.
using RegSet      = AbstractRegSet<Register>;
using FloatRegSet = AbstractRegSet<FloatRegister>;
using PRegSet     = AbstractRegSet<PRegister>;

// Forward cursor over the members of an AbstractRegSet. The iterator holds
// its own copy of the set: dereferencing yields the set's first() register,
// and advancing removes that register from the copy.
template <class RegImpl>
class RegSetIterator {
  AbstractRegSet<RegImpl> _regs;

public:
  RegSetIterator(AbstractRegSet<RegImpl> x) : _regs(x) { }
  RegSetIterator(const RegSetIterator& mit) : _regs(mit._regs) { }

  // Step past the current register. If first() does not report a valid
  // register, the iterator is left unchanged.
  RegSetIterator& operator++() {
    RegImpl current = _regs.first();
    if (current->is_valid()) {
      _regs -= current;
    }
    return *this;
  }

  // Two iterators compare equal when the same registers remain in both.
  bool operator==(const RegSetIterator& rhs) const {
    return rhs._regs.bits() == _regs.bits();
  }

  bool operator!=(const RegSetIterator& rhs) const {
    return _regs.bits() != rhs._regs.bits();
  }

  // The register the iterator currently refers to.
  RegImpl operator*() {
    return _regs.first();
  }
};

// Creates an iterator that works on a copy of this set.
template <class RegImpl>
inline RegSetIterator<RegImpl> AbstractRegSet<RegImpl>::begin() {
  RegSetIterator<RegImpl> it(*this);
  return it;
}

template <>
inline Register AbstractRegSet<Register>::first() {
uint32_t first = _bitset & -_bitset;
Expand Down
7 changes: 4 additions & 3 deletions src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
Expand Up @@ -26,6 +26,7 @@
#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "asm/register.hpp"
#include "atomic_aarch64.hpp"
#include "compiler/oopMap.hpp"
#include "gc/shared/barrierSet.hpp"
Expand Down Expand Up @@ -1320,10 +1321,10 @@ class StubGenerator: public StubCodeGenerator {
void clobber_registers() {
#ifdef ASSERT
RegSet clobbered
= MacroAssembler::call_clobbered_registers() - rscratch1;
= MacroAssembler::call_clobbered_gp_registers() - rscratch1;
__ mov(rscratch1, (uint64_t)0xdeadbeef);
__ orr(rscratch1, rscratch1, rscratch1, Assembler::LSL, 32);
for (RegSetIterator<> it = clobbered.begin(); *it != noreg; ++it) {
for (RegSetIterator<Register> it = clobbered.begin(); *it != noreg; ++it) {
__ mov(*it, rscratch1);
}
#endif
Expand Down Expand Up @@ -6629,7 +6630,7 @@ class StubGenerator: public StubCodeGenerator {

// Register allocation

RegSetIterator<> regs = (RegSet::range(r0, r26) - r18_tls).begin();
RegSetIterator<Register> regs = (RegSet::range(r0, r26) - r18_tls).begin();
Pa_base = *regs; // Argument registers
if (squaring)
Pb_base = Pa_base;
Expand Down
58 changes: 19 additions & 39 deletions src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp
Expand Up @@ -67,7 +67,7 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm

__ jcc(Assembler::equal, filtered);

__ pusha(); // push registers
__ push_call_clobbered_registers(false /* save_fpu */);
#ifdef _LP64
if (count == c_rarg0) {
if (addr == c_rarg1) {
Expand All @@ -90,15 +90,15 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry),
addr, count);
#endif
__ popa();
__ pop_call_clobbered_registers(false /* save_fpu */);

__ bind(filtered);
}
}

void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
Register addr, Register count, Register tmp) {
__ pusha(); // push registers (overkill)
__ push_call_clobbered_registers(false /* save_fpu */);
#ifdef _LP64
if (c_rarg0 == count) { // On win64 c_rarg0 == rcx
assert_different_registers(c_rarg1, addr);
Expand All @@ -114,7 +114,7 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry),
addr, count);
#endif
__ popa();
__ pop_call_clobbered_registers(false /* save_fpu */);
}

void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
Expand Down Expand Up @@ -204,14 +204,15 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
__ jmp(done);

__ bind(runtime);
// save the live input values
if(tosca_live) __ push(rax);

if (obj != noreg && obj != rax)
__ push(obj);
// Determine and save the live input values
RegSet saved;
if (tosca_live) saved += RegSet::of(rax);
if (obj != noreg && obj != rax) saved += RegSet::of(obj);
if (pre_val != rax) saved += RegSet::of(pre_val);
NOT_LP64( saved += RegSet::of(thread); )

if (pre_val != rax)
__ push(pre_val);
__ push_set(saved);

// Calling the runtime using the regular call_VM_leaf mechanism generates
// code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
Expand All @@ -225,8 +226,6 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
// So when we do not have a full interpreter frame on the stack
// expand_call should be passed true.

NOT_LP64( __ push(thread); )

if (expand_call) {
LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
#ifdef _LP64
Expand All @@ -244,17 +243,7 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
} else {
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread);
}

NOT_LP64( __ pop(thread); )

// save the live input values
if (pre_val != rax)
__ pop(pre_val);

if (obj != noreg && obj != rax)
__ pop(obj);

if(tosca_live) __ pop(rax);
__ pop_set(saved);

__ bind(done);
}
Expand Down Expand Up @@ -328,15 +317,10 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,

__ bind(runtime);
// save the live input values
__ push(store_addr);
#ifdef _LP64
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, r15_thread);
#else
__ push(thread);
RegSet saved = RegSet::of(store_addr NOT_LP64(COMMA thread));
__ push_set(saved);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread);
__ pop(thread);
#endif
__ pop(store_addr);
__ pop_set(saved);

__ bind(done);
}
Expand Down Expand Up @@ -495,13 +479,13 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*

__ bind(runtime);

__ save_live_registers_no_oop_map(true);
__ push_call_clobbered_registers();

// load the pre-value
__ load_parameter(0, rcx);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), rcx, thread);

__ restore_live_registers(true);
__ pop_call_clobbered_registers();

__ bind(done);

Expand All @@ -514,9 +498,6 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
__ prologue("g1_post_barrier", false);

// arg0: store_address
Address store_addr(rbp, 2*BytesPerWord);

CardTableBarrierSet* ct =
barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());

Expand Down Expand Up @@ -572,12 +553,11 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler*
__ jmp(enqueued);

__ bind(runtime);

__ save_live_registers_no_oop_map(true);
__ push_call_clobbered_registers();

__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread);

__ restore_live_registers(true);
__ pop_call_clobbered_registers();

__ bind(enqueued);
__ pop(rdx);
Expand Down

1 comment on commit eb4849e

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.