Skip to content

Commit

Permalink
8180450: secondary_super_cache does not scale well
Browse files Browse the repository at this point in the history
Co-authored-by: Vladimir Ivanov <vlivanov@openjdk.org>
Reviewed-by: kvn, vlivanov, dlong
  • Loading branch information
Andrew Haley and Vladimir Ivanov committed Apr 16, 2024
1 parent bfff02e commit f11a496
Show file tree
Hide file tree
Showing 40 changed files with 2,209 additions and 48 deletions.
35 changes: 34 additions & 1 deletion src/hotspot/cpu/aarch64/aarch64.ad
@@ -1,6 +1,6 @@
//
// Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2014, 2021, Red Hat, Inc. All rights reserved.
// Copyright (c) 2014, 2024, Red Hat, Inc. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -16201,6 +16201,39 @@ instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_
ins_pipe(pipe_class_memory);
%}

instruct partialSubtypeCheckConstSuper(iRegP_R4 sub, iRegP_R0 super_reg, immP super_con, vRegD_V0 vtemp, iRegP_R5 result,
iRegP_R1 tempR1, iRegP_R2 tempR2, iRegP_R3 tempR3,
rFlagsReg cr)
%{
match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
predicate(UseSecondarySupersTable);
effect(KILL cr, TEMP tempR1, TEMP tempR2, TEMP tempR3, TEMP vtemp);

ins_cost(700); // smaller than the next version
format %{ "partialSubtypeCheck $result, $sub, super" %}

ins_encode %{
bool success = false;
u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
if (InlineSecondarySupersTest) {
success = __ lookup_secondary_supers_table($sub$$Register, $super_reg$$Register,
$tempR1$$Register, $tempR2$$Register, $tempR3$$Register,
$vtemp$$FloatRegister,
$result$$Register,
super_klass_slot);
} else {
address call = __ trampoline_call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
success = (call != nullptr);
}
if (!success) {
ciEnv::current()->record_failure("CodeCache is full");
return;
}
%}

ins_pipe(pipe_class_memory);
%}

instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
%{
match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
Expand Down
3 changes: 2 additions & 1 deletion src/hotspot/cpu/aarch64/assembler_aarch64.hpp
@@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
* Copyright (c) 2014, 2024, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -2595,6 +2595,7 @@ template<typename R, typename... Rx>

#undef INSN

// Advanced SIMD across lanes
#define INSN(NAME, opc, opc2, accepted) \
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { \
guarantee(T != T1Q && T != T1D, "incorrect arrangement"); \
Expand Down
240 changes: 239 additions & 1 deletion src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
@@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
* Copyright (c) 2014, 2024, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -1514,6 +1514,9 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
Label* L_success,
Label* L_failure,
bool set_cond_codes) {
// NB! Callers may assume that, when temp2_reg is a valid register,
// this code sets it to a nonzero value.

assert_different_registers(sub_klass, super_klass, temp_reg);
if (temp2_reg != noreg)
assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, rscratch1);
Expand Down Expand Up @@ -1593,6 +1596,241 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
bind(L_fallthrough);
}

// Ensure that the inline code and the stub are using the same registers.
#define LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS \
do { \
assert(r_super_klass == r0 && \
r_array_base == r1 && \
r_array_length == r2 && \
(r_array_index == r3 || r_array_index == noreg) && \
(r_sub_klass == r4 || r_sub_klass == noreg) && \
(r_bitmap == rscratch2 || r_bitmap == noreg) && \
(result == r5 || result == noreg), "registers must match aarch64.ad"); \
} while(0)

// Return true: we succeeded in generating this code
bool MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass,
Register r_super_klass,
Register temp1,
Register temp2,
Register temp3,
FloatRegister vtemp,
Register result,
u1 super_klass_slot,
bool stub_is_near) {
assert_different_registers(r_sub_klass, temp1, temp2, temp3, result, rscratch1, rscratch2);

Label L_fallthrough;

BLOCK_COMMENT("lookup_secondary_supers_table {");

const Register
r_array_base = temp1, // r1
r_array_length = temp2, // r2
r_array_index = temp3, // r3
r_bitmap = rscratch2;

LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS;

u1 bit = super_klass_slot;

// Make sure that result is nonzero if the TBZ below misses.
mov(result, 1);

// We're going to need the bitmap in a vector reg and in a core reg,
// so load both now.
ldr(r_bitmap, Address(r_sub_klass, Klass::bitmap_offset()));
if (bit != 0) {
ldrd(vtemp, Address(r_sub_klass, Klass::bitmap_offset()));
}
// First check the bitmap to see if super_klass might be present. If
// the bit is zero, we are certain that super_klass is not one of
// the secondary supers.
tbz(r_bitmap, bit, L_fallthrough);

// Get the first array index that can contain super_klass into r_array_index.
if (bit != 0) {
shld(vtemp, vtemp, Klass::SECONDARY_SUPERS_TABLE_MASK - bit);
cnt(vtemp, T8B, vtemp);
addv(vtemp, T8B, vtemp);
fmovd(r_array_index, vtemp);
} else {
mov(r_array_index, (u1)1);
}
// NB! r_array_index is off by 1. It is compensated by keeping r_array_base off by 1 word.

// We will consult the secondary-super array.
ldr(r_array_base, Address(r_sub_klass, in_bytes(Klass::secondary_supers_offset())));

// The value i in r_array_index is >= 1, so even though r_array_base
// points to the length, we don't need to adjust it to point to the
// data.
assert(Array<Klass*>::base_offset_in_bytes() == wordSize, "Adjust this code");
assert(Array<Klass*>::length_offset_in_bytes() == 0, "Adjust this code");

ldr(result, Address(r_array_base, r_array_index, Address::lsl(LogBytesPerWord)));
eor(result, result, r_super_klass);
cbz(result, L_fallthrough); // Found a match

// Is there another entry to check? Consult the bitmap.
tbz(r_bitmap, (bit + 1) & Klass::SECONDARY_SUPERS_TABLE_MASK, L_fallthrough);

// Linear probe.
if (bit != 0) {
ror(r_bitmap, r_bitmap, bit);
}

// The slot we just inspected is at secondary_supers[r_array_index - 1].
// The next slot to be inspected, by the stub we're about to call,
// is secondary_supers[r_array_index]. Bits 0 and 1 in the bitmap
// have been checked.
Address stub = RuntimeAddress(StubRoutines::lookup_secondary_supers_table_slow_path_stub());
if (stub_is_near) {
bl(stub);
} else {
address call = trampoline_call(stub);
if (call == nullptr) {
return false; // trampoline allocation failed
}
}

BLOCK_COMMENT("} lookup_secondary_supers_table");

bind(L_fallthrough);

if (VerifySecondarySupers) {
verify_secondary_supers_table(r_sub_klass, r_super_klass, // r4, r0
temp1, temp2, result); // r1, r2, r5
}
return true;
}

// Called by code generated by check_klass_subtype_slow_path
// above. This is called when there is a collision in the hashed
// lookup in the secondary supers array.
void MacroAssembler::lookup_secondary_supers_table_slow_path(Register r_super_klass,
Register r_array_base,
Register r_array_index,
Register r_bitmap,
Register temp1,
Register result) {
assert_different_registers(r_super_klass, r_array_base, r_array_index, r_bitmap, temp1, result, rscratch1);

const Register
r_array_length = temp1,
r_sub_klass = noreg; // unused

LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS;

Label L_fallthrough, L_huge;

// Load the array length.
ldrw(r_array_length, Address(r_array_base, Array<Klass*>::length_offset_in_bytes()));
// And adjust the array base to point to the data.
// NB! Effectively increments current slot index by 1.
assert(Array<Klass*>::base_offset_in_bytes() == wordSize, "");
add(r_array_base, r_array_base, Array<Klass*>::base_offset_in_bytes());

// The bitmap is full to bursting.
// Implicit invariant: BITMAP_FULL implies (length > 0)
assert(Klass::SECONDARY_SUPERS_BITMAP_FULL == ~uintx(0), "");
cmn(r_bitmap, (u1)1);
br(EQ, L_huge);

// NB! Our caller has checked bits 0 and 1 in the bitmap. The
// current slot (at secondary_supers[r_array_index]) has not yet
// been inspected, and r_array_index may be out of bounds if we
// wrapped around the end of the array.

{ // This is conventional linear probing, but instead of terminating
// when a null entry is found in the table, we maintain a bitmap
// in which a 0 indicates missing entries.
// The check above guarantees there are 0s in the bitmap, so the loop
// eventually terminates.
Label L_loop;
bind(L_loop);

// Check for wraparound.
cmp(r_array_index, r_array_length);
csel(r_array_index, zr, r_array_index, GE);

ldr(rscratch1, Address(r_array_base, r_array_index, Address::lsl(LogBytesPerWord)));
eor(result, rscratch1, r_super_klass);
cbz(result, L_fallthrough);

tbz(r_bitmap, 2, L_fallthrough); // look-ahead check (Bit 2); result is non-zero

ror(r_bitmap, r_bitmap, 1);
add(r_array_index, r_array_index, 1);
b(L_loop);
}

{ // Degenerate case: more than 64 secondary supers.
// FIXME: We could do something smarter here, maybe a vectorized
// comparison or a binary search, but is that worth any added
// complexity?
bind(L_huge);
cmp(sp, zr); // Clear Z flag; SP is never zero
repne_scan(r_array_base, r_super_klass, r_array_length, rscratch1);
cset(result, NE); // result == 0 iff we got a match.
}

bind(L_fallthrough);
}

// Make sure that the hashed lookup and a linear scan agree.
void MacroAssembler::verify_secondary_supers_table(Register r_sub_klass,
Register r_super_klass,
Register temp1,
Register temp2,
Register result) {
assert_different_registers(r_sub_klass, r_super_klass, temp1, temp2, result, rscratch1);

const Register
r_array_base = temp1,
r_array_length = temp2,
r_array_index = noreg, // unused
r_bitmap = noreg; // unused

LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS;

BLOCK_COMMENT("verify_secondary_supers_table {");

// We will consult the secondary-super array.
ldr(r_array_base, Address(r_sub_klass, in_bytes(Klass::secondary_supers_offset())));

// Load the array length.
ldrw(r_array_length, Address(r_array_base, Array<Klass*>::length_offset_in_bytes()));
// And adjust the array base to point to the data.
add(r_array_base, r_array_base, Array<Klass*>::base_offset_in_bytes());

cmp(sp, zr); // Clear Z flag; SP is never zero
// Scan R2 words at [R5] for an occurrence of R0.
// Set NZ/Z based on last compare.
repne_scan(/*addr*/r_array_base, /*value*/r_super_klass, /*count*/r_array_length, rscratch2);
// rscratch1 == 0 iff we got a match.
cset(rscratch1, NE);

Label passed;
cmp(result, zr);
cset(result, NE); // normalize result to 0/1 for comparison

cmp(rscratch1, result);
br(EQ, passed);
{
mov(r0, r_super_klass); // r0 <- r0
mov(r1, r_sub_klass); // r1 <- r4
mov(r2, /*expected*/rscratch1); // r2 <- r8
mov(r3, result); // r3 <- r5
mov(r4, (address)("mismatch")); // r4 <- const
rt_call(CAST_FROM_FN_PTR(address, Klass::on_secondary_supers_verification_failure), rscratch2);
should_not_reach_here();
}
bind(passed);

BLOCK_COMMENT("} verify_secondary_supers_table");
}

void MacroAssembler::clinit_barrier(Register klass, Register scratch, Label* L_fast_path, Label* L_slow_path) {
assert(L_fast_path != nullptr || L_slow_path != nullptr, "at least one is required");
assert_different_registers(klass, rthread, scratch);
Expand Down
28 changes: 27 additions & 1 deletion src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
@@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
* Copyright (c) 2014, 2024, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -999,6 +999,31 @@ class MacroAssembler: public Assembler {
Label* L_failure,
bool set_cond_codes = false);

// As above, but with a constant super_klass.
// The result is in Register result, not the condition codes.
bool lookup_secondary_supers_table(Register r_sub_klass,
Register r_super_klass,
Register temp1,
Register temp2,
Register temp3,
FloatRegister vtemp,
Register result,
u1 super_klass_slot,
bool stub_is_near = false);

void verify_secondary_supers_table(Register r_sub_klass,
Register r_super_klass,
Register temp1,
Register temp2,
Register result);

void lookup_secondary_supers_table_slow_path(Register r_super_klass,
Register r_array_base,
Register r_array_index,
Register r_bitmap,
Register temp1,
Register result);

// Simplified, combined version, good for typical uses.
// Falls through on failure.
void check_klass_subtype(Register sub_klass,
Expand Down Expand Up @@ -1213,6 +1238,7 @@ class MacroAssembler: public Assembler {
// - relocInfo::virtual_call_type
//
// Return: the call PC or null if CodeCache is full.
// Clobbers: rscratch1
address trampoline_call(Address entry);

static bool far_branches() {
Expand Down

1 comment on commit f11a496

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.