Skip to content

Commit 0d1080d

Browse files
committed
8331117: [PPC64] secondary_super_cache does not scale well
Reviewed-by: rrich, amitkumar
1 parent 113a2c0 commit 0d1080d

File tree

6 files changed

+421
-0
lines changed

6 files changed

+421
-0
lines changed

src/hotspot/cpu/ppc/macroAssembler_ppc.cpp

Lines changed: 289 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2130,6 +2130,295 @@ void MacroAssembler::check_klass_subtype(Register sub_klass,
21302130
bind(L_failure); // Fallthru if not successful.
21312131
}
21322132

2133+
// scans count pointer sized words at [addr] for occurrence of value,
2134+
// generic (count must be >0)
2135+
// iff found: CR0 eq, scratch == 0
2136+
void MacroAssembler::repne_scan(Register addr, Register value, Register count, Register scratch) {
2137+
Label Lloop, Lexit;
2138+
2139+
#ifdef ASSERT
2140+
{
2141+
Label ok;
2142+
cmpdi(CCR0, count, 0);
2143+
bgt(CCR0, ok);
2144+
stop("count must be positive");
2145+
bind(ok);
2146+
}
2147+
#endif
2148+
2149+
mtctr(count);
2150+
2151+
bind(Lloop);
2152+
ld(scratch, 0 , addr);
2153+
xor_(scratch, scratch, value);
2154+
beq(CCR0, Lexit);
2155+
addi(addr, addr, wordSize);
2156+
bdnz(Lloop);
2157+
2158+
bind(Lexit);
2159+
}
2160+
2161+
// Ensure that the inline code and the stub are using the same registers.
// The macro expands to a single assert over names taken from the enclosing
// scope: r_super_klass, r_array_base, r_array_length, r_array_index,
// r_sub_klass, r_bitmap and result. Every function that uses it must have all
// seven names in scope. r_super_klass, r_array_base and r_array_length must be
// the exact registers listed; the other four may alternatively be noreg.
2162+
#define LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS \
2163+
do { \
2164+
assert(r_super_klass == R4_ARG2 && \
2165+
r_array_base == R3_ARG1 && \
2166+
r_array_length == R7_ARG5 && \
2167+
(r_array_index == R6_ARG4 || r_array_index == noreg) && \
2168+
(r_sub_klass == R5_ARG3 || r_sub_klass == noreg) && \
2169+
(r_bitmap == R11_scratch1 || r_bitmap == noreg) && \
2170+
(result == R8_ARG6 || result == noreg), "registers must match ppc64.ad"); \
2171+
} while(0)
2172+
2173+
// Return true: we succeeded in generating this code
2174+
void MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass,
2175+
Register r_super_klass,
2176+
Register temp1,
2177+
Register temp2,
2178+
Register temp3,
2179+
Register temp4,
2180+
Register result,
2181+
u1 super_klass_slot) {
2182+
assert_different_registers(r_sub_klass, r_super_klass, temp1, temp2, temp3, temp4, result);
2183+
2184+
Label L_done;
2185+
2186+
BLOCK_COMMENT("lookup_secondary_supers_table {");
2187+
2188+
const Register
2189+
r_array_base = temp1,
2190+
r_array_length = temp2,
2191+
r_array_index = temp3,
2192+
r_bitmap = temp4;
2193+
2194+
LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS;
2195+
2196+
ld(r_bitmap, in_bytes(Klass::bitmap_offset()), r_sub_klass);
2197+
2198+
// First check the bitmap to see if super_klass might be present. If
2199+
// the bit is zero, we are certain that super_klass is not one of
2200+
// the secondary supers.
2201+
u1 bit = super_klass_slot;
2202+
int shift_count = Klass::SECONDARY_SUPERS_TABLE_MASK - bit;
2203+
2204+
// if (shift_count == 0) this is used for comparing with 0:
2205+
sldi_(r_array_index, r_bitmap, shift_count);
2206+
2207+
li(result, 1); // failure
2208+
// We test the MSB of r_array_index, i.e. its sign bit
2209+
bge(CCR0, L_done);
2210+
2211+
// We will consult the secondary-super array.
2212+
ld(r_array_base, in_bytes(Klass::secondary_supers_offset()), r_sub_klass);
2213+
2214+
// The value i in r_array_index is >= 1, so even though r_array_base
2215+
// points to the length, we don't need to adjust it to point to the
2216+
// data.
2217+
assert(Array<Klass*>::base_offset_in_bytes() == wordSize, "Adjust this code");
2218+
2219+
// Get the first array index that can contain super_klass.
2220+
if (bit != 0) {
2221+
popcntd(r_array_index, r_array_index);
2222+
// NB! r_array_index is off by 1. It is compensated by keeping r_array_base off by 1 word.
2223+
sldi(r_array_index, r_array_index, LogBytesPerWord); // scale
2224+
ldx(result, r_array_base, r_array_index);
2225+
} else {
2226+
// Actually use index 0, but r_array_base and r_array_index are off by 1 word
2227+
// such that the sum is precise.
2228+
ld(result, BytesPerWord, r_array_base);
2229+
li(r_array_index, BytesPerWord); // for slow path (scaled)
2230+
}
2231+
2232+
xor_(result, result, r_super_klass);
2233+
beq(CCR0, L_done); // Found a match (result == 0)
2234+
2235+
// Is there another entry to check? Consult the bitmap.
2236+
testbitdi(CCR0, /* temp */ r_array_length, r_bitmap, (bit + 1) & Klass::SECONDARY_SUPERS_TABLE_MASK);
2237+
beq(CCR0, L_done); // (result != 0)
2238+
2239+
// Linear probe. Rotate the bitmap so that the next bit to test is
2240+
// in Bit 2 for the look-ahead check in the slow path.
2241+
if (bit != 0) {
2242+
rldicl(r_bitmap, r_bitmap, 64 - bit, 0);
2243+
}
2244+
2245+
// Calls into the stub generated by lookup_secondary_supers_table_slow_path.
2246+
// Arguments: r_super_klass, r_array_base, r_array_index, r_bitmap.
2247+
// Kills: r_array_length.
2248+
// Returns: result.
2249+
address stub = StubRoutines::lookup_secondary_supers_table_slow_path_stub();
2250+
Register r_stub_addr = r_array_length;
2251+
add_const_optimized(r_stub_addr, R29_TOC, MacroAssembler::offset_to_global_toc(stub), R0);
2252+
mtctr(r_stub_addr);
2253+
bctrl();
2254+
2255+
bind(L_done);
2256+
BLOCK_COMMENT("} lookup_secondary_supers_table");
2257+
2258+
if (VerifySecondarySupers) {
2259+
verify_secondary_supers_table(r_sub_klass, r_super_klass, result,
2260+
temp1, temp2, temp3);
2261+
}
2262+
}
2263+
2264+
// Called by code generated by check_klass_subtype_slow_path
2265+
// above. This is called when there is a collision in the hashed
2266+
// lookup in the secondary supers array.
2267+
void MacroAssembler::lookup_secondary_supers_table_slow_path(Register r_super_klass,
2268+
Register r_array_base,
2269+
Register r_array_index,
2270+
Register r_bitmap,
2271+
Register result,
2272+
Register temp1) {
2273+
assert_different_registers(r_super_klass, r_array_base, r_array_index, r_bitmap, result, temp1);
2274+
2275+
const Register
2276+
r_array_length = temp1,
2277+
r_sub_klass = noreg;
2278+
2279+
LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS;
2280+
2281+
Label L_done;
2282+
2283+
// Load the array length.
2284+
lwa(r_array_length, Array<Klass*>::length_offset_in_bytes(), r_array_base);
2285+
// And adjust the array base to point to the data.
2286+
// NB! Effectively increments current slot index by 1.
2287+
assert(Array<Klass*>::base_offset_in_bytes() == wordSize, "");
2288+
addi(r_array_base, r_array_base, Array<Klass*>::base_offset_in_bytes());
2289+
2290+
// Linear probe
2291+
Label L_huge;
2292+
2293+
// The bitmap is full to bursting.
2294+
// Implicit invariant: BITMAP_FULL implies (length > 0)
2295+
assert(Klass::SECONDARY_SUPERS_BITMAP_FULL == ~uintx(0), "");
2296+
cmpdi(CCR0, r_bitmap, -1);
2297+
beq(CCR0, L_huge);
2298+
2299+
// NB! Our caller has checked bits 0 and 1 in the bitmap. The
2300+
// current slot (at secondary_supers[r_array_index]) has not yet
2301+
// been inspected, and r_array_index may be out of bounds if we
2302+
// wrapped around the end of the array.
2303+
2304+
{ // This is conventional linear probing, but instead of terminating
2305+
// when a null entry is found in the table, we maintain a bitmap
2306+
// in which a 0 indicates missing entries.
2307+
// The check above guarantees there are 0s in the bitmap, so the loop
2308+
// eventually terminates.
2309+
2310+
#ifdef ASSERT
2311+
{
2312+
// We should only reach here after having found a bit in the bitmap.
2313+
// Invariant: array_length == popcount(bitmap)
2314+
Label ok;
2315+
cmpdi(CCR0, r_array_length, 0);
2316+
bgt(CCR0, ok);
2317+
stop("array_length must be positive");
2318+
bind(ok);
2319+
}
2320+
#endif
2321+
2322+
// Compute limit in r_array_length
2323+
addi(r_array_length, r_array_length, -1);
2324+
sldi(r_array_length, r_array_length, LogBytesPerWord);
2325+
2326+
Label L_loop;
2327+
bind(L_loop);
2328+
2329+
// Check for wraparound.
2330+
cmpd(CCR0, r_array_index, r_array_length);
2331+
isel_0(r_array_index, CCR0, Assembler::greater);
2332+
2333+
ldx(result, r_array_base, r_array_index);
2334+
xor_(result, result, r_super_klass);
2335+
beq(CCR0, L_done); // success (result == 0)
2336+
2337+
// look-ahead check (Bit 2); result is non-zero
2338+
testbitdi(CCR0, R0, r_bitmap, 2);
2339+
beq(CCR0, L_done); // fail (result != 0)
2340+
2341+
rldicl(r_bitmap, r_bitmap, 64 - 1, 0);
2342+
addi(r_array_index, r_array_index, BytesPerWord);
2343+
b(L_loop);
2344+
}
2345+
2346+
{ // Degenerate case: more than 64 secondary supers.
2347+
// FIXME: We could do something smarter here, maybe a vectorized
2348+
// comparison or a binary search, but is that worth any added
2349+
// complexity?
2350+
bind(L_huge);
2351+
repne_scan(r_array_base, r_super_klass, r_array_length, result);
2352+
}
2353+
2354+
bind(L_done);
2355+
}
2356+
2357+
// Make sure that the hashed lookup and a linear scan agree.
2358+
void MacroAssembler::verify_secondary_supers_table(Register r_sub_klass,
2359+
Register r_super_klass,
2360+
Register result,
2361+
Register temp1,
2362+
Register temp2,
2363+
Register temp3) {
2364+
assert_different_registers(r_sub_klass, r_super_klass, result, temp1, temp2, temp3);
2365+
2366+
const Register
2367+
r_array_base = temp1,
2368+
r_array_length = temp2,
2369+
r_array_index = temp3,
2370+
r_bitmap = noreg; // unused
2371+
2372+
LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS;
2373+
2374+
BLOCK_COMMENT("verify_secondary_supers_table {");
2375+
2376+
Label passed, failure;
2377+
2378+
// We will consult the secondary-super array.
2379+
ld(r_array_base, in_bytes(Klass::secondary_supers_offset()), r_sub_klass);
2380+
// Load the array length.
2381+
lwa(r_array_length, Array<Klass*>::length_offset_in_bytes(), r_array_base);
2382+
// And adjust the array base to point to the data.
2383+
addi(r_array_base, r_array_base, Array<Klass*>::base_offset_in_bytes());
2384+
2385+
// convert !=0 to 1
2386+
neg(R0, result);
2387+
orr(result, result, R0);
2388+
srdi(result, result, 63);
2389+
2390+
const Register linear_result = r_array_index; // reuse
2391+
li(linear_result, 1);
2392+
cmpdi(CCR0, r_array_length, 0);
2393+
ble(CCR0, failure);
2394+
repne_scan(r_array_base, r_super_klass, r_array_length, linear_result);
2395+
bind(failure);
2396+
2397+
// convert !=0 to 1
2398+
neg(R0, linear_result);
2399+
orr(linear_result, linear_result, R0);
2400+
srdi(linear_result, linear_result, 63);
2401+
2402+
cmpd(CCR0, result, linear_result);
2403+
beq(CCR0, passed);
2404+
2405+
assert_different_registers(R3_ARG1, r_sub_klass, linear_result, result);
2406+
mr_if_needed(R3_ARG1, r_super_klass);
2407+
assert_different_registers(R4_ARG2, linear_result, result);
2408+
mr_if_needed(R4_ARG2, r_sub_klass);
2409+
assert_different_registers(R5_ARG3, result);
2410+
neg(R5_ARG3, linear_result);
2411+
neg(R6_ARG4, result);
2412+
const char* msg = "mismatch";
2413+
load_const_optimized(R7_ARG5, (intptr_t)msg, R0);
2414+
call_VM_leaf(CAST_FROM_FN_PTR(address, Klass::on_secondary_supers_verification_failure));
2415+
should_not_reach_here();
2416+
2417+
bind(passed);
2418+
2419+
BLOCK_COMMENT("} verify_secondary_supers_table");
2420+
}
2421+
21332422
void MacroAssembler::clinit_barrier(Register klass, Register thread, Label* L_fast_path, Label* L_slow_path) {
21342423
assert(L_fast_path != nullptr || L_slow_path != nullptr, "at least one is required");
21352424

src/hotspot/cpu/ppc/macroAssembler_ppc.hpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -604,6 +604,33 @@ class MacroAssembler: public Assembler {
604604
Register temp2_reg,
605605
Label& L_success);
606606

607+
// Scans count pointer-sized words at [addr] for an occurrence of value
// (count must be > 0). Iff found: CR0 is eq and scratch == 0.
void repne_scan(Register addr, Register value, Register count, Register scratch);
608+
609+
// Hashed secondary-supers lookup with a constant super_klass: its hash slot
// (super_klass_slot) is fixed when the code is generated.
609+
// The result is in Register result, not the condition codes
// (result == 0: super_klass found; result != 0: not found).
// The temps and result must be the registers asserted in the .cpp file,
// as they are shared with the slow-path stub.
610+
void lookup_secondary_supers_table(Register r_sub_klass,
611+
Register r_super_klass,
612+
Register temp1,
613+
Register temp2,
614+
Register temp3,
615+
Register temp4,
616+
Register result,
617+
u1 super_klass_slot);
619+
620+
// Makes sure that the hashed lookup (outcome passed in result) and a linear
// scan of r_sub_klass's secondary supers for r_super_klass agree.
void verify_secondary_supers_table(Register r_sub_klass,
620+
Register r_super_klass,
621+
Register result,
622+
Register temp1,
623+
Register temp2,
624+
Register temp3);
626+
627+
// Generates the out-of-line linear-probing part of the hashed lookup; it is
// reached from the code emitted by lookup_secondary_supers_table when there is
// a collision. Leaves 0 in result on success, non-zero on failure.
void lookup_secondary_supers_table_slow_path(Register r_super_klass,
627+
Register r_array_base,
628+
Register r_array_index,
629+
Register r_bitmap,
630+
Register result,
631+
Register temp1);
633+
607634
void clinit_barrier(Register klass,
608635
Register thread,
609636
Label* L_fast_path = nullptr,

0 commit comments

Comments
 (0)