Skip to content

Commit ef282b7

Browse files
author
Andrew Haley
committed
8180450: secondary_super_cache does not scale well
8337958: Out-of-bounds array access in secondary_super_cache Reviewed-by: adinn Backport-of: f11a496de61d800a680517457eb43b078a633953
1 parent e48995f commit ef282b7

36 files changed

+1743
-49
lines changed

src/hotspot/cpu/aarch64/aarch64.ad

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//
22
// Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
3-
// Copyright (c) 2014, 2021, Red Hat, Inc. All rights reserved.
3+
// Copyright (c) 2014, 2024, Red Hat, Inc. All rights reserved.
44
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
55
//
66
// This code is free software; you can redistribute it and/or modify it
@@ -16696,6 +16696,39 @@ instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_
1669616696
ins_pipe(pipe_class_memory);
1669716697
%}
1669816698

16699+
// Partial subtype check of $sub against a super class that is a
// compile-time constant ($super_con), so the super's hash slot is known
// when the instruction is encoded. Only selected when
// UseSecondarySupersTable is set. The answer is produced in $result.
instruct partialSubtypeCheckConstSuper(iRegP_R4 sub, iRegP_R0 super_reg, immP super_con, vRegD_V0 vtemp, iRegP_R5 result,
16700+
iRegP_R1 tempR1, iRegP_R2 tempR2, iRegP_R3 tempR3,
16701+
rFlagsReg cr)
16702+
%{
16703+
match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
16704+
predicate(UseSecondarySupersTable);
16705+
effect(KILL cr, TEMP tempR1, TEMP tempR2, TEMP tempR3, TEMP vtemp);
16706+
16707+
ins_cost(700); // smaller than the next version
16708+
format %{ "partialSubtypeCheck $result, $sub, super" %}
16709+
16710+
ins_encode %{
16711+
bool success = false;
16712+
// Hash slot of the constant super class, computed at code-generation time.
u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
16713+
// Either expand the table lookup inline, or call the stub selected by
// the super's hash slot.
if (InlineSecondarySupersTest) {
16714+
success = __ lookup_secondary_supers_table($sub$$Register, $super_reg$$Register,
16715+
$tempR1$$Register, $tempR2$$Register, $tempR3$$Register,
16716+
$vtemp$$FloatRegister,
16717+
$result$$Register,
16718+
super_klass_slot);
16719+
} else {
16720+
address call = __ trampoline_call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
16721+
success = (call != nullptr);
16722+
}
16723+
// Both paths report failure when a trampoline could not be allocated;
// record the failure with the compiler environment and stop encoding.
if (!success) {
16724+
ciEnv::current()->record_failure("CodeCache is full");
16725+
return;
16726+
}
16727+
%}
16728+
16729+
ins_pipe(pipe_class_memory);
16730+
%}
16731+
1669916732
instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
1670016733
%{
1670116734
match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));

src/hotspot/cpu/aarch64/assembler_aarch64.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2631,6 +2631,7 @@ template<typename R, typename... Rx>
26312631

26322632
#undef INSN
26332633

2634+
// Advanced SIMD across lanes
26342635
#define INSN(NAME, opc, opc2, accepted) \
26352636
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { \
26362637
guarantee(T != T1Q && T != T1D, "incorrect arrangement"); \

src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp

Lines changed: 239 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
3-
* Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
3+
* Copyright (c) 2014, 2024, Red Hat Inc. All rights reserved.
44
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
55
*
66
* This code is free software; you can redistribute it and/or modify it
@@ -1465,6 +1465,9 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
14651465
Label* L_success,
14661466
Label* L_failure,
14671467
bool set_cond_codes) {
1468+
// NB! Callers may assume that, when temp2_reg is a valid register,
1469+
// this code sets it to a nonzero value.
1470+
14681471
assert_different_registers(sub_klass, super_klass, temp_reg);
14691472
if (temp2_reg != noreg)
14701473
assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, rscratch1);
@@ -1540,6 +1543,241 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
15401543
bind(L_fallthrough);
15411544
}
15421545

1546+
// Ensure that the inline code and the stub are using the same registers.
// The macro expands to an assert over locals named r_super_klass,
// r_array_base, r_array_length, r_array_index, r_sub_klass, r_bitmap and
// result, all of which must be in scope at the expansion site. The last
// four may be noreg in routines that do not use them.
1547+
#define LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS \
1548+
do { \
1549+
assert(r_super_klass == r0 && \
1550+
r_array_base == r1 && \
1551+
r_array_length == r2 && \
1552+
(r_array_index == r3 || r_array_index == noreg) && \
1553+
(r_sub_klass == r4 || r_sub_klass == noreg) && \
1554+
(r_bitmap == rscratch2 || r_bitmap == noreg) && \
1555+
(result == r5 || result == noreg), "registers must match aarch64.ad"); \
1556+
} while(0)
1557+
1558+
// Emit the inline part of the hashed secondary-supers lookup for a super
// class whose hash slot (super_klass_slot) is known when the code is
// generated.
//
// On the paths that do not call the slow-path stub, result is zero when
// r_super_klass was found and nonzero otherwise. The stub is presumed to
// follow the same convention (NOTE(review): confirm against the stub
// generator, which is not part of this file).
//
// temp1, temp2, temp3, rscratch2 and (when super_klass_slot != 0) vtemp are
// overwritten. When stub_is_near is true the stub is reached with a plain
// bl; otherwise a trampoline call is emitted, which can fail.
//
// Return true: we succeeded in generating this code
1559+
bool MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass,
1560+
Register r_super_klass,
1561+
Register temp1,
1562+
Register temp2,
1563+
Register temp3,
1564+
FloatRegister vtemp,
1565+
Register result,
1566+
u1 super_klass_slot,
1567+
bool stub_is_near) {
1568+
assert_different_registers(r_sub_klass, temp1, temp2, temp3, result, rscratch1, rscratch2);
1569+
1570+
Label L_fallthrough;
1571+
1572+
BLOCK_COMMENT("lookup_secondary_supers_table {");
1573+
1574+
const Register
1575+
r_array_base = temp1, // r1
1576+
r_array_length = temp2, // r2
1577+
r_array_index = temp3, // r3
1578+
r_bitmap = rscratch2;
1579+
1580+
LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS;
1581+
1582+
// Bit position in the bitmap that corresponds to the super's hash slot.
u1 bit = super_klass_slot;
1583+
1584+
// Make sure that result is nonzero if the TBZ below misses.
1585+
mov(result, 1);
1586+
1587+
// We're going to need the bitmap in a vector reg and in a core reg,
1588+
// so load both now.
1589+
ldr(r_bitmap, Address(r_sub_klass, Klass::bitmap_offset()));
1590+
if (bit != 0) {
1591+
ldrd(vtemp, Address(r_sub_klass, Klass::bitmap_offset()));
1592+
}
1593+
// First check the bitmap to see if super_klass might be present. If
1594+
// the bit is zero, we are certain that super_klass is not one of
1595+
// the secondary supers.
1596+
tbz(r_bitmap, bit, L_fallthrough);
1597+
1598+
// Get the first array index that can contain super_klass into r_array_index.
// The shift discards the bitmap bits above `bit`; cnt then counts the set
// bits in each byte and addv sums those byte counts, so the value moved to
// r_array_index is the number of set bits that survive the shift.
1599+
if (bit != 0) {
1600+
shld(vtemp, vtemp, Klass::SECONDARY_SUPERS_TABLE_MASK - bit);
1601+
cnt(vtemp, T8B, vtemp);
1602+
addv(vtemp, T8B, vtemp);
1603+
fmovd(r_array_index, vtemp);
1604+
} else {
1605+
mov(r_array_index, (u1)1);
1606+
}
1607+
// NB! r_array_index is off by 1. It is compensated by keeping r_array_base off by 1 word.
1608+
1609+
// We will consult the secondary-super array.
1610+
ldr(r_array_base, Address(r_sub_klass, in_bytes(Klass::secondary_supers_offset())));
1611+
1612+
// The value i in r_array_index is >= 1, so even though r_array_base
1613+
// points to the length, we don't need to adjust it to point to the
1614+
// data.
1615+
assert(Array<Klass*>::base_offset_in_bytes() == wordSize, "Adjust this code");
1616+
assert(Array<Klass*>::length_offset_in_bytes() == 0, "Adjust this code");
1617+
1618+
// Probe the home slot; result becomes zero exactly when the entry equals
// r_super_klass.
ldr(result, Address(r_array_base, r_array_index, Address::lsl(LogBytesPerWord)));
1619+
eor(result, result, r_super_klass);
1620+
cbz(result, L_fallthrough); // Found a match
1621+
1622+
// Is there another entry to check? Consult the bitmap.
1623+
tbz(r_bitmap, (bit + 1) & Klass::SECONDARY_SUPERS_TABLE_MASK, L_fallthrough);
1624+
1625+
// Linear probe.
// Rotate the bitmap so that the bit for this hash slot ends up at bit 0.
1626+
if (bit != 0) {
1627+
ror(r_bitmap, r_bitmap, bit);
1628+
}
1629+
1630+
// The slot we just inspected is at secondary_supers[r_array_index - 1].
1631+
// The next slot to be inspected, by the stub we're about to call,
1632+
// is secondary_supers[r_array_index]. Bits 0 and 1 in the bitmap
1633+
// have been checked.
1634+
Address stub = RuntimeAddress(StubRoutines::lookup_secondary_supers_table_slow_path_stub());
1635+
if (stub_is_near) {
1636+
bl(stub);
1637+
} else {
1638+
address call = trampoline_call(stub);
1639+
if (call == nullptr) {
1640+
return false; // trampoline allocation failed
1641+
}
1642+
}
1643+
1644+
BLOCK_COMMENT("} lookup_secondary_supers_table");
1645+
1646+
bind(L_fallthrough);
1647+
1648+
// Optionally cross-check the hashed answer against a linear scan.
if (VerifySecondarySupers) {
1649+
verify_secondary_supers_table(r_sub_klass, r_super_klass, // r4, r0
1650+
temp1, temp2, result); // r1, r2, r5
1651+
}
1652+
return true;
1653+
}
1654+
1655+
// Called, via the lookup_secondary_supers_table_slow_path stub, by code
1656+
// generated by lookup_secondary_supers_table above. This is called when
1657+
// there is a collision in the hashed lookup in the secondary supers array.
//
// On entry r_array_base points at the secondary supers array (its length
// word), r_array_index is the next slot to inspect and r_bitmap has been
// rotated by the caller. On exit result == 0 iff r_super_klass was found.
// r_array_base, r_array_index, r_bitmap, temp1 and rscratch1 are overwritten.
1658+
void MacroAssembler::lookup_secondary_supers_table_slow_path(Register r_super_klass,
1659+
Register r_array_base,
1660+
Register r_array_index,
1661+
Register r_bitmap,
1662+
Register temp1,
1663+
Register result) {
1664+
assert_different_registers(r_super_klass, r_array_base, r_array_index, r_bitmap, temp1, result, rscratch1);
1665+
1666+
const Register
1667+
r_array_length = temp1,
1668+
r_sub_klass = noreg; // unused
1669+
1670+
LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS;
1671+
1672+
Label L_fallthrough, L_huge;
1673+
1674+
// Load the array length.
1675+
ldrw(r_array_length, Address(r_array_base, Array<Klass*>::length_offset_in_bytes()));
1676+
// And adjust the array base to point to the data.
1677+
// NB! Effectively increments current slot index by 1.
1678+
assert(Array<Klass*>::base_offset_in_bytes() == wordSize, "");
1679+
add(r_array_base, r_array_base, Array<Klass*>::base_offset_in_bytes());
1680+
1681+
// The bitmap is full to bursting.
1682+
// Implicit invariant: BITMAP_FULL implies (length > 0)
1683+
assert(Klass::SECONDARY_SUPERS_BITMAP_FULL == ~uintx(0), "");
1684+
cmpw(r_array_length, (u1)(Klass::SECONDARY_SUPERS_TABLE_SIZE - 2));
1685+
br(GT, L_huge);
1686+
1687+
// NB! Our caller has checked bits 0 and 1 in the bitmap. The
1688+
// current slot (at secondary_supers[r_array_index]) has not yet
1689+
// been inspected, and r_array_index may be out of bounds if we
1690+
// wrapped around the end of the array.
1691+
1692+
{ // This is conventional linear probing, but instead of terminating
1693+
// when a null entry is found in the table, we maintain a bitmap
1694+
// in which a 0 indicates missing entries.
1695+
// The check above guarantees there are 0s in the bitmap, so the loop
1696+
// eventually terminates.
1697+
Label L_loop;
1698+
bind(L_loop);
1699+
1700+
// Check for wraparound.
// r_array_index = (r_array_index >= r_array_length) ? 0 : r_array_index
1701+
cmp(r_array_index, r_array_length);
1702+
csel(r_array_index, zr, r_array_index, GE);
1703+
1704+
// Probe the current slot; result becomes zero exactly on a match.
ldr(rscratch1, Address(r_array_base, r_array_index, Address::lsl(LogBytesPerWord)));
1705+
eor(result, rscratch1, r_super_klass);
1706+
cbz(result, L_fallthrough);
1707+
1708+
tbz(r_bitmap, 2, L_fallthrough); // look-ahead check (Bit 2); result is non-zero
1709+
1710+
// Advance: the rotate makes the next iteration's look-ahead bit land on bit 2 again.
ror(r_bitmap, r_bitmap, 1);
1711+
add(r_array_index, r_array_index, 1);
1712+
b(L_loop);
1713+
}
1714+
1715+
{ // Degenerate case: more than SECONDARY_SUPERS_TABLE_SIZE - 2 secondary
// supers (see the length check above), so fall back to a linear scan.
1716+
// FIXME: We could do something smarter here, maybe a vectorized
1717+
// comparison or a binary search, but is that worth any added
1718+
// complexity?
1719+
bind(L_huge);
1720+
cmp(sp, zr); // Clear Z flag; SP is never zero
1721+
repne_scan(r_array_base, r_super_klass, r_array_length, rscratch1);
1722+
cset(result, NE); // result == 0 iff we got a match.
1723+
}
1724+
1725+
bind(L_fallthrough);
1726+
}
1727+
1728+
// Make sure that the hashed lookup and a linear scan agree.
// `result` holds the hashed lookup's answer on entry (zero meaning "found")
// and is normalized to 0/1 by this code. temp1, temp2, rscratch1 and
// rscratch2 are overwritten. On disagreement the generated code calls
// Klass::on_secondary_supers_verification_failure.
1729+
void MacroAssembler::verify_secondary_supers_table(Register r_sub_klass,
1730+
Register r_super_klass,
1731+
Register temp1,
1732+
Register temp2,
1733+
Register result) {
1734+
assert_different_registers(r_sub_klass, r_super_klass, temp1, temp2, result, rscratch1);
1735+
1736+
const Register
1737+
r_array_base = temp1,
1738+
r_array_length = temp2,
1739+
r_array_index = noreg, // unused
1740+
r_bitmap = noreg; // unused
1741+
1742+
LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS;
1743+
1744+
BLOCK_COMMENT("verify_secondary_supers_table {");
1745+
1746+
// We will consult the secondary-super array.
1747+
ldr(r_array_base, Address(r_sub_klass, in_bytes(Klass::secondary_supers_offset())));
1748+
1749+
// Load the array length.
1750+
ldrw(r_array_length, Address(r_array_base, Array<Klass*>::length_offset_in_bytes()));
1751+
// And adjust the array base to point to the data.
1752+
add(r_array_base, r_array_base, Array<Klass*>::base_offset_in_bytes());
1753+
1754+
cmp(sp, zr); // Clear Z flag; SP is never zero
1755+
// Scan R2 words at [R1] for an occurrence of R0.
1756+
// Set NZ/Z based on last compare.
1757+
repne_scan(/*addr*/r_array_base, /*value*/r_super_klass, /*count*/r_array_length, rscratch2);
1758+
// rscratch1 == 0 iff we got a match.
1759+
cset(rscratch1, NE);
1760+
1761+
Label passed;
1762+
cmp(result, zr);
1763+
cset(result, NE); // normalize result to 0/1 for comparison
1764+
1765+
cmp(rscratch1, result);
1766+
br(EQ, passed);
1767+
{
1768+
// The two answers disagree: hand the details to the runtime in r0..r4.
// The call is not expected to return.
mov(r0, r_super_klass); // r0 <- r0
1769+
mov(r1, r_sub_klass); // r1 <- r4
1770+
mov(r2, /*expected*/rscratch1); // r2 <- r8
1771+
mov(r3, result); // r3 <- r5
1772+
mov(r4, (address)("mismatch")); // r4 <- const
1773+
rt_call(CAST_FROM_FN_PTR(address, Klass::on_secondary_supers_verification_failure), rscratch2);
1774+
should_not_reach_here();
1775+
}
1776+
bind(passed);
1777+
1778+
BLOCK_COMMENT("} verify_secondary_supers_table");
1779+
}
1780+
15431781
void MacroAssembler::clinit_barrier(Register klass, Register scratch, Label* L_fast_path, Label* L_slow_path) {
15441782
assert(L_fast_path != nullptr || L_slow_path != nullptr, "at least one is required");
15451783
assert_different_registers(klass, rthread, scratch);

src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
3-
* Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
3+
* Copyright (c) 2014, 2024, Red Hat Inc. All rights reserved.
44
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
55
*
66
* This code is free software; you can redistribute it and/or modify it
@@ -997,6 +997,31 @@ class MacroAssembler: public Assembler {
997997
Label* L_failure,
998998
bool set_cond_codes = false);
999999

1000+
// As above, but with a constant super_klass.
1001+
// The result is in Register result, not the condition codes.
// super_klass_slot is the hash slot of the constant super class.
// Returns false if the code could not be generated (trampoline
// allocation failed); stub_is_near selects a direct bl to the
// slow-path stub instead of a trampoline call.
1002+
bool lookup_secondary_supers_table(Register r_sub_klass,
1003+
Register r_super_klass,
1004+
Register temp1,
1005+
Register temp2,
1006+
Register temp3,
1007+
FloatRegister vtemp,
1008+
Register result,
1009+
u1 super_klass_slot,
1010+
bool stub_is_near = false);
1011+
1012+
// Emit code that checks that the hashed lookup's answer (in result)
// agrees with a linear scan of r_sub_klass's secondary supers array.
void verify_secondary_supers_table(Register r_sub_klass,
1013+
Register r_super_klass,
1014+
Register temp1,
1015+
Register temp2,
1016+
Register result);
1017+
1018+
// Emit the out-of-line continuation of lookup_secondary_supers_table:
// keep probing the secondary supers array when the first probe did not
// settle the lookup. Leaves zero in result iff r_super_klass was found.
void lookup_secondary_supers_table_slow_path(Register r_super_klass,
1019+
Register r_array_base,
1020+
Register r_array_index,
1021+
Register r_bitmap,
1022+
Register temp1,
1023+
Register result);
1024+
10001025
// Simplified, combined version, good for typical uses.
10011026
// Falls through on failure.
10021027
void check_klass_subtype(Register sub_klass,
@@ -1211,6 +1236,7 @@ class MacroAssembler: public Assembler {
12111236
// - relocInfo::virtual_call_type
12121237
//
12131238
// Return: the call PC or null if CodeCache is full.
1239+
// Clobbers: rscratch1
12141240
address trampoline_call(Address entry);
12151241

12161242
static bool far_branches() {

0 commit comments

Comments
 (0)