Skip to content

Commit 28bb8dd

Browse files
committed
8326752: Lilliput: OMCache: Add cache lookup unrolling
Reviewed-by: rkennke
1 parent ff713d9 commit 28bb8dd

File tree

5 files changed

+144
-64
lines changed

5 files changed

+144
-64
lines changed

src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp

Lines changed: 75 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -561,73 +561,95 @@ void C2_MacroAssembler::fast_lock_placeholder(Register obj, Register box, Regist
561561
{ // Handle inflated monitor.
562562
bind(inflated);
563563

564-
if (!OMUseC2Cache) {
565-
// Set Flags == NE
566-
cmp(zr, obj);
567-
b(slow_path);
568-
} else {
564+
if (!OMUseC2Cache) {
565+
// Set Flags == NE
566+
cmp(zr, obj);
567+
b(slow_path);
568+
} else {
569569

570-
if (OMCacheHitRate) increment(Address(rthread, JavaThread::lock_lookup_offset()));
570+
if (OMCacheHitRate) increment(Address(rthread, JavaThread::lock_lookup_offset()));
571571

572-
Label monitor_found, loop;
573-
// Load cache address
574-
lea(t, Address(rthread, JavaThread::om_cache_oops_offset()));
572+
Label monitor_found;
575573

576-
// Search for obj in cache.
577-
bind(loop);
574+
// Load cache address
575+
lea(t, Address(rthread, JavaThread::om_cache_oops_offset()));
578576

579-
// Check for match.
580-
ldr(t1, Address(t));
581-
cmp(obj, t1);
582-
br(Assembler::EQ, monitor_found);
577+
const int num_unrolled = MIN2(OMC2UnrollCacheEntries, OMCacheSize);
578+
for (int i = 0; i < num_unrolled; i++) {
579+
ldr(t1, Address(t));
580+
cmp(obj, t1);
581+
br(Assembler::EQ, monitor_found);
582+
if (i + 1 != num_unrolled) {
583+
increment(t, in_bytes(OMCache::oop_to_oop_difference()));
584+
}
585+
}
583586

584-
// Search until null encountered, guaranteed _null_sentinel at end.
585-
increment(t, oopSize);
586-
cbnz(t1, loop);
587-
// Cache Miss, NE set from cmp above, cbnz does not set flags
588-
b(slow_path);
587+
if (num_unrolled == 0 || (OMC2UnrollCacheLookupLoopTail && num_unrolled != OMCacheSize)) {
588+
if (num_unrolled != 0) {
589+
// Loop after unrolling, advance iterator.
590+
increment(t, in_bytes(OMCache::oop_to_oop_difference()));
591+
}
589592

590-
bind(monitor_found);
591-
ldr(t1, Address(t, OMCache::oop_to_monitor_difference()));
592-
if (OMCacheHitRate) increment(Address(rthread, JavaThread::lock_hit_offset()));
593+
Label loop;
593594

594-
// ObjectMonitor* is in t1
595-
const Register monitor = t1;
596-
const Register owner_addr = t2;
597-
const Register owner = t3;
595+
// Search for obj in cache.
596+
bind(loop);
598597

599-
Label recursive;
600-
Label monitor_locked;
598+
// Check for match.
599+
ldr(t1, Address(t));
600+
cmp(obj, t1);
601+
br(Assembler::EQ, monitor_found);
601602

602-
// Compute owner address.
603-
lea(owner_addr, Address(monitor, ObjectMonitor::owner_offset()));
603+
// Search until null encountered, guaranteed _null_sentinel at end.
604+
increment(t, in_bytes(OMCache::oop_to_oop_difference()));
605+
cbnz(t1, loop);
606+
// Cache Miss, NE set from cmp above, cbnz does not set flags
607+
b(slow_path);
608+
} else {
609+
b(slow_path);
610+
}
604611

605-
if (OMRecursiveFastPath) {
606-
ldr(owner, Address(owner_addr));
607-
cmp(owner, rthread);
608-
br(Assembler::EQ, recursive);
609-
}
612+
bind(monitor_found);
613+
ldr(t1, Address(t, OMCache::oop_to_monitor_difference()));
614+
if (OMCacheHitRate) increment(Address(rthread, JavaThread::lock_hit_offset()));
610615

611-
// CAS owner (null => current thread).
612-
cmpxchg(owner_addr, zr, rthread, Assembler::xword, /*acquire*/ true,
613-
/*release*/ false, /*weak*/ false, owner);
614-
br(Assembler::EQ, monitor_locked);
616+
// ObjectMonitor* is in t1
617+
const Register monitor = t1;
618+
const Register owner_addr = t2;
619+
const Register owner = t3;
615620

616-
if (OMRecursiveFastPath) {
617-
b(slow_path);
618-
} else {
619-
// Check if recursive.
620-
cmp(owner, rthread);
621-
br(Assembler::NE, slow_path);
622-
}
621+
Label recursive;
622+
Label monitor_locked;
623623

624-
// Recursive.
625-
bind(recursive);
626-
increment(Address(monitor, ObjectMonitor::recursions_offset()), 1);
624+
// Compute owner address.
625+
lea(owner_addr, Address(monitor, ObjectMonitor::owner_offset()));
627626

628-
bind(monitor_locked);
629-
str(monitor, Address(box, BasicLock::displaced_header_offset_in_bytes()));
630-
}
627+
if (OMRecursiveFastPath) {
628+
ldr(owner, Address(owner_addr));
629+
cmp(owner, rthread);
630+
br(Assembler::EQ, recursive);
631+
}
632+
633+
// CAS owner (null => current thread).
634+
cmpxchg(owner_addr, zr, rthread, Assembler::xword, /*acquire*/ true,
635+
/*release*/ false, /*weak*/ false, owner);
636+
br(Assembler::EQ, monitor_locked);
637+
638+
if (OMRecursiveFastPath) {
639+
b(slow_path);
640+
} else {
641+
// Check if recursive.
642+
cmp(owner, rthread);
643+
br(Assembler::NE, slow_path);
644+
}
645+
646+
// Recursive.
647+
bind(recursive);
648+
increment(Address(monitor, ObjectMonitor::recursions_offset()), 1);
649+
650+
bind(monitor_locked);
651+
str(monitor, Address(box, BasicLock::displaced_header_offset_in_bytes()));
652+
}
631653

632654
}
633655

src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1258,22 +1258,43 @@ void C2_MacroAssembler::fast_lock_placeholder(Register obj, Register box, Regist
12581258
if (OMCacheHitRate) increment(Address(thread, JavaThread::lock_lookup_offset()));
12591259

12601260
// Fetch ObjectMonitor* from the cache or take the slow-path.
1261-
Label monitor_found, loop;
1261+
Label monitor_found;
1262+
12621263
// Load cache address
12631264
lea(t, Address(thread, JavaThread::om_cache_oops_offset()));
12641265

1265-
// Search for obj in cache.
1266-
bind(loop);
1266+
const int num_unrolled = MIN2(OMC2UnrollCacheEntries, OMCacheSize);
1267+
for (int i = 0; i < num_unrolled; i++) {
1268+
cmpptr(obj, Address(t));
1269+
jccb(Assembler::equal, monitor_found);
1270+
if (i + 1 != num_unrolled) {
1271+
increment(t, in_bytes(OMCache::oop_to_oop_difference()));
1272+
}
1273+
}
1274+
1275+
if (num_unrolled == 0 || (OMC2UnrollCacheLookupLoopTail && num_unrolled != OMCacheSize)) {
1276+
if (num_unrolled != 0) {
1277+
// Loop after unrolling, advance iterator.
1278+
increment(t, in_bytes(OMCache::oop_to_oop_difference()));
1279+
}
12671280

1268-
// Check for match.
1269-
cmpptr(obj, Address(t));
1270-
jccb(Assembler::equal, monitor_found);
1281+
Label loop;
12711282

1272-
// Search until null encountered, guaranteed _null_sentinel at end.
1273-
cmpptr(Address(t), 1);
1274-
jcc(Assembler::below, slow_path); // 0 check, but with ZF=0 when *t == 0
1275-
increment(t, oopSize);
1276-
jmpb(loop);
1283+
// Search for obj in cache.
1284+
bind(loop);
1285+
1286+
// Check for match.
1287+
cmpptr(obj, Address(t));
1288+
jccb(Assembler::equal, monitor_found);
1289+
1290+
// Search until null encountered, guaranteed _null_sentinel at end.
1291+
cmpptr(Address(t), 1);
1292+
jcc(Assembler::below, slow_path); // 0 check, but with ZF=0 when *t == 0
1293+
increment(t, in_bytes(OMCache::oop_to_oop_difference()));
1294+
jmpb(loop);
1295+
} else {
1296+
jmp(slow_path);
1297+
}
12771298

12781299
// Cache hit.
12791300
bind(monitor_found);

src/hotspot/share/runtime/globals.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1990,6 +1990,11 @@ const int ObjectAlignmentInBytes = 8;
19901990
\
19911991
product(bool, OMUseC2Cache, true, "") \
19921992
\
1993+
product(bool, OMC2UnrollCacheLookupLoopTail, true, "") \
1994+
\
1995+
product(int, OMC2UnrollCacheEntries, 0, "") \
1996+
range(0, OMCache::CAPACITY) \
1997+
\
19931998
product(int, OMCacheSize, 8, "") \
19941999
range(0, OMCache::CAPACITY) \
19952000
\

src/hotspot/share/runtime/lockStack.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ class OMCache {
147147
public:
148148
static ByteSize oops_offset() { return byte_offset_of(OMCache, _oops); }
149149
static ByteSize monitors_offset() { return byte_offset_of(OMCache, _monitors); }
150+
static ByteSize oop_to_oop_difference() { return in_ByteSize(sizeof(oop)); }
150151
static ByteSize oop_to_monitor_difference() { return monitors_offset() - oops_offset(); }
151152

152153
explicit OMCache(JavaThread* jt) : _oops(), _null_sentinel(nullptr), _monitors() {};

test/micro/org/openjdk/bench/vm/lang/LockUnlock.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ public class LockUnlock {
5555
public Object lockObject1;
5656
public Object lockObject2;
5757
public volatile Object lockObject3Inflated;
58+
public volatile Object lockObject4Inflated;
5859
public int factorial;
5960
public int dummyInt1;
6061
public int dummyInt2;
@@ -64,12 +65,16 @@ public void setup() {
6465
lockObject1 = new Object();
6566
lockObject2 = new Object();
6667
lockObject3Inflated = new Object();
68+
lockObject4Inflated = new Object();
6769

6870
// Inflate the lock to use an ObjectMonitor
6971
try {
7072
synchronized (lockObject3Inflated) {
7173
lockObject3Inflated.wait(1);
7274
}
75+
synchronized (lockObject4Inflated) {
76+
lockObject4Inflated.wait(1);
77+
}
7378
} catch (InterruptedException e) {
7479
throw new RuntimeException(e);
7580
}
@@ -205,6 +210,32 @@ public void testInflatedSerialLockUnlock() {
205210
}
206211
}
207212

213+
/** Perform two synchronized blocks one after the other, each on a different inflated object. */
214+
@Benchmark
215+
public void testInflatedMultipleSerialLockUnlock() {
216+
for (int i = 0; i < innerCount; i++) {
217+
synchronized (lockObject3Inflated) {
218+
dummyInt1++;
219+
}
220+
synchronized (lockObject4Inflated) {
221+
dummyInt2++;
222+
}
223+
}
224+
}
225+
226+
/** Perform two nested synchronized blocks, the inner one on a different inflated object than the outer one. */
227+
@Benchmark
228+
public void testInflatedMultipleRecursiveLockUnlock() {
229+
for (int i = 0; i < innerCount; i++) {
230+
synchronized (lockObject3Inflated) {
231+
dummyInt1++;
232+
synchronized (lockObject4Inflated) {
233+
dummyInt2++;
234+
}
235+
}
236+
}
237+
}
238+
208239
/** Perform a recursive-only synchronized on a local object within a loop. */
209240
@Benchmark
210241
public void testInflatedRecursiveOnlyLockUnlockLocal() {

0 commit comments

Comments
 (0)