Skip to content

Commit d4d2e34

Browse files
Evgeny AstigeevichPaul Hohensee
Evgeny Astigeevich
authored and
Paul Hohensee
committed
8280872: Reorder code cache segments to improve code density
Backport-of: e524107a74d149354c505372e7b4a8af87d6ad02
1 parent d7a3a9e commit d4d2e34

File tree

7 files changed

+202
-18
lines changed

7 files changed

+202
-18
lines changed

src/hotspot/cpu/aarch64/aarch64.ad

+3-3
Original file line numberDiff line numberDiff line change
@@ -1279,12 +1279,12 @@ class HandlerImpl {
12791279
static int emit_deopt_handler(CodeBuffer& cbuf);
12801280

12811281
static uint size_exception_handler() {
1282-
return MacroAssembler::far_branch_size();
1282+
return MacroAssembler::far_codestub_branch_size();
12831283
}
12841284

12851285
static uint size_deopt_handler() {
12861286
// count one adr and one far branch instruction
1287-
return 4 * NativeInstruction::instruction_size;
1287+
return NativeInstruction::instruction_size + MacroAssembler::far_codestub_branch_size();
12881288
}
12891289
};
12901290

@@ -2358,7 +2358,7 @@ int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
23582358
__ adr(lr, __ pc());
23592359
__ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
23602360

2361-
assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
2361+
assert(__ offset() - offset == (int) size_deopt_handler(), "overflow");
23622362
__ end_a_stub();
23632363
return offset;
23642364
}

src/hotspot/cpu/aarch64/icBuffer_aarch64.cpp

+8-2
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,15 @@ void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached
5252
address start = __ pc();
5353
Label l;
5454
__ ldr(rscratch2, l);
55-
__ far_jump(ExternalAddress(entry_point));
56-
__ align(wordSize);
55+
int jump_code_size = __ far_jump(ExternalAddress(entry_point));
56+
// IC stub code size is not expected to vary depending on target address.
57+
// We use NOPs to make the [ldr + far_jump + nops + int64] stub size equal to ic_stub_code_size.
58+
for (int size = NativeInstruction::instruction_size + jump_code_size + 8;
59+
size < ic_stub_code_size(); size += NativeInstruction::instruction_size) {
60+
__ nop();
61+
}
5762
__ bind(l);
63+
assert((uintptr_t)__ pc() % wordSize == 0, "");
5864
__ emit_int64((int64_t)cached_value);
5965
// Only need to invalidate the 1st two instructions - not the whole ic stub
6066
ICache::invalidate_range(code_begin, InlineCacheBuffer::ic_stub_code_size());

src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp

+20-5
Original file line numberDiff line numberDiff line change
@@ -382,14 +382,27 @@ void MacroAssembler::set_last_Java_frame(Register last_java_sp,
382382
}
383383
}
384384

385+
static inline bool target_needs_far_branch(address addr) {
386+
// codecache size <= 128M
387+
if (!MacroAssembler::far_branches()) {
388+
return false;
389+
}
390+
// codecache size > 240M
391+
if (MacroAssembler::codestub_branch_needs_far_jump()) {
392+
return true;
393+
}
394+
// codecache size: 128M..240M
395+
return !CodeCache::is_non_nmethod(addr);
396+
}
397+
385398
void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) {
386399
assert(ReservedCodeCacheSize < 4*G, "branch out of range");
387400
assert(CodeCache::find_blob(entry.target()) != NULL,
388401
"destination of far call not found in code cache");
389-
if (far_branches()) {
402+
if (target_needs_far_branch(entry.target())) {
390403
uint64_t offset;
391404
// We can use ADRP here because we know that the total size of
392-
// the code cache cannot exceed 2Gb.
405+
// the code cache cannot exceed 2Gb (ADRP limit is 4GB).
393406
adrp(tmp, entry, offset);
394407
add(tmp, tmp, offset);
395408
if (cbuf) cbuf->set_insts_mark();
@@ -400,14 +413,15 @@ void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) {
400413
}
401414
}
402415

403-
void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) {
416+
int MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) {
404417
assert(ReservedCodeCacheSize < 4*G, "branch out of range");
405418
assert(CodeCache::find_blob(entry.target()) != NULL,
406419
"destination of far call not found in code cache");
407-
if (far_branches()) {
420+
address start = pc();
421+
if (target_needs_far_branch(entry.target())) {
408422
uint64_t offset;
409423
// We can use ADRP here because we know that the total size of
410-
// the code cache cannot exceed 2Gb.
424+
// the code cache cannot exceed 2Gb (ADRP limit is 4GB).
411425
adrp(tmp, entry, offset);
412426
add(tmp, tmp, offset);
413427
if (cbuf) cbuf->set_insts_mark();
@@ -416,6 +430,7 @@ void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) {
416430
if (cbuf) cbuf->set_insts_mark();
417431
b(entry);
418432
}
433+
return pc() - start;
419434
}
420435

421436
void MacroAssembler::reserved_stack_check() {

src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp

+8-3
Original file line numberDiff line numberDiff line change
@@ -1087,13 +1087,18 @@ class MacroAssembler: public Assembler {
10871087
return ReservedCodeCacheSize > branch_range;
10881088
}
10891089

1090+
// Check if branches to the non nmethod section require a far jump
1091+
static bool codestub_branch_needs_far_jump() {
1092+
return CodeCache::max_distance_to_non_nmethod() > branch_range;
1093+
}
1094+
10901095
// Jumps that can reach anywhere in the code cache.
10911096
// Trashes tmp.
10921097
void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1);
1093-
void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1);
1098+
int far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1);
10941099

1095-
static int far_branch_size() {
1096-
if (far_branches()) {
1100+
static int far_codestub_branch_size() {
1101+
if (codestub_branch_needs_far_jump()) {
10971102
return 3 * 4; // adrp, add, br
10981103
} else {
10991104
return 4;

src/hotspot/share/code/codeCache.cpp

+23-5
Original file line numberDiff line numberDiff line change
@@ -296,19 +296,20 @@ void CodeCache::initialize_heaps() {
296296
const size_t alignment = MAX2(page_size(false, 8), (size_t) os::vm_allocation_granularity());
297297
non_nmethod_size = align_up(non_nmethod_size, alignment);
298298
profiled_size = align_down(profiled_size, alignment);
299+
non_profiled_size = align_down(non_profiled_size, alignment);
299300

300301
// Reserve one continuous chunk of memory for CodeHeaps and split it into
301302
// parts for the individual heaps. The memory layout looks like this:
302303
// ---------- high -----------
303304
// Non-profiled nmethods
304-
// Profiled nmethods
305305
// Non-nmethods
306+
// Profiled nmethods
306307
// ---------- low ------------
307308
ReservedCodeSpace rs = reserve_heap_memory(cache_size);
308-
ReservedSpace non_method_space = rs.first_part(non_nmethod_size);
309-
ReservedSpace rest = rs.last_part(non_nmethod_size);
310-
ReservedSpace profiled_space = rest.first_part(profiled_size);
311-
ReservedSpace non_profiled_space = rest.last_part(profiled_size);
309+
ReservedSpace profiled_space = rs.first_part(profiled_size);
310+
ReservedSpace rest = rs.last_part(profiled_size);
311+
ReservedSpace non_method_space = rest.first_part(non_nmethod_size);
312+
ReservedSpace non_profiled_space = rest.last_part(non_nmethod_size);
312313

313314
// Non-nmethods (stubs, adapters, ...)
314315
add_heap(non_method_space, "CodeHeap 'non-nmethods'", CodeBlobType::NonNMethod);
@@ -898,6 +899,23 @@ size_t CodeCache::max_capacity() {
898899
return max_cap;
899900
}
900901

902+
bool CodeCache::is_non_nmethod(address addr) {
903+
CodeHeap* blob = get_code_heap(CodeBlobType::NonNMethod);
904+
return blob->contains(addr);
905+
}
906+
907+
size_t CodeCache::max_distance_to_non_nmethod() {
908+
if (!SegmentedCodeCache) {
909+
return ReservedCodeCacheSize;
910+
} else {
911+
CodeHeap* blob = get_code_heap(CodeBlobType::NonNMethod);
912+
// the max distance is minimized by placing the NonNMethod segment
913+
// in between MethodProfiled and MethodNonProfiled segments
914+
size_t dist1 = (size_t)blob->high() - (size_t)_low_bound;
915+
size_t dist2 = (size_t)_high_bound - (size_t)blob->low();
916+
return dist1 > dist2 ? dist1 : dist2;
917+
}
918+
}
901919

902920
// Returns the reverse free ratio. E.g., if 25% (1/4) of the code cache
903921
// is free, reverse_free_ratio() returns 4.

src/hotspot/share/code/codeCache.hpp

+3
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,9 @@ class CodeCache : AllStatic {
213213

214214
static double reverse_free_ratio();
215215

216+
static size_t max_distance_to_non_nmethod();
217+
static bool is_non_nmethod(address addr);
218+
216219
static void clear_inline_caches(); // clear all inline caches
217220
static void cleanup_inline_caches(); // clean unloaded/zombie nmethods from inline caches
218221

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
/*
2+
* Copyright (c) 2022, BELLSOFT. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*/
23+
package compiler.c2.aarch64;
24+
25+
import jdk.test.lib.process.OutputAnalyzer;
26+
import jdk.test.lib.process.ProcessTools;
27+
import java.util.regex.Matcher;
28+
import java.util.regex.Pattern;
29+
import java.util.*;
30+
31+
/*
32+
* @test
33+
* @bug 8280872
34+
* @summary Far call to runtime stub should be generated with single instruction for CodeHeap up to 250MB
35+
* @library /test/lib /
36+
*
37+
* @requires vm.flagless
38+
* @requires os.arch=="aarch64"
39+
* @requires vm.debug == false
40+
* @requires vm.compiler2.enabled
41+
*
42+
* @run driver compiler.c2.aarch64.TestFarJump
43+
*/
44+
public class TestFarJump {
45+
46+
// ADRP instruction encoding:
47+
// |31 30 29 28|27 26 25 24|23 22 21 20|19 18 17 16|15 14 13 12|11 10 09 08|07 06 05 04|03 02 01 00|
48+
// | 1|immlo| 1 0 0 0 0| immhi | Rd |
49+
static boolean isADRP(int encoding) {
50+
final int mask = 0b1001_1111;
51+
final int val = 0b1001_0000;
52+
return ((encoding >> 24) & mask) == val;
53+
}
54+
55+
// Looking for adrp instruction in binary/text assembly output:
56+
// 0x0000ffff7ff1b7d0: c8ff ffd0 | 0801 1091 | 0001 1fd6
57+
// 0x0000ffff6bf20ee0: adrp x8, 0x0000ffff6bef1000
58+
static boolean containsADRP(String input) {
59+
int index = input.indexOf(": ");
60+
if (index == -1) {
61+
return false;
62+
}
63+
input = input.substring(index + 1);
64+
if (input.contains("adrp")) {
65+
return true;
66+
}
67+
Pattern pattern = Pattern.compile("[0-9a-f ]*");
68+
Matcher matcher = pattern.matcher(input);
69+
while (matcher.find()) {
70+
String match = matcher.group();
71+
match = match.replace(" " , "");
72+
if (match.length() != 8) {
73+
continue;
74+
}
75+
int dump = (int)Long.parseLong(match, 16);
76+
int encoding = Integer.reverseBytes(dump);
77+
if (isADRP(encoding)) {
78+
return true;
79+
}
80+
}
81+
return false;
82+
}
83+
84+
static void runVM(boolean bigCodeHeap) throws Exception {
85+
String className = TestFarJump.class.getName();
86+
String[] procArgs = {
87+
"-XX:-Inline",
88+
"-Xcomp",
89+
"-Xbatch",
90+
"-XX:+TieredCompilation",
91+
"-XX:+SegmentedCodeCache",
92+
"-XX:CompileOnly=" + className + "::main",
93+
"-XX:ReservedCodeCacheSize=" + (bigCodeHeap ? "256M" : "200M"),
94+
"-XX:+UnlockDiagnosticVMOptions",
95+
"-XX:+PrintAssembly",
96+
className};
97+
98+
ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(procArgs);
99+
OutputAnalyzer output = new OutputAnalyzer(pb.start());
100+
List<String> lines = output.asLines();
101+
102+
ListIterator<String> itr = lines.listIterator();
103+
while (itr.hasNext()) {
104+
String line = itr.next();
105+
if (line.contains("[Exception Handler]")) {
106+
String next1 = itr.next();
107+
String next2 = itr.next();
108+
System.out.println(line);
109+
System.out.println(next1);
110+
System.out.println(next2);
111+
boolean containsADRP = containsADRP(next1) || containsADRP(next2);
112+
if (bigCodeHeap && !containsADRP) {
113+
throw new RuntimeException("ADRP instruction is expected on far jump");
114+
}
115+
if (!bigCodeHeap && containsADRP) {
116+
throw new RuntimeException("for CodeHeap < 250MB the far jump is expected to be encoded with a single branch instruction");
117+
}
118+
return;
119+
}
120+
}
121+
throw new RuntimeException("Assembly output: exception Handler is not found");
122+
}
123+
124+
public static void main(String[] args) throws Exception {
125+
if (args.length == 0) {
126+
// Main VM: fork VM with options
127+
runVM(true);
128+
runVM(false);
129+
return;
130+
}
131+
if (args.length > 0) {
132+
// We are in a forked VM. Just exit
133+
System.out.println("Ok");
134+
}
135+
}
136+
}
137+

0 commit comments

Comments
 (0)