@@ -1113,8 +1113,75 @@ class MacroAssembler: public Assembler {
11131113#endif
11141114
11151115public:
1116- // Calls
1117-
1116+ // AArch64 OpenJDK uses four different types of calls:
1117+ // - direct call: bl pc_relative_offset
1118+ // This is the shortest and the fastest, but the offset has the range:
1119+ // +/-128MB for the release build, +/-2MB for the debug build.
1120+ //
1121+ // - far call: adrp reg, pc_relative_offset; add; blr reg
1122+ // This is longer than a direct call. The offset has
1123+ // the range +/-4GB. As the code cache size is limited to 4GB,
1124+ // far calls can reach anywhere in the code cache. If a jump is
1125+ // needed rather than a call, a far jump 'br reg' can be used instead.
1126+ // All instructions are embedded at a call site.
1127+ //
1128+ // - trampoline call:
1129+ // This is only available in C1/C2-generated code (nmethod). It is a combination
1130+ // of a direct call, which is used if the destination of a call is in range,
1131+ // and a register-indirect call. It has the advantages of reaching anywhere in
1132+ // the AArch64 address space and being patchable at runtime when the generated
1133+ // code is being executed by other threads.
1134+ //
1135+ // [Main code section]
1136+ // bl trampoline
1137+ // [Stub code section]
1138+ // trampoline:
1139+ // ldr reg, pc + 8
1140+ // br reg
1141+ // <64-bit destination address>
1142+ //
1143+ // If the destination is in range when the generated code is moved to the code
1144+ // cache, 'bl trampoline' is replaced with 'bl destination' and the trampoline
1145+ // is not used.
1146+ // The optimization does not remove the trampoline from the stub section.
1147+ // This is necessary because the trampoline may well be redirected later when
1148+ // code is patched, and the new destination may not be reachable by a simple BL
1149+ // instruction.
1150+ //
1151+ // - indirect call: move reg, address; blr reg
1152+ // This too can reach anywhere in the address space, but it cannot be
1153+ // patched while code is running, so it must only be modified at a safepoint.
1154+ // This form of call is most suitable for targets at fixed addresses, which
1155+ // will never be patched.
1156+ //
1157+ // The patching we do conforms to the "Concurrent modification and
1158+ // execution of instructions" section of the Arm Architecture
1159+ // Reference Manual, which only allows B, BL, BRK, HVC, ISB, NOP, SMC,
1160+ // or SVC instructions to be modified while another thread is
1161+ // executing them.
1162+ //
1163+ // To patch a trampoline call when the BL can't reach, we first modify
1164+ // the 64-bit destination address in the trampoline, then modify the
1165+ // BL to point to the trampoline, then flush the instruction cache to
1166+ // broadcast the change to all executing threads. See
1167+ // NativeCall::set_destination_mt_safe for the details.
1168+ //
1169+ // There is a benign race in that the other thread might observe the
1170+ // modified BL before it observes the modified 64-bit destination
1171+ // address. That does not matter because the destination method has been
1172+ // invalidated, so there will be a trap at its start.
1173+ // For this to work, the destination address in the trampoline is
1174+ // always updated, even if we're not using the trampoline.
1175+
1176+ // Emit a direct call if the entry address will always be in range,
1177+ // otherwise a trampoline call.
1178+ // Supported entry.rspec():
1179+ // - relocInfo::runtime_call_type
1180+ // - relocInfo::opt_virtual_call_type
1181+ // - relocInfo::static_call_type
1182+ // - relocInfo::virtual_call_type
1183+ //
1184+ // Return: NULL if CodeCache is full.
11181185 address trampoline_call (Address entry, CodeBuffer* cbuf = NULL ) { return trampoline_call1 (entry, cbuf, true ); } // Convenience wrapper: forwards to trampoline_call1 with check_emit_size fixed to true.
11191186 address trampoline_call1 (Address entry, CodeBuffer* cbuf, bool check_emit_size = true ); // Declaration only; check_emit_size defaults to true when omitted.
11201187
@@ -1127,19 +1194,17 @@ class MacroAssembler: public Assembler {
11271194 return CodeCache::max_distance_to_non_nmethod () > branch_range;
11281195 }
11291196
1130- // Far_call and far_jump generate a call of/jump to the provided address.
1197+ // Emit a direct call/jump if the entry address will always be in range,
1198+ // otherwise a far call/jump.
11311199 // The address must be inside the code cache.
11321200 // Supported entry.rspec():
11331201 // - relocInfo::external_word_type
11341202 // - relocInfo::runtime_call_type
11351203 // - relocInfo::none
1136- // If the distance to the address can exceed the branch range
1137- // (128M for the release build, 2M for the debug build; see branch_range definition)
1138- // for direct calls(BL)/jumps(B), a call(BLR)/jump(BR) with the address put in
1139- // the tmp register is generated. Instructions putting the address in the tmp register
1140- // are embedded at a call site. The tmp register is invalidated.
1141- // This differs from trampoline_call which puts additional code (trampoline) including
1142- // BR into the stub code section and a BL to the trampoline at a call site.
1204+ // In the case of a far call/jump, the entry address is put in the tmp register.
1205+ // The tmp register is invalidated.
1206+ //
1207+ // Far_jump returns the amount of the emitted code.
11431208 void far_call (Address entry, CodeBuffer *cbuf = NULL , Register tmp = rscratch1); // No return value; cbuf defaults to NULL and tmp to rscratch1.
11441209 int far_jump (Address entry, CodeBuffer *cbuf = NULL , Register tmp = rscratch1); // Returns an int; same defaults as far_call (cbuf = NULL, tmp = rscratch1).
11451210
0 commit comments