@@ -265,19 +265,27 @@ void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label* slo
       }
     case NMethodPatchingType::conc_instruction_and_data_patch:
       {
-        // If we patch code we need both a code patching and a loadload
+        // If we patch code we need both a cmodx fence and a loadload
         // fence. It's not super cheap, so we use a global epoch mechanism
         // to hide them in a slow path.
         // The high level idea of the global epoch mechanism is to detect
         // when any thread has performed the required fencing, after the
         // last nmethod was disarmed. This implies that the required
         // fencing has been performed for all preceding nmethod disarms
         // as well. Therefore, we do not need any further fencing.
+
         __ la(t1, ExternalAddress((address)&_patching_epoch));
-        // Embed an artificial data dependency to order the guard load
-        // before the epoch load.
-        __ srli(ra, t0, 32);
-        __ orr(t1, t1, ra);
+        if (!UseZtso) {
+          // Embed a synthetic data dependency between the load of the guard
+          // and the load of the epoch. This guarantees that these loads occur
+          // in order, while allowing other independent instructions to be
+          // reordered.
+          // Note: this may be slower than using a membar(load|load)
+          // (fence r,r), because the processor will not issue the second load
+          // until the first one has returned. The two loads cannot overlap,
+          // which is stronger than needed for ordering (stronger than TSO).
+          __ srli(ra, t0, 32);
+          __ orr(t1, t1, ra);
+        }
         // Read the global epoch value.
         __ lwu(t1, t1);
         // Combine the guard value (low order) with the epoch value (high order).
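
A note on why the srli/orr pair orders the two loads: the guard was loaded
with lwu, so its upper 32 bits are zero, which means the shift always yields
0 and or-ing it into the epoch address leaves the address unchanged. What
matters is that the epoch load's address now depends on the guard load's
result, so the hardware must complete the guard load first. Below is a
minimal sketch of the same trick in C++ with GCC-style RISC-V inline
assembly; the variable names are illustrative, not from the patch.

  #include <cstdint>

  extern volatile uint32_t nmethod_guard;   // stands in for the guard word (t0)
  extern volatile uint32_t patching_epoch;  // stands in for _patching_epoch

  uint32_t epoch_ordered_after_guard() {
    uint64_t guard = nmethod_guard;  // first load; zero-extended like lwu
    uintptr_t addr = reinterpret_cast<uintptr_t>(&patching_epoch);
    uintptr_t dep;
    // dep = guard >> 32 is always 0 for a zero-extended 32-bit value, and
    // addr |= 0 changes nothing -- but the second load's address now carries
    // a data dependency on the first load's result, which orders the loads
    // without a fence.
    asm volatile("srli %0, %2, 32\n\t"
                 "or   %1, %1, %0"
                 : "=&r"(dep), "+r"(addr)
                 : "r"(guard));
    return *reinterpret_cast<volatile uint32_t*>(addr);  // second load
  }

Under Ztso the RISC-V memory model already orders loads against later loads,
which is why the patch skips the dependency when UseZtso is set: the ordering
comes for free, and the two loads are then free to overlap.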
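For context on the comment that closes the hunk: packing the 32-bit epoch
into the high half and the guard into the low half lets the barrier check
both with one 64-bit compare. The instructions that do this follow the hunk
and are not shown; the sketch below is a conceptual reconstruction, and
'expected' is a hypothetical parameter standing in for the thread-local
value the real code compares against.

  #include <cstdint>

  // Conceptual shape of the final check (a sketch, not the patch's code).
  bool needs_slow_path(uint32_t guard, uint32_t epoch, uint64_t expected) {
    // "Combine the guard value (low order) with the epoch value (high order)."
    uint64_t combined = (static_cast<uint64_t>(epoch) << 32) | guard;
    // A mismatch means either the nmethod is armed (guard differs) or no
    // thread has performed the required fencing since the last disarm
    // (epoch differs); either way we take the slow path.
    return combined != expected;
  }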