@@ -265,19 +265,27 @@ void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label* slo
265265 }
266266 case NMethodPatchingType::conc_instruction_and_data_patch:
267267 {
268- // If we patch code we need both a code patching and a loadload
268+ // If we patch code we need both a cmodx fence and a loadload
269269 // fence. It's not super cheap, so we use a global epoch mechanism
270270 // to hide them in a slow path.
271271 // The high level idea of the global epoch mechanism is to detect
272272 // when any thread has performed the required fencing, after the
273273 // last nmethod was disarmed. This implies that the required
274274 // fencing has been performed for all preceding nmethod disarms
275275 // as well. Therefore, we do not need any further fencing.
276+
276277 __ la (t1, ExternalAddress ((address)&_patching_epoch));
277- // Embed an artificial data dependency to order the guard load
278- // before the epoch load.
279- __ srli (ra, t0, 32 );
280- __ orr (t1, t1, ra);
278+ if (!UseZtso) {
279+ // Embed a synthetic data dependency between the load of the guard and
280+ // the load of the epoch. This guarantees that these loads occur in
281+ // order, while allowing other independent instructions to be reordered.
282+  // Note: this can be slower than a membar(load|load) (fence r,r),
283+  // because the processor cannot start the second load until the
284+  // first one has returned, so the two loads never overlap. That is
285+  // stronger ordering than required here (stronger than TSO).
286+ __ srli (ra, t0, 32 );
287+ __ orr (t1, t1, ra);
288+ }
281289 // Read the global epoch value.
282290 __ lwu (t1, t1);
283291 // Combine the guard value (low order) with the epoch value (high order).
0 commit comments