diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index 36bd30883cf..b95d962db66 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -283,8 +283,7 @@ void LIR_Assembler::osr_entry() { // build frame ciMethod* m = compilation()->method(); - __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes(), - needs_stack_repair(), NULL); + __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); // OSR buffer is // @@ -481,7 +480,8 @@ int LIR_Assembler::emit_unwind_handler() { } // remove the activation and dispatch to the unwind handler - __ remove_frame(initial_frame_size_in_bytes(), needs_stack_repair()); + int initial_framesize = initial_frame_size_in_bytes(); + __ remove_frame(initial_framesize, needs_stack_repair(), initial_framesize - wordSize); __ jump(RuntimeAddress(Runtime1::entry_for(Runtime1::unwind_exception_id))); // Emit the slow path assembly @@ -547,7 +547,8 @@ void LIR_Assembler::return_op(LIR_Opr result) { } // Pop the stack before the safepoint code - __ remove_frame(initial_frame_size_in_bytes(), needs_stack_repair()); + int initial_framesize = initial_frame_size_in_bytes(); + __ remove_frame(initial_framesize, needs_stack_repair(), initial_framesize - wordSize); if (StackReservedPages > 0 && compilation()->has_reserved_stack_access()) { __ reserved_stack_check(); @@ -4269,6 +4270,9 @@ void LIR_Assembler::get_thread(LIR_Opr result_reg) { #endif // _LP64 } +void LIR_Assembler::check_orig_pc() { + __ cmpptr(frame_map()->address_for_orig_pc_addr(), (int32_t)NULL_WORD); +} void LIR_Assembler::peephole(LIR_List*) { // do nothing for now diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp index 8fc2faca737..3c29361670e 100644 --- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp @@ -318,18 +318,21 @@ void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache) { } -void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes, bool needs_stack_repair, Label* verified_value_entry_label) { - assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect"); +void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes, int sp_offset_for_orig_pc, bool needs_stack_repair, bool has_scalarized_args, Label* verified_value_entry_label) { + if (has_scalarized_args) { + // Initialize orig_pc to detect deoptimization during buffering in the entry points + movptr(Address(rsp, sp_offset_for_orig_pc - frame_size_in_bytes - wordSize), 0); + } + if (!needs_stack_repair && verified_value_entry_label != NULL) { + bind(*verified_value_entry_label); + } // Make sure there is enough stack space for this method's activation. // Note that we do this before doing an enter(). This matches the // ordering of C2's stack overflow check / rsp decrement and allows // the SharedRuntime stack overflow handling to be consistent // between the two compilers. 
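// Aside: a minimal standalone model of the stack-repair bookkeeping shared by the
// remove_frame() calls above and the build_frame()/scalarized_entry() code below.
// The constant and helper names here are illustrative assumptions, not HotSpot API.
#include <cassert>

namespace stack_repair_sketch {
  constexpr int kWordSize = 8; // x86_64

  // build_frame() stores this value at [rsp + frame_size - kWordSize], i.e. in the
  // slot just below the saved rbp (scalarized_entry() additionally folds in sp_inc):
  int stored_real_frame_size(int frame_size_in_bytes, int sp_inc = 0) {
    return sp_inc + frame_size_in_bytes + kWordSize; // fixed frame + saved rbp
  }

  // remove_frame() finds that slot via sp_inc_offset == initial_framesize - kWordSize,
  // and frame::repair_sender_sp() adds back the return-address word that the stored
  // value deliberately omits before converting bytes to words:
  int sender_sp_distance_in_words(int stored_value) {
    return (stored_value + kWordSize) / kWordSize;
  }
}
// The C1 prologue that creates this layout continues below.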
+  assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect");
   generate_stack_overflow_check(bang_size_in_bytes);
-
-  if (!needs_stack_repair && verified_value_entry_label != NULL) {
-    bind(*verified_value_entry_label);
-  }
   push(rbp);
   if (PreserveFramePointer) {
     mov(rbp, rsp);
@@ -342,9 +345,8 @@ void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_by
 #endif // !_LP64 && TIERED
   decrement(rsp, frame_size_in_bytes); // does not emit code for frame_size == 0
   if (needs_stack_repair) {
-    int real_frame_size = frame_size_in_bytes
-           + wordSize  // skip over pushed rbp
-           + wordSize; // skip over RA pushed by caller
+    // Save stack increment (also account for rbp)
+    int real_frame_size = frame_size_in_bytes + wordSize;
     movptr(Address(rsp, frame_size_in_bytes - wordSize), real_frame_size);
     if (verified_value_entry_label != NULL) {
       bind(*verified_value_entry_label);
     }
@@ -355,21 +357,7 @@ void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_by
   bs->nmethod_entry_barrier(this);
 }

-
-void C1_MacroAssembler::remove_frame(int frame_size_in_bytes, bool needs_stack_repair) {
-  if (!needs_stack_repair) {
-    increment(rsp, frame_size_in_bytes); // Does not emit code for frame_size == 0
-    pop(rbp);
-  } else {
-    movq(r13, Address(rsp, frame_size_in_bytes + wordSize)); // return address
-    movq(rbp, Address(rsp, frame_size_in_bytes));
-    addq(rsp, Address(rsp, frame_size_in_bytes - wordSize)); // now we are back to caller frame, without the outgoing returned address
-    push(r13); // restore the returned address, as pushed by caller
-  }
-}
-
-
-void C1_MacroAssembler::verified_value_entry() {
+void C1_MacroAssembler::verified_entry() {
   if (C1Breakpoint || VerifyFPU || !UseStackBanging) {
     // Verified Entry first instruction should be 5 bytes long for correct
     // patching by patch_verified_entry().
@@ -385,22 +373,13 @@ void C1_MacroAssembler::verified_value_entry() {
   IA32_ONLY( verify_FPU(0, "method_entry"); )
 }

-int C1_MacroAssembler::scalarized_entry(const CompiledEntrySignature *ces, int frame_size_in_bytes, int bang_size_in_bytes, Label& verified_value_entry_label, bool is_value_ro_entry) {
-  if (C1Breakpoint || VerifyFPU || !UseStackBanging) {
-    // Verified Entry first instruction should be 5 bytes long for correct
-    // patching by patch_verified_entry().
-    //
-    // C1Breakpoint and VerifyFPU have one byte first instruction.
-    // Also first instruction will be one byte "push(rbp)" if stack banging
-    // code is not generated (see build_frame() above).
-    // For all these cases generate long instruction first.
-    fat_nop();
-  }
-  if (C1Breakpoint)int3();
-  IA32_ONLY( verify_FPU(0, "method_entry"); )
-
+int C1_MacroAssembler::scalarized_entry(const CompiledEntrySignature *ces, int frame_size_in_bytes, int bang_size_in_bytes, int sp_offset_for_orig_pc, Label& verified_value_entry_label, bool is_value_ro_entry) {
   assert(ValueTypePassFieldsAsArgs, "sanity");
-  GrowableArray<SigEntry>* sig = &ces->sig();
+  // Make sure there is enough stack space for this method's activation.
+  assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect");
+  generate_stack_overflow_check(bang_size_in_bytes);
+
+  GrowableArray<SigEntry>* sig = &ces->sig();
   GrowableArray<SigEntry>* sig_cc = is_value_ro_entry ? &ces->sig_cc_ro() : &ces->sig_cc();
   VMRegPair* regs = ces->regs();
   VMRegPair* regs_cc = is_value_ro_entry ?
ces->regs_cc_ro() : ces->regs_cc();
@@ -411,32 +390,36 @@ int C1_MacroAssembler::scalarized_entry(const CompiledEntrySignature *ces, int f
   BasicType* sig_bt = NEW_RESOURCE_ARRAY(BasicType, sig_cc->length());
   int args_passed = sig->length();
   int args_passed_cc = SigEntry::fill_sig_bt(sig_cc, sig_bt);
   int extra_stack_offset = wordSize; // tos is return address.
-  // Create a temp frame so we can call into runtime. It must be properly set up to accommodate GC.
-  int sp_inc = (args_on_stack - args_on_stack_cc) * VMRegImpl::stack_slot_size;
-  if (sp_inc > 0) {
-    pop(r13);
+  // Check if we need to extend the stack for packing
+  int sp_inc = 0;
+  if (args_on_stack > args_on_stack_cc) {
+    // Two additional slots to account for return address
+    sp_inc = (args_on_stack + 2) * VMRegImpl::stack_slot_size;
     sp_inc = align_up(sp_inc, StackAlignmentInBytes);
+    pop(r13); // Copy return address
     subptr(rsp, sp_inc);
     push(r13);
-  } else {
-    sp_inc = 0;
   }
+
+  // Create a temp frame so we can call into the runtime. It must be properly set up to accommodate GC.
   push(rbp);
   if (PreserveFramePointer) {
     mov(rbp, rsp);
   }
   subptr(rsp, frame_size_in_bytes);
-  if (sp_inc > 0) {
-    int real_frame_size = frame_size_in_bytes +
-      + wordSize  // pushed rbp
-      + wordSize  // returned address pushed by the stack extension code
-      + sp_inc;   // stack extension
+
+  if (ces->c1_needs_stack_repair()) {
+    // Save stack increment (also account for fixed framesize and rbp)
+    assert((sp_inc & (StackAlignmentInBytes-1)) == 0, "stack increment not aligned");
+    int real_frame_size = sp_inc + frame_size_in_bytes + wordSize;
     movptr(Address(rsp, frame_size_in_bytes - wordSize), real_frame_size);
   }
+  // Initialize orig_pc to detect deoptimization during buffering in below runtime call
+  movptr(Address(rsp, sp_offset_for_orig_pc), 0);
+
   // FIXME -- call runtime only if we cannot in-line allocate all the incoming value args.
   movptr(rbx, (intptr_t)(ces->method()));
   if (is_value_ro_entry) {
@@ -450,16 +433,13 @@ int C1_MacroAssembler::scalarized_entry(const CompiledEntrySignature *ces, int f
   addptr(rsp, frame_size_in_bytes);
   pop(rbp);

-  int n = shuffle_value_args(true, is_value_ro_entry, extra_stack_offset, sig_bt, sig_cc,
-                             args_passed_cc, args_on_stack_cc, regs_cc, // from
-                             args_passed, args_on_stack, regs);         // to
-  assert(sp_inc == n, "must be");
+  shuffle_value_args(true, is_value_ro_entry, extra_stack_offset, sig_bt, sig_cc,
+                     args_passed_cc, args_on_stack_cc, regs_cc, // from
+                     args_passed, args_on_stack, regs, sp_inc); // to

-  if (sp_inc != 0) {
-    // Do the stack banging here, and skip over the stack repair code in the
+  if (ces->c1_needs_stack_repair()) {
+    // Skip over the stack banging and frame setup code in the
     // verified_value_entry (which has a different real_frame_size).
-    assert(sp_inc > 0, "stack should not shrink");
-    generate_stack_overflow_check(bang_size_in_bytes);
     push(rbp);
     if (PreserveFramePointer) {
       mov(rbp, rsp);
diff --git a/src/hotspot/cpu/x86/frame_x86.cpp b/src/hotspot/cpu/x86/frame_x86.cpp
index 10b5b92ff77..b594edd52ae 100644
--- a/src/hotspot/cpu/x86/frame_x86.cpp
+++ b/src/hotspot/cpu/x86/frame_x86.cpp
@@ -451,7 +451,7 @@ frame frame::sender_for_compiled_frame(RegisterMap* map) const {
   // It is only an FP if the sender is an interpreter frame (or C1?).
intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset); - // Repair the sender sp if this is a method with scalarized value type args + // Repair the sender sp if the frame has been extended sender_sp = repair_sender_sp(sender_sp, saved_fp_addr); // On Intel the return_address is always the word on the stack @@ -472,22 +472,24 @@ frame frame::sender_for_compiled_frame(RegisterMap* map) const { // For C1, the runtime stub might not have oop maps, so set this flag // outside of update_register_map. bool caller_args = _cb->caller_must_gc_arguments(map->thread()); +#ifdef COMPILER1 if (!caller_args) { nmethod* nm = _cb->as_nmethod_or_null(); - if (nm != NULL && nm->is_compiled_by_c1() && - nm->method()->has_scalarized_args() && + if (nm != NULL && nm->is_compiled_by_c1() && nm->method()->has_scalarized_args() && pc() < nm->verified_value_entry_point()) { // The VEP and VVEP(RO) of C1-compiled methods call buffer_value_args_xxx // before doing any argument shuffling, so we need to scan the oops // as the caller passes them. + caller_args = true; +#ifdef ASSERT NativeCall* call = nativeCall_before(pc()); address dest = call->destination(); - if (dest == Runtime1::entry_for(Runtime1::buffer_value_args_no_receiver_id) || - dest == Runtime1::entry_for(Runtime1::buffer_value_args_id)) { - caller_args = true; - } + assert(dest == Runtime1::entry_for(Runtime1::buffer_value_args_no_receiver_id) || + dest == Runtime1::entry_for(Runtime1::buffer_value_args_id), "unexpected safepoint in entry point"); +#endif } } +#endif map->set_include_argument_oops(caller_args); if (_cb->oop_maps() != NULL) { OopMapSet::update_register_map(this, map); @@ -712,11 +714,7 @@ intptr_t* frame::repair_sender_sp(intptr_t* sender_sp, intptr_t** saved_fp_addr) // The stack increment resides just below the saved rbp on the stack // and does not account for the return address. 
    intptr_t* real_frame_size_addr = (intptr_t*) (saved_fp_addr - 1);
-    int real_frame_size = (*real_frame_size_addr) / wordSize;
-    if (!cm->is_compiled_by_c1()) {
-      // Add size of return address (C1 already includes the RA size)
-      real_frame_size += 1;
-    }
+    int real_frame_size = ((*real_frame_size_addr) + wordSize) / wordSize;
     assert(real_frame_size >= _cb->frame_size(), "invalid frame size");
     sender_sp = unextended_sp() + real_frame_size;
   }
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp
index 3b6482b3d81..57c756b5c61 100644
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp
@@ -5515,26 +5515,18 @@ void MacroAssembler::unpack_value_args(Compile* C, bool receiver_only) {
   verified_entry(C, sp_inc);
 }

-int MacroAssembler::shuffle_value_args(bool is_packing, bool receiver_only, int extra_stack_offset,
-                                       BasicType* sig_bt, const GrowableArray<SigEntry>* sig_cc,
-                                       int args_passed, int args_on_stack, VMRegPair* regs,           // from
-                                       int args_passed_to, int args_on_stack_to, VMRegPair* regs_to) { // to
+void MacroAssembler::shuffle_value_args(bool is_packing, bool receiver_only, int extra_stack_offset,
+                                        BasicType* sig_bt, const GrowableArray<SigEntry>* sig_cc,
+                                        int args_passed, int args_on_stack, VMRegPair* regs,
+                                        int args_passed_to, int args_on_stack_to, VMRegPair* regs_to, int sp_inc) {
   // Check if we need to extend the stack for packing/unpacking
-  int sp_inc = (args_on_stack_to - args_on_stack) * VMRegImpl::stack_slot_size;
-  if (sp_inc > 0) {
-    sp_inc = align_up(sp_inc, StackAlignmentInBytes);
-    if (!is_packing) {
-      // Save the return address, adjust the stack (make sure it is properly
-      // 16-byte aligned) and copy the return address to the new top of the stack.
-      // (Note: C1 does this in C1_MacroAssembler::scalarized_entry).
-      pop(r13);
-      subptr(rsp, sp_inc);
-      push(r13);
-    }
-  } else {
-    // The scalarized calling convention needs less stack space than the unscalarized one.
-    // No need to extend the stack, the caller will take care of these adjustments.
-    sp_inc = 0;
+  if (sp_inc > 0 && !is_packing) {
+    // Save the return address, adjust the stack (make sure it is properly
+    // 16-byte aligned) and copy the return address to the new top of the stack.
+    // (Note: C1 does this in C1_MacroAssembler::scalarized_entry).
+    pop(r13);
+    subptr(rsp, sp_inc);
+    push(r13);
   }

   int ret_off; // make sure we don't overwrite the return address
@@ -5547,31 +5539,25 @@
     ret_off = sp_inc;
   }

-  return shuffle_value_args_common(is_packing, receiver_only, extra_stack_offset,
-                                   sig_bt, sig_cc,
-                                   args_passed, args_on_stack, regs,
-                                   args_passed_to, args_on_stack_to, regs_to,
-                                   sp_inc, ret_off);
+  shuffle_value_args_common(is_packing, receiver_only, extra_stack_offset,
+                            sig_bt, sig_cc,
+                            args_passed, args_on_stack, regs,
+                            args_passed_to, args_on_stack_to, regs_to,
+                            sp_inc, ret_off);
 }

 VMReg MacroAssembler::spill_reg_for(VMReg reg) {
   return reg->is_XMMRegister() ?
xmm8->as_VMReg() : r14->as_VMReg();
 }

-// Restores the stack on return
-void MacroAssembler::restore_stack(Compile* C) {
-  int framesize = C->output()->frame_size_in_bytes();
-  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
-  // Remove word for return addr already pushed and RBP
-  framesize -= 2*wordSize;
-
-  if (C->needs_stack_repair()) {
-    // Restore rbp and repair rsp by adding the stack increment
-    movq(rbp, Address(rsp, framesize));
-    addq(rsp, Address(rsp, C->output()->sp_inc_offset()));
+void MacroAssembler::remove_frame(int initial_framesize, bool needs_stack_repair, int sp_inc_offset) {
+  assert((initial_framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
+  if (needs_stack_repair) {
+    movq(rbp, Address(rsp, initial_framesize));
+    addq(rsp, Address(rsp, sp_inc_offset));
   } else {
-    if (framesize > 0) {
-      addq(rsp, framesize);
+    if (initial_framesize > 0) {
+      addq(rsp, initial_framesize);
     }
     pop(rbp);
   }
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp
index f4689842fa4..7a2145fed05 100644
--- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp
@@ -1663,12 +1663,12 @@ class MacroAssembler: public Assembler {
   bool pack_value_helper(const GrowableArray<SigEntry>* sig, int& sig_index, int vtarg_index, VMReg to,
                          VMRegPair* regs_from, int regs_from_count, int& from_index, RegState reg_state[],
                          int ret_off, int extra_stack_offset);
-  void restore_stack(Compile* C);
+  void remove_frame(int initial_framesize, bool needs_stack_repair, int sp_inc_offset);

-  int shuffle_value_args(bool is_packing, bool receiver_only, int extra_stack_offset,
-                         BasicType* sig_bt, const GrowableArray<SigEntry>* sig_cc,
-                         int args_passed, int args_on_stack, VMRegPair* regs,
-                         int args_passed_to, int args_on_stack_to, VMRegPair* regs_to);
+  void shuffle_value_args(bool is_packing, bool receiver_only, int extra_stack_offset,
+                          BasicType* sig_bt, const GrowableArray<SigEntry>* sig_cc,
+                          int args_passed, int args_on_stack, VMRegPair* regs,
+                          int args_passed_to, int args_on_stack_to, VMRegPair* regs_to, int sp_inc);
   bool shuffle_value_args_spill(bool is_packing, const GrowableArray<SigEntry>* sig_cc, int sig_cc_index,
                                 VMRegPair* regs_from, int from_index, int regs_from_count,
                                 RegState* reg_state, int sp_inc, int extra_stack_offset);
diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad
index 416133d7555..eedf4cf9e2b 100644
--- a/src/hotspot/cpu/x86/x86_64.ad
+++ b/src/hotspot/cpu/x86/x86_64.ad
@@ -963,7 +963,9 @@ void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
     __ vzeroupper();
   }

-  __ restore_stack(C);
+  // Subtract two words to account for return address and rbp
+  int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
+  __ remove_frame(initial_framesize, C->needs_stack_repair(), C->output()->sp_inc_offset());

   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
     __ reserved_stack_check();
diff --git a/src/hotspot/share/asm/macroAssembler_common.cpp b/src/hotspot/share/asm/macroAssembler_common.cpp
index 054e4858e9e..6e08bb373ba 100644
--- a/src/hotspot/share/asm/macroAssembler_common.cpp
+++ b/src/hotspot/share/asm/macroAssembler_common.cpp
@@ -166,19 +166,24 @@ int MacroAssembler::unpack_value_args_common(Compile* C, bool receiver_only) {
   int args_passed_cc = SigEntry::fill_sig_bt(sig_cc, sig_bt);
   VMRegPair* regs_cc = NEW_RESOURCE_ARRAY(VMRegPair, sig_cc->length());
   int args_on_stack_cc = SharedRuntime::java_calling_convention(sig_bt, regs_cc,
args_passed_cc, false);

   int extra_stack_offset = wordSize; // stack has the returned address
-  int sp_inc = shuffle_value_args(false, receiver_only, extra_stack_offset, sig_bt, sig_cc,
-                                  args_passed, args_on_stack, regs,
-                                  args_passed_cc, args_on_stack_cc, regs_cc);
+  // Compute stack increment
+  int sp_inc = 0;
+  if (args_on_stack_cc > args_on_stack) {
+    sp_inc = (args_on_stack_cc - args_on_stack) * VMRegImpl::stack_slot_size;
+    sp_inc = align_up(sp_inc, StackAlignmentInBytes);
+  }
+  shuffle_value_args(false, receiver_only, extra_stack_offset, sig_bt, sig_cc,
+                     args_passed, args_on_stack, regs,
+                     args_passed_cc, args_on_stack_cc, regs_cc, sp_inc);
   return sp_inc;
 }

-int MacroAssembler::shuffle_value_args_common(bool is_packing, bool receiver_only, int extra_stack_offset,
-                                              BasicType* sig_bt, const GrowableArray<SigEntry>* sig_cc,
-                                              int args_passed, int args_on_stack, VMRegPair* regs,          // from
-                                              int args_passed_to, int args_on_stack_to, VMRegPair* regs_to, // to
-                                              int sp_inc, int ret_off) {
+void MacroAssembler::shuffle_value_args_common(bool is_packing, bool receiver_only, int extra_stack_offset,
+                                               BasicType* sig_bt, const GrowableArray<SigEntry>* sig_cc,
+                                               int args_passed, int args_on_stack, VMRegPair* regs,
+                                               int args_passed_to, int args_on_stack_to, VMRegPair* regs_to,
+                                               int sp_inc, int ret_off) {
   int max_stack = MAX2(args_on_stack + sp_inc/VMRegImpl::stack_slot_size, args_on_stack_to);
   RegState* reg_state = init_reg_state(is_packing, sig_cc, regs, args_passed, sp_inc, max_stack);
@@ -236,7 +241,6 @@ int MacroAssembler::shuffle_value_args_common(bool is_packing, bool receiver_onl
     }
   }
   guarantee(done, "Could not resolve circular dependency when shuffling value type arguments");
-  return sp_inc;
 }

 bool MacroAssembler::shuffle_value_args_spill(bool is_packing, const GrowableArray<SigEntry>* sig_cc, int sig_cc_index,
diff --git a/src/hotspot/share/asm/macroAssembler_common.hpp b/src/hotspot/share/asm/macroAssembler_common.hpp
index 3e2c509ea31..16479a61a3b 100644
--- a/src/hotspot/share/asm/macroAssembler_common.hpp
+++ b/src/hotspot/share/asm/macroAssembler_common.hpp
@@ -40,11 +40,11 @@
                       VMRegPair* regs, int num_regs, int sp_inc, int max_stack);
   int unpack_value_args_common(Compile* C, bool receiver_only);
-  int shuffle_value_args_common(bool is_packing, bool receiver_only, int extra_stack_offset,
-                                BasicType* sig_bt, const GrowableArray<SigEntry>* sig_cc,
-                                int args_passed, int args_on_stack, VMRegPair* regs,          // from
-                                int args_passed_to, int args_on_stack_to, VMRegPair* regs_to, // to
-                                int sp_inc, int ret_off);
+  void shuffle_value_args_common(bool is_packing, bool receiver_only, int extra_stack_offset,
+                                 BasicType* sig_bt, const GrowableArray<SigEntry>* sig_cc,
+                                 int args_passed, int args_on_stack, VMRegPair* regs,
+                                 int args_passed_to, int args_on_stack_to, VMRegPair* regs_to,
+                                 int sp_inc, int ret_off);
   //
 };
diff --git a/src/hotspot/share/c1/c1_FrameMap.hpp b/src/hotspot/share/c1/c1_FrameMap.hpp
index f0e52ed8824..8c81841f8cb 100644
--- a/src/hotspot/share/c1/c1_FrameMap.hpp
+++ b/src/hotspot/share/c1/c1_FrameMap.hpp
@@ -216,6 +216,9 @@ class FrameMap : public CompilationResourceObj {
   Address address_for_monitor_object(int monitor_index) const {
     return make_new_address(sp_offset_for_monitor_object(monitor_index));
   }
+  Address address_for_orig_pc_addr() const {
+    return make_new_address(sp_offset_for_monitor_base(_num_monitors));
+  }

   // Creates Location describing desired slot and returns it via pointer
   // to Location object.
Returns true if the stack frame offset was legal diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp index a2db5cb39e6..dc7888fd941 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -468,6 +468,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) { case lir_membar_storestore: // result and info always invalid case lir_membar_loadstore: // result and info always invalid case lir_membar_storeload: // result and info always invalid + case lir_check_orig_pc: // result and info always invalid case lir_on_spin_wait: { assert(op->as_Op0() != NULL, "must be"); @@ -1817,6 +1818,7 @@ const char * LIR_Op::name() const { case lir_fpop_raw: s = "fpop_raw"; break; case lir_breakpoint: s = "breakpoint"; break; case lir_get_thread: s = "get_thread"; break; + case lir_check_orig_pc: s = "check_orig_pc"; break; // LIR_Op1 case lir_fxch: s = "fxch"; break; case lir_fld: s = "fld"; break; diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp index 71728960528..8f0a588418b 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp @@ -906,6 +906,7 @@ enum LIR_Code { , lir_membar_storeload , lir_get_thread , lir_on_spin_wait + , lir_check_orig_pc , end_op0 , begin_op1 , lir_fxch diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp index df18d1dbfc0..0d0d9f9bc9b 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.cpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp @@ -635,58 +635,55 @@ void LIR_Assembler::add_scalarized_entry_info(int pc_offset) { // (1) (2) (3) (4) // UEP/UVEP: VEP: UEP: UEP: // check_icache pack receiver check_icache check_icache -// VEP/VVEP/VVEP_RO UEP/UVEP: VEP/VVEP_RO: VVEP_RO: -// body check_icache pack value args pack value args (except receiver) +// VEP/VVEP/VVEP_RO jump to VVEP VEP/VVEP_RO: VVEP_RO: +// body UEP/UVEP: pack value args pack value args (except receiver) +// check_icache jump to VVEP jump to VVEP // VVEP/VVEP_RO UVEP: VEP: // body check_icache pack all value args -// VVEP: UVEP: -// body check_icache +// VVEP: jump to VVEP +// body UVEP: +// check_icache // VVEP: // body -// -// Note: after packing, we jump to the method body. void LIR_Assembler::emit_std_entries() { offsets()->set_value(CodeOffsets::OSR_Entry, _masm->offset()); - const CompiledEntrySignature* ces = compilation()->compiled_entry_signature(); - _masm->align(CodeEntryAlignment); - + const CompiledEntrySignature* ces = compilation()->compiled_entry_signature(); if (ces->has_scalarized_args()) { assert(ValueTypePassFieldsAsArgs && method()->get_Method()->has_scalarized_args(), "must be"); - CodeOffsets::Entries ro_entry_type = ces->c1_value_ro_entry_type(); + // UEP: check icache and fall-through if (ro_entry_type != CodeOffsets::Verified_Value_Entry) { - // This is the UEP. It will fall-through to VEP or VVEP(RO) offsets()->set_value(CodeOffsets::Entry, _masm->offset()); - if (needs_icache(compilation()->method())) { + if (needs_icache(method())) { check_icache(); } } + // VVEP_RO: pack all value parameters, except the receiver if (ro_entry_type == CodeOffsets::Verified_Value_Entry_RO) { - // VVEP(RO) = pack all value parameters, except the object. 
- add_scalarized_entry_info(emit_std_entry(CodeOffsets::Verified_Value_Entry_RO, ces)); + emit_std_entry(CodeOffsets::Verified_Value_Entry_RO, ces); } - // VEP = pack all value parameters + // VEP: pack all value parameters _masm->align(CodeEntryAlignment); - add_scalarized_entry_info(emit_std_entry(CodeOffsets::Verified_Entry, ces)); + emit_std_entry(CodeOffsets::Verified_Entry, ces); + // UVEP: check icache and fall-through _masm->align(CodeEntryAlignment); - // This is the UVEP. It will fall-through to VVEP. offsets()->set_value(CodeOffsets::Value_Entry, _masm->offset()); if (ro_entry_type == CodeOffsets::Verified_Value_Entry) { // Special case if we have VVEP == VVEP(RO): // this means UVEP (called by C1) == UEP (called by C2). offsets()->set_value(CodeOffsets::Entry, _masm->offset()); } - - if (needs_icache(compilation()->method())) { + if (needs_icache(method())) { check_icache(); } - // VVEP = all value parameters are passed as refs - no packing. + + // VVEP: all value parameters are passed as refs - no packing. emit_std_entry(CodeOffsets::Verified_Value_Entry, NULL); if (ro_entry_type != CodeOffsets::Verified_Value_Entry_RO) { @@ -700,42 +697,44 @@ void LIR_Assembler::emit_std_entries() { // All 3 entries are the same (no value-type packing) offsets()->set_value(CodeOffsets::Entry, _masm->offset()); offsets()->set_value(CodeOffsets::Value_Entry, _masm->offset()); - if (needs_icache(compilation()->method())) { + if (needs_icache(method())) { check_icache(); } - int offset = emit_std_entry(CodeOffsets::Verified_Value_Entry, NULL); - offsets()->set_value(CodeOffsets::Verified_Entry, offset); - offsets()->set_value(CodeOffsets::Verified_Value_Entry_RO, offset); + emit_std_entry(CodeOffsets::Verified_Value_Entry, NULL); + offsets()->set_value(CodeOffsets::Verified_Entry, offsets()->value(CodeOffsets::Verified_Value_Entry)); + offsets()->set_value(CodeOffsets::Verified_Value_Entry_RO, offsets()->value(CodeOffsets::Verified_Value_Entry)); } } -int LIR_Assembler::emit_std_entry(CodeOffsets::Entries entry, const CompiledEntrySignature* ces) { +void LIR_Assembler::emit_std_entry(CodeOffsets::Entries entry, const CompiledEntrySignature* ces) { offsets()->set_value(entry, _masm->offset()); - int offset = _masm->offset(); + _masm->verified_entry(); switch (entry) { - case CodeOffsets::Verified_Entry: - offset = _masm->verified_entry(ces, initial_frame_size_in_bytes(), bang_size_in_bytes(), _verified_value_entry); - if (needs_clinit_barrier_on_entry(compilation()->method())) { - clinit_barrier(compilation()->method()); + case CodeOffsets::Verified_Entry: { + if (needs_clinit_barrier_on_entry(method())) { + clinit_barrier(method()); } - return offset; - case CodeOffsets::Verified_Value_Entry_RO: - offset = _masm->verified_value_ro_entry(ces, initial_frame_size_in_bytes(), bang_size_in_bytes(), _verified_value_entry); - if (needs_clinit_barrier_on_entry(compilation()->method())) { - clinit_barrier(compilation()->method()); + int rt_call_offset = _masm->verified_entry(ces, initial_frame_size_in_bytes(), bang_size_in_bytes(), in_bytes(frame_map()->sp_offset_for_orig_pc()), _verified_value_entry); + add_scalarized_entry_info(rt_call_offset); + break; + } + case CodeOffsets::Verified_Value_Entry_RO: { + assert(!needs_clinit_barrier_on_entry(method()), "can't be static"); + int rt_call_offset = _masm->verified_value_ro_entry(ces, initial_frame_size_in_bytes(), bang_size_in_bytes(), in_bytes(frame_map()->sp_offset_for_orig_pc()), _verified_value_entry); + add_scalarized_entry_info(rt_call_offset); + 
break; + } + case CodeOffsets::Verified_Value_Entry: { + if (needs_clinit_barrier_on_entry(method())) { + clinit_barrier(method()); } - return offset; + build_frame(); + offsets()->set_value(CodeOffsets::Frame_Complete, _masm->offset()); + break; + } default: - { - assert(entry == CodeOffsets::Verified_Value_Entry, "must be"); - _masm->verified_value_entry(); - if (needs_clinit_barrier_on_entry(compilation()->method())) { - clinit_barrier(compilation()->method()); - } - build_frame(); - offsets()->set_value(CodeOffsets::Frame_Complete, _masm->offset()); - return offset; - } + ShouldNotReachHere(); + break; } } @@ -814,6 +813,10 @@ void LIR_Assembler::emit_op0(LIR_Op0* op) { on_spin_wait(); break; + case lir_check_orig_pc: + check_orig_pc(); + break; + default: ShouldNotReachHere(); break; @@ -907,9 +910,8 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { void LIR_Assembler::build_frame() { - _masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes(), - compilation()->needs_stack_repair(), - &_verified_value_entry); + _masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes(), in_bytes(frame_map()->sp_offset_for_orig_pc()), + needs_stack_repair(), method()->has_scalarized_args(), &_verified_value_entry); } diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp index f64350500a5..8b09596c5bc 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.hpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp @@ -212,7 +212,7 @@ class LIR_Assembler: public CompilationResourceObj { void emit_profile_type(LIR_OpProfileType* op); void emit_delay(LIR_OpDelay* op); void emit_std_entries(); - int emit_std_entry(CodeOffsets::Entries entry, const CompiledEntrySignature* ces); + void emit_std_entry(CodeOffsets::Entries entry, const CompiledEntrySignature* ces); void add_scalarized_entry_info(int call_offset); void arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack); @@ -262,6 +262,7 @@ class LIR_Assembler: public CompilationResourceObj { void membar_storeload(); void on_spin_wait(); void get_thread(LIR_Opr result); + void check_orig_pc(); void verify_oop_map(CodeEmitInfo* info); diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index 6b32d3e58ea..87df6ce2aa6 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -3120,6 +3120,14 @@ void LIRGenerator::do_Base(Base* x) { CodeEmitInfo* info = new CodeEmitInfo(scope()->start()->state()->copy(ValueStack::StateBefore, SynchronizationEntryBCI), NULL, false); increment_invocation_counter(info); } + if (method()->has_scalarized_args()) { + // Check if deoptimization was triggered (i.e. orig_pc was set) while buffering scalarized value type arguments + // in the entry point (see comments in frame::deoptimize). If so, deoptimize only now that we have the right state. 
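// Aside: a compact model of the handshake this new check implements. The C1 entry
// point zeroes the orig_pc slot, frame::deoptimize() records the original pc there
// instead of patching the return pc, and the lir_check_orig_pc op emitted below
// branches to a DeoptimizeStub once the frame state is complete. The type and
// member names in this sketch are illustrative assumptions only.
#include <cstdint>

struct OrigPcSketch {
  intptr_t orig_pc = 0;                  // cleared in the C1 entry point

  void request_deopt_during_buffering(intptr_t pc) {
    orig_pc = pc;                        // deferred: the return pc stays unpatched
  }
  bool pending_deopt() const {           // what lir_check_orig_pc tests
    return orig_pc != 0;                 // not-equal -> take the deopt stub
  }
};
// The LIR emitted below performs exactly this test at the start of the method body: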
+ CodeEmitInfo* info = new CodeEmitInfo(scope()->start()->state()->copy(ValueStack::StateBefore, 0), NULL, false); + CodeStub* deopt_stub = new DeoptimizeStub(info, Deoptimization::Reason_none, Deoptimization::Action_none); + __ append(new LIR_Op0(lir_check_orig_pc)); + __ branch(lir_cond_notEqual, T_ADDRESS, deopt_stub); + } // all blocks with a successor must end with an unconditional jump // to the successor even if they are consecutive diff --git a/src/hotspot/share/c1/c1_MacroAssembler.hpp b/src/hotspot/share/c1/c1_MacroAssembler.hpp index 618285fe749..a58d6f84b97 100644 --- a/src/hotspot/share/c1/c1_MacroAssembler.hpp +++ b/src/hotspot/share/c1/c1_MacroAssembler.hpp @@ -32,7 +32,7 @@ class CodeEmitInfo; class CompiledEntrySignature; class C1_MacroAssembler: public MacroAssembler { private: - int scalarized_entry(const CompiledEntrySignature *ces, int frame_size_in_bytes, int bang_size_in_bytes, Label& verified_value_entry_label, bool is_value_ro_entry); + int scalarized_entry(const CompiledEntrySignature *ces, int frame_size_in_bytes, int bang_size_in_bytes, int sp_offset_for_orig_pc, Label& verified_value_entry_label, bool is_value_ro_entry); public: // creation C1_MacroAssembler(CodeBuffer* code) : MacroAssembler(code) { pd_init(); } @@ -41,16 +41,15 @@ class C1_MacroAssembler: public MacroAssembler { void explicit_null_check(Register base); void inline_cache_check(Register receiver, Register iCache); - void build_frame(int frame_size_in_bytes, int bang_size_in_bytes, bool needs_stack_repair, Label* verified_value_entry_label); - void remove_frame(int frame_size_in_bytes, bool needs_stack_repair); + void build_frame(int frame_size_in_bytes, int bang_size_in_bytes, int sp_offset_for_orig_pc = 0, bool needs_stack_repair = false, bool has_scalarized_args = false, Label* verified_value_entry_label = NULL); - int verified_entry(const CompiledEntrySignature *ces, int frame_size_in_bytes, int bang_size_in_bytes, Label& verified_value_entry_label) { - return scalarized_entry(ces, frame_size_in_bytes, bang_size_in_bytes, verified_value_entry_label, false); + int verified_entry(const CompiledEntrySignature *ces, int frame_size_in_bytes, int bang_size_in_bytes, int sp_offset_for_orig_pc, Label& verified_value_entry_label) { + return scalarized_entry(ces, frame_size_in_bytes, bang_size_in_bytes, sp_offset_for_orig_pc, verified_value_entry_label, false); } - int verified_value_ro_entry(const CompiledEntrySignature *ces, int frame_size_in_bytes, int bang_size_in_bytes, Label& verified_value_entry_label) { - return scalarized_entry(ces, frame_size_in_bytes, bang_size_in_bytes, verified_value_entry_label, true); + int verified_value_ro_entry(const CompiledEntrySignature *ces, int frame_size_in_bytes, int bang_size_in_bytes, int sp_offset_for_orig_pc, Label& verified_value_entry_label) { + return scalarized_entry(ces, frame_size_in_bytes, bang_size_in_bytes, sp_offset_for_orig_pc, verified_value_entry_label, true); } - void verified_value_entry(); + void verified_entry(); void verify_stack_oop(int offset) PRODUCT_RETURN; void verify_not_null_oop(Register r) PRODUCT_RETURN; diff --git a/src/hotspot/share/classfile/classFileParser.cpp b/src/hotspot/share/classfile/classFileParser.cpp index 6064a5389a3..581822f09f0 100644 --- a/src/hotspot/share/classfile/classFileParser.cpp +++ b/src/hotspot/share/classfile/classFileParser.cpp @@ -83,6 +83,7 @@ #include "utilities/macros.hpp" #include "utilities/ostream.hpp" #include "utilities/resourceHash.hpp" +#include "utilities/stringUtils.hpp" #include 
"utilities/utf8.hpp" #if INCLUDE_CDS @@ -943,10 +944,17 @@ static bool put_after_lookup(const Symbol* name, const Symbol* sig, NameSigHash* } // Side-effects: populates the _local_interfaces field -void ClassFileParser::parse_interfaces(const ClassFileStream* const stream, - const int itfs_len, - ConstantPool* const cp, +void ClassFileParser::parse_interfaces(const ClassFileStream* stream, + int itfs_len, + ConstantPool* cp, bool* const has_nonstatic_concrete_methods, + // FIXME: lots of these functions + // declare their parameters as const, + // which adds only noise to the code. + // Remove the spurious const modifiers. + // Many are of the form "const int x" + // or "T* const x". + bool* const is_declared_atomic, TRAPS) { assert(stream != NULL, "invariant"); assert(cp != NULL, "invariant"); @@ -994,10 +1002,14 @@ void ClassFileParser::parse_interfaces(const ClassFileStream* const stream, interf->class_in_module_of_loader())); } - if (InstanceKlass::cast(interf)->has_nonstatic_concrete_methods()) { + InstanceKlass* ik = InstanceKlass::cast(interf); + if (ik->has_nonstatic_concrete_methods()) { *has_nonstatic_concrete_methods = true; } - _local_interfaces->at_put(index, InstanceKlass::cast(interf)); + if (ik->is_declared_atomic()) { + *is_declared_atomic = true; + } + _local_interfaces->at_put(index, ik); } if (!_need_verify || itfs_len <= 1) { @@ -4346,6 +4358,7 @@ void ClassFileParser::layout_fields(ConstantPool* cp, Klass** nonstatic_value_type_klasses = NULL; unsigned int value_type_oop_map_count = 0; int not_flattened_value_types = 0; + int not_atomic_value_types = 0; int max_nonstatic_value_type = fac->count[NONSTATIC_FLATTENABLE] + 1; @@ -4380,7 +4393,16 @@ void ClassFileParser::layout_fields(ConstantPool* cp, } ValueKlass* vk = ValueKlass::cast(klass); // Conditions to apply flattening or not should be defined in a single place - if ((ValueFieldMaxFlatSize < 0) || (vk->size_helper() * HeapWordSize) <= ValueFieldMaxFlatSize) { + bool too_big_to_flatten = (ValueFieldMaxFlatSize >= 0 && + (vk->size_helper() * HeapWordSize) > ValueFieldMaxFlatSize); + bool too_atomic_to_flatten = vk->is_declared_atomic(); + bool too_volatile_to_flatten = fs.access_flags().is_volatile(); + if (vk->is_naturally_atomic()) { + too_atomic_to_flatten = false; + //too_volatile_to_flatten = false; //FIXME + // volatile fields are currently never flattened, this could change in the future + } + if (!(too_big_to_flatten | too_atomic_to_flatten | too_volatile_to_flatten)) { nonstatic_value_type_indexes[nonstatic_value_type_count] = fs.index(); nonstatic_value_type_klasses[nonstatic_value_type_count] = klass; nonstatic_value_type_count++; @@ -4390,6 +4412,9 @@ void ClassFileParser::layout_fields(ConstantPool* cp, value_type_oop_map_count += vklass->nonstatic_oop_map_count(); } fs.set_flattened(true); + if (!vk->is_atomic()) { // flat and non-atomic: take note + not_atomic_value_types++; + } } else { not_flattened_value_types++; fs.set_flattened(false); @@ -4848,6 +4873,19 @@ void ClassFileParser::layout_fields(ConstantPool* cp, info->_static_field_size = static_field_size; info->_nonstatic_field_size = nonstatic_field_size; info->_has_nonstatic_fields = has_nonstatic_fields; + + // A value type is naturally atomic if it has just one field, and + // that field is simple enough. 
+ info->_is_naturally_atomic = (is_value_type() && + !super_has_nonstatic_fields && + (nonstatic_fields_count <= 1) && + (not_atomic_value_types == 0) && + (nonstatic_contended_count == 0)); + // This may be too restrictive, since if all the fields fit in 64 + // bits we could make the decision to align instances of this class + // to 64-bit boundaries, and load and store them as single words. + // And on machines which supported larger atomics we could similarly + // allow larger values to be atomic, if properly aligned. } void ClassFileParser::set_precomputed_flags(InstanceKlass* ik) { @@ -5983,6 +6021,7 @@ static void check_methods_for_intrinsics(const InstanceKlass* ik, } } +// Called from a factory method in KlassFactory, not from this file. InstanceKlass* ClassFileParser::create_instance_klass(bool changed_by_loadhook, TRAPS) { if (_klass != NULL) { return _klass; @@ -6052,6 +6091,9 @@ void ClassFileParser::fill_instance_klass(InstanceKlass* ik, bool changed_by_loa // Not yet: supers are done below to support the new subtype-checking fields ik->set_nonstatic_field_size(_field_info->_nonstatic_field_size); ik->set_has_nonstatic_fields(_field_info->_has_nonstatic_fields); + if (_field_info->_is_naturally_atomic && ik->is_value()) { + ik->set_is_naturally_atomic(); + } if (_is_empty_value) { ik->set_is_empty_value(); } @@ -6101,6 +6143,9 @@ void ClassFileParser::fill_instance_klass(InstanceKlass* ik, bool changed_by_loa ik->set_major_version(_major_version); ik->set_has_nonstatic_concrete_methods(_has_nonstatic_concrete_methods); ik->set_declares_nonstatic_concrete_methods(_declares_nonstatic_concrete_methods); + if (_is_declared_atomic) { + ik->set_is_declared_atomic(); + } if (_unsafe_anonymous_host != NULL) { assert (ik->is_unsafe_anonymous(), "should be the same"); @@ -6433,6 +6478,8 @@ ClassFileParser::ClassFileParser(ClassFileStream* stream, _has_contended_fields(false), _has_flattenable_fields(false), _is_empty_value(false), + _is_naturally_atomic(false), + _is_declared_atomic(false), _has_finalizer(false), _has_empty_finalizer(false), _has_vanilla_constructor(false), @@ -6772,6 +6819,7 @@ void ClassFileParser::parse_stream(const ClassFileStream* const stream, _itfs_len, cp, &_has_nonstatic_concrete_methods, + &_is_declared_atomic, CHECK); assert(_local_interfaces != NULL, "invariant"); @@ -6779,8 +6827,8 @@ void ClassFileParser::parse_stream(const ClassFileStream* const stream, // Fields (offsets are filled in later) _fac = new FieldAllocationCount(); parse_fields(stream, - _access_flags.is_interface(), - _access_flags.is_value_type(), + is_interface(), + is_value_type(), _fac, cp, cp_size, @@ -6792,8 +6840,8 @@ void ClassFileParser::parse_stream(const ClassFileStream* const stream, // Methods AccessFlags promoted_flags; parse_methods(stream, - _access_flags.is_interface(), - _access_flags.is_value_type(), + is_interface(), + is_value_type(), &promoted_flags, &_has_final_method, &_declares_nonstatic_concrete_methods, @@ -6842,7 +6890,7 @@ void ClassFileParser::post_process_parsed_stream(const ClassFileStream* const st // We check super class after class file is parsed and format is checked if (_super_class_index > 0 && NULL ==_super_klass) { Symbol* const super_class_name = cp->klass_name_at(_super_class_index); - if (_access_flags.is_interface()) { + if (is_interface()) { // Before attempting to resolve the superclass, check for class format // errors not checked yet. 
guarantee_property(super_class_name == vmSymbols::java_lang_Object(),
@@ -6863,6 +6911,9 @@ void ClassFileParser::post_process_parsed_stream(const ClassFileStream* const st
     if (_super_klass->has_nonstatic_concrete_methods()) {
       _has_nonstatic_concrete_methods = true;
     }
+    if (_super_klass->is_declared_atomic()) {
+      _is_declared_atomic = true;
+    }

     if (_super_klass->is_interface()) {
       ResourceMark rm(THREAD);
@@ -6889,6 +6940,18 @@ void ClassFileParser::post_process_parsed_stream(const ClassFileStream* const st
     }
   }

+  if (_class_name == vmSymbols::java_lang_NonTearable() && _loader_data->class_loader() == NULL) {
+    // This is the original source of this condition.
+    // It propagates by inheritance, as if testing "instanceof NonTearable".
+    _is_declared_atomic = true;
+  } else if (*ForceNonTearable != '\0') {
+    // Allow a command line switch to force the same atomicity property:
+    const char* class_name_str = _class_name->as_C_string();
+    if (StringUtils::class_list_match(ForceNonTearable, class_name_str)) {
+      _is_declared_atomic = true;
+    }
+  }
+
   // Compute the transitive list of all unique interfaces implemented by this class
   _transitive_interfaces =
     compute_transitive_interfaces(_super_klass,
@@ -6917,7 +6980,7 @@ void ClassFileParser::post_process_parsed_stream(const ClassFileStream* const st
                                   CHECK);

   // Size of Java itable (in words)
-  _itable_size = _access_flags.is_interface() ? 0 :
+  _itable_size = is_interface() ? 0 :
     klassItable::compute_itable_size(_transitive_interfaces);

   assert(_fac != NULL, "invariant");
diff --git a/src/hotspot/share/classfile/classFileParser.hpp b/src/hotspot/share/classfile/classFileParser.hpp
index fe04aa606d7..43bb8c26d61 100644
--- a/src/hotspot/share/classfile/classFileParser.hpp
+++ b/src/hotspot/share/classfile/classFileParser.hpp
@@ -73,6 +73,7 @@ class FieldLayoutInfo : public ResourceObj {
   int _nonstatic_field_size;
   int _static_field_size;
   bool _has_nonstatic_fields;
+  bool _is_naturally_atomic;
 };

 // Parser for .class files
@@ -199,6 +200,8 @@ class ClassFileParser {
   bool _has_flattenable_fields;
   bool _is_empty_value;
+  bool _is_naturally_atomic;
+  bool _is_declared_atomic;

   // precomputed flags
   bool _has_finalizer;
@@ -246,6 +249,7 @@ class ClassFileParser {
                         const int itfs_len,
                         ConstantPool* const cp,
                         bool* has_nonstatic_concrete_methods,
+                        bool* is_declared_atomic,
                         TRAPS);

   const InstanceKlass* parse_super_class(ConstantPool* const cp,
diff --git a/src/hotspot/share/classfile/fieldLayoutBuilder.cpp b/src/hotspot/share/classfile/fieldLayoutBuilder.cpp
index bf71d2d5537..69523171a94 100644
--- a/src/hotspot/share/classfile/fieldLayoutBuilder.cpp
+++ b/src/hotspot/share/classfile/fieldLayoutBuilder.cpp
@@ -539,7 +539,10 @@ FieldLayoutBuilder::FieldLayoutBuilder(const Symbol* classname, const InstanceKl
   _has_nonstatic_fields(false),
   _is_contended(is_contended),
   _is_value_type(is_value_type),
-  _has_flattening_information(is_value_type) {}
+  _has_flattening_information(is_value_type),
+  _has_nonatomic_values(false),
+  _atomic_field_count(0)
+  {}

 FieldGroup* FieldLayoutBuilder::get_or_create_contended_group(int g) {
   assert(g > 0, "must only be called for named contended groups");
@@ -579,6 +582,7 @@ void FieldLayoutBuilder::regular_field_sorting() {
       group = _static_fields;
     } else {
       _has_nonstatic_fields = true;
+      _atomic_field_count++;  // we might decrement this
       if (fs.is_contended()) {
         int g = fs.contended_group();
         if (g == 0) {
@@ -626,14 +630,23 @@ void FieldLayoutBuilder::regular_field_sorting() {
                                                             _protection_domain, true, THREAD);
         assert(klass !=
NULL, "Sanity check"); ValueKlass* vk = ValueKlass::cast(klass); - bool has_flattenable_size = (ValueFieldMaxFlatSize < 0) - || (vk->size_helper() * HeapWordSize) <= ValueFieldMaxFlatSize; - // volatile fields are currently never flattened, this could change in the future - bool flattened = !fs.access_flags().is_volatile() && has_flattenable_size; - if (flattened) { + bool too_big_to_flatten = (ValueFieldMaxFlatSize >= 0 && + (vk->size_helper() * HeapWordSize) > ValueFieldMaxFlatSize); + bool too_atomic_to_flatten = vk->is_declared_atomic(); + bool too_volatile_to_flatten = fs.access_flags().is_volatile(); + if (vk->is_naturally_atomic()) { + too_atomic_to_flatten = false; + //too_volatile_to_flatten = false; //FIXME + // volatile fields are currently never flattened, this could change in the future + } + if (!(too_big_to_flatten | too_atomic_to_flatten | too_volatile_to_flatten)) { group->add_flattened_field(fs, vk); _nonstatic_oopmap_count += vk->nonstatic_oop_map_count(); fs.set_flattened(true); + if (!vk->is_atomic()) { // flat and non-atomic: take note + _has_nonatomic_values = true; + _atomic_field_count--; // every other field is atomic but this one + } } else { _nonstatic_oopmap_count++; group->add_oop_field(fs); @@ -674,6 +687,7 @@ void FieldLayoutBuilder::inline_class_field_sorting(TRAPS) { group = _static_fields; } else { _has_nonstatic_fields = true; + _atomic_field_count++; // we might decrement this group = _root_group; } assert(group != NULL, "invariant"); @@ -716,13 +730,24 @@ void FieldLayoutBuilder::inline_class_field_sorting(TRAPS) { _protection_domain, true, CHECK); assert(klass != NULL, "Sanity check"); ValueKlass* vk = ValueKlass::cast(klass); - bool flattened = (ValueFieldMaxFlatSize < 0) - || (vk->size_helper() * HeapWordSize) <= ValueFieldMaxFlatSize; - if (flattened) { + bool too_big_to_flatten = (ValueFieldMaxFlatSize >= 0 && + (vk->size_helper() * HeapWordSize) > ValueFieldMaxFlatSize); + bool too_atomic_to_flatten = vk->is_declared_atomic(); + bool too_volatile_to_flatten = fs.access_flags().is_volatile(); + if (vk->is_naturally_atomic()) { + too_atomic_to_flatten = false; + //too_volatile_to_flatten = false; //FIXME + // volatile fields are currently never flattened, this could change in the future + } + if (!(too_big_to_flatten | too_atomic_to_flatten | too_volatile_to_flatten)) { group->add_flattened_field(fs, vk); _nonstatic_oopmap_count += vk->nonstatic_oop_map_count(); field_alignment = vk->get_alignment(); fs.set_flattened(true); + if (!vk->is_atomic()) { // flat and non-atomic: take note + _has_nonatomic_values = true; + _atomic_field_count--; // every other field is atomic but this one + } } else { _nonstatic_oopmap_count++; field_alignment = type2aelembytes(T_OBJECT); @@ -983,6 +1008,19 @@ void FieldLayoutBuilder::epilogue() { _info->_nonstatic_field_size = (nonstatic_field_end - instanceOopDesc::base_offset_in_bytes()) / heapOopSize; _info->_has_nonstatic_fields = _has_nonstatic_fields; + // A value type is naturally atomic if it has just one field, and + // that field is simple enough. + _info->_is_naturally_atomic = (_is_value_type && + (_atomic_field_count <= 1) && + !_has_nonatomic_values && + _contended_groups.is_empty()); + // This may be too restrictive, since if all the fields fit in 64 + // bits we could make the decision to align instances of this class + // to 64-bit boundaries, and load and store them as single words. 
+ // And on machines which supported larger atomics we could similarly + // allow larger values to be atomic, if properly aligned. + + if (PrintFieldLayout) { ResourceMark rm; tty->print_cr("Layout of class %s", _classname->as_C_string()); diff --git a/src/hotspot/share/classfile/fieldLayoutBuilder.hpp b/src/hotspot/share/classfile/fieldLayoutBuilder.hpp index fad76377d2f..d4c195e3cf5 100644 --- a/src/hotspot/share/classfile/fieldLayoutBuilder.hpp +++ b/src/hotspot/share/classfile/fieldLayoutBuilder.hpp @@ -256,6 +256,8 @@ class FieldLayoutBuilder : public ResourceObj { bool _is_contended; bool _is_value_type; bool _has_flattening_information; + bool _has_nonatomic_values; + int _atomic_field_count; FieldGroup* get_or_create_contended_group(int g); diff --git a/src/hotspot/share/classfile/vmSymbols.hpp b/src/hotspot/share/classfile/vmSymbols.hpp index 44c11da2023..cde1210290f 100644 --- a/src/hotspot/share/classfile/vmSymbols.hpp +++ b/src/hotspot/share/classfile/vmSymbols.hpp @@ -64,6 +64,7 @@ template(java_lang_Thread, "java/lang/Thread") \ template(java_lang_ThreadGroup, "java/lang/ThreadGroup") \ template(java_lang_Cloneable, "java/lang/Cloneable") \ + template(java_lang_NonTearable, "java/lang/NonTearable") \ template(java_lang_Throwable, "java/lang/Throwable") \ template(java_lang_ClassLoader, "java/lang/ClassLoader") \ template(java_lang_ThreadDeath, "java/lang/ThreadDeath") \ diff --git a/src/hotspot/share/oops/arrayKlass.hpp b/src/hotspot/share/oops/arrayKlass.hpp index 366d24eae8c..c4c3e954345 100644 --- a/src/hotspot/share/oops/arrayKlass.hpp +++ b/src/hotspot/share/oops/arrayKlass.hpp @@ -69,6 +69,10 @@ class ArrayKlass: public Klass { // Presented with an ArrayKlass, which storage_properties should be encoded into arrayOop virtual ArrayStorageProperties storage_properties() { return ArrayStorageProperties::empty; } + // Are loads and stores to this concrete array type atomic? + // Note that Object[] is naturally atomic, but its subtypes may not be. + virtual bool element_access_is_atomic() { return true; } + // Testing operation DEBUG_ONLY(bool is_array_klass_slow() const { return true; }) diff --git a/src/hotspot/share/oops/instanceKlass.hpp b/src/hotspot/share/oops/instanceKlass.hpp index 5e42813e708..825ced76290 100644 --- a/src/hotspot/share/oops/instanceKlass.hpp +++ b/src/hotspot/share/oops/instanceKlass.hpp @@ -291,7 +291,9 @@ class InstanceKlass: public Klass { _misc_is_being_redefined = 1 << 17, // used for locking redefinition _misc_has_contended_annotations = 1 << 18, // has @Contended annotation _misc_has_value_fields = 1 << 19, // has value fields and related embedded section is not empty - _misc_is_empty_value = 1 << 20 // empty value type + _misc_is_empty_value = 1 << 20, // empty value type + _misc_is_naturally_atomic = 1 << 21, // loaded/stored in one instruction + _misc_is_declared_atomic = 1 << 22 // implements jl.NonTearable }; u2 shared_loader_type_bits() const { return _misc_is_shared_boot_class|_misc_is_shared_platform_class|_misc_is_shared_app_class; @@ -433,6 +435,32 @@ class InstanceKlass: public Klass { _misc_flags |= _misc_is_empty_value; } + // Note: The naturally_atomic property only applies to + // inline classes; it is never true on identity classes. + // The bit is placed on instanceKlass for convenience. + + // Query if h/w provides atomic load/store for instances. + bool is_naturally_atomic() const { + return (_misc_flags & _misc_is_naturally_atomic) != 0; + } + // Initialized in the class file parser, not changed later. 
+ void set_is_naturally_atomic() { + _misc_flags |= _misc_is_naturally_atomic; + } + + // Query if this class implements jl.NonTearable or was + // mentioned in the JVM option AlwaysAtomicValueTypes. + // This bit can occur anywhere, but is only significant + // for inline classes *and* their super types. + // It inherits from supers along with NonTearable. + bool is_declared_atomic() const { + return (_misc_flags & _misc_is_declared_atomic) != 0; + } + // Initialized in the class file parser, not changed later. + void set_is_declared_atomic() { + _misc_flags |= _misc_is_declared_atomic; + } + // field sizes int nonstatic_field_size() const { return _nonstatic_field_size; } void set_nonstatic_field_size(int size) { _nonstatic_field_size = size; } diff --git a/src/hotspot/share/oops/valueArrayKlass.cpp b/src/hotspot/share/oops/valueArrayKlass.cpp index d56a6e3eabd..6e8cec3e927 100644 --- a/src/hotspot/share/oops/valueArrayKlass.cpp +++ b/src/hotspot/share/oops/valueArrayKlass.cpp @@ -83,7 +83,7 @@ void ValueArrayKlass::set_element_klass(Klass* k) { ValueArrayKlass* ValueArrayKlass::allocate_klass(Klass* element_klass, TRAPS) { assert(ValueArrayFlatten, "Flatten array required"); - assert(ValueKlass::cast(element_klass)->is_atomic() || (!ValueArrayAtomicAccess), "Atomic by-default"); + assert(ValueKlass::cast(element_klass)->is_naturally_atomic() || (!ValueArrayAtomicAccess), "Atomic by-default"); /* * MVT->LWorld, now need to allocate secondaries array types, just like objArrayKlass... diff --git a/src/hotspot/share/oops/valueArrayKlass.hpp b/src/hotspot/share/oops/valueArrayKlass.hpp index 92ac6ece0b7..efaebd48a3c 100644 --- a/src/hotspot/share/oops/valueArrayKlass.hpp +++ b/src/hotspot/share/oops/valueArrayKlass.hpp @@ -87,7 +87,8 @@ class ValueArrayKlass : public ArrayKlass { return element_klass()->contains_oops(); } - bool is_atomic() { + // Override. + bool element_access_is_atomic() { return element_klass()->is_atomic(); } diff --git a/src/hotspot/share/oops/valueKlass.cpp b/src/hotspot/share/oops/valueKlass.cpp index 20848021b5c..138c04c935d 100644 --- a/src/hotspot/share/oops/valueKlass.cpp +++ b/src/hotspot/share/oops/valueKlass.cpp @@ -139,10 +139,6 @@ instanceOop ValueKlass::allocate_instance_buffer(TRAPS) { return oop; } -bool ValueKlass::is_atomic() { - return (nonstatic_field_size() * heapOopSize) <= longSize; -} - int ValueKlass::nonstatic_oop_count() { int oops = 0; int map_count = nonstatic_oop_map_count(); @@ -195,6 +191,11 @@ bool ValueKlass::flatten_array() { return false; } + // Declared atomic but not naturally atomic. 
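// Aside: the flattening policy now repeated in classFileParser.cpp and
// fieldLayoutBuilder.cpp, reduced to a single predicate. A value field is
// flattened unless it is too big, declared atomic without being naturally
// atomic, or volatile. This is an illustrative sketch, not HotSpot code:
bool can_flatten_field(int field_size_in_bytes, int max_flat_size,
                       bool declared_atomic, bool naturally_atomic,
                       bool is_volatile) {
  bool too_big      = max_flat_size >= 0 && field_size_in_bytes > max_flat_size;
  bool too_atomic   = declared_atomic;
  bool too_volatile = is_volatile;
  if (naturally_atomic) {
    too_atomic = false; // single-access values stay flattenable
    // volatile fields are currently never flattened (see the FIXME in the patch)
  }
  return !(too_big | too_atomic | too_volatile);
}
// ValueKlass::flatten_array() applies the same atomicity veto just below: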
+ if (is_declared_atomic() && !is_naturally_atomic()) { + return false; + } + return true; } @@ -253,7 +254,7 @@ Klass* ValueKlass::value_array_klass(ArrayStorageProperties storage_props, bool } Klass* ValueKlass::allocate_value_array_klass(TRAPS) { - if (flatten_array() && (is_atomic() || (!ValueArrayAtomicAccess))) { + if (flatten_array() && (is_naturally_atomic() || (!ValueArrayAtomicAccess))) { return ValueArrayKlass::allocate_klass(ArrayStorageProperties::flattened_and_null_free, this, THREAD); } return ObjArrayKlass::allocate_objArray_klass(ArrayStorageProperties::null_free, 1, this, THREAD); diff --git a/src/hotspot/share/oops/valueKlass.hpp b/src/hotspot/share/oops/valueKlass.hpp index a44c662a1d9..8fe79f3781e 100644 --- a/src/hotspot/share/oops/valueKlass.hpp +++ b/src/hotspot/share/oops/valueKlass.hpp @@ -214,8 +214,8 @@ class ValueKlass: public InstanceKlass { address data_for_oop(oop o) const; oop oop_for_data(address data) const; - // Query if h/w provides atomic load/store - bool is_atomic(); + // Query if this class promises atomicity one way or another + bool is_atomic() { return is_naturally_atomic() || is_declared_atomic(); } bool flatten_array(); diff --git a/src/hotspot/share/opto/macro.cpp b/src/hotspot/share/opto/macro.cpp index c50e4b7b1de..178319ad5f4 100644 --- a/src/hotspot/share/opto/macro.cpp +++ b/src/hotspot/share/opto/macro.cpp @@ -83,18 +83,6 @@ int PhaseMacroExpand::replace_input(Node *use, Node *oldref, Node *newref) { return nreplacements; } -void PhaseMacroExpand::migrate_outs(Node *old, Node *target) { - assert(old != NULL, "sanity"); - for (DUIterator_Fast imax, i = old->fast_outs(imax); i < imax; i++) { - Node* use = old->fast_out(i); - _igvn.rehash_node_delayed(use); - imax -= replace_input(use, old, target); - // back up iterator - --i; - } - assert(old->outcnt() == 0, "all uses must be deleted"); -} - Node* PhaseMacroExpand::opt_bits_test(Node* ctrl, Node* region, int edge, Node* word, int mask, int bits, bool return_fast_path) { Node* cmp; if (mask != 0) { @@ -1565,7 +1553,7 @@ void PhaseMacroExpand::expand_allocate_common( // result_phi_rawmem (unless we are only generating a slow call when // both memory projections are combined) if (expand_fast_path && _memproj_fallthrough != NULL) { - migrate_outs(_memproj_fallthrough, result_phi_rawmem); + _igvn.replace_in_uses(_memproj_fallthrough, result_phi_rawmem); } // Now change uses of _memproj_catchall to use _memproj_fallthrough and delete // _memproj_catchall so we end up with a call that has only 1 memory projection. @@ -1574,7 +1562,7 @@ void PhaseMacroExpand::expand_allocate_common( _memproj_fallthrough = new ProjNode(call, TypeFunc::Memory); transform_later(_memproj_fallthrough); } - migrate_outs(_memproj_catchall, _memproj_fallthrough); + _igvn.replace_in_uses(_memproj_catchall, _memproj_fallthrough); _igvn.remove_dead_node(_memproj_catchall); } @@ -1584,7 +1572,7 @@ void PhaseMacroExpand::expand_allocate_common( // (it is different from memory projections where both projections are // combined in such case). if (_ioproj_fallthrough != NULL) { - migrate_outs(_ioproj_fallthrough, result_phi_i_o); + _igvn.replace_in_uses(_ioproj_fallthrough, result_phi_i_o); } // Now change uses of _ioproj_catchall to use _ioproj_fallthrough and delete // _ioproj_catchall so we end up with a call that has only 1 i_o projection. 
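// Aside: migrate_outs() is dropped in favor of PhaseIterGVN::replace_in_uses(),
// which with the asserts added below now guarantees the same postcondition.
// A simplified, standalone model of the rewiring (container types and names
// are illustrative assumptions; the real code walks DU iterators):
#include <cassert>
#include <vector>

struct NodeSketch {
  std::vector<NodeSketch*> inputs;
  std::vector<NodeSketch*> outs; // def-use edges

  // Redirect every use of old_node to target; afterwards old_node must be
  // dead, i.e. outcnt() == 0 (the postcondition asserted in the patch).
  static void replace_in_uses(NodeSketch* old_node, NodeSketch* target) {
    assert(old_node != nullptr && "sanity"); // precondition added in the patch
    for (NodeSketch* use : old_node->outs) {
      for (NodeSketch*& in : use->inputs) {
        if (in == old_node) in = target;
      }
      target->outs.push_back(use);
    }
    old_node->outs.clear(); // "all uses must be deleted"
  }
};
// The remaining projection call sites below are converted the same way: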
@@ -1593,7 +1581,7 @@ void PhaseMacroExpand::expand_allocate_common( _ioproj_fallthrough = new ProjNode(call, TypeFunc::I_O); transform_later(_ioproj_fallthrough); } - migrate_outs(_ioproj_catchall, _ioproj_fallthrough); + _igvn.replace_in_uses(_ioproj_catchall, _ioproj_fallthrough); _igvn.remove_dead_node(_ioproj_catchall); } @@ -1661,7 +1649,7 @@ void PhaseMacroExpand::yank_alloc_node(AllocateNode* alloc) { _igvn.remove_dead_node(_resproj); } if (_fallthroughcatchproj != NULL) { - migrate_outs(_fallthroughcatchproj, ctrl); + _igvn.replace_in_uses(_fallthroughcatchproj, ctrl); _igvn.remove_dead_node(_fallthroughcatchproj); } if (_catchallcatchproj != NULL) { @@ -1674,11 +1662,11 @@ void PhaseMacroExpand::yank_alloc_node(AllocateNode* alloc) { _igvn.remove_dead_node(_fallthroughproj); } if (_memproj_fallthrough != NULL) { - migrate_outs(_memproj_fallthrough, mem); + _igvn.replace_in_uses(_memproj_fallthrough, mem); _igvn.remove_dead_node(_memproj_fallthrough); } if (_ioproj_fallthrough != NULL) { - migrate_outs(_ioproj_fallthrough, i_o); + _igvn.replace_in_uses(_ioproj_fallthrough, i_o); _igvn.remove_dead_node(_ioproj_fallthrough); } if (_memproj_catchall != NULL) { diff --git a/src/hotspot/share/opto/macro.hpp b/src/hotspot/share/opto/macro.hpp index 3ebb2740279..c75910e57a1 100644 --- a/src/hotspot/share/opto/macro.hpp +++ b/src/hotspot/share/opto/macro.hpp @@ -199,7 +199,6 @@ class PhaseMacroExpand : public Phase { void expand_subtypecheck_node(SubTypeCheckNode *check); int replace_input(Node *use, Node *oldref, Node *newref); - void migrate_outs(Node *old, Node *target); Node* opt_bits_test(Node* ctrl, Node* region, int edge, Node* word, int mask, int bits, bool return_fast_path = false); void copy_predefined_input_for_runtime_call(Node * ctrl, CallNode* oldcall, CallNode* call); CallNode* make_slow_call(CallNode *oldcall, const TypeFunc* slow_call_type, address slow_call, diff --git a/src/hotspot/share/opto/phaseX.cpp b/src/hotspot/share/opto/phaseX.cpp index 9310187593c..4d157f3b1eb 100644 --- a/src/hotspot/share/opto/phaseX.cpp +++ b/src/hotspot/share/opto/phaseX.cpp @@ -1496,6 +1496,7 @@ void PhaseIterGVN::subsume_node( Node *old, Node *nn ) { } void PhaseIterGVN::replace_in_uses(Node* n, Node* m) { + assert(n != NULL, "sanity"); for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { Node* u = n->fast_out(i); if (u != n) { @@ -1504,6 +1505,7 @@ void PhaseIterGVN::replace_in_uses(Node* n, Node* m) { --i, imax -= nb; } } + assert(n->outcnt() == 0, "all uses must be deleted"); } //------------------------------add_users_to_worklist-------------------------- diff --git a/src/hotspot/share/opto/valuetypenode.cpp b/src/hotspot/share/opto/valuetypenode.cpp index 6d1076ad56c..af4eaa9e17d 100644 --- a/src/hotspot/share/opto/valuetypenode.cpp +++ b/src/hotspot/share/opto/valuetypenode.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/src/hotspot/share/opto/valuetypenode.hpp b/src/hotspot/share/opto/valuetypenode.hpp index 407c3a3947e..2aadbf61427 100644 --- a/src/hotspot/share/opto/valuetypenode.hpp +++ b/src/hotspot/share/opto/valuetypenode.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2020, Oracle and/or its affiliates. 
All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/src/hotspot/share/prims/jvm.cpp b/src/hotspot/share/prims/jvm.cpp index 0298004abc7..f41b698fc13 100644 --- a/src/hotspot/share/prims/jvm.cpp +++ b/src/hotspot/share/prims/jvm.cpp @@ -2337,10 +2337,7 @@ JVM_ENTRY(jboolean, JVM_ArrayIsAccessAtomic(JNIEnv *env, jclass unused, jobject if ((o == NULL) || (!k->is_array_klass())) { THROW_0(vmSymbols::java_lang_IllegalArgumentException()); } - if (k->is_valueArray_klass()) { - return ValueArrayKlass::cast(k)->is_atomic(); - } - return true; + return ArrayKlass::cast(k)->element_access_is_atomic(); JVM_END JVM_ENTRY(jobject, JVM_ArrayEnsureAccessAtomic(JNIEnv *env, jclass unused, jobject array)) @@ -2352,7 +2349,7 @@ JVM_ENTRY(jobject, JVM_ArrayEnsureAccessAtomic(JNIEnv *env, jclass unused, jobje } if (k->is_valueArray_klass()) { ValueArrayKlass* vk = ValueArrayKlass::cast(k); - if (!vk->is_atomic()) { + if (!vk->element_access_is_atomic()) { /** * Need to decide how to implement: * diff --git a/src/hotspot/share/runtime/deoptimization.cpp b/src/hotspot/share/runtime/deoptimization.cpp index 359d0e09a49..f4256fb515a 100644 --- a/src/hotspot/share/runtime/deoptimization.cpp +++ b/src/hotspot/share/runtime/deoptimization.cpp @@ -1595,7 +1595,7 @@ void Deoptimization::deoptimize_single_frame(JavaThread* thread, frame fr, Deopt } void Deoptimization::deoptimize(JavaThread* thread, frame fr, DeoptReason reason) { - // Deoptimize only if the frame comes from compile code. + // Deoptimize only if the frame comes from compiled code. // Do not deoptimize the frame which is already patched // during the execution of the loops below. if (!fr.is_compiled_frame() || fr.is_deoptimized_frame()) { diff --git a/src/hotspot/share/runtime/frame.cpp b/src/hotspot/share/runtime/frame.cpp index 0dba59826e4..f43a37120e1 100644 --- a/src/hotspot/share/runtime/frame.cpp +++ b/src/hotspot/share/runtime/frame.cpp @@ -53,6 +53,9 @@ #include "utilities/debug.hpp" #include "utilities/decoder.hpp" #include "utilities/formatBuffer.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" +#endif RegisterMap::RegisterMap(JavaThread *thread, bool update_map) { _thread = thread; @@ -285,6 +288,25 @@ void frame::deoptimize(JavaThread* thread) { // Save the original pc before we patch in the new one cm->set_original_pc(this, pc()); + +#ifdef COMPILER1 + if (cm->is_compiled_by_c1() && cm->method()->has_scalarized_args() && + pc() < cm->verified_value_entry_point()) { + // The VEP and VVEP(RO) of C1-compiled methods call into the runtime to buffer scalarized value + // type args. We can't deoptimize at that point because the buffers have not yet been initialized. + // Also, if the method is synchronized, we first need to acquire the lock. + // Don't patch the return pc, to delay deoptimization until we enter the method body (the check + // added in LIRGenerator::do_Base will detect the pending deoptimization by checking the original_pc).
+#ifdef ASSERT + NativeCall* call = nativeCall_before(this->pc()); + address dest = call->destination(); + assert(dest == Runtime1::entry_for(Runtime1::buffer_value_args_no_receiver_id) || + dest == Runtime1::entry_for(Runtime1::buffer_value_args_id), "unexpected safepoint in entry point"); +#endif + return; + } +#endif + patch_pc(thread, deopt); #ifdef ASSERT diff --git a/src/hotspot/share/runtime/globals.hpp b/src/hotspot/share/runtime/globals.hpp index 75d51b13d01..a7d48abf4b4 100644 --- a/src/hotspot/share/runtime/globals.hpp +++ b/src/hotspot/share/runtime/globals.hpp @@ -2516,6 +2516,11 @@ const size_t minimumSymbolTableSize = 1024; develop(bool, ScalarizeValueTypes, true, \ "Scalarize value types in compiled code") \ \ + diagnostic(ccstrlist, ForceNonTearable, "", \ + "List of inline classes which are forced to be atomic " \ + "(whitespace and commas separate names, " \ + "and leading and trailing stars '*' are wildcards)") \ + \ product(bool, PrintNewLayout, false, \ "Print layout compute by new algorithm") \ \ diff --git a/src/hotspot/share/utilities/stringUtils.cpp b/src/hotspot/share/utilities/stringUtils.cpp index 21fb7a6e8d3..d4ed3a3e537 100644 --- a/src/hotspot/share/utilities/stringUtils.cpp +++ b/src/hotspot/share/utilities/stringUtils.cpp @@ -24,6 +24,7 @@ #include "precompiled.hpp" #include "utilities/debug.hpp" +#include "utilities/ostream.hpp" #include "utilities/stringUtils.hpp" int StringUtils::replace_no_expand(char* string, const char* from, const char* to) { @@ -65,3 +66,218 @@ double StringUtils::similarity(const char* str1, size_t len1, const char* str2, return 2.0 * (double) hit / (double) total; } + +class StringMatcher { + public: + typedef int getc_function_t(const char* &source, const char* limit); + + private: + // These do not get properly inlined. + // For full performance, this should be a template class + // parameterized by two function arguments. + getc_function_t* _pattern_getc; + getc_function_t* _string_getc; + + public: + StringMatcher(getc_function_t pattern_getc, + getc_function_t string_getc) + : _pattern_getc(pattern_getc), + _string_getc(string_getc) + { } + + enum { // special results from _pattern_getc + string_match_comma = -0x100 + ',', + string_match_star = -0x100 + '*', + string_match_eos = -0x100 + '\0' + }; + + private: + const char* + skip_anchor_word(const char* match, + const char* match_end, + int anchor_length, + const char* pattern, + const char* pattern_end) { + assert(pattern < pattern_end && anchor_length > 0, ""); + const char* begp = pattern; + int ch1 = _pattern_getc(begp, pattern_end); + // note that begp is now advanced over ch1 + assert(ch1 > 0, "regular char only"); + const char* matchp = match; + const char* limitp = match_end - anchor_length; + while (matchp <= limitp) { + int mch = _string_getc(matchp, match_end); + if (mch == ch1) { + const char* patp = begp; + const char* anchorp = matchp; + while (patp < pattern_end) { + char ch = _pattern_getc(patp, pattern_end); + char mch = _string_getc(anchorp, match_end); + if (mch != ch) { + anchorp = NULL; + break; + } + } + if (anchorp != NULL) { + return anchorp; // Found a full copy of the anchor. + } + // That did not work, so restart the search for ch1. 
+ } + } + return NULL; + } + + public: + bool string_match(const char* pattern, + const char* string) { + return string_match(pattern, pattern + strlen(pattern), + string, string + strlen(string)); + } + bool string_match(const char* pattern, const char* pattern_end, + const char* string, const char* string_end) { + const char* patp = pattern; + switch (_pattern_getc(patp, pattern_end)) { + case string_match_eos: + return false; // Empty pattern is always false. + case string_match_star: + if (patp == pattern_end) { + return true; // Lone star pattern is always true. + } + break; + } + patp = pattern; // Reset after lookahead. + const char* matchp = string; // NULL if failing + for (;;) { + int ch = _pattern_getc(patp, pattern_end); + switch (ch) { + case string_match_eos: + case string_match_comma: + // End of a list item; see if it's a match. + if (matchp == string_end) { + return true; + } + if (ch == string_match_comma) { + // Get ready to match the next item. + matchp = string; + continue; + } + return false; // End of all items. + + case string_match_star: + if (matchp != NULL) { + // Wildcard: Parse out following anchor word and look for it. + const char* begp = patp; + const char* endp = patp; + int anchor_len = 0; + for (;;) { + // get as many following regular characters as possible + endp = patp; + ch = _pattern_getc(patp, pattern_end); + if (ch <= 0) { + break; + } + anchor_len += 1; + } + // Anchor word [begp..endp) does not contain ch, so back up. + // Now do an eager match to the anchor word, and commit to it. + patp = endp; + if (ch == string_match_eos || + ch == string_match_comma) { + // Anchor word is at end of pattern, so treat it as a fixed pattern. + const char* limitp = string_end - anchor_len; + matchp = limitp; + patp = begp; + // Resume normal scanning at the only possible match position. + continue; + } + // Find a floating occurrence of the anchor and continue matching. + // Note: This is greedy; there is no backtrack here. Good enough. + matchp = skip_anchor_word(matchp, string_end, anchor_len, begp, endp); + } + continue; + } + // Normal character. + if (matchp != NULL) { + int mch = _string_getc(matchp, string_end); + if (mch != ch) { + matchp = NULL; + } + } + } + } +}; + +// Match a wildcarded class list to a proposed class name (in internal form). +// Commas or newlines separate multiple possible matches; stars are shell-style wildcards. +class ClassListMatcher : public StringMatcher { + public: + ClassListMatcher() + : StringMatcher(pattern_list_getc, class_name_getc) + { } + + private: + static int pattern_list_getc(const char* &pattern_ptr, + const char* pattern_end) { + if (pattern_ptr == pattern_end) { + return string_match_eos; + } + int ch = (unsigned char) *pattern_ptr++; + switch (ch) { + case ' ': case '\t': case '\n': case '\r': + case ',': + // End of list item. + for (;;) { + switch (*pattern_ptr) { + case ' ': case '\t': case '\n': case '\r': + case ',': + pattern_ptr += 1; // Collapse multiple commas or spaces. + continue; + } + break; + } + return string_match_comma; + + case '*': + // Wildcard, matching any number of chars. + while (*pattern_ptr == '*') { + pattern_ptr += 1; // Collapse multiple stars. + } + return string_match_star; + + case '.': + ch = '/'; // Look for internal form of package separator + break; + + case '\\': + // Superquote in pattern escapes * , whitespace, and itself. 
+ if (pattern_ptr < pattern_end) { + ch = (unsigned char) *pattern_ptr++; + } + break; + } + + assert(ch > 0, "regular char only"); + return ch; + } + + static int class_name_getc(const char* &name_ptr, + const char* name_end) { + if (name_ptr == name_end) { + return string_match_eos; + } + int ch = (unsigned char) *name_ptr++; + if (ch == '.') { + ch = '/'; // Normalize to internal form of package separator + } + return ch; // plain character + } +}; + +bool StringUtils::class_list_match(const char* class_pattern_list, + const char* class_name) { + if (class_pattern_list == NULL || class_name == NULL || class_name[0] == '\0') + return false; + ClassListMatcher clm; + return clm.string_match(class_pattern_list, class_name); +} + diff --git a/src/hotspot/share/utilities/stringUtils.hpp b/src/hotspot/share/utilities/stringUtils.hpp index 372222d7c70..ca378407708 100644 --- a/src/hotspot/share/utilities/stringUtils.hpp +++ b/src/hotspot/share/utilities/stringUtils.hpp @@ -40,6 +40,10 @@ class StringUtils : AllStatic { // Compute string similarity based on Dice's coefficient static double similarity(const char* str1, size_t len1, const char* str2, size_t len2); + + // Match a wildcarded class list to a proposed class name (in internal form). + // Commas separate multiple possible matches; stars are shell-style wildcards. + static bool class_list_match(const char* class_list, const char* class_name); }; #endif // SHARE_UTILITIES_STRINGUTILS_HPP diff --git a/src/java.base/share/classes/java/lang/NonTearable.java b/src/java.base/share/classes/java/lang/NonTearable.java new file mode 100644 index 00000000000..a72c43b60ac --- /dev/null +++ b/src/java.base/share/classes/java/lang/NonTearable.java @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package java.lang; + +/** + * An inline class implements the {@code NonTearable} interface to + * request that the JVM take extra care to avoid structure tearing + * when loading or storing any value of the class to a field or array + * element. Normally, only fields declared {@code volatile} are + * protected against structure tearing, but a class that implements + * this marker interface will never have its values torn, even when + * they are stored in array elements or in non-{@code volatile} + * fields, and even when multiple threads perform racing writes. + * + *
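To make the opt-in concrete, here is a minimal sketch in the prototype's inline-class syntax; Point128 is a hypothetical example, not part of this patch:

// A two-field inline class that asks the JVM to load and store its values
// atomically by implementing the marker interface. Without the marker (or the
// -XX:ForceNonTearable option added by this patch), such values may be
// flattened and accessed as two separate 64-bit reads and writes.
public inline class Point128 implements java.lang.NonTearable {
    final long x, y;
    public Point128(long x, long y) { this.x = x; this.y = y; }
}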
* <p> An inline instance of multiple components is said to be "torn" + * when two racing threads compete to write those components, and one + * thread writes some components while another thread writes other + * components, so a subsequent observer will read a hybrid composed, + * as if "out of thin air", of field values from both racing writes. + * Tearing can also occur when the effects of two non-racing writes + * are observed by a racing read. In general, structure tearing + * requires a read and two writes (initialization counting as a write) + * of a multi-component value, with a race between any two of the + * accesses. The effect can also be described as if the Java memory + * model broke up inline instance reads and writes into reads and + * writes of their various fields, as it does with longs and doubles + * (JLS 17.7). + *
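The race described above is easy to reproduce with ordinary mutable fields, which tear in just the same way; the self-contained sketch below (TearDemo and Pair are hypothetical names standing in for an inline value) lets a reader race against a writer and count hybrid observations:

// One thread repeatedly writes the pair (i, i); the racing reader may observe
// x != y, a hybrid composed from two different writes. Counts vary by machine
// and JIT behavior; this is a demonstration, not a reliable detector.
public class TearDemo {
    static final class Pair { long x, y; }   // two components, written separately

    public static void main(String[] args) throws InterruptedException {
        Pair p = new Pair();
        Thread writer = new Thread(() -> {
            for (long i = 1; i <= 20_000_000L; i++) { p.x = i; p.y = i; }
        });
        writer.start();
        long torn = 0;
        while (writer.isAlive()) {
            long x = p.x, y = p.y;           // racing read of both components
            if (x != y) torn++;
        }
        writer.join();
        System.out.println("torn observations: " + torn);
    }
}

The ValueTearing test added later in this patch performs the same experiment with real inline classes, both with and without NonTearable.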
* <p> In extreme cases, the hybrid observed after structure tearing + * might be a value which is impossible to construct by normal means. + * If data integrity or security depends on proper construction, + * the class should be declared as implementing {@code NonTearable}. + * + * @author John Rose + * @since (valhalla) + */ +public interface NonTearable { + // TO DO: Finalize name. + // TO DO: Decide whether and how to restrict this type to + // inline classes only, or if not, whether to document its + // non-effect on identity classes. +} diff --git a/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestC2CCalls.java b/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestC2CCalls.java index d84c1221961..5e5d4865873 100644 --- a/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestC2CCalls.java +++ b/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestC2CCalls.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -23,32 +23,46 @@ /** * @test - * @library /test/lib * @summary Test value type calling convention with compiled to compiled calls. - * @run main/othervm TestC2CCalls - * @run main/othervm -XX:-UseBimorphicInlining -Xbatch + * @library /test/lib /compiler/whitebox / + * @compile TestC2CCalls.java + * @run driver ClassFileInstaller sun.hotspot.WhiteBox + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI + * TestC2CCalls + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI + * -XX:-UseBimorphicInlining -Xbatch * -XX:CompileCommand=compileonly,TestC2CCalls*::test* * -XX:CompileCommand=dontinline,TestC2CCalls*::test* * TestC2CCalls - * @run main/othervm -XX:-UseBimorphicInlining -Xbatch -XX:-ProfileInterpreter + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI + * -XX:-UseBimorphicInlining -Xbatch -XX:-ProfileInterpreter * -XX:CompileCommand=compileonly,TestC2CCalls*::test* * -XX:CompileCommand=dontinline,TestC2CCalls*::test* * TestC2CCalls - * @run main/othervm -XX:-UseBimorphicInlining -Xbatch + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI + * -XX:-UseBimorphicInlining -Xbatch * -XX:CompileCommand=compileonly,TestC2CCalls::test* * -XX:CompileCommand=dontinline,TestC2CCalls*::test* * TestC2CCalls - * @run main/othervm -XX:-UseBimorphicInlining -Xbatch -XX:-ProfileInterpreter + * @run main/othervm -Xbootclasspath/a:.
-XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI + * -XX:-UseBimorphicInlining -Xbatch -XX:-ProfileInterpreter * -XX:CompileCommand=compileonly,TestC2CCalls::test* * -XX:CompileCommand=dontinline,TestC2CCalls*::test* * TestC2CCalls */ +import java.lang.reflect.Method; +import java.util.ArrayList; +import java.util.Collections; + import jdk.test.lib.Asserts; import jdk.test.lib.Utils; -public class TestC2CCalls { +import sun.hotspot.WhiteBox; +public class TestC2CCalls { + public static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox(); + public static final int COMP_LEVEL_FULL_OPTIMIZATION = 4; // C2 or JVMCI public static final int rI = Utils.getRandomInstance().nextInt() % 1000; static inline class OtherVal { @@ -466,6 +480,24 @@ public static int test21(MyInterface1 intf, MyValue4 v, int y) { } public static void main(String[] args) { + // Sometimes, exclude some methods from compilation with C2 to stress test the calling convention + if (Utils.getRandomInstance().nextBoolean()) { + ArrayList<Method> methods = new ArrayList<Method>(); + Collections.addAll(methods, MyValue1.class.getDeclaredMethods()); + Collections.addAll(methods, MyValue2.class.getDeclaredMethods()); + Collections.addAll(methods, MyValue3.class.getDeclaredMethods()); + Collections.addAll(methods, MyValue4.class.getDeclaredMethods()); + Collections.addAll(methods, MyObject.class.getDeclaredMethods()); + Collections.addAll(methods, TestC2CCalls.class.getDeclaredMethods()); + System.out.println("Excluding methods from C2 compilation:"); + for (Method m : methods) { + if (Utils.getRandomInstance().nextBoolean()) { + System.out.println(m); + WHITE_BOX.makeMethodNotCompilable(m, COMP_LEVEL_FULL_OPTIMIZATION, false); + } + } + } + MyValue1 val1 = new MyValue1(rI); MyValue2 val2 = new MyValue2(rI+1); MyValue3 val3 = new MyValue3(rI+2); diff --git a/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestDeoptimizationWhenBuffering.java b/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestDeoptimizationWhenBuffering.java index 6231626cb9d..3681a48d380 100644 --- a/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestDeoptimizationWhenBuffering.java +++ b/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestDeoptimizationWhenBuffering.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -28,34 +28,47 @@ import jdk.test.lib.Asserts; +import sun.hotspot.WhiteBox; + /** * @test TestDeoptimizationWhenBuffering * @summary Test correct execution after deoptimizing from inline type specific runtime calls. * @library /testlibrary /test/lib /compiler/whitebox / * @compile -XDallowWithFieldOperator TestDeoptimizationWhenBuffering.java - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+DeoptimizeALot -XX:-UseTLAB -Xbatch + * @run driver ClassFileInstaller sun.hotspot.WhiteBox + * @run main/othervm -Xbootclasspath/a:. -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI + * -XX:+DeoptimizeALot -XX:CompileCommand=dontinline,compiler.valhalla.valuetypes.*::test* + * compiler.valhalla.valuetypes.TestDeoptimizationWhenBuffering C1 + * @run main/othervm -Xbootclasspath/a:.
-XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI + * -XX:+DeoptimizeALot -XX:-UseTLAB -Xbatch * compiler.valhalla.valuetypes.TestDeoptimizationWhenBuffering - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+DeoptimizeALot -XX:-UseTLAB -Xbatch -XX:-MonomorphicArrayCheck -XX:-AlwaysIncrementalInline + * @run main/othervm -Xbootclasspath/a:. -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI + * -XX:+DeoptimizeALot -XX:-UseTLAB -Xbatch -XX:-MonomorphicArrayCheck -XX:-AlwaysIncrementalInline * -XX:-ValueTypePassFieldsAsArgs -XX:-ValueTypeReturnedAsFields -XX:ValueArrayElemMaxFlatSize=1 * -XX:CompileCommand=dontinline,compiler.valhalla.valuetypes.*::test* * compiler.valhalla.valuetypes.TestDeoptimizationWhenBuffering - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+DeoptimizeALot -XX:-UseTLAB -Xbatch -XX:-MonomorphicArrayCheck -XX:+AlwaysIncrementalInline + * @run main/othervm -Xbootclasspath/a:. -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI + * -XX:+DeoptimizeALot -XX:-UseTLAB -Xbatch -XX:-MonomorphicArrayCheck -XX:+AlwaysIncrementalInline * -XX:-ValueTypePassFieldsAsArgs -XX:-ValueTypeReturnedAsFields -XX:ValueArrayElemMaxFlatSize=1 * -XX:CompileCommand=dontinline,compiler.valhalla.valuetypes.*::test* * compiler.valhalla.valuetypes.TestDeoptimizationWhenBuffering - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+DeoptimizeALot -XX:-UseTLAB -Xbatch -XX:-MonomorphicArrayCheck -XX:-AlwaysIncrementalInline + * @run main/othervm -Xbootclasspath/a:. -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI + * -XX:+DeoptimizeALot -XX:-UseTLAB -Xbatch -XX:-MonomorphicArrayCheck -XX:-AlwaysIncrementalInline * -XX:+ValueTypePassFieldsAsArgs -XX:+ValueTypeReturnedAsFields -XX:ValueArrayElemMaxFlatSize=-1 * -XX:CompileCommand=dontinline,compiler.valhalla.valuetypes.*::test* * compiler.valhalla.valuetypes.TestDeoptimizationWhenBuffering - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+DeoptimizeALot -XX:-UseTLAB -Xbatch -XX:-MonomorphicArrayCheck -XX:+AlwaysIncrementalInline + * @run main/othervm -Xbootclasspath/a:. -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI + * -XX:+DeoptimizeALot -XX:-UseTLAB -Xbatch -XX:-MonomorphicArrayCheck -XX:+AlwaysIncrementalInline * -XX:+ValueTypePassFieldsAsArgs -XX:+ValueTypeReturnedAsFields -XX:ValueArrayElemMaxFlatSize=-1 * -XX:CompileCommand=dontinline,compiler.valhalla.valuetypes.*::test* * compiler.valhalla.valuetypes.TestDeoptimizationWhenBuffering - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+DeoptimizeALot -XX:-UseTLAB -Xbatch -XX:-MonomorphicArrayCheck -XX:-AlwaysIncrementalInline + * @run main/othervm -Xbootclasspath/a:. -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI + * -XX:+DeoptimizeALot -XX:-UseTLAB -Xbatch -XX:-MonomorphicArrayCheck -XX:-AlwaysIncrementalInline * -XX:+ValueTypePassFieldsAsArgs -XX:+ValueTypeReturnedAsFields -XX:ValueArrayElemMaxFlatSize=-1 -XX:ValueFieldMaxFlatSize=0 * -XX:CompileCommand=dontinline,compiler.valhalla.valuetypes.*::test* * compiler.valhalla.valuetypes.TestDeoptimizationWhenBuffering - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+DeoptimizeALot -XX:-UseTLAB -Xbatch -XX:-MonomorphicArrayCheck -XX:+AlwaysIncrementalInline + * @run main/othervm -Xbootclasspath/a:. 
-XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI + * -XX:+DeoptimizeALot -XX:-UseTLAB -Xbatch -XX:-MonomorphicArrayCheck -XX:+AlwaysIncrementalInline * -XX:+ValueTypePassFieldsAsArgs -XX:+ValueTypeReturnedAsFields -XX:ValueArrayElemMaxFlatSize=-1 -XX:ValueFieldMaxFlatSize=0 * -XX:CompileCommand=dontinline,compiler.valhalla.valuetypes.*::test* * compiler.valhalla.valuetypes.TestDeoptimizationWhenBuffering @@ -91,6 +104,9 @@ public MyValue2() { } public class TestDeoptimizationWhenBuffering { + static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox(); + static final int COMP_LEVEL_FULL_OPTIMIZATION = 4; // C2 or JVMCI + static { try { Class clazz = TestDeoptimizationWhenBuffering.class; @@ -116,7 +132,7 @@ MyValue1 test2() { return vtField1; } - int test3Callee(MyValue1 vt) { + public int test3Callee(MyValue1 vt) { return vt.hash(); } @@ -152,7 +168,7 @@ MyValue1? test8(MyValue1?[] obj) { static final MethodHandle test9_mh; - static MyValue1 test9Callee() { + public static MyValue1 test9Callee() { return new MyValue1(); } @@ -164,7 +180,7 @@ MyValue1 test9() throws Throwable { static final MyValue1 test10Field = new MyValue1(); static int test10Counter = 0; - static MyValue1 test10Callee() { + public static MyValue1 test10Callee() { test10Counter++; return test10Field; } @@ -185,6 +201,19 @@ MyValue1 test12() { } public static void main(String[] args) throws Throwable { + if (args.length > 0) { + // Compile callees with C1 only, to exercise deoptimization while buffering at method entry + Asserts.assertEQ(args[0], "C1", "unsupported mode"); + Method m = MyValue1.class.getMethod("testWithField", int.class); + WHITE_BOX.makeMethodNotCompilable(m, COMP_LEVEL_FULL_OPTIMIZATION, false); + m = TestDeoptimizationWhenBuffering.class.getMethod("test3Callee", MyValue1.class); + WHITE_BOX.makeMethodNotCompilable(m, COMP_LEVEL_FULL_OPTIMIZATION, false); + m = TestDeoptimizationWhenBuffering.class.getMethod("test9Callee"); + WHITE_BOX.makeMethodNotCompilable(m, COMP_LEVEL_FULL_OPTIMIZATION, false); + m = TestDeoptimizationWhenBuffering.class.getMethod("test10Callee"); + WHITE_BOX.makeMethodNotCompilable(m, COMP_LEVEL_FULL_OPTIMIZATION, false); + } + MyValue1[] va = new MyValue1[3]; va[0] = new MyValue1(); Object[] oa = new Object[3]; diff --git a/test/hotspot/jtreg/runtime/valhalla/valuetypes/FlattenableSemanticTest.java b/test/hotspot/jtreg/runtime/valhalla/valuetypes/FlattenableSemanticTest.java index 3b2c211d179..d3b7fabf180 100644 --- a/test/hotspot/jtreg/runtime/valhalla/valuetypes/FlattenableSemanticTest.java +++ b/test/hotspot/jtreg/runtime/valhalla/valuetypes/FlattenableSemanticTest.java @@ -37,7 +37,9 @@ * @compile -XDemitQtypes -XDenableValueTypes -XDallowWithFieldOperator Point.java JumboValue.java * @compile -XDemitQtypes -XDenableValueTypes -XDallowWithFieldOperator FlattenableSemanticTest.java * @run main/othervm -Xint -XX:ValueFieldMaxFlatSize=64 runtime.valhalla.valuetypes.FlattenableSemanticTest + * @run main/othervm -Xint -XX:+UnlockDiagnosticVMOptions -XX:ForceNonTearable=* runtime.valhalla.valuetypes.FlattenableSemanticTest * @run main/othervm -Xcomp -XX:ValueFieldMaxFlatSize=64 runtime.valhalla.valuetypes.FlattenableSemanticTest + * @run main/othervm -Xcomp -XX:+UnlockDiagnosticVMOptions -XX:ForceNonTearable=* runtime.valhalla.valuetypes.FlattenableSemanticTest * // debug: -XX:+PrintValueLayout -XX:-ShowMessageBoxOnError */ public class FlattenableSemanticTest { diff --git a/test/hotspot/jtreg/runtime/valhalla/valuetypes/ValueTearing.java 
b/test/hotspot/jtreg/runtime/valhalla/valuetypes/ValueTearing.java new file mode 100644 index 00000000000..18008d4da95 --- /dev/null +++ b/test/hotspot/jtreg/runtime/valhalla/valuetypes/ValueTearing.java @@ -0,0 +1,280 @@ +/* + * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package runtime.valhalla.valuetypes; + +import java.lang.reflect.Array; +import java.lang.reflect.Field; +import java.util.Arrays; +import java.util.ArrayList; +import java.util.List; +import java.util.function.Supplier; +import java.util.Optional; + +import jdk.internal.misc.Unsafe; +import sun.hotspot.WhiteBox; +import static jdk.test.lib.Asserts.*; + +/* + * @test ValueTearing + * @summary Test tearing of inline fields and array elements + * @modules java.base/jdk.internal.misc + * @library /test/lib + * @compile ValueTearing.java + * @run driver ClassFileInstaller sun.hotspot.WhiteBox + * @run main/othervm -Xint -XX:+UnlockDiagnosticVMOptions -XX:ForceNonTearable= + * -DSTEP_COUNT=10000 + * -Xbootclasspath/a:. -XX:+WhiteBoxAPI + * runtime.valhalla.valuetypes.ValueTearing + * @run main/othervm -Xint -XX:+UnlockDiagnosticVMOptions -XX:ForceNonTearable=* + * -DSTEP_COUNT=10000 + * -Xbootclasspath/a:. -XX:+WhiteBoxAPI + * runtime.valhalla.valuetypes.ValueTearing + * @run main/othervm -Xbatch -DSTEP_COUNT=10000000 + * -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI + * runtime.valhalla.valuetypes.ValueTearing + * @run main/othervm -Xbatch -XX:+UnlockDiagnosticVMOptions -XX:ForceNonTearable= + * -DTEAR_MODE=fieldonly + * -Xbootclasspath/a:. -XX:+WhiteBoxAPI + * runtime.valhalla.valuetypes.ValueTearing + * @run main/othervm -Xbatch -XX:+UnlockDiagnosticVMOptions -XX:ForceNonTearable= + * -DTEAR_MODE=arrayonly + * -Xbootclasspath/a:. -XX:+WhiteBoxAPI + * runtime.valhalla.valuetypes.ValueTearing + * @run main/othervm -Xbatch -XX:+UnlockDiagnosticVMOptions -XX:ForceNonTearable=* + * -DTEAR_MODE=both + * -Xbootclasspath/a:. 
-XX:+WhiteBoxAPI + * runtime.valhalla.valuetypes.ValueTearing + */ +public class ValueTearing { + private static final Unsafe UNSAFE = Unsafe.getUnsafe(); + private static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox(); + private static final boolean USE_COMPILER = WHITE_BOX.getBooleanVMFlag("UseCompiler"); + private static final boolean ALWAYS_ATOMIC = WHITE_BOX.getStringVMFlag("ForceNonTearable").contains("*"); + private static final String TEAR_MODE = System.getProperty("TEAR_MODE", "both"); + private static final boolean TEAR_FIELD = !TEAR_MODE.equals("arrayonly"); + private static final boolean TEAR_ARRAY = !TEAR_MODE.equals("fieldonly"); + private static final int STEP_COUNT = Integer.getInteger("STEP_COUNT", 100_000); + private static final boolean TFIELD_FLAT, TARRAY_FLAT; + private static final boolean NTFIELD_FLAT, NTARRAY_FLAT; + static { + try { + Field TPB_field = TPointBox.class.getDeclaredField("field"); + Field TPB_array = TPointBox.class.getDeclaredField("array"); + Field NTPB_field = NTPointBox.class.getDeclaredField("field"); + Field NTPB_array = NTPointBox.class.getDeclaredField("array"); + TFIELD_FLAT = UNSAFE.isFlattened(TPB_field); + TARRAY_FLAT = UNSAFE.isFlattenedArray(TPB_array.getType()); + NTFIELD_FLAT = UNSAFE.isFlattened(NTPB_field); + NTARRAY_FLAT = UNSAFE.isFlattenedArray(NTPB_array.getType()); + } catch (ReflectiveOperationException ex) { + throw new AssertionError(ex); + } + } + private static final String SETTINGS = + String.format("USE_COMPILER=%s ALWAYS_ATOMIC=%s TEAR_MODE=%s STEP_COUNT=%s FLAT TF/TA=%s/%s NTF/NTA=%s/%s", + USE_COMPILER, ALWAYS_ATOMIC, TEAR_MODE, STEP_COUNT, + TFIELD_FLAT, TARRAY_FLAT, NTFIELD_FLAT, NTARRAY_FLAT); + private static final String NOTE_TORN_POINT = "Note: torn point"; + + public static void main(String[] args) throws Exception { + System.out.println(SETTINGS); + ValueTearing valueTearing = new ValueTearing(); + valueTearing.run(); + // Extra representation check: + assert(!NTFIELD_FLAT) : "NT field must be indirect not flat"; + assert(!NTARRAY_FLAT) : "NT array must be indirect not flat"; + if (ALWAYS_ATOMIC) { + assert(!TFIELD_FLAT) : "field must be indirect not flat"; + assert(!TARRAY_FLAT) : "array must be indirect not flat"; + } + } + + // A normally tearable inline value. + static inline class TPoint { + TPoint(long x, long y) { this.x = x; this.y = y; } + final long x, y; + public String toString() { return String.format("(%d,%d)", x, y); } + } + + static class TooTearable extends AssertionError { + final Object badPoint; + TooTearable(String msg, Object badPoint) { + super(msg); + this.badPoint = badPoint; + } + } + + interface PointBox { + void step(); // mutate inline value state + void check(); // check sanity of inline value state + } + + class TPointBox implements PointBox { + TPoint field; + TPoint[] array = new TPoint[1]; + // Step the points forward by incrementing their components + // "simultaneously". A racing thread will catch flaws in the + // simultaneity. + TPoint step(TPoint p) { + return new TPoint(p.x + 1, p.y + 1); + } + public @Override + void step() { + if (TEAR_FIELD) { + field = step(field); + } + if (TEAR_ARRAY) { + array[0] = step(array[0]); + } + check(); + } + // Invariant: The components of each point are "always" equal. + // As long as simultaneity is preserved, this is true. 
+ public @Override + void check() { + if (TEAR_FIELD) { + check(field, "field"); + } + if (TEAR_ARRAY) { + check(array[0], "array element"); + } + } + void check(TPoint p, String where) { + if (p.x == p.y) return; + String msg = String.format("%s %s in %s; settings = %s", + NOTE_TORN_POINT, + p, where, SETTINGS); + throw new TooTearable(msg, p); + } + public String toString() { + return String.format("TPB[%s, {%s}]", field, array[0]); + } + } + + // Add an indirection, as an extra test. + interface NT extends NonTearable { } + + // A hardened, non-tearable version of TPoint. + static inline class NTPoint implements NT { + NTPoint(long x, long y) { this.x = x; this.y = y; } + final long x, y; + public String toString() { return String.format("(%d,%d)", x, y); } + } + + class NTPointBox implements PointBox { + NTPoint field; + NTPoint[] array = new NTPoint[1]; + // Step the points forward by incrementing their components + // "simultaneously". A racing thread will catch flaws in the + // simultaneity. + NTPoint step(NTPoint p) { + return new NTPoint(p.x + 1, p.y + 1); + } + public @Override + void step() { + field = step(field); + array[0] = step(array[0]); + check(); + } + // Invariant: The components of each point are "always" equal. + public @Override + void check() { + check(field, "field"); + check(array[0], "array element"); + } + void check(NTPoint p, String where) { + if (p.x == p.y) return; + String msg = String.format("%s *NonTearable* %s in %s; settings = %s", + NOTE_TORN_POINT, + p, where, SETTINGS); + throw new TooTearable(msg, p); + } + public String toString() { + return String.format("NTPB[%s, {%s}]", field, array[0]); + } + } + + class AsyncObserver extends Thread { + volatile boolean done; + long observationCount; + final PointBox pointBox; + volatile Object badPointObserved; + AsyncObserver(PointBox pointBox) { + this.pointBox = pointBox; + } + public void run() { + try { + while (!done) { + observationCount++; + pointBox.check(); + } + } catch (TooTearable ex) { + done = true; + badPointObserved = ex.badPoint; + System.out.println(ex); + if (ALWAYS_ATOMIC || ex.badPoint instanceof NonTearable) { + throw ex; + } + } + } + } + + public void run() throws Exception { + System.out.println("Test for tearing of NTPoint, which must not happen..."); + run(new NTPointBox(), false); + System.out.println("Test for tearing of TPoint, which "+ + (ALWAYS_ATOMIC ? "must not" : "is allowed to")+ + " happen..."); + run(new TPointBox(), ALWAYS_ATOMIC ? false : true); + } + public void run(PointBox pointBox, boolean canTear) throws Exception { + var observer = new AsyncObserver(pointBox); + observer.start(); + for (int i = 0; i < STEP_COUNT; i++) { + pointBox.step(); + if (observer.done) break; + } + observer.done = true; + observer.join(); + var obCount = observer.observationCount; + var badPoint = observer.badPointObserved; + System.out.println(String.format("finished after %d observations at %s; %s", + obCount, pointBox, + (badPoint == null + ? "no tearing observed" + : "bad point = " + badPoint))); + if (canTear && badPoint == null) { + var complain = String.format("%s NOT observed after %d observations", + NOTE_TORN_POINT, obCount); + System.out.println("?????? "+complain); + if (STEP_COUNT >= 3_000_000) { + // If it's a small count, OK, but if it's big the test is broken. 
+ throw new AssertionError(complain + ", but it should have been"); + } + } + if (!canTear && badPoint != null) { + throw new AssertionError("should not reach here; other thread must throw"); + } + } +} diff --git a/test/hotspot/jtreg/runtime/valhalla/valuetypes/ValueTypeArray.java b/test/hotspot/jtreg/runtime/valhalla/valuetypes/ValueTypeArray.java index f381cfa7f17..3b0d553ec39 100644 --- a/test/hotspot/jtreg/runtime/valhalla/valuetypes/ValueTypeArray.java +++ b/test/hotspot/jtreg/runtime/valhalla/valuetypes/ValueTypeArray.java @@ -39,6 +39,7 @@ * @run main/othervm -Xint -XX:ValueArrayElemMaxFlatSize=0 runtime.valhalla.valuetypes.ValueTypeArray * @run main/othervm -Xcomp -XX:ValueArrayElemMaxFlatSize=-1 runtime.valhalla.valuetypes.ValueTypeArray * @run main/othervm -Xcomp -XX:ValueArrayElemMaxFlatSize=0 runtime.valhalla.valuetypes.ValueTypeArray + * @run main/othervm -Xbatch -XX:+UnlockDiagnosticVMOptions -XX:ForceNonTearable=* runtime.valhalla.valuetypes.ValueTypeArray */ public class ValueTypeArray { public static void main(String[] args) { diff --git a/test/hotspot/jtreg/runtime/valhalla/valuetypes/ValueTypeDensity.java b/test/hotspot/jtreg/runtime/valhalla/valuetypes/ValueTypeDensity.java index a65f6eaa505..f6886a0313f 100644 --- a/test/hotspot/jtreg/runtime/valhalla/valuetypes/ValueTypeDensity.java +++ b/test/hotspot/jtreg/runtime/valhalla/valuetypes/ValueTypeDensity.java @@ -39,6 +39,9 @@ * @run main/othervm -Xcomp -XX:ValueArrayElemMaxFlatSize=-1 * -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions * -XX:+WhiteBoxAPI ValueTypeDensity + * @run main/othervm -Xbatch -XX:+UnlockDiagnosticVMOptions + * -Xbootclasspath/a:. -XX:ForceNonTearable=* + * -XX:+WhiteBoxAPI ValueTypeDensity */ public class ValueTypeDensity { diff --git a/test/hotspot/jtreg/runtime/valhalla/valuetypes/ValueTypesTest.java b/test/hotspot/jtreg/runtime/valhalla/valuetypes/ValueTypesTest.java index d82753d430d..f60cf72dbc6 100644 --- a/test/hotspot/jtreg/runtime/valhalla/valuetypes/ValueTypesTest.java +++ b/test/hotspot/jtreg/runtime/valhalla/valuetypes/ValueTypesTest.java @@ -62,6 +62,12 @@ * -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions * -Djava.lang.invoke.MethodHandle.DUMP_CLASS_FILES=false * runtime.valhalla.valuetypes.ValueTypesTest + * @run main/othervm -Xbatch -Xmx128m -XX:-ShowMessageBoxOnError + * -XX:+ExplicitGCInvokesConcurrent + * -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions + * -Djava.lang.invoke.MethodHandle.DUMP_CLASS_FILES=false + * -XX:ForceNonTearable=* + * runtime.valhalla.valuetypes.ValueTypesTest */ public class ValueTypesTest { diff --git a/test/micro/org/openjdk/bench/valhalla/corelibs/InlineCursor.java b/test/micro/org/openjdk/bench/valhalla/corelibs/InlineCursor.java new file mode 100644 index 00000000000..1af8e5f228c --- /dev/null +++ b/test/micro/org/openjdk/bench/valhalla/corelibs/InlineCursor.java @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved. + */ + +package org.openjdk.bench.valhalla.corelibs; + +import java.util.ConcurrentModificationException; +import java.util.NoSuchElementException; + +/** + * An inline cursor is a reference to an existing or non-existent element + * of a collection. + *
* <p> Cursor values are immutable: the reference to an element + * does not change, but the state of the collection can change + * so that the element is no longer accessible; calling {@link #get()} + * then throws a {@link ConcurrentModificationException}. + * Iterating through a Collection proceeds by creating a new Cursor + * from the Collection and advancing to the next or retreating to the previous element. + * Advancing past the end of the Collection or retreating before the beginning + * results in Cursor values that are non-existent. + * A Cursor for an empty Collection does not refer to an element, and + * its {@link #get()} throws {@link NoSuchElementException}. + * Modifications to the Collection invalidate every Cursor that was created + * before the modification. + * The typical traversal pattern is:
+ * <pre>{@code
+ *  Collection<T> c = ...;
+ *  for (var cursor = c.cursor(); cursor.exists(); cursor = cursor.advance()) {
+ *      var el = cursor.get();
+ *  }
+ * }</pre>
+ *
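To ground the traversal contract in something compilable today, here is a hedged sketch of the same shape over a plain array-backed container; SimpleBox and its nested Cursor are hypothetical stand-ins, and a real Valhalla version would declare the cursor as an inline class:

import java.util.NoSuchElementException;

// Minimal container whose cursor follows the exists()/advance()/get() protocol
// documented above. Cursors are immutable; advancing returns a new cursor.
final class SimpleBox<T> {
    private final T[] elements;

    SimpleBox(T[] elements) { this.elements = elements; }

    Cursor<T> cursor() { return new Cursor<>(this, 0); }

    static final class Cursor<T> {       // would be an inline class in Valhalla
        private final SimpleBox<T> box;
        private final int index;
        Cursor(SimpleBox<T> box, int index) { this.box = box; this.index = index; }
        boolean exists() { return index < box.elements.length; }
        Cursor<T> advance() { return new Cursor<>(box, index + 1); }
        T get() {
            if (!exists()) throw new NoSuchElementException();
            return box.elements[index];
        }
    }

    public static void main(String[] args) {
        SimpleBox<String> box = new SimpleBox<>(new String[] {"a", "b", "c"});
        for (var cursor = box.cursor(); cursor.exists(); cursor = cursor.advance()) {
            System.out.println(cursor.get());
        }
    }
}

This sketch omits the modification checks; a real implementation would also compare a captured modCount in exists() and get(), as the javadoc requires.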
* <p> Cursors can be used to {@link #remove() remove} an element from the collection. + * Removing an element modifies the collection, making that cursor invalid. + * The cursor returned from the {@link #remove()} method is a placeholder + * for the position the element occupied, between the next and previous elements. + * It can be moved to the next or previous element to continue the iteration. + *
* <p> The typical traversal and remove pattern follows; when an element is + * removed, the cursor returned from the remove is used to continue the iteration: + * <pre>{@code
+ *  Collection<T> c = ...;
+ *  for (var cursor = c.cursor(); cursor.exists(); cursor = cursor.advance()) {
+ *      var el = cursor.get();
+ *      if (el.equals(...)) {
+ *          cursor = cursor.remove();
+ *      }
+ *  }
+ * }</pre>
+ *
+ * @param <T> the type of the element. + */ +public interface InlineCursor<T> { + /** + * Return true if the Cursor refers to an element. + * + * If the collection has been modified since the Cursor was created, + * the element cannot be known to exist. + * This method does not throw {@link ConcurrentModificationException} + * if the collection has been modified; it returns false instead. + * + * @return true if this Cursor refers to an element in the collection and + * the collection has not been modified since the cursor was created; + * false otherwise + */ + boolean exists(); + + /** + * Return a Cursor for the next element after the current element. + * If there is no element following this element, the returned + * Cursor will be non-existent. To wit: {@code Cursor.exists() == false}. + * + * @return a cursor for the next element after this element + * @throws ConcurrentModificationException if the collection + * has been modified since this Cursor was created + */ + InlineCursor<T> advance(); + + /** + * Return the current element referred to by the Cursor. + * + * The behavior must be consistent with {@link #exists()} + * as long as the collection has not been modified. + * + * @return the element in the collection if the collection + * has not been modified since the cursor was created + * @throws NoSuchElementException if the referenced element does not exist + * or no longer exists + * @throws ConcurrentModificationException if the collection + * has been modified since this Cursor was created + */ + T get(); +} diff --git a/test/micro/org/openjdk/bench/valhalla/corelibs/XArrayList.java b/test/micro/org/openjdk/bench/valhalla/corelibs/XArrayList.java new file mode 100644 index 00000000000..041f311f579 --- /dev/null +++ b/test/micro/org/openjdk/bench/valhalla/corelibs/XArrayList.java @@ -0,0 +1,1906 @@ +/* + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions.
+ */ + +package org.openjdk.bench.valhalla.corelibs; + +import java.util.AbstractList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Comparator; +import java.util.ConcurrentModificationException; +import java.util.Iterator; +import java.util.List; +import java.util.ListIterator; +import java.util.NoSuchElementException; +import java.util.Objects; +import java.util.RandomAccess; +import java.util.Spliterator; + +import java.util.function.Consumer; +import java.util.function.Predicate; +import java.util.function.UnaryOperator; +//import jdk.internal.access.SharedSecrets; +//import jdk.internal.util.ArraysSupport; + +/** + * Resizable-array implementation of the {@code List} interface. Implements + * all optional list operations, and permits all elements, including + * {@code null}. In addition to implementing the {@code List} interface, + * this class provides methods to manipulate the size of the array that is + * used internally to store the list. (This class is roughly equivalent to + * {@code Vector}, except that it is unsynchronized.) + * + *
* <p>
The {@code size}, {@code isEmpty}, {@code get}, {@code set}, + * {@code iterator}, and {@code listIterator} operations run in constant + * time. The {@code add} operation runs in amortized constant time, + * that is, adding n elements requires O(n) time. All of the other operations + * run in linear time (roughly speaking). The constant factor is low compared + * to that for the {@code LinkedList} implementation. + * + *
* <p>
Each {@code XArrayList} instance has a capacity. The capacity is + * the size of the array used to store the elements in the list. It is always + * at least as large as the list size. As elements are added to an XArrayList, + * its capacity grows automatically. The details of the growth policy are not + * specified beyond the fact that adding an element has constant amortized + * time cost. + * + *
* <p>
An application can increase the capacity of an {@code XArrayList} instance + * before adding a large number of elements using the {@code ensureCapacity} + * operation. This may reduce the amount of incremental reallocation. + * + *
* <p>
Note that this implementation is not synchronized. + * If multiple threads access an {@code XArrayList} instance concurrently, + * and at least one of the threads modifies the list structurally, it + * must be synchronized externally. (A structural modification is + * any operation that adds or deletes one or more elements, or explicitly + * resizes the backing array; merely setting the value of an element is not + * a structural modification.) This is typically accomplished by + * synchronizing on some object that naturally encapsulates the list. + * + * If no such object exists, the list should be "wrapped" using the + * {@link Collections#synchronizedList Collections.synchronizedList} + * method. This is best done at creation time, to prevent accidental + * unsynchronized access to the list:
* <pre>
+ *   List list = Collections.synchronizedList(new XArrayList(...));</pre>
+ *
+ * <p>
+ * The iterators returned by this class's {@link #iterator() iterator} and + * {@link #listIterator(int) listIterator} methods are fail-fast: + * if the list is structurally modified at any time after the iterator is + * created, in any way except through the iterator's own + * {@link ListIterator#remove() remove} or + * {@link ListIterator#add(Object) add} methods, the iterator will throw a + * {@link ConcurrentModificationException}. Thus, in the face of + * concurrent modification, the iterator fails quickly and cleanly, rather + * than risking arbitrary, non-deterministic behavior at an undetermined + * time in the future. + * + *
* <p>
Note that the fail-fast behavior of an iterator cannot be guaranteed + * as it is, generally speaking, impossible to make any hard guarantees in the + * presence of unsynchronized concurrent modification. Fail-fast iterators + * throw {@code ConcurrentModificationException} on a best-effort basis. + * Therefore, it would be wrong to write a program that depended on this + * exception for its correctness: the fail-fast behavior of iterators + * should be used only to detect bugs. + * + *
* <p>This class is a member of the + * <a href="{@docRoot}/java.base/java/util/package-summary.html#CollectionsFramework"> + * Java Collections Framework</a>. + * + * @param <E> the type of elements in this list + * + * @author Josh Bloch + * @author Neal Gafter + * @see Collection + * @see List + * @see LinkedList + * @see Vector + * @since 1.2 + */ +public class XArrayList<E> extends AbstractList<E> + implements List<E>, RandomAccess, Cloneable, java.io.Serializable +{ + private static final long serialVersionUID = 8683452581122892189L; + + /** + * Default initial capacity. + */ + private static final int DEFAULT_CAPACITY = 10; + + /** + * Shared empty array instance used for empty instances. + */ + private static final Object[] EMPTY_ELEMENTDATA = {}; + + /** + * Shared empty array instance used for default sized empty instances. We + * distinguish this from EMPTY_ELEMENTDATA to know how much to inflate when + * first element is added. + */ + private static final Object[] DEFAULTCAPACITY_EMPTY_ELEMENTDATA = {}; + + /** + * The array buffer into which the elements of the ArrayList are stored. + * The capacity of the ArrayList is the length of this array buffer. Any + * empty ArrayList with elementData == DEFAULTCAPACITY_EMPTY_ELEMENTDATA + * will be expanded to DEFAULT_CAPACITY when the first element is added. + */ + transient Object[] elementData; // non-private to simplify nested class access + + /** + * The size of the ArrayList (the number of elements it contains). + * + * @serial + */ + int size; + + /** + * Constructs an empty list with the specified initial capacity. + * + * @param initialCapacity the initial capacity of the list + * @throws IllegalArgumentException if the specified initial capacity + * is negative + */ + public XArrayList(int initialCapacity) { + if (initialCapacity > 0) { + this.elementData = new Object[initialCapacity]; + } else if (initialCapacity == 0) { + this.elementData = EMPTY_ELEMENTDATA; + } else { + throw new IllegalArgumentException("Illegal Capacity: "+ + initialCapacity); + } + } + + /** + * Constructs an empty list with an initial capacity of ten. + */ + public XArrayList() { + this.elementData = DEFAULTCAPACITY_EMPTY_ELEMENTDATA; + } + + /** + * Constructs a list containing the elements of the specified + * collection, in the order they are returned by the collection's + * iterator. + * + * @param c the collection whose elements are to be placed into this list + * @throws NullPointerException if the specified collection is null + */ + public XArrayList(Collection<? extends E> c) { + elementData = c.toArray(); + if ((size = elementData.length) != 0) { + // defend against c.toArray (incorrectly) not returning Object[] + // (see e.g. https://bugs.openjdk.java.net/browse/JDK-6260652) + if (elementData.getClass() != Object[].class) + elementData = Arrays.copyOf(elementData, size, Object[].class); + } else { + // replace with empty array. + this.elementData = EMPTY_ELEMENTDATA; + } + } + + /** + * Trims the capacity of this {@code XArrayList} instance to be the + * list's current size. An application can use this operation to minimize + * the storage of an {@code XArrayList} instance. + */ + public void trimToSize() { + modCount++; + if (size < elementData.length) { + elementData = (size == 0) + ? EMPTY_ELEMENTDATA + : Arrays.copyOf(elementData, size); + } + } + + /** + * Increases the capacity of this {@code XArrayList} instance, if + * necessary, to ensure that it can hold at least the number of elements + * specified by the minimum capacity argument.
+ * + * @param minCapacity the desired minimum capacity + */ + public void ensureCapacity(int minCapacity) { + if (minCapacity > elementData.length + && !(elementData == DEFAULTCAPACITY_EMPTY_ELEMENTDATA + && minCapacity <= DEFAULT_CAPACITY)) { + modCount++; + grow(minCapacity); + } + } + + /** + * Increases the capacity to ensure that it can hold at least the + * number of elements specified by the minimum capacity argument. + * + * @param minCapacity the desired minimum capacity + * @throws OutOfMemoryError if minCapacity is less than zero + */ + private Object[] grow(int minCapacity) { + int oldCapacity = elementData.length; + if (oldCapacity > 0 || elementData != DEFAULTCAPACITY_EMPTY_ELEMENTDATA) { + int newCapacity = newLength(oldCapacity, + minCapacity - oldCapacity, /* minimum growth */ + oldCapacity >> 1 /* preferred growth */); + return elementData = Arrays.copyOf(elementData, newCapacity); + } else { + return elementData = new Object[Math.max(DEFAULT_CAPACITY, minCapacity)]; + } + } + + private Object[] grow() { + return grow(size + 1); + } + + /** + * Returns the number of elements in this list. + * + * @return the number of elements in this list + */ + public int size() { + return size; + } + + /** + * Returns {@code true} if this list contains no elements. + * + * @return {@code true} if this list contains no elements + */ + public boolean isEmpty() { + return size == 0; + } + + /** + * Returns {@code true} if this list contains the specified element. + * More formally, returns {@code true} if and only if this list contains + * at least one element {@code e} such that + * {@code Objects.equals(o, e)}. + * + * @param o element whose presence in this list is to be tested + * @return {@code true} if this list contains the specified element + */ + public boolean contains(Object o) { + return indexOf(o) >= 0; + } + + /** + * Returns the index of the first occurrence of the specified element + * in this list, or -1 if this list does not contain the element. + * More formally, returns the lowest index {@code i} such that + * {@code Objects.equals(o, get(i))}, + * or -1 if there is no such index. + */ + public int indexOf(Object o) { + return indexOfRange(o, 0, size); + } + + int indexOfRange(Object o, int start, int end) { + Object[] es = elementData; + if (o == null) { + for (int i = start; i < end; i++) { + if (es[i] == null) { + return i; + } + } + } else { + for (int i = start; i < end; i++) { + if (o.equals(es[i])) { + return i; + } + } + } + return -1; + } + + /** + * Returns the index of the last occurrence of the specified element + * in this list, or -1 if this list does not contain the element. + * More formally, returns the highest index {@code i} such that + * {@code Objects.equals(o, get(i))}, + * or -1 if there is no such index. + */ + public int lastIndexOf(Object o) { + return lastIndexOfRange(o, 0, size); + } + + int lastIndexOfRange(Object o, int start, int end) { + Object[] es = elementData; + if (o == null) { + for (int i = end - 1; i >= start; i--) { + if (es[i] == null) { + return i; + } + } + } else { + for (int i = end - 1; i >= start; i--) { + if (o.equals(es[i])) { + return i; + } + } + } + return -1; + } + + /** + * Returns a shallow copy of this {@code XArrayList} instance. (The + * elements themselves are not copied.) 
+     *
+     * @return a clone of this {@code XArrayList} instance
+     */
+    public Object clone() {
+        try {
+            XArrayList<?> v = (XArrayList<?>) super.clone();
+            v.elementData = Arrays.copyOf(elementData, size);
+            v.modCount = 0;
+            return v;
+        } catch (CloneNotSupportedException e) {
+            // this shouldn't happen, since we are Cloneable
+            throw new InternalError(e);
+        }
+    }
+
+    /**
+     * Returns an array containing all of the elements in this list
+     * in proper sequence (from first to last element).
+     *
+     * <p>
The returned array will be "safe" in that no references to it are + * maintained by this list. (In other words, this method must allocate + * a new array). The caller is thus free to modify the returned array. + * + *
<p>
This method acts as bridge between array-based and collection-based + * APIs. + * + * @return an array containing all of the elements in this list in + * proper sequence + */ + public Object[] toArray() { + return Arrays.copyOf(elementData, size); + } + + /** + * Returns an array containing all of the elements in this list in proper + * sequence (from first to last element); the runtime type of the returned + * array is that of the specified array. If the list fits in the + * specified array, it is returned therein. Otherwise, a new array is + * allocated with the runtime type of the specified array and the size of + * this list. + * + *
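A quick illustration of the two toArray flavors described above. This is a minimal sketch with made-up values, not part of the patch itself:

    XArrayList<String> list = new XArrayList<>();
    list.add("a");
    list.add("b");
    Object[] objs   = list.toArray();                 // always a freshly allocated Object[]
    String[] exact  = list.toArray(new String[0]);    // undersized argument: a new String[] is allocated
    String[] padded = list.toArray(new String[4]);    // oversized: the slot at index size (2) is set to null
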
<p>
If the list fits in the specified array with room to spare + * (i.e., the array has more elements than the list), the element in + * the array immediately following the end of the collection is set to + * {@code null}. (This is useful in determining the length of the + * list only if the caller knows that the list does not contain + * any null elements.) + * + * @param a the array into which the elements of the list are to + * be stored, if it is big enough; otherwise, a new array of the + * same runtime type is allocated for this purpose. + * @return an array containing the elements of the list + * @throws ArrayStoreException if the runtime type of the specified array + * is not a supertype of the runtime type of every element in + * this list + * @throws NullPointerException if the specified array is null + */ + @SuppressWarnings("unchecked") + public T[] toArray(T[] a) { + if (a.length < size) + // Make a new array of a's runtime type, but my contents: + return (T[]) Arrays.copyOf(elementData, size, a.getClass()); + System.arraycopy(elementData, 0, a, 0, size); + if (a.length > size) + a[size] = null; + return a; + } + + // Positional Access Operations + + @SuppressWarnings("unchecked") + E elementData(int index) { + return (E) elementData[index]; + } + + @SuppressWarnings("unchecked") + static E elementAt(Object[] es, int index) { + return (E) es[index]; + } + + /** + * Returns the element at the specified position in this list. + * + * @param index index of the element to return + * @return the element at the specified position in this list + * @throws IndexOutOfBoundsException {@inheritDoc} + */ + public E get(int index) { + Objects.checkIndex(index, size); + return elementData(index); + } + + /** + * Replaces the element at the specified position in this list with + * the specified element. + * + * @param index index of the element to replace + * @param element element to be stored at the specified position + * @return the element previously at the specified position + * @throws IndexOutOfBoundsException {@inheritDoc} + */ + public E set(int index, E element) { + Objects.checkIndex(index, size); + E oldValue = elementData(index); + elementData[index] = element; + return oldValue; + } + + /** + * This helper method split out from add(E) to keep method + * bytecode size under 35 (the -XX:MaxInlineSize default value), + * which helps when add(E) is called in a C1-compiled loop. + */ + private void add(E e, Object[] elementData, int s) { + if (s == elementData.length) + elementData = grow(); + elementData[s] = e; + size = s + 1; + } + + /** + * Appends the specified element to the end of this list. + * + * @param e element to be appended to this list + * @return {@code true} (as specified by {@link Collection#add}) + */ + public boolean add(E e) { + modCount++; + add(e, elementData, size); + return true; + } + + /** + * Inserts the specified element at the specified position in this + * list. Shifts the element currently at that position (if any) and + * any subsequent elements to the right (adds one to their indices). 
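A small sketch (hypothetical values, not from the patch) of how the positional operations above behave, in particular how add(int, E) shifts elements right and set(int, E) returns the displaced element:

    XArrayList<String> list = new XArrayList<>();
    list.add("a");                   // ["a"]
    list.add("c");                   // ["a", "c"]
    list.add(1, "b");                // shifts "c" right: ["a", "b", "c"]
    String old = list.set(2, "z");   // returns "c"; list is now ["a", "b", "z"]
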
+ * + * @param index index at which the specified element is to be inserted + * @param element element to be inserted + * @throws IndexOutOfBoundsException {@inheritDoc} + */ + public void add(int index, E element) { + rangeCheckForAdd(index); + modCount++; + final int s; + Object[] elementData; + if ((s = size) == (elementData = this.elementData).length) + elementData = grow(); + System.arraycopy(elementData, index, + elementData, index + 1, + s - index); + elementData[index] = element; + size = s + 1; + } + + /** + * Removes the element at the specified position in this list. + * Shifts any subsequent elements to the left (subtracts one from their + * indices). + * + * @param index the index of the element to be removed + * @return the element that was removed from the list + * @throws IndexOutOfBoundsException {@inheritDoc} + */ + public E remove(int index) { + Objects.checkIndex(index, size); + final Object[] es = elementData; + + @SuppressWarnings("unchecked") E oldValue = (E) es[index]; + fastRemove(es, index); + + return oldValue; + } + + /** + * {@inheritDoc} + */ + public boolean equals(Object o) { + if (o == this) { + return true; + } + + if (!(o instanceof List)) { + return false; + } + + final int expectedModCount = modCount; + // XArrayList can be subclassed and given arbitrary behavior, but we can + // still deal with the common case where o is XArrayList precisely + boolean equal = (o.getClass() == XArrayList.class) + ? equalsArrayList((XArrayList) o) + : equalsRange((List) o, 0, size); + + checkForComodification(expectedModCount); + return equal; + } + + boolean equalsRange(List other, int from, int to) { + final Object[] es = elementData; + if (to > es.length) { + throw new ConcurrentModificationException(); + } + var oit = other.iterator(); + for (; from < to; from++) { + if (!oit.hasNext() || !Objects.equals(es[from], oit.next())) { + return false; + } + } + return !oit.hasNext(); + } + + private boolean equalsArrayList(XArrayList other) { + final int otherModCount = other.modCount; + final int s = size; + boolean equal; + if (equal = (s == other.size)) { + final Object[] otherEs = other.elementData; + final Object[] es = elementData; + if (s > es.length || s > otherEs.length) { + throw new ConcurrentModificationException(); + } + for (int i = 0; i < s; i++) { + if (!Objects.equals(es[i], otherEs[i])) { + equal = false; + break; + } + } + } + other.checkForComodification(otherModCount); + return equal; + } + + private void checkForComodification(final int expectedModCount) { + if (modCount != expectedModCount) { + throw new ConcurrentModificationException(); + } + } + + /** + * {@inheritDoc} + */ + public int hashCode() { + int expectedModCount = modCount; + int hash = hashCodeRange(0, size); + checkForComodification(expectedModCount); + return hash; + } + + int hashCodeRange(int from, int to) { + final Object[] es = elementData; + if (to > es.length) { + throw new ConcurrentModificationException(); + } + int hashCode = 1; + for (int i = from; i < to; i++) { + Object e = es[i]; + hashCode = 31 * hashCode + (e == null ? 0 : e.hashCode()); + } + return hashCode; + } + + /** + * Removes the first occurrence of the specified element from this list, + * if it is present. If the list does not contain the element, it is + * unchanged. More formally, removes the element with the lowest index + * {@code i} such that + * {@code Objects.equals(o, get(i))} + * (if such an element exists). 
Returns {@code true} if this list + * contained the specified element (or equivalently, if this list + * changed as a result of the call). + * + * @param o element to be removed from this list, if present + * @return {@code true} if this list contained the specified element + */ + public boolean remove(Object o) { + final Object[] es = elementData; + final int size = this.size; + int i = 0; + found: { + if (o == null) { + for (; i < size; i++) + if (es[i] == null) + break found; + } else { + for (; i < size; i++) + if (o.equals(es[i])) + break found; + } + return false; + } + fastRemove(es, i); + return true; + } + + /** + * Private remove method that skips bounds checking and does not + * return the value removed. + */ + private void fastRemove(Object[] es, int i) { + modCount++; + final int newSize; + if ((newSize = size - 1) > i) + System.arraycopy(es, i + 1, es, i, newSize - i); + es[size = newSize] = null; + } + + /** + * Removes all of the elements from this list. The list will + * be empty after this call returns. + */ + public void clear() { + modCount++; + final Object[] es = elementData; + for (int to = size, i = size = 0; i < to; i++) + es[i] = null; + } + + /** + * Appends all of the elements in the specified collection to the end of + * this list, in the order that they are returned by the + * specified collection's Iterator. The behavior of this operation is + * undefined if the specified collection is modified while the operation + * is in progress. (This implies that the behavior of this call is + * undefined if the specified collection is this list, and this + * list is nonempty.) + * + * @param c collection containing elements to be added to this list + * @return {@code true} if this list changed as a result of the call + * @throws NullPointerException if the specified collection is null + */ + public boolean addAll(Collection c) { + Object[] a = c.toArray(); + modCount++; + int numNew = a.length; + if (numNew == 0) + return false; + Object[] elementData; + final int s; + if (numNew > (elementData = this.elementData).length - (s = size)) + elementData = grow(s + numNew); + System.arraycopy(a, 0, elementData, s, numNew); + size = s + numNew; + return true; + } + + /** + * Inserts all of the elements in the specified collection into this + * list, starting at the specified position. Shifts the element + * currently at that position (if any) and any subsequent elements to + * the right (increases their indices). The new elements will appear + * in the list in the order that they are returned by the + * specified collection's iterator. 
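A short sketch of the insertion-order guarantee just described, again with hypothetical values:

    XArrayList<Integer> nums = new XArrayList<>();
    nums.add(1);
    nums.add(4);                               // [1, 4]
    nums.addAll(1, java.util.List.of(2, 3));   // keeps iterator order: [1, 2, 3, 4]
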
+ * + * @param index index at which to insert the first element from the + * specified collection + * @param c collection containing elements to be added to this list + * @return {@code true} if this list changed as a result of the call + * @throws IndexOutOfBoundsException {@inheritDoc} + * @throws NullPointerException if the specified collection is null + */ + public boolean addAll(int index, Collection c) { + rangeCheckForAdd(index); + + Object[] a = c.toArray(); + modCount++; + int numNew = a.length; + if (numNew == 0) + return false; + Object[] elementData; + final int s; + if (numNew > (elementData = this.elementData).length - (s = size)) + elementData = grow(s + numNew); + + int numMoved = s - index; + if (numMoved > 0) + System.arraycopy(elementData, index, + elementData, index + numNew, + numMoved); + System.arraycopy(a, 0, elementData, index, numNew); + size = s + numNew; + return true; + } + + /** + * Removes from this list all of the elements whose index is between + * {@code fromIndex}, inclusive, and {@code toIndex}, exclusive. + * Shifts any succeeding elements to the left (reduces their index). + * This call shortens the list by {@code (toIndex - fromIndex)} elements. + * (If {@code toIndex==fromIndex}, this operation has no effect.) + * + * @throws IndexOutOfBoundsException if {@code fromIndex} or + * {@code toIndex} is out of range + * ({@code fromIndex < 0 || + * toIndex > size() || + * toIndex < fromIndex}) + */ + protected void removeRange(int fromIndex, int toIndex) { + if (fromIndex > toIndex) { + throw new IndexOutOfBoundsException( + outOfBoundsMsg(fromIndex, toIndex)); + } + modCount++; + shiftTailOverGap(elementData, fromIndex, toIndex); + } + + /** Erases the gap from lo to hi, by sliding down following elements. */ + private void shiftTailOverGap(Object[] es, int lo, int hi) { + System.arraycopy(es, hi, es, lo, size - hi); + for (int to = size, i = (size -= hi - lo); i < to; i++) + es[i] = null; + } + + /** + * A version of rangeCheck used by add and addAll. + */ + private void rangeCheckForAdd(int index) { + if (index > size || index < 0) + throw new IndexOutOfBoundsException(outOfBoundsMsg(index)); + } + + /** + * Constructs an IndexOutOfBoundsException detail message. + * Of the many possible refactorings of the error handling code, + * this "outlining" performs best with both server and client VMs. + */ + private String outOfBoundsMsg(int index) { + return "Index: "+index+", Size: "+size; + } + + /** + * A version used in checking (fromIndex > toIndex) condition + */ + private static String outOfBoundsMsg(int fromIndex, int toIndex) { + return "From Index: " + fromIndex + " > To Index: " + toIndex; + } + + /** + * Removes from this list all of its elements that are contained in the + * specified collection. + * + * @param c collection containing elements to be removed from this list + * @return {@code true} if this list changed as a result of the call + * @throws ClassCastException if the class of an element of this list + * is incompatible with the specified collection + * (optional) + * @throws NullPointerException if this list contains a null element and the + * specified collection does not permit null elements + * (optional), + * or if the specified collection is null + * @see Collection#contains(Object) + */ + public boolean removeAll(Collection c) { + return batchRemove(c, false, 0, size); + } + + /** + * Retains only the elements in this list that are contained in the + * specified collection. 
In other words, removes from this list all + * of its elements that are not contained in the specified collection. + * + * @param c collection containing elements to be retained in this list + * @return {@code true} if this list changed as a result of the call + * @throws ClassCastException if the class of an element of this list + * is incompatible with the specified collection + * (optional) + * @throws NullPointerException if this list contains a null element and the + * specified collection does not permit null elements + * (optional), + * or if the specified collection is null + * @see Collection#contains(Object) + */ + public boolean retainAll(Collection c) { + return batchRemove(c, true, 0, size); + } + + boolean batchRemove(Collection c, boolean complement, + final int from, final int end) { + Objects.requireNonNull(c); + final Object[] es = elementData; + int r; + // Optimize for initial run of survivors + for (r = from;; r++) { + if (r == end) + return false; + if (c.contains(es[r]) != complement) + break; + } + int w = r++; + try { + for (Object e; r < end; r++) + if (c.contains(e = es[r]) == complement) + es[w++] = e; + } catch (Throwable ex) { + // Preserve behavioral compatibility with AbstractCollection, + // even if c.contains() throws. + System.arraycopy(es, r, es, w, end - r); + w += end - r; + throw ex; + } finally { + modCount += end - w; + shiftTailOverGap(es, w, end); + } + return true; + } + + /** + * Saves the state of the {@code XArrayList} instance to a stream + * (that is, serializes it). + * + * @param s the stream + * @throws java.io.IOException if an I/O error occurs + * @serialData The length of the array backing the {@code XArrayList} + * instance is emitted (int), followed by all of its elements + * (each an {@code Object}) in the proper order. + */ + private void writeObject(java.io.ObjectOutputStream s) + throws java.io.IOException { + // Write out element count, and any hidden stuff + int expectedModCount = modCount; + s.defaultWriteObject(); + + // Write out size as capacity for behavioral compatibility with clone() + s.writeInt(size); + + // Write out all elements in the proper order. + for (int i=0; i
<size; i++) {
+            s.writeObject(elementData[i]);
+        }
+
+        if (modCount != expectedModCount) {
+            throw new ConcurrentModificationException();
+        }
+    }
+
+    /**
+     * Returns a list iterator over the elements in this list (in proper
+     * sequence), starting at the specified position in the list.
+     * The specified index indicates the first element that would be
+     * returned by an initial call to {@link ListIterator#next next}.
+     * An initial call to {@link ListIterator#previous previous} would
+     * return the element with the specified index minus one.
+     *
+     * <p>
The returned list iterator is fail-fast. + * + * @throws IndexOutOfBoundsException {@inheritDoc} + */ + public ListIterator listIterator(int index) { + rangeCheckForAdd(index); + return new ListItr(index); + } + + /** + * Returns a list iterator over the elements in this list (in proper + * sequence). + * + *
<p>
The returned list iterator is fail-fast. + * + * @see #listIterator(int) + */ + public ListIterator listIterator() { + return new ListItr(0); + } + + /** + * Returns an iterator over the elements in this list in proper sequence. + * + *
<p>
The returned iterator is fail-fast. + * + * @return an iterator over the elements in this list in proper sequence + */ + public Iterator iterator() { + return new Itr(); + } + + /** + * An optimized version of AbstractList.Itr + */ + private class Itr implements Iterator { + int cursor; // index of next element to return + int lastRet = -1; // index of last element returned; -1 if no such + int expectedModCount = modCount; + + // prevent creating a synthetic constructor + Itr() {} + + public boolean hasNext() { + return cursor != size; + } + + @SuppressWarnings("unchecked") + public E next() { + checkForComodification(); + int i = cursor; + if (i >= size) + throw new NoSuchElementException(); + Object[] elementData = XArrayList.this.elementData; + if (i >= elementData.length) + throw new ConcurrentModificationException(); + cursor = i + 1; + return (E) elementData[lastRet = i]; + } + + public void remove() { + if (lastRet < 0) + throw new IllegalStateException(); + checkForComodification(); + + try { + XArrayList.this.remove(lastRet); + cursor = lastRet; + lastRet = -1; + expectedModCount = modCount; + } catch (IndexOutOfBoundsException ex) { + throw new ConcurrentModificationException(); + } + } + + @Override + public void forEachRemaining(Consumer action) { + Objects.requireNonNull(action); + final int size = XArrayList.this.size; + int i = cursor; + if (i < size) { + final Object[] es = elementData; + if (i >= es.length) + throw new ConcurrentModificationException(); + for (; i < size && modCount == expectedModCount; i++) + action.accept(elementAt(es, i)); + // update once at end to reduce heap write traffic + cursor = i; + lastRet = i - 1; + checkForComodification(); + } + } + + final void checkForComodification() { + if (modCount != expectedModCount) + throw new ConcurrentModificationException(); + } + } + + /** + * An optimized version of AbstractList.ListItr + */ + private class ListItr extends Itr implements ListIterator { + ListItr(int index) { + super(); + cursor = index; + } + + public boolean hasPrevious() { + return cursor != 0; + } + + public int nextIndex() { + return cursor; + } + + public int previousIndex() { + return cursor - 1; + } + + @SuppressWarnings("unchecked") + public E previous() { + checkForComodification(); + int i = cursor - 1; + if (i < 0) + throw new NoSuchElementException(); + Object[] elementData = XArrayList.this.elementData; + if (i >= elementData.length) + throw new ConcurrentModificationException(); + cursor = i; + return (E) elementData[lastRet = i]; + } + + public void set(E e) { + if (lastRet < 0) + throw new IllegalStateException(); + checkForComodification(); + + try { + XArrayList.this.set(lastRet, e); + } catch (IndexOutOfBoundsException ex) { + throw new ConcurrentModificationException(); + } + } + + public void add(E e) { + checkForComodification(); + + try { + int i = cursor; + XArrayList.this.add(i, e); + cursor = i + 1; + lastRet = -1; + expectedModCount = modCount; + } catch (IndexOutOfBoundsException ex) { + throw new ConcurrentModificationException(); + } + } + } + + /** + * Return a new cursor for this XArrayList. + * @return a cursor + */ + public InlineCursor cursor() { + return new AListCursor<>(0); + } + + /** + * Create an inline cursor for this XArrayList. + */ + private inline class AListCursor implements InlineCursor { + // Inner class field 'this' is initialized + int index; + int expectedModCount; + + /** + * Create a new Cursor for this XArrayList. 
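Ahead of the constructor details below, a minimal traversal sketch for the cursor API being defined here. It assumes an XArrayList<String> named list; process is a stand-in for arbitrary consumer code. Note that advance() returns a fresh cursor value rather than mutating the receiver:

    for (InlineCursor<String> cur = list.cursor(); cur.exists(); cur = cur.advance()) {
        process(cur.get());   // get() throws NoSuchElementException once exists() is false
    }
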
+         *
+         * @param cursor index
+         */
+        public AListCursor(int cursor) {
+            this.index = cursor;
+            this.expectedModCount = XArrayList.this.modCount;
+        }
+
+        @Override
+        public boolean exists() {
+            return index < XArrayList.this.size;
+        }
+
+        @SuppressWarnings("unchecked")
+        public E get() {
+            if (exists()) {
+                checkForComodification();
+                try {
+                    return (E) XArrayList.this.elementData[index];
+                } catch (ArrayIndexOutOfBoundsException aioobe) {
+                    throw new ConcurrentModificationException();
+                }
+            }
+            throw new NoSuchElementException();
+        }
+
+        @Override
+        public AListCursor advance() {
+            // new Cursor will have a current expectedModCount
+            // TBD: Saturate index? So calling adv, adv, adv, prev == last
+            return new AListCursor<>(Math.min(index + 1, size));
+        }
+
+        final void checkForComodification() {
+            if (XArrayList.this.modCount != expectedModCount)
+                throw new ConcurrentModificationException();
+        }
+    }
+
+    /**
+     * Returns an iterator (using an InlineCursor) over the elements in this list in proper sequence.
+     *
+     * <p>
The returned iterator is fail-fast. + * + * @return an iterator over the elements in this list in proper sequence + */ + public Iterator iteratorCurs() { + return new CurItr(); + } + + /** + * Iterate using a Cursor. + */ + private class CurItr implements Iterator { + AListCursor cursor; + AListCursor lastRet; + + // prevent creating a synthetic constructor + CurItr() { + this.cursor = new AListCursor(0); + this.lastRet = this.cursor; + } + + public boolean hasNext() { + return cursor.advance().exists(); + } + + @SuppressWarnings("unchecked") + public E next() { + E val = cursor.get(); + lastRet = cursor; + cursor = cursor.advance(); + return val; + } + + @Override + public void forEachRemaining(Consumer action) { + Objects.requireNonNull(action); + + AListCursor cur = cursor; + while (cur.exists()) { + E val = cur.get(); + action.accept(val); + cur = cur.advance(); + } + cursor = cur; + } + + public String toString() { + return "cur: " + cursor; + } + } + + + /** + * Returns a view of the portion of this list between the specified + * {@code fromIndex}, inclusive, and {@code toIndex}, exclusive. (If + * {@code fromIndex} and {@code toIndex} are equal, the returned list is + * empty.) The returned list is backed by this list, so non-structural + * changes in the returned list are reflected in this list, and vice-versa. + * The returned list supports all of the optional list operations. + * + *
<p>
This method eliminates the need for explicit range operations (of + * the sort that commonly exist for arrays). Any operation that expects + * a list can be used as a range operation by passing a subList view + * instead of a whole list. For example, the following idiom + * removes a range of elements from a list: + *
<pre>
+     *      list.subList(from, to).clear();
+     * </pre>
+ * Similar idioms may be constructed for {@link #indexOf(Object)} and + * {@link #lastIndexOf(Object)}, and all of the algorithms in the + * {@link Collections} class can be applied to a subList. + * + *
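A few of the idioms alluded to above, sketched with hypothetical indices and assuming the list is large enough:

    list.subList(10, 20).clear();                         // removes elements 10..19 from the backing list
    int i = list.subList(0, 50).indexOf("x");             // range-limited search; the index is view-relative
    java.util.Collections.reverse(list.subList(0, 5));    // Collections algorithms operate on the view
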
<p>
The semantics of the list returned by this method become undefined if + * the backing list (i.e., this list) is structurally modified in + * any way other than via the returned list. (Structural modifications are + * those that change the size of this list, or otherwise perturb it in such + * a fashion that iterations in progress may yield incorrect results.) + * + * @throws IndexOutOfBoundsException {@inheritDoc} + * @throws IllegalArgumentException {@inheritDoc} + */ + public List subList(int fromIndex, int toIndex) { + subListRangeCheck(fromIndex, toIndex, size); + return new SubList<>(this, fromIndex, toIndex); + } + + private static class SubList extends AbstractList implements RandomAccess { + private final XArrayList root; + private final SubList parent; + private final int offset; + private int size; + + /** + * Constructs a sublist of an arbitrary XArrayList. + */ + public SubList(XArrayList root, int fromIndex, int toIndex) { + this.root = root; + this.parent = null; + this.offset = fromIndex; + this.size = toIndex - fromIndex; + this.modCount = root.modCount; + } + + /** + * Constructs a sublist of another SubList. + */ + private SubList(SubList parent, int fromIndex, int toIndex) { + this.root = parent.root; + this.parent = parent; + this.offset = parent.offset + fromIndex; + this.size = toIndex - fromIndex; + this.modCount = root.modCount; + } + + public E set(int index, E element) { + Objects.checkIndex(index, size); + checkForComodification(); + E oldValue = root.elementData(offset + index); + root.elementData[offset + index] = element; + return oldValue; + } + + public E get(int index) { + Objects.checkIndex(index, size); + checkForComodification(); + return root.elementData(offset + index); + } + + public int size() { + checkForComodification(); + return size; + } + + public void add(int index, E element) { + rangeCheckForAdd(index); + checkForComodification(); + root.add(offset + index, element); + updateSizeAndModCount(1); + } + + public E remove(int index) { + Objects.checkIndex(index, size); + checkForComodification(); + E result = root.remove(offset + index); + updateSizeAndModCount(-1); + return result; + } + + protected void removeRange(int fromIndex, int toIndex) { + checkForComodification(); + root.removeRange(offset + fromIndex, offset + toIndex); + updateSizeAndModCount(fromIndex - toIndex); + } + + public boolean addAll(Collection c) { + return addAll(this.size, c); + } + + public boolean addAll(int index, Collection c) { + rangeCheckForAdd(index); + int cSize = c.size(); + if (cSize==0) + return false; + checkForComodification(); + root.addAll(offset + index, c); + updateSizeAndModCount(cSize); + return true; + } + + public void replaceAll(UnaryOperator operator) { + root.replaceAllRange(operator, offset, offset + size); + } + + public boolean removeAll(Collection c) { + return batchRemove(c, false); + } + + public boolean retainAll(Collection c) { + return batchRemove(c, true); + } + + private boolean batchRemove(Collection c, boolean complement) { + checkForComodification(); + int oldSize = root.size; + boolean modified = + root.batchRemove(c, complement, offset, offset + size); + if (modified) + updateSizeAndModCount(root.size - oldSize); + return modified; + } + + public boolean removeIf(Predicate filter) { + checkForComodification(); + int oldSize = root.size; + boolean modified = root.removeIf(filter, offset, offset + size); + if (modified) + updateSizeAndModCount(root.size - oldSize); + return modified; + } + + public Object[] toArray() { + 
checkForComodification(); + return Arrays.copyOfRange(root.elementData, offset, offset + size); + } + + @SuppressWarnings("unchecked") + public T[] toArray(T[] a) { + checkForComodification(); + if (a.length < size) + return (T[]) Arrays.copyOfRange( + root.elementData, offset, offset + size, a.getClass()); + System.arraycopy(root.elementData, offset, a, 0, size); + if (a.length > size) + a[size] = null; + return a; + } + + public boolean equals(Object o) { + if (o == this) { + return true; + } + + if (!(o instanceof List)) { + return false; + } + + boolean equal = root.equalsRange((List)o, offset, offset + size); + checkForComodification(); + return equal; + } + + public int hashCode() { + int hash = root.hashCodeRange(offset, offset + size); + checkForComodification(); + return hash; + } + + public int indexOf(Object o) { + int index = root.indexOfRange(o, offset, offset + size); + checkForComodification(); + return index >= 0 ? index - offset : -1; + } + + public int lastIndexOf(Object o) { + int index = root.lastIndexOfRange(o, offset, offset + size); + checkForComodification(); + return index >= 0 ? index - offset : -1; + } + + public boolean contains(Object o) { + return indexOf(o) >= 0; + } + + public Iterator iterator() { + return listIterator(); + } + + public ListIterator listIterator(int index) { + checkForComodification(); + rangeCheckForAdd(index); + + return new ListIterator() { + int cursor = index; + int lastRet = -1; + int expectedModCount = root.modCount; + + public boolean hasNext() { + return cursor != SubList.this.size; + } + + @SuppressWarnings("unchecked") + public E next() { + checkForComodification(); + int i = cursor; + if (i >= SubList.this.size) + throw new NoSuchElementException(); + Object[] elementData = root.elementData; + if (offset + i >= elementData.length) + throw new ConcurrentModificationException(); + cursor = i + 1; + return (E) elementData[offset + (lastRet = i)]; + } + + public boolean hasPrevious() { + return cursor != 0; + } + + @SuppressWarnings("unchecked") + public E previous() { + checkForComodification(); + int i = cursor - 1; + if (i < 0) + throw new NoSuchElementException(); + Object[] elementData = root.elementData; + if (offset + i >= elementData.length) + throw new ConcurrentModificationException(); + cursor = i; + return (E) elementData[offset + (lastRet = i)]; + } + + public void forEachRemaining(Consumer action) { + Objects.requireNonNull(action); + final int size = SubList.this.size; + int i = cursor; + if (i < size) { + final Object[] es = root.elementData; + if (offset + i >= es.length) + throw new ConcurrentModificationException(); + for (; i < size && modCount == expectedModCount; i++) + action.accept(elementAt(es, offset + i)); + // update once at end to reduce heap write traffic + cursor = i; + lastRet = i - 1; + checkForComodification(); + } + } + + public int nextIndex() { + return cursor; + } + + public int previousIndex() { + return cursor - 1; + } + + public void remove() { + if (lastRet < 0) + throw new IllegalStateException(); + checkForComodification(); + + try { + SubList.this.remove(lastRet); + cursor = lastRet; + lastRet = -1; + expectedModCount = root.modCount; + } catch (IndexOutOfBoundsException ex) { + throw new ConcurrentModificationException(); + } + } + + public void set(E e) { + if (lastRet < 0) + throw new IllegalStateException(); + checkForComodification(); + + try { + root.set(offset + lastRet, e); + } catch (IndexOutOfBoundsException ex) { + throw new ConcurrentModificationException(); + } + } + + public 
void add(E e) { + checkForComodification(); + + try { + int i = cursor; + SubList.this.add(i, e); + cursor = i + 1; + lastRet = -1; + expectedModCount = root.modCount; + } catch (IndexOutOfBoundsException ex) { + throw new ConcurrentModificationException(); + } + } + + final void checkForComodification() { + if (root.modCount != expectedModCount) + throw new ConcurrentModificationException(); + } + }; + } + + public List subList(int fromIndex, int toIndex) { + subListRangeCheck(fromIndex, toIndex, size); + return new SubList<>(this, fromIndex, toIndex); + } + + private void rangeCheckForAdd(int index) { + if (index < 0 || index > this.size) + throw new IndexOutOfBoundsException(outOfBoundsMsg(index)); + } + + private String outOfBoundsMsg(int index) { + return "Index: "+index+", Size: "+this.size; + } + + private void checkForComodification() { + if (root.modCount != modCount) + throw new ConcurrentModificationException(); + } + + private void updateSizeAndModCount(int sizeChange) { + SubList slist = this; + do { + slist.size += sizeChange; + slist.modCount = root.modCount; + slist = slist.parent; + } while (slist != null); + } + + public Spliterator spliterator() { + checkForComodification(); + + // ArrayListSpliterator not used here due to late-binding + return new Spliterator() { + private int index = offset; // current index, modified on advance/split + private int fence = -1; // -1 until used; then one past last index + private int expectedModCount; // initialized when fence set + + private int getFence() { // initialize fence to size on first use + int hi; // (a specialized variant appears in method forEach) + if ((hi = fence) < 0) { + expectedModCount = modCount; + hi = fence = offset + size; + } + return hi; + } + + public XArrayList.ArrayListSpliterator trySplit() { + int hi = getFence(), lo = index, mid = (lo + hi) >>> 1; + // ArrayListSpliterator can be used here as the source is already bound + return (lo >= mid) ? 
null : // divide range in half unless too small + root.new ArrayListSpliterator(lo, index = mid, expectedModCount); + } + + public boolean tryAdvance(Consumer action) { + Objects.requireNonNull(action); + int hi = getFence(), i = index; + if (i < hi) { + index = i + 1; + @SuppressWarnings("unchecked") E e = (E)root.elementData[i]; + action.accept(e); + if (root.modCount != expectedModCount) + throw new ConcurrentModificationException(); + return true; + } + return false; + } + + public void forEachRemaining(Consumer action) { + Objects.requireNonNull(action); + int i, hi, mc; // hoist accesses and checks from loop + XArrayList lst = root; + Object[] a; + if ((a = lst.elementData) != null) { + if ((hi = fence) < 0) { + mc = modCount; + hi = offset + size; + } + else + mc = expectedModCount; + if ((i = index) >= 0 && (index = hi) <= a.length) { + for (; i < hi; ++i) { + @SuppressWarnings("unchecked") E e = (E) a[i]; + action.accept(e); + } + if (lst.modCount == mc) + return; + } + } + throw new ConcurrentModificationException(); + } + + public long estimateSize() { + return getFence() - index; + } + + public int characteristics() { + return Spliterator.ORDERED | Spliterator.SIZED | Spliterator.SUBSIZED; + } + }; + } + } + + /** + * @throws NullPointerException {@inheritDoc} + */ + @Override + public void forEach(Consumer action) { + Objects.requireNonNull(action); + final int expectedModCount = modCount; + final Object[] es = elementData; + final int size = this.size; + for (int i = 0; modCount == expectedModCount && i < size; i++) + action.accept(elementAt(es, i)); + if (modCount != expectedModCount) + throw new ConcurrentModificationException(); + } + + /** + * Creates a late-binding + * and fail-fast {@link Spliterator} over the elements in this + * list. + * + *
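Because Collection.stream() is built on spliterator(), the late-binding property described here is what makes the usual pattern below well-behaved even though the pipeline is assembled before traversal starts. A sketch, assuming an XArrayList<String> named list:

    long nonEmpty = list.stream()                  // the spliterator binds to elementData lazily, on first use
                        .filter(s -> !s.isEmpty())
                        .count();
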
<p>
The {@code Spliterator} reports {@link Spliterator#SIZED}, + * {@link Spliterator#SUBSIZED}, and {@link Spliterator#ORDERED}. + * Overriding implementations should document the reporting of additional + * characteristic values. + * + * @return a {@code Spliterator} over the elements in this list + * @since 1.8 + */ + @Override + public Spliterator spliterator() { + return new ArrayListSpliterator(0, -1, 0); + } + + /** Index-based split-by-two, lazily initialized Spliterator */ + final class ArrayListSpliterator implements Spliterator { + + /* + * If XArrayLists were immutable, or structurally immutable (no + * adds, removes, etc), we could implement their spliterators + * with Arrays.spliterator. Instead we detect as much + * interference during traversal as practical without + * sacrificing much performance. We rely primarily on + * modCounts. These are not guaranteed to detect concurrency + * violations, and are sometimes overly conservative about + * within-thread interference, but detect enough problems to + * be worthwhile in practice. To carry this out, we (1) lazily + * initialize fence and expectedModCount until the latest + * point that we need to commit to the state we are checking + * against; thus improving precision. (This doesn't apply to + * SubLists, that create spliterators with current non-lazy + * values). (2) We perform only a single + * ConcurrentModificationException check at the end of forEach + * (the most performance-sensitive method). When using forEach + * (as opposed to iterators), we can normally only detect + * interference after actions, not before. Further + * CME-triggering checks apply to all other possible + * violations of assumptions for example null or too-small + * elementData array given its size(), that could only have + * occurred due to interference. This allows the inner loop + * of forEach to run without any further checks, and + * simplifies lambda-resolution. While this does entail a + * number of checks, note that in the common case of + * list.stream().forEach(a), no checks or other computation + * occur anywhere other than inside forEach itself. The other + * less-often-used methods cannot take advantage of most of + * these streamlinings. + */ + + private int index; // current index, modified on advance/split + private int fence; // -1 until used; then one past last index + private int expectedModCount; // initialized when fence set + + /** Creates new spliterator covering the given range. */ + ArrayListSpliterator(int origin, int fence, int expectedModCount) { + this.index = origin; + this.fence = fence; + this.expectedModCount = expectedModCount; + } + + private int getFence() { // initialize fence to size on first use + int hi; // (a specialized variant appears in method forEach) + if ((hi = fence) < 0) { + expectedModCount = modCount; + hi = fence = size; + } + return hi; + } + + public ArrayListSpliterator trySplit() { + int hi = getFence(), lo = index, mid = (lo + hi) >>> 1; + return (lo >= mid) ? 
null : // divide range in half unless too small + new ArrayListSpliterator(lo, index = mid, expectedModCount); + } + + public boolean tryAdvance(Consumer action) { + if (action == null) + throw new NullPointerException(); + int hi = getFence(), i = index; + if (i < hi) { + index = i + 1; + @SuppressWarnings("unchecked") E e = (E)elementData[i]; + action.accept(e); + if (modCount != expectedModCount) + throw new ConcurrentModificationException(); + return true; + } + return false; + } + + public void forEachRemaining(Consumer action) { + int i, hi, mc; // hoist accesses and checks from loop + Object[] a; + if (action == null) + throw new NullPointerException(); + if ((a = elementData) != null) { + if ((hi = fence) < 0) { + mc = modCount; + hi = size; + } + else + mc = expectedModCount; + if ((i = index) >= 0 && (index = hi) <= a.length) { + for (; i < hi; ++i) { + @SuppressWarnings("unchecked") E e = (E) a[i]; + action.accept(e); + } + if (modCount == mc) + return; + } + } + throw new ConcurrentModificationException(); + } + + public long estimateSize() { + return getFence() - index; + } + + public int characteristics() { + return Spliterator.ORDERED | Spliterator.SIZED | Spliterator.SUBSIZED; + } + } + + // A tiny bit set implementation + + private static long[] nBits(int n) { + return new long[((n - 1) >> 6) + 1]; + } + private static void setBit(long[] bits, int i) { + bits[i >> 6] |= 1L << i; + } + private static boolean isClear(long[] bits, int i) { + return (bits[i >> 6] & (1L << i)) == 0; + } + + /** + * @throws NullPointerException {@inheritDoc} + */ + @Override + public boolean removeIf(Predicate filter) { + return removeIf(filter, 0, size); + } + + /** + * Removes all elements satisfying the given predicate, from index + * i (inclusive) to index end (exclusive). + */ + boolean removeIf(Predicate filter, int i, final int end) { + Objects.requireNonNull(filter); + int expectedModCount = modCount; + final Object[] es = elementData; + // Optimize for initial run of survivors + for (; i < end && !filter.test(elementAt(es, i)); i++) + ; + // Tolerate predicates that reentrantly access the collection for + // read (but writers still get CME), so traverse once to find + // elements to delete, a second pass to physically expunge. + if (i < end) { + final int beg = i; + final long[] deathRow = nBits(end - beg); + deathRow[0] = 1L; // set bit 0 + for (i = beg + 1; i < end; i++) + if (filter.test(elementAt(es, i))) + setBit(deathRow, i - beg); + if (modCount != expectedModCount) + throw new ConcurrentModificationException(); + modCount++; + int w = beg; + for (i = beg; i < end; i++) + if (isClear(deathRow, i - beg)) + es[w++] = es[i]; + shiftTailOverGap(es, w, end); + return true; + } else { + if (modCount != expectedModCount) + throw new ConcurrentModificationException(); + return false; + } + } + + @Override + public void replaceAll(UnaryOperator operator) { + replaceAllRange(operator, 0, size); + // TODO(8203662): remove increment of modCount from ... 
+ modCount++; + } + + private void replaceAllRange(UnaryOperator operator, int i, int end) { + Objects.requireNonNull(operator); + final int expectedModCount = modCount; + final Object[] es = elementData; + for (; modCount == expectedModCount && i < end; i++) + es[i] = operator.apply(elementAt(es, i)); + if (modCount != expectedModCount) + throw new ConcurrentModificationException(); + } + + @Override + @SuppressWarnings("unchecked") + public void sort(Comparator c) { + final int expectedModCount = modCount; + Arrays.sort((E[]) elementData, 0, size, c); + if (modCount != expectedModCount) + throw new ConcurrentModificationException(); + modCount++; + } + + void checkInvariants() { + // assert size >= 0; + // assert size == elementData.length || elementData[size] == null; + } + + /** + * Calculates a new array length given an array's current length, a preferred + * growth value, and a minimum growth value. If the preferred growth value + * is less than the minimum growth value, the minimum growth value is used in + * its place. If the sum of the current length and the preferred growth + * value does not exceed {@link #MAX_ARRAY_LENGTH}, that sum is returned. + * If the sum of the current length and the minimum growth value does not + * exceed {@code MAX_ARRAY_LENGTH}, then {@code MAX_ARRAY_LENGTH} is returned. + * If the sum does not overflow an int, then {@code Integer.MAX_VALUE} is + * returned. Otherwise, {@code OutOfMemoryError} is thrown. + * + * @param oldLength current length of the array (must be non negative) + * @param minGrowth minimum required growth of the array length (must be + * positive) + * @param prefGrowth preferred growth of the array length (ignored, if less + * then {@code minGrowth}) + * @return the new length of the array + * @throws OutOfMemoryError if increasing {@code oldLength} by + * {@code minGrowth} overflows. + */ + private static int newLength(int oldLength, int minGrowth, int prefGrowth) { + // assert oldLength >= 0 + // assert minGrowth > 0 + + int newLength = Math.max(minGrowth, prefGrowth) + oldLength; + if (newLength - MAX_ARRAY_LENGTH <= 0) { + return newLength; + } + return hugeLength(oldLength, minGrowth); + } + + private static int hugeLength(int oldLength, int minGrowth) { + int minLength = oldLength + minGrowth; + if (minLength < 0) { // overflow + throw new OutOfMemoryError("Required array length too large"); + } + if (minLength <= MAX_ARRAY_LENGTH) { + return MAX_ARRAY_LENGTH; + } + return Integer.MAX_VALUE; + } + + private static final int MAX_ARRAY_LENGTH = Integer.MAX_VALUE - 8; + + private static void subListRangeCheck(int fromIndex, int toIndex, int size) { + if (fromIndex < 0) + throw new IndexOutOfBoundsException("fromIndex = " + fromIndex); + if (toIndex > size) + throw new IndexOutOfBoundsException("toIndex = " + toIndex); + if (fromIndex > toIndex) + throw new IllegalArgumentException("fromIndex(" + fromIndex + + ") > toIndex(" + toIndex + ")"); + } +} diff --git a/test/micro/org/openjdk/bench/valhalla/corelibs/XArrayListCursorTest.java b/test/micro/org/openjdk/bench/valhalla/corelibs/XArrayListCursorTest.java new file mode 100644 index 00000000000..1f15d1761f5 --- /dev/null +++ b/test/micro/org/openjdk/bench/valhalla/corelibs/XArrayListCursorTest.java @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. 
+ */
+package org.openjdk.bench.valhalla.corelibs;
+
+import java.util.List;
+import java.util.Iterator;
+import java.util.concurrent.TimeUnit;
+
+import org.openjdk.jmh.annotations.*;
+import org.openjdk.jmh.infra.Blackhole;
+
+@Fork(1)
+@Warmup(iterations = 3, time = 1)
+@Measurement(iterations = 5, time = 3)
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@BenchmarkMode(Mode.AverageTime)
+@State(Scope.Thread)
+public class XArrayListCursorTest {
+    @Param({"100000"})
+    public static int size;
+
+    private static final String constantString = "abc";
+
+    private static XArrayList<String> list;
+
+    @Setup
+    public void setup() {
+        list = new XArrayList<>();
+        for (int i = 0; i < size; i++) {
+            list.add(constantString);
+        }
+    }
+
+    @Benchmark
+    public void getViaCursorWhileLoop(Blackhole blackhole) {
+        InlineCursor<String> cur = list.cursor();
+        while (cur.exists()) {
+            blackhole.consume(cur.get());
+            cur = cur.advance();
+        }
+    }
+
+    @Benchmark
+    public void getViaCursorForLoop(Blackhole blackhole) {
+        for (InlineCursor<String> cur = list.cursor();
+             cur.exists();
+             cur = cur.advance()) {
+            blackhole.consume(cur.get());
+        }
+    }
+
+    @Benchmark
+    public void getViaIterator(Blackhole blackhole) {
+        Iterator<String> it = list.iterator();
+        while (it.hasNext()) {
+            blackhole.consume(it.next());
+        }
+    }
+
+    @Benchmark
+    public void getViaIteratorCurs(Blackhole blackhole) {
+        Iterator<String> it = list.iteratorCurs();
+        while (it.hasNext()) {
+            blackhole.consume(it.next());
+        }
+    }
+
+    @Benchmark
+    public void getViaArray(Blackhole blackhole) {
+        for (int i = 0; i < list.size(); i++) {
+            blackhole.consume(list.get(i));
+        }
+    }
+}
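For completeness, one conventional way to launch this benchmark from code, using the standard JMH runner API. The launcher class name here is illustrative, and the Valhalla build may invoke its microbenchmarks differently:

    import org.openjdk.jmh.runner.Runner;
    import org.openjdk.jmh.runner.RunnerException;
    import org.openjdk.jmh.runner.options.Options;
    import org.openjdk.jmh.runner.options.OptionsBuilder;

    public class RunXArrayListCursorTest {
        public static void main(String[] args) throws RunnerException {
            Options opt = new OptionsBuilder()
                    .include(XArrayListCursorTest.class.getSimpleName())   // match benchmarks by class name
                    .build();
            new Runner(opt).run();   // runs every @Benchmark method in the matched class
        }
    }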