diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad
index 1e4ee33a9db..9d03d720480 100644
--- a/src/hotspot/cpu/aarch64/aarch64.ad
+++ b/src/hotspot/cpu/aarch64/aarch64.ad
@@ -971,6 +971,26 @@ reg_class v3_reg(
     V3, V3_H
 );
 
+// Class for 128 bit register v4
+reg_class v4_reg(
+    V4, V4_H
+);
+
+// Class for 128 bit register v5
+reg_class v5_reg(
+    V5, V5_H
+);
+
+// Class for 128 bit register v6
+reg_class v6_reg(
+    V6, V6_H
+);
+
+// Class for 128 bit register v7
+reg_class v7_reg(
+    V7, V7_H
+);
+
 // Singleton class for condition codes
 reg_class int_flags(RFLAGS);
 
@@ -4884,6 +4904,42 @@ operand vRegD_V3()
   interface(REG_INTER);
 %}
 
+operand vRegD_V4()
+%{
+  constraint(ALLOC_IN_RC(v4_reg));
+  match(RegD);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand vRegD_V5()
+%{
+  constraint(ALLOC_IN_RC(v5_reg));
+  match(RegD);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand vRegD_V6()
+%{
+  constraint(ALLOC_IN_RC(v6_reg));
+  match(RegD);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand vRegD_V7()
+%{
+  constraint(ALLOC_IN_RC(v7_reg));
+  match(RegD);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
 // Flags register, used as output of signed compare instructions
 
 // note that on AArch64 we also use this register as the output for
@@ -15390,14 +15446,17 @@ instruct string_compareLU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4
 %}
 
 instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
-       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
-       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
+                          iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
+                          iRegINoSp tmp3, iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6,
+                          vRegD_V0 vtmp0, vRegD_V1 vtmp1, rFlagsReg cr)
 %{
   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
-         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
-  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6,
+         TEMP vtmp0, TEMP vtmp1, KILL cr);
+  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU) "
+            "# KILL $str1 $cnt1 $str2 $cnt2 $tmp1 $tmp2 $tmp3 $tmp4 $tmp5 $tmp6 V0-V1 cr" %}
 
   ins_encode %{
     __ string_indexof($str1$$Register, $str2$$Register,
@@ -15411,14 +15470,17 @@ instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2
 %}
 
 instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
-       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
-       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
+                          iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
+                          iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6,
+                          vRegD_V0 vtmp0, vRegD_V1 vtmp1, rFlagsReg cr)
 %{
   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
-         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
-  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6,
+         TEMP vtmp0, TEMP vtmp1, KILL cr);
+  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL) "
+            "# KILL $str1 $cnt1 $str2 $cnt2 $tmp1 $tmp2 $tmp3 $tmp4 $tmp5 $tmp6 V0-V1 cr" %}
 
   ins_encode %{
     __ string_indexof($str1$$Register, $str2$$Register,
@@ -15432,14 +15494,17 @@ instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2
 %}
 
 instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
-       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
-       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
+                          iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
+                          iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6,
+                          vRegD_V0 vtmp0, vRegD_V1 vtmp1, rFlagsReg cr)
 %{
   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
-         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
-  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5,
+         TEMP tmp6, TEMP vtmp0, TEMP vtmp1, KILL cr);
+  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL) "
+            "# KILL $str1 $cnt1 $str2 $cnt2 $tmp1 $tmp2 $tmp3 $tmp4 $tmp5 $tmp6 V0-V1 cr" %}
 
   ins_encode %{
     __ string_indexof($str1$$Register, $str2$$Register,
@@ -15453,14 +15518,15 @@ instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2
 %}
 
 instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
-                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
-                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
+                              immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1,
+                              iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
 %{
   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
-  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}
+  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU) "
+            "# KILL $str1 $cnt1 $str2 $tmp1 $tmp2 $tmp3 $tmp4 cr" %}
 
   ins_encode %{
     int icnt2 = (int)$int_cnt2$$constant;
@@ -15474,14 +15540,15 @@ instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
 %}
 
 instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
-                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
-                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
+                              immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1,
+                              iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
 %{
   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
-  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}
+  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL) "
+            "# KILL $str1 $cnt1 $str2 $tmp1 $tmp2 $tmp3 $tmp4 cr" %}
 
   ins_encode %{
     int icnt2 = (int)$int_cnt2$$constant;
@@ -15495,14 +15562,15 @@ instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
 %}
 
 instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
-                 immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
-                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
+                              immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1,
+                              iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
 %{
   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
-  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}
+  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL) "
+            "# KILL $str1 $cnt1 $str2 $tmp1 $tmp2 $tmp3 $tmp4 cr" %}
 
   ins_encode %{
     int icnt2 = (int)$int_cnt2$$constant;
@@ -15567,13 +15635,17 @@ instruct string_equalsU(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
 
 instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                        iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
+                       vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2, vRegD_V3 vtmp3,
+                       vRegD_V4 vtmp4, vRegD_V5 vtmp5, vRegD_V6 vtmp6, vRegD_V7 vtmp7,
                        iRegP_R10 tmp, rFlagsReg cr)
 %{
   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
   match(Set result (AryEq ary1 ary2));
-  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3,
+         TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP vtmp5,
+         TEMP vtmp6, TEMP vtmp7, KILL cr);
 
-  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
+  format %{ "Array Equals $ary1,$ary2 -> $result # KILL $ary1 $ary2 $tmp $tmp1 $tmp2 $tmp3 V0-V7 cr" %}
   ins_encode %{
     address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register,
                                    $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
@@ -15588,13 +15660,17 @@ instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
 
 instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                        iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
+                       vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2, vRegD_V3 vtmp3,
+                       vRegD_V4 vtmp4, vRegD_V5 vtmp5, vRegD_V6 vtmp6, vRegD_V7 vtmp7,
                        iRegP_R10 tmp, rFlagsReg cr)
 %{
   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
   match(Set result (AryEq ary1 ary2));
-  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3,
+         TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP vtmp5,
+         TEMP vtmp6, TEMP vtmp7, KILL cr);
 
-  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
+  format %{ "Array Equals $ary1,$ary2 -> $result # KILL $ary1 $ary2 $tmp $tmp1 $tmp2 $tmp3 V0-V7 cr" %}
   ins_encode %{
     address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register,
                                    $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
@@ -15624,35 +15700,40 @@ instruct has_negatives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg c
 
 // fast char[] to byte[] compression
 instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
-                         vRegD_V0 tmp1, vRegD_V1 tmp2,
-                         vRegD_V2 tmp3, vRegD_V3 tmp4,
+                         vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2,
+                         vRegD_V3 vtmp3, vRegD_V4 vtmp4, vRegD_V5 vtmp5,
                          iRegI_R0 result, rFlagsReg cr)
 %{
   match(Set result (StrCompressedCopy src (Binary dst len)));
-  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
+  effect(TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP vtmp5,
+         USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
 
-  format %{ "String Compress $src,$dst -> $result    // KILL R1, R2, R3, R4" %}
+  format %{ "String Compress $src,$dst -> $result # KILL $src $dst $len V0-V5 cr" %}
   ins_encode %{
     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
-                           $tmp1$$FloatRegister, $tmp2$$FloatRegister,
-                           $tmp3$$FloatRegister, $tmp4$$FloatRegister,
+                           $vtmp0$$FloatRegister, $vtmp1$$FloatRegister,
+                           $vtmp2$$FloatRegister, $vtmp3$$FloatRegister,
+                           $vtmp4$$FloatRegister, $vtmp5$$FloatRegister,
                            $result$$Register);
   %}
   ins_pipe( pipe_slow );
 %}
 
 // fast byte[] to char[] inflation
-instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
-                        vRegD_V0 tmp1, vRegD_V1 tmp2, vRegD_V2 tmp3, iRegP_R3 tmp4, rFlagsReg cr)
+instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len, iRegP_R3 tmp,
+                        vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2, vRegD_V3 vtmp3,
+                        vRegD_V4 vtmp4, vRegD_V5 vtmp5, vRegD_V6 vtmp6, rFlagsReg cr)
 %{
   match(Set dummy (StrInflatedCopy src (Binary dst len)));
-  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
+  effect(TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3,
+         TEMP vtmp4, TEMP vtmp5, TEMP vtmp6, TEMP tmp,
+         USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
 
-  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
+  format %{ "String Inflate $src,$dst # KILL $tmp $src $dst $len V0-V6 cr" %}
   ins_encode %{
     address tpc = __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
-                                        $tmp1$$FloatRegister, $tmp2$$FloatRegister,
-                                        $tmp3$$FloatRegister, $tmp4$$Register);
+                                        $vtmp0$$FloatRegister, $vtmp1$$FloatRegister,
+                                        $vtmp2$$FloatRegister, $tmp$$Register); // vtmp3-vtmp6 are not passed; they are TEMPs only because byte_array_inflate clobbers v0-v6
     if (tpc == NULL) {
       ciEnv::current()->record_failure("CodeCache is full");
       return;
@@ -15663,19 +15744,20 @@ instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len
 
 // encode char[] to byte[] in ISO_8859_1
 instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
-                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
-                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
+                          vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2,
+                          vRegD_V3 vtmp3, vRegD_V4 vtmp4, vRegD_V5 vtmp5,
                           iRegI_R0 result, rFlagsReg cr)
 %{
   match(Set result (EncodeISOArray src (Binary dst len)));
-  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
-         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);
+  effect(USE_KILL src, USE_KILL dst, USE_KILL len, KILL vtmp0, KILL vtmp1,
+         KILL vtmp2, KILL vtmp3, KILL vtmp4, KILL vtmp5, KILL cr);
 
-  format %{ "Encode array $src,$dst,$len -> $result" %}
+  format %{ "Encode array $src,$dst,$len -> $result # KILL $src $dst $len V0-V5 cr" %}
   ins_encode %{
     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
-         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
-         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
+                        $result$$Register, $vtmp0$$FloatRegister, $vtmp1$$FloatRegister,
+                        $vtmp2$$FloatRegister, $vtmp3$$FloatRegister,
+                        $vtmp4$$FloatRegister, $vtmp5$$FloatRegister);
   %}
   ins_pipe( pipe_class_memory );
 %}
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
index 5753cc9a611..7f329a45d30 100644
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
@@ -4332,6 +4332,7 @@ void MacroAssembler::remove_frame(int framesize) {
 typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);
 
 // Search for str1 in str2 and return index or -1
+// Clobbers: rscratch1, rscratch2, rflags. May also clobber v0-v1, when icnt1==-1.
 void MacroAssembler::string_indexof(Register str2, Register str1,
                                     Register cnt2, Register cnt1,
                                     Register tmp1, Register tmp2,
@@ -5123,6 +5124,8 @@ address MacroAssembler::has_negatives(Register ary1, Register len, Register resu
   return pc();
 }
 
+// Clobbers: rscratch1, rscratch2, rflags
+// May also clobber v0-v7 when (!UseSimpleArrayEquals && UseSIMDForArrayEquals)
 address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
                                       Register tmp4, Register tmp5, Register result,
                                       Register cnt1, int elem_size) {
@@ -5615,10 +5618,13 @@ void MacroAssembler::fill_words(Register base, Register cnt, Register value)
 
 // Intrinsic for sun/nio/cs/ISO_8859_1$Encoder.implEncodeISOArray and
 // java/lang/StringUTF16.compress.
+//
+// Clobbers: src, dst, len, result, rscratch1, rscratch2, rflags, Vtmp1-Vtmp6
 void MacroAssembler::encode_iso_array(Register src, Register dst,
-                      Register len, Register result,
-                      FloatRegister Vtmp1, FloatRegister Vtmp2,
-                      FloatRegister Vtmp3, FloatRegister Vtmp4)
+                                      Register len, Register result,
+                                      FloatRegister Vtmp1, FloatRegister Vtmp2,
+                                      FloatRegister Vtmp3, FloatRegister Vtmp4,
+                                      FloatRegister Vtmp5, FloatRegister Vtmp6)
 {
     Label DONE, SET_RESULT, NEXT_32, NEXT_32_PRFM, LOOP_8, NEXT_8, LOOP_1, NEXT_1,
         NEXT_32_START, NEXT_32_PRFM_START;
@@ -5641,13 +5647,13 @@ void MacroAssembler::encode_iso_array(Register src, Register dst,
           ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src);
         BIND(NEXT_32_PRFM_START);
           prfm(Address(src, SoftwarePrefetchHintDistance));
-          orr(v4, T16B, Vtmp1, Vtmp2);
-          orr(v5, T16B, Vtmp3, Vtmp4);
+          orr(Vtmp5, T16B, Vtmp1, Vtmp2);
+          orr(Vtmp6, T16B, Vtmp3, Vtmp4);
           uzp1(Vtmp1, T16B, Vtmp1, Vtmp2);
           uzp1(Vtmp3, T16B, Vtmp3, Vtmp4);
-          uzp2(v5, T16B, v4, v5); // high bytes
-          umov(tmp2, v5, D, 1);
-          fmovd(tmp1, v5);
+          uzp2(Vtmp6, T16B, Vtmp5, Vtmp6); // high bytes
+          umov(tmp2, Vtmp6, D, 1);
+          fmovd(tmp1, Vtmp6);
           orr(tmp1, tmp1, tmp2);
           cbnz(tmp1, LOOP_8);
           stpq(Vtmp1, Vtmp3, dst);
@@ -5666,8 +5672,8 @@ void MacroAssembler::encode_iso_array(Register src, Register dst,
           ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src);
       }
       prfm(Address(src, SoftwarePrefetchHintDistance));
-      uzp1(v4, T16B, Vtmp1, Vtmp2);
-      uzp1(v5, T16B, Vtmp3, Vtmp4);
+      uzp1(Vtmp5, T16B, Vtmp1, Vtmp2);
+      uzp1(Vtmp6, T16B, Vtmp3, Vtmp4);
       orr(Vtmp1, T16B, Vtmp1, Vtmp2);
       orr(Vtmp3, T16B, Vtmp3, Vtmp4);
       uzp2(Vtmp1, T16B, Vtmp1, Vtmp3); // high bytes
@@ -5675,7 +5681,7 @@ void MacroAssembler::encode_iso_array(Register src, Register dst,
       fmovd(tmp1, Vtmp1);
       orr(tmp1, tmp1, tmp2);
       cbnz(tmp1, LOOP_8);
-      stpq(v4, v5, dst);
+      stpq(Vtmp5, Vtmp6, dst);
       sub(len, len, 32);
       add(dst, dst, 32);
       add(src, src, 64);
@@ -5720,6 +5726,7 @@ void MacroAssembler::encode_iso_array(Register src, Register dst,
 
 
 // Inflate byte[] array to char[].
+// Clobbers: src, dst, len, rflags, rscratch1, v0-v6
 address MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
                                            FloatRegister vtmp1, FloatRegister vtmp2,
                                            FloatRegister vtmp3, Register tmp4) {
@@ -5828,9 +5835,10 @@ address MacroAssembler::byte_array_inflate(Register src, Register dst, Register
 void MacroAssembler::char_array_compress(Register src, Register dst, Register len,
                                          FloatRegister tmp1Reg, FloatRegister tmp2Reg,
                                          FloatRegister tmp3Reg, FloatRegister tmp4Reg,
+                                         FloatRegister tmp5Reg, FloatRegister tmp6Reg,
                                          Register result) {
   encode_iso_array(src, dst, len, result,
-                   tmp1Reg, tmp2Reg, tmp3Reg, tmp4Reg);
+                   tmp1Reg, tmp2Reg, tmp3Reg, tmp4Reg, tmp5Reg, tmp6Reg);
   cmp(len, zr);
   csel(result, result, zr, EQ);
 }
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
index 7e23c16a442..01fdf16a01c 100644
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
@@ -1245,12 +1245,14 @@ class MacroAssembler: public Assembler {
   void char_array_compress(Register src, Register dst, Register len,
                            FloatRegister tmp1Reg, FloatRegister tmp2Reg,
                            FloatRegister tmp3Reg, FloatRegister tmp4Reg,
+                           FloatRegister tmp5Reg, FloatRegister tmp6Reg,
                            Register result);
 
   void encode_iso_array(Register src, Register dst,
                         Register len, Register result,
                         FloatRegister Vtmp1, FloatRegister Vtmp2,
-                        FloatRegister Vtmp3, FloatRegister Vtmp4);
+                        FloatRegister Vtmp3, FloatRegister Vtmp4,
+                        FloatRegister Vtmp5, FloatRegister Vtmp6);
   void string_indexof(Register str1, Register str2,
                       Register cnt1, Register cnt2,
                       Register tmp1, Register tmp2,
diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
index bd4b5d7c13f..482784d6b7b 100644
--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
@@ -4099,6 +4099,7 @@ class StubGenerator: public StubCodeGenerator {
   // result = r0 - return value. Already contains "false"
   // cnt1 = r10 - amount of elements left to check, reduced by wordSize
   // r3-r5 are reserved temporary registers
+  // Clobbers: v0-v7 when UseSIMDForArrayEquals, rscratch1, rscratch2
   address generate_large_array_equals() {
     Register a1 = r1, a2 = r2, result = r0, cnt1 = r10, tmp1 = rscratch1,
         tmp2 = rscratch2, tmp3 = r3, tmp4 = r4, tmp5 = r5, tmp6 = r11,
@@ -4503,6 +4504,8 @@ class StubGenerator: public StubCodeGenerator {
   // R2 = cnt1
   // R3 = str1
   // R4 = cnt2
+  // Clobbers: rscratch1, rscratch2, v0, v1, rflags
+  //
   // This generic linear code use few additional ideas, which makes it faster:
   // 1) we can safely keep at least 1st register of pattern(since length >= 8)
   // in order to skip initial loading(help in systems with 1 ld pipeline)
@@ -4817,6 +4820,7 @@ class StubGenerator: public StubCodeGenerator {
   // R3 = len >> 3
   // V0 = 0
   // v1 = loaded 8 bytes
+  // Clobbers: r0, r1, r3, rscratch1, rflags, v0-v6
   address generate_large_byte_array_inflate() {
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", "large_byte_array_inflate");
diff --git a/test/hotspot/jtreg/compiler/c2/aarch64/TestIntrinsicsRegStress.java b/test/hotspot/jtreg/compiler/c2/aarch64/TestIntrinsicsRegStress.java
new file mode 100644
index 00000000000..960661b975a
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/c2/aarch64/TestIntrinsicsRegStress.java
@@ -0,0 +1,296 @@
+/*
+ * Copyright (c) 2023, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8307572
+ * @summary Verify vector register clobbering in some aarch64 intrinsics
+ * @library /compiler/patches /test/lib
+ * @build java.base/java.lang.Helper
+ * @run main/othervm -Xbatch -XX:CompileThreshold=100 -XX:-TieredCompilation compiler.c2.aarch64.TestIntrinsicsRegStress
+ */
+
+package compiler.c2.aarch64;
+
+import java.util.Arrays;
+
+public class TestIntrinsicsRegStress {
+
+    final int LENGTH = 1024;
+    final int ITER = 10000;
+    final int NUM = 32;
+
+    byte[] ba;
+    char[] ca;
+    char[] cb;
+    float[] fv;
+
+    String str;
+    String[] strings;
+    String needle = "01234567890123456789";
+
+    public void init() {
+        ca = new char[LENGTH];
+        fv = new float[NUM];
+        strings = new String[NUM];
+        for (int i = 0; i < LENGTH; i++) {
+            ca[i] = (char) ('a' + i % NUM);
+        }
+        cb = ca.clone();
+        str = new String(ca);
+        for (int i = 0; i < NUM; i++) {
+            fv[i] = 1;
+        }
+        for (int i = 0; i < NUM; i++) {
+            strings[i] = str.substring(i) + needle;
+        }
+    }
+
+    public void checkIndexOf(int iter) {
+        float t0 = 0;
+        float t1 = fv[1] * fv[0];
+        float t2 = fv[2] * fv[0];
+        float t3 = fv[3] * fv[0];
+        float t4 = fv[4] * fv[0];
+        float t5 = fv[5] * fv[0];
+        float t6 = fv[6] * fv[0];
+        float t7 = fv[7] * fv[0];
+        float t8 = fv[8] * fv[0];
+        float t9 = fv[9] * fv[0];
+        float t10 = fv[10] * fv[0];
+        float t11 = fv[11] * fv[0];
+        float t12 = fv[12] * fv[0];
+        float t13 = fv[13] * fv[0];
+        float t14 = fv[14] * fv[0];
+        float t15 = fv[15] * fv[0];
+        float t16 = fv[16] * fv[0];
+        float t17 = fv[17] * fv[0];
+        float t18 = fv[18] * fv[0];
+        float t19 = fv[19] * fv[0];
+        float t20 = fv[20] * fv[0];
+        float t21 = fv[21] * fv[0];
+        float t22 = fv[22] * fv[0];
+        float t23 = fv[23] * fv[0];
+        float t24 = fv[24] * fv[0];
+        float t25 = fv[25] * fv[0];
+        float t26 = fv[26] * fv[0];
+        float t27 = fv[27] * fv[0];
+        float t28 = fv[28] * fv[0];
+        float t29 = fv[29] * fv[0];
+        float t30 = fv[30] * fv[0];
+
+        int result = strings[iter % NUM].indexOf(needle);
+
+        if (result > LENGTH - NUM / 2) {
+            // Use as many fp registers as possible and try to keep them
+            // live across the intrinsic call above.
+            t0 += t1 - t2 + t3 - t4 + t5 - t6 + t7 - t8 + t9 - t10 + t11 - t12 + t13 - t14 + t15
+                    - t16 + t17 - t18 + t19 - t20 + t21 - t22 + t23 - t24 + t25 - t26 + t27 - t28
+                    + t29 - t30; // 0
+        }
+        fv[31] += t0 + t2 - t11 + t16 - t29;
+    }
+
+    public void testIndexOf() {
+        for (int i = 0; i < ITER; i++) {
+            checkIndexOf(i);
+        }
+    }
+
+    public void checkArraysEquals() {
+        float t0 = 0;
+        float t1 = fv[1] * fv[0];
+        float t2 = fv[2] * fv[0];
+        float t3 = fv[3] * fv[0];
+        float t4 = fv[4] * fv[0];
+        float t5 = fv[5] * fv[0];
+        float t6 = fv[6] * fv[0];
+        float t7 = fv[7] * fv[0];
+        float t8 = fv[8] * fv[0];
+        float t9 = fv[9] * fv[0];
+        float t10 = fv[10] * fv[0];
+        float t11 = fv[11] * fv[0];
+        float t12 = fv[12] * fv[0];
+        float t13 = fv[13] * fv[0];
+        float t14 = fv[14] * fv[0];
+        float t15 = fv[15] * fv[0];
+        float t16 = fv[16] * fv[0];
+        float t17 = fv[17] * fv[0];
+        float t18 = fv[18] * fv[0];
+        float t19 = fv[19] * fv[0];
+        float t20 = fv[20] * fv[0];
+        float t21 = fv[21] * fv[0];
+        float t22 = fv[22] * fv[0];
+        float t23 = fv[23] * fv[0];
+        float t24 = fv[24] * fv[0];
+        float t25 = fv[25] * fv[0];
+        float t26 = fv[26] * fv[0];
+        float t27 = fv[27] * fv[0];
+        float t28 = fv[28] * fv[0];
+        float t29 = fv[29] * fv[0];
+        float t30 = fv[30] * fv[0];
+
+        if (Arrays.equals(ca, cb)) {
+            // Use as many fp registers as possible and try to keep them
+            // live across the intrinsic call above.
+            t0 += t1 - t2 + t3 - t4 + t5 - t6 + t7 - t8 + t9 - t10 + t11 - t12 + t13 - t14 + t15
+                    - t16 + t17 - t18 + t19 - t20 + t21 - t22 + t23 - t24 + t25 - t26 + t27 - t28
+                    + t29 - t30; // 0
+        }
+        fv[31] += t0 + t2 - t11 + t16 - t29;
+    }
+
+    public void testArraysEquals() {
+        for (int i = 0; i < ITER; i++) {
+            checkArraysEquals();
+        }
+    }
+
+    public void checkCompress(int iter) {
+        float t0 = 0;
+        float t1 = fv[1] * fv[0];
+        float t2 = fv[2] * fv[0];
+        float t3 = fv[3] * fv[0];
+        float t4 = fv[4] * fv[0];
+        float t5 = fv[5] * fv[0];
+        float t6 = fv[6] * fv[0];
+        float t7 = fv[7] * fv[0];
+        float t8 = fv[8] * fv[0];
+        float t9 = fv[9] * fv[0];
+        float t10 = fv[10] * fv[0];
+        float t11 = fv[11] * fv[0];
+        float t12 = fv[12] * fv[0];
+        float t13 = fv[13] * fv[0];
+        float t14 = fv[14] * fv[0];
+        float t15 = fv[15] * fv[0];
+        float t16 = fv[16] * fv[0];
+        float t17 = fv[17] * fv[0];
+        float t18 = fv[18] * fv[0];
+        float t19 = fv[19] * fv[0];
+        float t20 = fv[20] * fv[0];
+        float t21 = fv[21] * fv[0];
+        float t22 = fv[22] * fv[0];
+        float t23 = fv[23] * fv[0];
+        float t24 = fv[24] * fv[0];
+        float t25 = fv[25] * fv[0];
+        float t26 = fv[26] * fv[0];
+        float t27 = fv[27] * fv[0];
+        float t28 = fv[28] * fv[0];
+        float t29 = fv[29] * fv[0];
+        float t30 = fv[30] * fv[0];
+
+        ba = Helper.compressChar(ca, 0, LENGTH, 0, LENGTH);
+
+        if (ba[iter % LENGTH] > (byte) ('a' + 5)) {
+            // Use as many fp registers as possible and try to keep them
+            // live across the intrinsic call above.
+            t0 += t1 - t2 + t3 - t4 + t5 - t6 + t7 - t8 + t9 - t10 + t11 - t12 + t13 - t14 + t15
+                    - t16 + t17 - t18 + t19 - t20 + t21 - t22 + t23 - t24 + t25 - t26 + t27 - t28
+                    + t29 - t30; // 0
+        }
+        fv[31] += t0 + t2 - t11 + t16 - t29;
+    }
+
+    public void testCompress() {
+        for (int i = 0; i < ITER; i++) {
+            checkCompress(i);
+        }
+    }
+
+    public void checkInflate(int iter) {
+        float t0 = 0;
+        float t1 = fv[1] * fv[0];
+        float t2 = fv[2] * fv[0];
+        float t3 = fv[3] * fv[0];
+        float t4 = fv[4] * fv[0];
+        float t5 = fv[5] * fv[0];
+        float t6 = fv[6] * fv[0];
+        float t7 = fv[7] * fv[0];
+        float t8 = fv[8] * fv[0];
+        float t9 = fv[9] * fv[0];
+        float t10 = fv[10] * fv[0];
+        float t11 = fv[11] * fv[0];
+        float t12 = fv[12] * fv[0];
+        float t13 = fv[13] * fv[0];
+        float t14 = fv[14] * fv[0];
+        float t15 = fv[15] * fv[0];
+        float t16 = fv[16] * fv[0];
+        float t17 = fv[17] * fv[0];
+        float t18 = fv[18] * fv[0];
+        float t19 = fv[19] * fv[0];
+        float t20 = fv[20] * fv[0];
+        float t21 = fv[21] * fv[0];
+        float t22 = fv[22] * fv[0];
+        float t23 = fv[23] * fv[0];
+        float t24 = fv[24] * fv[0];
+        float t25 = fv[25] * fv[0];
+        float t26 = fv[26] * fv[0];
+        float t27 = fv[27] * fv[0];
+        float t28 = fv[28] * fv[0];
+        float t29 = fv[29] * fv[0];
+        float t30 = fv[30] * fv[0];
+
+        str.getChars(0, LENGTH, ca, 0);
+
+        if (ca[iter % LENGTH] > (byte) ('a' + NUM / 2)) {
+            // Use as many fp registers as possible and try to keep them
+            // live across the intrinsic call above.
+            t0 += t1 - t2 + t3 - t4 + t5 - t6 + t7 - t8 + t9 - t10 + t11 - t12 + t13 - t14 + t15
+                    - t16 + t17 - t18 + t19 - t20 + t21 - t22 + t23 - t24 + t25 - t26 + t27 - t28
+                    + t29 - t30; // 0
+        }
+        fv[31] += t0 + t2 - t11 + t16 - t29;
+    }
+
+    public void testInflate() {
+        for (int i = 0; i < ITER; i++) {
+            checkInflate(i);
+        }
+    }
+
+    public void verifyAndReset() {
+        if (fv[31] != 1.0) {
+            throw new RuntimeException("Failed with " + Float.toString(fv[31]));
+        } else {
+            System.out.println("Success!");
+        }
+        fv[31] = 1.0f;
+    }
+
+    public static void main(String[] args) {
+        TestIntrinsicsRegStress t = new TestIntrinsicsRegStress();
+        t.init();
+
+        t.testIndexOf();
+        t.verifyAndReset();
+
+        t.testArraysEquals();
+        t.verifyAndReset();
+
+        t.testCompress();
+        t.verifyAndReset();
+
+        t.testInflate();
+        t.verifyAndReset();
+    }
+}