diff --git a/src/hotspot/cpu/aarch64/aarch64_vector.ad b/src/hotspot/cpu/aarch64/aarch64_vector.ad
index d217fb7e94987..71701196af538 100644
--- a/src/hotspot/cpu/aarch64/aarch64_vector.ad
+++ b/src/hotspot/cpu/aarch64/aarch64_vector.ad
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
 // Copyright (c) 2020, 2024, Arm Limited. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
@@ -204,12 +204,6 @@ source %{
         return false;
       }
       break;
-    case Op_VectorLoadShuffle:
-    case Op_VectorRearrange:
-      if (vlen < 4) {
-        return false;
-      }
-      break;
     case Op_ExpandV:
      if (UseSVE < 2 || is_subword_type(bt)) {
        return false;
@@ -6156,61 +6150,24 @@ instruct vtest_alltrue_sve(rFlagsReg cr, pReg src1, pReg src2, pReg ptmp) %{
 
 // ------------------------------ Vector rearrange -----------------------------
 
-// Here is an example that rearranges a NEON vector with 4 ints:
-// Rearrange V1 int[a0, a1, a2, a3] to V2 int[a2, a3, a0, a1]
-// 1. Get the indices of V1 and store them as Vi byte[0, 1, 2, 3].
-// 2. Convert Vi byte[0, 1, 2, 3] to the indices of V2 and also store them as Vi byte[2, 3, 0, 1].
-// 3. Unsigned extend Long Vi from byte[2, 3, 0, 1] to int[2, 3, 0, 1].
-// 4. Multiply Vi int[2, 3, 0, 1] with constant int[0x04040404, 0x04040404, 0x04040404, 0x04040404]
-// and get tbl base Vm int[0x08080808, 0x0c0c0c0c, 0x00000000, 0x04040404].
-// 5. Add Vm with constant int[0x03020100, 0x03020100, 0x03020100, 0x03020100]
-// and get tbl index Vm int[0x0b0a0908, 0x0f0e0d0c, 0x03020100, 0x07060504]
-// 6. Use Vm as index register, and use V1 as table register.
-// Then get V2 as the result by tbl NEON instructions.
-// Notes:
-// Step 1 matches VectorLoadConst.
-// Step 3 matches VectorLoadShuffle.
-// Step 4, 5, 6 match VectorRearrange.
-// For VectorRearrange short/int, the reason why such complex calculation is
-// required is because NEON tbl supports bytes table only, so for short/int, we
-// need to lookup 2/4 bytes as a group. For VectorRearrange long, we use bsl
-// to implement rearrange.
-
-// Maybe move the shuffle preparation to VectorLoadShuffle
-instruct rearrange_HS_neon(vReg dst, vReg src, vReg shuffle, vReg tmp1, vReg tmp2) %{
-  predicate(UseSVE == 0 &&
-            (Matcher::vector_element_basic_type(n) == T_SHORT ||
-             (type2aelembytes(Matcher::vector_element_basic_type(n)) == 4 &&
-              Matcher::vector_length_in_bytes(n) == 16)));
+instruct rearrange_HSD_neon(vReg dst, vReg src, vReg shuffle, vReg tmp) %{
+  predicate(UseSVE == 0 && Matcher::vector_element_basic_type(n) != T_BYTE);
   match(Set dst (VectorRearrange src shuffle));
-  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
-  format %{ "rearrange_HS_neon $dst, $src, $shuffle\t# vector (4S/8S/4I/4F). KILL $tmp1, $tmp2" %}
+  effect(TEMP_DEF dst, TEMP tmp);
+  format %{ "rearrange_HSD_neon $dst, $src, $shuffle\t# vector (4H/8H/2S/4S/2D). KILL $tmp" %}
   ins_encode %{
     BasicType bt = Matcher::vector_element_basic_type(this);
-    if (bt == T_SHORT) {
-      uint length_in_bytes = Matcher::vector_length_in_bytes(this);
-      assert(length_in_bytes == 8 || length_in_bytes == 16, "must be");
-      Assembler::SIMD_Arrangement size1 = length_in_bytes == 16 ? __ T16B : __ T8B;
-      Assembler::SIMD_Arrangement size2 = length_in_bytes == 16 ? __ T8H : __ T4H;
-      __ mov($tmp1$$FloatRegister, size1, 0x02);
-      __ mov($tmp2$$FloatRegister, size2, 0x0100);
-      __ mulv($dst$$FloatRegister, size2, $shuffle$$FloatRegister, $tmp1$$FloatRegister);
-      __ addv($dst$$FloatRegister, size1, $dst$$FloatRegister, $tmp2$$FloatRegister);
-      __ tbl($dst$$FloatRegister, size1, $src$$FloatRegister, 1, $dst$$FloatRegister);
-    } else {
-      assert(bt == T_INT || bt == T_FLOAT, "unsupported type");
-      __ mov($tmp1$$FloatRegister, __ T16B, 0x04);
-      __ mov($tmp2$$FloatRegister, __ T4S, 0x03020100);
-      __ mulv($dst$$FloatRegister, __ T4S, $shuffle$$FloatRegister, $tmp1$$FloatRegister);
-      __ addv($dst$$FloatRegister, __ T16B, $dst$$FloatRegister, $tmp2$$FloatRegister);
-      __ tbl($dst$$FloatRegister, __ T16B, $src$$FloatRegister, 1, $dst$$FloatRegister);
-    }
+    uint length_in_bytes = Matcher::vector_length_in_bytes(this);
+    assert(length_in_bytes == 8 || length_in_bytes == 16, "must be");
+    __ neon_rearrange_hsd($dst$$FloatRegister, $src$$FloatRegister,
+                          $shuffle$$FloatRegister, $tmp$$FloatRegister,
+                          bt, length_in_bytes == 16);
   %}
   ins_pipe(pipe_slow);
 %}
 
 instruct rearrange(vReg dst, vReg src, vReg shuffle) %{
-  predicate(Matcher::vector_element_basic_type(n) == T_BYTE || UseSVE > 0);
+  predicate(UseSVE > 0 || Matcher::vector_element_basic_type(n) == T_BYTE);
   match(Set dst (VectorRearrange src shuffle));
   format %{ "rearrange $dst, $src, $shuffle" %}
   ins_encode %{
diff --git a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
index 422e98d9b681a..575a37608fdc1 100644
--- a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
+++ b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
 // Copyright (c) 2020, 2024, Arm Limited. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
@@ -194,12 +194,6 @@ source %{
         return false;
       }
       break;
-    case Op_VectorLoadShuffle:
-    case Op_VectorRearrange:
-      if (vlen < 4) {
-        return false;
-      }
-      break;
     case Op_ExpandV:
      if (UseSVE < 2 || is_subword_type(bt)) {
        return false;
@@ -4403,61 +4397,24 @@ instruct vtest_alltrue_sve(rFlagsReg cr, pReg src1, pReg src2, pReg ptmp) %{
 
 // ------------------------------ Vector rearrange -----------------------------
 
-// Here is an example that rearranges a NEON vector with 4 ints:
-// Rearrange V1 int[a0, a1, a2, a3] to V2 int[a2, a3, a0, a1]
-// 1. Get the indices of V1 and store them as Vi byte[0, 1, 2, 3].
-// 2. Convert Vi byte[0, 1, 2, 3] to the indices of V2 and also store them as Vi byte[2, 3, 0, 1].
-// 3. Unsigned extend Long Vi from byte[2, 3, 0, 1] to int[2, 3, 0, 1].
-// 4. Multiply Vi int[2, 3, 0, 1] with constant int[0x04040404, 0x04040404, 0x04040404, 0x04040404]
-// and get tbl base Vm int[0x08080808, 0x0c0c0c0c, 0x00000000, 0x04040404].
-// 5. Add Vm with constant int[0x03020100, 0x03020100, 0x03020100, 0x03020100]
-// and get tbl index Vm int[0x0b0a0908, 0x0f0e0d0c, 0x03020100, 0x07060504]
-// 6. Use Vm as index register, and use V1 as table register.
-// Then get V2 as the result by tbl NEON instructions.
-// Notes:
-// Step 1 matches VectorLoadConst.
-// Step 3 matches VectorLoadShuffle.
-// Step 4, 5, 6 match VectorRearrange.
-// For VectorRearrange short/int, the reason why such complex calculation is
-// required is because NEON tbl supports bytes table only, so for short/int, we
-// need to lookup 2/4 bytes as a group. For VectorRearrange long, we use bsl
-// to implement rearrange.
-
-// Maybe move the shuffle preparation to VectorLoadShuffle
-instruct rearrange_HS_neon(vReg dst, vReg src, vReg shuffle, vReg tmp1, vReg tmp2) %{
-  predicate(UseSVE == 0 &&
-            (Matcher::vector_element_basic_type(n) == T_SHORT ||
-             (type2aelembytes(Matcher::vector_element_basic_type(n)) == 4 &&
-              Matcher::vector_length_in_bytes(n) == 16)));
+instruct rearrange_HSD_neon(vReg dst, vReg src, vReg shuffle, vReg tmp) %{
+  predicate(UseSVE == 0 && Matcher::vector_element_basic_type(n) != T_BYTE);
   match(Set dst (VectorRearrange src shuffle));
-  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
-  format %{ "rearrange_HS_neon $dst, $src, $shuffle\t# vector (4S/8S/4I/4F). KILL $tmp1, $tmp2" %}
+  effect(TEMP_DEF dst, TEMP tmp);
+  format %{ "rearrange_HSD_neon $dst, $src, $shuffle\t# vector (4H/8H/2S/4S/2D). KILL $tmp" %}
   ins_encode %{
     BasicType bt = Matcher::vector_element_basic_type(this);
-    if (bt == T_SHORT) {
-      uint length_in_bytes = Matcher::vector_length_in_bytes(this);
-      assert(length_in_bytes == 8 || length_in_bytes == 16, "must be");
-      Assembler::SIMD_Arrangement size1 = length_in_bytes == 16 ? __ T16B : __ T8B;
-      Assembler::SIMD_Arrangement size2 = length_in_bytes == 16 ? __ T8H : __ T4H;
-      __ mov($tmp1$$FloatRegister, size1, 0x02);
-      __ mov($tmp2$$FloatRegister, size2, 0x0100);
-      __ mulv($dst$$FloatRegister, size2, $shuffle$$FloatRegister, $tmp1$$FloatRegister);
-      __ addv($dst$$FloatRegister, size1, $dst$$FloatRegister, $tmp2$$FloatRegister);
-      __ tbl($dst$$FloatRegister, size1, $src$$FloatRegister, 1, $dst$$FloatRegister);
-    } else {
-      assert(bt == T_INT || bt == T_FLOAT, "unsupported type");
-      __ mov($tmp1$$FloatRegister, __ T16B, 0x04);
-      __ mov($tmp2$$FloatRegister, __ T4S, 0x03020100);
-      __ mulv($dst$$FloatRegister, __ T4S, $shuffle$$FloatRegister, $tmp1$$FloatRegister);
-      __ addv($dst$$FloatRegister, __ T16B, $dst$$FloatRegister, $tmp2$$FloatRegister);
-      __ tbl($dst$$FloatRegister, __ T16B, $src$$FloatRegister, 1, $dst$$FloatRegister);
-    }
+    uint length_in_bytes = Matcher::vector_length_in_bytes(this);
+    assert(length_in_bytes == 8 || length_in_bytes == 16, "must be");
+    __ neon_rearrange_hsd($dst$$FloatRegister, $src$$FloatRegister,
+                          $shuffle$$FloatRegister, $tmp$$FloatRegister,
+                          bt, length_in_bytes == 16);
   %}
   ins_pipe(pipe_slow);
 %}
 
 instruct rearrange(vReg dst, vReg src, vReg shuffle) %{
-  predicate(Matcher::vector_element_basic_type(n) == T_BYTE || UseSVE > 0);
+  predicate(UseSVE > 0 || Matcher::vector_element_basic_type(n) == T_BYTE);
   match(Set dst (VectorRearrange src shuffle));
   format %{ "rearrange $dst, $src, $shuffle" %}
   ins_encode %{
diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
index 97cd00e652279..605a05a44a731 100644
--- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
@@ -2545,6 +2545,64 @@ void C2_MacroAssembler::neon_reverse_bytes(FloatRegister dst, FloatRegister src,
   }
 }
 
+// VectorRearrange implementation for short/int/float/long/double types with NEON
+// instructions. For VectorRearrange short/int/float, we use the NEON tbl instruction.
+// But since it supports byte tables only, we need to look up 2/4 bytes as a group.
+// For VectorRearrange long/double, we compare the shuffle input with the iota
+// indices, and use bsl to implement the operation.
+void C2_MacroAssembler::neon_rearrange_hsd(FloatRegister dst, FloatRegister src,
+                                           FloatRegister shuffle, FloatRegister tmp,
+                                           BasicType bt, bool isQ) {
+  assert_different_registers(dst, src, shuffle, tmp);
+  SIMD_Arrangement size1 = isQ ? T16B : T8B;
+  SIMD_Arrangement size2 = esize2arrangement((uint)type2aelembytes(bt), isQ);
+
+  // Here is an example that rearranges a NEON vector with 4 ints:
+  // Rearrange V1 int[a0, a1, a2, a3] to V2 int[a2, a3, a0, a1]
+  // 1. We assume the shuffle input is Vi int[2, 3, 0, 1].
+  // 2. Multiply Vi int[2, 3, 0, 1] with constant int vector
+  //    [0x04040404, 0x04040404, 0x04040404, 0x04040404], and get
+  //    tbl base Vm int[0x08080808, 0x0c0c0c0c, 0x00000000, 0x04040404].
+  // 3. Add Vm with constant int[0x03020100, 0x03020100, 0x03020100, 0x03020100],
+  //    and get tbl index Vm int[0x0b0a0908, 0x0f0e0d0c, 0x03020100, 0x07060504].
+  // 4. Use Vm as the index register, and use V1 as the table register.
+  //    Then get V2 as the result by the NEON tbl instruction.
+  switch (bt) {
+    case T_SHORT:
+      mov(tmp, size1, 0x02);
+      mulv(dst, size2, shuffle, tmp);
+      mov(tmp, size2, 0x0100);
+      addv(dst, size1, dst, tmp);
+      tbl(dst, size1, src, 1, dst);
+      break;
+    case T_INT:
+    case T_FLOAT:
+      mov(tmp, size1, 0x04);
+      mulv(dst, size2, shuffle, tmp);
+      mov(tmp, size2, 0x03020100);
+      addv(dst, size1, dst, tmp);
+      tbl(dst, size1, src, 1, dst);
+      break;
+    case T_LONG:
+    case T_DOUBLE:
+      // Load the iota indices for the long type. The indices are ordered by
+      // type B/S/I/L/F/D, and the offset between two types is 16; hence
+      // the offset for L is 48.
+      lea(rscratch1,
+          ExternalAddress(StubRoutines::aarch64::vector_iota_indices() + 48));
+      ldrq(tmp, rscratch1);
+      // Check whether the input "shuffle" is the same as the iota indices.
+      // Return "src" if true, otherwise swap the two elements of "src".
+      cm(EQ, dst, size2, shuffle, tmp);
+      ext(tmp, size1, src, src, 8);
+      bsl(dst, size1, src, tmp);
+      break;
+    default:
+      assert(false, "unsupported element type");
+      ShouldNotReachHere();
+  }
+}
+
 // Extract a scalar element from an sve vector at position 'idx'.
 // The input elements in src are expected to be of integral type.
 void C2_MacroAssembler::sve_extract_integral(Register dst, BasicType bt, FloatRegister src,
diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
index d61b050407d21..e0eaa0b76e6e9 100644
--- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -179,6 +179,8 @@
   void neon_reverse_bytes(FloatRegister dst, FloatRegister src,
                           BasicType bt, bool isQ);
+  void neon_rearrange_hsd(FloatRegister dst, FloatRegister src, FloatRegister shuffle,
+                          FloatRegister tmp, BasicType bt, bool isQ);
   // java.lang.Math::signum intrinsics
   void vector_signum_neon(FloatRegister dst, FloatRegister src, FloatRegister zero,
                           FloatRegister one, SIMD_Arrangement T);
diff --git a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java
index 0a5f6c63c7696..76faf7c9d69ed 100644
--- a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java
+++ b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java
@@ -334,6 +334,36 @@ public class IRNode {
         beforeMatchingNameRegex(OPAQUE_MULTIVERSIONING, "OpaqueMultiversioning");
     }
 
+    public static final String REARRANGE_VB = VECTOR_PREFIX + "REARRANGE_VB" + POSTFIX;
+    static {
+        vectorNode(REARRANGE_VB, "VectorRearrange", TYPE_BYTE);
+    }
+
+    public static final String REARRANGE_VS = VECTOR_PREFIX + "REARRANGE_VS" + POSTFIX;
+    static {
+        vectorNode(REARRANGE_VS, "VectorRearrange", TYPE_SHORT);
+    }
+
+    public static final String REARRANGE_VI = VECTOR_PREFIX + "REARRANGE_VI" + POSTFIX;
+    static {
+        vectorNode(REARRANGE_VI, "VectorRearrange", TYPE_INT);
+    }
+
+    public static final String REARRANGE_VL = VECTOR_PREFIX + "REARRANGE_VL" + POSTFIX;
+    static {
+        vectorNode(REARRANGE_VL, "VectorRearrange", TYPE_LONG);
+    }
+
+    public static final String REARRANGE_VF = VECTOR_PREFIX + "REARRANGE_VF" + POSTFIX;
+    static {
+        vectorNode(REARRANGE_VF, "VectorRearrange", TYPE_FLOAT);
+    }
+
+    public static final String REARRANGE_VD = VECTOR_PREFIX + "REARRANGE_VD" + POSTFIX;
+    static {
+        vectorNode(REARRANGE_VD, "VectorRearrange", TYPE_DOUBLE);
+    }
+
     public static final String ADD_P_OF = COMPOSITE_PREFIX + "ADD_P_OF" + POSTFIX;
     static {
         String regex = START + "addP_" + IS_REPLACED + MID + ".*" + END;
diff --git a/test/hotspot/jtreg/compiler/vectorapi/VectorRearrangeTest.java b/test/hotspot/jtreg/compiler/vectorapi/VectorRearrangeTest.java
new file mode 100644
index 0000000000000..f2d172b888812
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/vectorapi/VectorRearrangeTest.java
@@ -0,0 +1,311 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8350463
+ * @summary AArch64: Add vector rearrange support for small lane count vectors
+ * @modules jdk.incubator.vector
+ * @library /test/lib /
+ *
+ * @run driver compiler.vectorapi.VectorRearrangeTest
+ */
+
+package compiler.vectorapi;
+
+import compiler.lib.generators.*;
+import compiler.lib.ir_framework.*;
+import jdk.incubator.vector.*;
+import jdk.test.lib.Asserts;
+
+public class VectorRearrangeTest {
+    private static final int LENGTH = 1024;
+    private static final Generators random = Generators.G;
+
+    private static final VectorSpecies<Byte> bspec128 = ByteVector.SPECIES_128;
+    private static final VectorSpecies<Short> sspec128 = ShortVector.SPECIES_128;
+    private static final VectorSpecies<Integer> ispec128 = IntVector.SPECIES_128;
+    private static final VectorSpecies<Long> lspec128 = LongVector.SPECIES_128;
+    private static final VectorSpecies<Float> fspec128 = FloatVector.SPECIES_128;
+    private static final VectorSpecies<Double> dspec128 = DoubleVector.SPECIES_128;
+    private static final VectorSpecies<Byte> bspec64 = ByteVector.SPECIES_64;
+    private static final VectorSpecies<Short> sspec64 = ShortVector.SPECIES_64;
+    private static final VectorSpecies<Integer> ispec64 = IntVector.SPECIES_64;
+    private static final VectorSpecies<Float> fspec64 = FloatVector.SPECIES_64;
+
+    private static byte[] bsrc;
+    private static short[] ssrc;
+    private static int[] isrc;
+    private static long[] lsrc;
+    private static float[] fsrc;
+    private static double[] dsrc;
+
+    private static byte[] bdst;
+    private static short[] sdst;
+    private static int[] idst;
+    private static long[] ldst;
+    private static float[] fdst;
+    private static double[] ddst;
+
+    private static int[][] indexes;
+
+    static {
+        bsrc = new byte[LENGTH];
+        ssrc = new short[LENGTH];
+        isrc = new int[LENGTH];
+        lsrc = new long[LENGTH];
+        fsrc = new float[LENGTH];
+        dsrc = new double[LENGTH];
+        bdst = new byte[LENGTH];
+        sdst = new short[LENGTH];
+        idst = new int[LENGTH];
+        ldst = new long[LENGTH];
+        fdst = new float[LENGTH];
+        ddst = new double[LENGTH];
+
+        Generator<Integer> byteGen = random.uniformInts(Byte.MIN_VALUE, Byte.MAX_VALUE);
+        Generator<Integer> shortGen = random.uniformInts(Short.MIN_VALUE, Short.MAX_VALUE);
+        for (int i = 0; i < LENGTH; i++) {
+            bsrc[i] = byteGen.next().byteValue();
+            ssrc[i] = shortGen.next().shortValue();
+        }
+        random.fill(random.ints(), isrc);
+        random.fill(random.longs(), lsrc);
+        random.fill(random.floats(), fsrc);
+        random.fill(random.doubles(), dsrc);
+
+        int[] nums = {2, 4, 8, 16};
+        indexes = new int[4][];
+        for (int i = 0; i < 4; i++) {
+            indexes[i] = new int[nums[i]];
+            random.fill(random.uniformInts(0, nums[i] - 1), indexes[i]);
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.REARRANGE_VB, IRNode.VECTOR_SIZE_8, " >0 "}, applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
+    public void rearrange_byte64() {
+        VectorShuffle<Byte> shuffle = VectorShuffle.fromArray(bspec64, indexes[2], 0);
+        for (int i = 0; i < LENGTH; i += bspec64.length()) {
+            ByteVector.fromArray(bspec64, bsrc, i)
+                      .rearrange(shuffle)
+                      .intoArray(bdst, i);
+        }
+    }
+
+    @Check(test = "rearrange_byte64")
+    public void rearrange_byte64_verify() {
+        for (int i = 0; i < LENGTH; i += bspec64.length()) {
+            for (int j = 0; j < bspec64.length(); j++) {
+                Asserts.assertEquals(bsrc[indexes[2][j] + i], bdst[i + j]);
+            }
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.REARRANGE_VB, IRNode.VECTOR_SIZE_16, " >0 "}, applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
+    public void rearrange_byte128() {
+        VectorShuffle<Byte> shuffle = VectorShuffle.fromArray(bspec128, indexes[3], 0);
+        for (int i = 0; i < LENGTH; i += bspec128.length()) {
+            ByteVector.fromArray(bspec128, bsrc, i)
+                      .rearrange(shuffle)
+                      .intoArray(bdst, i);
+        }
+    }
+
+    @Check(test = "rearrange_byte128")
+    public void rearrange_byte128_verify() {
+        for (int i = 0; i < LENGTH; i += bspec128.length()) {
+            for (int j = 0; j < bspec128.length(); j++) {
+                Asserts.assertEquals(bsrc[indexes[3][j] + i], bdst[i + j]);
+            }
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.REARRANGE_VS, IRNode.VECTOR_SIZE_4, " >0 "}, applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
+    public void rearrange_short64() {
+        VectorShuffle<Short> shuffle = VectorShuffle.fromArray(sspec64, indexes[1], 0);
+        for (int i = 0; i < LENGTH; i += sspec64.length()) {
+            ShortVector.fromArray(sspec64, ssrc, i)
+                       .rearrange(shuffle)
+                       .intoArray(sdst, i);
+        }
+    }
+
+    @Check(test = "rearrange_short64")
+    public void rearrange_short64_verify() {
+        for (int i = 0; i < LENGTH; i += sspec64.length()) {
+            for (int j = 0; j < sspec64.length(); j++) {
+                Asserts.assertEquals(ssrc[indexes[1][j] + i], sdst[i + j]);
+            }
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.REARRANGE_VS, IRNode.VECTOR_SIZE_8, " >0 "}, applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
+    public void rearrange_short128() {
+        VectorShuffle<Short> shuffle = VectorShuffle.fromArray(sspec128, indexes[2], 0);
+        for (int i = 0; i < LENGTH; i += sspec128.length()) {
+            ShortVector.fromArray(sspec128, ssrc, i)
+                       .rearrange(shuffle)
+                       .intoArray(sdst, i);
+        }
+    }
+
+    @Check(test = "rearrange_short128")
+    public void rearrange_short128_verify() {
+        for (int i = 0; i < LENGTH; i += sspec128.length()) {
+            for (int j = 0; j < sspec128.length(); j++) {
+                Asserts.assertEquals(ssrc[indexes[2][j] + i], sdst[i + j]);
+            }
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.REARRANGE_VI, IRNode.VECTOR_SIZE_2, " >0 "}, applyIfCPUFeature = {"asimd", "true"})
+    public void rearrange_int64() {
+        VectorShuffle<Integer> shuffle = VectorShuffle.fromArray(ispec64, indexes[0], 0);
+        for (int i = 0; i < LENGTH; i += ispec64.length()) {
+            IntVector.fromArray(ispec64, isrc, i)
+                     .rearrange(shuffle)
+                     .intoArray(idst, i);
+        }
+    }
+
+    @Check(test = "rearrange_int64")
+    public void rearrange_int64_verify() {
+        for (int i = 0; i < LENGTH; i += ispec64.length()) {
+            for (int j = 0; j < ispec64.length(); j++) {
+                Asserts.assertEquals(isrc[indexes[0][j] + i], idst[i + j]);
+            }
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.REARRANGE_VI, IRNode.VECTOR_SIZE_4, " >0 "}, applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
+    public void rearrange_int128() {
+        VectorShuffle<Integer> shuffle = VectorShuffle.fromArray(ispec128, indexes[1], 0);
+        for (int i = 0; i < LENGTH; i += ispec128.length()) {
+            IntVector.fromArray(ispec128, isrc, i)
+                     .rearrange(shuffle)
+                     .intoArray(idst, i);
+        }
+    }
+
+    @Check(test = "rearrange_int128")
+    public void rearrange_int128_verify() {
+        for (int i = 0; i < LENGTH; i += ispec128.length()) {
+            for (int j = 0; j < ispec128.length(); j++) {
+                Asserts.assertEquals(isrc[indexes[1][j] + i], idst[i + j]);
+            }
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.REARRANGE_VL, IRNode.VECTOR_SIZE_2, " >0 "}, applyIfCPUFeature = {"asimd", "true"})
+    public void rearrange_long128() {
+        VectorShuffle<Long> shuffle = VectorShuffle.fromArray(lspec128, indexes[0], 0);
+        for (int i = 0; i < LENGTH; i += lspec128.length()) {
+            LongVector.fromArray(lspec128, lsrc, i)
+                      .rearrange(shuffle)
+                      .intoArray(ldst, i);
+        }
+    }
+
+    @Check(test = "rearrange_long128")
+    public void rearrange_long128_verify() {
+        for (int i = 0; i < LENGTH; i += lspec128.length()) {
+            for (int j = 0; j < lspec128.length(); j++) {
+                Asserts.assertEquals(lsrc[indexes[0][j] + i], ldst[i + j]);
+            }
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.REARRANGE_VF, IRNode.VECTOR_SIZE_2, " >0 "}, applyIfCPUFeature = {"asimd", "true"})
+    public void rearrange_float64() {
+        VectorShuffle<Float> shuffle = VectorShuffle.fromArray(fspec64, indexes[0], 0);
+        for (int i = 0; i < LENGTH; i += fspec64.length()) {
+            FloatVector.fromArray(fspec64, fsrc, i)
+                       .rearrange(shuffle)
+                       .intoArray(fdst, i);
+        }
+    }
+
+    @Check(test = "rearrange_float64")
+    public void rearrange_float64_verify() {
+        for (int i = 0; i < LENGTH; i += fspec64.length()) {
+            for (int j = 0; j < fspec64.length(); j++) {
+                Asserts.assertEquals(fsrc[indexes[0][j] + i], fdst[i + j]);
+            }
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.REARRANGE_VF, IRNode.VECTOR_SIZE_4, " >0 "}, applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
+    public void rearrange_float128() {
+        VectorShuffle<Float> shuffle = VectorShuffle.fromArray(fspec128, indexes[1], 0);
+        for (int i = 0; i < LENGTH; i += fspec128.length()) {
+            FloatVector.fromArray(fspec128, fsrc, i)
+                       .rearrange(shuffle)
+                       .intoArray(fdst, i);
+        }
+    }
+
+    @Check(test = "rearrange_float128")
+    public void rearrange_float128_verify() {
+        for (int i = 0; i < LENGTH; i += fspec128.length()) {
+            for (int j = 0; j < fspec128.length(); j++) {
+                Asserts.assertEquals(fsrc[indexes[1][j] + i], fdst[i + j]);
+            }
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.REARRANGE_VD, IRNode.VECTOR_SIZE_2, " >0 "}, applyIfCPUFeature = {"asimd", "true"})
+    public void rearrange_double128() {
+        VectorShuffle<Double> shuffle = VectorShuffle.fromArray(dspec128, indexes[0], 0);
+        for (int i = 0; i < LENGTH; i += dspec128.length()) {
+            DoubleVector.fromArray(dspec128, dsrc, i)
+                        .rearrange(shuffle)
+                        .intoArray(ddst, i);
+        }
+    }
+
+    @Check(test = "rearrange_double128")
+    public void rearrange_double128_verify() {
+        for (int i = 0; i < LENGTH; i += dspec128.length()) {
+            for (int j = 0; j < dspec128.length(); j++) {
+                Asserts.assertEquals(dsrc[indexes[0][j] + i], ddst[i + j]);
+            }
+        }
+    }
+
+    public static void main(String[] args) {
+        TestFramework testFramework = new TestFramework();
+        testFramework.setDefaultWarmup(5000)
+                     .addFlags("--add-modules=jdk.incubator.vector")
+                     .start();
+    }
+}
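
For readers following the mulv/addv/tbl sequence in neon_rearrange_hsd, here is a scalar model of how the short/int/float path expands element-level shuffle indices into the byte-level indices consumed by the byte-granular NEON tbl instruction. This is an illustrative sketch only; the class and method names are made up, and it is not JDK code:

    import java.util.Arrays;

    // Hypothetical scalar model of the mulv + addv index preparation.
    public class TblIndexModel {
        // Each element index s of an esize-byte element expands to the byte
        // indices s*esize + 0 .. s*esize + (esize-1): mulv scales the index by
        // the element size, addv adds the within-element byte offsets.
        static byte[] expand(int[] shuffle, int esize) {
            byte[] tblIndex = new byte[shuffle.length * esize];
            for (int i = 0; i < shuffle.length; i++) {
                for (int k = 0; k < esize; k++) {
                    tblIndex[i * esize + k] = (byte) (shuffle[i] * esize + k);
                }
            }
            return tblIndex;
        }

        public static void main(String[] args) {
            // The 4-int example from the patch comment: shuffle [2, 3, 0, 1]
            // yields bytes [8..11, 12..15, 0..3, 4..7], i.e. the little-endian
            // int vector [0x0b0a0908, 0x0f0e0d0c, 0x03020100, 0x07060504].
            System.out.println(Arrays.toString(expand(new int[]{2, 3, 0, 1}, 4)));
        }
    }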
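Similarly, a scalar sketch of the long/double path (again with made-up names, not JDK code): cm(EQ) builds a per-lane mask of shuffle == iota, ext produces a half-swapped copy of src, and bsl keeps the src lane where the mask is set and takes the swapped lane elsewhere. For a two-lane vector this reduces to a per-lane select:

    // Hypothetical scalar model of the cm + ext + bsl selection (2 lanes).
    public class BslRearrangeModel {
        static long[] rearrange2(long[] src, int[] shuffle) {
            long[] swapped = {src[1], src[0]};  // models ext(src, src, 8)
            long[] dst = new long[2];
            for (int i = 0; i < 2; i++) {
                // cm(EQ) mask: shuffle[i] == i; bsl selects src where the mask
                // is set, the half-swapped copy where it is clear.
                dst[i] = (shuffle[i] == i) ? src[i] : swapped[i];
            }
            return dst;
        }

        public static void main(String[] args) {
            long[] r = rearrange2(new long[]{10L, 20L}, new int[]{1, 0});
            System.out.println(r[0] + ", " + r[1]);  // prints 20, 10
        }
    }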