From 1895cf3112d341b0ae8beb6dd9d332f6a2c5d5fc Mon Sep 17 00:00:00 2001
From: Faye Gao <faye.gao@arm.com>
Date: Wed, 22 Nov 2023 07:24:02 +0000
Subject: [PATCH 1/4] 8319690: [AArch64] C2 compilation hits
 offset_ok_for_immed: assert "c2 compiler bug"

On LP64 systems, if the heap can be placed in the low virtual
address space (below 4 GB) and the heap size is smaller than
4 GB, we can use the unscaled decoding pattern for narrow oop
decoding (i.e. `CompressedOops::shift() == 0`). This means that a
generic field reference can be decoded by:
```
cast<64> (32-bit compressed reference) + field_offset
```

When the `field_offset` is an immediate, on the AArch64 platform
the unscaled decoding pattern maps directly onto the
`base_plus_offset` addressing mode supported by LDR/STR
instructions. However, for a given data width, not every immediate
can be encoded in the offset field of LDR/STR[1], and the valid
ranges differ between data widths.

For example, when we try to load a value of long type at an offset
of `1030`, the address expression is `(AddP (DecodeN base) 1030)`.
Before this patch, the expression was matched by
`operand indOffIN()`. But for a 64-bit LDR/STR, the offset must be
either a signed immediate in the range -256 to 255, or a positive
immediate that is a multiple of 8 in the range 0 to 32760[2].
`1030` satisfies neither condition, so it can't be encoded in the
instruction. As a result, matching succeeds, but the assertion
fails later when the instruction encoding is checked.

In this patch, we filter out invalid immediates when deciding
whether the current addressing mode can be matched as
`base_plus_offset`. We introduce `indOffIN4/indOffLN4` and
`indOffIN8/indOffLN8` for the 32-bit and 64-bit data types
respectively. E.g., for `memory4`, we remove the generic
`indOffIN/indOffLN`, which accept an immediate range that is not
valid for 32-bit accesses, and replace them with
`indOffIN4/indOffLN4`.

Since 8-bit and 16-bit LDR/STR instructions also support the
unscaled decoding pattern, we add this addressing mode to the
`memory1` and `memory2` operand classes by introducing
`indOffIN1/indOffLN1` and `indOffIN2/indOffLN2`.

We also remove the now-unused operands
`indOffI/indOffL/indOffIN/indOffLN` to avoid misuse.

Tier 1-3 passed on aarch64.

[1] https://github.com/openjdk/jdk/blob/8db7bad992a0f31de9c7e00c2657c18670539102/src/hotspot/cpu/aarch64/assembler_aarch64.inline.hpp#L33
[2] https://developer.arm.com/documentation/ddi0602/2023-09/Base-Instructions/LDR--immediate---Load-Register--immediate--?lang=en
---
 src/hotspot/cpu/aarch64/aarch64.ad            | 132 ++++++++++----
 .../TestUnalignedAccessCompressedOops.java    | 172 ++++++++++++++++++
 2 files changed, 269 insertions(+), 35 deletions(-)
 create mode 100644 test/hotspot/jtreg/compiler/c2/aarch64/TestUnalignedAccessCompressedOops.java

diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad
index 233f9b6af7c14..5b516746767c1 100644
--- a/src/hotspot/cpu/aarch64/aarch64.ad
+++ b/src/hotspot/cpu/aarch64/aarch64.ad
@@ -5666,20 +5666,6 @@ operand indIndex(iRegP reg, iRegL lreg)
   %}
 %}
 
-operand indOffI(iRegP reg, immIOffset off)
-%{
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP reg off);
-  op_cost(0);
-  format %{ "[$reg, $off]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index(0xffffffff);
-    scale(0x0);
-    disp($off);
-  %}
-%}
-
 operand indOffI1(iRegP reg, immIOffset1 off)
 %{
   constraint(ALLOC_IN_RC(ptr_reg));
@@ -5750,20 +5736,6 @@ operand indOffI16(iRegP reg, immIOffset16 off)
   %}
 %}
 
-operand indOffL(iRegP reg, immLoffset off)
-%{
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP reg off);
-  op_cost(0);
-  format %{ "[$reg, $off]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index(0xffffffff);
-    scale(0x0);
-    disp($off);
-  %}
-%}
-
 operand indOffL1(iRegP reg, immLoffset1 off)
 %{
   constraint(ALLOC_IN_RC(ptr_reg));
@@ -5909,7 +5881,7 @@ operand indIndexN(iRegN reg, iRegL lreg)
   %}
 %}
 
-operand indOffIN(iRegN reg, immIOffset off)
+operand indOffIN1(iRegN reg, immIOffset1 off)
 %{
   predicate(CompressedOops::shift() == 0);
   constraint(ALLOC_IN_RC(ptr_reg));
@@ -5924,7 +5896,7 @@ operand indOffIN(iRegN reg, immIOffset off)
   %}
 %}
 
-operand indOffLN(iRegN reg, immLoffset off)
+operand indOffLN1(iRegN reg, immLoffset1 off)
 %{
   predicate(CompressedOops::shift() == 0);
   constraint(ALLOC_IN_RC(ptr_reg));
@@ -5939,6 +5911,95 @@ operand indOffLN(iRegN reg, immLoffset off)
   %}
 %}
 
+operand indOffIN2(iRegN reg, immIOffset2 off)
+%{
+  predicate(CompressedOops::shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) off);
+  op_cost(0);
+  format %{ "[$reg, $off]\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+operand indOffLN2(iRegN reg, immLoffset2 off)
+%{
+  predicate(CompressedOops::shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) off);
+  op_cost(0);
+  format %{ "[$reg, $off]\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+operand indOffIN4(iRegN reg, immIOffset4 off)
+%{
+  predicate(CompressedOops::shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) off);
+  op_cost(0);
+  format %{ "[$reg, $off]\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+operand indOffLN4(iRegN reg, immLoffset4 off)
+%{
+  predicate(CompressedOops::shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) off);
+  op_cost(0);
+  format %{ "[$reg, $off]\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+operand indOffIN8(iRegN reg, immIOffset8 off)
+%{
+  predicate(CompressedOops::shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) off);
+  op_cost(0);
+  format %{ "[$reg, $off]\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+operand indOffLN8(iRegN reg, immLoffset8 off)
+%{
+  predicate(CompressedOops::shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) off);
+  op_cost(0);
+  format %{ "[$reg, $off]\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);
+    scale(0x0);
+    disp($off);
+  %}
+%}
 
 
 // AArch64 opto stubs need to write to the pc slot in the thread anchor
@@ -6189,21 +6250,22 @@ opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
 // instruction defs. we can turn a memory op into an Address
 
 opclass memory1(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI1, indOffL1,
-               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN);
+                indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN1, indOffLN1);
 
 opclass memory2(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI2, indOffL2,
-               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN);
+                indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN2, indOffLN2);
 
 opclass memory4(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI4, indOffL4,
-               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);
+                indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN4, indOffLN4);
 
 opclass memory8(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI8, indOffL8,
-               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);
+                indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN8, indOffLN8);
 
 // All of the memory operands. For the pipeline description.
 opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex,
                indOffI1, indOffL1, indOffI2, indOffL2, indOffI4, indOffL4, indOffI8, indOffL8,
-               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);
+               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN,
+               indOffIN1, indOffLN1, indOffIN2, indOffLN2, indOffIN4, indOffLN4, indOffIN8, indOffLN8);
 
 
 // iRegIorL2I is used for src inputs in rules for 32 bit int (I)
diff --git a/test/hotspot/jtreg/compiler/c2/aarch64/TestUnalignedAccessCompressedOops.java b/test/hotspot/jtreg/compiler/c2/aarch64/TestUnalignedAccessCompressedOops.java
new file mode 100644
index 0000000000000..11a260a628db0
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/c2/aarch64/TestUnalignedAccessCompressedOops.java
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2023, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package compiler.c2.aarch64;
+
+import jdk.internal.misc.Unsafe;
+import jdk.test.lib.Asserts;
+
+/**
+ * @test TestUnalignedAccessCompressedOops
+ * @summary AArch64: C2 compilation hits offset_ok_for_immed: assert "c2 compiler bug".
+ * @bug 8319690
+ * @library /test/lib
+ * @modules java.base/jdk.internal.misc
+ * @requires os.arch=="aarch64" & vm.compiler2.enabled
+ * @run main/othervm compiler.c2.aarch64.TestUnalignedAccessCompressedOops
+ * @run main/othervm -Xcomp -XX:-TieredCompilation -Xmx1g
+ *                   -XX:CompileCommand=compileonly,compiler.c2.aarch64.TestUnalignedAccessCompressedOops*::<clinit>
+ *                   compiler.c2.aarch64.TestUnalignedAccessCompressedOops
+ */
+
+public class TestUnalignedAccessCompressedOops {
+
+    public static final int LEN = 2040;
+
+    static final Unsafe UNSAFE = Unsafe.getUnsafe();
+    static void sink(int x) {}
+
+    public static long lseed = 1;
+    public static int iseed = 2;
+    public static short sseed = 3;
+    public static byte bseed = 4;
+    public static long lres = lseed;
+    public static int ires = iseed;
+    public static short sres = sseed;
+    public static byte bres = bseed;
+
+    public static class TestLong {
+
+        private static final byte[] BYTES = new byte[LEN];
+        private static final long rawdata = 0xbeef;
+        private static final long data;
+
+        static {
+            sink(2);
+            // Signed immediate byte offset: range -256 to 255
+            // Positive immediate byte offset: a multiple of 8 in the range 0 to 32760
+            // Other immediate byte offsets can't be encoded in the instruction field.
+
+            // 1030 can't be encoded as "base + offset" mode into the instruction field.
+            UNSAFE.putLongUnaligned(BYTES, 1030, rawdata);
+            lres += UNSAFE.getLongUnaligned(BYTES, 1030);
+            // 127 can be encoded into simm9 field.
+            UNSAFE.putLongUnaligned(BYTES, 127, lres);
+            lres += UNSAFE.getLongUnaligned(BYTES, 127);
+            // 1096 can be encoded into uimm12 field.
+            UNSAFE.putLongUnaligned(BYTES, 1096, lres);
+            data = UNSAFE.getLongUnaligned(BYTES, 1096);
+        }
+
+    }
+
+    public static class TestInt {
+
+        private static final byte[] BYTES = new byte[LEN];
+        private static final int rawdata = 0xbeef;
+        private static final int data;
+        static {
+            sink(2);
+            // Signed immediate byte offset: range -256 to 255
+            // Positive immediate byte offset, a multiple of 4 in the range 0 to 16380
+            // Other immediate byte offsets can't be encoded in the instruction field.
+
+            // 274 can't be encoded as "base + offset" mode into the instruction field.
+            UNSAFE.putIntUnaligned(BYTES, 274, rawdata);
+            ires += UNSAFE.getIntUnaligned(BYTES, 274);
+            // 255 can be encoded into simm9 field.
+            UNSAFE.putIntUnaligned(BYTES, 255, ires);
+            ires += UNSAFE.getIntUnaligned(BYTES, 255);
+            // 528 can be encoded into uimm12 field.
+            UNSAFE.putIntUnaligned(BYTES, 528, ires);
+            data = UNSAFE.getIntUnaligned(BYTES, 528);
+        }
+
+    }
+
+    public static class TestShort {
+
+        private static final byte[] BYTES = new byte[LEN];
+        private static final short rawdata = (short)0xbeef;
+        private static final short data;
+        static {
+            sink(2);
+            // Signed immediate byte offset: range -256 to 255
+            // Positive immediate byte offset: a multiple of 2 in the range 0 to 8190
+            // Other immediate byte offsets can't be encoded in the instruction field.
+
+            // 257 can't be encoded as "base + offset" mode into the instruction field.
+            UNSAFE.putShortUnaligned(BYTES, 257, rawdata);
+            sres = (short) (sres + UNSAFE.getShortUnaligned(BYTES, 257));
+            // 253 can be encoded into simm9 field.
+            UNSAFE.putShortUnaligned(BYTES, 253, sres);
+            sres = (short) (sres + UNSAFE.getShortUnaligned(BYTES, 253));
+            // 272 can be encoded into uimm12 field.
+            UNSAFE.putShortUnaligned(BYTES, 272, sres);
+            data = UNSAFE.getShortUnaligned(BYTES, 272);
+        }
+
+    }
+
+    public static class TestByte {
+
+        private static final byte[] BYTES = new byte[LEN];
+        private static final byte rawdata = (byte)0x3f;
+        private static final byte data;
+        static {
+            sink(2);
+            // Signed immediate byte offset: range -256 to 255
+            // Positive immediate byte offset: range 0 to 4095
+            // Other immediate byte offsets can't be encoded in the instruction field.
+
+            // 272 can be encoded into simm9 field.
+            UNSAFE.putByte(BYTES, 272, rawdata);
+            bres = (byte) (bres + UNSAFE.getByte(BYTES, 272));
+            // 53 can be encoded into simm9 field.
+            UNSAFE.putByte(BYTES, 53, bres);
+            bres = (byte) (bres + UNSAFE.getByte(BYTES, 53));
+            // 1027 can be encoded into uimm12 field.
+            UNSAFE.putByte(BYTES, 1027, bres);
+            data = UNSAFE.getByte(BYTES, 1027);
+        }
+
+    }
+
+    static void test() {
+        TestLong ta = new TestLong();
+        Asserts.assertEquals(ta.data, (ta.rawdata + lseed) * 2, "putUnaligned long failed!");
+
+        TestInt tb = new TestInt();
+        Asserts.assertEquals(tb.data, (tb.rawdata + iseed) * 2, "putUnaligned int failed!");
+
+        TestShort tc = new TestShort();
+        Asserts.assertEquals(tc.data, (short) (((short) (tc.rawdata + sseed)) * 2), "putUnaligned short failed!");
+
+        TestByte td = new TestByte();
+        Asserts.assertEquals(td.data, (byte) (((byte) (td.rawdata + bseed)) * 2), "put byte failed!");
+    }
+
+    public static void main(String[] strArr) {
+        test();
+    }
+}

From a7bfe2678f75321e9d4e68a0fb6ead3e2d3d7b1f Mon Sep 17 00:00:00 2001
From: Fei Gao <fei.gao@arm.com>
Date: Thu, 7 Dec 2023 06:37:29 +0000
Subject: [PATCH 2/4] Remove unused immIOffset/immLoffset

---
 src/hotspot/cpu/aarch64/aarch64.ad | 20 --------------------
 1 file changed, 20 deletions(-)

diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad
index 5b516746767c1..bbd5df5124e9e 100644
--- a/src/hotspot/cpu/aarch64/aarch64.ad
+++ b/src/hotspot/cpu/aarch64/aarch64.ad
@@ -4313,16 +4313,6 @@ operand immLU12()
 %}
 
 // Offset for scaled or unscaled immediate loads and stores
-operand immIOffset()
-%{
-  predicate(Address::offset_ok_for_immed(n->get_int(), 0));
-  match(ConI);
-
-  op_cost(0);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
 operand immIOffset1()
 %{
   predicate(Address::offset_ok_for_immed(n->get_int(), 0));
@@ -4373,16 +4363,6 @@ operand immIOffset16()
   interface(CONST_INTER);
 %}
 
-operand immLoffset()
-%{
-  predicate(Address::offset_ok_for_immed(n->get_long(), 0));
-  match(ConL);
-
-  op_cost(0);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
 operand immLoffset1()
 %{
   predicate(Address::offset_ok_for_immed(n->get_long(), 0));

From 0d8a4c128a7a7defd68f7e9afbf44edfed1f5a55 Mon Sep 17 00:00:00 2001
From: Fei Gao <fei.gao2@arm.com>
Date: Wed, 29 May 2024 08:36:17 +0000
Subject: [PATCH 3/4] Add the assertion back and merge matchrules with a better
 predicate

---
 src/hotspot/cpu/aarch64/aarch64.ad            | 167 ++++-------------
 src/hotspot/share/adlc/archDesc.cpp           |  11 +-
 src/hotspot/share/adlc/formssel.cpp           |  17 +-
 src/hotspot/share/adlc/formssel.hpp           |   9 +-
 src/hotspot/share/adlc/output_c.cpp           |   6 +-
 .../TestUnalignedAccessCompressedOops.java    | 172 ------------------
 6 files changed, 60 insertions(+), 322 deletions(-)
 delete mode 100644 test/hotspot/jtreg/compiler/c2/aarch64/TestUnalignedAccessCompressedOops.java

diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad
index dfeef2761ec4b..8fee05e6c8402 100644
--- a/src/hotspot/cpu/aarch64/aarch64.ad
+++ b/src/hotspot/cpu/aarch64/aarch64.ad
@@ -2720,6 +2720,10 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
   {
     Address addr = mem2address(opcode, base, index, scale, disp);
     if (addr.getMode() == Address::base_plus_offset) {
+      // If we get an out-of-range offset it is a bug in the compiler,
+      // so we assert here.
+      assert(Address::offset_ok_for_immed(addr.offset(), exact_log2(size_in_memory)),
+             "c2 compiler bug");
       /* Fix up any out-of-range offsets. */
       assert_different_registers(rscratch1, base);
       assert_different_registers(rscratch1, reg);
@@ -5181,10 +5185,12 @@ operand indIndex(iRegP reg, iRegL lreg)
   %}
 %}
 
-operand indOffI1(iRegP reg, immIOffset1 off)
+operand indOffI1(iRegP reg, iRegN regn, immIOffset1 off)
 %{
+  predicate(!n->in(2)->is_DecodeN() || CompressedOops::shift() == 0);
   constraint(ALLOC_IN_RC(ptr_reg));
   match(AddP reg off);
+  match(AddP (DecodeN regn) off);
   op_cost(0);
   format %{ "[$reg, $off]" %}
   interface(MEMORY_INTER) %{
@@ -5195,10 +5201,12 @@ operand indOffI1(iRegP reg, immIOffset1 off)
   %}
 %}
 
-operand indOffI2(iRegP reg, immIOffset2 off)
+operand indOffI2(iRegP reg, iRegN regn, immIOffset2 off)
 %{
+  predicate(!n->in(2)->is_DecodeN() || CompressedOops::shift() == 0);
   constraint(ALLOC_IN_RC(ptr_reg));
   match(AddP reg off);
+  match(AddP (DecodeN regn) off);
   op_cost(0);
   format %{ "[$reg, $off]" %}
   interface(MEMORY_INTER) %{
@@ -5209,10 +5217,12 @@ operand indOffI2(iRegP reg, immIOffset2 off)
   %}
 %}
 
-operand indOffI4(iRegP reg, immIOffset4 off)
+operand indOffI4(iRegP reg, iRegN regn, immIOffset4 off)
 %{
+  predicate(!n->in(2)->is_DecodeN() || CompressedOops::shift() == 0);
   constraint(ALLOC_IN_RC(ptr_reg));
   match(AddP reg off);
+  match(AddP (DecodeN regn) off);
   op_cost(0);
   format %{ "[$reg, $off]" %}
   interface(MEMORY_INTER) %{
@@ -5223,10 +5233,12 @@ operand indOffI4(iRegP reg, immIOffset4 off)
   %}
 %}
 
-operand indOffI8(iRegP reg, immIOffset8 off)
+operand indOffI8(iRegP reg, iRegN regn, immIOffset8 off)
 %{
+  predicate(!n->in(2)->is_DecodeN() || CompressedOops::shift() == 0);
   constraint(ALLOC_IN_RC(ptr_reg));
   match(AddP reg off);
+  match(AddP (DecodeN regn) off);
   op_cost(0);
   format %{ "[$reg, $off]" %}
   interface(MEMORY_INTER) %{
@@ -5251,10 +5263,12 @@ operand indOffI16(iRegP reg, immIOffset16 off)
   %}
 %}
 
-operand indOffL1(iRegP reg, immLoffset1 off)
+operand indOffL1(iRegP reg, iRegN regn, immLoffset1 off)
 %{
+  predicate(!n->in(2)->is_DecodeN() || CompressedOops::shift() == 0);
   constraint(ALLOC_IN_RC(ptr_reg));
   match(AddP reg off);
+  match(AddP (DecodeN regn) off);
   op_cost(0);
   format %{ "[$reg, $off]" %}
   interface(MEMORY_INTER) %{
@@ -5265,10 +5279,12 @@ operand indOffL1(iRegP reg, immLoffset1 off)
   %}
 %}
 
-operand indOffL2(iRegP reg, immLoffset2 off)
+operand indOffL2(iRegP reg, iRegN regn, immLoffset2 off)
 %{
+  predicate(!n->in(2)->is_DecodeN() || CompressedOops::shift() == 0);
   constraint(ALLOC_IN_RC(ptr_reg));
   match(AddP reg off);
+  match(AddP (DecodeN regn) off);
   op_cost(0);
   format %{ "[$reg, $off]" %}
   interface(MEMORY_INTER) %{
@@ -5279,10 +5295,12 @@ operand indOffL2(iRegP reg, immLoffset2 off)
   %}
 %}
 
-operand indOffL4(iRegP reg, immLoffset4 off)
+operand indOffL4(iRegP reg, iRegN regn, immLoffset4 off)
 %{
+  predicate(!n->in(2)->is_DecodeN() || CompressedOops::shift() == 0);
   constraint(ALLOC_IN_RC(ptr_reg));
   match(AddP reg off);
+  match(AddP (DecodeN regn) off);
   op_cost(0);
   format %{ "[$reg, $off]" %}
   interface(MEMORY_INTER) %{
@@ -5293,10 +5311,12 @@ operand indOffL4(iRegP reg, immLoffset4 off)
   %}
 %}
 
-operand indOffL8(iRegP reg, immLoffset8 off)
+operand indOffL8(iRegP reg, iRegN regn, immLoffset8 off)
 %{
+  predicate(!n->in(2)->is_DecodeN() || CompressedOops::shift() == 0);
   constraint(ALLOC_IN_RC(ptr_reg));
   match(AddP reg off);
+  match(AddP (DecodeN regn) off);
   op_cost(0);
   format %{ "[$reg, $off]" %}
   interface(MEMORY_INTER) %{
@@ -5396,126 +5416,6 @@ operand indIndexN(iRegN reg, iRegL lreg)
   %}
 %}
 
-operand indOffIN1(iRegN reg, immIOffset1 off)
-%{
-  predicate(CompressedOops::shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (DecodeN reg) off);
-  op_cost(0);
-  format %{ "[$reg, $off]\t# narrow" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index(0xffffffff);
-    scale(0x0);
-    disp($off);
-  %}
-%}
-
-operand indOffLN1(iRegN reg, immLoffset1 off)
-%{
-  predicate(CompressedOops::shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (DecodeN reg) off);
-  op_cost(0);
-  format %{ "[$reg, $off]\t# narrow" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index(0xffffffff);
-    scale(0x0);
-    disp($off);
-  %}
-%}
-
-operand indOffIN2(iRegN reg, immIOffset2 off)
-%{
-  predicate(CompressedOops::shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (DecodeN reg) off);
-  op_cost(0);
-  format %{ "[$reg, $off]\t# narrow" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index(0xffffffff);
-    scale(0x0);
-    disp($off);
-  %}
-%}
-
-operand indOffLN2(iRegN reg, immLoffset2 off)
-%{
-  predicate(CompressedOops::shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (DecodeN reg) off);
-  op_cost(0);
-  format %{ "[$reg, $off]\t# narrow" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index(0xffffffff);
-    scale(0x0);
-    disp($off);
-  %}
-%}
-
-operand indOffIN4(iRegN reg, immIOffset4 off)
-%{
-  predicate(CompressedOops::shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (DecodeN reg) off);
-  op_cost(0);
-  format %{ "[$reg, $off]\t# narrow" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index(0xffffffff);
-    scale(0x0);
-    disp($off);
-  %}
-%}
-
-operand indOffLN4(iRegN reg, immLoffset4 off)
-%{
-  predicate(CompressedOops::shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (DecodeN reg) off);
-  op_cost(0);
-  format %{ "[$reg, $off]\t# narrow" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index(0xffffffff);
-    scale(0x0);
-    disp($off);
-  %}
-%}
-
-operand indOffIN8(iRegN reg, immIOffset8 off)
-%{
-  predicate(CompressedOops::shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (DecodeN reg) off);
-  op_cost(0);
-  format %{ "[$reg, $off]\t# narrow" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index(0xffffffff);
-    scale(0x0);
-    disp($off);
-  %}
-%}
-
-operand indOffLN8(iRegN reg, immLoffset8 off)
-%{
-  predicate(CompressedOops::shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (DecodeN reg) off);
-  op_cost(0);
-  format %{ "[$reg, $off]\t# narrow" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index(0xffffffff);
-    scale(0x0);
-    disp($off);
-  %}
-%}
-
 //----------Special Memory Operands--------------------------------------------
 // Stack Slot Operand - This operand is used for loading and storing temporary
 //                      values on the stack where a match requires a value to
@@ -5749,22 +5649,21 @@ opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
 // instruction defs. we can turn a memory op into an Address
 
 opclass memory1(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI1, indOffL1,
-                indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN1, indOffLN1);
+                indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN);
 
 opclass memory2(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI2, indOffL2,
-                indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN2, indOffLN2);
+                indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN);
 
 opclass memory4(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI4, indOffL4,
-                indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN4, indOffLN4);
+                indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN);
 
 opclass memory8(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI8, indOffL8,
-                indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN8, indOffLN8);
+                indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN);
 
 // All of the memory operands. For the pipeline description.
 opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex,
                indOffI1, indOffL1, indOffI2, indOffL2, indOffI4, indOffL4, indOffI8, indOffL8,
-               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN,
-               indOffIN1, indOffLN1, indOffIN2, indOffLN2, indOffIN4, indOffLN4, indOffIN8, indOffLN8);
+               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN);
 
 
 // iRegIorL2I is used for src inputs in rules for 32 bit int (I)
diff --git a/src/hotspot/share/adlc/archDesc.cpp b/src/hotspot/share/adlc/archDesc.cpp
index f084f506bf587..b9bdf73bc15f0 100644
--- a/src/hotspot/share/adlc/archDesc.cpp
+++ b/src/hotspot/share/adlc/archDesc.cpp
@@ -249,6 +249,15 @@ void ArchDesc::inspectOperands() {
     MatchRule *mrule = op->_matrule;
     Predicate *pred  = op->_predicate;
 
+    // If there are multiple arguments, we need to insert
+    // parentheses for predicate so that these arguments
+    // can be chained together logically with "&&".
+    if (op->_matrule &&
+        !(op->_matrule->_lChild == nullptr &&
+          op->_matrule->_rChild == nullptr)) {
+      pred = InstructForm::build_predicate(op->_matrule, op->_predicate);
+    }
+
     // Grab the machine type of the operand
     const char  *rootOp    = op->_ident;
     mrule->_machType  = rootOp;
@@ -296,7 +305,7 @@ void ArchDesc::inspectInstructions() {
     if ( instr->_matrule == nullptr )  continue;
 
     MatchRule &mrule = *instr->_matrule;
-    Predicate *pred  =  instr->build_predicate();
+    Predicate* pred  = InstructForm::build_predicate(instr->_matrule, instr->_predicate);
 
     // Grab the machine type of the operand
     const char  *rootOp    = instr->_ident;
diff --git a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp
index be97547f8ce11..efdebb98bd085 100644
--- a/src/hotspot/share/adlc/formssel.cpp
+++ b/src/hotspot/share/adlc/formssel.cpp
@@ -1525,24 +1525,25 @@ void MachNodeForm::output(FILE *fp) {
 }
 
 //------------------------------build_predicate--------------------------------
-// Build instruction predicates.  If the user uses the same operand name
-// twice, we need to check that the operands are pointer-eequivalent in
-// the DFA during the labeling process.
-Predicate *InstructForm::build_predicate() {
+// Build predicates for instructions or operands.
+//
+// If the user uses the same operand name twice, we need to check that
+// the operands are pointer-equivalent in the DFA during the labeling process.
+Predicate* InstructForm::build_predicate(MatchRule* matrule, Predicate* predicate) {
   const int buflen = 1024;
   char buf[buflen], *s=buf;
   Dict names(cmpstr,hashstr,Form::arena);       // Map Names to counts
 
-  MatchNode *mnode =
-    strcmp(_matrule->_opType, "Set") ? _matrule : _matrule->_rChild;
+  MatchNode* mnode =
+    strcmp(matrule->_opType, "Set") ? matrule : matrule->_rChild;
   if (mnode != nullptr) mnode->count_instr_names(names);
 
   uint first = 1;
   // Start with the predicate supplied in the .ad file.
-  if (_predicate) {
+  if (predicate) {
     if (first) first = 0;
     strcpy(s, "("); s += strlen(s);
-    strncpy(s, _predicate->_pred, buflen - strlen(s) - 1);
+    strncpy(s, predicate->_pred, buflen - strlen(s) - 1);
     s += strlen(s);
     strcpy(s, ")"); s += strlen(s);
   }
diff --git a/src/hotspot/share/adlc/formssel.hpp b/src/hotspot/share/adlc/formssel.hpp
index 61d0fb40f18a8..9dfcadbfa9b1f 100644
--- a/src/hotspot/share/adlc/formssel.hpp
+++ b/src/hotspot/share/adlc/formssel.hpp
@@ -221,10 +221,11 @@ class InstructForm : public Form {
   // Does this instruction need a base-oop edge?
   int needs_base_oop_edge(FormDict &globals) const;
 
-  // Build instruction predicates.  If the user uses the same operand name
-  // twice, we need to check that the operands are pointer-eequivalent in
-  // the DFA during the labeling process.
-  Predicate *build_predicate();
+  // Build predicates for instructions or operands.
+  //
+  // If the user uses the same operand name twice, we need to check that the
+  // operands are pointer-equivalent in the DFA during the labeling process.
+  static Predicate* build_predicate(MatchRule* matrule, Predicate* predicate);
 
   virtual void        build_components(); // top-level operands
   // Return zero-based position in component list; -1 if not in list.
diff --git a/src/hotspot/share/adlc/output_c.cpp b/src/hotspot/share/adlc/output_c.cpp
index 77332b21c0112..cd0ac4ae061dd 100644
--- a/src/hotspot/share/adlc/output_c.cpp
+++ b/src/hotspot/share/adlc/output_c.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -4325,7 +4325,7 @@ void ArchDesc::identify_cisc_spill_instructions() {
       if ( instr->_matrule == nullptr )  continue;
 
       MatchRule &mrule = *instr->_matrule;
-      Predicate *pred  =  instr->build_predicate();
+      Predicate* pred  = InstructForm::build_predicate(instr->_matrule, instr->_predicate);
 
       // Grab the machine type of the operand
       const char *rootOp = instr->_ident;
@@ -4346,7 +4346,7 @@ void ArchDesc::identify_cisc_spill_instructions() {
             && (instr2->reduce_result() != nullptr) // want same result
             && (strcmp(result, instr2->reduce_result()) == 0)) {
           MatchRule &mrule2 = *instr2->_matrule;
-          Predicate *pred2  =  instr2->build_predicate();
+          Predicate* pred2  = InstructForm::build_predicate(instr2->_matrule, instr2->_predicate);
           found_cisc_alternate = instr->cisc_spills_to(*this, instr2);
         }
       }
diff --git a/test/hotspot/jtreg/compiler/c2/aarch64/TestUnalignedAccessCompressedOops.java b/test/hotspot/jtreg/compiler/c2/aarch64/TestUnalignedAccessCompressedOops.java
deleted file mode 100644
index 11a260a628db0..0000000000000
--- a/test/hotspot/jtreg/compiler/c2/aarch64/TestUnalignedAccessCompressedOops.java
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * Copyright (c) 2023, Arm Limited. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-package compiler.c2.aarch64;
-
-import jdk.internal.misc.Unsafe;
-import jdk.test.lib.Asserts;
-
-/**
- * @test TestUnalignedAccessCompressedOops
- * @summary AArch64: C2 compilation hits offset_ok_for_immed: assert "c2 compiler bug".
- * @bug 8319690
- * @library /test/lib
- * @modules java.base/jdk.internal.misc
- * @requires os.arch=="aarch64" & vm.compiler2.enabled
- * @run main/othervm compiler.c2.aarch64.TestUnalignedAccessCompressedOops
- * @run main/othervm -Xcomp -XX:-TieredCompilation -Xmx1g
- *                   -XX:CompileCommand=compileonly,compiler.c2.aarch64.TestUnalignedAccessCompressedOops*::<clinit>
- *                   compiler.c2.aarch64.TestUnalignedAccessCompressedOops
- */
-
-public class TestUnalignedAccessCompressedOops {
-
-    public static final int LEN = 2040;
-
-    static final Unsafe UNSAFE = Unsafe.getUnsafe();
-    static void sink(int x) {}
-
-    public static long lseed = 1;
-    public static int iseed = 2;
-    public static short sseed = 3;
-    public static byte bseed = 4;
-    public static long lres = lseed;
-    public static int ires = iseed;
-    public static short sres = sseed;
-    public static byte bres = bseed;
-
-    public static class TestLong {
-
-        private static final byte[] BYTES = new byte[LEN];
-        private static final long rawdata = 0xbeef;
-        private static final long data;
-
-        static {
-            sink(2);
-            // Signed immediate byte offset: range -256 to 255
-            // Positive immediate byte offset: a multiple of 8 in the range 0 to 32760
-            // Other immediate byte offsets can't be encoded in the instruction field.
-
-            // 1030 can't be encoded as "base + offset" mode into the instruction field.
-            UNSAFE.putLongUnaligned(BYTES, 1030, rawdata);
-            lres += UNSAFE.getLongUnaligned(BYTES, 1030);
-            // 127 can be encoded into simm9 field.
-            UNSAFE.putLongUnaligned(BYTES, 127, lres);
-            lres += UNSAFE.getLongUnaligned(BYTES, 127);
-            // 1096 can be encoded into uimm12 field.
-            UNSAFE.putLongUnaligned(BYTES, 1096, lres);
-            data = UNSAFE.getLongUnaligned(BYTES, 1096);
-        }
-
-    }
-
-    public static class TestInt {
-
-        private static final byte[] BYTES = new byte[LEN];
-        private static final int rawdata = 0xbeef;
-        private static final int data;
-        static {
-            sink(2);
-            // Signed immediate byte offset: range -256 to 255
-            // Positive immediate byte offset, a multiple of 4 in the range 0 to 16380
-            // Other immediate byte offsets can't be encoded in the instruction field.
-
-            // 274 can't be encoded as "base + offset" mode into the instruction field.
-            UNSAFE.putIntUnaligned(BYTES, 274, rawdata);
-            ires += UNSAFE.getIntUnaligned(BYTES, 274);
-            // 255 can be encoded into simm9 field.
-            UNSAFE.putIntUnaligned(BYTES, 255, ires);
-            ires += UNSAFE.getIntUnaligned(BYTES, 255);
-            // 528 can be encoded into uimm12 field.
-            UNSAFE.putIntUnaligned(BYTES, 528, ires);
-            data = UNSAFE.getIntUnaligned(BYTES, 528);
-        }
-
-    }
-
-    public static class TestShort {
-
-        private static final byte[] BYTES = new byte[LEN];
-        private static final short rawdata = (short)0xbeef;
-        private static final short data;
-        static {
-            sink(2);
-            // Signed immediate byte offset: range -256 to 255
-            // Positive immediate byte offset: a multiple of 2 in the range 0 to 8190
-            // Other immediate byte offsets can't be encoded in the instruction field.
-
-            // 257 can't be encoded as "base + offset" mode into the instruction field.
-            UNSAFE.putShortUnaligned(BYTES, 257, rawdata);
-            sres = (short) (sres + UNSAFE.getShortUnaligned(BYTES, 257));
-            // 253 can be encoded into simm9 field.
-            UNSAFE.putShortUnaligned(BYTES, 253, sres);
-            sres = (short) (sres + UNSAFE.getShortUnaligned(BYTES, 253));
-            // 272 can be encoded into uimm12 field.
-            UNSAFE.putShortUnaligned(BYTES, 272, sres);
-            data = UNSAFE.getShortUnaligned(BYTES, 272);
-        }
-
-    }
-
-    public static class TestByte {
-
-        private static final byte[] BYTES = new byte[LEN];
-        private static final byte rawdata = (byte)0x3f;
-        private static final byte data;
-        static {
-            sink(2);
-            // Signed immediate byte offset: range -256 to 255
-            // Positive immediate byte offset: range 0 to 4095
-            // Other immediate byte offsets can't be encoded in the instruction field.
-
-            // 272 can be encoded into simm9 field.
-            UNSAFE.putByte(BYTES, 272, rawdata);
-            bres = (byte) (bres + UNSAFE.getByte(BYTES, 272));
-            // 53 can be encoded into simm9 field.
-            UNSAFE.putByte(BYTES, 53, bres);
-            bres = (byte) (bres + UNSAFE.getByte(BYTES, 53));
-            // 1027 can be encoded into uimm12 field.
-            UNSAFE.putByte(BYTES, 1027, bres);
-            data = UNSAFE.getByte(BYTES, 1027);
-        }
-
-    }
-
-    static void test() {
-        TestLong ta = new TestLong();
-        Asserts.assertEquals(ta.data, (ta.rawdata + lseed) * 2, "putUnaligned long failed!");
-
-        TestInt tb = new TestInt();
-        Asserts.assertEquals(tb.data, (tb.rawdata + iseed) * 2, "putUnaligned int failed!");
-
-        TestShort tc = new TestShort();
-        Asserts.assertEquals(tc.data, (short) (((short) (tc.rawdata + sseed)) * 2), "putUnaligned short failed!");
-
-        TestByte td = new TestByte();
-        Asserts.assertEquals(td.data, (byte) (((byte) (td.rawdata + bseed)) * 2), "put byte failed!");
-    }
-
-    public static void main(String[] strArr) {
-        test();
-    }
-}

From d8ebd740818524f5a42a36608ff58df2297408f1 Mon Sep 17 00:00:00 2001
From: Fei Gao <fei.gao2@arm.com>
Date: Mon, 24 Jun 2024 13:45:01 +0000
Subject: [PATCH 4/4] Discard IndOffXX style and let legitimize_address() fix
 any out-of-range immediate offsets

---
 src/hotspot/cpu/aarch64/aarch64.ad            | 439 +++++-------------
 src/hotspot/cpu/aarch64/aarch64_vector.ad     |  16 +-
 src/hotspot/cpu/aarch64/aarch64_vector_ad.m4  |   2 +-
 src/hotspot/cpu/aarch64/ad_encode.m4          |   8 +-
 src/hotspot/cpu/aarch64/gc/x/x_aarch64.ad     |  12 +-
 src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad     |  12 +-
 src/hotspot/share/adlc/archDesc.cpp           |  11 +-
 src/hotspot/share/adlc/formssel.cpp           |  17 +-
 src/hotspot/share/adlc/formssel.hpp           |   9 +-
 src/hotspot/share/adlc/output_c.cpp           |   4 +-
 .../compiler/c2/TestUnalignedAccess.java      |  61 +--
 11 files changed, 193 insertions(+), 398 deletions(-)

diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad
index ff0170d6280ad..985784c70fa2a 100644
--- a/src/hotspot/cpu/aarch64/aarch64.ad
+++ b/src/hotspot/cpu/aarch64/aarch64.ad
@@ -2720,11 +2720,7 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
   {
     Address addr = mem2address(opcode, base, index, scale, disp);
     if (addr.getMode() == Address::base_plus_offset) {
-      // If we get an out-of-range offset it is a bug in the compiler,
-      // so we assert here.
-      assert(Address::offset_ok_for_immed(addr.offset(), exact_log2(size_in_memory)),
-             "c2 compiler bug");
-      /* Fix up any out-of-range offsets. */
+      // Fix up any out-of-range offsets.
       assert_different_registers(rscratch1, base);
       assert_different_registers(rscratch1, reg);
       addr = __ legitimize_address(addr, size_in_memory, rscratch1);
@@ -2749,10 +2745,7 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
     }
 
     if (index == -1) {
-      /* If we get an out-of-range offset it is a bug in the compiler,
-         so we assert here. */
-      assert(Address::offset_ok_for_immed(disp, exact_log2(size_in_memory)), "c2 compiler bug");
-      /* Fix up any out-of-range offsets. */
+      // Fix up any out-of-range offsets.
       assert_different_registers(rscratch1, base);
       Address addr = Address(base, disp);
       addr = __ legitimize_address(addr, size_in_memory, rscratch1);
@@ -2768,7 +2761,11 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
                         int opcode, Register base, int index, int size, int disp)
   {
     if (index == -1) {
-      (masm->*insn)(reg, T, Address(base, disp));
+      // Fix up any out-of-range offsets.
+      assert_different_registers(rscratch1, base);
+      Address addr = Address(base, disp);
+      addr = __ legitimize_address(addr, (1 << T), rscratch1);
+      (masm->*insn)(reg, T, addr);
     } else {
       assert(disp == 0, "unsupported address mode");
       (masm->*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
@@ -2823,7 +2820,7 @@ encode %{
 
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
-  enc_class aarch64_enc_ldrsbw(iRegI dst, memory1 mem) %{
+  enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
     Register dst_reg = as_Register($dst$$reg);
     loadStore(masm, &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
@@ -2831,7 +2828,7 @@ encode %{
 
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
-  enc_class aarch64_enc_ldrsb(iRegI dst, memory1 mem) %{
+  enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
     Register dst_reg = as_Register($dst$$reg);
     loadStore(masm, &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
@@ -2839,7 +2836,7 @@ encode %{
 
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
-  enc_class aarch64_enc_ldrb(iRegI dst, memory1 mem) %{
+  enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
     Register dst_reg = as_Register($dst$$reg);
     loadStore(masm, &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
@@ -2847,7 +2844,7 @@ encode %{
 
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
-  enc_class aarch64_enc_ldrb(iRegL dst, memory1 mem) %{
+  enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
     Register dst_reg = as_Register($dst$$reg);
     loadStore(masm, &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
@@ -2855,7 +2852,7 @@ encode %{
 
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
-  enc_class aarch64_enc_ldrshw(iRegI dst, memory2 mem) %{
+  enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
     Register dst_reg = as_Register($dst$$reg);
     loadStore(masm, &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
@@ -2863,7 +2860,7 @@ encode %{
 
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
-  enc_class aarch64_enc_ldrsh(iRegI dst, memory2 mem) %{
+  enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
     Register dst_reg = as_Register($dst$$reg);
     loadStore(masm, &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
@@ -2871,7 +2868,7 @@ encode %{
 
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
-  enc_class aarch64_enc_ldrh(iRegI dst, memory2 mem) %{
+  enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
     Register dst_reg = as_Register($dst$$reg);
     loadStore(masm, &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
@@ -2879,7 +2876,7 @@ encode %{
 
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
-  enc_class aarch64_enc_ldrh(iRegL dst, memory2 mem) %{
+  enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
     Register dst_reg = as_Register($dst$$reg);
     loadStore(masm, &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
@@ -2887,7 +2884,7 @@ encode %{
 
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
-  enc_class aarch64_enc_ldrw(iRegI dst, memory4 mem) %{
+  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
     Register dst_reg = as_Register($dst$$reg);
     loadStore(masm, &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
@@ -2895,7 +2892,7 @@ encode %{
 
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
-  enc_class aarch64_enc_ldrw(iRegL dst, memory4 mem) %{
+  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
     Register dst_reg = as_Register($dst$$reg);
     loadStore(masm, &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
@@ -2903,7 +2900,7 @@ encode %{
 
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
-  enc_class aarch64_enc_ldrsw(iRegL dst, memory4 mem) %{
+  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
     Register dst_reg = as_Register($dst$$reg);
     loadStore(masm, &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
@@ -2911,7 +2908,7 @@ encode %{
 
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
-  enc_class aarch64_enc_ldr(iRegL dst, memory8 mem) %{
+  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
     Register dst_reg = as_Register($dst$$reg);
     loadStore(masm, &MacroAssembler::ldr, dst_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
@@ -2919,7 +2916,7 @@ encode %{
 
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
-  enc_class aarch64_enc_ldrs(vRegF dst, memory4 mem) %{
+  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
     loadStore(masm, &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
@@ -2927,7 +2924,7 @@ encode %{
 
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
-  enc_class aarch64_enc_ldrd(vRegD dst, memory8 mem) %{
+  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
     loadStore(masm, &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
@@ -2935,7 +2932,7 @@ encode %{
 
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
-  enc_class aarch64_enc_strb(iRegI src, memory1 mem) %{
+  enc_class aarch64_enc_strb(iRegI src, memory mem) %{
     Register src_reg = as_Register($src$$reg);
     loadStore(masm, &MacroAssembler::strb, src_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
@@ -2943,14 +2940,14 @@ encode %{
 
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
-  enc_class aarch64_enc_strb0(memory1 mem) %{
+  enc_class aarch64_enc_strb0(memory mem) %{
     loadStore(masm, &MacroAssembler::strb, zr, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
   %}
 
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
-  enc_class aarch64_enc_strh(iRegI src, memory2 mem) %{
+  enc_class aarch64_enc_strh(iRegI src, memory mem) %{
     Register src_reg = as_Register($src$$reg);
     loadStore(masm, &MacroAssembler::strh, src_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
@@ -2958,14 +2955,14 @@ encode %{
 
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
-  enc_class aarch64_enc_strh0(memory2 mem) %{
+  enc_class aarch64_enc_strh0(memory mem) %{
     loadStore(masm, &MacroAssembler::strh, zr, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
   %}
 
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
-  enc_class aarch64_enc_strw(iRegI src, memory4 mem) %{
+  enc_class aarch64_enc_strw(iRegI src, memory mem) %{
     Register src_reg = as_Register($src$$reg);
     loadStore(masm, &MacroAssembler::strw, src_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
@@ -2973,14 +2970,14 @@ encode %{
 
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
-  enc_class aarch64_enc_strw0(memory4 mem) %{
+  enc_class aarch64_enc_strw0(memory mem) %{
     loadStore(masm, &MacroAssembler::strw, zr, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
   %}
 
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
-  enc_class aarch64_enc_str(iRegL src, memory8 mem) %{
+  enc_class aarch64_enc_str(iRegL src, memory mem) %{
     Register src_reg = as_Register($src$$reg);
     // we sometimes get asked to store the stack pointer into the
     // current thread -- we cannot do that directly on AArch64
@@ -2995,14 +2992,14 @@ encode %{
 
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
-  enc_class aarch64_enc_str0(memory8 mem) %{
+  enc_class aarch64_enc_str0(memory mem) %{
     loadStore(masm, &MacroAssembler::str, zr, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
   %}
 
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
-  enc_class aarch64_enc_strs(vRegF src, memory4 mem) %{
+  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
     FloatRegister src_reg = as_FloatRegister($src$$reg);
     loadStore(masm, &MacroAssembler::strs, src_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
@@ -3010,7 +3007,7 @@ encode %{
 
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
-  enc_class aarch64_enc_strd(vRegD src, memory8 mem) %{
+  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
     FloatRegister src_reg = as_FloatRegister($src$$reg);
     loadStore(masm, &MacroAssembler::strd, src_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
@@ -3018,7 +3015,7 @@ encode %{
 
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
-  enc_class aarch64_enc_strb0_ordered(memory4 mem) %{
+  enc_class aarch64_enc_strb0_ordered(memory mem) %{
       __ membar(Assembler::StoreStore);
       loadStore(masm, &MacroAssembler::strb, zr, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
@@ -3220,7 +3217,7 @@ encode %{
 
   // synchronized read/update encodings
 
-  enc_class aarch64_enc_ldaxr(iRegL dst, memory8 mem) %{
+  enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
     Register dst_reg = as_Register($dst$$reg);
     Register base = as_Register($mem$$base);
     int index = $mem$$index;
@@ -3248,7 +3245,7 @@ encode %{
     }
   %}
 
-  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory8 mem) %{
+  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
     Register src_reg = as_Register($src$$reg);
     Register base = as_Register($mem$$base);
     int index = $mem$$index;
@@ -3352,7 +3349,11 @@ encode %{
     int scale = $mem$$scale;
     int disp = $mem$$disp;
     if (index == -1) {
-      __ prfm(Address(base, disp), PSTL1KEEP);
+      // Fix up any out-of-range offsets.
+      assert_different_registers(rscratch1, base);
+      Address addr = Address(base, disp);
+      addr = __ legitimize_address(addr, 8, rscratch1);
+      __ prfm(addr, PSTL1KEEP);
     } else {
       Register index_reg = as_Register(index);
       if (disp == 0) {
@@ -4172,30 +4173,10 @@ operand immIU7()
   interface(CONST_INTER);
 %}
 
-// Offset for scaled or unscaled immediate loads and stores
-operand immIOffset1()
-%{
-  predicate(Address::offset_ok_for_immed(n->get_int(), 0));
-  match(ConI);
-
-  op_cost(0);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-operand immIOffset2()
-%{
-  predicate(Address::offset_ok_for_immed(n->get_int(), 1));
-  match(ConI);
-
-  op_cost(0);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-operand immIOffset4()
+// Offset for immediate loads and stores: from the simm9 minimum (-256) to the largest scaled uimm12 offset (4095 * 16 = 65520)
+operand immIOffset()
 %{
-  predicate(Address::offset_ok_for_immed(n->get_int(), 2));
+  predicate(n->get_int() >= -256 && n->get_int() <= 65520);
   match(ConI);
 
   op_cost(0);
@@ -4203,69 +4184,9 @@ operand immIOffset4()
   interface(CONST_INTER);
 %}
 
-operand immIOffset8()
+operand immLOffset()
 %{
-  predicate(Address::offset_ok_for_immed(n->get_int(), 3));
-  match(ConI);
-
-  op_cost(0);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-operand immIOffset16()
-%{
-  predicate(Address::offset_ok_for_immed(n->get_int(), 4));
-  match(ConI);
-
-  op_cost(0);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-operand immLoffset1()
-%{
-  predicate(Address::offset_ok_for_immed(n->get_long(), 0));
-  match(ConL);
-
-  op_cost(0);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-operand immLoffset2()
-%{
-  predicate(Address::offset_ok_for_immed(n->get_long(), 1));
-  match(ConL);
-
-  op_cost(0);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-operand immLoffset4()
-%{
-  predicate(Address::offset_ok_for_immed(n->get_long(), 2));
-  match(ConL);
-
-  op_cost(0);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-operand immLoffset8()
-%{
-  predicate(Address::offset_ok_for_immed(n->get_long(), 3));
-  match(ConL);
-
-  op_cost(0);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-operand immLoffset16()
-%{
-  predicate(Address::offset_ok_for_immed(n->get_long(), 4));
+  predicate(n->get_long() >= -256 && n->get_long() <= 65520);
   match(ConL);
 
   op_cost(0);
@@ -5185,28 +5106,10 @@ operand indIndex(iRegP reg, iRegL lreg)
   %}
 %}
 
-operand indOffI1(iRegP reg, iRegN regn, immIOffset1 off)
-%{
-  predicate(!n->in(2)->is_DecodeN() || CompressedOops::shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP reg off);
-  match(AddP (DecodeN regn) off);
-  op_cost(0);
-  format %{ "[$reg, $off]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index(0xffffffff);
-    scale(0x0);
-    disp($off);
-  %}
-%}
-
-operand indOffI2(iRegP reg, iRegN regn, immIOffset2 off)
+operand indOffI(iRegP reg, immIOffset off)
 %{
-  predicate(!n->in(2)->is_DecodeN() || CompressedOops::shift() == 0);
   constraint(ALLOC_IN_RC(ptr_reg));
   match(AddP reg off);
-  match(AddP (DecodeN regn) off);
   op_cost(0);
   format %{ "[$reg, $off]" %}
   interface(MEMORY_INTER) %{
@@ -5217,117 +5120,7 @@ operand indOffI2(iRegP reg, iRegN regn, immIOffset2 off)
   %}
 %}
 
-operand indOffI4(iRegP reg, iRegN regn, immIOffset4 off)
-%{
-  predicate(!n->in(2)->is_DecodeN() || CompressedOops::shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP reg off);
-  match(AddP (DecodeN regn) off);
-  op_cost(0);
-  format %{ "[$reg, $off]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index(0xffffffff);
-    scale(0x0);
-    disp($off);
-  %}
-%}
-
-operand indOffI8(iRegP reg, iRegN regn, immIOffset8 off)
-%{
-  predicate(!n->in(2)->is_DecodeN() || CompressedOops::shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP reg off);
-  match(AddP (DecodeN regn) off);
-  op_cost(0);
-  format %{ "[$reg, $off]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index(0xffffffff);
-    scale(0x0);
-    disp($off);
-  %}
-%}
-
-operand indOffI16(iRegP reg, immIOffset16 off)
-%{
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP reg off);
-  op_cost(0);
-  format %{ "[$reg, $off]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index(0xffffffff);
-    scale(0x0);
-    disp($off);
-  %}
-%}
-
-operand indOffL1(iRegP reg, iRegN regn, immLoffset1 off)
-%{
-  predicate(!n->in(2)->is_DecodeN() || CompressedOops::shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP reg off);
-  match(AddP (DecodeN regn) off);
-  op_cost(0);
-  format %{ "[$reg, $off]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index(0xffffffff);
-    scale(0x0);
-    disp($off);
-  %}
-%}
-
-operand indOffL2(iRegP reg, iRegN regn, immLoffset2 off)
-%{
-  predicate(!n->in(2)->is_DecodeN() || CompressedOops::shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP reg off);
-  match(AddP (DecodeN regn) off);
-  op_cost(0);
-  format %{ "[$reg, $off]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index(0xffffffff);
-    scale(0x0);
-    disp($off);
-  %}
-%}
-
-operand indOffL4(iRegP reg, iRegN regn, immLoffset4 off)
-%{
-  predicate(!n->in(2)->is_DecodeN() || CompressedOops::shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP reg off);
-  match(AddP (DecodeN regn) off);
-  op_cost(0);
-  format %{ "[$reg, $off]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index(0xffffffff);
-    scale(0x0);
-    disp($off);
-  %}
-%}
-
-operand indOffL8(iRegP reg, iRegN regn, immLoffset8 off)
-%{
-  predicate(!n->in(2)->is_DecodeN() || CompressedOops::shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP reg off);
-  match(AddP (DecodeN regn) off);
-  op_cost(0);
-  format %{ "[$reg, $off]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index(0xffffffff);
-    scale(0x0);
-    disp($off);
-  %}
-%}
-
-operand indOffL16(iRegP reg, immLoffset16 off)
+operand indOffL(iRegP reg, immLOffset off)
 %{
   constraint(ALLOC_IN_RC(ptr_reg));
   match(AddP reg off);
@@ -5416,6 +5209,37 @@ operand indIndexN(iRegN reg, iRegL lreg)
   %}
 %}
 
+operand indOffIN(iRegN reg, immIOffset off)
+%{
+  predicate(CompressedOops::shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) off);
+  op_cost(0);
+  format %{ "[$reg, $off]\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+operand indOffLN(iRegN reg, immLOffset off)
+%{
+  predicate(CompressedOops::shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) off);
+  op_cost(0);
+  format %{ "[$reg, $off]\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+
 //----------Special Memory Operands--------------------------------------------
 // Stack Slot Operand - This operand is used for loading and storing temporary
 //                      values on the stack where a match requires a value to
@@ -5633,10 +5457,7 @@ operand iRegL2I(iRegL reg) %{
   interface(REG_INTER)
 %}
 
-opclass vmem2(indirect, indIndex, indOffI2, indOffL2);
-opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
-opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
-opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
+opclass vmem(indirect, indIndex, indOffI, indOffL, indOffIN, indOffLN);
 
 //----------OPERAND CLASSES----------------------------------------------------
 // Operand Classes are groups of operands that are used as to simplify
@@ -5648,22 +5469,8 @@ opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
 // memory is used to define read/write location for load/store
 // instruction defs. we can turn a memory op into an Address
 
-opclass memory1(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI1, indOffL1,
-                indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN);
-
-opclass memory2(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI2, indOffL2,
-                indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN);
-
-opclass memory4(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI4, indOffL4,
-                indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN);
-
-opclass memory8(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI8, indOffL8,
-                indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN);
-
-// All of the memory operands. For the pipeline description.
-opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex,
-               indOffI1, indOffL1, indOffI2, indOffL2, indOffI4, indOffL4, indOffI8, indOffL8,
-               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN);
+opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL,
+               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);
 
 
 // iRegIorL2I is used for src inputs in rules for 32 bit int (I)
@@ -6365,7 +6172,7 @@ define %{
 // Load Instructions
 
 // Load Byte (8 bit signed)
-instruct loadB(iRegINoSp dst, memory1 mem)
+instruct loadB(iRegINoSp dst, memory mem)
 %{
   match(Set dst (LoadB mem));
   predicate(!needs_acquiring_load(n));
@@ -6379,7 +6186,7 @@ instruct loadB(iRegINoSp dst, memory1 mem)
 %}
 
 // Load Byte (8 bit signed) into long
-instruct loadB2L(iRegLNoSp dst, memory1 mem)
+instruct loadB2L(iRegLNoSp dst, memory mem)
 %{
   match(Set dst (ConvI2L (LoadB mem)));
   predicate(!needs_acquiring_load(n->in(1)));
@@ -6393,7 +6200,7 @@ instruct loadB2L(iRegLNoSp dst, memory1 mem)
 %}
 
 // Load Byte (8 bit unsigned)
-instruct loadUB(iRegINoSp dst, memory1 mem)
+instruct loadUB(iRegINoSp dst, memory mem)
 %{
   match(Set dst (LoadUB mem));
   predicate(!needs_acquiring_load(n));
@@ -6407,7 +6214,7 @@ instruct loadUB(iRegINoSp dst, memory1 mem)
 %}
 
 // Load Byte (8 bit unsigned) into long
-instruct loadUB2L(iRegLNoSp dst, memory1 mem)
+instruct loadUB2L(iRegLNoSp dst, memory mem)
 %{
   match(Set dst (ConvI2L (LoadUB mem)));
   predicate(!needs_acquiring_load(n->in(1)));
@@ -6421,7 +6228,7 @@ instruct loadUB2L(iRegLNoSp dst, memory1 mem)
 %}
 
 // Load Short (16 bit signed)
-instruct loadS(iRegINoSp dst, memory2 mem)
+instruct loadS(iRegINoSp dst, memory mem)
 %{
   match(Set dst (LoadS mem));
   predicate(!needs_acquiring_load(n));
@@ -6435,7 +6242,7 @@ instruct loadS(iRegINoSp dst, memory2 mem)
 %}
 
 // Load Short (16 bit signed) into long
-instruct loadS2L(iRegLNoSp dst, memory2 mem)
+instruct loadS2L(iRegLNoSp dst, memory mem)
 %{
   match(Set dst (ConvI2L (LoadS mem)));
   predicate(!needs_acquiring_load(n->in(1)));
@@ -6449,7 +6256,7 @@ instruct loadS2L(iRegLNoSp dst, memory2 mem)
 %}
 
 // Load Char (16 bit unsigned)
-instruct loadUS(iRegINoSp dst, memory2 mem)
+instruct loadUS(iRegINoSp dst, memory mem)
 %{
   match(Set dst (LoadUS mem));
   predicate(!needs_acquiring_load(n));
@@ -6463,7 +6270,7 @@ instruct loadUS(iRegINoSp dst, memory2 mem)
 %}
 
 // Load Short/Char (16 bit unsigned) into long
-instruct loadUS2L(iRegLNoSp dst, memory2 mem)
+instruct loadUS2L(iRegLNoSp dst, memory mem)
 %{
   match(Set dst (ConvI2L (LoadUS mem)));
   predicate(!needs_acquiring_load(n->in(1)));
@@ -6477,7 +6284,7 @@ instruct loadUS2L(iRegLNoSp dst, memory2 mem)
 %}
 
 // Load Integer (32 bit signed)
-instruct loadI(iRegINoSp dst, memory4 mem)
+instruct loadI(iRegINoSp dst, memory mem)
 %{
   match(Set dst (LoadI mem));
   predicate(!needs_acquiring_load(n));
@@ -6491,7 +6298,7 @@ instruct loadI(iRegINoSp dst, memory4 mem)
 %}
 
 // Load Integer (32 bit signed) into long
-instruct loadI2L(iRegLNoSp dst, memory4 mem)
+instruct loadI2L(iRegLNoSp dst, memory mem)
 %{
   match(Set dst (ConvI2L (LoadI mem)));
   predicate(!needs_acquiring_load(n->in(1)));
@@ -6505,7 +6312,7 @@ instruct loadI2L(iRegLNoSp dst, memory4 mem)
 %}
 
 // Load Integer (32 bit unsigned) into long
-instruct loadUI2L(iRegLNoSp dst, memory4 mem, immL_32bits mask)
+instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
 %{
   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
   predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));
@@ -6519,7 +6326,7 @@ instruct loadUI2L(iRegLNoSp dst, memory4 mem, immL_32bits mask)
 %}
 
 // Load Long (64 bit signed)
-instruct loadL(iRegLNoSp dst, memory8 mem)
+instruct loadL(iRegLNoSp dst, memory mem)
 %{
   match(Set dst (LoadL mem));
   predicate(!needs_acquiring_load(n));
@@ -6533,7 +6340,7 @@ instruct loadL(iRegLNoSp dst, memory8 mem)
 %}
 
 // Load Range
-instruct loadRange(iRegINoSp dst, memory4 mem)
+instruct loadRange(iRegINoSp dst, memory mem)
 %{
   match(Set dst (LoadRange mem));
 
@@ -6546,7 +6353,7 @@ instruct loadRange(iRegINoSp dst, memory4 mem)
 %}
 
 // Load Pointer
-instruct loadP(iRegPNoSp dst, memory8 mem)
+instruct loadP(iRegPNoSp dst, memory mem)
 %{
   match(Set dst (LoadP mem));
   predicate(!needs_acquiring_load(n) && (n->as_Load()->barrier_data() == 0));
@@ -6560,7 +6367,7 @@ instruct loadP(iRegPNoSp dst, memory8 mem)
 %}
 
 // Load Compressed Pointer
-instruct loadN(iRegNNoSp dst, memory4 mem)
+instruct loadN(iRegNNoSp dst, memory mem)
 %{
   match(Set dst (LoadN mem));
   predicate(!needs_acquiring_load(n));
@@ -6574,7 +6381,7 @@ instruct loadN(iRegNNoSp dst, memory4 mem)
 %}
 
 // Load Klass Pointer
-instruct loadKlass(iRegPNoSp dst, memory8 mem)
+instruct loadKlass(iRegPNoSp dst, memory mem)
 %{
   match(Set dst (LoadKlass mem));
   predicate(!needs_acquiring_load(n));
@@ -6588,7 +6395,7 @@ instruct loadKlass(iRegPNoSp dst, memory8 mem)
 %}
 
 // Load Narrow Klass Pointer
-instruct loadNKlass(iRegNNoSp dst, memory4 mem)
+instruct loadNKlass(iRegNNoSp dst, memory mem)
 %{
   match(Set dst (LoadNKlass mem));
   predicate(!needs_acquiring_load(n));
@@ -6602,7 +6409,7 @@ instruct loadNKlass(iRegNNoSp dst, memory4 mem)
 %}
 
 // Load Float
-instruct loadF(vRegF dst, memory4 mem)
+instruct loadF(vRegF dst, memory mem)
 %{
   match(Set dst (LoadF mem));
   predicate(!needs_acquiring_load(n));
@@ -6616,7 +6423,7 @@ instruct loadF(vRegF dst, memory4 mem)
 %}
 
 // Load Double
-instruct loadD(vRegD dst, memory8 mem)
+instruct loadD(vRegD dst, memory mem)
 %{
   match(Set dst (LoadD mem));
   predicate(!needs_acquiring_load(n));
@@ -6820,7 +6627,7 @@ instruct loadConD(vRegD dst, immD con) %{
 // Store Instructions
 
 // Store CMS card-mark Immediate
-instruct storeimmCM0(immI0 zero, memory1 mem)
+instruct storeimmCM0(immI0 zero, memory mem)
 %{
   match(Set mem (StoreCM mem zero));
 
@@ -6835,7 +6642,7 @@ instruct storeimmCM0(immI0 zero, memory1 mem)
 
 // Store CMS card-mark Immediate with intervening StoreStore
 // needed when using CMS with no conditional card marking
-instruct storeimmCM0_ordered(immI0 zero, memory1 mem)
+instruct storeimmCM0_ordered(immI0 zero, memory mem)
 %{
   match(Set mem (StoreCM mem zero));
 
@@ -6850,7 +6657,7 @@ instruct storeimmCM0_ordered(immI0 zero, memory1 mem)
 %}
 
 // Store Byte
-instruct storeB(iRegIorL2I src, memory1 mem)
+instruct storeB(iRegIorL2I src, memory mem)
 %{
   match(Set mem (StoreB mem src));
   predicate(!needs_releasing_store(n));
@@ -6864,7 +6671,7 @@ instruct storeB(iRegIorL2I src, memory1 mem)
 %}
 
 
-instruct storeimmB0(immI0 zero, memory1 mem)
+instruct storeimmB0(immI0 zero, memory mem)
 %{
   match(Set mem (StoreB mem zero));
   predicate(!needs_releasing_store(n));
@@ -6878,7 +6685,7 @@ instruct storeimmB0(immI0 zero, memory1 mem)
 %}
 
 // Store Char/Short
-instruct storeC(iRegIorL2I src, memory2 mem)
+instruct storeC(iRegIorL2I src, memory mem)
 %{
   match(Set mem (StoreC mem src));
   predicate(!needs_releasing_store(n));
@@ -6891,7 +6698,7 @@ instruct storeC(iRegIorL2I src, memory2 mem)
   ins_pipe(istore_reg_mem);
 %}
 
-instruct storeimmC0(immI0 zero, memory2 mem)
+instruct storeimmC0(immI0 zero, memory mem)
 %{
   match(Set mem (StoreC mem zero));
   predicate(!needs_releasing_store(n));
@@ -6906,7 +6713,7 @@ instruct storeimmC0(immI0 zero, memory2 mem)
 
 // Store Integer
 
-instruct storeI(iRegIorL2I src, memory4 mem)
+instruct storeI(iRegIorL2I src, memory mem)
 %{
   match(Set mem(StoreI mem src));
   predicate(!needs_releasing_store(n));
@@ -6919,7 +6726,7 @@ instruct storeI(iRegIorL2I src, memory4 mem)
   ins_pipe(istore_reg_mem);
 %}
 
-instruct storeimmI0(immI0 zero, memory4 mem)
+instruct storeimmI0(immI0 zero, memory mem)
 %{
   match(Set mem(StoreI mem zero));
   predicate(!needs_releasing_store(n));
@@ -6933,7 +6740,7 @@ instruct storeimmI0(immI0 zero, memory4 mem)
 %}
 
 // Store Long (64 bit signed)
-instruct storeL(iRegL src, memory8 mem)
+instruct storeL(iRegL src, memory mem)
 %{
   match(Set mem (StoreL mem src));
   predicate(!needs_releasing_store(n));
@@ -6947,7 +6754,7 @@ instruct storeL(iRegL src, memory8 mem)
 %}
 
 // Store Long (64 bit signed)
-instruct storeimmL0(immL0 zero, memory8 mem)
+instruct storeimmL0(immL0 zero, memory mem)
 %{
   match(Set mem (StoreL mem zero));
   predicate(!needs_releasing_store(n));
@@ -6961,7 +6768,7 @@ instruct storeimmL0(immL0 zero, memory8 mem)
 %}
 
 // Store Pointer
-instruct storeP(iRegP src, memory8 mem)
+instruct storeP(iRegP src, memory mem)
 %{
   match(Set mem (StoreP mem src));
   predicate(!needs_releasing_store(n) && n->as_Store()->barrier_data() == 0);
@@ -6975,7 +6782,7 @@ instruct storeP(iRegP src, memory8 mem)
 %}
 
 // Store Pointer
-instruct storeimmP0(immP0 zero, memory8 mem)
+instruct storeimmP0(immP0 zero, memory mem)
 %{
   match(Set mem (StoreP mem zero));
   predicate(!needs_releasing_store(n) && n->as_Store()->barrier_data() == 0);
@@ -6989,7 +6796,7 @@ instruct storeimmP0(immP0 zero, memory8 mem)
 %}
 
 // Store Compressed Pointer
-instruct storeN(iRegN src, memory4 mem)
+instruct storeN(iRegN src, memory mem)
 %{
   match(Set mem (StoreN mem src));
   predicate(!needs_releasing_store(n));
@@ -7002,7 +6809,7 @@ instruct storeN(iRegN src, memory4 mem)
   ins_pipe(istore_reg_mem);
 %}
 
-instruct storeImmN0(immN0 zero, memory4 mem)
+instruct storeImmN0(immN0 zero, memory mem)
 %{
   match(Set mem (StoreN mem zero));
   predicate(!needs_releasing_store(n));
@@ -7016,7 +6823,7 @@ instruct storeImmN0(immN0 zero, memory4 mem)
 %}
 
 // Store Float
-instruct storeF(vRegF src, memory4 mem)
+instruct storeF(vRegF src, memory mem)
 %{
   match(Set mem (StoreF mem src));
   predicate(!needs_releasing_store(n));
@@ -7033,7 +6840,7 @@ instruct storeF(vRegF src, memory4 mem)
 // implement storeImmF0 and storeFImmPacked
 
 // Store Double
-instruct storeD(vRegD src, memory8 mem)
+instruct storeD(vRegD src, memory mem)
 %{
   match(Set mem (StoreD mem src));
   predicate(!needs_releasing_store(n));
@@ -7047,7 +6854,7 @@ instruct storeD(vRegD src, memory8 mem)
 %}
 
 // Store Compressed Klass Pointer
-instruct storeNKlass(iRegN src, memory4 mem)
+instruct storeNKlass(iRegN src, memory mem)
 %{
   predicate(!needs_releasing_store(n));
   match(Set mem (StoreNKlass mem src));
@@ -7066,7 +6873,7 @@ instruct storeNKlass(iRegN src, memory4 mem)
 // prefetch instructions
 // Must be safe to execute with invalid address (cannot fault).
 
-instruct prefetchalloc( memory8 mem ) %{
+instruct prefetchalloc( memory mem ) %{
   match(PrefetchAllocation mem);
 
   ins_cost(INSN_COST);
@@ -7635,7 +7442,7 @@ instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
   ins_pipe(pipe_class_default);
 %}
 
-instruct popCountI_mem(iRegINoSp dst, memory4 mem, vRegF tmp) %{
+instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
   match(Set dst (PopCountI (LoadI mem)));
   effect(TEMP tmp);
   ins_cost(INSN_COST * 13);
@@ -7676,7 +7483,7 @@ instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
   ins_pipe(pipe_class_default);
 %}
 
-instruct popCountL_mem(iRegINoSp dst, memory8 mem, vRegD tmp) %{
+instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
   match(Set dst (PopCountL (LoadL mem)));
   effect(TEMP tmp);
   ins_cost(INSN_COST * 13);
@@ -16812,7 +16619,7 @@ instruct compressBitsI_reg(iRegINoSp dst, iRegIorL2I src, iRegIorL2I mask,
   ins_pipe(pipe_slow);
 %}
 
-instruct compressBitsI_memcon(iRegINoSp dst, memory4 mem, immI mask,
+instruct compressBitsI_memcon(iRegINoSp dst, memory mem, immI mask,
                            vRegF tdst, vRegF tsrc, vRegF tmask) %{
   match(Set dst (CompressBits (LoadI mem) mask));
   effect(TEMP tdst, TEMP tsrc, TEMP tmask);
@@ -16849,7 +16656,7 @@ instruct compressBitsL_reg(iRegLNoSp dst, iRegL src, iRegL mask,
   ins_pipe(pipe_slow);
 %}
 
-instruct compressBitsL_memcon(iRegLNoSp dst, memory8 mem, immL mask,
+instruct compressBitsL_memcon(iRegLNoSp dst, memory mem, immL mask,
                            vRegF tdst, vRegF tsrc, vRegF tmask) %{
   match(Set dst (CompressBits (LoadL mem) mask));
   effect(TEMP tdst, TEMP tsrc, TEMP tmask);
@@ -16886,7 +16693,7 @@ instruct expandBitsI_reg(iRegINoSp dst, iRegIorL2I src, iRegIorL2I mask,
   ins_pipe(pipe_slow);
 %}
 
-instruct expandBitsI_memcon(iRegINoSp dst, memory4 mem, immI mask,
+instruct expandBitsI_memcon(iRegINoSp dst, memory mem, immI mask,
                          vRegF tdst, vRegF tsrc, vRegF tmask) %{
   match(Set dst (ExpandBits (LoadI mem) mask));
   effect(TEMP tdst, TEMP tsrc, TEMP tmask);
@@ -16924,7 +16731,7 @@ instruct expandBitsL_reg(iRegLNoSp dst, iRegL src, iRegL mask,
 %}
 
 
-instruct expandBitsL_memcon(iRegINoSp dst, memory8 mem, immL mask,
+instruct expandBitsL_memcon(iRegINoSp dst, memory mem, immL mask,
                          vRegF tdst, vRegF tsrc, vRegF tmask) %{
   match(Set dst (ExpandBits (LoadL mem) mask));
   effect(TEMP tdst, TEMP tsrc, TEMP tmask);
diff --git a/src/hotspot/cpu/aarch64/aarch64_vector.ad b/src/hotspot/cpu/aarch64/aarch64_vector.ad
index 1ebc6408a6094..637d3de73af6f 100644
--- a/src/hotspot/cpu/aarch64/aarch64_vector.ad
+++ b/src/hotspot/cpu/aarch64/aarch64_vector.ad
@@ -345,7 +345,7 @@ source %{
 // ------------------------------ Vector load/store ----------------------------
 
 // Load Vector (16 bits)
-instruct loadV2(vReg dst, vmem2 mem) %{
+instruct loadV2(vReg dst, vmem mem) %{
   predicate(n->as_LoadVector()->memory_size() == 2);
   match(Set dst (LoadVector mem));
   format %{ "loadV2 $dst, $mem\t# vector (16 bits)" %}
@@ -354,7 +354,7 @@ instruct loadV2(vReg dst, vmem2 mem) %{
 %}
 
 // Store Vector (16 bits)
-instruct storeV2(vReg src, vmem2 mem) %{
+instruct storeV2(vReg src, vmem mem) %{
   predicate(n->as_StoreVector()->memory_size() == 2);
   match(Set mem (StoreVector mem src));
   format %{ "storeV2 $mem, $src\t# vector (16 bits)" %}
@@ -363,7 +363,7 @@ instruct storeV2(vReg src, vmem2 mem) %{
 %}
 
 // Load Vector (32 bits)
-instruct loadV4(vReg dst, vmem4 mem) %{
+instruct loadV4(vReg dst, vmem mem) %{
   predicate(n->as_LoadVector()->memory_size() == 4);
   match(Set dst (LoadVector mem));
   format %{ "loadV4 $dst, $mem\t# vector (32 bits)" %}
@@ -372,7 +372,7 @@ instruct loadV4(vReg dst, vmem4 mem) %{
 %}
 
 // Store Vector (32 bits)
-instruct storeV4(vReg src, vmem4 mem) %{
+instruct storeV4(vReg src, vmem mem) %{
   predicate(n->as_StoreVector()->memory_size() == 4);
   match(Set mem (StoreVector mem src));
   format %{ "storeV4 $mem, $src\t# vector (32 bits)" %}
@@ -381,7 +381,7 @@ instruct storeV4(vReg src, vmem4 mem) %{
 %}
 
 // Load Vector (64 bits)
-instruct loadV8(vReg dst, vmem8 mem) %{
+instruct loadV8(vReg dst, vmem mem) %{
   predicate(n->as_LoadVector()->memory_size() == 8);
   match(Set dst (LoadVector mem));
   format %{ "loadV8 $dst, $mem\t# vector (64 bits)" %}
@@ -390,7 +390,7 @@ instruct loadV8(vReg dst, vmem8 mem) %{
 %}
 
 // Store Vector (64 bits)
-instruct storeV8(vReg src, vmem8 mem) %{
+instruct storeV8(vReg src, vmem mem) %{
   predicate(n->as_StoreVector()->memory_size() == 8);
   match(Set mem (StoreVector mem src));
   format %{ "storeV8 $mem, $src\t# vector (64 bits)" %}
@@ -399,7 +399,7 @@ instruct storeV8(vReg src, vmem8 mem) %{
 %}
 
 // Load Vector (128 bits)
-instruct loadV16(vReg dst, vmem16 mem) %{
+instruct loadV16(vReg dst, vmem mem) %{
   predicate(n->as_LoadVector()->memory_size() == 16);
   match(Set dst (LoadVector mem));
   format %{ "loadV16 $dst, $mem\t# vector (128 bits)" %}
@@ -408,7 +408,7 @@ instruct loadV16(vReg dst, vmem16 mem) %{
 %}
 
 // Store Vector (128 bits)
-instruct storeV16(vReg src, vmem16 mem) %{
+instruct storeV16(vReg src, vmem mem) %{
   predicate(n->as_StoreVector()->memory_size() == 16);
   match(Set mem (StoreVector mem src));
   format %{ "storeV16 $mem, $src\t# vector (128 bits)" %}
diff --git a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
index 29f927723688f..b3403ec82a1fe 100644
--- a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
+++ b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
@@ -338,7 +338,7 @@ dnl VECTOR_LOAD_STORE($1,   $2,     $3,       $4,    $5  )
 dnl VECTOR_LOAD_STORE(type, nbytes, arg_name, nbits, size)
 define(`VECTOR_LOAD_STORE', `
 // ifelse(load, $1, Load, Store) Vector ($4 bits)
-instruct $1V$2(vReg $3, vmem$2 mem) %{
+instruct $1V$2(vReg $3, vmem mem) %{
   predicate(`n->as_'ifelse(load, $1, Load, Store)Vector()->memory_size() == $2);
   match(Set ifelse(load, $1, dst (LoadVector mem), mem (StoreVector mem src)));
   format %{ "$1V$2 ifelse(load, $1, `$dst, $mem', `$mem, $src')\t# vector ($4 bits)" %}
diff --git a/src/hotspot/cpu/aarch64/ad_encode.m4 b/src/hotspot/cpu/aarch64/ad_encode.m4
index 008dbd2c9369c..e3d8ea661b60a 100644
--- a/src/hotspot/cpu/aarch64/ad_encode.m4
+++ b/src/hotspot/cpu/aarch64/ad_encode.m4
@@ -34,7 +34,7 @@ define(access, `
 define(load,`
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
-  enc_class aarch64_enc_$2($1 dst, memory$5 mem) %{dnl
+  enc_class aarch64_enc_$2($1 dst, memory mem) %{dnl
 access(dst,$2,$3,$4,$5)')dnl
 load(iRegI,ldrsbw,,,1)
 load(iRegI,ldrsb,,,1)
@@ -53,12 +53,12 @@ load(vRegD,ldrd,Float,,8)
 define(STORE,`
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
-  enc_class aarch64_enc_$2($1 src, memory$5 mem) %{dnl
+  enc_class aarch64_enc_$2($1 src, memory mem) %{dnl
 access(src,$2,$3,$4,$5)')dnl
 define(STORE0,`
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
-  enc_class aarch64_enc_$2`'0(memory$4 mem) %{
+  enc_class aarch64_enc_$2`'0(memory mem) %{
     choose(masm,zr,$2,$mem->opcode(),
         as_$3Register($mem$$base),$mem$$index,$mem$$scale,$mem$$disp,$4)')dnl
 STORE(iRegI,strb,,,1)
@@ -82,7 +82,7 @@ STORE(vRegD,strd,Float,,8)
 
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
-  enc_class aarch64_enc_strb0_ordered(memory4 mem) %{
+  enc_class aarch64_enc_strb0_ordered(memory mem) %{
       __ membar(Assembler::StoreStore);
       loadStore(masm, &MacroAssembler::strb, zr, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
diff --git a/src/hotspot/cpu/aarch64/gc/x/x_aarch64.ad b/src/hotspot/cpu/aarch64/gc/x/x_aarch64.ad
index c7c7165affb57..5e690a8e47b94 100644
--- a/src/hotspot/cpu/aarch64/gc/x/x_aarch64.ad
+++ b/src/hotspot/cpu/aarch64/gc/x/x_aarch64.ad
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -51,7 +51,7 @@ static void x_load_barrier_slow_path(MacroAssembler* masm, const MachNode* node,
 %}
 
 // Load Pointer
-instruct xLoadP(iRegPNoSp dst, memory8 mem, rFlagsReg cr)
+instruct xLoadP(iRegPNoSp dst, memory mem, rFlagsReg cr)
 %{
   match(Set dst (LoadP mem));
   predicate(UseZGC && !ZGenerational && !needs_acquiring_load(n) && (n->as_Load()->barrier_data() != 0));
@@ -62,7 +62,13 @@ instruct xLoadP(iRegPNoSp dst, memory8 mem, rFlagsReg cr)
   format %{ "ldr  $dst, $mem" %}
 
   ins_encode %{
-    const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+    Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+    if (ref_addr.getMode() == Address::base_plus_offset) {
+      // Fix up any out-of-range offsets.
+      assert_different_registers(rscratch1, as_Register($mem$$base));
+      assert_different_registers(rscratch1, $dst$$Register);
+      ref_addr = __ legitimize_address(ref_addr, 8, rscratch1);
+    }
     __ ldr($dst$$Register, ref_addr);
     x_load_barrier(masm, this, ref_addr, $dst$$Register, rscratch2 /* tmp */, barrier_data());
   %}
diff --git a/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad b/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad
index 92181e2b6b908..1510b42bfe97d 100644
--- a/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad
+++ b/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -100,7 +100,7 @@ static void z_store_barrier(MacroAssembler* masm, const MachNode* node, Address
 %}
 
 // Load Pointer
-instruct zLoadP(iRegPNoSp dst, memory8 mem, rFlagsReg cr)
+instruct zLoadP(iRegPNoSp dst, memory mem, rFlagsReg cr)
 %{
   match(Set dst (LoadP mem));
   predicate(UseZGC && ZGenerational && !needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0);
@@ -111,7 +111,13 @@ instruct zLoadP(iRegPNoSp dst, memory8 mem, rFlagsReg cr)
   format %{ "ldr  $dst, $mem" %}
 
   ins_encode %{
-    const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+    Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+    if (ref_addr.getMode() == Address::base_plus_offset) {
+      // Fix up any out-of-range offsets.
+      assert_different_registers(rscratch2, as_Register($mem$$base));
+      assert_different_registers(rscratch2, $dst$$Register);
+      ref_addr = __ legitimize_address(ref_addr, 8, rscratch2);
+    }
     __ ldr($dst$$Register, ref_addr);
     z_load_barrier(masm, this, ref_addr, $dst$$Register, rscratch1);
   %}
diff --git a/src/hotspot/share/adlc/archDesc.cpp b/src/hotspot/share/adlc/archDesc.cpp
index b9bdf73bc15f0..f084f506bf587 100644
--- a/src/hotspot/share/adlc/archDesc.cpp
+++ b/src/hotspot/share/adlc/archDesc.cpp
@@ -249,15 +249,6 @@ void ArchDesc::inspectOperands() {
     MatchRule *mrule = op->_matrule;
     Predicate *pred  = op->_predicate;
 
-    // If there are multiple arguments, we need to insert
-    // parentheses for predicate so that these arguments
-    // can be chained together logically with "&&".
-    if (op->_matrule &&
-        !(op->_matrule->_lChild == nullptr &&
-          op->_matrule->_rChild == nullptr)) {
-      pred = InstructForm::build_predicate(op->_matrule, op->_predicate);
-    }
-
     // Grab the machine type of the operand
     const char  *rootOp    = op->_ident;
     mrule->_machType  = rootOp;
@@ -305,7 +296,7 @@ void ArchDesc::inspectInstructions() {
     if ( instr->_matrule == nullptr )  continue;
 
     MatchRule &mrule = *instr->_matrule;
-    Predicate* pred  = InstructForm::build_predicate(instr->_matrule, instr->_predicate);
+    Predicate *pred  =  instr->build_predicate();
 
     // Grab the machine type of the operand
     const char  *rootOp    = instr->_ident;
diff --git a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp
index efdebb98bd085..be97547f8ce11 100644
--- a/src/hotspot/share/adlc/formssel.cpp
+++ b/src/hotspot/share/adlc/formssel.cpp
@@ -1525,25 +1525,24 @@ void MachNodeForm::output(FILE *fp) {
 }
 
 //------------------------------build_predicate--------------------------------
-// Build predicates for instructions or operands.
-//
-// If the user uses the same operand name twice, we need to check that
-// the operands are pointer-equivalent in the DFA during the labeling process.
-Predicate* InstructForm::build_predicate(MatchRule* matrule, Predicate* predicate) {
+// Build instruction predicates.  If the user uses the same operand name
+// twice, we need to check that the operands are pointer-equivalent in
+// the DFA during the labeling process.
+Predicate *InstructForm::build_predicate() {
   const int buflen = 1024;
   char buf[buflen], *s=buf;
   Dict names(cmpstr,hashstr,Form::arena);       // Map Names to counts
 
-  MatchNode* mnode =
-    strcmp(matrule->_opType, "Set") ? matrule : matrule->_rChild;
+  MatchNode *mnode =
+    strcmp(_matrule->_opType, "Set") ? _matrule : _matrule->_rChild;
   if (mnode != nullptr) mnode->count_instr_names(names);
 
   uint first = 1;
   // Start with the predicate supplied in the .ad file.
-  if (predicate) {
+  if (_predicate) {
     if (first) first = 0;
     strcpy(s, "("); s += strlen(s);
-    strncpy(s, predicate->_pred, buflen - strlen(s) - 1);
+    strncpy(s, _predicate->_pred, buflen - strlen(s) - 1);
     s += strlen(s);
     strcpy(s, ")"); s += strlen(s);
   }
diff --git a/src/hotspot/share/adlc/formssel.hpp b/src/hotspot/share/adlc/formssel.hpp
index 9dfcadbfa9b1f..61d0fb40f18a8 100644
--- a/src/hotspot/share/adlc/formssel.hpp
+++ b/src/hotspot/share/adlc/formssel.hpp
@@ -221,11 +221,10 @@ class InstructForm : public Form {
   // Does this instruction need a base-oop edge?
   int needs_base_oop_edge(FormDict &globals) const;
 
-  // Build predicates for instructions or operands.
-  //
-  // If the user uses the same operand name twice, we need to check that the
-  // operands are pointer-equivalent in the DFA during the labeling process.
-  static Predicate* build_predicate(MatchRule* matrule, Predicate* predicate);
+  // Build instruction predicates.  If the user uses the same operand name
+  // twice, we need to check that the operands are pointer-equivalent in
+  // the DFA during the labeling process.
+  Predicate *build_predicate();
 
   virtual void        build_components(); // top-level operands
   // Return zero-based position in component list; -1 if not in list.
diff --git a/src/hotspot/share/adlc/output_c.cpp b/src/hotspot/share/adlc/output_c.cpp
index faaa23efdbecf..804e8f1a4e6c3 100644
--- a/src/hotspot/share/adlc/output_c.cpp
+++ b/src/hotspot/share/adlc/output_c.cpp
@@ -4316,7 +4316,7 @@ void ArchDesc::identify_cisc_spill_instructions() {
       if ( instr->_matrule == nullptr )  continue;
 
       MatchRule &mrule = *instr->_matrule;
-      Predicate* pred  = InstructForm::build_predicate(instr->_matrule, instr->_predicate);
+      Predicate *pred  =  instr->build_predicate();
 
       // Grab the machine type of the operand
       const char *rootOp = instr->_ident;
@@ -4337,7 +4337,7 @@ void ArchDesc::identify_cisc_spill_instructions() {
             && (instr2->reduce_result() != nullptr) // want same result
             && (strcmp(result, instr2->reduce_result()) == 0)) {
           MatchRule &mrule2 = *instr2->_matrule;
-          Predicate* pred2  = InstructForm::build_predicate(instr2->_matrule, instr2->_predicate);
+          Predicate *pred2  =  instr2->build_predicate();
           found_cisc_alternate = instr->cisc_spills_to(*this, instr2);
         }
       }
diff --git a/test/hotspot/jtreg/compiler/c2/TestUnalignedAccess.java b/test/hotspot/jtreg/compiler/c2/TestUnalignedAccess.java
index d05dbad4a73ba..033ea49e60955 100644
--- a/test/hotspot/jtreg/compiler/c2/TestUnalignedAccess.java
+++ b/test/hotspot/jtreg/compiler/c2/TestUnalignedAccess.java
@@ -46,20 +46,11 @@ public class TestUnalignedAccess {
     static final Unsafe UNSAFE = Unsafe.getUnsafe();
     static void sink(int x) {}
 
-    public static long lseed = 1;
-    public static int iseed = 2;
-    public static short sseed = 3;
-    public static byte bseed = 4;
-    public static long lres = lseed;
-    public static int ires = iseed;
-    public static short sres = sseed;
-    public static byte bres = bseed;
-
     public static class TestLong {
 
         private static final byte[] BYTES = new byte[LEN];
         private static final long rawdata = 0xbeef;
-        private static final long data;
+        private static final long lseed = 1;
 
         static {
             sink(2);
@@ -69,13 +60,10 @@ public static class TestLong {
 
             // 1030 can't be encoded as "base + offset" mode into the instruction field.
             UNSAFE.putLongUnaligned(BYTES, 1030, rawdata);
-            lres += UNSAFE.getLongUnaligned(BYTES, 1030);
             // 127 can be encoded into simm9 field.
-            UNSAFE.putLongUnaligned(BYTES, 127, lres);
-            lres += UNSAFE.getLongUnaligned(BYTES, 127);
+            UNSAFE.putLongUnaligned(BYTES, 127, rawdata+lseed);
             // 1096 can be encoded into uimm12 field.
-            UNSAFE.putLongUnaligned(BYTES, 1096, lres);
-            data = UNSAFE.getLongUnaligned(BYTES, 1096);
+            UNSAFE.putLongUnaligned(BYTES, 1096, rawdata-lseed);
         }
 
     }
@@ -84,7 +72,7 @@ public static class TestInt {
 
         private static final byte[] BYTES = new byte[LEN];
         private static final int rawdata = 0xbeef;
-        private static final int data;
+        private static final int iseed = 2;
         static {
             sink(2);
             // Signed immediate byte offset: range -256 to 255
@@ -93,13 +81,10 @@ public static class TestInt {
 
             // 274 can't be encoded as "base + offset" mode into the instruction field.
             UNSAFE.putIntUnaligned(BYTES, 274, rawdata);
-            ires += UNSAFE.getIntUnaligned(BYTES, 274);
             // 255 can be encoded into simm9 field.
-            UNSAFE.putIntUnaligned(BYTES, 255, ires);
-            ires += UNSAFE.getIntUnaligned(BYTES, 255);
+            UNSAFE.putIntUnaligned(BYTES, 255, rawdata + iseed);
             // 528 can be encoded into uimm12 field.
-            UNSAFE.putIntUnaligned(BYTES, 528, ires);
-            data = UNSAFE.getIntUnaligned(BYTES, 528);
+            UNSAFE.putIntUnaligned(BYTES, 528, rawdata - iseed);
         }
 
     }
@@ -108,7 +93,7 @@ public static class TestShort {
 
         private static final byte[] BYTES = new byte[LEN];
         private static final short rawdata = (short)0xbeef;
-        private static final short data;
+        private static final short sseed = 3;
         static {
             sink(2);
             // Signed immediate byte offset: range -256 to 255
@@ -117,13 +102,10 @@ public static class TestShort {
 
             // 257 can't be encoded as "base + offset" mode into the instruction field.
             UNSAFE.putShortUnaligned(BYTES, 257, rawdata);
-            sres = (short) (sres + UNSAFE.getShortUnaligned(BYTES, 257));
             // 253 can be encoded into simm9 field.
-            UNSAFE.putShortUnaligned(BYTES, 253, sres);
-            sres = (short) (sres + UNSAFE.getShortUnaligned(BYTES, 253));
+            UNSAFE.putShortUnaligned(BYTES, 253, (short) (rawdata + sseed));
             // 272 can be encoded into uimm12 field.
-            UNSAFE.putShortUnaligned(BYTES, 272, sres);
-            data = UNSAFE.getShortUnaligned(BYTES, 272);
+            UNSAFE.putShortUnaligned(BYTES, 272, (short) (rawdata - sseed));
         }
 
     }
@@ -132,7 +114,7 @@ public static class TestByte {
 
         private static final byte[] BYTES = new byte[LEN];
         private static final byte rawdata = (byte)0x3f;
-        private static final byte data;
+        private static final byte bseed = 4;
         static {
             sink(2);
             // Signed immediate byte offset: range -256 to 255
@@ -141,29 +123,34 @@ public static class TestByte {
 
-            // 272 can be encoded into simm9 field.
+            // 272 can be encoded into uimm12 field.
             UNSAFE.putByte(BYTES, 272, rawdata);
-            bres = (byte) (bres + UNSAFE.getByte(BYTES, 272));
             // 53 can be encoded into simm9 field.
-            UNSAFE.putByte(BYTES, 53, bres);
-            bres = (byte) (bres + UNSAFE.getByte(BYTES, 53));
+            UNSAFE.putByte(BYTES, 53, (byte) (rawdata + bseed));
             // 1027 can be encoded into uimm12 field.
-            UNSAFE.putByte(BYTES, 1027, bres);
-            data = UNSAFE.getByte(BYTES, 1027);
+            UNSAFE.putByte(BYTES, 1027, (byte) (rawdata - bseed));
         }
 
     }
 
     static void test() {
         TestLong ta = new TestLong();
-        Asserts.assertEquals(ta.data, (ta.rawdata + lseed) * 2, "putUnaligned long failed!");
+        Asserts.assertEquals(UNSAFE.getLongUnaligned(ta.BYTES, 1030), ta.rawdata, "putUnaligned long failed!");
+        Asserts.assertEquals(UNSAFE.getLongUnaligned(ta.BYTES, 127), ta.rawdata + ta.lseed, "putUnaligned long failed!");
+        Asserts.assertEquals(UNSAFE.getLongUnaligned(ta.BYTES, 1096), ta.rawdata - ta.lseed, "putUnaligned long failed!");
 
         TestInt tb = new TestInt();
-        Asserts.assertEquals(tb.data, (tb.rawdata + iseed) * 2, "putUnaligned int failed!");
+        Asserts.assertEquals(UNSAFE.getIntUnaligned(tb.BYTES, 274), tb.rawdata, "putUnaligned int failed!");
+        Asserts.assertEquals(UNSAFE.getIntUnaligned(tb.BYTES, 255), tb.rawdata + tb.iseed, "putUnaligned int failed!");
+        Asserts.assertEquals(UNSAFE.getIntUnaligned(tb.BYTES, 528), tb.rawdata - tb.iseed, "putUnaligned int failed!");
 
         TestShort tc = new TestShort();
-        Asserts.assertEquals(tc.data, (short) (((short) (tc.rawdata + sseed)) * 2), "putUnaligned short failed!");
+        Asserts.assertEquals(UNSAFE.getShortUnaligned(tc.BYTES, 257), tc.rawdata, "putUnaligned short failed!");
+        Asserts.assertEquals(UNSAFE.getShortUnaligned(tc.BYTES, 253), (short) (tc.rawdata + tc.sseed), "putUnaligned short failed!");
+        Asserts.assertEquals(UNSAFE.getShortUnaligned(tc.BYTES, 272), (short) (tc.rawdata - tc.sseed), "putUnaligned short failed!");
 
         TestByte td = new TestByte();
-        Asserts.assertEquals(td.data, (byte) (((byte) (td.rawdata + bseed)) * 2), "put byte failed!");
+        Asserts.assertEquals(UNSAFE.getByte(td.BYTES, 272), td.rawdata, "put byte failed!");
+        Asserts.assertEquals(UNSAFE.getByte(td.BYTES, 53), (byte) (td.rawdata + td.bseed), "put byte failed!");
+        Asserts.assertEquals(UNSAFE.getByte(td.BYTES, 1027), (byte) (td.rawdata - td.bseed), "put byte failed!");
     }
 
     public static void main(String[] strArr) {