Skip to content

Commit 5ec1b80

Browse files
RealFYang and Yanhong Zhu committed
8255287: aarch64: fix SVE patterns for vector shift count
Co-authored-by: Yanhong Zhu <zhuyanhong2@huawei.com> Reviewed-by: adinn
1 parent 12daf2b commit 5ec1b80

File tree

7 files changed

+149
-40
lines changed

7 files changed

+149
-40
lines changed

src/hotspot/cpu/aarch64/aarch64_sve.ad

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1329,7 +1329,7 @@ instruct vlsrL(vReg dst, vReg shift) %{
13291329

13301330
instruct vasrB_imm(vReg dst, vReg src, immI shift) %{
13311331
predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
1332-
match(Set dst (RShiftVB src shift));
1332+
match(Set dst (RShiftVB src (RShiftCntV shift)));
13331333
ins_cost(SVE_COST);
13341334
format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (B)" %}
13351335
ins_encode %{
@@ -1348,7 +1348,7 @@ instruct vasrB_imm(vReg dst, vReg src, immI shift) %{
13481348

13491349
instruct vasrS_imm(vReg dst, vReg src, immI shift) %{
13501350
predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
1351-
match(Set dst (RShiftVS src shift));
1351+
match(Set dst (RShiftVS src (RShiftCntV shift)));
13521352
ins_cost(SVE_COST);
13531353
format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (H)" %}
13541354
ins_encode %{
@@ -1367,7 +1367,7 @@ instruct vasrS_imm(vReg dst, vReg src, immI shift) %{
13671367

13681368
instruct vasrI_imm(vReg dst, vReg src, immI shift) %{
13691369
predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
1370-
match(Set dst (RShiftVI src shift));
1370+
match(Set dst (RShiftVI src (RShiftCntV shift)));
13711371
ins_cost(SVE_COST);
13721372
format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (S)" %}
13731373
ins_encode %{
@@ -1385,7 +1385,7 @@ instruct vasrI_imm(vReg dst, vReg src, immI shift) %{
13851385

13861386
instruct vasrL_imm(vReg dst, vReg src, immI shift) %{
13871387
predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
1388-
match(Set dst (RShiftVL src shift));
1388+
match(Set dst (RShiftVL src (RShiftCntV shift)));
13891389
ins_cost(SVE_COST);
13901390
format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (D)" %}
13911391
ins_encode %{
@@ -1403,7 +1403,7 @@ instruct vasrL_imm(vReg dst, vReg src, immI shift) %{
14031403

14041404
instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{
14051405
predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
1406-
match(Set dst (URShiftVB src shift));
1406+
match(Set dst (URShiftVB src (RShiftCntV shift)));
14071407
ins_cost(SVE_COST);
14081408
format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (B)" %}
14091409
ins_encode %{
@@ -1426,7 +1426,7 @@ instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{
14261426

14271427
instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{
14281428
predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
1429-
match(Set dst (URShiftVS src shift));
1429+
match(Set dst (URShiftVS src (RShiftCntV shift)));
14301430
ins_cost(SVE_COST);
14311431
format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (H)" %}
14321432
ins_encode %{
@@ -1436,7 +1436,7 @@ instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{
14361436
as_FloatRegister($src$$reg));
14371437
return;
14381438
}
1439-
if (con >= 8) {
1439+
if (con >= 16) {
14401440
__ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
14411441
as_FloatRegister($src$$reg));
14421442
return;
@@ -1449,7 +1449,7 @@ instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{
14491449

14501450
instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{
14511451
predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
1452-
match(Set dst (URShiftVI src shift));
1452+
match(Set dst (URShiftVI src (RShiftCntV shift)));
14531453
ins_cost(SVE_COST);
14541454
format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (S)" %}
14551455
ins_encode %{
@@ -1467,7 +1467,7 @@ instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{
14671467

14681468
instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{
14691469
predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
1470-
match(Set dst (URShiftVL src shift));
1470+
match(Set dst (URShiftVL src (RShiftCntV shift)));
14711471
ins_cost(SVE_COST);
14721472
format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (D)" %}
14731473
ins_encode %{
@@ -1485,7 +1485,7 @@ instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{
14851485

14861486
instruct vlslB_imm(vReg dst, vReg src, immI shift) %{
14871487
predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
1488-
match(Set dst (LShiftVB src shift));
1488+
match(Set dst (LShiftVB src (LShiftCntV shift)));
14891489
ins_cost(SVE_COST);
14901490
format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (B)" %}
14911491
ins_encode %{
@@ -1503,12 +1503,12 @@ instruct vlslB_imm(vReg dst, vReg src, immI shift) %{
15031503

15041504
instruct vlslS_imm(vReg dst, vReg src, immI shift) %{
15051505
predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
1506-
match(Set dst (LShiftVS src shift));
1506+
match(Set dst (LShiftVS src (LShiftCntV shift)));
15071507
ins_cost(SVE_COST);
15081508
format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (H)" %}
15091509
ins_encode %{
15101510
int con = (int)$shift$$constant;
1511-
if (con >= 8) {
1511+
if (con >= 16) {
15121512
__ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
15131513
as_FloatRegister($src$$reg));
15141514
return;
@@ -1521,7 +1521,7 @@ instruct vlslS_imm(vReg dst, vReg src, immI shift) %{
15211521

15221522
instruct vlslI_imm(vReg dst, vReg src, immI shift) %{
15231523
predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
1524-
match(Set dst (LShiftVI src shift));
1524+
match(Set dst (LShiftVI src (LShiftCntV shift)));
15251525
ins_cost(SVE_COST);
15261526
format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (S)" %}
15271527
ins_encode %{
@@ -1534,7 +1534,7 @@ instruct vlslI_imm(vReg dst, vReg src, immI shift) %{
15341534

15351535
instruct vlslL_imm(vReg dst, vReg src, immI shift) %{
15361536
predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
1537-
match(Set dst (LShiftVL src shift));
1537+
match(Set dst (LShiftVL src (LShiftCntV shift)));
15381538
ins_cost(SVE_COST);
15391539
format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (D)" %}
15401540
ins_encode %{

src/hotspot/cpu/aarch64/aarch64_sve_ad.m4

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -727,14 +727,14 @@ instruct $1(vReg dst, vReg shift) %{
727727
ins_pipe(pipe_slow);
728728
%}')dnl
729729
dnl
730-
dnl VSHIFT_IMM_UNPREDICATE($1, $2, $3, $4, $5 )
731-
dnl VSHIFT_IMM_UNPREDICATE(insn_name, op_name, size, min_vec_len, insn)
730+
dnl VSHIFT_IMM_UNPREDICATE($1, $2, $3, $4, $5, $6 )
731+
dnl VSHIFT_IMM_UNPREDICATE(insn_name, op_name, op_name2, size, min_vec_len, insn)
732732
define(`VSHIFT_IMM_UNPREDICATE', `
733733
instruct $1(vReg dst, vReg src, immI shift) %{
734-
predicate(UseSVE > 0 && n->as_Vector()->length() >= $4);
735-
match(Set dst ($2 src shift));
734+
predicate(UseSVE > 0 && n->as_Vector()->length() >= $5);
735+
match(Set dst ($2 src ($3 shift)));
736736
ins_cost(SVE_COST);
737-
format %{ "$5 $dst, $src, $shift\t# vector (sve) ($3)" %}
737+
format %{ "$6 $dst, $src, $shift\t# vector (sve) ($4)" %}
738738
ins_encode %{
739739
int con = (int)$shift$$constant;dnl
740740
ifelse(eval(index(`$1', `vasr') == 0 || index(`$1', `vlsr') == 0), 1, `
@@ -743,16 +743,21 @@ ifelse(eval(index(`$1', `vasr') == 0 || index(`$1', `vlsr') == 0), 1, `
743743
as_FloatRegister($src$$reg));
744744
return;
745745
}')dnl
746-
ifelse(eval(index(`$1', `vasr') == 0), 1, `ifelse(eval(index(`$3', `B') == 0), 1, `
747-
if (con >= 8) con = 7;')ifelse(eval(index(`$3', `H') == 0), 1, `
746+
ifelse(eval(index(`$1', `vasr') == 0), 1, `ifelse(eval(index(`$4', `B') == 0), 1, `
747+
if (con >= 8) con = 7;')ifelse(eval(index(`$4', `H') == 0), 1, `
748748
if (con >= 16) con = 15;')')dnl
749-
ifelse(eval((index(`$1', `vlsl') == 0 || index(`$1', `vlsr') == 0) && (index(`$3', `B') == 0 || index(`$3', `H') == 0)), 1, `
749+
ifelse(eval(index(`$1', `vlsl') == 0 || index(`$1', `vlsr') == 0), 1, `ifelse(eval(index(`$4', `B') == 0), 1, `
750750
if (con >= 8) {
751751
__ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
752752
as_FloatRegister($src$$reg));
753753
return;
754-
}')
755-
__ $5(as_FloatRegister($dst$$reg), __ $3,
754+
}')ifelse(eval(index(`$4', `H') == 0), 1, `
755+
if (con >= 16) {
756+
__ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
757+
as_FloatRegister($src$$reg));
758+
return;
759+
}')')
760+
__ $6(as_FloatRegister($dst$$reg), __ $4,
756761
as_FloatRegister($src$$reg), con);
757762
%}
758763
ins_pipe(pipe_slow);
@@ -786,18 +791,18 @@ VSHIFT_TRUE_PREDICATE(vlsrB, URShiftVB, B, 16, sve_lsr)
786791
VSHIFT_TRUE_PREDICATE(vlsrS, URShiftVS, H, 8, sve_lsr)
787792
VSHIFT_TRUE_PREDICATE(vlsrI, URShiftVI, S, 4, sve_lsr)
788793
VSHIFT_TRUE_PREDICATE(vlsrL, URShiftVL, D, 2, sve_lsr)
789-
VSHIFT_IMM_UNPREDICATE(vasrB_imm, RShiftVB, B, 16, sve_asr)
790-
VSHIFT_IMM_UNPREDICATE(vasrS_imm, RShiftVS, H, 8, sve_asr)
791-
VSHIFT_IMM_UNPREDICATE(vasrI_imm, RShiftVI, S, 4, sve_asr)
792-
VSHIFT_IMM_UNPREDICATE(vasrL_imm, RShiftVL, D, 2, sve_asr)
793-
VSHIFT_IMM_UNPREDICATE(vlsrB_imm, URShiftVB, B, 16, sve_lsr)
794-
VSHIFT_IMM_UNPREDICATE(vlsrS_imm, URShiftVS, H, 8, sve_lsr)
795-
VSHIFT_IMM_UNPREDICATE(vlsrI_imm, URShiftVI, S, 4, sve_lsr)
796-
VSHIFT_IMM_UNPREDICATE(vlsrL_imm, URShiftVL, D, 2, sve_lsr)
797-
VSHIFT_IMM_UNPREDICATE(vlslB_imm, LShiftVB, B, 16, sve_lsl)
798-
VSHIFT_IMM_UNPREDICATE(vlslS_imm, LShiftVS, H, 8, sve_lsl)
799-
VSHIFT_IMM_UNPREDICATE(vlslI_imm, LShiftVI, S, 4, sve_lsl)
800-
VSHIFT_IMM_UNPREDICATE(vlslL_imm, LShiftVL, D, 2, sve_lsl)
794+
VSHIFT_IMM_UNPREDICATE(vasrB_imm, RShiftVB, RShiftCntV, B, 16, sve_asr)
795+
VSHIFT_IMM_UNPREDICATE(vasrS_imm, RShiftVS, RShiftCntV, H, 8, sve_asr)
796+
VSHIFT_IMM_UNPREDICATE(vasrI_imm, RShiftVI, RShiftCntV, S, 4, sve_asr)
797+
VSHIFT_IMM_UNPREDICATE(vasrL_imm, RShiftVL, RShiftCntV, D, 2, sve_asr)
798+
VSHIFT_IMM_UNPREDICATE(vlsrB_imm, URShiftVB, RShiftCntV, B, 16, sve_lsr)
799+
VSHIFT_IMM_UNPREDICATE(vlsrS_imm, URShiftVS, RShiftCntV, H, 8, sve_lsr)
800+
VSHIFT_IMM_UNPREDICATE(vlsrI_imm, URShiftVI, RShiftCntV, S, 4, sve_lsr)
801+
VSHIFT_IMM_UNPREDICATE(vlsrL_imm, URShiftVL, RShiftCntV, D, 2, sve_lsr)
802+
VSHIFT_IMM_UNPREDICATE(vlslB_imm, LShiftVB, LShiftCntV, B, 16, sve_lsl)
803+
VSHIFT_IMM_UNPREDICATE(vlslS_imm, LShiftVS, LShiftCntV, H, 8, sve_lsl)
804+
VSHIFT_IMM_UNPREDICATE(vlslI_imm, LShiftVI, LShiftCntV, S, 4, sve_lsl)
805+
VSHIFT_IMM_UNPREDICATE(vlslL_imm, LShiftVL, LShiftCntV, D, 2, sve_lsl)
801806
VSHIFT_COUNT(vshiftcntB, B, 16, T_BYTE)
802807
VSHIFT_COUNT(vshiftcntS, H, 8, T_SHORT)
803808
VSHIFT_COUNT(vshiftcntI, S, 4, T_INT)

src/hotspot/cpu/aarch64/vm_version_aarch64.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,7 @@ void VM_Version::initialize() {
401401
warning("SVE does not support vector length less than 16 bytes. Disabling SVE.");
402402
UseSVE = 0;
403403
} else if ((MaxVectorSize % 16) == 0 && is_power_of_2(MaxVectorSize)) {
404-
int new_vl = set_and_get_current_sve_vector_lenght(MaxVectorSize);
404+
int new_vl = set_and_get_current_sve_vector_length(MaxVectorSize);
405405
_initial_sve_vector_length = new_vl;
406406
// Update MaxVectorSize to the largest supported value.
407407
if (new_vl < 0) {

src/hotspot/cpu/aarch64/vm_version_aarch64.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ class VM_Version : public Abstract_VM_Version {
5151
// Sets the SVE length and returns a new actual value or negative on error.
5252
// If the len is larger than the system largest supported SVE vector length,
5353
// the function sets the largest supported value.
54-
static int set_and_get_current_sve_vector_lenght(int len);
54+
static int set_and_get_current_sve_vector_length(int len);
5555
static int get_current_sve_vector_length();
5656

5757
public:

src/hotspot/os_cpu/linux_aarch64/vm_version_linux_aarch64.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ int VM_Version::get_current_sve_vector_length() {
8686
return prctl(PR_SVE_GET_VL);
8787
}
8888

89-
int VM_Version::set_and_get_current_sve_vector_lenght(int length) {
89+
int VM_Version::set_and_get_current_sve_vector_length(int length) {
9090
assert(_features & CPU_SVE, "should not call this");
9191
int new_length = prctl(PR_SVE_SET_VL, length);
9292
return new_length;

src/hotspot/os_cpu/windows_aarch64/vm_version_windows_aarch64.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ int VM_Version::get_current_sve_vector_length() {
3232
return 0;
3333
}
3434

35-
int VM_Version::set_and_get_current_sve_vector_lenght(int length) {
35+
int VM_Version::set_and_get_current_sve_vector_length(int length) {
3636
assert(_features & CPU_SVE, "should not call this");
3737
ShouldNotReachHere();
3838
return 0;
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
/*
2+
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*/
23+
24+
25+
/**
26+
* @test
27+
* @bug 8255287
28+
* @summary aarch64: fix SVE patterns for vector shift count
29+
*
30+
* @requires os.arch == "aarch64" & vm.compiler2.enabled
31+
* @run main/othervm -XX:UseSVE=2 -Xbatch -XX:-TieredCompilation
32+
* -XX:CompileCommand=compileonly,compiler.c2.aarch64.TestVectorShiftShorts::test_*
33+
* compiler.c2.aarch64.TestVectorShiftShorts
34+
*/
35+
36+
package compiler.c2.aarch64;
37+
38+
public class TestVectorShiftShorts {
39+
40+
private static final int ARRLEN = 1000;
41+
private static final int ITERS = 20000;
42+
43+
public static void main(String args[]) {
44+
short[] a0 = new short[ARRLEN];
45+
short[] a1 = new short[ARRLEN];
46+
47+
// Initialize
48+
test_init(a0, a1);
49+
50+
// Warmup
51+
for (int i = 0; i < ITERS; i++) {
52+
test_lshift(a0, a1);
53+
test_urshift(a0, a1);
54+
}
55+
56+
// Test and verify results
57+
test_init(a0, a1);
58+
test_lshift(a0, a1);
59+
verify_lshift(a0, a1);
60+
61+
test_init(a0, a1);
62+
test_urshift(a0, a1);
63+
verify_urshift(a0, a1);
64+
65+
// Finish
66+
System.out.println("Test passed");
67+
}
68+
69+
static void test_init(short[] a0, short[] a1) {
70+
for (int i = 0; i < ARRLEN; i++) {
71+
a0[i] = (short)(i & 3);
72+
a1[i] = (short)i;
73+
}
74+
}
75+
76+
static void test_lshift(short[] a0, short[] a1) {
77+
for (int i = 0; i < ARRLEN; i++) {
78+
a0[i] = (short)(a1[i] << 10);
79+
}
80+
}
81+
82+
static void verify_lshift(short[] a0, short[] a1) {
83+
for (int i = 0; i < ARRLEN; i++) {
84+
if (a0[i] != (short)(a1[i] << 10)) {
85+
throw new RuntimeException("LShift test failed.");
86+
}
87+
}
88+
}
89+
90+
static void test_urshift(short[] a0, short[] a1) {
91+
for (int i = 0; i < ARRLEN; i++) {
92+
a0[i] = (short)(a1[i] >>> 10);
93+
}
94+
}
95+
96+
static void verify_urshift(short[] a0, short[] a1) {
97+
for (int i = 0; i < ARRLEN; i++) {
98+
if (a0[i] != (short)(a1[i] >>> 10)) {
99+
throw new RuntimeException("URshift test failed.");
100+
}
101+
}
102+
}
103+
104+
}

0 commit comments

Comments (0)