Skip to content

Commit

Permalink
[AArch64][SVE] Fix extract_subvector patterns for unpacked fp types.
Browse files Browse the repository at this point in the history
The patterns added in D110163 were incorrect, since it used the wrong
element widths for its shuffles.

Example for nxv2f16 extract_subvector(nxv8f16 %in, 6):
  <a|b|c|d|e|f|g|h>
               ^^^
           extract g and h.

  => UUNPKHI .h -> .s results in:
  <e  |f  |g  |h  >

  => UUNPKHI .s -> .d results in:
  <g      |h      >

Reviewed By: david-arm

Differential Revision: https://reviews.llvm.org/D110523
  • Loading branch information
sdesmalen-arm committed Sep 29, 2021
1 parent d6216e2 commit 87bcbd6
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 30 deletions.
16 changes: 8 additions & 8 deletions llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
Expand Up @@ -1333,22 +1333,22 @@ let Predicates = [HasSVEorStreamingSVE] in {
(UUNPKHI_ZZ_S ZPR:$Zs)>;

def : Pat<(nxv2f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 0))),
(UUNPKLO_ZZ_S (UUNPKLO_ZZ_D ZPR:$Zs))>;
(UUNPKLO_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>;
def : Pat<(nxv2f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 2))),
(UUNPKHI_ZZ_S (UUNPKLO_ZZ_D ZPR:$Zs))>;
(UUNPKHI_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>;
def : Pat<(nxv2f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 4))),
(UUNPKLO_ZZ_S (UUNPKHI_ZZ_D ZPR:$Zs))>;
(UUNPKLO_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>;
def : Pat<(nxv2f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 6))),
(UUNPKHI_ZZ_S (UUNPKHI_ZZ_D ZPR:$Zs))>;
(UUNPKHI_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>;

def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 0))),
(UUNPKLO_ZZ_S (UUNPKLO_ZZ_D ZPR:$Zs))>;
(UUNPKLO_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>;
def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 2))),
(UUNPKHI_ZZ_S (UUNPKLO_ZZ_D ZPR:$Zs))>;
(UUNPKHI_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>;
def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 4))),
(UUNPKLO_ZZ_S (UUNPKHI_ZZ_D ZPR:$Zs))>;
(UUNPKLO_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>;
def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 6))),
(UUNPKHI_ZZ_S (UUNPKHI_ZZ_D ZPR:$Zs))>;
(UUNPKHI_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>;

// Concatenate two predicates.
def : Pat<(nxv4i1 (concat_vectors nxv2i1:$p1, nxv2i1:$p2)),
Expand Down
44 changes: 22 additions & 22 deletions llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
Expand Up @@ -502,8 +502,8 @@ declare <vscale x 4 x i8> @llvm.experimental.vector.extract.nxv4i8.nxv16i8(<vsca
define <vscale x 2 x half> @extract_nxv2f16_nxv6f16_0(<vscale x 6 x half> %in) {
; CHECK-LABEL: extract_nxv2f16_nxv6f16_0:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: ret
%res = call <vscale x 2 x half> @llvm.experimental.vector.extract.nxv2f16.nxv6f16(<vscale x 6 x half> %in, i64 0)
ret <vscale x 2 x half> %res
Expand All @@ -512,8 +512,8 @@ define <vscale x 2 x half> @extract_nxv2f16_nxv6f16_0(<vscale x 6 x half> %in) {
define <vscale x 2 x half> @extract_nxv2f16_nxv6f16_2(<vscale x 6 x half> %in) {
; CHECK-LABEL: extract_nxv2f16_nxv6f16_2:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: uunpkhi z0.s, z0.h
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: ret
%res = call <vscale x 2 x half> @llvm.experimental.vector.extract.nxv2f16.nxv6f16(<vscale x 6 x half> %in, i64 2)
ret <vscale x 2 x half> %res
Expand All @@ -522,8 +522,8 @@ define <vscale x 2 x half> @extract_nxv2f16_nxv6f16_2(<vscale x 6 x half> %in) {
define <vscale x 2 x half> @extract_nxv2f16_nxv6f16_4(<vscale x 6 x half> %in) {
; CHECK-LABEL: extract_nxv2f16_nxv6f16_4:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpkhi z0.s, z0.h
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: ret
%res = call <vscale x 2 x half> @llvm.experimental.vector.extract.nxv2f16.nxv6f16(<vscale x 6 x half> %in, i64 4)
ret <vscale x 2 x half> %res
Expand Down Expand Up @@ -601,8 +601,8 @@ declare <vscale x 4 x half> @llvm.experimental.vector.extract.nxv4f16.nxv16f16(<
define <vscale x 2 x half> @extract_nxv2f16_nxv8f16_0(<vscale x 8 x half> %in) {
; CHECK-LABEL: extract_nxv2f16_nxv8f16_0:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: ret
%res = call <vscale x 2 x half> @llvm.experimental.vector.extract.nxv2f16.nxv8f16(<vscale x 8 x half> %in, i64 0)
ret <vscale x 2 x half> %res
Expand All @@ -611,8 +611,8 @@ define <vscale x 2 x half> @extract_nxv2f16_nxv8f16_0(<vscale x 8 x half> %in) {
define <vscale x 2 x half> @extract_nxv2f16_nxv8f16_2(<vscale x 8 x half> %in) {
; CHECK-LABEL: extract_nxv2f16_nxv8f16_2:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: uunpkhi z0.s, z0.h
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: ret
%res = call <vscale x 2 x half> @llvm.experimental.vector.extract.nxv2f16.nxv8f16(<vscale x 8 x half> %in, i64 2)
ret <vscale x 2 x half> %res
Expand All @@ -621,8 +621,8 @@ define <vscale x 2 x half> @extract_nxv2f16_nxv8f16_2(<vscale x 8 x half> %in) {
define <vscale x 2 x half> @extract_nxv2f16_nxv8f16_4(<vscale x 8 x half> %in) {
; CHECK-LABEL: extract_nxv2f16_nxv8f16_4:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpkhi z0.s, z0.h
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: ret
%res = call <vscale x 2 x half> @llvm.experimental.vector.extract.nxv2f16.nxv8f16(<vscale x 8 x half> %in, i64 4)
ret <vscale x 2 x half> %res
Expand All @@ -631,8 +631,8 @@ define <vscale x 2 x half> @extract_nxv2f16_nxv8f16_4(<vscale x 8 x half> %in) {
define <vscale x 2 x half> @extract_nxv2f16_nxv8f16_6(<vscale x 8 x half> %in) {
; CHECK-LABEL: extract_nxv2f16_nxv8f16_6:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: uunpkhi z0.s, z0.h
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: ret
%res = call <vscale x 2 x half> @llvm.experimental.vector.extract.nxv2f16.nxv8f16(<vscale x 8 x half> %in, i64 6)
ret <vscale x 2 x half> %res
Expand Down Expand Up @@ -669,8 +669,8 @@ declare <vscale x 4 x bfloat> @llvm.experimental.vector.extract.nxv4bf16.nxv8bf1
define <vscale x 2 x bfloat> @extract_nxv2bf16_nxv6bf16_0(<vscale x 6 x bfloat> %in) {
; CHECK-LABEL: extract_nxv2bf16_nxv6bf16_0:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: ret
%res = call <vscale x 2 x bfloat> @llvm.experimental.vector.extract.nxv2bf16.nxv6bf16(<vscale x 6 x bfloat> %in, i64 0)
ret <vscale x 2 x bfloat> %res
Expand All @@ -679,8 +679,8 @@ define <vscale x 2 x bfloat> @extract_nxv2bf16_nxv6bf16_0(<vscale x 6 x bfloat>
define <vscale x 2 x bfloat> @extract_nxv2bf16_nxv6bf16_2(<vscale x 6 x bfloat> %in) {
; CHECK-LABEL: extract_nxv2bf16_nxv6bf16_2:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: uunpkhi z0.s, z0.h
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: ret
%res = call <vscale x 2 x bfloat> @llvm.experimental.vector.extract.nxv2bf16.nxv6bf16(<vscale x 6 x bfloat> %in, i64 2)
ret <vscale x 2 x bfloat> %res
Expand All @@ -689,8 +689,8 @@ define <vscale x 2 x bfloat> @extract_nxv2bf16_nxv6bf16_2(<vscale x 6 x bfloat>
define <vscale x 2 x bfloat> @extract_nxv2bf16_nxv6bf16_4(<vscale x 6 x bfloat> %in) {
; CHECK-LABEL: extract_nxv2bf16_nxv6bf16_4:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpkhi z0.s, z0.h
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: ret
%res = call <vscale x 2 x bfloat> @llvm.experimental.vector.extract.nxv2bf16.nxv6bf16(<vscale x 6 x bfloat> %in, i64 4)
ret <vscale x 2 x bfloat> %res
Expand All @@ -704,8 +704,8 @@ declare <vscale x 2 x bfloat> @llvm.experimental.vector.extract.nxv2bf16.nxv6bf1
define <vscale x 2 x bfloat> @extract_nxv2bf16_nxv8bf16_0(<vscale x 8 x bfloat> %in) {
; CHECK-LABEL: extract_nxv2bf16_nxv8bf16_0:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: ret
%res = call <vscale x 2 x bfloat> @llvm.experimental.vector.extract.nxv2bf16.nxv8bf16(<vscale x 8 x bfloat> %in, i64 0)
ret <vscale x 2 x bfloat> %res
Expand All @@ -714,8 +714,8 @@ define <vscale x 2 x bfloat> @extract_nxv2bf16_nxv8bf16_0(<vscale x 8 x bfloat>
define <vscale x 2 x bfloat> @extract_nxv2bf16_nxv8bf16_2(<vscale x 8 x bfloat> %in) {
; CHECK-LABEL: extract_nxv2bf16_nxv8bf16_2:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: uunpkhi z0.s, z0.h
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: ret
%res = call <vscale x 2 x bfloat> @llvm.experimental.vector.extract.nxv2bf16.nxv8bf16(<vscale x 8 x bfloat> %in, i64 2)
ret <vscale x 2 x bfloat> %res
Expand All @@ -724,8 +724,8 @@ define <vscale x 2 x bfloat> @extract_nxv2bf16_nxv8bf16_2(<vscale x 8 x bfloat>
define <vscale x 2 x bfloat> @extract_nxv2bf16_nxv8bf16_4(<vscale x 8 x bfloat> %in) {
; CHECK-LABEL: extract_nxv2bf16_nxv8bf16_4:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpkhi z0.s, z0.h
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: ret
%res = call <vscale x 2 x bfloat> @llvm.experimental.vector.extract.nxv2bf16.nxv8bf16(<vscale x 8 x bfloat> %in, i64 4)
ret <vscale x 2 x bfloat> %res
Expand All @@ -734,8 +734,8 @@ define <vscale x 2 x bfloat> @extract_nxv2bf16_nxv8bf16_4(<vscale x 8 x bfloat>
define <vscale x 2 x bfloat> @extract_nxv2bf16_nxv8bf16_6(<vscale x 8 x bfloat> %in) {
; CHECK-LABEL: extract_nxv2bf16_nxv8bf16_6:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: uunpkhi z0.s, z0.h
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: ret
%res = call <vscale x 2 x bfloat> @llvm.experimental.vector.extract.nxv2bf16.nxv8bf16(<vscale x 8 x bfloat> %in, i64 6)
ret <vscale x 2 x bfloat> %res
Expand Down

0 comments on commit 87bcbd6

Please sign in to comment.