Skip to content

Commit

Permalink
[AArch64][SVE] Add patterns for VSELECT of immediate merged with a variable.
Browse files Browse the repository at this point in the history

This covers forms involving "CPY (immediate, merging)".

Differential Revision: https://reviews.llvm.org/D79803
  • Loading branch information
efriedma-quic committed May 13, 2020
1 parent 1b7bf1b commit a52f10b
Show file tree
Hide file tree
Showing 2 changed files with 249 additions and 17 deletions.
38 changes: 23 additions & 15 deletions llvm/lib/Target/AArch64/SVEInstrFormats.td
Expand Up @@ -4045,22 +4045,30 @@ class sve_int_dup_imm_pred<bits<2> sz8_64, bit m, string asm,
let ElementSize = zprty.ElementSize;
}

multiclass sve_int_dup_imm_pred_merge<string asm> {
let Constraints = "$Zd = $_Zd" in {
def _B : sve_int_dup_imm_pred<0b00, 1, asm, ZPR8, "/m", (ins ZPR8:$_Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i8:$imm)>;
def _H : sve_int_dup_imm_pred<0b01, 1, asm, ZPR16, "/m", (ins ZPR16:$_Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i16:$imm)>;
def _S : sve_int_dup_imm_pred<0b10, 1, asm, ZPR32, "/m", (ins ZPR32:$_Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i32:$imm)>;
def _D : sve_int_dup_imm_pred<0b11, 1, asm, ZPR64, "/m", (ins ZPR64:$_Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i64:$imm)>;
}

def : InstAlias<"mov $Zd, $Pg/m, $imm",
(!cast<Instruction>(NAME # _B) ZPR8:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i8:$imm), 1>;
def : InstAlias<"mov $Zd, $Pg/m, $imm",
(!cast<Instruction>(NAME # _H) ZPR16:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i16:$imm), 1>;
def : InstAlias<"mov $Zd, $Pg/m, $imm",
(!cast<Instruction>(NAME # _S) ZPR32:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i32:$imm), 1>;
// Defines, for a single element size, the merging ("/m") predicated
// immediate-dup instruction together with its "mov" assembly alias and an
// instruction-selection pattern.
//
// Parameters:
//   sz8_64   - 2-bit size field forwarded to sve_int_dup_imm_pred.
//   asm      - mnemonic string forwarded to sve_int_dup_imm_pred.
//   zprty    - vector register operand used for $Zd / $_Zd.
//   intty    - vector value type produced by the selection pattern.
//   predty   - value type of the governing predicate $Pg in the pattern.
//   scalarty - scalar type wrapped around the matched immediate before it is
//              fed to AArch64dup.
//   cpyimm   - immediate operand used for $imm in the instruction and alias.
//
// The instruction ties its result to the extra input ($Zd = $_Zd). The
// pattern maps a vselect whose "true" arm is an AArch64dup of an immediate
// accepted by SVE8BitLslImm (yielding $imm and $shift) and whose "false" arm
// is the vector $Zd onto this instruction, passing $Zd as the tied input.
//
// NOTE(review): this text comes from a diff view without +/- markers. The
// alias operand line that casts NAME # _D with ZPR64 looks like the
// pre-change line sitting next to its replacement (the line that casts NAME
// with zprty) - confirm against the real file before relying on it.
multiclass sve_int_dup_imm_pred_merge_inst<
bits<2> sz8_64, string asm, ZPRRegOp zprty, ValueType intty,
ValueType predty, ValueType scalarty, imm8_opt_lsl cpyimm> {
let Constraints = "$Zd = $_Zd" in
def NAME : sve_int_dup_imm_pred<sz8_64, 1, asm, zprty, "/m",
(ins zprty:$_Zd, PPRAny:$Pg, cpyimm:$imm)>;
def : InstAlias<"mov $Zd, $Pg/m, $imm",
(!cast<Instruction>(NAME # _D) ZPR64:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i64:$imm), 1>;
(!cast<Instruction>(NAME) zprty:$Zd, PPRAny:$Pg, cpyimm:$imm), 1>;
def : Pat<(intty
(vselect predty:$Pg,
(intty (AArch64dup (scalarty (SVE8BitLslImm i32:$imm, i32:$shift)))),
intty:$Zd)),
(!cast<Instruction>(NAME) zprty:$Zd, $Pg, i32:$imm, i32:$shift)>;
}

multiclass sve_int_dup_imm_pred_merge<string asm> {
defm _B : sve_int_dup_imm_pred_merge_inst<0b00, asm, ZPR8, nxv16i8, nxv16i1,
i32, cpy_imm8_opt_lsl_i8>;
defm _H : sve_int_dup_imm_pred_merge_inst<0b01, asm, ZPR16, nxv8i16, nxv8i1,
i32, cpy_imm8_opt_lsl_i16>;
defm _S : sve_int_dup_imm_pred_merge_inst<0b10, asm, ZPR32, nxv4i32, nxv4i1,
i32, cpy_imm8_opt_lsl_i32>;
defm _D : sve_int_dup_imm_pred_merge_inst<0b11, asm, ZPR64, nxv2i64, nxv2i1,
i64, cpy_imm8_opt_lsl_i64>;

def : InstAlias<"fmov $Zd, $Pg/m, #0.0",
(!cast<Instruction>(NAME # _H) ZPR16:$Zd, PPRAny:$Pg, 0, 0), 0>;
Expand Down
228 changes: 226 additions & 2 deletions llvm/test/CodeGen/AArch64/sve-vselect-imm.ll
Expand Up @@ -111,8 +111,10 @@ define <vscale x 2 x i64> @sel_64_shifted(<vscale x 2 x i1> %p) {
ret <vscale x 2 x i64> %sel
}

; TODO: We could actually use something like "sel z0.b, p0/z, #-128" if the
; odd bits of the predicate are zero.
; TODO: We could actually use something like "cpy z0.b, p0/z, #-128". But it's
; a little tricky to prove correctness: we're using the predicate with the
; wrong width, so we'd have to prove the bits which would normally be unused
; are actually zero.
define <vscale x 8 x i16> @sel_16_illegal_wrong_extension(<vscale x 8 x i1> %p) {
; CHECK-LABEL: sel_16_illegal_wrong_extension:
; CHECK: // %bb.0:
Expand Down Expand Up @@ -190,3 +192,225 @@ define <vscale x 2 x i64> @sel_64_illegal_shifted(<vscale x 2 x i1> %p) {
%sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> zeroinitializer
ret <vscale x 2 x i64> %sel
}

; Selects, per lane under %p, between a splat of the i8 constant 3 and the
; incoming vector %in. The expected output is a single merging predicated
; "mov" of immediate #3 into z0.b.
define <vscale x 16 x i8> @sel_merge_8_positive(<vscale x 16 x i1> %p, <vscale x 16 x i8> %in) {
; CHECK-LABEL: sel_merge_8_positive:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.b, p0/m, #3 // =0x3
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 3, i32 0), <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i32> zeroinitializer
%sel = select <vscale x 16 x i1> %p, <vscale x 16 x i8> %vec, <vscale x 16 x i8> %in
ret <vscale x 16 x i8> %sel
}

; Selects, per lane under %p, between a splat of the i16 constant 3 and the
; incoming vector %in. The expected output is a single merging predicated
; "mov" of immediate #3 into z0.h.
define <vscale x 8 x i16> @sel_merge_16_positive(<vscale x 8 x i1> %p, <vscale x 8 x i16> %in) {
; CHECK-LABEL: sel_merge_16_positive:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.h, p0/m, #3 // =0x3
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 3, i32 0), <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i32> zeroinitializer
%sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %vec, <vscale x 8 x i16> %in
ret <vscale x 8 x i16> %sel
}

; Selects, per lane under %p, between a splat of the i32 constant 3 and the
; incoming vector %in. The expected output is a single merging predicated
; "mov" of immediate #3 into z0.s.
define <vscale x 4 x i32> @sel_merge_32_positive(<vscale x 4 x i1> %p, <vscale x 4 x i32> %in) {
; CHECK-LABEL: sel_merge_32_positive:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, p0/m, #3 // =0x3
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 3, i32 0), <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
%sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %vec, <vscale x 4 x i32> %in
ret <vscale x 4 x i32> %sel
}

; Selects, per lane under %p, between a splat of the i64 constant 3 and the
; incoming vector %in. The expected output is a single merging predicated
; "mov" of immediate #3 into z0.d.
define <vscale x 2 x i64> @sel_merge_64_positive(<vscale x 2 x i1> %p, <vscale x 2 x i64> %in) {
; CHECK-LABEL: sel_merge_64_positive:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, p0/m, #3 // =0x3
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 3, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
%sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> %in
ret <vscale x 2 x i64> %sel
}

; Same shape as the positive case, but the splatted i8 constant is -128.
; The expected output is a single merging predicated "mov" of immediate
; #-128 into z0.b.
define <vscale x 16 x i8> @sel_merge_8_negative(<vscale x 16 x i1> %p, <vscale x 16 x i8> %in) {
; CHECK-LABEL: sel_merge_8_negative:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.b, p0/m, #-128 // =0xffffffffffffff80
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 -128, i32 0), <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i32> zeroinitializer
%sel = select <vscale x 16 x i1> %p, <vscale x 16 x i8> %vec, <vscale x 16 x i8> %in
ret <vscale x 16 x i8> %sel
}

; Same shape as the positive case, but the splatted i16 constant is -128.
; The expected output is a single merging predicated "mov" of immediate
; #-128 into z0.h.
define <vscale x 8 x i16> @sel_merge_16_negative(<vscale x 8 x i1> %p, <vscale x 8 x i16> %in) {
; CHECK-LABEL: sel_merge_16_negative:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.h, p0/m, #-128 // =0xffffffffffffff80
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 -128, i32 0), <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i32> zeroinitializer
%sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %vec, <vscale x 8 x i16> %in
ret <vscale x 8 x i16> %sel
}

; Same shape as the positive case, but the splatted i32 constant is -128.
; The expected output is a single merging predicated "mov" of immediate
; #-128 into z0.s.
define <vscale x 4 x i32> @sel_merge_32_negative(<vscale x 4 x i1> %p, <vscale x 4 x i32> %in) {
; CHECK-LABEL: sel_merge_32_negative:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, p0/m, #-128 // =0xffffffffffffff80
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 -128, i32 0), <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
%sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %vec, <vscale x 4 x i32> %in
ret <vscale x 4 x i32> %sel
}

; Same shape as the positive case, but the splatted i64 constant is -128.
; The expected output is a single merging predicated "mov" of immediate
; #-128 into z0.d.
define <vscale x 2 x i64> @sel_merge_64_negative(<vscale x 2 x i1> %p, <vscale x 2 x i64> %in) {
; CHECK-LABEL: sel_merge_64_negative:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, p0/m, #-128 // =0xffffffffffffff80
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 -128, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
%sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> %in
ret <vscale x 2 x i64> %sel
}

; Selects, per lane under %p, between an all-zero i8 vector (written directly
; as zeroinitializer rather than as an explicit splat) and %in. The expected
; output is a single merging predicated "mov" of immediate #0 into z0.b.
define <vscale x 16 x i8> @sel_merge_8_zero(<vscale x 16 x i1> %p, <vscale x 16 x i8> %in) {
; CHECK-LABEL: sel_merge_8_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.b, p0/m, #0 // =0x0
; CHECK-NEXT: ret
%sel = select <vscale x 16 x i1> %p, <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %in
ret <vscale x 16 x i8> %sel
}

; Selects, per lane under %p, between an all-zero i16 vector (written directly
; as zeroinitializer rather than as an explicit splat) and %in. The expected
; output is a single merging predicated "mov" of immediate #0 into z0.h.
define <vscale x 8 x i16> @sel_merge_16_zero(<vscale x 8 x i1> %p, <vscale x 8 x i16> %in) {
; CHECK-LABEL: sel_merge_16_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.h, p0/m, #0 // =0x0
; CHECK-NEXT: ret
%sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> %in
ret <vscale x 8 x i16> %sel
}

; Selects, per lane under %p, between an all-zero i32 vector (written directly
; as zeroinitializer rather than as an explicit splat) and %in. The expected
; output is a single merging predicated "mov" of immediate #0 into z0.s.
define <vscale x 4 x i32> @sel_merge_32_zero(<vscale x 4 x i1> %p, <vscale x 4 x i32> %in) {
; CHECK-LABEL: sel_merge_32_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0
; CHECK-NEXT: ret
%sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> %in
ret <vscale x 4 x i32> %sel
}

; Selects, per lane under %p, between an all-zero i64 vector (written directly
; as zeroinitializer rather than as an explicit splat) and %in. The expected
; output is a single merging predicated "mov" of immediate #0 into z0.d.
define <vscale x 2 x i64> @sel_merge_64_zero(<vscale x 2 x i1> %p, <vscale x 2 x i64> %in) {
; CHECK-LABEL: sel_merge_64_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0
; CHECK-NEXT: ret
%sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> %in
ret <vscale x 2 x i64> %sel
}

; Selects, per lane under %p, between a splat of the i16 constant 512 (0x200)
; and %in. The low byte of 512 is zero, so this presumably exercises the
; shifted form of the 8-bit immediate - confirm against the operand
; definition. The expected output is a single merging predicated "mov" of
; immediate #512 into z0.h.
define <vscale x 8 x i16> @sel_merge_16_shifted(<vscale x 8 x i1> %p, <vscale x 8 x i16> %in) {
; CHECK-LABEL: sel_merge_16_shifted:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.h, p0/m, #512 // =0x200
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 512, i32 0), <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i32> zeroinitializer
%sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %vec, <vscale x 8 x i16> %in
ret <vscale x 8 x i16> %sel
}

; Selects, per lane under %p, between a splat of the i32 constant 512 (0x200)
; and %in. The low byte of 512 is zero, so this presumably exercises the
; shifted form of the 8-bit immediate - confirm against the operand
; definition. The expected output is a single merging predicated "mov" of
; immediate #512 into z0.s.
define <vscale x 4 x i32> @sel_merge_32_shifted(<vscale x 4 x i1> %p, <vscale x 4 x i32> %in) {
; CHECK-LABEL: sel_merge_32_shifted:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, p0/m, #512 // =0x200
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 512, i32 0), <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
%sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %vec, <vscale x 4 x i32> %in
ret <vscale x 4 x i32> %sel
}

; Selects, per lane under %p, between a splat of the i64 constant 512 (0x200)
; and %in. The low byte of 512 is zero, so this presumably exercises the
; shifted form of the 8-bit immediate - confirm against the operand
; definition. The expected output is a single merging predicated "mov" of
; immediate #512 into z0.d.
define <vscale x 2 x i64> @sel_merge_64_shifted(<vscale x 2 x i1> %p, <vscale x 2 x i64> %in) {
; CHECK-LABEL: sel_merge_64_shifted:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, p0/m, #512 // =0x200
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 512, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
%sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> %in
ret <vscale x 2 x i64> %sel
}

; TODO: We could actually use something like "cpy z0.b, p0/m, #-128". But it's
; a little tricky to prove correctness: we're using the predicate with the
; wrong width, so we'd have to prove the bits which would normally be unused
; are actually zero.
;
; Splat of the i16 constant 128 selected against %in under %p. 128 does not
; fit in a signed 8-bit value, and the expected output shows the immediate
; form is not chosen: the constant is built in w8, broadcast into z1.h, and
; merged into z0.h with a predicated vector "mov".
define <vscale x 8 x i16> @sel_merge_16_illegal_wrong_extension(<vscale x 8 x i1> %p, <vscale x 8 x i16> %in) {
; CHECK-LABEL: sel_merge_16_illegal_wrong_extension:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #128
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: mov z0.h, p0/m, z1.h
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 128, i32 0), <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i32> zeroinitializer
%sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %vec, <vscale x 8 x i16> %in
ret <vscale x 8 x i16> %sel
}

; Splat of the i32 constant 128 selected against %in under %p. 128 does not
; fit in a signed 8-bit value, and the expected output shows the immediate
; form is not chosen: the constant is built in w8, broadcast into z1.s, and
; merged into z0.s with a predicated vector "mov".
define <vscale x 4 x i32> @sel_merge_32_illegal_wrong_extension(<vscale x 4 x i1> %p, <vscale x 4 x i32> %in) {
; CHECK-LABEL: sel_merge_32_illegal_wrong_extension:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #128
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: mov z0.s, p0/m, z1.s
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 128, i32 0), <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
%sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %vec, <vscale x 4 x i32> %in
ret <vscale x 4 x i32> %sel
}

; Splat of the i64 constant 128 selected against %in under %p. 128 does not
; fit in a signed 8-bit value, and the expected output shows the immediate
; form is not chosen: the constant is built in w8, broadcast from x8 into
; z1.d, and merged into z0.d with a predicated vector "mov".
define <vscale x 2 x i64> @sel_merge_64_illegal_wrong_extension(<vscale x 2 x i1> %p, <vscale x 2 x i64> %in) {
; CHECK-LABEL: sel_merge_64_illegal_wrong_extension:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #128
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: mov z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 128, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
%sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> %in
ret <vscale x 2 x i64> %sel
}

; Splat of the i16 constant 513 (0x201) selected against %in under %p. 513 has
; bits set both in and above its low byte, and the expected output shows the
; immediate form is not chosen: the constant is built in w8, broadcast into
; z1.h, and merged into z0.h with a predicated vector "mov".
define <vscale x 8 x i16> @sel_merge_16_illegal_shifted(<vscale x 8 x i1> %p, <vscale x 8 x i16> %in) {
; CHECK-LABEL: sel_merge_16_illegal_shifted:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #513
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: mov z0.h, p0/m, z1.h
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 513, i32 0), <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i32> zeroinitializer
%sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %vec, <vscale x 8 x i16> %in
ret <vscale x 8 x i16> %sel
}

; Splat of the i32 constant 513 (0x201) selected against %in under %p. 513 has
; bits set both in and above its low byte, and the expected output shows the
; immediate form is not chosen: the constant is built in w8, broadcast into
; z1.s, and merged into z0.s with a predicated vector "mov".
define <vscale x 4 x i32> @sel_merge_32_illegal_shifted(<vscale x 4 x i1> %p, <vscale x 4 x i32> %in) {
; CHECK-LABEL: sel_merge_32_illegal_shifted:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #513
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: mov z0.s, p0/m, z1.s
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 513, i32 0), <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
%sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %vec, <vscale x 4 x i32> %in
ret <vscale x 4 x i32> %sel
}

; Splat of the i64 constant 513 (0x201) selected against %in under %p. 513 has
; bits set both in and above its low byte, and the expected output shows the
; immediate form is not chosen: the constant is built in w8, broadcast from
; x8 into z1.d, and merged into z0.d with a predicated vector "mov".
define <vscale x 2 x i64> @sel_merge_64_illegal_shifted(<vscale x 2 x i1> %p, <vscale x 2 x i64> %in) {
; CHECK-LABEL: sel_merge_64_illegal_shifted:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #513
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: mov z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 513, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
%sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> %in
ret <vscale x 2 x i64> %sel
}

0 comments on commit a52f10b

Please sign in to comment.