Skip to content

Commit

Permalink
[AArch64][SVE] Add patterns for VSELECT of immediates.
Browse files Browse the repository at this point in the history
This covers forms involving "CPY (immediate, zeroing)".

This doesn't handle the case where the select operands are reversed (zero
as the true value and the immediate as the false value) and the condition
is freely invertible.  It is not yet clear how best to handle that; a
DAGCombine is one option.

Differential Revision: https://reviews.llvm.org/D79598
  • Loading branch information
efriedma-quic committed May 12, 2020
1 parent 5633813 commit a8874c7
Show file tree
Hide file tree
Showing 3 changed files with 221 additions and 35 deletions.
2 changes: 0 additions & 2 deletions llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
Expand Up @@ -10,8 +10,6 @@
//
//===----------------------------------------------------------------------===//

def SVE8BitLslImm : ComplexPattern<i32, 2, "SelectSVE8BitLslImm", [imm]>;

// Contiguous loads - node definitions
//
def SDT_AArch64_LD1 : SDTypeProfile<1, 3, [
Expand Down
62 changes: 29 additions & 33 deletions llvm/lib/Target/AArch64/SVEInstrFormats.td
Expand Up @@ -167,40 +167,24 @@ def SVEAddSubImmOperand32 : SVEShiftedImmOperand<32, "AddSub", "isSVEAddSubImm<i
def SVEAddSubImmOperand64 : SVEShiftedImmOperand<64, "AddSub", "isSVEAddSubImm<int64_t>">;

class imm8_opt_lsl<int ElementWidth, string printType,
AsmOperandClass OpndClass, code Predicate>
: Operand<i32>, ImmLeaf<i32, Predicate> {
AsmOperandClass OpndClass>
: Operand<i32> {
let EncoderMethod = "getImm8OptLsl";
let DecoderMethod = "DecodeImm8OptLsl<" # ElementWidth # ">";
let PrintMethod = "printImm8OptLsl<" # printType # ">";
let ParserMatchClass = OpndClass;
let MIOperandInfo = (ops i32imm, i32imm);
}

def cpy_imm8_opt_lsl_i8 : imm8_opt_lsl<8, "int8_t", SVECpyImmOperand8, [{
return AArch64_AM::isSVECpyImm<int8_t>(Imm);
}]>;
def cpy_imm8_opt_lsl_i16 : imm8_opt_lsl<16, "int16_t", SVECpyImmOperand16, [{
return AArch64_AM::isSVECpyImm<int16_t>(Imm);
}]>;
def cpy_imm8_opt_lsl_i32 : imm8_opt_lsl<32, "int32_t", SVECpyImmOperand32, [{
return AArch64_AM::isSVECpyImm<int32_t>(Imm);
}]>;
def cpy_imm8_opt_lsl_i64 : imm8_opt_lsl<64, "int64_t", SVECpyImmOperand64, [{
return AArch64_AM::isSVECpyImm<int64_t>(Imm);
}]>;

def addsub_imm8_opt_lsl_i8 : imm8_opt_lsl<8, "uint8_t", SVEAddSubImmOperand8, [{
return AArch64_AM::isSVEAddSubImm<int8_t>(Imm);
}]>;
def addsub_imm8_opt_lsl_i16 : imm8_opt_lsl<16, "uint16_t", SVEAddSubImmOperand16, [{
return AArch64_AM::isSVEAddSubImm<int16_t>(Imm);
}]>;
def addsub_imm8_opt_lsl_i32 : imm8_opt_lsl<32, "uint32_t", SVEAddSubImmOperand32, [{
return AArch64_AM::isSVEAddSubImm<int32_t>(Imm);
}]>;
def addsub_imm8_opt_lsl_i64 : imm8_opt_lsl<64, "uint64_t", SVEAddSubImmOperand64, [{
return AArch64_AM::isSVEAddSubImm<int64_t>(Imm);
}]>;
def cpy_imm8_opt_lsl_i8 : imm8_opt_lsl<8, "int8_t", SVECpyImmOperand8>;
def cpy_imm8_opt_lsl_i16 : imm8_opt_lsl<16, "int16_t", SVECpyImmOperand16>;
def cpy_imm8_opt_lsl_i32 : imm8_opt_lsl<32, "int32_t", SVECpyImmOperand32>;
def cpy_imm8_opt_lsl_i64 : imm8_opt_lsl<64, "int64_t", SVECpyImmOperand64>;

def addsub_imm8_opt_lsl_i8 : imm8_opt_lsl<8, "uint8_t", SVEAddSubImmOperand8>;
def addsub_imm8_opt_lsl_i16 : imm8_opt_lsl<16, "uint16_t", SVEAddSubImmOperand16>;
def addsub_imm8_opt_lsl_i32 : imm8_opt_lsl<32, "uint32_t", SVEAddSubImmOperand32>;
def addsub_imm8_opt_lsl_i64 : imm8_opt_lsl<64, "uint64_t", SVEAddSubImmOperand64>;

def SVEAddSubImm8Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i8>", []>;
def SVEAddSubImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i16>", []>;
Expand All @@ -212,6 +196,8 @@ def SVELogicalImm16Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i16>",
def SVELogicalImm32Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i32>", []>;
def SVELogicalImm64Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i64>", []>;

def SVE8BitLslImm : ComplexPattern<i32, 2, "SelectSVE8BitLslImm", [imm]>;

def SVEArithUImmPat : ComplexPattern<i32, 1, "SelectSVEArithImm", []>;
def SVEArithSImmPat : ComplexPattern<i32, 1, "SelectSVESignedArithImm", []>;

Expand Down Expand Up @@ -4086,8 +4072,9 @@ multiclass sve_int_dup_imm_pred_merge<string asm> {

multiclass sve_int_dup_imm_pred_zero_inst<
bits<2> sz8_64, string asm, ZPRRegOp zprty, ValueType intty,
ValueType predty, imm8_opt_lsl cpyimm> {
def NAME : sve_int_dup_imm_pred<sz8_64, 0, asm, zprty, "/z", (ins PPRAny:$Pg, cpyimm:$imm)>;
ValueType predty, ValueType scalarty, imm8_opt_lsl cpyimm> {
def NAME : sve_int_dup_imm_pred<sz8_64, 0, asm, zprty, "/z",
(ins PPRAny:$Pg, cpyimm:$imm)>;
def : InstAlias<"mov $Zd, $Pg/z, $imm",
(!cast<Instruction>(NAME) zprty:$Zd, PPRAny:$Pg, cpyimm:$imm), 1>;
def : Pat<(intty (zext (predty PPRAny:$Ps1))),
Expand All @@ -4096,13 +4083,22 @@ multiclass sve_int_dup_imm_pred_zero_inst<
(!cast<Instruction>(NAME) PPRAny:$Ps1, -1, 0)>;
def : Pat<(intty (anyext (predty PPRAny:$Ps1))),
(!cast<Instruction>(NAME) PPRAny:$Ps1, 1, 0)>;
def : Pat<(intty
(vselect predty:$Pg,
(intty (AArch64dup (scalarty (SVE8BitLslImm i32:$imm, i32:$shift)))),
(intty (AArch64dup (scalarty 0))))),
(!cast<Instruction>(NAME) $Pg, i32:$imm, i32:$shift)>;
}

multiclass sve_int_dup_imm_pred_zero<string asm> {
defm _B : sve_int_dup_imm_pred_zero_inst<0b00, asm, ZPR8, nxv16i8, nxv16i1, cpy_imm8_opt_lsl_i8>;
defm _H : sve_int_dup_imm_pred_zero_inst<0b01, asm, ZPR16, nxv8i16, nxv8i1, cpy_imm8_opt_lsl_i16>;
defm _S : sve_int_dup_imm_pred_zero_inst<0b10, asm, ZPR32, nxv4i32, nxv4i1, cpy_imm8_opt_lsl_i32>;
defm _D : sve_int_dup_imm_pred_zero_inst<0b11, asm, ZPR64, nxv2i64, nxv2i1, cpy_imm8_opt_lsl_i64>;
defm _B : sve_int_dup_imm_pred_zero_inst<0b00, asm, ZPR8, nxv16i8, nxv16i1,
i32, cpy_imm8_opt_lsl_i8>;
defm _H : sve_int_dup_imm_pred_zero_inst<0b01, asm, ZPR16, nxv8i16, nxv8i1,
i32, cpy_imm8_opt_lsl_i16>;
defm _S : sve_int_dup_imm_pred_zero_inst<0b10, asm, ZPR32, nxv4i32, nxv4i1,
i32, cpy_imm8_opt_lsl_i32>;
defm _D : sve_int_dup_imm_pred_zero_inst<0b11, asm, ZPR64, nxv2i64, nxv2i1,
i64, cpy_imm8_opt_lsl_i64>;
}

//===----------------------------------------------------------------------===//
Expand Down
192 changes: 192 additions & 0 deletions llvm/test/CodeGen/AArch64/sve-vselect-imm.ll
@@ -0,0 +1,192 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s

; Select between a splat of i8 3 and an all-zero vector under predicate %p.
; Expected codegen is a single zeroing predicated "mov" of the immediate on
; byte elements, with no separate splat or "sel" instruction.
define <vscale x 16 x i8> @sel_8_positive(<vscale x 16 x i1> %p) {
; CHECK-LABEL: sel_8_positive:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.b, p0/z, #3 // =0x3
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 3, i32 0), <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i32> zeroinitializer
%sel = select <vscale x 16 x i1> %p, <vscale x 16 x i8> %vec, <vscale x 16 x i8> zeroinitializer
ret <vscale x 16 x i8> %sel
}

; Select between a splat of i16 3 and an all-zero vector under predicate %p.
; Expected codegen is a single zeroing predicated "mov" of the immediate on
; halfword elements.
define <vscale x 8 x i16> @sel_16_positive(<vscale x 8 x i1> %p) {
; CHECK-LABEL: sel_16_positive:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.h, p0/z, #3 // =0x3
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 3, i32 0), <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i32> zeroinitializer
%sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %vec, <vscale x 8 x i16> zeroinitializer
ret <vscale x 8 x i16> %sel
}

; Select between a splat of i32 3 and an all-zero vector under predicate %p.
; Expected codegen is a single zeroing predicated "mov" of the immediate on
; word elements.
define <vscale x 4 x i32> @sel_32_positive(<vscale x 4 x i1> %p) {
; CHECK-LABEL: sel_32_positive:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, p0/z, #3 // =0x3
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 3, i32 0), <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
%sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %vec, <vscale x 4 x i32> zeroinitializer
ret <vscale x 4 x i32> %sel
}

; Select between a splat of i64 3 and an all-zero vector under predicate %p.
; Expected codegen is a single zeroing predicated "mov" of the immediate on
; doubleword elements.
define <vscale x 2 x i64> @sel_64_positive(<vscale x 2 x i1> %p) {
; CHECK-LABEL: sel_64_positive:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, p0/z, #3 // =0x3
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 3, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
%sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> zeroinitializer
ret <vscale x 2 x i64> %sel
}

; Same shape as the positive test, but the splatted value is i8 -128 (the
; minimum signed 8-bit value). Expected codegen is still a single zeroing
; predicated "mov" of the immediate on byte elements.
define <vscale x 16 x i8> @sel_8_negative(<vscale x 16 x i1> %p) {
; CHECK-LABEL: sel_8_negative:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.b, p0/z, #-128 // =0xffffffffffffff80
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 -128, i32 0), <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i32> zeroinitializer
%sel = select <vscale x 16 x i1> %p, <vscale x 16 x i8> %vec, <vscale x 16 x i8> zeroinitializer
ret <vscale x 16 x i8> %sel
}

; Select between a splat of i16 -128 and an all-zero vector under predicate
; %p. Expected codegen is a single zeroing predicated "mov" of the negative
; immediate on halfword elements.
define <vscale x 8 x i16> @sel_16_negative(<vscale x 8 x i1> %p) {
; CHECK-LABEL: sel_16_negative:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.h, p0/z, #-128 // =0xffffffffffffff80
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 -128, i32 0), <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i32> zeroinitializer
%sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %vec, <vscale x 8 x i16> zeroinitializer
ret <vscale x 8 x i16> %sel
}

; Select between a splat of i32 -128 and an all-zero vector under predicate
; %p. Expected codegen is a single zeroing predicated "mov" of the negative
; immediate on word elements.
define <vscale x 4 x i32> @sel_32_negative(<vscale x 4 x i1> %p) {
; CHECK-LABEL: sel_32_negative:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, p0/z, #-128 // =0xffffffffffffff80
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 -128, i32 0), <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
%sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %vec, <vscale x 4 x i32> zeroinitializer
ret <vscale x 4 x i32> %sel
}

; Select between a splat of i64 -128 and an all-zero vector under predicate
; %p. Expected codegen is a single zeroing predicated "mov" of the negative
; immediate on doubleword elements.
define <vscale x 2 x i64> @sel_64_negative(<vscale x 2 x i1> %p) {
; CHECK-LABEL: sel_64_negative:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, p0/z, #-128 // =0xffffffffffffff80
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 -128, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
%sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> zeroinitializer
ret <vscale x 2 x i64> %sel
}

; Select between a splat of i16 512 and an all-zero vector under predicate
; %p. 512 is 2 << 8, so it is outside the signed 8-bit range but is a multiple
; of 256; presumably it is matched through the optional left-shift-by-8 form
; of the immediate. Expected codegen is a single zeroing predicated "mov".
define <vscale x 8 x i16> @sel_16_shifted(<vscale x 8 x i1> %p) {
; CHECK-LABEL: sel_16_shifted:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.h, p0/z, #512 // =0x200
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 512, i32 0), <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i32> zeroinitializer
%sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %vec, <vscale x 8 x i16> zeroinitializer
ret <vscale x 8 x i16> %sel
}

; Select between a splat of i32 512 (2 << 8) and an all-zero vector under
; predicate %p. Expected codegen is a single zeroing predicated "mov" on word
; elements; presumably the value is matched through the shifted-immediate
; form.
define <vscale x 4 x i32> @sel_32_shifted(<vscale x 4 x i1> %p) {
; CHECK-LABEL: sel_32_shifted:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, p0/z, #512 // =0x200
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 512, i32 0), <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
%sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %vec, <vscale x 4 x i32> zeroinitializer
ret <vscale x 4 x i32> %sel
}

; Select between a splat of i64 512 (2 << 8) and an all-zero vector under
; predicate %p. Expected codegen is a single zeroing predicated "mov" on
; doubleword elements; presumably the value is matched through the
; shifted-immediate form.
define <vscale x 2 x i64> @sel_64_shifted(<vscale x 2 x i1> %p) {
; CHECK-LABEL: sel_64_shifted:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, p0/z, #512 // =0x200
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 512, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
%sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> zeroinitializer
ret <vscale x 2 x i64> %sel
}

; Negative test: the splatted value is i16 128, which is above the signed
; 8-bit maximum (127) and is not a multiple of 256, so the immediate form is
; not selected. Expected codegen is the generic sequence: materialize 128 in
; a scalar register, broadcast it, build a zero vector, then "sel".
;
; TODO: We could actually use something like "mov z0.b, p0/z, #-128" (the
; byte-element immediate form; 0x0080 is byte 0x80 followed by byte 0x00) if
; the odd bits of the predicate are zero.
define <vscale x 8 x i16> @sel_16_illegal_wrong_extension(<vscale x 8 x i1> %p) {
; CHECK-LABEL: sel_16_illegal_wrong_extension:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #128
; CHECK-NEXT: mov z0.h, w8
; CHECK-NEXT: mov z1.h, #0 // =0x0
; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 128, i32 0), <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i32> zeroinitializer
%sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %vec, <vscale x 8 x i16> zeroinitializer
ret <vscale x 8 x i16> %sel
}

; Negative test: the splatted value is i32 128, which is above the signed
; 8-bit maximum (127) and is not a multiple of 256, so the immediate form is
; not selected. Expected codegen is the generic sequence: materialize 128 in
; a scalar register, broadcast it, build a zero vector, then "sel".
define <vscale x 4 x i32> @sel_32_illegal_wrong_extension(<vscale x 4 x i1> %p) {
; CHECK-LABEL: sel_32_illegal_wrong_extension:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #128
; CHECK-NEXT: mov z0.s, w8
; CHECK-NEXT: mov z1.s, #0 // =0x0
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 128, i32 0), <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
%sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %vec, <vscale x 4 x i32> zeroinitializer
ret <vscale x 4 x i32> %sel
}

; Negative test: the splatted value is i64 128, which is above the signed
; 8-bit maximum (127) and is not a multiple of 256, so the immediate form is
; not selected. Expected codegen is the generic sequence: materialize 128 in
; a scalar register, broadcast it, build a zero vector, then "sel".
define <vscale x 2 x i64> @sel_64_illegal_wrong_extension(<vscale x 2 x i1> %p) {
; CHECK-LABEL: sel_64_illegal_wrong_extension:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #128
; CHECK-NEXT: mov z0.d, x8
; CHECK-NEXT: mov z1.d, #0 // =0x0
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 128, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
%sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> zeroinitializer
ret <vscale x 2 x i64> %sel
}

; Negative test: the splatted value is i16 513 (0x201), which has bits set
; both in the low byte and above it, so it is neither in the signed 8-bit
; range nor a multiple of 256. Expected codegen is the generic sequence:
; materialize 513 in a scalar register, broadcast it, build a zero vector,
; then "sel".
define <vscale x 8 x i16> @sel_16_illegal_shifted(<vscale x 8 x i1> %p) {
; CHECK-LABEL: sel_16_illegal_shifted:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #513
; CHECK-NEXT: mov z0.h, w8
; CHECK-NEXT: mov z1.h, #0 // =0x0
; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 513, i32 0), <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i32> zeroinitializer
%sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %vec, <vscale x 8 x i16> zeroinitializer
ret <vscale x 8 x i16> %sel
}

; Negative test: the splatted value is i32 513 (0x201), which is neither in
; the signed 8-bit range nor a multiple of 256. Expected codegen is the
; generic sequence: materialize 513 in a scalar register, broadcast it, build
; a zero vector, then "sel".
define <vscale x 4 x i32> @sel_32_illegal_shifted(<vscale x 4 x i1> %p) {
; CHECK-LABEL: sel_32_illegal_shifted:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #513
; CHECK-NEXT: mov z0.s, w8
; CHECK-NEXT: mov z1.s, #0 // =0x0
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 513, i32 0), <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
%sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %vec, <vscale x 4 x i32> zeroinitializer
ret <vscale x 4 x i32> %sel
}

; Negative test: the splatted value is i64 513 (0x201), which is neither in
; the signed 8-bit range nor a multiple of 256. Expected codegen is the
; generic sequence: materialize 513 in a scalar register, broadcast it, build
; a zero vector, then "sel".
define <vscale x 2 x i64> @sel_64_illegal_shifted(<vscale x 2 x i1> %p) {
; CHECK-LABEL: sel_64_illegal_shifted:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #513
; CHECK-NEXT: mov z0.d, x8
; CHECK-NEXT: mov z1.d, #0 // =0x0
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 513, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
%sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> zeroinitializer
ret <vscale x 2 x i64> %sel
}

0 comments on commit a8874c7

Please sign in to comment.