Skip to content

Commit

Permalink
[AArch64][SVE2] Asm: support SVE2 Accumulate Group
Browse files Browse the repository at this point in the history
Summary:
Patch adds support for the following instructions:

SVE2 bitwise shift and insert:
    * SRI, SLI

SVE2 bitwise shift right and accumulate:
    * SSRA, USRA, SRSRA, URSRA

SVE2 complex integer add:
    * CADD, SQCADD

SVE2 integer absolute difference and accumulate:
    * SABA, UABA

SVE2 integer absolute difference and accumulate long:
    * SABALB, SABALT, UABALB, UABALT

SVE2 integer add/subtract long with carry:
    * ADCLB, ADCLT, SBCLB, SBCLT

The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest

Reviewed By: SjoerdMeijer

Differential Revision: https://reviews.llvm.org/D62204

llvm-svn: 361622
  • Loading branch information
c-rhodes committed May 24, 2019
1 parent 8654b8c commit 5f04f00
Show file tree
Hide file tree
Showing 38 changed files with 1,853 additions and 0 deletions.
30 changes: 30 additions & 0 deletions llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
Expand Up @@ -1198,6 +1198,36 @@ let Predicates = [HasSVE2] in {
defm PMULLB_ZZZ : sve2_pmul_long<0b0, "pmullb">;
defm PMULLT_ZZZ : sve2_pmul_long<0b1, "pmullt">;

// SVE2 bitwise shift and insert
defm SRI_ZZI : sve2_int_bin_cons_shift_imm_right<0b0, "sri">;
defm SLI_ZZI : sve2_int_bin_cons_shift_imm_left< 0b1, "sli">;

// SVE2 bitwise shift right and accumulate
defm SSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b00, "ssra">;
defm USRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b01, "usra">;
defm SRSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b10, "srsra">;
defm URSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b11, "ursra">;

// SVE2 complex integer add
defm CADD_ZZI : sve2_int_cadd<0b0, "cadd">;
defm SQCADD_ZZI : sve2_int_cadd<0b1, "sqcadd">;

// SVE2 integer absolute difference and accumulate
defm SABA_ZZZ : sve2_int_absdiff_accum<0b0, "saba">;
defm UABA_ZZZ : sve2_int_absdiff_accum<0b1, "uaba">;

// SVE2 integer absolute difference and accumulate long
defm SABALB_ZZZ : sve2_int_absdiff_accum_long<0b00, "sabalb">;
defm SABALT_ZZZ : sve2_int_absdiff_accum_long<0b01, "sabalt">;
defm UABALB_ZZZ : sve2_int_absdiff_accum_long<0b10, "uabalb">;
defm UABALT_ZZZ : sve2_int_absdiff_accum_long<0b11, "uabalt">;

// SVE2 integer add/subtract long with carry
defm ADCLB_ZZZ : sve2_int_addsub_long_carry<0b00, "adclb">;
defm ADCLT_ZZZ : sve2_int_addsub_long_carry<0b01, "adclt">;
defm SBCLB_ZZZ : sve2_int_addsub_long_carry<0b10, "sbclb">;
defm SBCLT_ZZZ : sve2_int_addsub_long_carry<0b11, "sbclt">;

// Predicated shifts
defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
Expand Down
156 changes: 156 additions & 0 deletions llvm/lib/Target/AArch64/SVEInstrFormats.td
Expand Up @@ -2191,6 +2191,162 @@ multiclass sve2_pmul_long<bits<1> opc, string asm> {
def _D : sve2_wide_int_arith<0b11, {0b1101, opc}, asm, ZPR64, ZPR32, ZPR32>;
}

//===----------------------------------------------------------------------===//
// SVE2 Accumulate Group
//===----------------------------------------------------------------------===//

class sve2_int_bin_cons_shift_imm<bits<4> tsz8_64, bit opc, string asm,
ZPRRegOp zprty, Operand immtype>
: I<(outs zprty:$Zd), (ins zprty:$Zn, immtype:$imm),
asm, "\t$Zd, $Zn, $imm",
"", []>, Sched<[]> {
bits<5> Zd;
bits<5> Zn;
bits<6> imm;
let Inst{31-24} = 0b01000101;
let Inst{23-22} = tsz8_64{3-2};
let Inst{21} = 0b0;
let Inst{20-19} = tsz8_64{1-0};
let Inst{18-16} = imm{2-0}; // imm3
let Inst{15-11} = 0b11110;
let Inst{10} = opc;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
}

multiclass sve2_int_bin_cons_shift_imm_left<bit opc, string asm> {
def _B : sve2_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
def _H : sve2_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
let Inst{19} = imm{3};
}
def _S : sve2_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> {
let Inst{20-19} = imm{4-3};
}
def _D : sve2_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> {
let Inst{22} = imm{5};
let Inst{20-19} = imm{4-3};
}
}

multiclass sve2_int_bin_cons_shift_imm_right<bit opc, string asm> {
def _B : sve2_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
def _H : sve2_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
let Inst{19} = imm{3};
}
def _S : sve2_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
let Inst{20-19} = imm{4-3};
}
def _D : sve2_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
let Inst{22} = imm{5};
let Inst{20-19} = imm{4-3};
}
}

class sve2_int_bin_accum_cons_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,
ZPRRegOp zprty, Operand immtype>
: I<(outs zprty:$Zda), (ins zprty:$_Zda, zprty:$Zn, immtype:$imm),
asm, "\t$Zda, $Zn, $imm",
"", []>, Sched<[]> {
bits<5> Zda;
bits<5> Zn;
bits<6> imm;
let Inst{31-24} = 0b01000101;
let Inst{23-22} = tsz8_64{3-2};
let Inst{21} = 0b0;
let Inst{20-19} = tsz8_64{1-0};
let Inst{18-16} = imm{2-0}; // imm3
let Inst{15-12} = 0b1110;
let Inst{11-10} = opc;
let Inst{9-5} = Zn;
let Inst{4-0} = Zda;

let Constraints = "$Zda = $_Zda";
let DestructiveInstType = Destructive;
let ElementSize = ElementSizeNone;
}

multiclass sve2_int_bin_accum_cons_shift_imm_right<bits<2> opc, string asm> {
def _B : sve2_int_bin_accum_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
def _H : sve2_int_bin_accum_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
let Inst{19} = imm{3};
}
def _S : sve2_int_bin_accum_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
let Inst{20-19} = imm{4-3};
}
def _D : sve2_int_bin_accum_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
let Inst{22} = imm{5};
let Inst{20-19} = imm{4-3};
}
}

class sve2_int_cadd<bits<2> sz, bit opc, string asm, ZPRRegOp zprty>
: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm, complexrotateopodd:$rot),
asm, "\t$Zdn, $_Zdn, $Zm, $rot", "", []>, Sched<[]> {
bits<5> Zdn;
bits<5> Zm;
bit rot;
let Inst{31-24} = 0b01000101;
let Inst{23-22} = sz;
let Inst{21-17} = 0b00000;
let Inst{16} = opc;
let Inst{15-11} = 0b11011;
let Inst{10} = rot;
let Inst{9-5} = Zm;
let Inst{4-0} = Zdn;

let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = Destructive;
let ElementSize = ElementSizeNone;
}

multiclass sve2_int_cadd<bit opc, string asm> {
def _B : sve2_int_cadd<0b00, opc, asm, ZPR8>;
def _H : sve2_int_cadd<0b01, opc, asm, ZPR16>;
def _S : sve2_int_cadd<0b10, opc, asm, ZPR32>;
def _D : sve2_int_cadd<0b11, opc, asm, ZPR64>;
}

class sve2_int_absdiff_accum<bits<2> sz, bits<4> opc, string asm,
ZPRRegOp zprty1, ZPRRegOp zprty2>
: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty2:$Zm),
asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> {
bits<5> Zda;
bits<5> Zn;
bits<5> Zm;
let Inst{31-24} = 0b01000101;
let Inst{23-22} = sz;
let Inst{21} = 0b0;
let Inst{20-16} = Zm;
let Inst{15-14} = 0b11;
let Inst{13-10} = opc;
let Inst{9-5} = Zn;
let Inst{4-0} = Zda;

let Constraints = "$Zda = $_Zda";
let DestructiveInstType = Destructive;
let ElementSize = ElementSizeNone;
}

multiclass sve2_int_absdiff_accum<bit opc, string asm> {
def _B : sve2_int_absdiff_accum<0b00, { 0b111, opc }, asm, ZPR8, ZPR8>;
def _H : sve2_int_absdiff_accum<0b01, { 0b111, opc }, asm, ZPR16, ZPR16>;
def _S : sve2_int_absdiff_accum<0b10, { 0b111, opc }, asm, ZPR32, ZPR32>;
def _D : sve2_int_absdiff_accum<0b11, { 0b111, opc }, asm, ZPR64, ZPR64>;
}

multiclass sve2_int_absdiff_accum_long<bits<2> opc, string asm> {
def _H : sve2_int_absdiff_accum<0b01, { 0b00, opc }, asm, ZPR16, ZPR8>;
def _S : sve2_int_absdiff_accum<0b10, { 0b00, opc }, asm, ZPR32, ZPR16>;
def _D : sve2_int_absdiff_accum<0b11, { 0b00, opc }, asm, ZPR64, ZPR32>;
}

multiclass sve2_int_addsub_long_carry<bits<2> opc, string asm> {
def _S : sve2_int_absdiff_accum<{ opc{1}, 0b0 }, { 0b010, opc{0} }, asm,
ZPR32, ZPR32>;
def _D : sve2_int_absdiff_accum<{ opc{1}, 0b1 }, { 0b010, opc{0} }, asm,
ZPR64, ZPR64>;
}

//===----------------------------------------------------------------------===//
// SVE Integer Arithmetic - Unary Predicated Group
//===----------------------------------------------------------------------===//
Expand Down
25 changes: 25 additions & 0 deletions llvm/test/MC/AArch64/SVE2/adclb-diagnostics.s
@@ -0,0 +1,25 @@
// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s


// ------------------------------------------------------------------------- //
// Invalid element width

adclb z0.b, z1.b, z2.b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: adclb z0.b, z1.b, z2.b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

adclb z0.h, z1.h, z2.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: adclb z0.h, z1.h, z2.h
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:


// --------------------------------------------------------------------------//
// Negative tests for instructions that are incompatible with movprfx

movprfx z0.d, p0/z, z7.d
adclb z0.d, z1.d, z7.d
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
// CHECK-NEXT: adclb z0.d, z1.d, z7.d
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
36 changes: 36 additions & 0 deletions llvm/test/MC/AArch64/SVE2/adclb.s
@@ -0,0 +1,36 @@
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
// RUN: | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN

adclb z0.s, z1.s, z31.s
// CHECK-INST: adclb z0.s, z1.s, z31.s
// CHECK-ENCODING: [0x20,0xd0,0x1f,0x45]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: 20 d0 1f 45 <unknown>

adclb z0.d, z1.d, z31.d
// CHECK-INST: adclb z0.d, z1.d, z31.d
// CHECK-ENCODING: [0x20,0xd0,0x5f,0x45]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: 20 d0 5f 45 <unknown>


// --------------------------------------------------------------------------//
// Test compatibility with MOVPRFX instruction.

movprfx z0, z7
// CHECK-INST: movprfx z0, z7
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: e0 bc 20 04 <unknown>

adclb z0.d, z1.d, z31.d
// CHECK-INST: adclb z0.d, z1.d, z31.d
// CHECK-ENCODING: [0x20,0xd0,0x5f,0x45]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: 20 d0 5f 45 <unknown>
25 changes: 25 additions & 0 deletions llvm/test/MC/AArch64/SVE2/adclt-diagnostics.s
@@ -0,0 +1,25 @@
// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s


// ------------------------------------------------------------------------- //
// Invalid element width

adclt z0.b, z1.b, z2.b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: adclt z0.b, z1.b, z2.b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

adclt z0.h, z1.h, z2.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: adclt z0.h, z1.h, z2.h
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:


// --------------------------------------------------------------------------//
// Negative tests for instructions that are incompatible with movprfx

movprfx z0.d, p0/z, z7.d
adclt z0.d, z1.d, z7.d
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
// CHECK-NEXT: adclt z0.d, z1.d, z7.d
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
36 changes: 36 additions & 0 deletions llvm/test/MC/AArch64/SVE2/adclt.s
@@ -0,0 +1,36 @@
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
// RUN: | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN

adclt z0.s, z1.s, z31.s
// CHECK-INST: adclt z0.s, z1.s, z31.s
// CHECK-ENCODING: [0x20,0xd4,0x1f,0x45]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: 20 d4 1f 45 <unknown>

adclt z0.d, z1.d, z31.d
// CHECK-INST: adclt z0.d, z1.d, z31.d
// CHECK-ENCODING: [0x20,0xd4,0x5f,0x45]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: 20 d4 5f 45 <unknown>


// --------------------------------------------------------------------------//
// Test compatibility with MOVPRFX instruction.

movprfx z0, z7
// CHECK-INST: movprfx z0, z7
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: e0 bc 20 04 <unknown>

adclt z0.d, z1.d, z31.d
// CHECK-INST: adclt z0.d, z1.d, z31.d
// CHECK-ENCODING: [0x20,0xd4,0x5f,0x45]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: 20 d4 5f 45 <unknown>
38 changes: 38 additions & 0 deletions llvm/test/MC/AArch64/SVE2/cadd-diagnostics.s
@@ -0,0 +1,38 @@
// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s

// --------------------------------------------------------------------------//
// Source and Destination Registers must match

cadd z0.d, z1.d, z2.d, #90
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
// CHECK-NEXT: cadd z0.d, z1.d, z2.d, #90
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:


// --------------------------------------------------------------------------//
// Invalid rotation

cadd z0.d, z0.d, z1.d, #0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: complex rotation must be 90 or 270.
// CHECK-NEXT: cadd z0.d, z0.d, z1.d, #0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

cadd z0.d, z0.d, z1.d, #180
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: complex rotation must be 90 or 270.
// CHECK-NEXT: cadd z0.d, z0.d, z1.d, #180
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

cadd z0.d, z0.d, z1.d, #450
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: complex rotation must be 90 or 270.
// CHECK-NEXT: cadd z0.d, z0.d, z1.d, #450
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:


// --------------------------------------------------------------------------//
// Negative tests for instructions that are incompatible with movprfx

movprfx z0.d, p0/z, z7.d
cadd z0.d, z0.d, z31.d, #90
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
// CHECK-NEXT: cadd z0.d, z0.d, z31.d, #90
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

0 comments on commit 5f04f00

Please sign in to comment.