Skip to content

Commit

Permalink
[AArch64][SVE2] Asm: implement CMLA/SQRDCMLAH instructions
Browse files Browse the repository at this point in the history
Summary:
This patch adds support for the indexed and unpredicated vector forms
of the CMLA and SQRDCMLAH instructions.

The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest

Reviewed By: SjoerdMeijer

Differential Revision: https://reviews.llvm.org/D61906

llvm-svn: 360871
  • Loading branch information
c-rhodes committed May 16, 2019
1 parent 07eba98 commit 472c6ef
Show file tree
Hide file tree
Showing 6 changed files with 557 additions and 0 deletions.
9 changes: 9 additions & 0 deletions llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1057,4 +1057,13 @@ let Predicates = [HasSVE2] in {

// SVE2 complex integer dot product
defm CDOT_ZZZ : sve2_cintx_dot<"cdot">;

// SVE2 complex integer multiply-add (indexed)
// The first template argument is the opc bit: 0b0 for "cmla".
defm CMLA_ZZZI : sve2_cmla_by_indexed_elem<0b0, "cmla">;
// SVE2 complex saturating multiply-add (indexed)
// Same multiclass with opc = 0b1 for "sqrdcmlah".
defm SQRDCMLAH_ZZZI : sve2_cmla_by_indexed_elem<0b1, "sqrdcmlah">;

// SVE2 complex integer multiply-add (unpredicated vector forms).
// CMLA and SQRDCMLAH share the sve2_int_cmla multiclass and differ only in
// the opc bit (0b0 for "cmla", 0b1 for "sqrdcmlah").
defm CMLA_ZZZ : sve2_int_cmla<0b0, "cmla">;
defm SQRDCMLAH_ZZZ : sve2_int_cmla<0b1, "sqrdcmlah">;
}
30 changes: 30 additions & 0 deletions llvm/lib/Target/AArch64/SVEInstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -1869,6 +1869,17 @@ multiclass sve2_cintx_dot<string asm> {
def _D : sve2_complex_int_arith<0b11, 0b0001, asm, ZPR64, ZPR16>;
}

//===----------------------------------------------------------------------===//
// SVE2 Complex Multiply-Add Group
//===----------------------------------------------------------------------===//

// Unpredicated vector forms of the SVE2 complex multiply-add instructions.
// Emits one record per element-size suffix (_B, _H, _S, _D), each built on
// sve2_complex_int_arith (defined outside this block) and using the same ZPR
// register class for both register-class arguments.
//   opc - low bit of the 4-bit opcode { 0b001, opc }.
//   asm - assembly mnemonic passed through to the base class.
// NOTE(review): the first template argument (0b00..0b11) looks like the
// element-size field -- confirm against sve2_complex_int_arith.
multiclass sve2_int_cmla<bit opc, string asm> {
def _B : sve2_complex_int_arith<0b00, { 0b001, opc }, asm, ZPR8, ZPR8>;
def _H : sve2_complex_int_arith<0b01, { 0b001, opc }, asm, ZPR16, ZPR16>;
def _S : sve2_complex_int_arith<0b10, { 0b001, opc }, asm, ZPR32, ZPR32>;
def _D : sve2_complex_int_arith<0b11, { 0b001, opc }, asm, ZPR64, ZPR64>;
}

//===----------------------------------------------------------------------===//
// SVE2 Complex Integer Dot Product - Indexed Group
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -1910,6 +1921,25 @@ multiclass sve2_cintx_dot_by_indexed_elem<string asm> {
}
}

//===----------------------------------------------------------------------===//
// SVE2 Complex Multiply-Add - Indexed Group
//===----------------------------------------------------------------------===//

// Indexed-element forms of the SVE2 complex multiply-add instructions.
// Each record extends sve2_complex_int_arith_indexed (defined outside this
// block) and fills in Inst{20-16}, which is split differently between the
// element index (iop) and the indexed register (Zm) for each element size.
//   opc - low bit of the 4-bit opcode { 0b011, opc }.
//   asm - assembly mnemonic passed through to the base class.
// NOTE(review): the index operand is VectorIndexS for the .h form and
// VectorIndexD for the .s form; presumably these are chosen for their lane
// ranges (matching the 2-bit / 1-bit iop fields) rather than for the element
// size -- confirm against the operand definitions.
multiclass sve2_cmla_by_indexed_elem<bit opc, string asm> {
// 16-bit elements: 2-bit index in Inst{20-19}, 3-bit Zm in Inst{18-16}
// (indexed operand uses the ZPR3b16 register class).
def _H : sve2_complex_int_arith_indexed<0b10, { 0b011, opc }, asm, ZPR16, ZPR16, ZPR3b16, VectorIndexS> {
bits<2> iop;
bits<3> Zm;
let Inst{20-19} = iop;
let Inst{18-16} = Zm;
}
// 32-bit elements: 1-bit index in Inst{20}, 4-bit Zm in Inst{19-16}
// (indexed operand uses the ZPR4b32 register class).
def _S : sve2_complex_int_arith_indexed<0b11, { 0b011, opc }, asm, ZPR32, ZPR32, ZPR4b32, VectorIndexD> {
bit iop;
bits<4> Zm;
let Inst{20} = iop;
let Inst{19-16} = Zm;
}
}

//===----------------------------------------------------------------------===//
// SVE2 Integer Multiply - Unpredicated Group
//===----------------------------------------------------------------------===//
Expand Down
103 changes: 103 additions & 0 deletions llvm/test/MC/AArch64/SVE2/cmla-diagnostics.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// Negative assembler tests for the SVE2 CMLA instruction. Every input below is
// expected to be rejected with +sve2 enabled; the diagnostic text and the
// echoed source line are matched relative to the offending line.
// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s


// ------------------------------------------------------------------------- //
// Invalid element size

cmla z0.h, z1.b, z2.b[0], #0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: cmla z0.h, z1.b, z2.b[0], #0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

cmla z0.h, z1.s, z2.s[0], #0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: cmla z0.h, z1.s, z2.s[0], #0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

cmla z0.h, z1.d, z2.d[0], #0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: cmla z0.h, z1.d, z2.d[0], #0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

cmla z0.s, z1.b, z2.b[0], #0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: cmla z0.s, z1.b, z2.b[0], #0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

cmla z0.s, z1.h, z2.h[0], #0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: cmla z0.s, z1.h, z2.h[0], #0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

cmla z0.s, z1.d, z2.d[0], #0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: cmla z0.s, z1.d, z2.d[0], #0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:


// ------------------------------------------------------------------------- //
// Invalid restricted register for indexed vector.

cmla z0.h, z1.h, z8.h[3], #0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: cmla z0.h, z1.h, z8.h[3], #0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

cmla z0.s, z1.s, z16.s[1], #0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: cmla z0.s, z1.s, z16.s[1], #0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:


// ------------------------------------------------------------------------- //
// Invalid element index

cmla z0.h, z1.h, z7.h[-1], #0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
// CHECK-NEXT: cmla z0.h, z1.h, z7.h[-1], #0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

cmla z0.h, z1.h, z7.h[4], #0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
// CHECK-NEXT: cmla z0.h, z1.h, z7.h[4], #0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

cmla z0.s, z1.s, z15.s[-1], #0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 1].
// CHECK-NEXT: cmla z0.s, z1.s, z15.s[-1], #0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

cmla z0.s, z1.s, z15.s[2], #0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 1].
// CHECK-NEXT: cmla z0.s, z1.s, z15.s[2], #0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:


// --------------------------------------------------------------------------//
// Invalid rotation

cmla z0.h, z1.h, z2.h[0], #360
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: complex rotation must be 0, 90, 180 or 270.
// CHECK-NEXT: cmla z0.h, z1.h, z2.h[0], #360
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

cmla z0.s, z1.s, z2.s[0], #450
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: complex rotation must be 0, 90, 180 or 270.
// CHECK-NEXT: cmla z0.s, z1.s, z2.s[0], #450
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:


// --------------------------------------------------------------------------//
// Negative tests for instructions that are incompatible with movprfx

movprfx z0.b, p0/z, z7.b
cmla z0.b, z1.b, z31.b, #0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
// CHECK-NEXT: cmla z0.b, z1.b, z31.b, #0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

movprfx z0.s, p0/z, z7.s
cmla z0.s, z1.s, z15.s[1], #0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
// CHECK-NEXT: cmla z0.s, z1.s, z15.s[1], #0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
156 changes: 156 additions & 0 deletions llvm/test/MC/AArch64/SVE2/cmla.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
// Positive assembler/disassembler tests for the SVE2 CMLA instruction.
// The four invocations below check, in order:
//   1. assembling with +sve2 prints the expected instruction and encoding;
//   2. assembling without +sve2 fails with a "requires" diagnostic;
//   3. an object file built with +sve2 disassembles back to the same
//      instruction when the disassembler also has +sve2;
//   4. the same object file disassembles as <unknown> without +sve2.
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
// RUN: | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN

cmla z0.b, z1.b, z2.b, #0
// CHECK-INST: cmla z0.b, z1.b, z2.b, #0
// CHECK-ENCODING: [0x20,0x20,0x02,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: 20 20 02 44 <unknown>

cmla z0.h, z1.h, z2.h, #0
// CHECK-INST: cmla z0.h, z1.h, z2.h, #0
// CHECK-ENCODING: [0x20,0x20,0x42,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: 20 20 42 44 <unknown>

cmla z0.s, z1.s, z2.s, #0
// CHECK-INST: cmla z0.s, z1.s, z2.s, #0
// CHECK-ENCODING: [0x20,0x20,0x82,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: 20 20 82 44 <unknown>

cmla z0.d, z1.d, z2.d, #0
// CHECK-INST: cmla z0.d, z1.d, z2.d, #0
// CHECK-ENCODING: [0x20,0x20,0xc2,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: 20 20 c2 44 <unknown>

cmla z29.b, z30.b, z31.b, #90
// CHECK-INST: cmla z29.b, z30.b, z31.b, #90
// CHECK-ENCODING: [0xdd,0x27,0x1f,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: dd 27 1f 44 <unknown>

cmla z29.h, z30.h, z31.h, #90
// CHECK-INST: cmla z29.h, z30.h, z31.h, #90
// CHECK-ENCODING: [0xdd,0x27,0x5f,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: dd 27 5f 44 <unknown>

cmla z29.s, z30.s, z31.s, #90
// CHECK-INST: cmla z29.s, z30.s, z31.s, #90
// CHECK-ENCODING: [0xdd,0x27,0x9f,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: dd 27 9f 44 <unknown>

cmla z29.d, z30.d, z31.d, #90
// CHECK-INST: cmla z29.d, z30.d, z31.d, #90
// CHECK-ENCODING: [0xdd,0x27,0xdf,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: dd 27 df 44 <unknown>

cmla z31.b, z31.b, z31.b, #180
// CHECK-INST: cmla z31.b, z31.b, z31.b, #180
// CHECK-ENCODING: [0xff,0x2b,0x1f,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: ff 2b 1f 44 <unknown>

cmla z31.h, z31.h, z31.h, #180
// CHECK-INST: cmla z31.h, z31.h, z31.h, #180
// CHECK-ENCODING: [0xff,0x2b,0x5f,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: ff 2b 5f 44 <unknown>

cmla z31.s, z31.s, z31.s, #180
// CHECK-INST: cmla z31.s, z31.s, z31.s, #180
// CHECK-ENCODING: [0xff,0x2b,0x9f,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: ff 2b 9f 44 <unknown>

cmla z31.d, z31.d, z31.d, #180
// CHECK-INST: cmla z31.d, z31.d, z31.d, #180
// CHECK-ENCODING: [0xff,0x2b,0xdf,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: ff 2b df 44 <unknown>

cmla z15.b, z16.b, z17.b, #270
// CHECK-INST: cmla z15.b, z16.b, z17.b, #270
// CHECK-ENCODING: [0x0f,0x2e,0x11,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: 0f 2e 11 44 <unknown>

cmla z15.h, z16.h, z17.h, #270
// CHECK-INST: cmla z15.h, z16.h, z17.h, #270
// CHECK-ENCODING: [0x0f,0x2e,0x51,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: 0f 2e 51 44 <unknown>

cmla z15.s, z16.s, z17.s, #270
// CHECK-INST: cmla z15.s, z16.s, z17.s, #270
// CHECK-ENCODING: [0x0f,0x2e,0x91,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: 0f 2e 91 44 <unknown>

cmla z15.d, z16.d, z17.d, #270
// CHECK-INST: cmla z15.d, z16.d, z17.d, #270
// CHECK-ENCODING: [0x0f,0x2e,0xd1,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: 0f 2e d1 44 <unknown>

cmla z0.h, z1.h, z2.h[0], #0
// CHECK-INST: cmla z0.h, z1.h, z2.h[0], #0
// CHECK-ENCODING: [0x20,0x60,0xa2,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: 20 60 a2 44 <unknown>

cmla z0.s, z1.s, z2.s[0], #0
// CHECK-INST: cmla z0.s, z1.s, z2.s[0], #0
// CHECK-ENCODING: [0x20,0x60,0xe2,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: 20 60 e2 44 <unknown>

cmla z31.h, z30.h, z7.h[0], #180
// CHECK-INST: cmla z31.h, z30.h, z7.h[0], #180
// CHECK-ENCODING: [0xdf,0x6b,0xa7,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: df 6b a7 44 <unknown>

cmla z31.s, z30.s, z7.s[0], #180
// CHECK-INST: cmla z31.s, z30.s, z7.s[0], #180
// CHECK-ENCODING: [0xdf,0x6b,0xe7,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: df 6b e7 44 <unknown>


// --------------------------------------------------------------------------//
// Test compatibility with MOVPRFX instruction.

movprfx z4, z6
// CHECK-INST: movprfx z4, z6
// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: c4 bc 20 04 <unknown>

cmla z4.d, z31.d, z31.d, #270
// CHECK-INST: cmla z4.d, z31.d, z31.d, #270
// CHECK-ENCODING: [0xe4,0x2f,0xdf,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: e4 2f df 44 <unknown>

movprfx z21, z28
// CHECK-INST: movprfx z21, z28
// CHECK-ENCODING: [0x95,0xbf,0x20,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 95 bf 20 04 <unknown>

cmla z21.s, z10.s, z5.s[1], #90
// CHECK-INST: cmla z21.s, z10.s, z5.s[1], #90
// CHECK-ENCODING: [0x55,0x65,0xf5,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: 55 65 f5 44 <unknown>
Loading

0 comments on commit 472c6ef

Please sign in to comment.