Skip to content

Commit

Permalink
[AArch64]SME2 Outer Product and Accumulate instructions
Browse files Browse the repository at this point in the history
This patch adds the assembly/disassembly for the following instructions:
  BMOPA: Bitwise exclusive NOR population count outer product and accumulate.
  BMOPS: Bitwise exclusive NOR population count outer product and subtract.

  SMOPA (2-way): Signed integer sum of outer products and accumulate.
  SMOPS (2-way): Signed integer sum of outer products and subtract.

  UMOPA (2-way): Unsigned integer sum of outer products and accumulate.
  UMOPS (2-way): Unsigned integer sum of outer products and subtract.
The reference can be found here:

https://developer.arm.com/documentation/ddi0602/2022-09

Differential Revision: https://reviews.llvm.org/D136077
  • Loading branch information
CarolineConcatto committed Nov 1, 2022
1 parent cfeab50 commit 7fd2afa
Show file tree
Hide file tree
Showing 18 changed files with 778 additions and 26 deletions.
18 changes: 13 additions & 5 deletions llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
Expand Up @@ -53,8 +53,8 @@ let Predicates = [HasSME] in {
// Outer products
//===----------------------------------------------------------------------===//

defm BFMOPA_MPPZZ : sme_bf16_outer_product<0b0, "bfmopa", int_aarch64_sme_mopa_wide>;
defm BFMOPS_MPPZZ : sme_bf16_outer_product<0b1, "bfmops", int_aarch64_sme_mops_wide>;
defm BFMOPA_MPPZZ : sme_bf16_outer_product<0b000, "bfmopa", int_aarch64_sme_mopa_wide>;
defm BFMOPS_MPPZZ : sme_bf16_outer_product<0b001, "bfmops", int_aarch64_sme_mops_wide>;

defm FMOPA_MPPZZ_S : sme_outer_product_fp32<0b0, "fmopa", int_aarch64_sme_mopa>;
defm FMOPS_MPPZZ_S : sme_outer_product_fp32<0b1, "fmops", int_aarch64_sme_mops>;
Expand All @@ -66,8 +66,8 @@ defm FMOPS_MPPZZ_D : sme_outer_product_fp64<0b1, "fmops", int_aarch64_sme_mops>;
}

let Predicates = [HasSME] in {
defm FMOPAL_MPPZZ : sme_f16_outer_product<0b0, "fmopa", int_aarch64_sme_mopa_wide>;
defm FMOPSL_MPPZZ : sme_f16_outer_product<0b1, "fmops", int_aarch64_sme_mops_wide>;
defm FMOPAL_MPPZZ : sme_f16_outer_product<0b010, "fmopa", int_aarch64_sme_mopa_wide>;
defm FMOPSL_MPPZZ : sme_f16_outer_product<0b011, "fmops", int_aarch64_sme_mops_wide>;

defm SMOPA_MPPZZ_S : sme_int_outer_product_i32<0b000, "smopa", int_aarch64_sme_smopa_wide>;
defm SMOPS_MPPZZ_S : sme_int_outer_product_i32<0b001, "smops", int_aarch64_sme_smops_wide>;
Expand Down Expand Up @@ -550,8 +550,16 @@ defm UMLSLL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg24_single<"umlsll", 0b00110, Ma
defm UMLSLL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg24_single<"umlsll", 0b01110, MatrixOp32, ZZZZ_b, ZPR4b8>;
defm UMLSLL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"umlsll", 0b0110, MatrixOp32, ZZ_b_mul_r>;
defm UMLSLL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"umlsll", 0b0110, MatrixOp32, ZZZZ_b_mul_r>;
}

defm BMOPA_MPPZZ_S : sme2_bfp_mopx_tile<"bmopa", 0b100>;
defm BMOPS_MPPZZ_S : sme2_bfp_mopx_tile<"bmops", 0b101>;

defm SMOPA_MPPZZ_HtoS : sme2_int_mopx_tile<"smopa", 0b000>;
defm SMOPS_MPPZZ_HtoS : sme2_int_mopx_tile<"smops", 0b001>;

defm UMOPA_MPPZZ_HtoS : sme2_int_mopx_tile<"umopa", 0b100>;
defm UMOPS_MPPZZ_HtoS : sme2_int_mopx_tile<"umops", 0b101>;
}

let Predicates = [HasSME2, HasSMEI16I64] in {
defm ADD_VG2_M2ZZ_D : sme2_dot_mla_add_sub_array_vg24_single<"add", 0b1011010, MatrixOp64, ZZ_d, ZPR4b64>;
Expand Down
52 changes: 35 additions & 17 deletions llvm/lib/Target/AArch64/SMEInstrFormats.td
Expand Up @@ -87,7 +87,7 @@ multiclass sme_outer_product_fp64<bit S, string mnemonic, SDPatternOperator op>
(!cast<Instruction>(NAME # _PSEUDO) timm32_0_7:$tile, $pn, $pm, $zn, $zm)>;
}

class sme_int_outer_product_inst<bit u0, bit u1, bit S, bit sz,
class sme_int_outer_product_inst<bits<3> opc, bit sz, bit sme2,
MatrixTileOperand za_ty, ZPRRegOp zpr_ty,
string mnemonic>
: I<(outs za_ty:$ZAda),
Expand All @@ -100,23 +100,23 @@ class sme_int_outer_product_inst<bit u0, bit u1, bit S, bit sz,
bits<3> Pn;
bits<5> Zn;
let Inst{31-25} = 0b1010000;
let Inst{24} = u0;
let Inst{24} = opc{2}; // u0
let Inst{23} = 0b1;
let Inst{22} = sz;
let Inst{21} = u1;
let Inst{21} = opc{1}; // u1
let Inst{20-16} = Zm;
let Inst{15-13} = Pm;
let Inst{12-10} = Pn;
let Inst{9-5} = Zn;
let Inst{4} = S;
let Inst{3} = 0b0;
let Inst{4} = opc{0}; //S;
let Inst{3} = sme2;

let Constraints = "$ZAda = $_ZAda";
}

multiclass sme_int_outer_product_i32<bits<3> opc, string mnemonic,
SDPatternOperator op> {
def NAME : sme_int_outer_product_inst<opc{2}, opc{1}, opc{0}, 0b0, TileOp32,
def NAME : sme_int_outer_product_inst<opc, 0b0, 0b0, TileOp32,
ZPR8, mnemonic> {
bits<2> ZAda;
let Inst{1-0} = ZAda;
Expand All @@ -132,7 +132,7 @@ multiclass sme_int_outer_product_i32<bits<3> opc, string mnemonic,

multiclass sme_int_outer_product_i64<bits<3> opc, string mnemonic,
SDPatternOperator op> {
def NAME : sme_int_outer_product_inst<opc{2}, opc{1}, opc{0}, 0b1, TileOp64,
def NAME : sme_int_outer_product_inst<opc, 0b1, 0b0, TileOp64,
ZPR16, mnemonic> {
bits<3> ZAda;
let Inst{2-0} = ZAda;
Expand All @@ -145,9 +145,9 @@ multiclass sme_int_outer_product_i64<bits<3> opc, string mnemonic,
(!cast<Instruction>(NAME # _PSEUDO) timm32_0_7:$tile, $pn, $pm, $zn, $zm)>;
}

class sme_outer_product_widening_inst<bit op, bit S, string mnemonic>
class sme_outer_product_widening_inst<bits<3> opc, ZPRRegOp zpr_ty, string mnemonic>
: I<(outs TileOp32:$ZAda),
(ins TileOp32:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, ZPR16:$Zn, ZPR16:$Zm),
(ins TileOp32:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
"", []>,
Sched<[]> {
Expand All @@ -156,21 +156,24 @@ class sme_outer_product_widening_inst<bit op, bit S, string mnemonic>
bits<3> Pn;
bits<5> Zn;
bits<2> ZAda;
let Inst{31-22} = 0b1000000110;
let Inst{21} = op;
let Inst{31-25} = 0b1000000;
let Inst{24} = !if(opc{2}, 0, 1);
let Inst{23-22} = 0b10;
let Inst{21} = opc{1};
let Inst{20-16} = Zm;
let Inst{15-13} = Pm;
let Inst{12-10} = Pn;
let Inst{9-5} = Zn;
let Inst{4} = S;
let Inst{3-2} = 0b00;
let Inst{4} = opc{0};
let Inst{3} = opc{2};
let Inst{2} = 0b0;
let Inst{1-0} = ZAda;

let Constraints = "$ZAda = $_ZAda";
}

multiclass sme_bf16_outer_product<bit S, string mnemonic, SDPatternOperator op> {
def NAME : sme_outer_product_widening_inst<0b0, S, mnemonic>;
multiclass sme_bf16_outer_product<bits<3> opc, string mnemonic, SDPatternOperator op> {
def NAME : sme_outer_product_widening_inst<opc, ZPR16, mnemonic>;

def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16>;

Expand All @@ -179,8 +182,8 @@ multiclass sme_bf16_outer_product<bit S, string mnemonic, SDPatternOperator op>
(!cast<Instruction>(NAME # _PSEUDO) timm32_0_3:$tile, $pn, $pm, $zn, $zm)>;
}

multiclass sme_f16_outer_product<bit S, string mnemonic, SDPatternOperator op> {
def NAME : sme_outer_product_widening_inst<0b1, S, mnemonic>;
multiclass sme_f16_outer_product<bits<3> opc, string mnemonic, SDPatternOperator op> {
def NAME : sme_outer_product_widening_inst<opc, ZPR16, mnemonic>;

def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16>;

Expand Down Expand Up @@ -2361,3 +2364,18 @@ multiclass sme2_mla_ll_array_vg4_multi<string mnemonic, bits<4> op,
def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
(!cast<Instruction>(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, vector_ty:$Zn, vector_ty:$Zm), 0>;
}

//===----------------------------------------------------------------------===//
// SME2 Outer Product and Accumulate

// Defines one SME2 integer outer-product instruction record (used for the
// SMOPA/SMOPS/UMOPA/UMOPS "HtoS" definitions) on top of
// sme_int_outer_product_inst.
//   mnemonic - assembly mnemonic of the instruction.
//   op       - 3-bit opcode forwarded unchanged to the base class.
// The base class is instantiated with sz = 0b0 and sme2 = 0b1, a TileOp32
// destination tile and ZPR16 source vectors.
multiclass sme2_int_mopx_tile<string mnemonic, bits<3> op> {
def NAME : sme_int_outer_product_inst<op, 0b0, 0b1, TileOp32, ZPR16, mnemonic> {
// Two-bit tile operand, encoded in the lowest two instruction bits.
bits<2> ZAda;
let Inst{1-0} = ZAda;
// Bit 2 is not covered by the 2-bit tile field; it is fixed to zero.
let Inst{2} = 0b0;
}
}

// Defines one SME2 outer-product instruction record (used for the BMOPA/BMOPS
// definitions) on top of sme_outer_product_widening_inst.
//   mnemonic - assembly mnemonic of the instruction.
//   op       - 3-bit opcode forwarded unchanged to the base class.
// The source vectors use the ZPR32 register operand; the remaining operands
// and the encoding come from the base class.
multiclass sme2_bfp_mopx_tile<string mnemonic, bits<3> op> {
def NAME : sme_outer_product_widening_inst<op, ZPR32, mnemonic>;
}
2 changes: 1 addition & 1 deletion llvm/test/MC/AArch64/SME/smopa-diagnostics.s
Expand Up @@ -67,7 +67,7 @@ smopa za0.d, p0/m, p0/z, z0.h, z0.h
// expected: .s => .b, .d => .h

smopa za0.s, p0/m, p0/m, z0.h, z0.b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: smopa za0.s, p0/m, p0/m, z0.h, z0.b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

Expand Down
2 changes: 1 addition & 1 deletion llvm/test/MC/AArch64/SME/smops-diagnostics.s
Expand Up @@ -67,7 +67,7 @@ smops za0.d, p0/m, p0/z, z0.h, z0.h
// expected: .s => .b, .d => .h

smops za0.s, p0/m, p0/m, z0.h, z0.b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: smops za0.s, p0/m, p0/m, z0.h, z0.b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

Expand Down
2 changes: 1 addition & 1 deletion llvm/test/MC/AArch64/SME/umopa-diagnostics.s
Expand Up @@ -67,7 +67,7 @@ umopa za0.d, p0/m, p0/z, z0.h, z0.h
// expected: .s => .b, .d => .h

umopa za0.s, p0/m, p0/m, z0.h, z0.b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: umopa za0.s, p0/m, p0/m, z0.h, z0.b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

Expand Down
2 changes: 1 addition & 1 deletion llvm/test/MC/AArch64/SME/umops-diagnostics.s
Expand Up @@ -67,7 +67,7 @@ umops za0.d, p0/m, p0/z, z0.h, z0.h
// expected: .s => .b, .d => .h

umops za0.s, p0/m, p0/m, z0.h, z0.b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: umops za0.s, p0/m, p0/m, z0.h, z0.b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

Expand Down
35 changes: 35 additions & 0 deletions llvm/test/MC/AArch64/SME2/bmopa-diagnostics.s
@@ -0,0 +1,35 @@
// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s

// --------------------------------------------------------------------------//
// Invalid tile

bmopa za8.s, p0/m, p0/m, z0.s, z0.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: bmopa za8.s, p0/m, p0/m, z0.s, z0.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

// --------------------------------------------------------------------------//
// Invalid predicate

bmopa za0.s, p0/z, p0/m, z0.s, z0.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: bmopa za0.s, p0/z, p0/m, z0.s, z0.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

bmopa za0.s, p15/m, p0/m, z0.s, z0.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
// CHECK-NEXT: bmopa za0.s, p15/m, p0/m, z0.s, z0.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

// --------------------------------------------------------------------------//
// Invalid suffixes

bmopa za0.d, p0/z, p0/m, z0.d, z0.d
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s
// CHECK-NEXT: bmopa za0.d, p0/z, p0/m, z0.d, z0.d
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

bmopa za0.s, p0/m, p0/m, z0.s, z0.d
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: za0.s, p0/m, p0/m, z0.s, z0.d
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
86 changes: 86 additions & 0 deletions llvm/test/MC/AArch64/SME2/bmopa.s
@@ -0,0 +1,86 @@
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \
// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \
// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \
// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST


bmopa za0.s, p0/m, p0/m, z0.s, z0.s // 10000000-10000000-00000000-00001000
// CHECK-INST: bmopa za0.s, p0/m, p0/m, z0.s, z0.s
// CHECK-ENCODING: [0x08,0x00,0x80,0x80]
// CHECK-ERROR: instruction requires: sme2
// CHECK-UNKNOWN: 80800008 <unknown>

bmopa za1.s, p5/m, p2/m, z10.s, z21.s // 10000000-10010101-01010101-01001001
// CHECK-INST: bmopa za1.s, p5/m, p2/m, z10.s, z21.s
// CHECK-ENCODING: [0x49,0x55,0x95,0x80]
// CHECK-ERROR: instruction requires: sme2
// CHECK-UNKNOWN: 80955549 <unknown>

bmopa za3.s, p3/m, p7/m, z13.s, z8.s // 10000000-10001000-11101101-10101011
// CHECK-INST: bmopa za3.s, p3/m, p7/m, z13.s, z8.s
// CHECK-ENCODING: [0xab,0xed,0x88,0x80]
// CHECK-ERROR: instruction requires: sme2
// CHECK-UNKNOWN: 8088edab <unknown>

bmopa za3.s, p7/m, p7/m, z31.s, z31.s // 10000000-10011111-11111111-11101011
// CHECK-INST: bmopa za3.s, p7/m, p7/m, z31.s, z31.s
// CHECK-ENCODING: [0xeb,0xff,0x9f,0x80]
// CHECK-ERROR: instruction requires: sme2
// CHECK-UNKNOWN: 809fffeb <unknown>

bmopa za1.s, p3/m, p0/m, z17.s, z16.s // 10000000-10010000-00001110-00101001
// CHECK-INST: bmopa za1.s, p3/m, p0/m, z17.s, z16.s
// CHECK-ENCODING: [0x29,0x0e,0x90,0x80]
// CHECK-ERROR: instruction requires: sme2
// CHECK-UNKNOWN: 80900e29 <unknown>

bmopa za1.s, p1/m, p4/m, z1.s, z30.s // 10000000-10011110-10000100-00101001
// CHECK-INST: bmopa za1.s, p1/m, p4/m, z1.s, z30.s
// CHECK-ENCODING: [0x29,0x84,0x9e,0x80]
// CHECK-ERROR: instruction requires: sme2
// CHECK-UNKNOWN: 809e8429 <unknown>

bmopa za0.s, p5/m, p2/m, z19.s, z20.s // 10000000-10010100-01010110-01101000
// CHECK-INST: bmopa za0.s, p5/m, p2/m, z19.s, z20.s
// CHECK-ENCODING: [0x68,0x56,0x94,0x80]
// CHECK-ERROR: instruction requires: sme2
// CHECK-UNKNOWN: 80945668 <unknown>

bmopa za0.s, p6/m, p0/m, z12.s, z2.s // 10000000-10000010-00011001-10001000
// CHECK-INST: bmopa za0.s, p6/m, p0/m, z12.s, z2.s
// CHECK-ENCODING: [0x88,0x19,0x82,0x80]
// CHECK-ERROR: instruction requires: sme2
// CHECK-UNKNOWN: 80821988 <unknown>

bmopa za1.s, p2/m, p6/m, z1.s, z26.s // 10000000-10011010-11001000-00101001
// CHECK-INST: bmopa za1.s, p2/m, p6/m, z1.s, z26.s
// CHECK-ENCODING: [0x29,0xc8,0x9a,0x80]
// CHECK-ERROR: instruction requires: sme2
// CHECK-UNKNOWN: 809ac829 <unknown>

bmopa za1.s, p2/m, p0/m, z22.s, z30.s // 10000000-10011110-00001010-11001001
// CHECK-INST: bmopa za1.s, p2/m, p0/m, z22.s, z30.s
// CHECK-ENCODING: [0xc9,0x0a,0x9e,0x80]
// CHECK-ERROR: instruction requires: sme2
// CHECK-UNKNOWN: 809e0ac9 <unknown>

bmopa za2.s, p5/m, p7/m, z9.s, z1.s // 10000000-10000001-11110101-00101010
// CHECK-INST: bmopa za2.s, p5/m, p7/m, z9.s, z1.s
// CHECK-ENCODING: [0x2a,0xf5,0x81,0x80]
// CHECK-ERROR: instruction requires: sme2
// CHECK-UNKNOWN: 8081f52a <unknown>

bmopa za3.s, p2/m, p5/m, z12.s, z11.s // 10000000-10001011-10101001-10001011
// CHECK-INST: bmopa za3.s, p2/m, p5/m, z12.s, z11.s
// CHECK-ENCODING: [0x8b,0xa9,0x8b,0x80]
// CHECK-ERROR: instruction requires: sme2
// CHECK-UNKNOWN: 808ba98b <unknown>

35 changes: 35 additions & 0 deletions llvm/test/MC/AArch64/SME2/bmops-diagnostics.s
@@ -0,0 +1,35 @@
// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s

// --------------------------------------------------------------------------//
// Invalid tile

bmops za8.s, p0/m, p0/m, z0.s, z0.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: bmops za8.s, p0/m, p0/m, z0.s, z0.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

// --------------------------------------------------------------------------//
// Invalid predicate

bmops za0.s, p0/z, p0/m, z0.s, z0.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: bmops za0.s, p0/z, p0/m, z0.s, z0.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

bmops za0.s, p15/m, p0/m, z0.s, z0.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
// CHECK-NEXT: bmops za0.s, p15/m, p0/m, z0.s, z0.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

// --------------------------------------------------------------------------//
// Invalid suffixes

bmops za0.d, p0/z, p0/m, z0.d, z0.d
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s
// CHECK-NEXT: bmops za0.d, p0/z, p0/m, z0.d, z0.d
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

bmops za0.s, p0/m, p0/m, z0.s, z0.d
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: za0.s, p0/m, p0/m, z0.s, z0.d
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

0 comments on commit 7fd2afa

Please sign in to comment.