Skip to content

Commit

Permalink
[CLANG][LLVM][AArch64] Add SME2.1 intrinsics for MOVAZ array to vector (
Browse files Browse the repository at this point in the history
…#88901)

According to the specification in
    ARM-software/acle#309, this adds the following intrinsics:

    Move and zero multiple ZA single-vector groups to vector registers

    // Variants are also available for _za8_u8, _za16_s16, _za16_u16,
    // _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
    // _za64_s64, _za64_u64 and _za64_f64
    svint8x2_t svreadz_za8_s8_vg1x2(uint32_t slice)
    __arm_streaming __arm_inout("za");

    // Variants are also available for _za8_u8, _za16_s16, _za16_u16,
    // _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
    // _za64_s64, _za64_u64 and _za64_f64
    svint8x4_t svreadz_za8_s8_vg1x4(uint32_t slice)
    __arm_streaming __arm_inout("za");
  • Loading branch information
CarolineConcatto authored Jul 1, 2024
1 parent 22c7317 commit 6859e5a
Show file tree
Hide file tree
Showing 8 changed files with 1,002 additions and 11 deletions.
12 changes: 12 additions & 0 deletions clang/include/clang/Basic/arm_sme.td
Original file line number Diff line number Diff line change
Expand Up @@ -805,4 +805,16 @@ defm SVREADZ_ZA16 : ZAReadzSingle<"za16", "sUshb", "aarch64_sme_readz", [ImmChe
defm SVREADZ_ZA32 : ZAReadzSingle<"za32", "iUif", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_3>]>;
defm SVREADZ_ZA64 : ZAReadzSingle<"za64", "lUld", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_7>]>;
defm SVREADZ_ZA128 : ZAReadzSingle<"za128", "csilUcUiUsUlbhfd", "aarch64_sme_readz_q", [ImmCheck<0, ImmCheck0_15>]>;

// Defines the ZA array-to-vector "read and zero" builtins
// svreadz_za{8,16,32,64}_{d}_vg1x<vg_num>.
//
// vg_num is a string that is spliced into three places of every record:
//   * the builtin name suffix      ("..._vg1x" # vg_num),
//   * the prototype string         (vg_num # "m"),
//   * the LLVM intrinsic name      ("aarch64_sme_readz_x" # vg_num).
//
// One record is emitted per ZA element size (_B, _H, _S, _D); the third
// SInst argument lists the element types each record is expanded for.
// NOTE(review): the prototype presumably encodes "tuple of vg_num vectors
// returned, one uint32_t slice argument" (matches the ACLE signatures in the
// commit message) - confirm against the prototype-letter table.
multiclass ZAReadzArray<string vg_num>{
// All four records are only available when the "sme2p1" SME guard is met.
let SMETargetGuard = "sme2p1" in {
// Every record is flagged IsStreaming and IsInOutZA.
def NAME # _B : SInst<"svreadz_za8_{d}_vg1x" # vg_num, vg_num # "m", "cUc", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>;
def NAME # _H : SInst<"svreadz_za16_{d}_vg1x" # vg_num, vg_num # "m", "sUsbh", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>;
def NAME # _S : SInst<"svreadz_za32_{d}_vg1x" # vg_num, vg_num # "m", "iUif", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>;
def NAME # _D : SInst<"svreadz_za64_{d}_vg1x" # vg_num, vg_num # "m", "lUld", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>;
}
}

// Instantiate ZAReadzArray for vg_num "2" and "4", which yields the
// svreadz_*_vg1x2 and svreadz_*_vg1x4 builtin names respectively.
defm SVREADZ_VG2 : ZAReadzArray<"2">;
defm SVREADZ_VG4 : ZAReadzArray<"4">;
} // let SVETargetGuard = InvalidMode
705 changes: 705 additions & 0 deletions clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAArch64.td
Original file line number Diff line number Diff line change
Expand Up @@ -2869,6 +2869,16 @@ let TargetPrefix = "aarch64" in {
def int_aarch64_sme_readz_q_horiz : SME_MOVAZ_TileToVector_Intrinsic;
def int_aarch64_sme_readz_q_vert : SME_MOVAZ_TileToVector_Intrinsic;

// llvm.aarch64.sme.readz.x2: takes a single i32 operand and returns two
// vectors of the same (overloaded) vector type.
// Marked IntrNoMem but IntrHasSideEffects.
// NOTE(review): the side-effect flag is presumably there because the
// operation also zeroes the ZA source ("move and zero") - confirm.
def int_aarch64_sme_readz_x2
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
[llvm_i32_ty],
[IntrNoMem, IntrHasSideEffects]>;

// llvm.aarch64.sme.readz.x4: same shape as the x2 form, but returns four
// vectors that all share the overloaded vector type.
def int_aarch64_sme_readz_x4
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
[llvm_i32_ty],
[IntrNoMem, IntrHasSideEffects]>;

def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;

class SME_OuterProduct_Intrinsic
Expand Down
32 changes: 26 additions & 6 deletions llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -395,8 +395,9 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
template <unsigned MaxIdx, unsigned Scale>
void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
unsigned Op);
void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs, unsigned Op,
unsigned MaxIdx, unsigned Scale);
void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
unsigned Op, unsigned MaxIdx, unsigned Scale,
unsigned BaseReg = 0);
bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
/// SVE Reg+Imm addressing mode.
template <int64_t Min, int64_t Max>
Expand Down Expand Up @@ -2006,18 +2007,27 @@ void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,

void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
unsigned Op, unsigned MaxIdx,
unsigned Scale) {
unsigned Scale, unsigned BaseReg) {
// Slice can be in different positions
// The array to vector: llvm.aarch64.sme.readz.x<N>(slice)
// The tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
SDValue SliceBase = N->getOperand(2);
if (BaseReg != AArch64::ZA)
SliceBase = N->getOperand(3);

SDValue SliceBase = N->getOperand(3);
SDValue Base, Offset;
if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
return;
// The correct Za tile number is computed in Machine Instruction
// See EmitZAInstr
// DAG cannot select Za tile as an output register with ZReg
SDLoc DL(N);
SDValue Ops[] = {/*TileNum*/ N->getOperand(2), Base, Offset,
/*Chain*/ N->getOperand(0)};
SmallVector<SDValue, 6> Ops;
if (BaseReg != AArch64::ZA )
Ops.push_back(N->getOperand(2));
Ops.push_back(Base);
Ops.push_back(Offset);
Ops.push_back(N->getOperand(0)); //Chain
SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);

EVT VT = N->getValueType(0);
Expand Down Expand Up @@ -5342,6 +5352,16 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
}
break;
}
case Intrinsic::aarch64_sme_readz_x2: {
SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
AArch64::ZA);
return;
}
case Intrinsic::aarch64_sme_readz_x4: {
SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
AArch64::ZA);
return;
}
case Intrinsic::swift_async_context_addr: {
SDLoc DL(Node);
SDValue Chain = Node->getOperand(0);
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2995,6 +2995,11 @@ AArch64TargetLowering::EmitZAInstr(unsigned Opc, unsigned BaseReg,
MIB.addReg(BaseReg + MI.getOperand(StartIdx).getImm()); // Input Za Tile
StartIdx++;
} else {
// Avoids all instructions with mnemonic za.<sz>[Reg, Imm,
if (MI.getOperand(0).isReg() && !MI.getOperand(1).isImm()) {
MIB.add(MI.getOperand(StartIdx)); // Output ZPR
++StartIdx;
}
MIB.addReg(BaseReg, RegState::Define).addReg(BaseReg);
}
for (unsigned I = StartIdx; I < MI.getNumOperands(); ++I)
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -818,8 +818,8 @@ defm MOVAZ_ZMI : sme2p1_movaz_tile_to_vec<"movaz", int_aarch64_sme_readz_horiz,
int_aarch64_sme_readz_q_horiz, int_aarch64_sme_readz_q_vert>;
defm MOVAZ_2ZMI : sme2p1_movaz_tile_to_vec_vg2<"movaz">;
defm MOVAZ_4ZMI : sme2p1_movaz_tile_to_vec_vg4<"movaz">;
defm MOVAZ_VG2_2ZM : sme2_mova_array_to_vec_vg2_multi<0b010, "movaz">;
defm MOVAZ_VG4_4ZM : sme2_mova_array_to_vec_vg4_multi<0b1100, "movaz">;
defm MOVAZ_VG2_2ZMXI : sme2_movaz_array_to_vec_vg2_multi<"movaz">;
defm MOVAZ_VG4_4ZMXI : sme2_movaz_array_to_vec_vg4_multi<"movaz">;

defm ZERO_MXI : sme2p1_zero_matrix<"zero">;

Expand Down
23 changes: 21 additions & 2 deletions llvm/lib/Target/AArch64/SMEInstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,15 @@ class sme2_movez_to_tile_pseudo<string name, Operand tile_imm, Operand imm_ty, R
let SMEMatrixType = za_flag;
let usesCustomInserter = 1;
}

// Pseudo instruction for the MOVAZ array-to-vector forms.
//
//   name            - instruction name passed to SMEPseudo2Instr
//   index_ty        - operand type of the immediate slice offset ($imm3)
//   multi_vector_ty - register operand class of the result ($Zd)
//   za_flag         - value stored in SMEMatrixType
//
// Outputs: $Zd. Inputs: $Rv (MatrixIndexGPR32Op8_11) and $imm3.
// The pattern list is empty, so no ISel pattern is attached here.
// NOTE(review): SMEPseudo2Instr<name, 0> presumably pairs this pseudo with
// the real instruction tagged SMEPseudo2Instr<name, 1> - confirm.
class sme2_movaz_array_to_tile_pseudo<string name, Operand index_ty, RegisterOperand multi_vector_ty,
SMEMatrixTypeEnum za_flag>
: SMEPseudo2Instr<name, 0>,
Pseudo<(outs multi_vector_ty:$Zd), (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3), []> {
let SMEMatrixType = za_flag;
// Expanded through the target's custom-inserter hook.
let usesCustomInserter = 1;
}

//===----------------------------------------------------------------------===//
// SME pattern match helpers.
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -4287,7 +4296,7 @@ class sme2_mova_array_to_vec_vg24_multi<bits<4>op, RegisterOperand vector_ty,
// move array to vector, two registers.
multiclass sme2_mova_array_to_vec_vg2_multi<bits<3> opc, string mnemonic> {
def NAME : sme2_mova_array_to_vec_vg24_multi<{opc,?}, ZZ_d_mul_r, MatrixOp64,
mnemonic, "vgx2"> {
mnemonic, "vgx2">, SMEPseudo2Instr<NAME, 1>{
bits<4> Zd;
let Inst{4-1} = Zd;
}
Expand Down Expand Up @@ -4359,10 +4368,15 @@ multiclass sme2_mova_array_to_vec_vg2_multi<bits<3> opc, string mnemonic> {
}
}

// MOVAZ array to vector, two registers.
// Emits the real instruction records through
// sme2_mova_array_to_vec_vg2_multi (opcode bits 0b010) and adds a
// NAME # _PSEUDO record that uses a 0-7 slice-offset operand, the
// ZZ_d_mul_r result class and the SMEMatrixArray matrix type.
multiclass sme2_movaz_array_to_vec_vg2_multi<string mnemonic> {
defm NAME : sme2_mova_array_to_vec_vg2_multi<0b010, mnemonic>;
def NAME # _PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZ_d_mul_r, SMEMatrixArray>;
}

// move array to vector, four registers
multiclass sme2_mova_array_to_vec_vg4_multi<bits<4> opc, string mnemonic> {
def NAME : sme2_mova_array_to_vec_vg24_multi<opc, ZZZZ_d_mul_r, MatrixOp64,
mnemonic, "vgx4"> {
mnemonic, "vgx4">, SMEPseudo2Instr<NAME, 1> {
bits<3> Zd;
let Inst{4-2} = Zd;
}
Expand Down Expand Up @@ -4434,6 +4448,11 @@ multiclass sme2_mova_array_to_vec_vg4_multi<bits<4> opc, string mnemonic> {
}
}

// MOVAZ array to vector, four registers.
// Emits the real instruction records through
// sme2_mova_array_to_vec_vg4_multi (opcode bits 0b1100) and adds a
// NAME # _PSEUDO record that uses a 0-7 slice-offset operand, the
// ZZZZ_d_mul_r result class and the SMEMatrixArray matrix type.
multiclass sme2_movaz_array_to_vec_vg4_multi<string mnemonic> {
defm NAME : sme2_mova_array_to_vec_vg4_multi<0b1100, mnemonic>;
def NAME # _PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZZZ_d_mul_r, SMEMatrixArray>;
}

//===----------------------------------------------------------------------===//
// SME2 multi-vec saturating shift right narrow
class sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u>
Expand Down
Loading

0 comments on commit 6859e5a

Please sign in to comment.