Skip to content

Commit

Permalink
[PowerPC] Add intrinsics for MMA
Browse files Browse the repository at this point in the history
This patch adds support for MMA intrinsics.

Authored by: Baptiste Saleil

Reviewed By: #powerpc, bsaleil, amyk

Differential Revision: https://reviews.llvm.org/D89345
  • Loading branch information
Baptiste Saleil authored and Ahsan Saghir committed Oct 23, 2020
1 parent dd887d9 commit edb2791
Show file tree
Hide file tree
Showing 5 changed files with 2,767 additions and 2 deletions.
80 changes: 78 additions & 2 deletions llvm/include/llvm/IR/IntrinsicsPowerPC.td
Expand Up @@ -141,6 +141,28 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
Intrinsic<ret_types, param_types, properties>;
}

//===----------------------------------------------------------------------===//
// PowerPC MMA Intrinsic Multi Class Definitions.
//

// Defines an MMA intrinsic family of five records: the base intrinsic NAME and
// the suffixed variants NAMEpp, NAMEpn, NAMEnp and NAMEnn.
//   args - operand types of the base intrinsic.
// Every record returns llvm_v512i1_ty and is marked IntrNoMem. The suffixed
// variants take one additional leading llvm_v512i1_ty operand ahead of `args`.
// NOTE(review): the leading v512i1 operand is presumably the accumulator being
// updated, and pp/pn/np/nn the sign combinations of the accumulate - confirm
// against the ISA.
multiclass PowerPC_MMA_ACC_Intrinsic<list<LLVMType> args> {
def NAME: Intrinsic<[llvm_v512i1_ty], args, [IntrNoMem]>;
def pp : Intrinsic<[llvm_v512i1_ty], !listconcat([llvm_v512i1_ty], args),
[IntrNoMem]>;
def pn : Intrinsic<[llvm_v512i1_ty], !listconcat([llvm_v512i1_ty], args),
[IntrNoMem]>;
def np : Intrinsic<[llvm_v512i1_ty], !listconcat([llvm_v512i1_ty], args),
[IntrNoMem]>;
def nn : Intrinsic<[llvm_v512i1_ty], !listconcat([llvm_v512i1_ty], args),
[IntrNoMem]>;
}

// Reduced form of the MMA intrinsic family: defines only the base intrinsic
// NAME and the single suffixed variant NAMEpp.
//   args - operand types of the base intrinsic.
// Both records return llvm_v512i1_ty and are marked IntrNoMem; NAMEpp takes
// one additional leading llvm_v512i1_ty operand ahead of `args`.
multiclass PowerPC_MMA_ACC_PP_Intrinsic<list<LLVMType> args> {
def NAME: Intrinsic<[llvm_v512i1_ty], args, [IntrNoMem]>;
def pp : Intrinsic<[llvm_v512i1_ty], !listconcat([llvm_v512i1_ty], args),
[IntrNoMem]>;
}

//===----------------------------------------------------------------------===//
// PowerPC Altivec Intrinsic Class Definitions.
//
Expand Down Expand Up @@ -1371,7 +1393,6 @@ def int_ppc_cfence : Intrinsic<[], [llvm_anyint_ty], []>;
// PowerPC set FPSCR Intrinsic Definitions.
// int_ppc_setrnd is exposed to front ends as the GCC builtin
// __builtin_setrnd. It takes one i32 operand and returns a double; its
// property list is empty (no IntrNoMem or similar attribute is attached).
def int_ppc_setrnd : GCCBuiltin<"__builtin_setrnd">,
Intrinsic<[llvm_double_ty], [llvm_i32_ty], []>;

}

let TargetPrefix = "ppc" in {
Expand Down Expand Up @@ -1400,5 +1421,60 @@ let TargetPrefix = "ppc" in {

// Takes no operands and produces a v512i1 value; marked IntrNoMem.
def int_ppc_mma_xxsetaccz :
Intrinsic<[llvm_v512i1_ty], [], [IntrNoMem]>;
}

// MMA Reduced-Precision: Outer Product Intrinsic Definitions.
//
// Each defm below expands a multiclass, so a single line yields several
// intrinsics: PowerPC_MMA_ACC_PP_Intrinsic produces NAME and NAMEpp, while
// PowerPC_MMA_ACC_Intrinsic produces NAME, NAMEpp, NAMEpn, NAMEnp and NAMEnn.
// The "pm"-prefixed entries take the same vector operands as their unprefixed
// counterpart followed by two or three extra i32 operands.
// NOTE(review): the trailing i32 operands are presumably the XMSK/YMSK/PMSK
// mask immediates used by the selection patterns - confirm against
// PPCInstrPrefix.td.
defm int_ppc_mma_xvi4ger8 :
PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
defm int_ppc_mma_pmxvi4ger8 :
PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty]>;

defm int_ppc_mma_xvi8ger4 :
PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
defm int_ppc_mma_pmxvi8ger4 :
PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty]>;

defm int_ppc_mma_xvi16ger2s :
PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
defm int_ppc_mma_pmxvi16ger2s :
PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty]>;

defm int_ppc_mma_xvf16ger2 :
PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
defm int_ppc_mma_pmxvf16ger2 :
PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty]>;
// The f32 and f64 prefixed forms take only two extra i32 operands.
defm int_ppc_mma_xvf32ger :
PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
defm int_ppc_mma_pmxvf32ger :
PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i32_ty]>;
// The f64 forms take a v256i1 as their first vector operand instead of v16i8.
defm int_ppc_mma_xvf64ger :
PowerPC_MMA_ACC_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty]>;
defm int_ppc_mma_pmxvf64ger :
PowerPC_MMA_ACC_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i32_ty]>;

// MMA Reduced-Precision: bfloat16 Outer Product Intrinsic Definitions.
defm int_ppc_mma_xvbf16ger2 :
PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
defm int_ppc_mma_pmxvbf16ger2 :
PowerPC_MMA_ACC_Intrinsic<
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;

// MMA Reduced-Precision: Missing Integer-based Outer Product Operations.
defm int_ppc_mma_xvi16ger2 :
PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
defm int_ppc_mma_pmxvi16ger2 :
PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty]>;
// xvi8ger4spp / pmxvi8ger4spp are written out directly rather than through a
// multiclass: only this one form is defined here, and it already carries the
// leading v512i1 operand in its operand list.
def int_ppc_mma_xvi8ger4spp :
Intrinsic<[llvm_v512i1_ty],
[llvm_v512i1_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
def int_ppc_mma_pmxvi8ger4spp :
Intrinsic<[llvm_v512i1_ty],
[llvm_v512i1_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
}
219 changes: 219 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1,3 +1,8 @@
// Mask immediates for MMA instructions (2, 4 and 8 bits).
// Each leaf matches an i32 immediate only when isUInt<N>(Imm) holds, i.e. the
// value fits in N unsigned bits (0..3, 0..15 and 0..255 respectively).
def Msk2Imm : ImmLeaf<i32, [{ return isUInt<2>(Imm); }]>;
def Msk4Imm : ImmLeaf<i32, [{ return isUInt<4>(Imm); }]>;
def Msk8Imm : ImmLeaf<i32, [{ return isUInt<8>(Imm); }]>;

//===----------------------------------------------------------------------===//
// PowerPC ISA 3.1 specific type constraints.
//
Expand Down Expand Up @@ -1341,6 +1346,220 @@ defm XVF64GER : ACC_NEG_UM_M42_XOM84C<59, 58, (ins vsrpevenrc:$XA, vsrc:$XB),
"xvf64ger", "$AT, $XA, $XB">;
//------------------------------------------------------------------------------

// MMA Intrinsics
//
// Selection patterns for the non-prefixed MMA intrinsics, all guarded by the
// MMA predicate. Each pattern maps an intrinsic returning v512i1 onto the
// instruction of the same (upper-cased) name. Intrinsics that take a v512i1
// first operand bind it to $ATi and pass it through as the instruction's first
// operand.
// NOTE(review): RCCp.AToVSRC / RCCp.BToVSRC are defined outside this hunk;
// presumably they place $XA / $XB into the VSRC register class - confirm.
let Predicates = [MMA] in {
def : Pat<(v512i1 (int_ppc_mma_xvi4ger8 v16i8:$XA, v16i8:$XB)),
(XVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;

def : Pat<(v512i1 (int_ppc_mma_xvi8ger4 v16i8:$XA, v16i8:$XB)),
(XVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;

def : Pat<(v512i1 (int_ppc_mma_xvi16ger2s v16i8:$XA, v16i8:$XB)),
(XVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;

def : Pat<(v512i1 (int_ppc_mma_xvf16ger2 v16i8:$XA, v16i8:$XB)),
(XVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;

def : Pat<(v512i1 (int_ppc_mma_xvf32ger v16i8:$XA, v16i8:$XB)),
(XVF32GER RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVF32GERPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
// The xvf64ger* forms take $XA as a v256i1 and hand it to the instruction
// directly, so only $XB goes through RCCp.BToVSRC.
def : Pat<(v512i1 (int_ppc_mma_xvf64ger v256i1:$XA, v16i8:$XB)),
(XVF64GER $XA, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
(XVF64GERPP $ATi, $XA, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
(XVF64GERPN $ATi, $XA, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
(XVF64GERNP $ATi, $XA, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
(XVF64GERNN $ATi, $XA, RCCp.BToVSRC)>;

def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2 v16i8:$XA, v16i8:$XB)),
(XVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvi16ger2 v16i8:$XA, v16i8:$XB)),
(XVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
}

// MMA Prefixed (Masked) Intrinsics
//
// Selection patterns for the "pm"-prefixed MMA intrinsics, guarded by both the
// MMA and PrefixInstrs predicates. Each pattern maps an intrinsic returning
// v512i1 onto the instruction of the same (upper-cased) name. Intrinsics that
// take a v512i1 first operand bind it to $ATi and pass it through as the
// instruction's first operand.
//
// The trailing i32 operands must be immediates that fit the mask width named
// by the ImmLeaf: $XMSK is 4 bits (2 bits for $YMSK on the f64 forms), and
// $PMSK is 8 bits for the *ger8 forms, 4 bits for *ger4 and 2 bits for *ger2.
// The f32 and f64 forms have no $PMSK operand.
// NOTE(review): RCCp.AToVSRC / RCCp.BToVSRC are defined outside this hunk;
// presumably they place $XA / $XB into the VSRC register class - confirm.
let Predicates = [MMA, PrefixInstrs] in {
  // 4-bit integer, rank-8: 8-bit PMSK.
  def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8 v16i8:$XA, v16i8:$XB,
                     Msk4Imm:$XMSK, Msk4Imm:$YMSK, Msk8Imm:$PMSK)),
            (PMXVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                        Msk4Imm:$YMSK, Msk8Imm:$PMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8pp v512i1:$ATi, v16i8:$XA,
                     v16i8:$XB, Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                     Msk8Imm:$PMSK)),
            (PMXVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                          Msk4Imm:$YMSK, Msk8Imm:$PMSK)>;

  // 8-bit integer, rank-4: 4-bit PMSK.
  def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4 v16i8:$XA, v16i8:$XB,
                     Msk4Imm:$XMSK, Msk4Imm:$YMSK, Msk4Imm:$PMSK)),
            (PMXVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                        Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4pp v512i1:$ATi, v16i8:$XA,
                     v16i8:$XB, Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                     Msk4Imm:$PMSK)),
            (PMXVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                          Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;

  // 16-bit integer (saturating), rank-2: 2-bit PMSK.
  def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2s v16i8:$XA, v16i8:$XB,
                     Msk4Imm:$XMSK, Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
            (PMXVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                          Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2spp v512i1:$ATi, v16i8:$XA,
                     v16i8:$XB, Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                     Msk2Imm:$PMSK)),
            (PMXVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                            Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;

  // 16-bit float, rank-2: 2-bit PMSK. Exactly one pattern per variant; the
  // pn/np patterns were previously listed twice.
  def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2 v16i8:$XA, v16i8:$XB,
                     Msk4Imm:$XMSK, Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
            (PMXVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                         Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pp v512i1:$ATi, v16i8:$XA,
                     v16i8:$XB, Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                     Msk2Imm:$PMSK)),
            (PMXVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                           Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pn v512i1:$ATi, v16i8:$XA,
                     v16i8:$XB, Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                     Msk2Imm:$PMSK)),
            (PMXVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                           Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2np v512i1:$ATi, v16i8:$XA,
                     v16i8:$XB, Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                     Msk2Imm:$PMSK)),
            (PMXVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                           Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2nn v512i1:$ATi, v16i8:$XA,
                     v16i8:$XB, Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                     Msk2Imm:$PMSK)),
            (PMXVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                           Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;

  // 32-bit float: no PMSK operand.
  def : Pat<(v512i1 (int_ppc_mma_pmxvf32ger v16i8:$XA, v16i8:$XB,
                     Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
            (PMXVF32GER RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                        Msk4Imm:$YMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpp v512i1:$ATi, v16i8:$XA,
                     v16i8:$XB, Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
            (PMXVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                          Msk4Imm:$YMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpn v512i1:$ATi, v16i8:$XA,
                     v16i8:$XB, Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
            (PMXVF32GERPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                          Msk4Imm:$YMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernp v512i1:$ATi, v16i8:$XA,
                     v16i8:$XB, Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
            (PMXVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                          Msk4Imm:$YMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernn v512i1:$ATi, v16i8:$XA,
                     v16i8:$XB, Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
            (PMXVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                          Msk4Imm:$YMSK)>;

  // 64-bit float: $XA is a v256i1 passed through directly, $YMSK is 2 bits,
  // and there is no PMSK operand.
  def : Pat<(v512i1 (int_ppc_mma_pmxvf64ger v256i1:$XA, v16i8:$XB,
                     Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
            (PMXVF64GER $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, Msk2Imm:$YMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpp v512i1:$ATi, v256i1:$XA,
                     v16i8:$XB, Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
            (PMXVF64GERPP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
                          Msk2Imm:$YMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpn v512i1:$ATi, v256i1:$XA,
                     v16i8:$XB, Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
            (PMXVF64GERPN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
                          Msk2Imm:$YMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernp v512i1:$ATi, v256i1:$XA,
                     v16i8:$XB, Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
            (PMXVF64GERNP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
                          Msk2Imm:$YMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernn v512i1:$ATi, v256i1:$XA,
                     v16i8:$XB, Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
            (PMXVF64GERNN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
                          Msk2Imm:$YMSK)>;

  // bfloat16, rank-2: 2-bit PMSK.
  def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2 v16i8:$XA, v16i8:$XB,
                     Msk4Imm:$XMSK, Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
            (PMXVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                          Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pp v512i1:$ATi, v16i8:$XA,
                     v16i8:$XB, Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                     Msk2Imm:$PMSK)),
            (PMXVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                            Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pn v512i1:$ATi, v16i8:$XA,
                     v16i8:$XB, Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                     Msk2Imm:$PMSK)),
            (PMXVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                            Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2np v512i1:$ATi, v16i8:$XA,
                     v16i8:$XB, Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                     Msk2Imm:$PMSK)),
            (PMXVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                            Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2nn v512i1:$ATi, v16i8:$XA,
                     v16i8:$XB, Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                     Msk2Imm:$PMSK)),
            (PMXVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                            Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;

  // 16-bit integer, rank-2: 2-bit PMSK.
  def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2 v16i8:$XA, v16i8:$XB,
                     Msk4Imm:$XMSK, Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
            (PMXVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                         Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2pp v512i1:$ATi, v16i8:$XA,
                     v16i8:$XB, Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                     Msk2Imm:$PMSK)),
            (PMXVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                           Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;

  // 8-bit integer (saturating), rank-4. PMSK is 4 bits wide here, matching
  // pmxvi8ger4 / pmxvi8ger4pp above; constraining it with Msk2Imm made the
  // pattern reject the valid mask values 4..15.
  def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4spp v512i1:$ATi, v16i8:$XA,
                     v16i8:$XB, Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                     Msk4Imm:$PMSK)),
            (PMXVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                           Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
}

def Concats {
dag VecsToVecPair0 =
(v256i1 (INSERT_SUBREG
Expand Down

0 comments on commit edb2791

Please sign in to comment.