Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -709,13 +709,13 @@ let Predicates = [HasSVE_or_SME] in {

let Predicates = [HasSVE_or_SME] in {
def : Pat<(nxv4i32 (partial_reduce_umla nxv4i32:$Acc, nxv16i8:$MulLHS, nxv16i8:$MulRHS)),
(UDOT_ZZZ_S $Acc, $MulLHS, $MulRHS)>;
(UDOT_ZZZ_BtoS $Acc, $MulLHS, $MulRHS)>;
def : Pat<(nxv4i32 (partial_reduce_smla nxv4i32:$Acc, nxv16i8:$MulLHS, nxv16i8:$MulRHS)),
(SDOT_ZZZ_S $Acc, $MulLHS, $MulRHS)>;
(SDOT_ZZZ_BtoS $Acc, $MulLHS, $MulRHS)>;
def : Pat<(nxv2i64 (partial_reduce_umla nxv2i64:$Acc, nxv8i16:$MulLHS, nxv8i16:$MulRHS)),
(UDOT_ZZZ_D $Acc, $MulLHS, $MulRHS)>;
(UDOT_ZZZ_HtoD $Acc, $MulLHS, $MulRHS)>;
def : Pat<(nxv2i64 (partial_reduce_smla nxv2i64:$Acc, nxv8i16:$MulLHS, nxv8i16:$MulRHS)),
(SDOT_ZZZ_D $Acc, $MulLHS, $MulRHS)>;
(SDOT_ZZZ_HtoD $Acc, $MulLHS, $MulRHS)>;
} // End HasSVE_or_SME

defm SDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b0, "sdot", int_aarch64_sve_sdot_lane>;
Expand Down Expand Up @@ -2541,7 +2541,7 @@ let Predicates = [HasBF16, HasSVE_or_SME] in {
} // End HasBF16, HasSVE_or_SME

let Predicates = [HasBF16, HasSVE] in {
defm BFMMLA_ZZZ : sve_fp_matrix_mla<0b01, "bfmmla", ZPR32, ZPR16, int_aarch64_sve_bfmmla, nxv4f32, nxv8bf16>;
defm BFMMLA_ZZZ_HtoS : sve_fp_matrix_mla<0b01, "bfmmla", ZPR32, ZPR16, int_aarch64_sve_bfmmla, nxv4f32, nxv8bf16>;
} // End HasBF16, HasSVE

let Predicates = [HasBF16, HasSVE_or_SME] in {
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/AArch64/AArch64SchedA320.td
Original file line number Diff line number Diff line change
Expand Up @@ -826,13 +826,13 @@ def : InstRW<[CortexA320MCWrite<15, 12, CortexA320UnitVMC>], (instregex "^[SU]DI
def : InstRW<[CortexA320MCWrite<26, 23, CortexA320UnitVMC>], (instregex "^[SU]DIVR?_(ZPmZ|ZPZZ)_D")>;

// Dot product, 8 bit
def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^[SU]DOT_ZZZI?_S")>;
def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^[SU]DOT_ZZZI?_BtoS")>;

// Dot product, 8 bit, using signed and unsigned integers
def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>;

// Dot product, 16 bit
def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^[SU]DOT_ZZZI?_D")>;
def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^[SU]DOT_ZZZI?_HtoD")>;

// Duplicate, immediate and indexed form
def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "^DUP_ZI_[BHSD]",
Expand Down Expand Up @@ -1182,7 +1182,7 @@ def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instrs BFCVT_ZPmZ, BFCVT
def : InstRW<[CortexA320Write_11cyc_1VMAC_1VALU], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;

// Matrix multiply accumulate
def : InstRW<[CortexA320Write_16cyc_1VMAC_1VALU], (instrs BFMMLA_ZZZ)>;
def : InstRW<[CortexA320Write_16cyc_1VMAC_1VALU], (instrs BFMMLA_ZZZ_HtoS)>;

// Multiply accumulate long
def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^BFMLAL[BT]_ZZZ(I)?")>;
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/AArch64/AArch64SchedA510.td
Original file line number Diff line number Diff line change
Expand Up @@ -804,13 +804,13 @@ def : InstRW<[CortexA510MCWrite<15, 12, CortexA510UnitVMC>], (instregex "^[SU]DI
def : InstRW<[CortexA510MCWrite<26, 23, CortexA510UnitVMC>], (instregex "^[SU]DIVR?_(ZPmZ|ZPZZ)_D")>;

// Dot product, 8 bit
def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]DOT_ZZZI?_S")>;
def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]DOT_ZZZI?_BtoS")>;

// Dot product, 8 bit, using signed and unsigned integers
def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>;

// Dot product, 16 bit
def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]DOT_ZZZI?_D")>;
def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]DOT_ZZZI?_HtoD")>;

// Duplicate, immediate and indexed form
def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^DUP_ZI_[BHSD]",
Expand Down Expand Up @@ -1160,7 +1160,7 @@ def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instrs BFCVT_ZPmZ, BFCVT
def : InstRW<[A510Write_10cyc_1VMAC_1VALU], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;

// Matrix multiply accumulate
def : InstRW<[A510Write_15cyc_1VMAC_1VALU], (instrs BFMMLA_ZZZ)>;
def : InstRW<[A510Write_15cyc_1VMAC_1VALU], (instrs BFMMLA_ZZZ_HtoS)>;

// Multiply accumulate long
def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^BFMLAL[BT]_ZZZ(I)?")>;
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
Original file line number Diff line number Diff line change
Expand Up @@ -1764,13 +1764,13 @@ def : InstRW<[N2Write_20c_1V0], (instregex "^[SU]DIVR?_ZPmZ_D",
"^[SU]DIV_ZPZZ_D")>;

// Dot product, 8 bit
def : InstRW<[N2Write_3c_1V], (instregex "^[SU]DOT_ZZZI?_S$")>;
def : InstRW<[N2Write_3c_1V], (instregex "^[SU]DOT_ZZZI?_BtoS$")>;

// Dot product, 8 bit, using signed and unsigned integers
def : InstRW<[N2Write_3c_1V], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>;

// Dot product, 16 bit
def : InstRW<[N2Write_4c_1V0], (instregex "^[SU]DOT_ZZZI?_D$")>;
def : InstRW<[N2Write_4c_1V0], (instregex "^[SU]DOT_ZZZI?_HtoD$")>;

// Duplicate, immediate and indexed form
def : InstRW<[N2Write_2c_1V], (instregex "^DUP_ZI_[BHSD]$",
Expand Down Expand Up @@ -2118,7 +2118,7 @@ def : InstRW<[N2Write_3c_1V0], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;
def : InstRW<[N2Write_4c_1V], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;

// Matrix multiply accumulate
def : InstRW<[N2Write_5c_1V], (instrs BFMMLA_ZZZ)>;
def : InstRW<[N2Write_5c_1V], (instrs BFMMLA_ZZZ_HtoS)>;

// Multiply accumulate long
def : InstRW<[N2Write_4c_1V], (instregex "^BFMLAL[BT]_ZZZ(I)?$")>;
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td
Original file line number Diff line number Diff line change
Expand Up @@ -1736,13 +1736,13 @@ def : InstRW<[N3Write_16c_16V0], (instregex "^[SU]DIVR?_ZPmZ_D",
"^[SU]DIV_ZPZZ_D")>;

// Dot product, 8 bit
def : InstRW<[N3Write_3c_1V], (instregex "^[SU]DOT_ZZZI?_S$")>;
def : InstRW<[N3Write_3c_1V], (instregex "^[SU]DOT_ZZZI?_BtoS$")>;

// Dot product, 8 bit, using signed and unsigned integers
def : InstRW<[N3Write_3c_1V], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>;

// Dot product, 16 bit
def : InstRW<[N3Write_4c_1V0], (instregex "^[SU]DOT_ZZZI?_D$")>;
def : InstRW<[N3Write_4c_1V0], (instregex "^[SU]DOT_ZZZI?_HtoD$")>;

// Duplicate, immediate and indexed form
def : InstRW<[N3Write_2c_1V], (instregex "^DUP_ZI_[BHSD]$",
Expand Down Expand Up @@ -2082,7 +2082,7 @@ def : InstRW<[N3Write_4c_2V0], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;
def : InstRW<[N3Write_4c_1V], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;

// Matrix multiply accumulate
def : InstRW<[N3Write_5c_1V], (instrs BFMMLA_ZZZ)>;
def : InstRW<[N3Write_5c_1V], (instrs BFMMLA_ZZZ_HtoS)>;

// Multiply accumulate long
def : InstRW<[N3Write_4c_1V], (instregex "^BFMLAL[BT]_ZZZ(I)?$")>;
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
Original file line number Diff line number Diff line change
Expand Up @@ -1555,14 +1555,14 @@ def : InstRW<[V1Write_20c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_D",
"^[SU]DIV_ZPZZ_D")>;

// Dot product, 8 bit
def : InstRW<[V1Wr_ZDOTB, V1Rd_ZDOTB], (instregex "^[SU]DOT_ZZZI?_S$")>;
def : InstRW<[V1Wr_ZDOTB, V1Rd_ZDOTB], (instregex "^[SU]DOT_ZZZI?_BtoS$")>;

// Dot product, 8 bit, using signed and unsigned integers
def : InstRW<[V1Wr_ZUDOTB, V1Rd_ZUDOTB],
(instrs SUDOT_ZZZI, USDOT_ZZZ, USDOT_ZZZI)>;

// Dot product, 16 bit
def : InstRW<[V1Wr_ZDOTH, V1Rd_ZDOTH], (instregex "^[SU]DOT_ZZZI?_D$")>;
def : InstRW<[V1Wr_ZDOTH, V1Rd_ZDOTH], (instregex "^[SU]DOT_ZZZI?_HtoD$")>;

// Duplicate, immediate and indexed form
def : InstRW<[V1Write_2c_1V01], (instregex "^DUP_ZI_[BHSD]$",
Expand Down Expand Up @@ -1808,7 +1808,7 @@ def : InstRW<[V1Write_4c_1V0], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;
def : InstRW<[V1Wr_ZBFDOT, V1Rd_ZBFDOT], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;

// Matrix multiply accumulate
def : InstRW<[V1Wr_ZBFMMA, V1Rd_ZBFMMA], (instrs BFMMLA_ZZZ)>;
def : InstRW<[V1Wr_ZBFMMA, V1Rd_ZBFMMA], (instrs BFMMLA_ZZZ_HtoS)>;

// Multiply accumulate long
def : InstRW<[V1Wr_ZBFMAL, V1Rd_ZBFMAL], (instregex "^BFMLAL[BT]_ZZZ(I)?$")>;
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
Original file line number Diff line number Diff line change
Expand Up @@ -2251,13 +2251,13 @@ def : InstRW<[V2Write_20c_1V0], (instregex "^[SU]DIVR?_ZPmZ_D",
"^[SU]DIV_ZPZZ_D")>;

// Dot product, 8 bit
def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instregex "^[SU]DOT_ZZZI?_S")>;
def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instregex "^[SU]DOT_ZZZI?_BtoS")>;

// Dot product, 8 bit, using signed and unsigned integers
def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>;

// Dot product, 16 bit
def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instregex "^[SU]DOT_ZZZI?_D")>;
def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instregex "^[SU]DOT_ZZZI?_HtoD")>;

// Duplicate, immediate and indexed form
def : InstRW<[V2Write_2c_1V], (instregex "^DUP_ZI_[BHSD]",
Expand Down Expand Up @@ -2614,7 +2614,7 @@ def : InstRW<[V2Write_4c_1V02], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;
def : InstRW<[V2Wr_ZBFDOT, V2Rd_ZBFDOT], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;

// Matrix multiply accumulate
def : InstRW<[V2Wr_ZBFMMA, V2Rd_ZBFMMA], (instrs BFMMLA_ZZZ)>;
def : InstRW<[V2Wr_ZBFMMA, V2Rd_ZBFMMA], (instrs BFMMLA_ZZZ_HtoS)>;

// Multiply accumulate long
def : InstRW<[V2Wr_ZBFMAL, V2Rd_ZBFMAL], (instregex "^BFMLAL[BT]_ZZZI?")>;
Expand Down
16 changes: 8 additions & 8 deletions llvm/lib/Target/AArch64/SVEInstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -3770,11 +3770,11 @@ class sve_intx_dot<bit sz, bit U, string asm, ZPRRegOp zprty1,
}

multiclass sve_intx_dot<bit opc, string asm, SDPatternOperator op> {
def _S : sve_intx_dot<0b0, opc, asm, ZPR32, ZPR8>;
def _D : sve_intx_dot<0b1, opc, asm, ZPR64, ZPR16>;
def _BtoS : sve_intx_dot<0b0, opc, asm, ZPR32, ZPR8>;
def _HtoD : sve_intx_dot<0b1, opc, asm, ZPR64, ZPR16>;

def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _S)>;
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _D)>;
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _BtoS)>;
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _HtoD)>;
}

//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -3804,21 +3804,21 @@ class sve_intx_dot_by_indexed_elem<bit sz, bit U, string asm,

multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm,
SDPatternOperator op> {
def _S : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b_timm> {
def _BtoS : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b_timm> {
bits<2> iop;
bits<3> Zm;
let Inst{20-19} = iop;
let Inst{18-16} = Zm;
}
def _D : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b_timm> {
def _HtoD : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b_timm> {
bits<1> iop;
bits<4> Zm;
let Inst{20} = iop;
let Inst{19-16} = Zm;
}

def : SVE_4_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv16i8, nxv16i8, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME # _S)>;
def : SVE_4_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv8i16, nxv8i16, i32, VectorIndexD32b_timm, !cast<Instruction>(NAME # _D)>;
def : SVE_4_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv16i8, nxv16i8, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME # _BtoS)>;
def : SVE_4_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv8i16, nxv8i16, i32, VectorIndexD32b_timm, !cast<Instruction>(NAME # _HtoD)>;
}

//===----------------------------------------------------------------------===//
Expand Down
18 changes: 9 additions & 9 deletions llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s
Original file line number Diff line number Diff line change
Expand Up @@ -2649,7 +2649,7 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 5 0.50 2 V1UnitV,V1UnitV01 BFMLALT_ZZZI bfmlalt z0.s, z1.h, z2.h[7]
# CHECK-NEXT: 1 5 0.50 2 V1UnitV,V1UnitV01 BFMLALT_ZZZI bfmlalt z0.s, z1.h, z7.h[7]
# CHECK-NEXT: 1 5 0.50 2 V1UnitV,V1UnitV01 BFMLALT_ZZZ bfmlalt z14.s, z10.h, z21.h
# CHECK-NEXT: 1 5 0.50 3 V1UnitV,V1UnitV01 BFMMLA_ZZZ bfmmla z0.s, z1.h, z2.h
# CHECK-NEXT: 1 5 0.50 3 V1UnitV,V1UnitV01 BFMMLA_ZZZ_HtoS bfmmla z0.s, z1.h, z2.h
# CHECK-NEXT: 1 1 1.00 1 V1UnitI,V1UnitM,V1UnitM0 BIC_PPzPP bic p0.b, p0/z, p0.b, p0.b
# CHECK-NEXT: 1 1 1.00 1 V1UnitI,V1UnitM,V1UnitM0 BIC_PPzPP bic p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: 1 2 0.50 2 V1UnitV,V1UnitV01 BIC_ZZZ bic z0.d, z0.d, z0.d
Expand Down Expand Up @@ -4228,10 +4228,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 12 7.00 12 V1UnitV[7],V1UnitV0[7],V1UnitV01[7],V1UnitV02[7] SDIV_ZPmZ_S sdiv z0.s, p7/m, z0.s, z31.s
# CHECK-NEXT: 1 20 7.00 20 V1UnitV[7],V1UnitV0[7],V1UnitV01[7],V1UnitV02[7] SDIVR_ZPmZ_D sdivr z0.d, p7/m, z0.d, z31.d
# CHECK-NEXT: 1 12 7.00 12 V1UnitV[7],V1UnitV0[7],V1UnitV01[7],V1UnitV02[7] SDIVR_ZPmZ_S sdivr z0.s, p7/m, z0.s, z31.s
# CHECK-NEXT: 1 4 1.00 1 V1UnitV,V1UnitV0,V1UnitV01,V1UnitV02 SDOT_ZZZI_D sdot z0.d, z1.h, z15.h[1]
# CHECK-NEXT: 1 4 1.00 1 V1UnitV,V1UnitV0,V1UnitV01,V1UnitV02 SDOT_ZZZ_D sdot z0.d, z1.h, z31.h
# CHECK-NEXT: 1 3 0.50 1 V1UnitV,V1UnitV01 SDOT_ZZZ_S sdot z0.s, z1.b, z31.b
# CHECK-NEXT: 1 3 0.50 1 V1UnitV,V1UnitV01 SDOT_ZZZI_S sdot z0.s, z1.b, z7.b[3]
# CHECK-NEXT: 1 4 1.00 1 V1UnitV,V1UnitV0,V1UnitV01,V1UnitV02 SDOT_ZZZI_HtoD sdot z0.d, z1.h, z15.h[1]
# CHECK-NEXT: 1 4 1.00 1 V1UnitV,V1UnitV0,V1UnitV01,V1UnitV02 SDOT_ZZZ_HtoD sdot z0.d, z1.h, z31.h
# CHECK-NEXT: 1 3 0.50 1 V1UnitV,V1UnitV01 SDOT_ZZZ_BtoS sdot z0.s, z1.b, z31.b
# CHECK-NEXT: 1 3 0.50 1 V1UnitV,V1UnitV01 SDOT_ZZZI_BtoS sdot z0.s, z1.b, z7.b[3]
# CHECK-NEXT: 1 2 0.50 2 V1UnitV,V1UnitV01 SEL_ZPZZ_B sel z23.b, p11, z13.b, z8.b
# CHECK-NEXT: 1 2 0.50 2 V1UnitV,V1UnitV01 SEL_ZPZZ_D sel z23.d, p11, z13.d, z8.d
# CHECK-NEXT: 1 2 0.50 2 V1UnitV,V1UnitV01 SEL_ZPZZ_H sel z23.h, p11, z13.h, z8.h
Expand Down Expand Up @@ -4708,11 +4708,11 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 12 7.00 12 V1UnitV[7],V1UnitV0[7],V1UnitV01[7],V1UnitV02[7] UDIV_ZPmZ_S udiv z0.s, p7/m, z0.s, z31.s
# CHECK-NEXT: 1 20 7.00 20 V1UnitV[7],V1UnitV0[7],V1UnitV01[7],V1UnitV02[7] UDIVR_ZPmZ_D udivr z0.d, p7/m, z0.d, z31.d
# CHECK-NEXT: 1 12 7.00 12 V1UnitV[7],V1UnitV0[7],V1UnitV01[7],V1UnitV02[7] UDIVR_ZPmZ_S udivr z0.s, p7/m, z0.s, z31.s
# CHECK-NEXT: 1 4 1.00 1 V1UnitV,V1UnitV0,V1UnitV01,V1UnitV02 UDOT_ZZZI_D udot z0.d, z1.h, z15.h[1]
# CHECK-NEXT: 1 4 1.00 1 V1UnitV,V1UnitV0,V1UnitV01,V1UnitV02 UDOT_ZZZ_D udot z0.d, z1.h, z31.h
# CHECK-NEXT: 1 4 1.00 1 V1UnitV,V1UnitV0,V1UnitV01,V1UnitV02 UDOT_ZZZI_HtoD udot z0.d, z1.h, z15.h[1]
# CHECK-NEXT: 1 4 1.00 1 V1UnitV,V1UnitV0,V1UnitV01,V1UnitV02 UDOT_ZZZ_HtoD udot z0.d, z1.h, z31.h
# CHECK-NEXT: 1 3 1.00 3 V1UnitV,V1UnitV0,V1UnitV01,V1UnitV02 UCVTF_ZPmZ_StoD ucvtf z24.d, p5/m, z9.s
# CHECK-NEXT: 1 3 0.50 1 V1UnitV,V1UnitV01 UDOT_ZZZ_S udot z0.s, z1.b, z31.b
# CHECK-NEXT: 1 3 0.50 1 V1UnitV,V1UnitV01 UDOT_ZZZI_S udot z0.s, z1.b, z7.b[3]
# CHECK-NEXT: 1 3 0.50 1 V1UnitV,V1UnitV01 UDOT_ZZZ_BtoS udot z0.s, z1.b, z31.b
# CHECK-NEXT: 1 3 0.50 1 V1UnitV,V1UnitV01 UDOT_ZZZI_BtoS udot z0.s, z1.b, z7.b[3]
# CHECK-NEXT: 1 2 0.50 2 V1UnitV,V1UnitV01 UMAX_ZI_B umax z0.b, z0.b, #0
# CHECK-NEXT: 1 2 0.50 2 V1UnitV,V1UnitV01 UMAX_ZPmZ_B umax z31.b, p7/m, z31.b, z31.b
# CHECK-NEXT: 1 2 0.50 2 V1UnitV,V1UnitV01 UMAX_ZI_B umax z31.b, z31.b, #255
Expand Down