diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h index d1a3c94bc18ed..80e3fe76e5c25 100644 --- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h +++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h @@ -164,6 +164,10 @@ enum ArchExtKind : unsigned { AEK_FAMINMAX = 60, // FEAT_FAMINMAX AEK_FP8FMA = 61, // FEAT_FP8FMA AEK_SSVE_FP8FMA = 62, // FEAT_SSVE_FP8FMA + AEK_FP8DOT2 = 63, // FEAT_FP8DOT2 + AEK_SSVE_FP8DOT2 = 64, // FEAT_SSVE_FP8DOT2 + AEK_FP8DOT4 = 65, // FEAT_FP8DOT4 + AEK_SSVE_FP8DOT4 = 66, // FEAT_SSVE_FP8DOT4 AEK_NUM_EXTENSIONS }; using ExtensionBitset = Bitset; @@ -277,6 +281,10 @@ inline constexpr ExtensionInfo Extensions[] = { {"faminmax", AArch64::AEK_FAMINMAX, "+faminmax", "-faminmax", FEAT_INIT, "", 0}, {"fp8fma", AArch64::AEK_FP8FMA, "+fp8fma", "-fp8fma", FEAT_INIT, "+fpmr", 0}, {"ssve-fp8fma", AArch64::AEK_SSVE_FP8FMA, "+ssve-fp8fma", "-ssve-fp8fma", FEAT_INIT, "+sme2", 0}, + {"fp8dot2", AArch64::AEK_FP8DOT2, "+fp8dot2", "-fp8dot2", FEAT_INIT, "", 0}, + {"ssve-fp8dot2", AArch64::AEK_SSVE_FP8DOT2, "+ssve-fp8dot2", "-ssve-fp8dot2", FEAT_INIT, "+sme2", 0}, + {"fp8dot4", AArch64::AEK_FP8DOT4, "+fp8dot4", "-fp8dot4", FEAT_INIT, "", 0}, + {"ssve-fp8dot4", AArch64::AEK_SSVE_FP8DOT4, "+ssve-fp8dot4", "-ssve-fp8dot4", FEAT_INIT, "+sme2", 0}, // Special cases {"none", AArch64::AEK_NONE, {}, {}, FEAT_INIT, "", ExtensionInfo::MaxFMVPriority}, }; diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index d3d4bd84f1e98..aa0efb3e6ec13 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -523,6 +523,18 @@ def FeatureFP8FMA : SubtargetFeature<"fp8fma", "HasFP8FMA", "true", def FeatureSSVE_FP8FMA : SubtargetFeature<"ssve-fp8fma", "HasSSVE_FP8FMA", "true", "Enable SVE2 fp8 multiply-add instructions (FEAT_SSVE_FP8FMA)", [FeatureSME2]>; +def FeatureFP8DOT2: SubtargetFeature<"fp8dot2", "HasFP8DOT2", "true", + "Enable 
fp8 2-way dot instructions (FEAT_FP8DOT2)">; + +def FeatureSSVE_FP8DOT2 : SubtargetFeature<"ssve-fp8dot2", "HasSSVE_FP8DOT2", "true", + "Enable SVE2 fp8 2-way dot product instructions (FEAT_SSVE_FP8DOT2)", [FeatureSME2]>; + +def FeatureFP8DOT4: SubtargetFeature<"fp8dot4", "HasFP8DOT4", "true", + "Enable fp8 4-way dot instructions (FEAT_FP8DOT4)">; + +def FeatureSSVE_FP8DOT4 : SubtargetFeature<"ssve-fp8dot4", "HasSSVE_FP8DOT4", "true", + "Enable SVE2 fp8 4-way dot product instructions (FEAT_SSVE_FP8DOT4)", [FeatureSME2]>; + def FeatureAppleA7SysReg : SubtargetFeature<"apple-a7-sysreg", "HasAppleA7SysReg", "true", "Apple A7 (the CPU formerly known as Cyclone)">; diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 152e3b4c7407d..ea965e2933c8d 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -6108,6 +6108,21 @@ multiclass SIMDThreeVectorCvt { V128, v16i8, v4f32, null_frag>; } +// TODO: Create a new Value Type v8f8 and v16f8 +multiclass SIMDThreeSameVectorDOT2 { + def v4f16 : BaseSIMDThreeSameVectorDot<0b0, 0b0, 0b01, 0b1111, asm, ".4h", ".8b", + V64, v4f16, v8i8, null_frag>; + def v8f16 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b01, 0b1111, asm, ".8h", ".16b", + V128, v8f16, v16i8, null_frag>; +} + +multiclass SIMDThreeSameVectorDOT4 { + def v2f32 : BaseSIMDThreeSameVectorDot<0b0, 0b0, 0b00, 0b1111, asm, ".2s", ".8b", + V64, v2f32, v8i8, null_frag>; + def v4f32 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b00, 0b1111, asm, ".4s", ".16b", + V128, v4f32, v16i8, null_frag>; +} + //---------------------------------------------------------------------------- // AdvSIMD two register vector instructions. 
//---------------------------------------------------------------------------- @@ -8570,10 +8585,10 @@ class SIMDThreeSameVectorMatMul size, bits<4> opc, string asm, - string dst_kind, string lhs_kind, string rhs_kind, - RegisterOperand RegType, - ValueType AccumType, ValueType InputType, - SDPatternOperator OpNode> : + string dst_kind, string lhs_kind, string rhs_kind, + RegisterOperand RegType, + ValueType AccumType, ValueType InputType, + SDPatternOperator OpNode> : BaseSIMDIndexedTied size, string as V128, v4i32, v16i8, OpNode>; } +// TODO: The vectors v8i8 and v16i8 should be v8f8 and v16f8 +multiclass SIMDThreeSameVectorFP8DOT4Index { + def v8f8 : BaseSIMDThreeSameVectorIndexS<0b0, 0b0, 0b00, 0b0000, asm, ".2s", ".8b", ".4b", + V64, v2f32, v8i8, null_frag>; + def v16f8 : BaseSIMDThreeSameVectorIndexS<0b1, 0b0, 0b00, 0b0000, asm, ".4s", ".16b",".4b", + V128, v4f32, v16i8, null_frag>; +} + // ARMv8.2-A Fused Multiply Add-Long Instructions (Indexed) let mayRaiseFPException = 1, Uses = [FPCR] in class BaseSIMDThreeSameVectorIndexH sz, bits<4> opc, string asm, @@ -8624,6 +8647,16 @@ multiclass SIMDThreeSameVectorFMLIndex opc, string asm, V128, V128_lo, v4f32, v8f16, OpNode>; } +//---------------------------------------------------------------------------- +// FP8 Advanced SIMD vector x indexed element +// TODO: Replace value types v8i8 and v16i8 by v8f8 and v16f8 +multiclass SIMDThreeSameVectorFP8DOT2Index { + def v4f16 : BaseSIMDThreeSameVectorIndexH<0b0, 0b0, 0b01, 0b0000, asm, ".4h", ".8b", ".2b", + V64, V128_lo, v4f16, v8i8, null_frag>; + def v8f16 : BaseSIMDThreeSameVectorIndexH<0b1, 0b0, 0b01, 0b0000, asm, ".8h", ".16b", ".2b", + V128, V128_lo, v8f16, v16i8, null_frag>; +} + multiclass SIMDFPIndexed opc, string asm, SDPatternOperator OpNode> { let mayRaiseFPException = 1, Uses = [FPCR] in { diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 20f872dfc17b2..0125d3dbecf96 100644 --- 
a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -173,6 +173,20 @@ def HasSSVE_FP8FMA : Predicate<"Subtarget->SSVE_FP8FMA() || " AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8FMA, (all_of FeatureSVE2, FeatureFP8FMA)), "ssve-fp8fma or (sve2 and fp8fma)">; +def HasFP8DOT2 : Predicate<"Subtarget->hasFP8DOT2()">, + AssemblerPredicateWithAll<(all_of FeatureFP8DOT2), "fp8dot2">; +def HasSSVE_FP8DOT2 : Predicate<"Subtarget->hasSSVE_FP8DOT2() || " + "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT2())">, + AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8DOT2, + (all_of FeatureSVE2, FeatureFP8DOT2)), + "ssve-fp8dot2 or (sve2 and fp8dot2)">; +def HasFP8DOT4 : Predicate<"Subtarget->hasFP8DOT4()">, + AssemblerPredicateWithAll<(all_of FeatureFP8DOT4), "fp8dot4">; +def HasSSVE_FP8DOT4 : Predicate<"Subtarget->hasSSVE_FP8DOT4() || " + "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT4())">, + AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8DOT4, + (all_of FeatureSVE2, FeatureFP8DOT4)), + "ssve-fp8dot4 or (sve2 and fp8dot4)">; // A subset of SVE(2) instructions are legal in Streaming SVE execution mode, // they should be enabled if either has been specified. 
@@ -9309,6 +9323,16 @@ let Predicates = [HasFP8FMA] in { defm FMLALLTT : SIMDThreeSameVectorMLAL<0b1, 0b01, "fmlalltt">; } // End let Predicates = [HasFP8FMA] +let Predicates = [HasFP8DOT2] in { + defm FDOTlane : SIMDThreeSameVectorFP8DOT2Index<"fdot">; + defm FDOT : SIMDThreeSameVectorDOT2<"fdot">; +} // End let Predicates = [HasFP8DOT2] + +let Predicates = [HasFP8DOT4] in { + defm FDOTlane : SIMDThreeSameVectorFP8DOT4Index<"fdot">; + defm FDOT : SIMDThreeSameVectorDOT4<"fdot">; +} // End let Predicates = [HasFP8DOT4] + include "AArch64InstrAtomics.td" include "AArch64SVEInstrInfo.td" include "AArch64SMEInstrInfo.td" diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index abe07dc0ce327..d186a21f7e773 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -4064,3 +4064,20 @@ def FMLALLBT_ZZZ : sve2_fp8_mla<0b001, ZPR32, "fmlallbt">; def FMLALLTB_ZZZ : sve2_fp8_mla<0b010, ZPR32, "fmlalltb">; def FMLALLTT_ZZZ : sve2_fp8_mla<0b011, ZPR32, "fmlalltt">; } // End HasSSVE_FP8FMA + +let Predicates = [HasSSVE_FP8DOT2] in { +// FP8 Widening Dot-Product - Indexed Group +defm FDOT_ZZZI_BtoH : sve2_fp8_dot_indexed<"fdot">; +// FP8 Widening Dot-Product - Group +// TODO: Replace nxv16i8 by nxv16f8 +defm FDOT_ZZZ_BtoH : sve_float_dot<0b0, 0b1, ZPR16, ZPR8, "fdot", nxv16i8, null_frag>; +} + +// TODO: Replace nxv16i8 by nxv16f8 +let Predicates = [HasSSVE_FP8DOT4] in { +// FP8 Widening Dot-Product - Indexed Group +defm FDOT_ZZZI_BtoS : sve_float_dot_indexed<0b1, 0b01, ZPR8, ZPR3b8, "fdot", + nxv16i8, null_frag>; +// FP8 Widening Dot-Product - Group +defm FDOT_ZZZ_BtoS : sve_float_dot<0b1, 0b1, ZPR32, ZPR8, "fdot", nxv16i8, null_frag>; +} diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index afecff1303666..9c16d22677b9e 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ 
b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -3654,6 +3654,10 @@ static const struct Extension { {"faminmax", {AArch64::FeatureFAMINMAX}}, {"fp8fma", {AArch64::FeatureFP8FMA}}, {"ssve-fp8fma", {AArch64::FeatureSSVE_FP8FMA}}, + {"fp8dot2", {AArch64::FeatureFP8DOT2}}, + {"ssve-fp8dot2", {AArch64::FeatureSSVE_FP8DOT2}}, + {"fp8dot4", {AArch64::FeatureFP8DOT4}}, + {"ssve-fp8dot4", {AArch64::FeatureSSVE_FP8DOT4}}, }; static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) { diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 20ccd61219039..78c14379617a2 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -10193,3 +10193,13 @@ class sve2_fp8_mla_long_long_by_indexed_elem TT, string mnemonic> let DestructiveInstType = DestructiveOther; let ElementSize = ZPR32.ElementSize; } + +// FP8 Widening Dot-Product - Indexed Group +multiclass sve2_fp8_dot_indexed{ + def NAME : sve_float_dot_indexed<0b0, ZPR16, ZPR8, ZPR3b8, VectorIndexH, mnemonic> { + bits<3> iop; + let Inst{20-19} = iop{2-1}; + let Inst{11} = iop{0}; + let Inst{10} = 0b1; + } +} diff --git a/llvm/test/MC/AArch64/FP8/directive-arch-negative.s b/llvm/test/MC/AArch64/FP8/directive-arch-negative.s index 7a45f25fa052d..b4a1110e5bca7 100644 --- a/llvm/test/MC/AArch64/FP8/directive-arch-negative.s +++ b/llvm/test/MC/AArch64/FP8/directive-arch-negative.s @@ -23,3 +23,15 @@ fmlalb v0.8h, v0.16b, v0.16b fmlalb z23.h, z13.b, z0.b[7] // CHECK: error: instruction requires: ssve-fp8fma or (sve2 and fp8fma) // CHECK: fmlalb z23.h, z13.b, z0.b[7] + +.arch armv9-a+fp8dot2 +.arch armv9-a+nofp8dot2 +fdot v31.4h, v0.8b, v0.8b +// CHECK: error: instruction requires: fp8dot2 +// CHECK: fdot v31.4h, v0.8b, v0.8b + +.arch armv9-a+fp8dot4 +.arch armv9-a+nofp8dot4 +fdot v0.2s, v0.8b, v31.8b +// CHECK: error: instruction requires: fp8dot4 +// CHECK: fdot v0.2s, v0.8b, v31.8b diff --git 
a/llvm/test/MC/AArch64/FP8/directive-arch.s b/llvm/test/MC/AArch64/FP8/directive-arch.s index f3627f537b92d..e984210fe3ef1 100644 --- a/llvm/test/MC/AArch64/FP8/directive-arch.s +++ b/llvm/test/MC/AArch64/FP8/directive-arch.s @@ -19,3 +19,13 @@ fmlalb v0.8h, v0.16b, v0.16b fmlalb z23.h, z13.b, z0.b[7] // CHECK: fmlalb z23.h, z13.b, z0.b[7] .arch armv9-a+nossve-fp8fma + +.arch armv9-a+fp8dot2 +fdot v31.4h, v0.8b, v0.8b +// CHECK: fdot v31.4h, v0.8b, v0.8b +.arch armv9-a+nofp8dot2 + +.arch armv9-a+fp8dot4 +fdot v0.2s, v0.8b, v31.8b +// CHECK: fdot v0.2s, v0.8b, v31.8b +.arch armv9-a+nofp8dot4 diff --git a/llvm/test/MC/AArch64/FP8/dot-diagnostic.s b/llvm/test/MC/AArch64/FP8/dot-diagnostic.s new file mode 100644 index 0000000000000..a73310280cec6 --- /dev/null +++ b/llvm/test/MC/AArch64/FP8/dot-diagnostic.s @@ -0,0 +1,59 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+fp8dot2,+fp8dot4 2>&1 < %s| FileCheck %s + +// --------------------------------------------------------------------------// +// Element size extension incorrect + +fdot v31.4h, v0.8h, v0.8b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fdot v31.4h, v0.8h, v0.8b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fdot v31.8h, v0.16b, v31.16h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier +// CHECK-NEXT: fdot v31.8h, v0.16b, v31.16h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fdot v0.2s, v0.8s, v31.8b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier +// CHECK-NEXT: fdot v0.2s, v0.8s, v31.8b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fdot v31.4s, v0, v31.16b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fdot v31.4s, v0, v31.16b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +//--------------------------------------------------------------------------// +// Last Register range is between 0-15 + +fdot v31.4h, v31.8b, v16.2b[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid 
operand for instruction +// CHECK-NEXT: fdot v31.4h, v31.8b, v16.2b[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fdot v0.8h, v0.16b, v16.2b[7] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fdot v0.8h, v0.16b, v16.2b[7] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Out of range index +fdot v31.4h, v31.8b, v15.2b[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7]. +// CHECK-NEXT: fdot v31.4h, v31.8b, v15.2b[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fdot v0.8h, v0.16b, v15.2b[8] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7]. +// CHECK-NEXT: fdot v0.8h, v0.16b, v15.2b[8] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fdot v0.2s, v0.8b, v31.4b[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. +// CHECK-NEXT: fdot v0.2s, v0.8b, v31.4b[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fdot v0.4s, v31.16b, v0.4b[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. 
+// CHECK-NEXT: fdot v0.4s, v31.16b, v0.4b[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/FP8/dot.s b/llvm/test/MC/AArch64/FP8/dot.s new file mode 100644 index 0000000000000..e755430745c34 --- /dev/null +++ b/llvm/test/MC/AArch64/FP8/dot.s @@ -0,0 +1,63 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+fp8dot2,+fp8dot4 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+fp8dot2,+fp8dot4 < %s \ +// RUN: | llvm-objdump -d --mattr=+fp8dot2,+fp8dot4 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+fp8dot2,+fp8dot4 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+fp8dot2,+fp8dot4 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+fp8dot2,+fp8dot4 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +/// VECTOR +fdot v31.4h, v0.8b, v0.8b +// CHECK-INST: fdot v31.4h, v0.8b, v0.8b +// CHECK-ENCODING: [0x1f,0xfc,0x40,0x0e] +// CHECK-ERROR: instruction requires: fp8dot2 +// CHECK-UNKNOWN: 0e40fc1f + +fdot v31.8h, v0.16b, v31.16b +// CHECK-INST: fdot v31.8h, v0.16b, v31.16b +// CHECK-ENCODING: [0x1f,0xfc,0x5f,0x4e] +// CHECK-ERROR: instruction requires: fp8dot2 +// CHECK-UNKNOWN: 4e5ffc1f + +fdot v0.2s, v0.8b, v31.8b +// CHECK-INST: fdot v0.2s, v0.8b, v31.8b +// CHECK-ENCODING: [0x00,0xfc,0x1f,0x0e] +// CHECK-ERROR: instruction requires: fp8dot4 +// CHECK-UNKNOWN: 0e1ffc00 + +fdot v31.4s, v0.16b, v31.16b +// CHECK-INST: fdot v31.4s, v0.16b, v31.16b +// CHECK-ENCODING: [0x1f,0xfc,0x1f,0x4e] +// CHECK-ERROR: instruction requires: 
fp8dot4 +// CHECK-UNKNOWN: 4e1ffc1f + +//INDEXED +fdot v31.4h, v31.8b, v15.2b[0] +// CHECK-INST: fdot v31.4h, v31.8b, v15.2b[0] +// CHECK-ENCODING: [0xff,0x03,0x4f,0x0f] +// CHECK-ERROR: instruction requires: fp8dot2 +// CHECK-UNKNOWN: 0f4f03ff + +fdot v0.8h, v0.16b, v15.2b[7] +// CHECK-INST: fdot v0.8h, v0.16b, v15.2b[7] +// CHECK-ENCODING: [0x00,0x08,0x7f,0x4f] +// CHECK-ERROR: instruction requires: fp8dot2 +// CHECK-UNKNOWN: 4f7f0800 + +fdot v0.2s, v0.8b, v31.4b[0] +// CHECK-INST: fdot v0.2s, v0.8b, v31.4b[0] +// CHECK-ENCODING: [0x00,0x00,0x1f,0x0f] +// CHECK-ERROR: instruction requires: fp8dot4 +// CHECK-UNKNOWN: 0f1f0000 + +fdot v0.4s, v31.16b, v0.4b[3] +// CHECK-INST: fdot v0.4s, v31.16b, v0.4b[3] +// CHECK-ENCODING: [0xe0,0x0b,0x20,0x4f] +// CHECK-ERROR: instruction requires: fp8dot4 +// CHECK-UNKNOWN: 4f200be0 diff --git a/llvm/test/MC/AArch64/FP8_SVE2/fdot-diagnostics.s b/llvm/test/MC/AArch64/FP8_SVE2/fdot-diagnostics.s new file mode 100644 index 0000000000000..b80e527ec2953 --- /dev/null +++ b/llvm/test/MC/AArch64/FP8_SVE2/fdot-diagnostics.s @@ -0,0 +1,65 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+ssve-fp8dot2,+ssve-fp8dot4 \ +// RUN: 2>&1 < %s | FileCheck %s + +// FDOT2 +// --------------------------------------------------------------------------// + +// z register out of range for index + +fdot z0.h, z0.b, z8.b[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fdot z0.h, z0.b, z8.b[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + + +// Invalid vector lane index + +fdot z0.h, z0.b, z0.b[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7]. +// CHECK-NEXT: fdot z0.h, z0.b, z0.b[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fdot z0.h, z0.b, z0.b[8] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7]. 
+// CHECK-NEXT: fdot z0.h, z0.b, z0.b[8] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// Invalid vector suffix + +fdot z0.d, z0.b, z0.b[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: fdot z0.d, z0.b, z0.b[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fdot z0.h, z0.h, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: fdot z0.h, z0.h, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + + + +// FDOT4 +// --------------------------------------------------------------------------// +// Invalid vector lane index + +fdot z0.s, z0.b, z0.b[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. +// CHECK-NEXT: fdot z0.s, z0.b, z0.b[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fdot z0.s, z0.b, z0.b[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. +// CHECK-NEXT: fdot z0.s, z0.b, z0.b[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// Invalid vector suffix + +fdot z0.s, z0.s, z0.s[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: fdot z0.s, z0.s, z0.s[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fdot z0.b, z0.b, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: fdot z0.b, z0.b, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/FP8_SVE2/fdot.s b/llvm/test/MC/AArch64/FP8_SVE2/fdot.s new file mode 100644 index 0000000000000..eb16b59de7afd --- /dev/null +++ b/llvm/test/MC/AArch64/FP8_SVE2/fdot.s @@ -0,0 +1,113 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+ssve-fp8dot2,+ssve-fp8dot4 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR + +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+fp8dot2,+fp8dot4 < %s \ +// RUN: | llvm-objdump -d --mattr=+sve2,+fp8dot2,+fp8dot4 --no-print-imm-hex - | 
FileCheck %s --check-prefix=CHECK-INST + +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+fp8dot2,+fp8dot4 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-UNKNOWN + +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+fp8dot2,+fp8dot4 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2,+fp8dot2,+fp8dot4 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +// +// FDOT2 instructions +// +// fdot2 - indexed + +fdot z0.h, z0.b, z0.b[0] // 01100100-00100000-01000100-00000000 +// CHECK-INST: fdot z0.h, z0.b, z0.b[0] +// CHECK-ENCODING: [0x00,0x44,0x20,0x64] +// CHECK-ERROR: instruction requires: ssve-fp8dot2 or (sve2 and fp8dot2) +// CHECK-UNKNOWN: 64204400 + +movprfx z23, z31 +fdot z23.h, z13.b, z0.b[3] // 01100100-00101000-01001101-10110111 +// CHECK-INST: movprfx z23, z31 +// CHECK-INST: fdot z23.h, z13.b, z0.b[3] +// CHECK-ENCODING: [0xb7,0x4d,0x28,0x64] +// CHECK-ERROR: instruction requires: ssve-fp8dot2 or (sve2 and fp8dot2) +// CHECK-UNKNOWN: 64284db7 + +fdot z31.h, z31.b, z7.b[7] // 01100100-00111111-01001111-11111111 +// CHECK-INST: fdot z31.h, z31.b, z7.b[7] +// CHECK-ENCODING: [0xff,0x4f,0x3f,0x64] +// CHECK-ERROR: instruction requires: ssve-fp8dot2 or (sve2 and fp8dot2) +// CHECK-UNKNOWN: 643f4fff + + +// fdot2 - group + +fdot z0.h, z0.b, z0.b // 01100100-00100000-10000100-00000000 +// CHECK-INST: fdot z0.h, z0.b, z0.b +// CHECK-ENCODING: [0x00,0x84,0x20,0x64] +// CHECK-ERROR: instruction requires: ssve-fp8dot2 or (sve2 and fp8dot2) +// CHECK-UNKNOWN: 64208400 + +movprfx z23, z31 +fdot z23.h, z13.b, z8.b // 01100100-00101000-10000101-10110111 +// CHECK-INST: movprfx z23, z31 +// CHECK-INST: fdot z23.h, z13.b, z8.b +// CHECK-ENCODING: [0xb7,0x85,0x28,0x64] +// CHECK-ERROR: instruction requires: ssve-fp8dot2 
or (sve2 and fp8dot2) +// CHECK-UNKNOWN: 642885b7 + +fdot z31.h, z31.b, z31.b // 01100100-00111111-10000111-11111111 +// CHECK-INST: fdot z31.h, z31.b, z31.b +// CHECK-ENCODING: [0xff,0x87,0x3f,0x64] +// CHECK-ERROR: instruction requires: ssve-fp8dot2 or (sve2 and fp8dot2) +// CHECK-UNKNOWN: 643f87ff + + +// +// FDOT4 instructions +// +// fdot4 - indexed + +fdot z0.s, z0.b, z0.b[0] // 01100100-01100000-01000100-00000000 +// CHECK-INST: fdot z0.s, z0.b, z0.b[0] +// CHECK-ENCODING: [0x00,0x44,0x60,0x64] +// CHECK-ERROR: instruction requires: ssve-fp8dot4 or (sve2 and fp8dot4) +// CHECK-UNKNOWN: 64604400 + +movprfx z23, z31 +fdot z23.s, z13.b, z0.b[1] // 01100100-01101000-01000101-10110111 +// CHECK-INST: movprfx z23, z31 +// CHECK-INST: fdot z23.s, z13.b, z0.b[1] +// CHECK-ENCODING: [0xb7,0x45,0x68,0x64] +// CHECK-ERROR: instruction requires: ssve-fp8dot4 or (sve2 and fp8dot4) +// CHECK-UNKNOWN: 646845b7 + +fdot z31.s, z31.b, z7.b[3] // 01100100-01111111-01000111-11111111 +// CHECK-INST: fdot z31.s, z31.b, z7.b[3] +// CHECK-ENCODING: [0xff,0x47,0x7f,0x64] +// CHECK-ERROR: instruction requires: ssve-fp8dot4 or (sve2 and fp8dot4) +// CHECK-UNKNOWN: 647f47ff + +// fdot4 - group + +fdot z0.s, z0.b, z0.b // 01100100-01100000-10000100-00000000 +// CHECK-INST: fdot z0.s, z0.b, z0.b +// CHECK-ENCODING: [0x00,0x84,0x60,0x64] +// CHECK-ERROR: instruction requires: ssve-fp8dot4 or (sve2 and fp8dot4) +// CHECK-UNKNOWN: 64608400 + +movprfx z23, z31 +fdot z23.s, z13.b, z8.b // 01100100-01101000-10000101-10110111 +// CHECK-INST: movprfx z23, z31 +// CHECK-INST: fdot z23.s, z13.b, z8.b +// CHECK-ENCODING: [0xb7,0x85,0x68,0x64] +// CHECK-ERROR: instruction requires: ssve-fp8dot4 or (sve2 and fp8dot4) +// CHECK-UNKNOWN: 646885b7 + +fdot z31.s, z31.b, z31.b // 01100100-01111111-10000111-11111111 +// CHECK-INST: fdot z31.s, z31.b, z31.b +// CHECK-ENCODING: [0xff,0x87,0x7f,0x64] +// CHECK-ERROR: instruction requires: ssve-fp8dot4 or (sve2 and fp8dot4) +// CHECK-UNKNOWN: 647f87ff diff 
--git a/llvm/test/MC/AArch64/SVE2p1/fdot-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/fdot-diagnostics.s index bfcbbc4c89e6f..7a68b92ba30e1 100644 --- a/llvm/test/MC/AArch64/SVE2p1/fdot-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE2p1/fdot-diagnostics.s @@ -17,7 +17,7 @@ fdot z0.s, z0.h, z0.h[-1] // Invalid vector suffix fdot z0.h, z0.s, z0.s -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction // CHECK-NEXT: fdot z0.h, z0.s, z0.s // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp index 3c6d463fee259..d4c5d68cb3ca7 100644 --- a/llvm/unittests/TargetParser/TargetParserTest.cpp +++ b/llvm/unittests/TargetParser/TargetParserTest.cpp @@ -1713,27 +1713,39 @@ TEST(TargetParserTest, testAArch64Extension) { TEST(TargetParserTest, AArch64ExtensionFeatures) { std::vector Extensions = { - AArch64::AEK_CRC, AArch64::AEK_LSE, AArch64::AEK_RDM, - AArch64::AEK_CRYPTO, AArch64::AEK_SM4, AArch64::AEK_SHA3, - AArch64::AEK_SHA2, AArch64::AEK_AES, AArch64::AEK_DOTPROD, - AArch64::AEK_FP, AArch64::AEK_SIMD, AArch64::AEK_FP16, - AArch64::AEK_FP16FML, AArch64::AEK_PROFILE, AArch64::AEK_RAS, - AArch64::AEK_SVE, AArch64::AEK_SVE2, AArch64::AEK_SVE2AES, - AArch64::AEK_SVE2SM4, AArch64::AEK_SVE2SHA3, AArch64::AEK_SVE2BITPERM, - AArch64::AEK_RCPC, AArch64::AEK_RAND, AArch64::AEK_MTE, - AArch64::AEK_SSBS, AArch64::AEK_SB, AArch64::AEK_PREDRES, - AArch64::AEK_BF16, AArch64::AEK_I8MM, AArch64::AEK_F32MM, - AArch64::AEK_F64MM, AArch64::AEK_TME, AArch64::AEK_LS64, - AArch64::AEK_BRBE, AArch64::AEK_PAUTH, AArch64::AEK_FLAGM, - AArch64::AEK_SME, AArch64::AEK_SMEF64F64, AArch64::AEK_SMEI16I64, - AArch64::AEK_SME2, AArch64::AEK_HBC, AArch64::AEK_MOPS, - AArch64::AEK_PERFMON, AArch64::AEK_SVE2p1, AArch64::AEK_SME2p1, - AArch64::AEK_B16B16, AArch64::AEK_SMEF16F16, AArch64::AEK_CSSC, - AArch64::AEK_RCPC3, AArch64::AEK_THE, 
AArch64::AEK_D128, - AArch64::AEK_LSE128, AArch64::AEK_SPECRES2, AArch64::AEK_RASv2, - AArch64::AEK_ITE, AArch64::AEK_GCS, AArch64::AEK_FPMR, - AArch64::AEK_FP8, AArch64::AEK_FAMINMAX, AArch64::AEK_FP8FMA, - AArch64::AEK_SSVE_FP8FMA}; + AArch64::AEK_CRC, AArch64::AEK_LSE, + AArch64::AEK_RDM, AArch64::AEK_CRYPTO, + AArch64::AEK_SM4, AArch64::AEK_SHA3, + AArch64::AEK_SHA2, AArch64::AEK_AES, + AArch64::AEK_DOTPROD, AArch64::AEK_FP, + AArch64::AEK_SIMD, AArch64::AEK_FP16, + AArch64::AEK_FP16FML, AArch64::AEK_PROFILE, + AArch64::AEK_RAS, AArch64::AEK_SVE, + AArch64::AEK_SVE2, AArch64::AEK_SVE2AES, + AArch64::AEK_SVE2SM4, AArch64::AEK_SVE2SHA3, + AArch64::AEK_SVE2BITPERM, AArch64::AEK_RCPC, + AArch64::AEK_RAND, AArch64::AEK_MTE, + AArch64::AEK_SSBS, AArch64::AEK_SB, + AArch64::AEK_PREDRES, AArch64::AEK_BF16, + AArch64::AEK_I8MM, AArch64::AEK_F32MM, + AArch64::AEK_F64MM, AArch64::AEK_TME, + AArch64::AEK_LS64, AArch64::AEK_BRBE, + AArch64::AEK_PAUTH, AArch64::AEK_FLAGM, + AArch64::AEK_SME, AArch64::AEK_SMEF64F64, + AArch64::AEK_SMEI16I64, AArch64::AEK_SME2, + AArch64::AEK_HBC, AArch64::AEK_MOPS, + AArch64::AEK_PERFMON, AArch64::AEK_SVE2p1, + AArch64::AEK_SME2p1, AArch64::AEK_B16B16, + AArch64::AEK_SMEF16F16, AArch64::AEK_CSSC, + AArch64::AEK_RCPC3, AArch64::AEK_THE, + AArch64::AEK_D128, AArch64::AEK_LSE128, + AArch64::AEK_SPECRES2, AArch64::AEK_RASv2, + AArch64::AEK_ITE, AArch64::AEK_GCS, + AArch64::AEK_FPMR, AArch64::AEK_FP8, + AArch64::AEK_FAMINMAX, AArch64::AEK_FP8FMA, + AArch64::AEK_SSVE_FP8FMA, AArch64::AEK_FP8DOT2, + AArch64::AEK_SSVE_FP8DOT2, AArch64::AEK_FP8DOT4, + AArch64::AEK_SSVE_FP8DOT4}; std::vector Features; @@ -1810,6 +1822,10 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) { EXPECT_TRUE(llvm::is_contained(Features, "+faminmax")); EXPECT_TRUE(llvm::is_contained(Features, "+fp8fma")); EXPECT_TRUE(llvm::is_contained(Features, "+ssve-fp8fma")); + EXPECT_TRUE(llvm::is_contained(Features, "+fp8dot2")); + EXPECT_TRUE(llvm::is_contained(Features, 
"+ssve-fp8dot2")); + EXPECT_TRUE(llvm::is_contained(Features, "+fp8dot4")); + EXPECT_TRUE(llvm::is_contained(Features, "+ssve-fp8dot4")); // Assuming we listed every extension above, this should produce the same // result. (note that AEK_NONE doesn't have a name so it won't be in the @@ -1938,6 +1954,10 @@ TEST(TargetParserTest, AArch64ArchExtFeature) { {"faminmax", "nofaminmax", "+faminmax", "-faminmax"}, {"fp8fma", "nofp8fma", "+fp8fma", "-fp8fma"}, {"ssve-fp8fma", "nossve-fp8fma", "+ssve-fp8fma", "-ssve-fp8fma"}, + {"fp8dot2", "nofp8dot2", "+fp8dot2", "-fp8dot2"}, + {"ssve-fp8dot2", "nossve-fp8dot2", "+ssve-fp8dot2", "-ssve-fp8dot2"}, + {"fp8dot4", "nofp8dot4", "+fp8dot4", "-fp8dot4"}, + {"ssve-fp8dot4", "nossve-fp8dot4", "+ssve-fp8dot4", "-ssve-fp8dot4"}, }; for (unsigned i = 0; i < std::size(ArchExt); i++) {