Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions clang/include/clang/Basic/arm_mve.td
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,9 @@ multiclass FMA<bit add> {
// second multiply input.
defvar m2_cg = !if(add, (id $m2), (fneg $m2));

defvar unpred_cg = (IRIntBase<"fma", [Vector]> $m1, m2_cg, $addend);
defvar fma = strictFPAlt<IRIntBase<"fma", [Vector]>,
IRInt<"fma", [Vector]>>;
defvar unpred_cg = (fma $m1, m2_cg, $addend);
defvar pred_cg = (IRInt<"fma_predicated", [Vector, Predicate]>
$m1, m2_cg, $addend, $pred);

Expand Down Expand Up @@ -723,7 +725,7 @@ multiclass compare_with_pred<string condname, dag arguments,
NameOverride<"vcmp" # condname # "q_m" # suffix>;
}

multiclass compare<string condname, IRBuilder cmpop> {
multiclass compare<string condname, Builder cmpop> {
// Make all four variants of a comparison: the vector/vector and
// vector/scalar forms, each using compare_with_pred to make a
// predicated and unpredicated version.
Expand Down
9 changes: 5 additions & 4 deletions clang/include/clang/Basic/arm_mve_defs.td
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ class IRBuilderAddrParam<int index_> : IRBuilderParam<index_>;
// An IRBuilderParam for the argument at position index_ that additionally
// records a type name string in its 'type' field.
// NOTE(review): presumably 'type' names the integer type the emitter uses for
// that argument -- confirm against the special_params handling in MveEmitter.
class IRBuilderIntParam<int index_, string type_> : IRBuilderParam<index_> {
string type = type_;
}
class IRBuilderBase {
// Empty common base class for the operator nodes of a codegen dag.
// IRBuilderBase, IRIntBase and strictFPAlt all derive from it, so a parameter
// or field declared as Builder can hold any of them.
class Builder {}
class IRBuilderBase : Builder {
// The prefix of the function call, including an open parenthesis.
string prefix;

Expand Down Expand Up @@ -166,7 +167,7 @@ def address;
// Another node class you can use in the codegen dag. This one corresponds to
// an IR intrinsic function, which has to be specialized to a particular list
// of types.
class IRIntBase<string name_, list<Type> params_ = [], bit appendKind_ = 0> {
class IRIntBase<string name_, list<Type> params_ = [], bit appendKind_ = 0> : Builder {
string intname = name_; // base name of the intrinsic
list<Type> params = params_; // list of parameter types

Expand Down Expand Up @@ -214,8 +215,8 @@ def bitsize;

// strictFPAlt allows a node to have different code generation under strict-fp.
// The standard node can be any Builder (an IRBuilderBase or an IRIntBase);
// the strict-fp node must be an IRIntBase.
class strictFPAlt<IRBuilderBase standard_, IRIntBase strictfp_> {
IRBuilderBase standard = standard_;
class strictFPAlt<Builder standard_, IRIntBase strictfp_> : Builder {
Builder standard = standard_;
IRIntBase strictfp = strictfp_;
}

Expand Down
1,012 changes: 692 additions & 320 deletions clang/test/CodeGen/arm-mve-intrinsics/ternary.c

Large diffs are not rendered by default.

15 changes: 11 additions & 4 deletions clang/utils/TableGen/MveEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1260,7 +1260,9 @@ Result::Ptr EmitterBase::getCodeForDag(const DagInit *D,
for (unsigned i = 0, e = D->getNumArgs(); i < e; ++i)
Args.push_back(getCodeForDagArg(D, i, Scope, Param));

auto GenIRBuilderBase = [&](const Record *Op) {
auto GenIRBuilderBase = [&](const Record *Op) -> Result::Ptr {
assert(Op->isSubClassOf("IRBuilderBase") &&
"Expected IRBuilderBase in GenIRBuilderBase\n");
std::set<unsigned> AddressArgs;
std::map<unsigned, std::string> IntegerArgs;
for (const Record *sp : Op->getValueAsListOfDefs("special_params")) {
Expand All @@ -1274,7 +1276,9 @@ Result::Ptr EmitterBase::getCodeForDag(const DagInit *D,
return std::make_shared<IRBuilderResult>(Op->getValueAsString("prefix"),
Args, AddressArgs, IntegerArgs);
};
auto GenIRIntBase = [&](const Record *Op) {
auto GenIRIntBase = [&](const Record *Op) -> Result::Ptr {
assert(Op->isSubClassOf("IRIntBase") &&
"Expected IRIntBase in GenIRIntBase\n");
std::vector<const Type *> ParamTypes;
for (const Record *RParam : Op->getValueAsListOfDefs("params"))
ParamTypes.push_back(getType(RParam, Param));
Expand All @@ -1289,8 +1293,11 @@ Result::Ptr EmitterBase::getCodeForDag(const DagInit *D,
} else if (Op->isSubClassOf("IRIntBase")) {
return GenIRIntBase(Op);
} else if (Op->isSubClassOf("strictFPAlt")) {
auto Standard = GenIRBuilderBase(Op->getValueAsDef("standard"));
auto StrictFp = GenIRIntBase(Op->getValueAsDef("strictfp"));
auto StardardBuilder = Op->getValueAsDef("standard");
Result::Ptr Standard = StardardBuilder->isSubClassOf("IRBuilder")
? GenIRBuilderBase(StardardBuilder)
: GenIRIntBase(StardardBuilder);
Result::Ptr StrictFp = GenIRIntBase(Op->getValueAsDef("strictfp"));
return std::make_shared<StrictFpAltResult>(Standard, StrictFp);
} else {
PrintFatalError("Unsupported dag node " + Op->getName());
Expand Down
3 changes: 3 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsARM.td
Original file line number Diff line number Diff line change
Expand Up @@ -1362,6 +1362,9 @@ def int_arm_mve_vqmovn_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
llvm_i32_ty /* unsigned output */, llvm_i32_ty /* unsigned input */,
llvm_i32_ty /* top half */, llvm_anyvector_ty /* pred */], [IntrNoMem]>;

// Unpredicated fma: the result and all three operands (two multiplicands and
// the addend) share a single overloaded vector type. Declared IntrNoMem.
def int_arm_mve_fma: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0> /* mult op #1 */, LLVMMatchType<0> /* mult op #2 */,
LLVMMatchType<0> /* addend */], [IntrNoMem]>;
// fma_predicated returns the add operand for disabled lanes.
def int_arm_mve_fma_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0> /* mult op #1 */, LLVMMatchType<0> /* mult op #2 */,
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Target/ARM/ARMInstrMVE.td
Original file line number Diff line number Diff line change
Expand Up @@ -3723,6 +3723,10 @@ multiclass MVE_VFMA_fp_multi<string iname, bit fms, MVEVectorVTInfo VTI> {
if fms then {
def : Pat<(VTI.Vec (fma (fneg m1), m2, add)),
(Inst $add, $m1, $m2)>;
def : Pat<(VTI.Vec (int_arm_mve_fma (fneg m1), m2, add)),
(Inst $add, $m1, $m2)>;
def : Pat<(VTI.Vec (int_arm_mve_fma m1, (fneg m2), add)),
(Inst $add, $m1, $m2)>;
def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
(VTI.Vec (fma (fneg m1), m2, add)),
add)),
Expand All @@ -3734,6 +3738,8 @@ multiclass MVE_VFMA_fp_multi<string iname, bit fms, MVEVectorVTInfo VTI> {
} else {
def : Pat<(VTI.Vec (fma m1, m2, add)),
(Inst $add, $m1, $m2)>;
def : Pat<(VTI.Vec (int_arm_mve_fma m1, m2, add)),
(Inst $add, $m1, $m2)>;
def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
(VTI.Vec (fma m1, m2, add)),
add)),
Expand Down Expand Up @@ -5672,6 +5678,8 @@ multiclass MVE_VFMA_qr_multi<string iname, MVEVectorVTInfo VTI,
if scalar_addend then {
def : Pat<(VTI.Vec (fma v1, v2, vs)),
(VTI.Vec (Inst v1, v2, is))>;
def : Pat<(VTI.Vec (int_arm_mve_fma v1, v2, vs)),
(VTI.Vec (Inst v1, v2, is))>;
def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
(VTI.Vec (fma v1, v2, vs)),
v1)),
Expand All @@ -5681,6 +5689,10 @@ multiclass MVE_VFMA_qr_multi<string iname, MVEVectorVTInfo VTI,
(VTI.Vec (Inst v2, v1, is))>;
def : Pat<(VTI.Vec (fma vs, v1, v2)),
(VTI.Vec (Inst v2, v1, is))>;
def : Pat<(VTI.Vec (int_arm_mve_fma v1, vs, v2)),
(VTI.Vec (Inst v2, v1, is))>;
def : Pat<(VTI.Vec (int_arm_mve_fma vs, v1, v2)),
(VTI.Vec (Inst v2, v1, is))>;
def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
(VTI.Vec (fma vs, v2, v1)),
v1)),
Expand Down
124 changes: 112 additions & 12 deletions llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -o - %s | FileCheck %s

define arm_aapcs_vfpcc <8 x half> @test_vaddq_f16(<8 x half> %a, <8 x half> %b) {
define arm_aapcs_vfpcc <8 x half> @test_vaddq_f16(<8 x half> %a, <8 x half> %b) #0 {
; CHECK-LABEL: test_vaddq_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vadd.f16 q0, q0, q1
Expand All @@ -11,7 +11,7 @@ entry:
ret <8 x half> %0
}

define arm_aapcs_vfpcc <4 x float> @test_vaddq_f32(<4 x float> %a, <4 x float> %b) {
define arm_aapcs_vfpcc <4 x float> @test_vaddq_f32(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_vaddq_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vadd.f32 q0, q0, q1
Expand All @@ -21,7 +21,7 @@ entry:
ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @test_vsubq_f16(<8 x half> %a, <8 x half> %b) {
define arm_aapcs_vfpcc <8 x half> @test_vsubq_f16(<8 x half> %a, <8 x half> %b) #0 {
; CHECK-LABEL: test_vsubq_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vsub.f16 q0, q0, q1
Expand All @@ -31,7 +31,7 @@ entry:
ret <8 x half> %0
}

define arm_aapcs_vfpcc <4 x float> @test_vsubq_f32(<4 x float> %a, <4 x float> %b) {
define arm_aapcs_vfpcc <4 x float> @test_vsubq_f32(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_vsubq_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vsub.f32 q0, q0, q1
Expand All @@ -41,7 +41,7 @@ entry:
ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @test_vmulq_f16(<8 x half> %a, <8 x half> %b) {
define arm_aapcs_vfpcc <8 x half> @test_vmulq_f16(<8 x half> %a, <8 x half> %b) #0 {
; CHECK-LABEL: test_vmulq_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmul.f16 q0, q0, q1
Expand All @@ -51,7 +51,7 @@ entry:
ret <8 x half> %0
}

define arm_aapcs_vfpcc <4 x float> @test_vmulq_f32(<4 x float> %a, <4 x float> %b) {
define arm_aapcs_vfpcc <4 x float> @test_vmulq_f32(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_vmulq_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmul.f32 q0, q0, q1
Expand All @@ -64,7 +64,7 @@ entry:



define arm_aapcs_vfpcc <8 x half> @test_vaddq_f16_splat(<8 x half> %a, half %b) {
define arm_aapcs_vfpcc <8 x half> @test_vaddq_f16_splat(<8 x half> %a, half %b) #0 {
; CHECK-LABEL: test_vaddq_f16_splat:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 r0, s4
Expand All @@ -77,7 +77,7 @@ entry:
ret <8 x half> %0
}

define arm_aapcs_vfpcc <4 x float> @test_vaddq_f32_splat(<4 x float> %a, float %b) {
define arm_aapcs_vfpcc <4 x float> @test_vaddq_f32_splat(<4 x float> %a, float %b) #0 {
; CHECK-LABEL: test_vaddq_f32_splat:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s4
Expand All @@ -90,7 +90,7 @@ entry:
ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @test_vsubq_f16_splat(<8 x half> %a, half %b) {
define arm_aapcs_vfpcc <8 x half> @test_vsubq_f16_splat(<8 x half> %a, half %b) #0 {
; CHECK-LABEL: test_vsubq_f16_splat:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 r0, s4
Expand All @@ -103,7 +103,7 @@ entry:
ret <8 x half> %0
}

define arm_aapcs_vfpcc <4 x float> @test_vsubq_f32_splat(<4 x float> %a, float %b) {
define arm_aapcs_vfpcc <4 x float> @test_vsubq_f32_splat(<4 x float> %a, float %b) #0 {
; CHECK-LABEL: test_vsubq_f32_splat:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s4
Expand All @@ -116,7 +116,7 @@ entry:
ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @test_vmulq_f16_splat(<8 x half> %a, half %b) {
define arm_aapcs_vfpcc <8 x half> @test_vmulq_f16_splat(<8 x half> %a, half %b) #0 {
; CHECK-LABEL: test_vmulq_f16_splat:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 r0, s4
Expand All @@ -129,7 +129,7 @@ entry:
ret <8 x half> %0
}

define arm_aapcs_vfpcc <4 x float> @test_vmulq_f32_splat(<4 x float> %a, float %b) {
define arm_aapcs_vfpcc <4 x float> @test_vmulq_f32_splat(<4 x float> %a, float %b) #0 {
; CHECK-LABEL: test_vmulq_f32_splat:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s4
Expand All @@ -141,3 +141,103 @@ entry:
%0 = tail call <4 x float> @llvm.arm.mve.vmul.v4f32(<4 x float> %a, <4 x float> %s)
ret <4 x float> %0
}

; Vector-by-vector fma on <4 x float> in a strictfp function: %s1 and %s2 are
; the multiplicands and %dst is the addend. The llvm.arm.mve.fma call is
; expected to select a single vfma.f32 accumulating into q0.
define arm_aapcs_vfpcc <4 x float> @fma_v4f32(<4 x float> %dst, <4 x float> %s1, <4 x float> %s2) #0 {
; CHECK-LABEL: fma_v4f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vfma.f32 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = tail call <4 x float> @llvm.arm.mve.fma.v4f32(<4 x float> %s1, <4 x float> %s2, <4 x float> %dst)
ret <4 x float> %0
}

; Vector-by-vector fma on <8 x half> in a strictfp function: %s1 and %s2 are
; the multiplicands and %dst is the addend. The llvm.arm.mve.fma call is
; expected to select a single vfma.f16 accumulating into q0.
define arm_aapcs_vfpcc <8 x half> @fma_v8f16(<8 x half> %dst, <8 x half> %s1, <8 x half> %s2) #0 {
; CHECK-LABEL: fma_v8f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vfma.f16 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = tail call <8 x half> @llvm.arm.mve.fma.v8f16(<8 x half> %s1, <8 x half> %s2, <8 x half> %dst)
ret <8 x half> %0
}

; Vector-by-scalar fma in a strictfp function: the scalar %s3 is splatted
; (insertelement + shufflevector) and passed as the second multiplicand, with
; %s1 as the addend. Expected to select the vfma.f32 form that takes the scalar
; in a general-purpose register, with no separate vdup.
; NOTE(review): the _v8f16 suffix does not match the <4 x float> types used
; here; the names of this test and fma_n_v4f32 look swapped.
define arm_aapcs_vfpcc <4 x float> @fma_n_v8f16(<4 x float> %s1, <4 x float> %s2, float %s3) #0 {
; CHECK-LABEL: fma_n_v8f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s8
; CHECK-NEXT: vfma.f32 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%i = insertelement <4 x float> poison, float %s3, i32 0
%sp = shufflevector <4 x float> %i, <4 x float> poison, <4 x i32> zeroinitializer
%0 = tail call <4 x float> @llvm.arm.mve.fma.v4f32(<4 x float> %s2, <4 x float> %sp, <4 x float> %s1)
ret <4 x float> %0
}

; Vector-by-scalar fma in a strictfp function: the scalar %s3 is splatted
; (insertelement + shufflevector) and passed as the second multiplicand, with
; %s1 as the addend. Expected to select the vfma.f16 form that takes the scalar
; in a general-purpose register, with no separate vdup.
; NOTE(review): the _v4f32 suffix does not match the <8 x half> types used
; here; the names of this test and fma_n_v8f16 look swapped.
define arm_aapcs_vfpcc <8 x half> @fma_n_v4f32(<8 x half> %s1, <8 x half> %s2, half %s3) #0 {
; CHECK-LABEL: fma_n_v4f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 r0, s8
; CHECK-NEXT: vfma.f16 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%i = insertelement <8 x half> poison, half %s3, i32 0
%sp = shufflevector <8 x half> %i, <8 x half> poison, <8 x i32> zeroinitializer
%0 = tail call <8 x half> @llvm.arm.mve.fma.v8f16(<8 x half> %s2, <8 x half> %sp, <8 x half> %s1)
ret <8 x half> %0
}

; Fused multiply-subtract on <4 x float> in a strictfp function: the first
; multiplicand %s1 is negated with fneg before the llvm.arm.mve.fma call, and
; %dst is the addend. The fneg is expected to fold into a single vfms.f32.
define arm_aapcs_vfpcc <4 x float> @fms_v4f32(<4 x float> %dst, <4 x float> %s1, <4 x float> %s2) #0 {
; CHECK-LABEL: fms_v4f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vfms.f32 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%c = fneg <4 x float> %s1
%0 = tail call <4 x float> @llvm.arm.mve.fma.v4f32(<4 x float> %c, <4 x float> %s2, <4 x float> %dst)
ret <4 x float> %0
}

; Fused multiply-subtract on <8 x half> in a strictfp function: the first
; multiplicand %s1 is negated with fneg before the llvm.arm.mve.fma call, and
; %dst is the addend. The fneg is expected to fold into a single vfms.f16.
define arm_aapcs_vfpcc <8 x half> @fms_v8f16(<8 x half> %dst, <8 x half> %s1, <8 x half> %s2) #0 {
; CHECK-LABEL: fms_v8f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vfms.f16 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%c = fneg <8 x half> %s1
%0 = tail call <8 x half> @llvm.arm.mve.fma.v8f16(<8 x half> %c, <8 x half> %s2, <8 x half> %dst)
ret <8 x half> %0
}

; Multiply-subtract with a splatted scalar in a strictfp function: %s2 is
; negated with fneg and multiplied by the splat of %s3, with %s1 as the addend.
; The expected output materialises the splat with vdup.32 and uses the
; vector-by-vector vfms.f32, not a scalar-register form.
; NOTE(review): the _v8f16 suffix does not match the <4 x float> types used
; here; the names of this test and fms_n_v4f32 look swapped.
define arm_aapcs_vfpcc <4 x float> @fms_n_v8f16(<4 x float> %s1, <4 x float> %s2, float %s3) #0 {
; CHECK-LABEL: fms_n_v8f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s8
; CHECK-NEXT: vdup.32 q2, r0
; CHECK-NEXT: vfms.f32 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%c = fneg <4 x float> %s2
%i = insertelement <4 x float> poison, float %s3, i32 0
%sp = shufflevector <4 x float> %i, <4 x float> poison, <4 x i32> zeroinitializer
%0 = tail call <4 x float> @llvm.arm.mve.fma.v4f32(<4 x float> %c, <4 x float> %sp, <4 x float> %s1)
ret <4 x float> %0
}

; Multiply-subtract with a splatted scalar in a strictfp function: %s2 is
; negated with fneg and multiplied by the splat of %s3, with %s1 as the addend.
; The expected output materialises the splat with vdup.16 and uses the
; vector-by-vector vfms.f16, not a scalar-register form.
; NOTE(review): the _v4f32 suffix does not match the <8 x half> types used
; here; the names of this test and fms_n_v8f16 look swapped.
define arm_aapcs_vfpcc <8 x half> @fms_n_v4f32(<8 x half> %s1, <8 x half> %s2, half %s3) #0 {
; CHECK-LABEL: fms_n_v4f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 r0, s8
; CHECK-NEXT: vdup.16 q2, r0
; CHECK-NEXT: vfms.f16 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%c = fneg <8 x half> %s2
%i = insertelement <8 x half> poison, half %s3, i32 0
%sp = shufflevector <8 x half> %i, <8 x half> poison, <8 x i32> zeroinitializer
%0 = tail call <8 x half> @llvm.arm.mve.fma.v8f16(<8 x half> %c, <8 x half> %sp, <8 x half> %s1)
ret <8 x half> %0
}

attributes #0 = { strictfp }