[ARM] Add early-clobber to MVE VCMLA.f32 #114995

ostannard · 2024-11-05T14:16:58Z

This instruction (but not the f16 variant) cannot use the same register for the output as either of the inputs, so it needs to be marked as early-clobber.

llvmbot · 2024-11-05T14:17:42Z

@llvm/pr-subscribers-backend-arm

Author: Oliver Stannard (ostannard)

Changes

This instruction (but not the f16 variant) cannot use the same register for the output as either of the inputs, so it needs to be marked as early-clobber.

Full diff: https://github.com/llvm/llvm-project/pull/114995.diff

2 Files Affected:

(modified) llvm/lib/Target/ARM/ARMInstrMVE.td (+5-5)
(modified) llvm/test/CodeGen/Thumb2/mve-vcmla.ll (+24)

diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 8c8403ac58b080..22af599f4f0859 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -3583,10 +3583,10 @@ def ARMimmOneH: PatLeaf<(bitconvert (v8i16 (ARMvmovImm (i32 2620))))>; // 1.0 ha
 defm MVE_VMULf32 : MVE_VMUL_fp_m<MVE_v4f32, ARMimmOneF>;
 defm MVE_VMULf16 : MVE_VMUL_fp_m<MVE_v8f16, ARMimmOneH>;
 
-class MVE_VCMLA<string suffix, bits<2> size>
+class MVE_VCMLA<string suffix, bits<2> size, string cstr>
   : MVEFloatArithNeon<"vcmla", suffix, size{1}, (outs MQPR:$Qd),
                          (ins MQPR:$Qd_src, MQPR:$Qn, MQPR:$Qm, complexrotateop:$rot),
-                         "$Qd, $Qn, $Qm, $rot", vpred_n, "$Qd = $Qd_src", size, []> {
+                         "$Qd, $Qn, $Qm, $rot", vpred_n, "$Qd = $Qd_src"#cstr, size, []> {
   bits<4> Qd;
   bits<4> Qn;
   bits<2> rot;
@@ -3603,8 +3603,8 @@ class MVE_VCMLA<string suffix, bits<2> size>
   let Inst{4} = 0b0;
 }
 
-multiclass MVE_VCMLA_m<MVEVectorVTInfo VTI> {
-  def "" : MVE_VCMLA<VTI.Suffix, VTI.Size>;
+multiclass MVE_VCMLA_m<MVEVectorVTInfo VTI, string cstr=""> {
+  def "" : MVE_VCMLA<VTI.Suffix, VTI.Size, cstr>;
   defvar Inst = !cast<Instruction>(NAME);
 
   let Predicates = [HasMVEFloat] in {
@@ -3633,7 +3633,7 @@ multiclass MVE_VCMLA_m<MVEVectorVTInfo VTI> {
 }
 
 defm MVE_VCMLAf16 : MVE_VCMLA_m<MVE_v8f16>;
-defm MVE_VCMLAf32 : MVE_VCMLA_m<MVE_v4f32>;
+defm MVE_VCMLAf32 : MVE_VCMLA_m<MVE_v4f32, ",@earlyclobber $Qd">;
 
 class MVE_VADDSUBFMA_fp<string iname, string suffix, bits<2> size, bit bit_4,
                         bit bit_8, bit bit_21, dag iops=(ins),
diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmla.ll b/llvm/test/CodeGen/Thumb2/mve-vcmla.ll
index d1976472e39460..df542be73c58cb 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcmla.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcmla.ll
@@ -121,3 +121,27 @@ entry:
   %res = fadd <4 x float> %d, %a
   ret <4 x float> %res
 }
+
+define arm_aapcs_vfpcc <8 x half> @same_register_f16(<8 x half> %a) {
+; CHECK-LABEL: same_register_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmla.f16 q0, q0, q0, #0
+; CHECK-NEXT:    bx lr
+entry:
+  %d = tail call <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32 0, <8 x half> zeroinitializer, <8 x half> %a, <8 x half> %a)
+  %res = fadd fast <8 x half> %d, %a
+  ret <8 x half> %res
+}
+
+define arm_aapcs_vfpcc <4 x float> @same_register_f32(<4 x float> %a) {
+; CHECK-LABEL: same_register_f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov q1, q0
+; CHECK-NEXT:    vcmla.f32 q1, q0, q0, #0
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %d = tail call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 0, <4 x float> zeroinitializer, <4 x float> %a, <4 x float> %a)
+  %res = fadd fast <4 x float> %d, %a
+  ret <4 x float> %res
+}

davemgreen

LGTM, cheers

[ARM] Add early-clobber to MVE VCMLA.f32

8cae064

This instruction (but not the f16 variant) cannot use the same register for the output as either of the inputs, so it needs to be marked as early-clobber.

ostannard requested a review from davemgreen November 5, 2024 14:16

llvmbot added the backend:ARM label Nov 5, 2024

davemgreen approved these changes Nov 5, 2024

View reviewed changes

ostannard merged commit 9b016e3 into llvm:main Nov 6, 2024
10 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[ARM] Add early-clobber to MVE VCMLA.f32 #114995

[ARM] Add early-clobber to MVE VCMLA.f32 #114995

Uh oh!

ostannard commented Nov 5, 2024

Uh oh!

llvmbot commented Nov 5, 2024

Uh oh!

davemgreen left a comment

Uh oh!

Uh oh!

Uh oh!

[ARM] Add early-clobber to MVE VCMLA.f32 #114995

[ARM] Add early-clobber to MVE VCMLA.f32 #114995

Uh oh!

Conversation

ostannard commented Nov 5, 2024

Uh oh!

llvmbot commented Nov 5, 2024

Uh oh!

davemgreen left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!