Skip to content

Commit 5cb14dc

Browse files
committed
[AArch64] Look through copy in MachineCombiner FMUL patterns.
This is a small addition to D99662, which added machine combiner patterns for FMUL(DUP(..)). Due to the way these are generated by ISel, the pattern may also appear as FMUL(COPY(DUP(..))); this patch now looks through the no-op COPY in that case. Differential Revision: https://reviews.llvm.org/D126632
1 parent 9771510 commit 5cb14dc

File tree

3 files changed

+15
-9
lines changed

3 files changed

+15
-9
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5170,6 +5170,10 @@ static bool getFMULPatterns(MachineInstr &Root,
51705170
MachineInstr *MI = nullptr;
51715171
if (MO.isReg() && Register::isVirtualRegister(MO.getReg()))
51725172
MI = MRI.getUniqueVRegDef(MO.getReg());
5173+
// Ignore No-op COPYs in FMUL(COPY(DUP(..)))
5174+
if (MI && MI->getOpcode() == TargetOpcode::COPY &&
5175+
MI->getOperand(1).getReg().isVirtual())
5176+
MI = MRI.getUniqueVRegDef(MI->getOperand(1).getReg());
51735177
if (MI && MI->getOpcode() == Opcode) {
51745178
Patterns.push_back(Pattern);
51755179
return true;
@@ -5441,6 +5445,9 @@ genIndexedMultiply(MachineInstr &Root,
54415445
MachineInstr *Dup =
54425446
MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
54435447

5448+
if (Dup->getOpcode() == TargetOpcode::COPY)
5449+
Dup = MRI.getUniqueVRegDef(Dup->getOperand(1).getReg());
5450+
54445451
Register DupSrcReg = Dup->getOperand(1).getReg();
54455452
MRI.clearKillFlags(DupSrcReg);
54465453
MRI.constrainRegClass(DupSrcReg, RC);

llvm/test/CodeGen/AArch64/machine-combiner-copy.ll

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -18,16 +18,15 @@ define void @fma_dup_f16(ptr noalias nocapture noundef readonly %A, half noundef
1818
; CHECK-NEXT: add x10, x1, #16
1919
; CHECK-NEXT: add x11, x0, #16
2020
; CHECK-NEXT: mov x12, x9
21-
; CHECK-NEXT: dup v1.8h, v0.h[0]
2221
; CHECK-NEXT: .LBB0_4: // %vector.body
2322
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
24-
; CHECK-NEXT: ldp q2, q3, [x11, #-16]
23+
; CHECK-NEXT: ldp q1, q2, [x11, #-16]
2524
; CHECK-NEXT: subs x12, x12, #16
2625
; CHECK-NEXT: add x11, x11, #32
27-
; CHECK-NEXT: ldp q4, q5, [x10, #-16]
28-
; CHECK-NEXT: fmla v4.8h, v2.8h, v1.8h
29-
; CHECK-NEXT: fmla v5.8h, v3.8h, v0.h[0]
30-
; CHECK-NEXT: stp q4, q5, [x10, #-16]
26+
; CHECK-NEXT: ldp q3, q4, [x10, #-16]
27+
; CHECK-NEXT: fmla v3.8h, v1.8h, v0.h[0]
28+
; CHECK-NEXT: fmla v4.8h, v2.8h, v0.h[0]
29+
; CHECK-NEXT: stp q3, q4, [x10, #-16]
3130
; CHECK-NEXT: add x10, x10, #32
3231
; CHECK-NEXT: b.ne .LBB0_4
3332
; CHECK-NEXT: // %bb.5: // %middle.block

llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -588,12 +588,12 @@ body: |
588588
; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY3]], %subreg.dsub
589589
; CHECK-NEXT: [[COPY4:%[0-9]+]]:fpr64 = COPY [[COPY1]]
590590
; CHECK-NEXT: [[COPY5:%[0-9]+]]:fpr64 = COPY [[COPY2]]
591-
; CHECK-NEXT: [[DUPv2i32lane:%[0-9]+]]:fpr64 = DUPv2i32lane killed [[INSERT_SUBREG]], 0
591+
; CHECK-NEXT: [[DUPv2i32lane:%[0-9]+]]:fpr64 = DUPv2i32lane [[INSERT_SUBREG]], 0
592592
; CHECK-NEXT: [[COPY6:%[0-9]+]]:fpr64 = COPY [[DUPv2i32lane]]
593593
; CHECK-NEXT: {{ $}}
594594
; CHECK-NEXT: bb.1:
595-
; CHECK-NEXT: [[FMULv2f32_:%[0-9]+]]:fpr64 = FMULv2f32 [[COPY5]], [[COPY6]]
596-
; CHECK-NEXT: [[FADDv2f32_:%[0-9]+]]:fpr64 = FADDv2f32 killed [[FMULv2f32_]], [[COPY4]]
595+
; CHECK-NEXT: [[FMULv2i32_indexed:%[0-9]+]]:fpr64 = FMULv2i32_indexed [[COPY5]], [[INSERT_SUBREG]], 0
596+
; CHECK-NEXT: [[FADDv2f32_:%[0-9]+]]:fpr64 = FADDv2f32 killed [[FMULv2i32_indexed]], [[COPY4]]
597597
; CHECK-NEXT: STRDui killed [[FADDv2f32_]], [[COPY]], 0 :: (store (s64), align 16)
598598
; CHECK-NEXT: B %bb.1
599599
bb.0:

0 commit comments

Comments
 (0)