Skip to content

Commit

Permalink
[AArch64] Make machine combiner patterns preserve MIFlags
Browse files Browse the repository at this point in the history
This is mainly done so that we don't lose the nofpexcept flag once we
start emitting it.

Differential Revision: https://reviews.llvm.org/D118621
  • Loading branch information
john-brawn-arm committed Feb 3, 2022
1 parent c39969e commit 94843ea
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 8 deletions.
6 changes: 4 additions & 2 deletions llvm/lib/CodeGen/TargetInstrInfo.cpp
Expand Up @@ -873,11 +873,13 @@ void TargetInstrInfo::reassociateOps(
MachineInstrBuilder MIB1 =
BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR)
.addReg(RegX, getKillRegState(KillX))
.addReg(RegY, getKillRegState(KillY));
.addReg(RegY, getKillRegState(KillY))
.setMIFlags(Prev.getFlags());
MachineInstrBuilder MIB2 =
BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC)
.addReg(RegA, getKillRegState(KillA))
.addReg(NewVR, getKillRegState(true));
.addReg(NewVR, getKillRegState(true))
.setMIFlags(Root.getFlags());

setSpecialOperandAttr(Root, Prev, *MIB1, *MIB2);

Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
Expand Up @@ -6214,6 +6214,14 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
if (MUL)
DelInstrs.push_back(MUL);
DelInstrs.push_back(&Root);

// Set the flags on the inserted instructions to be the merged flags of the
// instructions that we have combined.
uint16_t Flags = Root.getFlags();
if (MUL)
Flags = Root.mergeFlagsWith(*MUL);
for (auto *MI : InsInstrs)
MI->setFlags(Flags);
}

/// Replace csincr-branch sequence by simple conditional branch
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AArch64/machine-combiner-instr-fmf.mir
Expand Up @@ -6,7 +6,7 @@
# CHECK: [[C:%.*]]:fpr32 = COPY $s2
# CHECK-NEXT: [[B:%.*]]:fpr32 = COPY $s1
# CHECK-NEXT: [[A:%.*]]:fpr32 = COPY $s0
# CHECK-NEXT: :fpr32 = FMADDSrrr [[B]], [[A]], [[C]]
# CHECK-NEXT: :fpr32 = nnan ninf nsz arcp contract afn reassoc FMADDSrrr [[B]], [[A]], [[C]]
---
name: scalar_fmadd_fast
alignment: 4
Expand Down Expand Up @@ -45,7 +45,7 @@ body: |
# CHECK: [[C:%.*]]:fpr32 = COPY $s2
# CHECK-NEXT: [[B:%.*]]:fpr32 = COPY $s1
# CHECK-NEXT: [[A:%.*]]:fpr32 = COPY $s0
# CHECK-NEXT: :fpr32 = FMADDSrrr [[B]], [[A]], [[C]]
# CHECK-NEXT: :fpr32 = contract FMADDSrrr [[B]], [[A]], [[C]]

---
name: scalar_fmadd_contract
Expand Down Expand Up @@ -125,7 +125,7 @@ body: |
# CHECK: [[C:%.*]]:fpr32 = COPY $s2
# CHECK-NEXT: [[B:%.*]]:fpr32 = COPY $s1
# CHECK-NEXT: [[A:%.*]]:fpr32 = COPY $s0
# CHECK-NEXT: :fpr32 = FMADDSrrr [[B]], [[A]], [[C]]
# CHECK-NEXT: :fpr32 = contract FMADDSrrr [[B]], [[A]], [[C]]

---
name: scalar_fmadd_contract_op1
Expand Down Expand Up @@ -206,7 +206,7 @@ body: |
# CHECK: [[C:%.*]]:fpr128 = COPY $q2
# CHECK-NEXT: [[B:%.*]]:fpr128 = COPY $q1
# CHECK-NEXT: [[A:%.*]]:fpr128 = COPY $q0
# CHECK-NEXT: fpr128 = FMLAv2f64 [[C]], [[B]], [[A]]
# CHECK-NEXT: fpr128 = nnan ninf nsz arcp contract afn reassoc FMLAv2f64 [[C]], [[B]], [[A]]
---
name: vector_fmadd_fast
alignment: 4
Expand Down Expand Up @@ -245,7 +245,7 @@ body: |
# CHECK: [[C:%.*]]:fpr128 = COPY $q2
# CHECK-NEXT: [[B:%.*]]:fpr128 = COPY $q1
# CHECK-NEXT: [[A:%.*]]:fpr128 = COPY $q0
# CHECK-NEXT: fpr128 = FMLAv2f64 [[C]], [[B]], [[A]]
# CHECK-NEXT: fpr128 = contract FMLAv2f64 [[C]], [[B]], [[A]]
---
name: vector_fmadd_contract
alignment: 4
Expand Down Expand Up @@ -324,7 +324,7 @@ body: |
# CHECK: [[C:%.*]]:fpr128 = COPY $q2
# CHECK-NEXT: [[B:%.*]]:fpr128 = COPY $q1
# CHECK-NEXT: [[A:%.*]]:fpr128 = COPY $q0
# CHECK-NEXT: fpr128 = FMLAv2f64 [[C]], [[B]], [[A]]
# CHECK-NEXT: fpr128 = contract FMLAv2f64 [[C]], [[B]], [[A]]

---
name: vector_fmadd_contract_op1
Expand Down
132 changes: 132 additions & 0 deletions llvm/test/CodeGen/AArch64/machine-combiner-reassociate.mir
@@ -0,0 +1,132 @@
# RUN: llc -run-pass=machine-combiner -mtriple=aarch64-unknown-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SAFE
# RUN: llc -run-pass=machine-combiner -mtriple=aarch64-unknown-linux-gnu -enable-unsafe-fp-math %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE

# fadd without the reassoc flag can be reassociated only when unsafe fp math is
# enabled.
# CHECK-LABEL: name: fadd_no_reassoc
# CHECK: %4:fpr32 = FADDSrr %0, %1
# CHECK-SAFE-NEXT: %5:fpr32 = FADDSrr killed %4, %2
# CHECK-SAFE-NEXT: %6:fpr32 = FADDSrr killed %5, %3
# CHECK-UNSAFE-NEXT: %9:fpr32 = FADDSrr %2, %3
# CHECK-UNSAFE-NEXT: %6:fpr32 = FADDSrr killed %4, killed %9
---
name: fadd_no_reassoc
alignment: 4
tracksRegLiveness: true
registers:
- { id: 0, class: fpr32 }
- { id: 1, class: fpr32 }
- { id: 2, class: fpr32 }
- { id: 3, class: fpr32 }
- { id: 4, class: fpr32 }
- { id: 5, class: fpr32 }
- { id: 6, class: fpr32 }
liveins:
- { reg: '$s0', virtual-reg: '%0' }
- { reg: '$s1', virtual-reg: '%1' }
- { reg: '$s2', virtual-reg: '%2' }
- { reg: '$s3', virtual-reg: '%3' }
frameInfo:
maxAlignment: 1
maxCallFrameSize: 0
machineFunctionInfo: {}
body: |
bb.0.entry:
liveins: $s0, $s1, $s2, $s3
%3:fpr32 = COPY $s3
%2:fpr32 = COPY $s2
%1:fpr32 = COPY $s1
%0:fpr32 = COPY $s0
%4:fpr32 = FADDSrr %0, %1
%5:fpr32 = FADDSrr killed %4, %2
%6:fpr32 = FADDSrr killed %5, %3
$s0 = COPY %6
RET_ReallyLR implicit $s0
# FIXME: We should be able to reassociate without unsafe fp math, but currently
# the reassoc flag is ignored.
# CHECK-LABEL: name: fadd_reassoc
# CHECK: %4:fpr32 = reassoc FADDSrr %0, %1
# CHECK-SAFE-NEXT: %5:fpr32 = reassoc FADDSrr killed %4, %2
# CHECK-SAFE-NEXT: %6:fpr32 = reassoc FADDSrr killed %5, %3
# CHECK-UNSAFE-NEXT: %9:fpr32 = reassoc FADDSrr %2, %3
# CHECK-UNSAFE-NEXT: %6:fpr32 = reassoc FADDSrr killed %4, killed %9
---
name: fadd_reassoc
alignment: 4
tracksRegLiveness: true
registers:
- { id: 0, class: fpr32 }
- { id: 1, class: fpr32 }
- { id: 2, class: fpr32 }
- { id: 3, class: fpr32 }
- { id: 4, class: fpr32 }
- { id: 5, class: fpr32 }
- { id: 6, class: fpr32 }
liveins:
- { reg: '$s0', virtual-reg: '%0' }
- { reg: '$s1', virtual-reg: '%1' }
- { reg: '$s2', virtual-reg: '%2' }
- { reg: '$s3', virtual-reg: '%3' }
frameInfo:
maxAlignment: 1
maxCallFrameSize: 0
machineFunctionInfo: {}
body: |
bb.0.entry:
liveins: $s0, $s1, $s2, $s3
%3:fpr32 = COPY $s3
%2:fpr32 = COPY $s2
%1:fpr32 = COPY $s1
%0:fpr32 = COPY $s0
%4:fpr32 = reassoc FADDSrr %0, %1
%5:fpr32 = reassoc FADDSrr killed %4, %2
%6:fpr32 = reassoc FADDSrr killed %5, %3
$s0 = COPY %6
RET_ReallyLR implicit $s0
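# Check that flags on the instructions are preserved after reassociation. When
# reassociation happens (unsafe fp math), the newly created %9 carries the nnan
# flag of the %5 instruction it replaces, and the rewritten %6 keeps its ninf.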
# CHECK-LABEL: name: fadd_flags
# CHECK: %4:fpr32 = nsz FADDSrr %0, %1
# CHECK-SAFE-NEXT: %5:fpr32 = nnan FADDSrr killed %4, %2
# CHECK-SAFE-NEXT: %6:fpr32 = ninf FADDSrr killed %5, %3
# CHECK-UNSAFE-NEXT: %9:fpr32 = nnan FADDSrr %2, %3
# CHECK-UNSAFE-NEXT: %6:fpr32 = ninf FADDSrr killed %4, killed %9
---
name: fadd_flags
alignment: 4
tracksRegLiveness: true
registers:
- { id: 0, class: fpr32 }
- { id: 1, class: fpr32 }
- { id: 2, class: fpr32 }
- { id: 3, class: fpr32 }
- { id: 4, class: fpr32 }
- { id: 5, class: fpr32 }
- { id: 6, class: fpr32 }
liveins:
- { reg: '$s0', virtual-reg: '%0' }
- { reg: '$s1', virtual-reg: '%1' }
- { reg: '$s2', virtual-reg: '%2' }
- { reg: '$s3', virtual-reg: '%3' }
frameInfo:
maxAlignment: 1
maxCallFrameSize: 0
machineFunctionInfo: {}
body: |
bb.0.entry:
liveins: $s0, $s1, $s2, $s3
%3:fpr32 = COPY $s3
%2:fpr32 = COPY $s2
%1:fpr32 = COPY $s1
%0:fpr32 = COPY $s0
%4:fpr32 = nsz FADDSrr %0, %1
%5:fpr32 = nnan FADDSrr killed %4, %2
%6:fpr32 = ninf FADDSrr killed %5, %3
$s0 = COPY %6
RET_ReallyLR implicit $s0

0 comments on commit 94843ea

Please sign in to comment.