Skip to content

Commit

Permalink
[ARM] Improve detection of fallthrough when aligning blocks
Browse files Browse the repository at this point in the history
We align blocks that are not fallen through under Cortex-M at O3 to lead
to fewer instruction fetches. This improves that for the block after a LE or
LETP. These blocks will still have terminating branches until the
LowOverheadLoops pass is run (as they are not handled by analyzeBranch,
the branch is not removed until later), so canFallThrough will return
false. These extra branches will eventually be removed, leaving a
fallthrough, so treat them as such and don't add unnecessary alignments.

Differential Revision: https://reviews.llvm.org/D107810
  • Loading branch information
davemgreen committed Sep 27, 2021
1 parent 1b49a72 commit bb2d23d
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 5 deletions.
25 changes: 22 additions & 3 deletions llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
Expand Up @@ -18,6 +18,7 @@
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MVETailPredUtils.h"
#include "Thumb2InstrInfo.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/DenseMap.h"
Expand Down Expand Up @@ -340,12 +341,12 @@ LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() {
// Align blocks where the previous block does not fall through. This may add
// extra NOP's but they will not be executed. It uses the PrefLoopAlignment as a
// measure of how much to align, and only runs at CodeGenOpt::Aggressive.
static bool AlignBlocks(MachineFunction *MF) {
static bool AlignBlocks(MachineFunction *MF, const ARMSubtarget *STI) {
if (MF->getTarget().getOptLevel() != CodeGenOpt::Aggressive ||
MF->getFunction().hasOptSize())
return false;

auto *TLI = MF->getSubtarget().getTargetLowering();
auto *TLI = STI->getTargetLowering();
const Align Alignment = TLI->getPrefLoopAlignment();
if (Alignment < 4)
return false;
Expand All @@ -357,7 +358,25 @@ static bool AlignBlocks(MachineFunction *MF) {
Changed = true;
MBB.setAlignment(Alignment);
}

PrevCanFallthough = MBB.canFallThrough();

// For LOB's, the ARMLowOverheadLoops pass may remove the unconditional
// branch later in the pipeline.
if (STI->hasLOB()) {
for (const auto &MI : reverse(MBB.terminators())) {
if (MI.getOpcode() == ARM::t2B &&
MI.getOperand(0).getMBB() == MBB.getNextNode())
continue;
if (isLoopStart(MI) || MI.getOpcode() == ARM::t2LoopEnd ||
MI.getOpcode() == ARM::t2LoopEndDec) {
PrevCanFallthough = true;
break;
}
// Any other terminator - nothing to do
break;
}
}
}

return Changed;
Expand Down Expand Up @@ -406,7 +425,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
}

// Align any non-fallthrough blocks
MadeChange |= AlignBlocks(MF);
MadeChange |= AlignBlocks(MF, STI);

// Perform the initial placement of the constant pool entries. To start with,
// we put them all at the end of the function.
Expand Down
2 changes: 0 additions & 2 deletions llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll
Expand Up @@ -14,7 +14,6 @@ define i32 @loop(i32* nocapture readonly %x) {
; CHECK-NEXT: ldr r2, [r0], #4
; CHECK-NEXT: add r1, r2
; CHECK-NEXT: le lr, .LBB0_1
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: pop {r7, pc}
Expand Down Expand Up @@ -54,7 +53,6 @@ define i64 @loopif(i32* nocapture readonly %x, i32 %y, i32 %n) {
; CHECK-NEXT: ldr r2, [r12], #4
; CHECK-NEXT: smlal r0, r3, r2, r1
; CHECK-NEXT: le lr, .LBB1_2
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
; CHECK-NEXT: mov r1, r3
; CHECK-NEXT: pop {r7, pc}
Expand Down

0 comments on commit bb2d23d

Please sign in to comment.