16 changes: 16 additions & 0 deletions llvm/lib/CodeGen/MachineLoopInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,22 @@ MachineBasicBlock *MachineLoop::findLoopControlBlock() {
return nullptr;
}

/// Pick a debug location that stands for the start of this loop.
///
/// The terminator of the IR block behind the loop pre-header is consulted
/// first. When there is no pre-header, no IR block behind it, or that
/// terminator carries no location, the terminator of the IR block behind the
/// loop header is used instead (whatever location it holds). A
/// default-constructed DebugLoc is returned when the header has no IR block.
DebugLoc MachineLoop::getStartLoc() const {
  // First choice: the pre-header's terminator.
  MachineBasicBlock *PreheaderMBB = getLoopPreheader();
  const BasicBlock *PreheaderBB =
      PreheaderMBB ? PreheaderMBB->getBasicBlock() : nullptr;
  if (PreheaderBB) {
    DebugLoc PreheaderLoc = PreheaderBB->getTerminator()->getDebugLoc();
    if (PreheaderLoc)
      return PreheaderLoc;
  }

  // Fallback: the header's terminator, returned even if its location is
  // empty.
  MachineBasicBlock *HeaderMBB = getHeader();
  if (!HeaderMBB)
    return DebugLoc();

  const BasicBlock *HeaderBB = HeaderMBB->getBasicBlock();
  if (!HeaderBB)
    return DebugLoc();

  return HeaderBB->getTerminator()->getDebugLoc();
}

MachineBasicBlock *
MachineLoopInfo::findLoopPreheader(MachineLoop *L,
bool SpeculativePreheader) const {
Expand Down
90 changes: 90 additions & 0 deletions llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
///===- MachineOptimizationRemarkEmitter.cpp - Opt Diagnostic -*- C++ -*---===//
///
/// The LLVM Compiler Infrastructure
///
/// This file is distributed under the University of Illinois Open Source
/// License. See LICENSE.TXT for details.
///
///===---------------------------------------------------------------------===//
/// \file
/// Optimization diagnostic interfaces for machine passes. It's packaged as an
/// analysis pass so that by using this service passes become dependent on MBFI
/// as well. MBFI is used to compute the "hotness" of the diagnostic message.
///
///===---------------------------------------------------------------------===//

#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

/// Query MBFI for the profile count of \p MBB.
///
/// \returns the value of MBFI->getBlockProfileCount(&MBB), or None when this
/// emitter was created without an MBFI.
Optional<uint64_t>
MachineOptimizationRemarkEmitter::computeHotness(const MachineBasicBlock &MBB) {
  if (MBFI)
    return MBFI->getBlockProfileCount(&MBB);

  // No block-frequency information available: hotness is unknown.
  return None;
}

/// Attach a hotness value to \p Remark, derived from the block the remark
/// refers to. Remarks without a block are left untouched.
void MachineOptimizationRemarkEmitter::computeHotness(
    DiagnosticInfoMIROptimization &Remark) {
  if (const MachineBasicBlock *Block = Remark.getBlock())
    Remark.setHotness(computeHotness(*Block));
}

/// Emit a machine optimization remark.
///
/// \p OptDiagCommon must be a DiagnosticInfoMIROptimization (enforced by
/// cast<>). The remark first gets its hotness computed, is then streamed to
/// the context's diagnostics output file when one is set, and is finally
/// handed to LLVMContext::diagnose unless it is a verbose remark and
/// shouldEmitVerbose() is false.
void MachineOptimizationRemarkEmitter::emit(
    DiagnosticInfoOptimizationBase &OptDiagCommon) {
  auto &MIRDiag = cast<DiagnosticInfoMIROptimization>(OptDiagCommon);
  computeHotness(MIRDiag);

  LLVMContext &Context = MF.getFunction()->getContext();
  if (yaml::Output *RemarksFile = Context.getDiagnosticsOutputFile()) {
    // The remark is streamed to the YAML output through a pointer.
    DiagnosticInfoOptimizationBase *DiagPtr = &OptDiagCommon;
    *RemarksFile << DiagPtr;
  }

  // FIXME: now that IsVerbose is part of DI, filtering for this will be moved
  // from here to clang.
  const bool SuppressVerbose = MIRDiag.isVerbose() && !shouldEmitVerbose();
  if (!SuppressVerbose)
    Context.diagnose(MIRDiag);
}

/// Construct the pass with its static ID and run its initializer against the
/// pass registry returned by PassRegistry::getPassRegistry().
MachineOptimizationRemarkEmitterPass::MachineOptimizationRemarkEmitterPass()
    : MachineFunctionPass(ID) {
  PassRegistry &Registry = *PassRegistry::getPassRegistry();
  initializeMachineOptimizationRemarkEmitterPassPass(Registry);
}

/// Build the remark emitter for \p MF.
///
/// MachineBlockFrequencyInfo is fetched only when the function's LLVMContext
/// reports that diagnostic hotness was requested; otherwise the emitter is
/// created with a null MBFI. Always returns false.
bool MachineOptimizationRemarkEmitterPass::runOnMachineFunction(
    MachineFunction &MF) {
  const bool WantHotness =
      MF.getFunction()->getContext().getDiagnosticHotnessRequested();
  MachineBlockFrequencyInfo *MBFI =
      WantHotness ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;

  ORE = llvm::make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI);
  return false;
}

/// Declare this pass's analysis requirements: MachineBlockFrequencyInfo is
/// required and every analysis is marked as preserved.
///
/// Note that MBFI is required unconditionally here, although
/// runOnMachineFunction only queries it when diagnostic hotness was requested.
void MachineOptimizationRemarkEmitterPass::getAnalysisUsage(
    AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequired<MachineBlockFrequencyInfo>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

// Static pass ID; the constructor hands it to the MachineFunctionPass base.
char MachineOptimizationRemarkEmitterPass::ID = 0;
// Description and name strings passed to the registration macros below.
static const char ore_name[] = "Machine Optimization Remark Emitter";
#define ORE_NAME "machine-opt-remark-emitter"

// Register the pass and declare MachineBlockFrequencyInfo as a dependency.
// NOTE(review): the trailing `false, true` arguments are presumably the
// CFG-only and is-analysis flags of INITIALIZE_PASS_BEGIN/END - confirm
// against PassSupport.h.
INITIALIZE_PASS_BEGIN(MachineOptimizationRemarkEmitterPass, ORE_NAME, ore_name,
false, true)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
INITIALIZE_PASS_END(MachineOptimizationRemarkEmitterPass, ORE_NAME, ore_name,
false, true)
84 changes: 84 additions & 0 deletions llvm/lib/CodeGen/RegAllocGreedy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,10 @@
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
Expand Down Expand Up @@ -125,6 +127,7 @@ class RAGreedy : public MachineFunctionPass,
MachineBlockFrequencyInfo *MBFI;
MachineDominatorTree *DomTree;
MachineLoopInfo *Loops;
MachineOptimizationRemarkEmitter *ORE;
EdgeBundles *Bundles;
SpillPlacement *SpillPlacer;
LiveDebugVariables *DebugVars;
Expand Down Expand Up @@ -419,6 +422,20 @@ class RAGreedy : public MachineFunctionPass,
void collectHintInfo(unsigned, HintsInfo &);

bool isUnusedCalleeSavedReg(unsigned PhysReg) const;

/// Compute and report the number of spills and reloads for a loop.
///
/// \param L the loop to inspect; its sub-loops are processed recursively and
///          their counts are included in the totals.
/// \param [out] Reloads        loads from a spill slot.
/// \param [out] FoldedReloads  instructions with a folded load from a spill
///                             slot.
/// \param [out] Spills         stores to a spill slot.
/// \param [out] FoldedSpills   instructions with a folded store to a spill
///                             slot.
///
/// All four counters are overwritten (reset to zero first). A missed
/// optimization remark is emitted when at least one of them is non-zero.
void reportNumberOfSplillsReloads(MachineLoop *L, unsigned &Reloads,
unsigned &FoldedReloads, unsigned &Spills,
unsigned &FoldedSpills);

/// Report the number of spills and reloads for each loop.
void reportNumberOfSplillsReloads() {
for (MachineLoop *L : *Loops) {
unsigned Reloads, FoldedReloads, Spills, FoldedSpills;
reportNumberOfSplillsReloads(L, Reloads, FoldedReloads, Spills,
FoldedSpills);
}
}
};
} // end anonymous namespace

Expand All @@ -439,6 +456,7 @@ INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
INITIALIZE_PASS_DEPENDENCY(SpillPlacement)
INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
INITIALIZE_PASS_END(RAGreedy, "greedy",
"Greedy Register Allocator", false, false)

Expand Down Expand Up @@ -490,6 +508,7 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<LiveRegMatrix>();
AU.addRequired<EdgeBundles>();
AU.addRequired<SpillPlacement>();
AU.addRequired<MachineOptimizationRemarkEmitterPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}

Expand Down Expand Up @@ -2611,6 +2630,69 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
return 0;
}

/// Count the spill/reload instructions in loop \p L and report them.
///
/// The four output counters are reset, then filled with the totals for \p L:
/// the counts of every sub-loop (obtained by recursion, which also emits a
/// remark per sub-loop) plus the instructions in blocks whose innermost loop
/// is \p L itself. Each instruction is classified by the first matching test
/// among: plain load from a spill slot, folded load from a spill slot, plain
/// store to a spill slot, folded store to a spill slot. If any counter is
/// non-zero, a "LoopSpillReload" missed-optimization remark located at
/// L->getStartLoc() is emitted through ORE.
void RAGreedy::reportNumberOfSplillsReloads(MachineLoop *L, unsigned &Reloads,
                                            unsigned &FoldedReloads,
                                            unsigned &Spills,
                                            unsigned &FoldedSpills) {
  Reloads = FoldedReloads = Spills = FoldedSpills = 0;

  // Recurse into the sub-loops first and fold their totals into ours.
  for (MachineLoop *Inner : *L) {
    unsigned InnerReloads;
    unsigned InnerFoldedReloads;
    unsigned InnerSpills;
    unsigned InnerFoldedSpills;

    reportNumberOfSplillsReloads(Inner, InnerReloads, InnerFoldedReloads,
                                 InnerSpills, InnerFoldedSpills);
    Reloads += InnerReloads;
    FoldedReloads += InnerFoldedReloads;
    Spills += InnerSpills;
    FoldedSpills += InnerFoldedSpills;
  }

  const MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const TargetInstrInfo *InstrInfo = MF->getSubtarget().getInstrInfo();
  int SlotIdx;

  for (MachineBasicBlock *Block : L->getBlocks()) {
    // Blocks whose innermost loop is a sub-loop were counted by the recursion
    // above; only handle the ones that belong directly to L.
    if (Loops->getLoopFor(Block) != L)
      continue;

    for (MachineInstr &Instr : *Block) {
      const MachineMemOperand *MemOp;

      // The order of these tests matters: the first match wins.
      if (InstrInfo->isLoadFromStackSlot(Instr, SlotIdx) &&
          FrameInfo.isSpillSlotObjectIndex(SlotIdx)) {
        ++Reloads;
        continue;
      }
      if (InstrInfo->hasLoadFromStackSlot(Instr, MemOp, SlotIdx) &&
          FrameInfo.isSpillSlotObjectIndex(SlotIdx)) {
        ++FoldedReloads;
        continue;
      }
      if (InstrInfo->isStoreToStackSlot(Instr, SlotIdx) &&
          FrameInfo.isSpillSlotObjectIndex(SlotIdx)) {
        ++Spills;
        continue;
      }
      if (InstrInfo->hasStoreToStackSlot(Instr, MemOp, SlotIdx) &&
          FrameInfo.isSpillSlotObjectIndex(SlotIdx))
        ++FoldedSpills;
    }
  }

  // Nothing to report for a loop without any spill code.
  if (!Reloads && !FoldedReloads && !Spills && !FoldedSpills)
    return;

  using namespace ore;
  MachineOptimizationRemarkMissed Remark(DEBUG_TYPE, "LoopSpillReload",
                                         L->getStartLoc(), L->getHeader());
  // Only the non-zero counters appear in the message.
  if (Spills)
    Remark << NV("NumSpills", Spills) << " spills ";
  if (FoldedSpills)
    Remark << NV("NumFoldedSpills", FoldedSpills) << " folded spills ";
  if (Reloads)
    Remark << NV("NumReloads", Reloads) << " reloads ";
  if (FoldedReloads)
    Remark << NV("NumFoldedReloads", FoldedReloads) << " folded reloads ";
  ORE->emit(Remark << "generated in loop");
}

bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
<< "********** Function: " << mf.getName() << '\n');
Expand All @@ -2633,6 +2715,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
Indexes = &getAnalysis<SlotIndexes>();
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
DomTree = &getAnalysis<MachineDominatorTree>();
ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
Loops = &getAnalysis<MachineLoopInfo>();
Bundles = &getAnalysis<EdgeBundles>();
Expand All @@ -2658,6 +2741,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
allocatePhysRegs();
tryHintsRecoloring();
postOptimization();
reportNumberOfSplillsReloads();

releaseMemory();
return true;
Expand Down
15 changes: 7 additions & 8 deletions llvm/lib/IR/DiagnosticInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,9 +222,9 @@ OptimizationRemark::OptimizationRemark(const char *PassName,
RemarkName, *Inst->getParent()->getParent(),
Inst->getDebugLoc(), Inst->getParent()) {}

// Reports whether remarks of this kind are enabled for a pass: true only when
// PassRemarksOptLoc holds a pattern and that pattern matches the pass name.
// NOTE(review): two signature lines and two match() argument lines are
// present here; this looks like the removed (`isEnabled() const`,
// `getPassName()`) and added (`isEnabled(StringRef PassName)`, `PassName`)
// lines of the change shown together - confirm which pair applies.
bool OptimizationRemark::isEnabled() const {
bool OptimizationRemark::isEnabled(StringRef PassName) {
return PassRemarksOptLoc.Pattern &&
PassRemarksOptLoc.Pattern->match(getPassName());
PassRemarksOptLoc.Pattern->match(PassName);
}

OptimizationRemarkMissed::OptimizationRemarkMissed(const char *PassName,
Expand All @@ -243,9 +243,9 @@ OptimizationRemarkMissed::OptimizationRemarkMissed(const char *PassName,
*Inst->getParent()->getParent(),
Inst->getDebugLoc(), Inst->getParent()) {}

// Reports whether missed-optimization remarks are enabled for a pass: true
// only when PassRemarksMissedOptLoc holds a pattern and that pattern matches
// the pass name.
// NOTE(review): two signature lines and two match() argument lines are
// present here; this looks like the removed (`isEnabled() const`,
// `getPassName()`) and added (`isEnabled(StringRef PassName)`, `PassName`)
// lines of the change shown together - confirm which pair applies.
bool OptimizationRemarkMissed::isEnabled() const {
bool OptimizationRemarkMissed::isEnabled(StringRef PassName) {
return PassRemarksMissedOptLoc.Pattern &&
PassRemarksMissedOptLoc.Pattern->match(getPassName());
PassRemarksMissedOptLoc.Pattern->match(PassName);
}

OptimizationRemarkAnalysis::OptimizationRemarkAnalysis(const char *PassName,
Expand Down Expand Up @@ -273,10 +273,9 @@ OptimizationRemarkAnalysis::OptimizationRemarkAnalysis(enum DiagnosticKind Kind,
*cast<BasicBlock>(CodeRegion)->getParent(),
DLoc, CodeRegion) {}

// Reports whether analysis remarks are enabled for a pass, based on
// PassRemarksAnalysisOptLoc holding a pattern that matches the pass name.
// NOTE(review): two variants are present here and look like the removed and
// added lines of the change shown together. The `isEnabled() const` variant
// additionally returns true when shouldAlwaysPrint() is true, while the
// `isEnabled(StringRef PassName)` variant only consults the pattern - confirm
// which applies and where the shouldAlwaysPrint() check is meant to live.
bool OptimizationRemarkAnalysis::isEnabled() const {
return shouldAlwaysPrint() ||
(PassRemarksAnalysisOptLoc.Pattern &&
PassRemarksAnalysisOptLoc.Pattern->match(getPassName()));
bool OptimizationRemarkAnalysis::isEnabled(StringRef PassName) {
return PassRemarksAnalysisOptLoc.Pattern &&
PassRemarksAnalysisOptLoc.Pattern->match(PassName);
}

void DiagnosticInfoMIRParser::print(DiagnosticPrinter &DP) const {
Expand Down
65 changes: 65 additions & 0 deletions llvm/test/CodeGen/AArch64/arm64-spill-remarks.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -aarch64-neon-syntax=apple -pass-remarks-missed=regalloc 2>&1 | FileCheck -check-prefix=REMARK %s
; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -aarch64-neon-syntax=apple 2>&1 | FileCheck -check-prefix=NO_REMARK %s

; This has two nested loops (loop, loop2) followed by a separate loop (loop3);
; each loop has one value of its own that has to be spilled and then reloaded.

; (loop3:)
; REMARK: remark: /tmp/kk.c:3:20: 1 spills 1 reloads generated in loop
; (loop2:)
; REMARK: remark: /tmp/kk.c:2:20: 1 spills 1 reloads generated in loop
; (loop:)
; REMARK: remark: /tmp/kk.c:1:20: 2 spills 2 reloads generated in loop

; NO_REMARK-NOT: remark

; Control flow: %loop is an outer loop whose body contains the inner loop
; %loop2 and the block %end2; %loop3 is a separate loop that runs afterwards.
; Every inline asm below lists q0-q31, x0-x28, fp, lr, sp and memory as
; clobbered.
define void @fpr128(<4 x float>* %p) nounwind ssp {
entry:
; The branch into %loop carries !8 (line 1, column 20).
br label %loop, !dbg !8

loop:
%i = phi i32 [ 0, %entry], [ %i.2, %end2 ]
; The branch into %loop2 carries !9 (line 2, column 20).
br label %loop2, !dbg !9

loop2:
; Inner loop: %j counts from 0 while %j.2 < 100.
%j = phi i32 [ 0, %loop], [ %j.2, %loop2 ]
call void asm sideeffect "; inlineasm", "~{q0},~{q1},~{q2},~{q3},~{q4},~{q5},~{q6},~{q7},~{q8},~{q9},~{q10},~{q11},~{q12},~{q13},~{q14},~{q15},~{q16},~{q17},~{q18},~{q19},~{q20},~{q21},~{q22},~{q23},~{q24},~{q25},~{q26},~{q27},~{q28},~{q29},~{q30},~{q31},~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp},~{memory}"() nounwind
%j.2 = add i32 %j, 1
%c2 = icmp slt i32 %j.2, 100
br i1 %c2, label %loop2, label %end2

end2:
; Tail of the outer loop body: %i counts from 0 while %i.2 < 100.
call void asm sideeffect "; inlineasm", "~{q0},~{q1},~{q2},~{q3},~{q4},~{q5},~{q6},~{q7},~{q8},~{q9},~{q10},~{q11},~{q12},~{q13},~{q14},~{q15},~{q16},~{q17},~{q18},~{q19},~{q20},~{q21},~{q22},~{q23},~{q24},~{q25},~{q26},~{q27},~{q28},~{q29},~{q30},~{q31},~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp},~{memory}"() nounwind
%i.2 = add i32 %i, 1
%c = icmp slt i32 %i.2, 100
br i1 %c, label %loop, label %end

end:
; This branch into %loop3 has no debug location attached.
br label %loop3

loop3:
; Standalone loop: %k counts from 0 while %k.2 < 100.
%k = phi i32 [ 0, %end], [ %k.2, %loop3 ]
call void asm sideeffect "; inlineasm", "~{q0},~{q1},~{q2},~{q3},~{q4},~{q5},~{q6},~{q7},~{q8},~{q9},~{q10},~{q11},~{q12},~{q13},~{q14},~{q15},~{q16},~{q17},~{q18},~{q19},~{q20},~{q21},~{q22},~{q23},~{q24},~{q25},~{q26},~{q27},~{q28},~{q29},~{q30},~{q31},~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp},~{memory}"() nounwind
%k.2 = add i32 %k, 1
%c3 = icmp slt i32 %k.2, 100
; The loop's own terminator carries !10 (line 3, column 20).
br i1 %c3, label %loop3, label %end3, !dbg !10

end3:
ret void
}

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4}
!llvm.ident = !{!5}

!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
!1 = !DIFile(filename: "/tmp/kk.c", directory: "/tmp")
!2 = !{}
!3 = !{i32 2, !"Debug Info Version", i32 3}
!4 = !{i32 1, !"PIC Level", i32 2}
!5 = !{!"clang version 3.9.0 "}
!6 = distinct !DISubprogram(name: "success", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !2)
!7 = !DISubroutineType(types: !2)
!8 = !DILocation(line: 1, column: 20, scope: !6)
!9 = !DILocation(line: 2, column: 20, scope: !6)
!10 = !DILocation(line: 3, column: 20, scope: !6)
4 changes: 4 additions & 0 deletions llvm/tools/llc/llc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,10 @@ static void DiagnosticHandler(const DiagnosticInfo &DI, void *Context) {
if (DI.getSeverity() == DS_Error)
*HasError = true;

if (auto *Remark = dyn_cast<DiagnosticInfoOptimizationBase>(&DI))
if (!Remark->isEnabled())
return;

DiagnosticPrinterRawOStream DP(errs());
errs() << LLVMContext::getDiagnosticMessagePrefix(DI.getSeverity()) << ": ";
DI.print(DP);
Expand Down