[ARM] Add OptMinSize to ARMSubtarget
    
In many places in the backend, we want to know whether we're
optimising for code size, and this is currently done by checking
the attributes of the current machine function. A subtarget is
created on a per-function basis, so whether we're compiling for
code size is known at construction time; record this in the new
object.

Differential Revision: https://reviews.llvm.org/D57812

llvm-svn: 353501
sparker-arm committed Feb 8, 2019
commit 5b09834 (parent 807960e)
Showing 15 changed files with 60 additions and 47 deletions.
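
In miniature, the patch moves call sites from re-deriving the minsize attribute to reading a flag cached on the subtarget. A minimal sketch of the pattern, assuming only that the attribute is known at subtarget construction (illustrative names, not the LLVM classes):

// SubtargetSketch is a hypothetical stand-in for ARMSubtarget.
class SubtargetSketch {
  bool OptMinSize; // cached from Function::optForMinSize() at construction
public:
  explicit SubtargetSketch(bool MinSize) : OptMinSize(MinSize) {}
  bool optForMinSize() const { return OptMinSize; }
};

// Call sites then ask the subtarget instead of re-deriving the attribute:
//   if (!Subtarget.optForMinSize()) return false;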
llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp (2 changes: 1 addition & 1 deletion)

@@ -2265,7 +2265,7 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
                                       unsigned NumBytes) {
   // This optimisation potentially adds lots of load and store
   // micro-operations, it's only really a great benefit to code-size.
-  if (!MF.getFunction().optForMinSize())
+  if (!Subtarget.optForMinSize())
     return false;

   // If only one register is pushed/popped, LLVM can use an LDR/STR
llvm/lib/Target/ARM/ARMFastISel.cpp (4 changes: 2 additions & 2 deletions)

@@ -497,7 +497,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
   }

   unsigned ResultReg = 0;
-  if (Subtarget->useMovt(*FuncInfo.MF))
+  if (Subtarget->useMovt())
     ResultReg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());

   if (ResultReg)
@@ -555,7 +555,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
   bool IsPositionIndependent = isPositionIndependent();
   // Use movw+movt when possible, it avoids constant pool entries.
   // Non-darwin targets only support static movt relocations in FastISel.
-  if (Subtarget->useMovt(*FuncInfo.MF) &&
+  if (Subtarget->useMovt() &&
       (Subtarget->isTargetMachO() || !IsPositionIndependent)) {
     unsigned Opc;
     unsigned char TF = 0;
llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp (2 changes: 1 addition & 1 deletion)

@@ -465,7 +465,7 @@ unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
     if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
     if (ARM_AM::isSOImmTwoPartVal(Val)) return 2; // two instrs
   }
-  if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
+  if (Subtarget->useMovt()) return 2; // MOVW + MOVT
   return 3; // Literal pool load
 }
llvm/lib/Target/ARM/ARMISelLowering.cpp (21 changes: 12 additions & 9 deletions)

@@ -2069,7 +2069,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
   auto *BB = CLI.CS.getParent();
   bool PreferIndirect =
-      Subtarget->isThumb() && MF.getFunction().optForMinSize() &&
+      Subtarget->isThumb() && Subtarget->optForMinSize() &&
       count_if(GV->users(), [&BB](const User *U) {
         return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
       }) > 2;
@@ -2141,7 +2141,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     CallOpc = ARMISD::CALL_NOLINK;
   else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
            // Emit regular call when code size is the priority
-           !MF.getFunction().optForMinSize())
+           !Subtarget->optForMinSize())
     // "mov lr, pc; b _foo" to avoid confusing the RSP
     CallOpc = ARMISD::CALL_NOLINK;
   else
@@ -3224,7 +3224,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
   } else if (Subtarget->isRWPI() && !IsRO) {
     // SB-relative.
     SDValue RelAddr;
-    if (Subtarget->useMovt(DAG.getMachineFunction())) {
+    if (Subtarget->useMovt()) {
       ++NumMovwMovt;
       SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
       RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
@@ -3244,7 +3244,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,

   // If we have T2 ops, we can materialize the address directly via movt/movw
   // pair. This is always cheaper.
-  if (Subtarget->useMovt(DAG.getMachineFunction())) {
+  if (Subtarget->useMovt()) {
     ++NumMovwMovt;
     // FIXME: Once remat is capable of dealing with instructions with register
     // operands, expand this into two nodes.
@@ -3267,7 +3267,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
   SDLoc dl(Op);
   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();

-  if (Subtarget->useMovt(DAG.getMachineFunction()))
+  if (Subtarget->useMovt())
     ++NumMovwMovt;

   // FIXME: Once remat is capable of dealing with instructions with register
@@ -3287,7 +3287,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
 SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
                                                      SelectionDAG &DAG) const {
   assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
-  assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
+  assert(Subtarget->useMovt() &&
          "Windows on ARM expects to use movw/movt");
   assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
          "ROPI/RWPI not currently supported for Windows");
@@ -7808,8 +7808,7 @@ ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
     return SDValue();

   const auto &ST = static_cast<const ARMSubtarget&>(DAG.getSubtarget());
-  const auto &MF = DAG.getMachineFunction();
-  const bool MinSize = MF.getFunction().optForMinSize();
+  const bool MinSize = ST.optForMinSize();
   const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
                                       : ST.hasDivideInARMMode();

@@ -8979,7 +8978,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,

   // Load an immediate to varEnd.
   unsigned varEnd = MRI.createVirtualRegister(TRC);
-  if (Subtarget->useMovt(*MF)) {
+  if (Subtarget->useMovt()) {
     unsigned Vtmp = varEnd;
     if ((LoopSize & 0xFFFF0000) != 0)
       Vtmp = MRI.createVirtualRegister(TRC);
@@ -14714,6 +14713,10 @@ bool ARMTargetLowering::isCheapToSpeculateCtlz() const {
   return Subtarget->hasV6T2Ops();
 }

+bool ARMTargetLowering::shouldExpandShift(SelectionDAG &DAG, SDNode *N) const {
+  return !Subtarget->optForMinSize();
+}
+
 Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                                          AtomicOrdering Ord) const {
   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
llvm/lib/Target/ARM/ARMISelLowering.h (6 changes: 1 addition & 5 deletions)

@@ -567,11 +567,7 @@ class VectorType;
       return HasStandaloneRem;
     }

-    bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override {
-      if (DAG.getMachineFunction().getFunction().optForMinSize())
-        return false;
-      return true;
-    }
+    bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;

     CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const;
     CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const;
llvm/lib/Target/ARM/ARMInstrInfo.cpp (2 changes: 1 addition & 1 deletion)

@@ -94,7 +94,7 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const {
   const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
   const TargetMachine &TM = MF.getTarget();

-  if (!Subtarget.useMovt(MF)) {
+  if (!Subtarget.useMovt()) {
     if (TM.isPositionIndependent())
       expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_pcrel, ARM::LDRi12);
     else
llvm/lib/Target/ARM/ARMInstrInfo.td (15 changes: 7 additions & 8 deletions)

@@ -354,14 +354,14 @@ def UseNegativeImmediates :

 // FIXME: Eventually this will be just "hasV6T2Ops".
 let RecomputePerFunction = 1 in {
-def UseMovt : Predicate<"Subtarget->useMovt(*MF)">;
-def DontUseMovt : Predicate<"!Subtarget->useMovt(*MF)">;
-def UseMovtInPic : Predicate<"Subtarget->useMovt(*MF) && Subtarget->allowPositionIndependentMovt()">;
-def DontUseMovtInPic : Predicate<"!Subtarget->useMovt(*MF) || !Subtarget->allowPositionIndependentMovt()">;
+def UseMovt : Predicate<"Subtarget->useMovt()">;
+def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
+def UseMovtInPic : Predicate<"Subtarget->useMovt() && Subtarget->allowPositionIndependentMovt()">;
+def DontUseMovtInPic : Predicate<"!Subtarget->useMovt() || !Subtarget->allowPositionIndependentMovt()">;

 def UseFPVMLx: Predicate<"((Subtarget->useFPVMLx() &&"
                          " TM.Options.AllowFPOpFusion != FPOpFusion::Fast) ||"
-                         "MF->getFunction().optForMinSize())">;
+                         "Subtarget->optForMinSize())">;
 }
 def UseMulOps : Predicate<"Subtarget->useMulOps()">;

@@ -718,15 +718,14 @@ def mod_imm_neg : Operand<i32>, PatLeaf<(imm), [{

 /// arm_i32imm - True for +V6T2, or when isSOImmTwoParVal()
 def arm_i32imm : PatLeaf<(imm), [{
-  if (Subtarget->useMovt(*MF))
+  if (Subtarget->useMovt())
     return true;
   return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
 }]> {
   // Ideally this would be an IntImmLeaf, but then we wouldn't have access to
   // the MachineFunction.
   let GISelPredicateCode = [{
-    const auto &MF = *MI.getParent()->getParent();
-    if (STI.useMovt(MF))
+    if (STI.useMovt())
       return true;

     const auto &MO = MI.getOperand(1);
llvm/lib/Target/ARM/ARMInstructionSelector.cpp (2 changes: 1 addition & 1 deletion)

@@ -581,7 +581,7 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB,
   auto &MBB = *MIB->getParent();
   auto &MF = *MBB.getParent();

-  bool UseMovt = STI.useMovt(MF);
+  bool UseMovt = STI.useMovt();

   unsigned Size = TM.getPointerSize(0);
   unsigned Alignment = 4;
llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp (2 changes: 1 addition & 1 deletion)

@@ -1286,7 +1286,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
   // can still change to a writeback form as that will save us 2 bytes
   // of code size. It can create WAW hazards though, so only do it if
   // we're minimizing code size.
-  if (!MBB.getParent()->getFunction().optForMinSize() || !BaseKill)
+  if (!STI->optForMinSize() || !BaseKill)
     return false;

   bool HighRegsUsed = false;
llvm/lib/Target/ARM/ARMRegisterInfo.td (6 changes: 3 additions & 3 deletions)

@@ -301,7 +301,7 @@ def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)> {
                    (decimate (rotl SPR, 1), 4),
                    (decimate (rotl SPR, 1), 2))];
   let AltOrderSelect = [{
-    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs();
   }];
   let DiagnosticString = "operand must be a register in range [s0, s31]";
 }
@@ -313,7 +313,7 @@ def HPR : RegisterClass<"ARM", [f16], 32, (sequence "S%u", 0, 31)> {
                    (decimate (rotl HPR, 1), 4),
                    (decimate (rotl HPR, 1), 2))];
   let AltOrderSelect = [{
-    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs();
   }];
   let DiagnosticString = "operand must be a register in range [s0, s31]";
 }
@@ -335,7 +335,7 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 6
   let AltOrders = [(rotl DPR, 16),
                    (add (decimate (rotl DPR, 16), 2), (rotl DPR, 16))];
   let AltOrderSelect = [{
-    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs();
   }];
   let DiagnosticType = "DPR";
 }
llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp (2 changes: 1 addition & 1 deletion)

@@ -170,7 +170,7 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(

   // Code size optimisation: do not inline memcpy if expansion results in
   // more instructions than the libary call.
-  if (NumMEMCPYs > 1 && DAG.getMachineFunction().getFunction().optForMinSize()) {
+  if (NumMEMCPYs > 1 && Subtarget.optForMinSize()) {
     return SDValue();
   }

llvm/lib/Target/ARM/ARMSubtarget.cpp (16 changes: 9 additions & 7 deletions)

@@ -92,10 +92,12 @@ ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU,

 ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
                            const std::string &FS,
-                           const ARMBaseTargetMachine &TM, bool IsLittle)
+                           const ARMBaseTargetMachine &TM, bool IsLittle,
+                           bool MinSize)
     : ARMGenSubtargetInfo(TT, CPU, FS), UseMulOps(UseFusedMulOps),
-      CPUString(CPU), IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options),
-      TM(TM), FrameLowering(initializeFrameLowering(CPU, FS)),
+      CPUString(CPU), OptMinSize(MinSize), IsLittle(IsLittle),
+      TargetTriple(TT), Options(TM.Options), TM(TM),
+      FrameLowering(initializeFrameLowering(CPU, FS)),
       // At this point initializeSubtargetDependencies has been called so
       // we can query directly.
       InstrInfo(isThumb1Only()
@@ -373,20 +375,20 @@ bool ARMSubtarget::enablePostRAScheduler() const {

 bool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier(); }

-bool ARMSubtarget::useStride4VFPs(const MachineFunction &MF) const {
+bool ARMSubtarget::useStride4VFPs() const {
   // For general targets, the prologue can grow when VFPs are allocated with
   // stride 4 (more vpush instructions). But WatchOS uses a compact unwind
   // format which it's more important to get right.
   return isTargetWatchABI() ||
-         (useWideStrideVFP() && !MF.getFunction().optForMinSize());
+         (useWideStrideVFP() && !OptMinSize);
 }

-bool ARMSubtarget::useMovt(const MachineFunction &MF) const {
+bool ARMSubtarget::useMovt() const {
   // NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit
   // immediates as it is inherently position independent, and may be out of
   // range otherwise.
   return !NoMovt && hasV8MBaselineOps() &&
-         (isTargetWindows() || !MF.getFunction().optForMinSize() || genExecuteOnly());
+         (isTargetWindows() || !OptMinSize || genExecuteOnly());
 }

 bool ARMSubtarget::useFastISel() const {
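With the flag cached, helpers like useStride4VFPs and useMovt above no longer need a MachineFunction threaded through them. A hypothetical caller, for illustration only (this helper is not part of the patch; it assumes it lives inside the ARM backend where ARMSubtarget.h is visible):

#include "ARMSubtarget.h"                 // backend-local header
#include "llvm/CodeGen/MachineFunction.h"

static bool preferMovwMovt(const llvm::MachineFunction &MF) {
  const auto &STI = MF.getSubtarget<llvm::ARMSubtarget>();
  return STI.useMovt();                   // previously: STI.useMovt(MF)
}
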
llvm/lib/Target/ARM/ARMSubtarget.h (12 changes: 9 additions & 3 deletions)

@@ -445,6 +445,10 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
   /// What alignment is preferred for loop bodies, in log2(bytes).
   unsigned PrefLoopAlignment = 0;

+  /// OptMinSize - True if we're optimising for minimum code size, equal to
+  /// the function attribute.
+  bool OptMinSize = false;
+
   /// IsLittle - The target is Little Endian
   bool IsLittle;

@@ -467,7 +471,8 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
   /// of the specified triple.
   ///
   ARMSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
-               const ARMBaseTargetMachine &TM, bool IsLittle);
+               const ARMBaseTargetMachine &TM, bool IsLittle,
+               bool MinSize = false);

   /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
   /// that still makes it profitable to inline the call.
@@ -709,6 +714,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
   bool disablePostRAScheduler() const { return DisablePostRAScheduler; }
   bool useSoftFloat() const { return UseSoftFloat; }
   bool isThumb() const { return InThumbMode; }
+  bool optForMinSize() const { return OptMinSize; }
   bool isThumb1Only() const { return InThumbMode && !HasThumb2; }
   bool isThumb2() const { return InThumbMode && HasThumb2; }
   bool hasThumb2() const { return HasThumb2; }
@@ -735,9 +741,9 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
            isThumb1Only();
   }

-  bool useStride4VFPs(const MachineFunction &MF) const;
+  bool useStride4VFPs() const;

-  bool useMovt(const MachineFunction &MF) const;
+  bool useMovt() const;

   bool supportsTailCall() const { return SupportsTailCall; }

llvm/lib/Target/ARM/ARMTargetMachine.cpp (11 changes: 9 additions & 2 deletions)

@@ -263,13 +263,20 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
   if (SoftFloat)
     FS += FS.empty() ? "+soft-float" : ",+soft-float";

-  auto &I = SubtargetMap[CPU + FS];
+  // Use the optminsize to identify the subtarget, but don't use it in the
+  // feature string.
+  std::string Key = CPU + FS;
+  if (F.optForMinSize())
+    Key += "+minsize";
+
+  auto &I = SubtargetMap[Key];
   if (!I) {
     // This needs to be done before we create a new subtarget since any
     // creation will depend on the TM and the code generation flags on the
     // function that reside in TargetOptions.
     resetTargetOptions(F);
-    I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle);
+    I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle,
+                                        F.optForMinSize());

     if (!I->isThumb() && !I->hasARMOps())
       F.getContext().emitError("Function '" + F.getName() + "' uses ARM "
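The subtle part of this file is the map key: two functions with identical CPU and feature strings but different minsize attributes must not share a subtarget, so the attribute is folded into the cache key without polluting the feature string itself. A reduced sketch of that idea, using hypothetical names (SubtargetSketch, getSubtarget) rather than the LLVM classes:

// Cache one subtarget per distinct (CPU, features, minsize) combination,
// mirroring the keying in getSubtargetImpl above.
#include <map>
#include <memory>
#include <string>

struct SubtargetSketch { bool OptMinSize; };

static std::map<std::string, std::unique_ptr<SubtargetSketch>> Cache;

SubtargetSketch &getSubtarget(const std::string &CPU, const std::string &FS,
                              bool MinSize) {
  std::string Key = CPU + FS;
  if (MinSize)
    Key += "+minsize"; // distinguishes otherwise-identical subtargets
  auto &I = Cache[Key];
  if (!I)
    I = std::make_unique<SubtargetSketch>(SubtargetSketch{MinSize});
  return *I;
}
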
llvm/lib/Target/ARM/Thumb2SizeReduction.cpp (4 changes: 2 additions & 2 deletions)

@@ -453,7 +453,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
     break;
   case ARM::t2LDR_POST:
   case ARM::t2STR_POST: {
-    if (!MBB.getParent()->getFunction().optForMinSize())
+    if (!MinimizeSize)
      return false;

    if (!MI->hasOneMemOperand() ||
@@ -1128,7 +1128,7 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {

   // Optimizing / minimizing size? Minimizing size implies optimizing for size.
   OptimizeSize = MF.getFunction().optForSize();
-  MinimizeSize = MF.getFunction().optForMinSize();
+  MinimizeSize = STI->optForMinSize();

   BlockInfo.clear();
   BlockInfo.resize(MF.getNumBlockIDs());
