Skip to content

Commit

Permalink
[arm] Fix unnecessary reloads from GOT.
Browse files Browse the repository at this point in the history
Summary:
This fixes PR35221.
Use pseudo-instructions to let MachineCSE hoist global address computation.

Subscribers: aemerson, javed.absar, kristof.beyls, llvm-commits, hiraditya

Differential Revision: https://reviews.llvm.org/D39871

llvm-svn: 318081
  • Loading branch information
eugenis committed Nov 13, 2017
1 parent 2ba105a commit 76d5ac4
Show file tree
Hide file tree
Showing 12 changed files with 78 additions and 43 deletions.
5 changes: 4 additions & 1 deletion llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
Expand Up @@ -1311,6 +1311,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
const MachineOperand &MO1 = MI.getOperand(1);
auto Flags = MO1.getTargetFlags();
const GlobalValue *GV = MO1.getGlobal();
bool IsARM =
Opcode != ARM::tLDRLIT_ga_pcrel && Opcode != ARM::tLDRLIT_ga_abs;
Expand All @@ -1329,7 +1330,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,

if (IsPIC) {
unsigned PCAdj = IsARM ? 8 : 4;
auto Modifier = STI->getCPModifier(GV);
auto Modifier = (Flags & ARMII::MO_GOT)
? ARMCP::GOT_PREL
: ARMCP::no_modifier;
ARMPCLabelIndex = AFI->createPICLabelUId();
CPV = ARMConstantPoolConstant::Create(
GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj, Modifier,
Expand Down
24 changes: 4 additions & 20 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Expand Up @@ -3164,28 +3164,12 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,

if (isPositionIndependent()) {
bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);

MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc dl(Op);
unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
/*AddCurrentAddress=*/UseGOT_PREL);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(
PtrVT, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
SDValue Chain = Result.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
UseGOT_PREL ? ARMII::MO_GOT : 0);
SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
if (UseGOT_PREL)
Result =
DAG.getLoad(PtrVT, dl, Chain, Result,
DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
return Result;
} else if (Subtarget->isROPI() && IsRO) {
Expand Down
16 changes: 9 additions & 7 deletions llvm/lib/Target/ARM/ARMInstrInfo.td
Expand Up @@ -332,6 +332,8 @@ def UseNegativeImmediates :
let RecomputePerFunction = 1 in {
def UseMovt : Predicate<"Subtarget->useMovt(*MF)">;
def DontUseMovt : Predicate<"!Subtarget->useMovt(*MF)">;
def UseMovtInPic : Predicate<"Subtarget->useMovt(*MF) && Subtarget->allowPositionIndependentMovt()">;
def DontUseMovtInPic : Predicate<"!Subtarget->useMovt(*MF) || !Subtarget->allowPositionIndependentMovt()">;
}
def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">;
def UseMulOps : Predicate<"Subtarget->useMulOps()">;
Expand Down Expand Up @@ -5644,26 +5646,26 @@ let isReMaterializable = 1 in {
def MOV_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
IIC_iMOVix2addpc,
[(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>,
Requires<[IsARM, UseMovt]>;
Requires<[IsARM, UseMovtInPic]>;

def LDRLIT_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
IIC_iLoadiALU,
[(set GPR:$dst,
(ARMWrapperPIC tglobaladdr:$addr))]>,
Requires<[IsARM, DontUseMovt]>;
Requires<[IsARM, DontUseMovtInPic]>;

let AddedComplexity = 10 in
def LDRLIT_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
NoItinerary,
[(set GPR:$dst,
(load (ARMWrapperPIC tglobaladdr:$addr)))]>,
Requires<[IsARM, DontUseMovt]>;
Requires<[IsARM, DontUseMovtInPic]>;

let AddedComplexity = 10 in
def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
IIC_iMOVix2ld,
[(set GPR:$dst, (load (ARMWrapperPIC tglobaladdr:$addr)))]>,
Requires<[IsARM, UseMovt]>;
Requires<[IsARM, UseMovtInPic]>;
} // isReMaterializable

// The many different faces of TLS access.
Expand All @@ -5676,15 +5678,15 @@ def : Pat<(ARMWrapper tglobaltlsaddr:$src),
Requires<[IsARM, DontUseMovt]>;

def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr),
(MOV_ga_pcrel tglobaltlsaddr:$addr)>, Requires<[IsARM, UseMovt]>;
(MOV_ga_pcrel tglobaltlsaddr:$addr)>, Requires<[IsARM, UseMovtInPic]>;

def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr),
(LDRLIT_ga_pcrel tglobaltlsaddr:$addr)>,
Requires<[IsARM, DontUseMovt]>;
Requires<[IsARM, DontUseMovtInPic]>;
let AddedComplexity = 10 in
def : Pat<(load (ARMWrapperPIC tglobaltlsaddr:$addr)),
(MOV_ga_pcrel_ldr tglobaltlsaddr:$addr)>,
Requires<[IsARM, UseMovt]>;
Requires<[IsARM, UseMovtInPic]>;


// ConstantPool, GlobalAddress, and JumpTable
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/ARM/ARMInstrThumb.td
Expand Up @@ -1509,7 +1509,7 @@ def tLDRLIT_ga_pcrel : PseudoInst<(outs tGPR:$dst), (ins i32imm:$addr),
IIC_iLoadiALU,
[(set tGPR:$dst,
(ARMWrapperPIC tglobaladdr:$addr))]>,
Requires<[IsThumb, DontUseMovt]>;
Requires<[IsThumb, DontUseMovtInPic]>;

def tLDRLIT_ga_abs : PseudoInst<(outs tGPR:$dst), (ins i32imm:$src),
IIC_iLoad_i,
Expand All @@ -1520,7 +1520,7 @@ def tLDRLIT_ga_abs : PseudoInst<(outs tGPR:$dst), (ins i32imm:$src),
// TLS globals
def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr),
(tLDRLIT_ga_pcrel tglobaltlsaddr:$addr)>,
Requires<[IsThumb, DontUseMovt]>;
Requires<[IsThumb, DontUseMovtInPic]>;
def : Pat<(ARMWrapper tglobaltlsaddr:$addr),
(tLDRLIT_ga_abs tglobaltlsaddr:$addr)>,
Requires<[IsThumb, DontUseMovt]>;
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/ARM/ARMInstrThumb2.td
Expand Up @@ -3843,13 +3843,13 @@ let isReMaterializable = 1 in {
def t2MOV_ga_pcrel : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr),
IIC_iMOVix2addpc,
[(set rGPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>,
Requires<[IsThumb, HasV8MBaseline, UseMovt]>;
Requires<[IsThumb, HasV8MBaseline, UseMovtInPic]>;

}

def : T2Pat<(ARMWrapperPIC tglobaltlsaddr :$dst),
(t2MOV_ga_pcrel tglobaltlsaddr:$dst)>,
Requires<[IsThumb2, UseMovt]>;
Requires<[IsThumb2, UseMovtInPic]>;
def : T2Pat<(ARMWrapper tglobaltlsaddr:$dst),
(t2MOVi32imm tglobaltlsaddr:$dst)>,
Requires<[IsThumb2, UseMovt]>;
Expand Down
6 changes: 5 additions & 1 deletion llvm/lib/Target/ARM/ARMInstructionSelector.cpp
Expand Up @@ -538,8 +538,12 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB,
: (Indirect ? ARM::LDRLIT_ga_pcrel_ldr : ARM::LDRLIT_ga_pcrel);
MIB->setDesc(TII.get(Opc));

int TargetFlags = ARMII::MO_NO_FLAG;
if (STI.isTargetDarwin())
MIB->getOperand(1).setTargetFlags(ARMII::MO_NONLAZY);
TargetFlags |= ARMII::MO_NONLAZY;
if (STI.isGVInGOT(GV))
TargetFlags |= ARMII::MO_GOT;
MIB->getOperand(1).setTargetFlags(TargetFlags);

if (Indirect)
MIB.addMemOperand(MF.getMachineMemOperand(
Expand Down
8 changes: 3 additions & 5 deletions llvm/lib/Target/ARM/ARMSubtarget.cpp
Expand Up @@ -344,11 +344,9 @@ bool ARMSubtarget::isGVIndirectSymbol(const GlobalValue *GV) const {
return false;
}

ARMCP::ARMCPModifier ARMSubtarget::getCPModifier(const GlobalValue *GV) const {
if (isTargetELF() && TM.isPositionIndependent() &&
!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
return ARMCP::GOT_PREL;
return ARMCP::no_modifier;
/// Reports whether \p GV is reached through the GOT: the target must be ELF,
/// the code position-independent, and the global not assumed DSO-local.
bool ARMSubtarget::isGVInGOT(const GlobalValue *GV) const {
  // Only position-independent ELF code can qualify.
  if (!isTargetELF() || !TM.isPositionIndependent())
    return false;
  return !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
}

unsigned ARMSubtarget::getMispredictionPenalty() const {
Expand Down
9 changes: 8 additions & 1 deletion llvm/lib/Target/ARM/ARMSubtarget.h
Expand Up @@ -752,7 +752,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
bool isGVIndirectSymbol(const GlobalValue *GV) const;

/// Returns true if the GV must be accessed through the GOT (ELF target, position-independent code, and GV not assumed DSO-local).
ARMCP::ARMCPModifier getCPModifier(const GlobalValue *GV) const;
bool isGVInGOT(const GlobalValue *GV) const;

/// True if fast-isel is used.
bool useFastISel() const;
Expand All @@ -767,6 +767,13 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
return ARM::BX_RET;
return ARM::MOVPCLR;
}

/// Whether a movw+movt pair may be used to compute a global's address in
/// position-independent code.
/// ROPI qualifies because it does not go through the GOT; ELF otherwise does
/// not, since it has no GOT relocations for movw+movt.
bool allowPositionIndependentMovt() const {
  if (isROPI())
    return true;
  return !isTargetELF();
}
};

} // end namespace llvm
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
Expand Up @@ -228,7 +228,10 @@ namespace ARMII {

/// MO_OPTION_MASK - Most flags are mutually exclusive; this mask selects
/// just that part of the flag set.
MO_OPTION_MASK = 0x0f,
MO_OPTION_MASK = 0x3,

/// MO_GOT - On a symbol operand, this represents a GOT relative relocation.
MO_GOT = 0x8,

/// MO_SBREL - On a symbol operand, this represents a static base relative
/// relocation. Used in movw and movt instructions.
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/ARM/GlobalISel/arm-select-globals-pic.mir
Expand Up @@ -56,7 +56,7 @@ body: |
%0(p0) = G_GLOBAL_VALUE @external_global
; DARWIN-MOVT: [[G:%[0-9]+]]:gpr = MOV_ga_pcrel_ldr {{.*}} @external_global :: (load 4 from got)
; DARWIN-NOMOVT: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel_ldr {{.*}}@external_global :: (load 4 from got)
; ELF: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel_ldr @external_global :: (load 4 from got)
; ELF: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel_ldr target-flags(<unknown>) @external_global :: (load 4 from got)
%1(s32) = G_LOAD %0(p0) :: (load 4 from @external_global)
; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, _ :: (load 4 from @external_global)
Expand Down Expand Up @@ -108,7 +108,7 @@ body: |
%0(p0) = G_GLOBAL_VALUE @external_constant
; DARWIN-MOVT: [[G:%[0-9]+]]:gpr = MOV_ga_pcrel_ldr {{.*}} @external_constant :: (load 4 from got)
; DARWIN-NOMOVT: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel_ldr {{.*}}@external_constant :: (load 4 from got)
; ELF: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel_ldr @external_constant :: (load 4 from got)
; ELF: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel_ldr target-flags(<unknown>) @external_constant :: (load 4 from got)
%1(s32) = G_LOAD %0(p0) :: (load 4 from @external_constant)
; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, _ :: (load 4 from @external_constant)
Expand Down
30 changes: 30 additions & 0 deletions llvm/test/CodeGen/ARM/load-global2.ll
@@ -0,0 +1,30 @@
; PR35221. Test that external global address is not reloaded from GOT in each BB.
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -relocation-model=pic | FileCheck %s -check-prefix=LINUX-PIC

@x = external global i8, align 1

define signext i8 @foo() {
entry:
; LINUX-PIC: ldr r[[A:.]], .LCPI0_0
; LINUX-PIC: ldr r[[B:.]], [pc, r[[A]]]
; LINUX-PIC: ldrb r{{.}}, [r[[B]]]
%0 = load i8, i8* @x
%tobool = icmp eq i8 %0, 0
br i1 %tobool, label %bb1, label %bb2

bb1:
call void @bar()
; No more pc-relative loads! Reuse r[[B]].
; LINUX-PIC: bl bar
; LINUX-PIC-NOT: ldr{{.*}}[pc,
; LINUX-PIC: ldrsb r{{.}}, [r[[B]]]
%1 = load i8, i8* @x
ret i8 %1

bb2:
ret i8 0
}

declare void @bar()


6 changes: 5 additions & 1 deletion llvm/test/CodeGen/Thumb2/v8_IT_3.ll
Expand Up @@ -55,7 +55,11 @@ bb4:
; CHECK-PIC: cmp
; CHECK-PIC: cmp
; CHECK-PIC: cmp
; CHECK-PIC-NEXT: bne
; CHECK-PIC: it eq
; CHECK-PIC-NEXT: ldreq
; CHECK-PIC-NEXT: it eq
; CHECK-PIC-NEXT: cmpeq
; CHECK-PIC-NEXT: beq
; CHECK-PIC: %bb6
; CHECK-PIC-NEXT: movs
; CHECK-PIC-NEXT: add
Expand Down

0 comments on commit 76d5ac4

Please sign in to comment.