Skip to content

Commit

Permalink
[MachineCSE] Add new callback for is caller preserved or constant phy…
Browse files Browse the repository at this point in the history
…sregs

The instructions addis, addi, and bl are used to calculate the address of TLS thread
local variables. These TLS access code sequences are generated repeatedly every
time the thread local variable is accessed. By communicating to Machine CSE that
X2 is guaranteed to have the same value within the same function call (so called
Caller Preserved Physical Register), the redundant TLS access code sequences are
cleaned up.

Differential Revision: https://reviews.llvm.org/D39173

llvm-svn: 318661
  • Loading branch information
Tony Jiang committed Nov 20, 2017
1 parent 45d25e1 commit f75f4d6
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 26 deletions.
4 changes: 4 additions & 0 deletions llvm/include/llvm/CodeGen/MachineRegisterInfo.h
Expand Up @@ -581,6 +581,10 @@ class MachineRegisterInfo {
/// function. Writing to a constant register has no effect.
bool isConstantPhysReg(unsigned PhysReg) const;

/// Returns true if either isConstantPhysReg or TRI->isCallerPreservedPhysReg
/// returns true for \p PhysReg. The caller-preserved query is made against
/// the function this MachineRegisterInfo belongs to. This is a utility member
/// function that simply combines the two existing queries.
bool isCallerPreservedOrConstPhysReg(unsigned PhysReg) const;

/// Get an iterator over the pressure sets affected by the given physical or
/// virtual register. If RegUnit is physical, it must be a register unit (from
/// MCRegUnitIterator).
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/CodeGen/MachineCSE.cpp
Expand Up @@ -250,8 +250,8 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
continue;
if (TargetRegisterInfo::isVirtualRegister(Reg))
continue;
// Reading constant physregs is ok.
if (!MRI->isConstantPhysReg(Reg))
// Reading either caller preserved or constant physregs is ok.
if (!MRI->isCallerPreservedOrConstPhysReg(Reg))
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
PhysRefs.insert(*AI);
}
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/CodeGen/MachineRegisterInfo.cpp
Expand Up @@ -487,6 +487,13 @@ bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg) const {
return true;
}

bool
MachineRegisterInfo::isCallerPreservedOrConstPhysReg(unsigned PhysReg) const {
const TargetRegisterInfo *TRI = getTargetRegisterInfo();
return isConstantPhysReg(PhysReg) ||
TRI->isCallerPreservedPhysReg(PhysReg, *MF);
}

/// markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the
/// specified register as undefined which causes the DBG_VALUE to be
/// deleted during LiveDebugVariables analysis.
Expand Down
63 changes: 63 additions & 0 deletions llvm/test/CodeGen/PowerPC/MCSE-caller-preserved-reg.ll
@@ -0,0 +1,63 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
; The instructions addis, addi, and bl are used to calculate the address of TLS
; thread local variables. These TLS access code sequences are generated
; repeatedly every time the thread local variable is accessed. By communicating
; to Machine CSE that X2 is guaranteed to have the same value within the same
; function call (so called Caller Preserved Physical Register), the redundant
; TLS access code sequences are cleaned up.

%"struct.CC::TT" = type { i64, i32 }
%class.CC = type { %struct.SS }
%struct.SS = type { void ()* }

@_ZN2CC2ccE = external thread_local global %"struct.CC::TT", align 8

; Test function: makes an indirect call through a function pointer loaded from
; %this, then reads field 0 of the thread_local global @_ZN2CC2ccE. If that
; value is non-zero, it passes the address of field 1 of the same global to
; @_ZN2CC3barEPi. The expected assembly below lists only one __tls_get_addr
; call sequence, even though the IR references the TLS global twice.
define noalias i8* @_ZN2CC3funEv(%class.CC* %this) {
; CHECK-LABEL: _ZN2CC3funEv:
; CHECK: mflr 0
; CHECK-NEXT: std 0, 16(1)
; CHECK-NEXT: stdu 1, -48(1)
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: .cfi_offset r30, -16
; CHECK-NEXT: std 30, 32(1)
; CHECK-NEXT: mr 30, 3
; CHECK-NEXT: ld 12, 0(30)
; CHECK-NEXT: std 2, 24(1)
; CHECK-NEXT: mtctr 12
; CHECK-NEXT: bctrl
; CHECK-NEXT: ld 2, 24(1)
; CHECK-NEXT: addis 3, 2, _ZN2CC2ccE@got@tlsgd@ha
; CHECK-NEXT: addi 3, 3, _ZN2CC2ccE@got@tlsgd@l
; CHECK-NEXT: bl __tls_get_addr(_ZN2CC2ccE@tlsgd)
; CHECK-NEXT: nop
; CHECK-NEXT: ld 4, 0(3)
; CHECK-NEXT: cmpldi 4, 0
; CHECK-NEXT: beq 0, .LBB0_2
; CHECK: addi 4, 3, 8
; CHECK-NEXT: mr 3, 30
; CHECK-NEXT: bl _ZN2CC3barEPi
; CHECK-NEXT: nop
; CHECK: ld 30, 32(1)
; CHECK-NEXT: li 3, 0
; CHECK-NEXT: addi 1, 1, 48
; CHECK-NEXT: ld 0, 16(1)
; CHECK-NEXT: mtlr 0
; CHECK-NEXT: blr
entry:
; Indirect call through the function pointer stored at the start of %this.
%foo = getelementptr inbounds %class.CC, %class.CC* %this, i64 0, i32 0, i32 0
%0 = load void ()*, void ()** %foo, align 8
tail call void %0()
; First reference to the TLS global: load its i64 field (index 0).
%1 = load i64, i64* getelementptr inbounds (%"struct.CC::TT", %"struct.CC::TT"* @_ZN2CC2ccE, i64 0, i32 0)
%tobool = icmp eq i64 %1, 0
br i1 %tobool, label %if.end, label %if.then

if.then:
; Second reference to the TLS global: address of its i32 field (index 1).
tail call void @_ZN2CC3barEPi(%class.CC* nonnull %this, i32* getelementptr inbounds (%"struct.CC::TT", %"struct.CC::TT"* @_ZN2CC2ccE, i64 0, i32 1))
br label %if.end

if.end:
ret i8* null
}

declare void @_ZN2CC3barEPi(%class.CC*, i32*)
44 changes: 20 additions & 24 deletions llvm/test/CodeGen/PowerPC/licm-tocReg.ll
@@ -1,20 +1,20 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s

; The instructions ADDIStocHA/LDtocL are used to calculate the address of
; globals. The ones that are in bb.3.if.end could not be hoisted by Machine
; The instructions ADDIStocHA/LDtocL are used to calculate the address of
; globals. The ones that are in bb.3.if.end could not be hoisted by Machine
; LICM due to BCTRL_LDinto_toc in bb2.if.then. This call causes the compiler
; to insert a save TOC to stack before the call and load into X2 to restore TOC
; after. By communicating to Machine LICM that X2 is guaranteed to have the
; after. By communicating to Machine LICM that X2 is guaranteed to have the
; same value before and after BCTRL_LDinto_toc, these instructions can be
; hoisted out of bb.3.if.end to outside of the loop.

; Pre Machine LICM MIR
;
;body:
;body:
; bb.0.entry:
; successors: %bb.2.if.then(0x40000000), %bb.3.if.end(0x40000000)
; liveins: %x3
;
;
; %4 = COPY %x3
; %5 = ADDIStocHA %x2, @ga
; %6 = LDtocL @ga, killed %5 :: (load 8 from got)
Expand All @@ -26,7 +26,7 @@
; %11 = CMPW killed %7, killed %10
; BCC 44, killed %11, %bb.2.if.then
; B %bb.3.if.end
;
;
; bb.2.if.then:
; %1 = PHI %0, %bb.0.entry, %3, %bb.3.if.end
; ADJCALLSTACKDOWN 32, 0, implicit-def dead %r1, implicit %r1
Expand All @@ -41,10 +41,10 @@
; %22 = COPY %x3
; %x3 = COPY %22
; BLR8 implicit %lr8, implicit %rm, implicit %x3
;
;
; bb.3.if.end:
; successors: %bb.2.if.then(0x04000000), %bb.3.if.end(0x7c000000)
;
;
; %2 = PHI %0, %bb.0.entry, %3, %bb.3.if.end
; %12 = ADDI %2, 1
; %13 = ADDIStocHA %x2, @ga
Expand All @@ -62,27 +62,23 @@
@ga = external global i32, align 4
@gb = external global i32, align 4

; Function Attrs: nounwind
define signext i32 @test(i32 (i32)* nocapture %FP) local_unnamed_addr #0 {
; CHECK-LABEL: test:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: addis 4, 2, .LC0@toc@ha
; CHECK-NEXT: addis 5, 2, .LC1@toc@ha
; CHECK-NEXT: ld 4, .LC0@toc@l(4)
; CHECK-NEXT: ld 5, .LC1@toc@l(5)
; CHECK-NEXT: lwz 6, 0(4)
; CHECK-NEXT: lwz 5, 0(5)
; CHECK-NEXT: cmpw 6, 5
; CHECK-NEXT: lwz 5, 0(4)
; CHECK-NEXT: addis 6, 2, .LC0@toc@ha
; CHECK-NEXT: addis 4, 2, .LC1@toc@ha
; CHECK-NEXT: ld 5, .LC1@toc@l(4)
; CHECK-NEXT: ld 6, .LC0@toc@l(6)
; CHECK-NEXT: lwz 4, 0(5)
; CHECK-NEXT: lwz 7, 0(6)
; CHECK-NEXT: cmpw 4, 7
; CHECK-NEXT: lwz 7, 0(5)
; CHECK-NEXT: mr 4, 3
; CHECK-NEXT: bgt 0, .LBB0_3
; CHECK-NEXT: # BB#1:
; CHECK-NEXT: addis 3, 2, .LC0@toc@ha
; CHECK-NEXT: addis 6, 2, .LC1@toc@ha
; CHECK-NEXT: ld 3, .LC0@toc@l(3)
; CHECK-NEXT: ld 6, .LC1@toc@l(6)
; CHECK-NEXT: bgt 0, .LBB0_2
; CHECK-NOT: addis {{[0-9]+}}, 2, .LC0@toc@ha
; CHECK-NOT: addis {{[0-9]+}}, 2, .LC1@toc@ha
; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB0_2: # %if.end
; CHECK-NEXT: .LBB0_1: # %if.end
; CHECK-NOT: addis {{[0-9]+}}, 2, .LC0@toc@ha
; CHECK-NOT: addis {{[0-9]+}}, 2, .LC1@toc@ha
; CHECK: blr
Expand Down

0 comments on commit f75f4d6

Please sign in to comment.