Skip to content

Commit

Permalink
[PowerPC] Optimize TLS initial-exec sequence to use X-Form loads/stores
Browse files Browse the repository at this point in the history
This patch adds new X-Form load/store instructions for scalar integer types.
They are selected when the address of the access is produced by an add that carries an @tls relocation.

Differential Revision: https://reviews.llvm.org/D43315

llvm-svn: 327635
  • Loading branch information
syzaara committed Mar 15, 2018
1 parent 5b330e8 commit 1110c4d
Show file tree
Hide file tree
Showing 3 changed files with 324 additions and 2 deletions.
113 changes: 112 additions & 1 deletion llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
Expand Up @@ -101,6 +101,11 @@ static cl::opt<bool> EnableBranchHint(
cl::desc("Enable static hinting of branches on ppc"),
cl::Hidden);

// Hidden command-line flag "ppc-tls-opt" (on by default) that gates the
// peephole rewriting TLS loads/stores fed by PPCISD::ADD_TLS into X-Form
// loads/stores during instruction selection.
static cl::opt<bool> EnableTLSOpt(
"ppc-tls-opt", cl::init(true),
cl::desc("Enable tls optimization peephole"),
cl::Hidden);

enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64,
ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32,
ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 };
Expand Down Expand Up @@ -199,6 +204,14 @@ namespace {
bool tryBitPermutation(SDNode *N);
bool tryIntCompareInGPR(SDNode *N);

// tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
// an X-Form load instruction with the offset being a relocation coming from
// the PPCISD::ADD_TLS. Returns true if the load node was replaced, and false
// (leaving the node untouched) if the pattern does not match.
bool tryTLSXFormLoad(LoadSDNode *N);
// tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
// an X-Form store instruction with the offset being a relocation coming from
// the PPCISD::ADD_TLS. Returns true if the store node was replaced, and false
// (leaving the node untouched) if the pattern does not match.
bool tryTLSXFormStore(StoreSDNode *N);
/// SelectCC - Select a comparison of the specified values with the
/// specified condition code, returning the CR# of the expression.
SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
Expand Down Expand Up @@ -582,6 +595,90 @@ bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
return false;
}

/// Try to select a store whose address comes from PPCISD::ADD_TLS as a single
/// X-Form TLS store.
///
/// The two operands of the ADD_TLS node are passed straight to the ST*XTLS
/// machine node as its address operands, after the value being stored.
///
/// \returns true if \p ST was replaced by a machine node, false if the store
/// does not match (wrong base opcode, a defined offset operand, or a memory
/// type other than i8/i16/i32/i64).
bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
  // Only a store with an undef offset operand, addressed through ADD_TLS,
  // qualifies.
  const SDValue Addr = ST->getBasePtr();
  if (Addr.getOpcode() != PPCISD::ADD_TLS || !ST->getOffset().isUndef())
    return false;

  // The *_32 variants take the stored value in a 32-bit register class.
  const SDValue StoredVal = ST->getValue();
  const bool Is32BitReg = StoredVal.getValueType() == MVT::i32;

  unsigned XFormOpc = 0;
  switch (ST->getMemoryVT().getSimpleVT().SimpleTy) {
  case MVT::i8:
    XFormOpc = Is32BitReg ? PPC::STBXTLS_32 : PPC::STBXTLS;
    break;
  case MVT::i16:
    XFormOpc = Is32BitReg ? PPC::STHXTLS_32 : PPC::STHXTLS;
    break;
  case MVT::i32:
    XFormOpc = Is32BitReg ? PPC::STWXTLS_32 : PPC::STWXTLS;
    break;
  case MVT::i64:
    XFormOpc = PPC::STDXTLS;
    break;
  default:
    return false;
  }

  // Operand order: value, ADD_TLS lhs, ADD_TLS rhs (the TLS symbol), chain.
  SDValue Operands[] = {StoredVal, Addr.getOperand(0), Addr.getOperand(1),
                        ST->getChain()};
  SDNode *XFormStore =
      CurDAG->getMachineNode(XFormOpc, SDLoc(ST), ST->getVTList(), Operands);
  transferMemOperands(ST, XFormStore);
  ReplaceNode(ST, XFormStore);
  return true;
}

/// Try to select a load whose address comes from PPCISD::ADD_TLS as a single
/// X-Form TLS load.
///
/// The two operands of the ADD_TLS node are passed straight to the L*XTLS
/// machine node as its address operands.
///
/// \returns true if \p LD was replaced by a machine node, false if the load
/// does not match (wrong base opcode, a defined offset operand, a
/// sign-extending load, or a memory type other than i8/i16/i32/i64) and
/// normal selection should proceed.
bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
  SDValue Base = LD->getBasePtr();
  if (Base.getOpcode() != PPCISD::ADD_TLS)
    return false;
  SDValue Offset = LD->getOffset();
  if (!Offset.isUndef())
    return false;
  // Every opcode chosen below is a zero-extending load (lbzx/lhzx/lwzx).
  // Selecting one of them for a sign-extending load would silently drop the
  // sign extension, so leave SEXTLOADs to the regular selection patterns.
  if (LD->getExtensionType() == ISD::SEXTLOAD)
    return false;

  SDLoc dl(LD);
  EVT MemVT = LD->getMemoryVT();
  EVT RegVT = LD->getValueType(0);
  unsigned Opcode;
  // Pick the opcode by memory width; the *_32 variants define a 32-bit
  // result register class, the others a 64-bit one.
  switch (MemVT.getSimpleVT().SimpleTy) {
  default:
    return false;
  case MVT::i8: {
    Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
    break;
  }
  case MVT::i16: {
    Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS;
    break;
  }
  case MVT::i32: {
    Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS;
    break;
  }
  case MVT::i64: {
    Opcode = PPC::LDXTLS;
    break;
  }
  }
  // Operand order: ADD_TLS lhs, ADD_TLS rhs (the TLS symbol), chain.
  SDValue Chain = LD->getChain();
  SDVTList VTs = LD->getVTList();
  SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain};
  SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
  transferMemOperands(LD, MN);
  ReplaceNode(LD, MN);
  return true;
}

/// Turn an or of two masked values into the rotate left word immediate then
/// mask insert (rlwimi) instruction.
bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
Expand Down Expand Up @@ -3949,14 +4046,28 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
}
}

case ISD::STORE: {
// Change TLS initial-exec D-form stores to X-form stores.
StoreSDNode *ST = cast<StoreSDNode>(N);
if (EnableTLSOpt && PPCSubTarget->isELFv2ABI() &&
ST->getAddressingMode() != ISD::PRE_INC)
if (tryTLSXFormStore(ST))
return;
break;
}
case ISD::LOAD: {
// Handle preincrement loads.
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT LoadedVT = LD->getMemoryVT();

// Normal loads are handled by code generated from the .td file.
if (LD->getAddressingMode() != ISD::PRE_INC)
if (LD->getAddressingMode() != ISD::PRE_INC) {
// Change TLS initial-exec D-form loads to X-form loads.
if (EnableTLSOpt && PPCSubTarget->isELFv2ABI())
if (tryTLSXFormLoad(LD))
return;
break;
}

SDValue Offset = LD->getOffset();
if (Offset.getOpcode() == ISD::TargetConstant ||
Expand Down
44 changes: 43 additions & 1 deletion llvm/lib/Target/PowerPC/PPCInstr64Bit.td
Expand Up @@ -499,7 +499,49 @@ defm ADD8 : XOForm_1r<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
def ADD8TLS : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc_nox0:$rA, tlsreg:$rB),
"add $rT, $rA, $rB", IIC_IntSimple,
[(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>;

// X-Form loads whose index operand ($rB) is a tlsreg, i.e. the TLS symbol
// operand also used by ADD8TLS. They carry no selection patterns ([]), so
// they are only created by hand-written selection code (see
// PPCDAGToDAGISel::tryTLSXFormLoad). The plain variants define a g8rc result;
// the _32 variants define a gprc result.
let mayLoad = 1 in {
def LBZXTLS : XForm_1<31, 87, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
"lbzx $rD, $rA, $rB", IIC_LdStLoad, []>;
def LHZXTLS : XForm_1<31, 279, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
"lhzx $rD, $rA, $rB", IIC_LdStLoad, []>;
def LWZXTLS : XForm_1<31, 23, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
"lwzx $rD, $rA, $rB", IIC_LdStLoad, []>;
def LDXTLS : XForm_1<31, 21, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
"ldx $rD, $rA, $rB", IIC_LdStLD, []>, isPPC64;
// Same encodings as above, but with a 32-bit (gprc) destination register.
def LBZXTLS_32 : XForm_1<31, 87, (outs gprc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
"lbzx $rD, $rA, $rB", IIC_LdStLoad, []>;
def LHZXTLS_32 : XForm_1<31, 279, (outs gprc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
"lhzx $rD, $rA, $rB", IIC_LdStLoad, []>;
def LWZXTLS_32 : XForm_1<31, 23, (outs gprc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
"lwzx $rD, $rA, $rB", IIC_LdStLoad, []>;

}

// X-Form stores whose index operand ($rB) is a tlsreg, i.e. the TLS symbol
// operand also used by ADD8TLS. They carry no selection patterns ([]), so
// they are only created by hand-written selection code (see
// PPCDAGToDAGISel::tryTLSXFormStore). The plain variants take the stored value
// in a g8rc register; the _32 variants take it in a gprc register.
let mayStore = 1 in {
def STBXTLS : XForm_8<31, 215, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
"stbx $rS, $rA, $rB", IIC_LdStStore, []>,
PPC970_DGroup_Cracked;
def STHXTLS : XForm_8<31, 407, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
"sthx $rS, $rA, $rB", IIC_LdStStore, []>,
PPC970_DGroup_Cracked;
def STWXTLS : XForm_8<31, 151, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
"stwx $rS, $rA, $rB", IIC_LdStStore, []>,
PPC970_DGroup_Cracked;
def STDXTLS : XForm_8<31, 149, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
"stdx $rS, $rA, $rB", IIC_LdStSTD, []>, isPPC64,
PPC970_DGroup_Cracked;
// Same encodings as above, but with a 32-bit (gprc) source register.
def STBXTLS_32 : XForm_8<31, 215, (outs), (ins gprc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
"stbx $rS, $rA, $rB", IIC_LdStStore, []>,
PPC970_DGroup_Cracked;
def STHXTLS_32 : XForm_8<31, 407, (outs), (ins gprc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
"sthx $rS, $rA, $rB", IIC_LdStStore, []>,
PPC970_DGroup_Cracked;
def STWXTLS_32 : XForm_8<31, 151, (outs), (ins gprc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
"stwx $rS, $rA, $rB", IIC_LdStStore, []>,
PPC970_DGroup_Cracked;

}

let isCommutable = 1 in
defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
"addc", "$rT, $rA, $rB", IIC_IntGeneral,
Expand Down
169 changes: 169 additions & 0 deletions llvm/test/CodeGen/PowerPC/tls-pie-xform.ll
@@ -0,0 +1,169 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-gnu-linux < %s | FileCheck %s -check-prefix=CHECK

@var_char = external thread_local local_unnamed_addr global i8, align 1
@var_short = external thread_local local_unnamed_addr global i16, align 2
@var_int = external thread_local local_unnamed_addr global i32, align 4
@var_long_long = external thread_local local_unnamed_addr global i64, align 8

; Load of the thread-local i8 @var_char. The assertions expect the GOT TPREL
; value to be loaded into r3 and the access itself to be an X-form lbzx that
; uses var_char@tls as its index operand.
define dso_local zeroext i8 @test_char_one() {
; CHECK-LABEL: test_char_one:
; CHECK: # %bb.0: # %entry
; CHECK: addis 3, 2, var_char@got@tprel@ha
; CHECK-NEXT: ld 3, var_char@got@tprel@l(3)
; CHECK-NEXT: lbzx 3, 3, var_char@tls
entry:
%0 = load i8, i8* @var_char, align 1, !tbaa !4
ret i8 %0
}

; Store of a truncated i32 argument to the thread-local i8 @var_char. The
; assertions expect an X-form stbx that stores r3 using var_char@tls as its
; index operand.
define dso_local void @test_char_two(i32 signext %a) {
; CHECK-LABEL: test_char_two:
; CHECK: # %bb.0: # %entry
; CHECK: addis 4, 2, var_char@got@tprel@ha
; CHECK-NEXT: ld 4, var_char@got@tprel@l(4)
; CHECK-NEXT: stbx 3, 4, var_char@tls
entry:
%conv = trunc i32 %a to i8
store i8 %conv, i8* @var_char, align 1, !tbaa !4
ret void
}

; Read-modify-write of the thread-local i8 @var_char. The assertions expect
; both the lbzx and the stbx to use the same base register (r4) together with
; var_char@tls; the register holding the stored value is not pinned.
define dso_local zeroext i8 @test_char_three(i8 zeroext %a) {
; CHECK-LABEL: test_char_three:
; CHECK: # %bb.0: # %entry
; CHECK: addis 4, 2, var_char@got@tprel@ha
; CHECK-NEXT: ld 4, var_char@got@tprel@l(4)
; CHECK-NEXT: lbzx 5, 4, var_char@tls
; CHECK: stbx {{[0-9]+}}, 4, var_char@tls
entry:
%0 = load i8, i8* @var_char, align 1, !tbaa !4
%add = add i8 %0, %a
store i8 %add, i8* @var_char, align 1, !tbaa !4
ret i8 %add
}

; Load of the thread-local i16 @var_short. The assertions expect an X-form
; lhzx that uses var_short@tls as its index operand.
; NOTE(review): the return value is signext but only the zero-extending lhzx
; is pinned here; whatever sign extension follows is not covered by the
; assertions - confirm the full sequence is correct.
define dso_local signext i16 @test_short_one() {
; CHECK-LABEL: test_short_one:
; CHECK: # %bb.0: # %entry
; CHECK: addis 3, 2, var_short@got@tprel@ha
; CHECK-NEXT: ld 3, var_short@got@tprel@l(3)
; CHECK-NEXT: lhzx 3, 3, var_short@tls
entry:
%0 = load i16, i16* @var_short, align 2, !tbaa !7
ret i16 %0
}

; Store of a truncated i32 argument to the thread-local i16 @var_short. The
; assertions expect an X-form sthx that stores r3 using var_short@tls as its
; index operand.
define dso_local void @test_short_two(i32 signext %a) {
; CHECK-LABEL: test_short_two:
; CHECK: # %bb.0: # %entry
; CHECK: addis 4, 2, var_short@got@tprel@ha
; CHECK-NEXT: ld 4, var_short@got@tprel@l(4)
; CHECK-NEXT: sthx 3, 4, var_short@tls
entry:
%conv = trunc i32 %a to i16
store i16 %conv, i16* @var_short, align 2, !tbaa !7
ret void
}

; Read-modify-write of the thread-local i16 @var_short. The assertions expect
; both the lhzx and the sthx to use the same base register (r4) together with
; var_short@tls; the register holding the stored value is not pinned.
; NOTE(review): the return value is signext, but the sign extension is not
; covered by the assertions.
define dso_local signext i16 @test_short_three(i16 signext %a) {
; CHECK-LABEL: test_short_three:
; CHECK: # %bb.0: # %entry
; CHECK: addis 4, 2, var_short@got@tprel@ha
; CHECK-NEXT: ld 4, var_short@got@tprel@l(4)
; CHECK-NEXT: lhzx 5, 4, var_short@tls
; CHECK: sthx {{[0-9]+}}, 4, var_short@tls
entry:
%0 = load i16, i16* @var_short, align 2, !tbaa !7
%add = add i16 %0, %a
store i16 %add, i16* @var_short, align 2, !tbaa !7
ret i16 %add
}

; Load of the thread-local i32 @var_int. The assertions expect an X-form lwzx
; that uses var_int@tls as its index operand.
; NOTE(review): the return value is signext but only the zero-extending lwzx
; is pinned here; whatever sign extension follows is not covered by the
; assertions - confirm the full sequence is correct.
define dso_local signext i32 @test_int_one() {
; CHECK-LABEL: test_int_one:
; CHECK: # %bb.0: # %entry
; CHECK: addis 3, 2, var_int@got@tprel@ha
; CHECK-NEXT: ld 3, var_int@got@tprel@l(3)
; CHECK-NEXT: lwzx 3, 3, var_int@tls
entry:
%0 = load i32, i32* @var_int, align 4, !tbaa !9
ret i32 %0
}

; Store of an i32 argument to the thread-local i32 @var_int. The assertions
; expect an X-form stwx that stores r3 using var_int@tls as its index operand.
define dso_local void @test_int_two(i32 signext %a) {
; CHECK-LABEL: test_int_two:
; CHECK: # %bb.0: # %entry
; CHECK: addis 4, 2, var_int@got@tprel@ha
; CHECK-NEXT: ld 4, var_int@got@tprel@l(4)
; CHECK-NEXT: stwx 3, 4, var_int@tls
entry:
store i32 %a, i32* @var_int, align 4, !tbaa !9
ret void
}

; Read-modify-write of the thread-local i32 @var_int. The assertions expect
; both the lwzx and the stwx to use the same base register (r4) together with
; var_int@tls; the register holding the stored value is not pinned.
define dso_local signext i32 @test_int_three(i32 signext %a) {
; CHECK-LABEL: test_int_three:
; CHECK: # %bb.0: # %entry
; CHECK: addis 4, 2, var_int@got@tprel@ha
; CHECK-NEXT: ld 4, var_int@got@tprel@l(4)
; CHECK-NEXT: lwzx 5, 4, var_int@tls
; CHECK: stwx {{[0-9]+}}, 4, var_int@tls
entry:
%0 = load i32, i32* @var_int, align 4, !tbaa !9
%add = add nsw i32 %0, %a
store i32 %add, i32* @var_int, align 4, !tbaa !9
ret i32 %add
}

; Load of the thread-local i64 @var_long_long. The assertions expect an X-form
; ldx that uses var_long_long@tls as its index operand.
define dso_local i64 @test_longlong_one() {
; CHECK-LABEL: test_longlong_one:
; CHECK: # %bb.0: # %entry
; CHECK: addis 3, 2, var_long_long@got@tprel@ha
; CHECK-NEXT: ld 3, var_long_long@got@tprel@l(3)
; CHECK-NEXT: ldx 3, 3, var_long_long@tls
entry:
%0 = load i64, i64* @var_long_long, align 8, !tbaa !11
ret i64 %0
}

; Store of a sign-extended i32 argument to the thread-local i64
; @var_long_long. The assertions expect an X-form stdx that stores r3 using
; var_long_long@tls as its index operand.
define dso_local void @test_longlong_two(i32 signext %a) {
; CHECK-LABEL: test_longlong_two:
; CHECK: # %bb.0: # %entry
; CHECK: addis 4, 2, var_long_long@got@tprel@ha
; CHECK-NEXT: ld 4, var_long_long@got@tprel@l(4)
; CHECK-NEXT: stdx 3, 4, var_long_long@tls
entry:
%conv = sext i32 %a to i64
store i64 %conv, i64* @var_long_long, align 8, !tbaa !11
ret void
}

; Read-modify-write of the thread-local i64 @var_long_long. The assertions
; expect both the ldx and the stdx to use the same base register (r4) together
; with var_long_long@tls; the register holding the stored value is not pinned.
define dso_local i64 @test_longlong_three(i64 %a) {
; CHECK-LABEL: test_longlong_three:
; CHECK: # %bb.0: # %entry
; CHECK: addis 4, 2, var_long_long@got@tprel@ha
; CHECK-NEXT: ld 4, var_long_long@got@tprel@l(4)
; CHECK-NEXT: ldx 5, 4, var_long_long@tls
; CHECK: stdx {{[0-9]+}}, 4, var_long_long@tls
entry:
%0 = load i64, i64* @var_long_long, align 8, !tbaa !11
%add = add nsw i64 %0, %a
store i64 %add, i64* @var_long_long, align 8, !tbaa !11
ret i64 %add
}

!llvm.module.flags = !{!0, !1, !2}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"PIC Level", i32 1}
!2 = !{i32 7, !"PIE Level", i32 1}
!4 = !{!5, !5, i64 0}
!5 = !{!"omnipotent char", !6, i64 0}
!6 = !{!"Simple C/C++ TBAA"}
!7 = !{!8, !8, i64 0}
!8 = !{!"short", !5, i64 0}
!9 = !{!10, !10, i64 0}
!10 = !{!"int", !5, i64 0}
!11 = !{!12, !12, i64 0}
!12 = !{!"long long", !5, i64 0}

0 comments on commit 1110c4d

Please sign in to comment.