Skip to content

Commit

Permalink
[PowerPC][TLS] Add additional TLS X-Form load/store instructions
Browse files Browse the repository at this point in the history
This patch is a follow-up to D43315, and adds the following new TLS-specific
load/store instructions for integer and floating point scalar types:
```
LHAXTLS
LWAXTLS
LHAXTLS_32
LWAXTLS_32
LFSXTLS
LFDXTLS
STFSXTLS
STFDXTLS
```
These instructions can be used to optimize TLS sequences when D-Form
loads/stores follow an ADD_TLS instruction.

Duplicate versions of these instructions are also added within an isAsmParserOnly=1
block (similar to D47382) to allow llvm-mc to assemble these instructions.

Differential Revision: https://reviews.llvm.org/D153645
  • Loading branch information
amy-kwan committed Jun 27, 2023
1 parent 457dc72 commit 11b71ad
Show file tree
Hide file tree
Showing 9 changed files with 225 additions and 14 deletions.
18 changes: 12 additions & 6 deletions llvm/lib/Target/PowerPC/P10InstrResources.td
Expand Up @@ -1296,11 +1296,9 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read],
LD, LDtoc, LDtocBA, LDtocCPT, LDtocJTI, LDtocL, SPILLTOVSR_LD,
LDBRX,
DFLOADf32, DFLOADf64, LFD,
LFDX, XFLOADf32, XFLOADf64,
LFIWAX, LIWAX,
LFIWZX, LIWZX,
LHA, LHA8,
LHAX, LHAX8,
LHBRX, LHBRX8,
LHZ, LHZ8,
LVEBX,
Expand All @@ -1309,7 +1307,7 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read],
LVX,
LVXL,
LWA, LWA_32,
LWAX, LWAX_32,
LWAX, LWAXTLS, LWAXTLS_, LWAXTLS_32, LWAX_32,
LWBRX, LWBRX8,
LWZ, LWZ8, LWZtoc, LWZtocL,
LXSD,
Expand Down Expand Up @@ -1340,6 +1338,8 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read, P10LD_Read],
ICBT,
LBZX, LBZX8, LBZXTLS, LBZXTLS_, LBZXTLS_32,
LDX, LDXTLS, LDXTLS_, SPILLTOVSR_LDX,
LFDX, LFDXTLS, LFDXTLS_, XFLOADf32, XFLOADf64,
LHAX, LHAX8, LHAXTLS, LHAXTLS_, LHAXTLS_32,
LHZX, LHZX8, LHZXTLS, LHZXTLS_, LHZXTLS_32,
LWZX, LWZX8, LWZXTLS, LWZXTLS_, LWZXTLS_32,
LXVL,
Expand Down Expand Up @@ -1442,11 +1442,17 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10LD_Read],
def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C],
(instrs
LFS,
LFSX,
LXSSP,
LXSSPX
)>;

// 2-way crack instructions
// 6 Cycles Load operations, and 4 Cycles ALU2 operations, 2 input operands
// Scheduling entry for the X-form single-precision FP load LFSX together with
// its TLS variants (LFSXTLS and the assembler-only LFSXTLS_). It uses the same
// write resources as the LFS/LXSSP/LXSSPX entry but is kept as a separate
// record for the X-form (register + register) addressing forms.
def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C],
(instrs
LFSX, LFSXTLS, LFSXTLS_
)>;

// 4-way crack instructions
// 6 Cycles Load operations, 4 Cycles ALU2 operations, 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 1 input operands
def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C, P10W_SX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY],
Expand Down Expand Up @@ -1823,12 +1829,10 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read],
DFSTOREf32, DFSTOREf64, STFD,
STFDU,
STFDUX,
STFDX,
STFIWX, STIWX,
STFS,
STFSU,
STFSUX,
STFSX,
STH, STH8,
STHBRX,
STHU, STHU8,
Expand Down Expand Up @@ -1867,6 +1871,8 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read],
CP_COPY, CP_COPY8,
STBX, STBX8, STBXTLS, STBXTLS_, STBXTLS_32,
SPILLTOVSR_STX, STDX, STDXTLS, STDXTLS_,
STFDX, STFDXTLS, STFDXTLS_,
STFSX, STFSXTLS, STFSXTLS_,
STHX, STHX8, STHXTLS, STHXTLS_, STHXTLS_32,
STWX, STWX8, STWXTLS, STWXTLS_, STWXTLS_32,
STXVL,
Expand Down
8 changes: 5 additions & 3 deletions llvm/lib/Target/PowerPC/P9InstrResources.td
Expand Up @@ -765,6 +765,7 @@ def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
(instrs
LFIWZX,
LFDX,
(instregex "LFDXTLS?(_)?$"),
LFD
)>;

Expand Down Expand Up @@ -815,9 +816,9 @@ def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
DISP_1C, DISP_1C],
(instrs
(instregex "LHA(X)?(8)?$"),
(instregex "LHA(X)?(TLS)?(8)?(_32)?(_)?$"),
(instregex "CP_PASTE(8)?_rec$"),
(instregex "LWA(X)?(_32)?$"),
(instregex "LWA(X)?(TLS)?(_32)?(_)?$"),
TCHECK
)>;

Expand Down Expand Up @@ -850,6 +851,7 @@ def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
DISP_3SLOTS_1C, DISP_3SLOTS_1C],
(instrs
LFSX,
(instregex "LFSXTLS?(_)?$"),
LFS
)>;

Expand Down Expand Up @@ -891,7 +893,7 @@ def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
// all three dispatches for the superslice.
def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
(instrs
(instregex "STF(S|D|IWX|SX|DX)$"),
(instregex "STF(S|D|IWX|SX|DX|SXTLS|DXTLS|SXTLS_|DXTLS_)$"),
(instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
(instregex "STW(8)?$"),
(instregex "(D|X)FSTORE(f32|f64)$"),
Expand Down
27 changes: 25 additions & 2 deletions llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
Expand Up @@ -739,6 +739,14 @@ bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
Opcode = PPC::STDXTLS;
break;
}
case MVT::f32: {
Opcode = PPC::STFSXTLS;
break;
}
case MVT::f64: {
Opcode = PPC::STFDXTLS;
break;
}
}
SDValue Chain = ST->getChain();
SDVTList VTs = ST->getVTList();
Expand All @@ -763,6 +771,7 @@ bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
SDLoc dl(LD);
EVT MemVT = LD->getMemoryVT();
EVT RegVT = LD->getValueType(0);
bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
unsigned Opcode;
switch (MemVT.getSimpleVT().SimpleTy) {
default:
Expand All @@ -772,17 +781,31 @@ bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
break;
}
case MVT::i16: {
Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS;
if (RegVT == MVT::i32)
Opcode = isSExt ? PPC::LHAXTLS_32 : PPC::LHZXTLS_32;
else
Opcode = isSExt ? PPC::LHAXTLS : PPC::LHZXTLS;
break;
}
case MVT::i32: {
Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS;
if (RegVT == MVT::i32)
Opcode = isSExt ? PPC::LWAXTLS_32 : PPC::LWZXTLS_32;
else
Opcode = isSExt ? PPC::LWAXTLS : PPC::LWZXTLS;
break;
}
case MVT::i64: {
Opcode = PPC::LDXTLS;
break;
}
case MVT::f32: {
Opcode = PPC::LFSXTLS;
break;
}
case MVT::f64: {
Opcode = PPC::LFDXTLS;
break;
}
}
SDValue Chain = LD->getChain();
SDVTList VTs = LD->getVTList();
Expand Down
42 changes: 42 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstr64Bit.td
Expand Up @@ -724,18 +724,32 @@ def LBZXTLS : XForm_1<31, 87, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$R
"lbzx $RST, $RA, $RB", IIC_LdStLoad, []>;
def LHZXTLS : XForm_1<31, 279, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
"lhzx $RST, $RA, $RB", IIC_LdStLoad, []>;
def LHAXTLS : XForm_1<31, 343, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
"lhax $RST, $RA, $RB", IIC_LdStLoad, []>;
def LWZXTLS : XForm_1<31, 23, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
"lwzx $RST, $RA, $RB", IIC_LdStLoad, []>;
def LWAXTLS : XForm_1<31, 341, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
"lwax $RST, $RA, $RB", IIC_LdStLoad, []>;
def LDXTLS : XForm_1<31, 21, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
"ldx $RST, $RA, $RB", IIC_LdStLD, []>, isPPC64;
def LBZXTLS_32 : XForm_1<31, 87, (outs gprc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
"lbzx $RST, $RA, $RB", IIC_LdStLoad, []>;
def LHZXTLS_32 : XForm_1<31, 279, (outs gprc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
"lhzx $RST, $RA, $RB", IIC_LdStLoad, []>;
def LHAXTLS_32 : XForm_1<31, 343, (outs gprc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
"lhax $RST, $RA, $RB", IIC_LdStLoad, []>;
def LWZXTLS_32 : XForm_1<31, 23, (outs gprc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
"lwzx $RST, $RA, $RB", IIC_LdStLoad, []>;
def LWAXTLS_32 : XForm_1<31, 341, (outs gprc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
"lwax $RST, $RA, $RB", IIC_LdStLoad, []>;

}
// TLS-specific X-form floating-point loads (require HasFPU).
// LFSXTLS loads into an f4rc register and LFDXTLS into an f8rc register.
// Both print as the ordinary lfsx/lfdx mnemonics but take a tlsreg operand
// as $RB. The pattern list is empty, so these are not matched by TableGen
// selection patterns; they are chosen explicitly in
// PPCDAGToDAGISel::tryTLSXFormLoad for f32 and f64 memory types.
let mayLoad = 1, Predicates = [HasFPU] in {
def LFSXTLS : XForm_25<31, 535, (outs f4rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
"lfsx $RST, $RA, $RB", IIC_LdStLFD, []>;
def LFDXTLS : XForm_25<31, 599, (outs f8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
"lfdx $RST, $RA, $RB", IIC_LdStLFD, []>;
}

let mayStore = 1 in {
def STBXTLS : XForm_8<31, 215, (outs), (ins g8rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
Expand All @@ -761,6 +775,14 @@ def STWXTLS_32 : XForm_8<31, 151, (outs), (ins gprc:$RST, ptr_rc_nor0:$RA, tlsre
PPC970_DGroup_Cracked;

}
// TLS-specific X-form floating-point stores (require HasFPU).
// STFSXTLS stores an f4rc register and STFDXTLS an f8rc register.
// Both print as the ordinary stfsx/stfdx mnemonics but take a tlsreg operand
// as $RB. The pattern list is empty, so these are not matched by TableGen
// selection patterns; they are chosen explicitly in
// PPCDAGToDAGISel::tryTLSXFormStore for f32 and f64 memory types.
let mayStore = 1, Predicates = [HasFPU] in {
def STFSXTLS : XForm_8<31, 663, (outs), (ins f4rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
"stfsx $RST, $RA, $RB", IIC_LdStSTFD, []>,
PPC970_DGroup_Cracked;
def STFDXTLS : XForm_8<31, 727, (outs), (ins f8rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
"stfdx $RST, $RA, $RB", IIC_LdStSTFD, []>,
PPC970_DGroup_Cracked;
}

let isCommutable = 1 in
defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$RT), (ins g8rc:$RA, g8rc:$RB),
Expand Down Expand Up @@ -834,12 +856,23 @@ def LBZXTLS_ : XForm_1<31, 87, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$
"lbzx $RST, $RA, $RB", IIC_LdStLoad, []>;
def LHZXTLS_ : XForm_1<31, 279, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
"lhzx $RST, $RA, $RB", IIC_LdStLoad, []>;
def LHAXTLS_ : XForm_1<31, 343, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
"lhax $RST, $RA, $RB", IIC_LdStLoad, []>;
def LWZXTLS_ : XForm_1<31, 23, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
"lwzx $RST, $RA, $RB", IIC_LdStLoad, []>;
def LWAXTLS_ : XForm_1<31, 341, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
"lwax $RST, $RA, $RB", IIC_LdStLoad, []>;
def LDXTLS_ : XForm_1<31, 21, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
"ldx $RST, $RA, $RB", IIC_LdStLD, []>, isPPC64;
}

// Duplicates of the LFSXTLS/LFDXTLS floating-point TLS loads, distinguished
// by a trailing underscore. Operands, encoding arguments and asm strings are
// the same as the non-underscore definitions.
// NOTE(review): presumably these sit inside the enclosing isAsmParserOnly = 1
// block so llvm-mc can assemble the TLS forms -- confirm against the
// surrounding `let`.
let mayLoad = 1, Predicates = [HasFPU] in {
def LFSXTLS_ : XForm_25<31, 535, (outs f4rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
"lfsx $RST, $RA, $RB", IIC_LdStLFD, []>;
def LFDXTLS_ : XForm_25<31, 599, (outs f8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
"lfdx $RST, $RA, $RB", IIC_LdStLFD, []>;
}

let mayStore = 1 in {
def STBXTLS_ : XForm_8<31, 215, (outs), (ins g8rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
"stbx $RST, $RA, $RB", IIC_LdStStore, []>,
Expand All @@ -854,6 +887,15 @@ def STDXTLS_ : XForm_8<31, 149, (outs), (ins g8rc:$RST, ptr_rc_nor0:$RA, tlsreg
"stdx $RST, $RA, $RB", IIC_LdStSTD, []>, isPPC64,
PPC970_DGroup_Cracked;
}

// Duplicates of the STFSXTLS/STFDXTLS floating-point TLS stores, distinguished
// by a trailing underscore. Operands, encoding arguments and asm strings are
// the same as the non-underscore definitions.
// NOTE(review): presumably these sit inside the enclosing isAsmParserOnly = 1
// block so llvm-mc can assemble the TLS forms -- confirm against the
// surrounding `let`.
let mayStore = 1, Predicates = [HasFPU] in {
def STFSXTLS_ : XForm_8<31, 663, (outs), (ins f4rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
"stfsx $RST, $RA, $RB", IIC_LdStSTFD, []>,
PPC970_DGroup_Cracked;
def STFDXTLS_ : XForm_8<31, 727, (outs), (ins f8rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
"stfdx $RST, $RA, $RB", IIC_LdStSTFD, []>,
PPC970_DGroup_Cracked;
}
}

let isCommutable = 1 in {
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
Expand Up @@ -592,8 +592,8 @@ define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1)
;
; CHECK-32-P10-LABEL: testDouble1:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: rlwinm 3, 5, 3, 28, 28
; CHECK-32-P10-NEXT: addi 4, 1, -16
; CHECK-32-P10-NEXT: rlwinm 3, 5, 3, 28, 28
; CHECK-32-P10-NEXT: stxv 34, -16(1)
; CHECK-32-P10-NEXT: stfdx 1, 4, 3
; CHECK-32-P10-NEXT: lxv 34, -16(1)
Expand Down Expand Up @@ -650,8 +650,8 @@ define <2 x double> @testDouble2(<2 x double> %a, ptr %b, i32 zeroext %idx1, i32
; CHECK-32-P10-LABEL: testDouble2:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: lfd 0, 0(3)
; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28
; CHECK-32-P10-NEXT: addi 6, 1, -32
; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28
; CHECK-32-P10-NEXT: stxv 34, -32(1)
; CHECK-32-P10-NEXT: rlwinm 5, 5, 3, 28, 28
; CHECK-32-P10-NEXT: stfdx 0, 6, 4
Expand Down Expand Up @@ -723,8 +723,8 @@ define <2 x double> @testDouble3(<2 x double> %a, ptr %b, i32 zeroext %idx1, i32
; CHECK-32-P10-LABEL: testDouble3:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: plfd 0, 65536(3), 0
; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28
; CHECK-32-P10-NEXT: addi 6, 1, -32
; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28
; CHECK-32-P10-NEXT: stxv 34, -32(1)
; CHECK-32-P10-NEXT: rlwinm 5, 5, 3, 28, 28
; CHECK-32-P10-NEXT: stfdx 0, 6, 4
Expand Down
74 changes: 74 additions & 0 deletions llvm/test/CodeGen/PowerPC/tls-pie-xform.ll
Expand Up @@ -5,6 +5,8 @@
@var_short = external thread_local local_unnamed_addr global i16, align 2
@var_int = external thread_local local_unnamed_addr global i32, align 4
@var_long_long = external thread_local local_unnamed_addr global i64, align 8
@var_float = external thread_local local_unnamed_addr global float, align 4
@var_double = external thread_local local_unnamed_addr global double, align 8

define dso_local zeroext i8 @test_char_one() {
; CHECK-LABEL: test_char_one:
Expand Down Expand Up @@ -53,6 +55,18 @@ define dso_local signext i16 @test_short_one() {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis 3, 2, var_short@got@tprel@ha
; CHECK-NEXT: ld 3, var_short@got@tprel@l(3)
; CHECK-NEXT: lhax 3, 3, var_short@tls
; CHECK-NEXT: blr
entry:
%0 = load i16, ptr @var_short, align 2, !tbaa !7
ret i16 %0
}

define dso_local zeroext i16 @test_short_one_zeroext() {
; CHECK-LABEL: test_short_one_zeroext:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis 3, 2, var_short@got@tprel@ha
; CHECK-NEXT: ld 3, var_short@got@tprel@l(3)
; CHECK-NEXT: lhzx 3, 3, var_short@tls
; CHECK-NEXT: blr
entry:
Expand Down Expand Up @@ -95,6 +109,18 @@ define dso_local signext i32 @test_int_one() {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis 3, 2, var_int@got@tprel@ha
; CHECK-NEXT: ld 3, var_int@got@tprel@l(3)
; CHECK-NEXT: lwax 3, 3, var_int@tls
; CHECK-NEXT: blr
entry:
%0 = load i32, ptr @var_int, align 4, !tbaa !9
ret i32 %0
}

define dso_local zeroext i32 @test_int_one_zeroext() {
; CHECK-LABEL: test_int_one_zeroext:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis 3, 2, var_int@got@tprel@ha
; CHECK-NEXT: ld 3, var_int@got@tprel@l(3)
; CHECK-NEXT: lwzx 3, 3, var_int@tls
; CHECK-NEXT: blr
entry:
Expand Down Expand Up @@ -172,6 +198,54 @@ entry:
ret i64 %add
}

; Load of the thread-local float @var_float. The expected code is the
; addis/ld pair that fetches the got@tprel value, followed by a single X-form
; lfsx whose third operand is var_float@tls.
define float @test_float_one() {
; CHECK-LABEL: test_float_one:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis 3, 2, var_float@got@tprel@ha
; CHECK-NEXT: ld 3, var_float@got@tprel@l(3)
; CHECK-NEXT: lfsx 1, 3, var_float@tls
; CHECK-NEXT: blr
entry:
%0 = load float, ptr @var_float, align 4
ret float %0
}

; Store of the float argument %a to the thread-local @var_float. The expected
; code is the addis/ld pair that fetches the got@tprel value, followed by a
; single X-form stfsx whose third operand is var_float@tls.
define void @test_float_two(float %a) {
; CHECK-LABEL: test_float_two:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis 3, 2, var_float@got@tprel@ha
; CHECK-NEXT: ld 3, var_float@got@tprel@l(3)
; CHECK-NEXT: stfsx 1, 3, var_float@tls
; CHECK-NEXT: blr
entry:
store float %a, ptr @var_float, align 4
ret void
}

; Load of the thread-local double @var_double. The expected code is the
; addis/ld pair that fetches the got@tprel value, followed by a single X-form
; lfdx whose third operand is var_double@tls.
define double @test_double_one() {
; CHECK-LABEL: test_double_one:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis 3, 2, var_double@got@tprel@ha
; CHECK-NEXT: ld 3, var_double@got@tprel@l(3)
; CHECK-NEXT: lfdx 1, 3, var_double@tls
; CHECK-NEXT: blr
entry:
%0 = load double, ptr @var_double, align 8
ret double %0
}

; Store of the double argument %a to the thread-local @var_double. The expected
; code is the addis/ld pair that fetches the got@tprel value, followed by a
; single X-form stfdx whose third operand is var_double@tls.
define void @test_double_two(double %a) {
; CHECK-LABEL: test_double_two:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis 3, 2, var_double@got@tprel@ha
; CHECK-NEXT: ld 3, var_double@got@tprel@l(3)
; CHECK-NEXT: stfdx 1, 3, var_double@tls
; CHECK-NEXT: blr
entry:
store double %a, ptr @var_double, align 8
ret void
}

!llvm.module.flags = !{!0, !1, !2}

!0 = !{i32 1, !"wchar_size", i32 4}
Expand Down

0 comments on commit 11b71ad

Please sign in to comment.