Skip to content

Commit

Permalink
[SystemZ] Support load-and-trap instructions
Browse files Browse the repository at this point in the history
This adds support for the instructions provided with the
load-and-trap facility.

llvm-svn: 288030
  • Loading branch information
uweigand committed Nov 28, 2016
1 parent 7583991 commit 2d9e3d9
Show file tree
Hide file tree
Showing 15 changed files with 835 additions and 7 deletions.
61 changes: 56 additions & 5 deletions llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ using namespace llvm;
#define DEBUG_TYPE "systemz-elim-compare"

STATISTIC(BranchOnCounts, "Number of branch-on-count instructions");
STATISTIC(LoadAndTraps, "Number of load-and-trap instructions");
STATISTIC(EliminatedComparisons, "Number of eliminated comparisons");
STATISTIC(FusedComparisons, "Number of fused compare-and-branch instructions");

Expand Down Expand Up @@ -73,6 +74,8 @@ class SystemZElimCompare : public MachineFunctionPass {
Reference getRegReferences(MachineInstr &MI, unsigned Reg);
bool convertToBRCT(MachineInstr &MI, MachineInstr &Compare,
SmallVectorImpl<MachineInstr *> &CCUsers);
bool convertToLoadAndTrap(MachineInstr &MI, MachineInstr &Compare,
SmallVectorImpl<MachineInstr *> &CCUsers);
bool convertToLoadAndTest(MachineInstr &MI);
bool adjustCCMasksForInstr(MachineInstr &MI, MachineInstr &Compare,
SmallVectorImpl<MachineInstr *> &CCUsers);
Expand Down Expand Up @@ -225,6 +228,48 @@ bool SystemZElimCompare::convertToBRCT(
return true;
}

// Compare compares the result of MI against zero.  If MI is a suitable load
// instruction and if CCUsers is a single conditional trap on zero, eliminate
// the load and convert the conditional trap to a load-and-trap.  Return true
// on success.
//
// MI is erased on success; Compare is left in place here.
// NOTE(review): presumably the caller removes Compare once this returns
// true -- confirm against optimizeCompareZero's callers.
bool SystemZElimCompare::convertToLoadAndTrap(
    MachineInstr &MI, MachineInstr &Compare,
    SmallVectorImpl<MachineInstr *> &CCUsers) {
  // getLoadAndTrap returns 0 when MI's opcode has no load-and-trap
  // counterpart or when the facility is unavailable.
  unsigned LATOpcode = TII->getLoadAndTrap(MI.getOpcode());
  if (!LATOpcode)
    return false;

  // Check whether we have a single CondTrap that traps on zero.
  if (CCUsers.size() != 1)
    return false;
  MachineInstr *Trap = CCUsers[0];
  if (Trap->getOpcode() != SystemZ::CondTrap ||
      Trap->getOperand(0).getImm() != SystemZ::CCMASK_ICMP ||
      Trap->getOperand(1).getImm() != SystemZ::CCMASK_CMP_EQ)
    return false;

  // We already know that there are no references to the register between
  // MI and Compare.  Make sure that there are also no references between
  // Compare and Trap, since the load is effectively moved down to Trap.
  unsigned SrcReg = getCompareSourceReg(Compare);
  MachineBasicBlock::iterator MBBI = Compare, MBBE = Trap;
  for (++MBBI; MBBI != MBBE; ++MBBI)
    if (getRegReferences(*MBBI, SrcReg))
      return false;

  // The transformation is OK.  Rebuild Trap as a load-and-trap: drop all of
  // its operands (explicit and implicit), switch the descriptor, and copy
  // over the first four operands of the original load.
  // NOTE(review): these are presumably the destination register and the
  // base/displacement/index address operands -- confirm.
  while (Trap->getNumOperands())
    Trap->RemoveOperand(0);
  Trap->setDesc(TII->get(LATOpcode));
  MachineInstrBuilder(*Trap->getParent()->getParent(), Trap)
      .addOperand(MI.getOperand(0))
      .addOperand(MI.getOperand(1))
      .addOperand(MI.getOperand(2))
      .addOperand(MI.getOperand(3));
  MI.eraseFromParent();
  return true;
}

// If MI is a load instruction, try to convert it into a LOAD AND TEST.
// Return true on success.
bool SystemZElimCompare::convertToLoadAndTest(MachineInstr &MI) {
Expand Down Expand Up @@ -353,11 +398,17 @@ bool SystemZElimCompare::optimizeCompareZero(
MachineInstr &MI = *MBBI;
if (resultTests(MI, SrcReg)) {
// Try to remove both MI and Compare by converting a branch to BRCT(G).
// We don't care in this case whether CC is modified between MI and
// Compare.
if (!CCRefs.Use && !SrcRefs && convertToBRCT(MI, Compare, CCUsers)) {
BranchOnCounts += 1;
return true;
// or a load-and-trap instruction. We don't care in this case whether
// CC is modified between MI and Compare.
if (!CCRefs.Use && !SrcRefs) {
if (convertToBRCT(MI, Compare, CCUsers)) {
BranchOnCounts += 1;
return true;
}
if (convertToLoadAndTrap(MI, Compare, CCUsers)) {
LoadAndTraps += 1;
return true;
}
}
// Try to eliminate Compare by reusing a CC result from MI.
if ((!CCRefs && convertToLoadAndTest(MI)) ||
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/SystemZ/SystemZFeatures.td
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,11 @@ def Arch9NewFeatures : SystemZFeatureList<[
//
//===----------------------------------------------------------------------===//

// Load-and-trap facility.  Used as the predicate for the LOAD AND TRAP
// instruction definitions (LAT, LFHAT, LGAT, LLGFAT, LLGTAT).
def FeatureLoadAndTrap : SystemZFeature<
"load-and-trap", "LoadAndTrap",
"Assume that the load-and-trap facility is installed"
>;

def FeatureMiscellaneousExtensions : SystemZFeature<
"miscellaneous-extensions", "MiscellaneousExtensions",
"Assume that the miscellaneous-extensions facility is installed"
Expand All @@ -100,6 +105,7 @@ def FeatureTransactionalExecution : SystemZFeature<
>;

def Arch10NewFeatures : SystemZFeatureList<[
FeatureLoadAndTrap,
FeatureMiscellaneousExtensions,
FeatureProcessorAssist,
FeatureTransactionalExecution
Expand Down
19 changes: 19 additions & 0 deletions llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1678,6 +1678,25 @@ unsigned SystemZInstrInfo::getFusedCompare(unsigned Opcode,
return 0;
}

// Map the load opcode Opcode to the matching LOAD AND TRAP opcode.
// Returns 0 if Opcode has no such counterpart, or if the subtarget does
// not provide the load-and-trap facility.
unsigned SystemZInstrInfo::getLoadAndTrap(unsigned Opcode) const {
  unsigned TrapOpcode = 0;

  // Only consult the mapping when the facility is actually available.
  if (STI.hasLoadAndTrap()) {
    switch (Opcode) {
    case SystemZ::L:
    case SystemZ::LY:
      // Both L and LY map to the single LAT opcode.
      TrapOpcode = SystemZ::LAT;
      break;
    case SystemZ::LG:
      TrapOpcode = SystemZ::LGAT;
      break;
    case SystemZ::LFH:
      TrapOpcode = SystemZ::LFHAT;
      break;
    case SystemZ::LLGF:
      TrapOpcode = SystemZ::LLGFAT;
      break;
    case SystemZ::LLGT:
      TrapOpcode = SystemZ::LLGTAT;
      break;
    default:
      // No load-and-trap form for this opcode.
      break;
    }
  }

  return TrapOpcode;
}

void SystemZInstrInfo::loadImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned Reg, uint64_t Value) const {
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/SystemZ/SystemZInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,10 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
SystemZII::FusedCompareType Type,
const MachineInstr *MI = nullptr) const;

// If Opcode is a LOAD opcode for which an associated LOAD AND TRAP
// operation exists, return the opcode for the latter, otherwise return 0.
// Also returns 0 if the subtarget lacks the load-and-trap facility.
unsigned getLoadAndTrap(unsigned Opcode) const;

// Emit code before MBBI in MI to move immediate value Value into
// physical register Reg.
void loadImmediate(MachineBasicBlock &MBB,
Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/Target/SystemZ/SystemZInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,13 @@ let Predicates = [FeatureLoadAndZeroRightmostByte] in {
(LZRG bdxaddr20only:$src)>;
}

// Load and trap.
// These are defined with null_frag, i.e. without an instruction-selection
// pattern; SystemZElimCompare::convertToLoadAndTrap creates them by fusing
// a load with a conditional trap on zero.
let Predicates = [FeatureLoadAndTrap] in {
def LAT : UnaryRXY<"lat", 0xE39F, null_frag, GR32, 4>;
def LFHAT : UnaryRXY<"lfhat", 0xE3C8, null_frag, GRH32, 4>;
def LGAT : UnaryRXY<"lgat", 0xE385, null_frag, GR64, 8>;
}

// Register stores.
let SimpleBDXStore = 1 in {
// Expands to ST, STY or STFH, depending on the choice of register.
Expand Down Expand Up @@ -662,6 +669,12 @@ let Predicates = [FeatureLoadAndZeroRightmostByte] in {
(LLZRGF bdxaddr20only:$src)>;
}

// Load and trap.
// Defined with null_frag, i.e. without an instruction-selection pattern;
// SystemZInstrInfo::getLoadAndTrap maps LLGF and LLGT to these opcodes.
let Predicates = [FeatureLoadAndTrap] in {
def LLGFAT : UnaryRXY<"llgfat", 0xE39D, null_frag, GR64, 4>;
def LLGTAT : UnaryRXY<"llgtat", 0xE39C, null_frag, GR64, 4>;
}

//===----------------------------------------------------------------------===//
// Truncations
//===----------------------------------------------------------------------===//
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,9 @@ def : InstRW<[FXa], (instregex "LR(Mux)?$")>;
// Load and zero rightmost byte
def : InstRW<[LSU], (instregex "LZR(F|G)$")>;

// Load and trap (LAT, LFHAT, LGAT)
def : InstRW<[FXb, LSU, Lat5], (instregex "L(FH|G)?AT$")>;

// Load and test
def : InstRW<[FXa, LSU, Lat5], (instregex "LT(G)?$")>;
def : InstRW<[FXa], (instregex "LT(G)?R$")>;
Expand Down Expand Up @@ -256,6 +259,9 @@ def : InstRW<[LSU], (instregex "LLG(C|H|F|T|HRL|FRL)$")>;
// Load and zero rightmost byte
def : InstRW<[LSU], (instregex "LLZRGF$")>;

// Load and trap (LLGFAT, LLGTAT)
def : InstRW<[FXb, LSU, Lat5], (instregex "LLG(F|T)?AT$")>;

//===----------------------------------------------------------------------===//
// Truncations
//===----------------------------------------------------------------------===//
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,9 @@ def : InstRW<[FXU], (instregex "LG(F|H)I$")>;
def : InstRW<[FXU], (instregex "LHI(Mux)?$")>;
def : InstRW<[FXU], (instregex "LR(Mux)?$")>;

// Load and trap (LAT, LFHAT, LGAT)
def : InstRW<[FXU, LSU, Lat5], (instregex "L(FH|G)?AT$")>;

// Load and test
def : InstRW<[FXU, LSU, Lat5], (instregex "LT(G)?$")>;
def : InstRW<[FXU], (instregex "LT(G)?R$")>;
Expand Down Expand Up @@ -224,6 +227,9 @@ def : InstRW<[FXU, LSU, Lat5], (instregex "LL(C|H)H$")>;
def : InstRW<[LSU], (instregex "LLHRL$")>;
def : InstRW<[LSU], (instregex "LLG(C|H|F|T|HRL|FRL)$")>;

// Load and trap (LLGFAT, LLGTAT)
def : InstRW<[FXU, LSU, Lat5], (instregex "LLG(F|T)?AT$")>;

//===----------------------------------------------------------------------===//
// Truncations
//===----------------------------------------------------------------------===//
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,
HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false),
HasPopulationCount(false), HasFastSerialization(false),
HasInterlockedAccess1(false), HasMiscellaneousExtensions(false),
HasTransactionalExecution(false), HasProcessorAssist(false),
HasVector(false), HasLoadStoreOnCond2(false),
HasLoadAndTrap(false), HasTransactionalExecution(false),
HasProcessorAssist(false), HasVector(false), HasLoadStoreOnCond2(false),
HasLoadAndZeroRightmostByte(false),
TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
TLInfo(TM, *this), TSInfo(), FrameLowering() {}
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/SystemZ/SystemZSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ class SystemZSubtarget : public SystemZGenSubtargetInfo {
bool HasFastSerialization;
bool HasInterlockedAccess1;
bool HasMiscellaneousExtensions;
bool HasLoadAndTrap;
bool HasTransactionalExecution;
bool HasProcessorAssist;
bool HasVector;
Expand Down Expand Up @@ -113,6 +114,9 @@ class SystemZSubtarget : public SystemZGenSubtargetInfo {
return HasMiscellaneousExtensions;
}

// Return true if the target has the load-and-trap facility.
// SystemZInstrInfo::getLoadAndTrap checks this before mapping a load
// opcode to its LOAD AND TRAP counterpart.
bool hasLoadAndTrap() const { return HasLoadAndTrap; }

// Return true if the target has the transactional-execution facility.
bool hasTransactionalExecution() const { return HasTransactionalExecution; }

Expand Down
157 changes: 157 additions & 0 deletions llvm/test/CodeGen/SystemZ/trap-03.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
; Test load-and-trap instructions (LAT/LGAT)
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=zEC12 | FileCheck %s

declare void @llvm.trap()

; Check LAT with no displacement.
; The load, the compare against zero and the conditional trap are expected
; to be emitted as a single LAT.
define i32 @f1(i32 *%ptr) {
; CHECK-LABEL: f1:
; CHECK: lat %r2, 0(%r2)
; CHECK: br %r14
entry:
%val = load i32, i32 *%ptr
%cmp = icmp eq i32 %val, 0
br i1 %cmp, label %if.then, label %if.end

if.then: ; preds = %entry
tail call void @llvm.trap()
unreachable

if.end: ; preds = %entry
ret i32 %val
}

; Check the high end of the LAT range.
define i32 @f2(i32 *%src) {
; CHECK-LABEL: f2:
; CHECK: lat %r2, 524284(%r2)
; CHECK: br %r14
entry:
  %ptr = getelementptr i32, i32 *%src, i64 131071
  %val = load i32, i32 *%ptr
  %cmp = icmp eq i32 %val, 0
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  tail call void @llvm.trap()
  unreachable

if.end:                                           ; preds = %entry
  ret i32 %val
}

; Check the next word up, which needs separate address logic.
; Other sequences besides this one would be OK.
define i32 @f3(i32 *%src) {
; CHECK-LABEL: f3:
; CHECK: agfi %r2, 524288
; CHECK: lat %r2, 0(%r2)
; CHECK: br %r14
entry:
  %ptr = getelementptr i32, i32 *%src, i64 131072
  %val = load i32, i32 *%ptr
  %cmp = icmp eq i32 %val, 0
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  tail call void @llvm.trap()
  unreachable

if.end:                                           ; preds = %entry
  ret i32 %val
}

; Check that LAT allows an index.
define i32 @f4(i64 %src, i64 %index) {
; CHECK-LABEL: f4:
; CHECK: lat %r2, 524287(%r3,%r2)
; CHECK: br %r14
entry:
  %add1 = add i64 %src, %index
  %add2 = add i64 %add1, 524287
  %ptr = inttoptr i64 %add2 to i32 *
  %val = load i32, i32 *%ptr
  %cmp = icmp eq i32 %val, 0
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  tail call void @llvm.trap()
  unreachable

if.end:                                           ; preds = %entry
  ret i32 %val
}

; Check LGAT with no displacement.
; The 64-bit load, the compare against zero and the conditional trap are
; expected to be emitted as a single LGAT.
define i64 @f5(i64 *%ptr) {
; CHECK-LABEL: f5:
; CHECK: lgat %r2, 0(%r2)
; CHECK: br %r14
entry:
%val = load i64, i64 *%ptr
%cmp = icmp eq i64 %val, 0
br i1 %cmp, label %if.then, label %if.end

if.then: ; preds = %entry
tail call void @llvm.trap()
unreachable

if.end: ; preds = %entry
ret i64 %val
}

; Check the high end of the LGAT range.
define i64 @f6(i64 *%src) {
; CHECK-LABEL: f6:
; CHECK: lgat %r2, 524280(%r2)
; CHECK: br %r14
entry:
  %ptr = getelementptr i64, i64 *%src, i64 65535
  %val = load i64, i64 *%ptr
  %cmp = icmp eq i64 %val, 0
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  tail call void @llvm.trap()
  unreachable

if.end:                                           ; preds = %entry
  ret i64 %val
}

; Check the next doubleword up, which needs separate address logic.
; Other sequences besides this one would be OK.
define i64 @f7(i64 *%src) {
; CHECK-LABEL: f7:
; CHECK: agfi %r2, 524288
; CHECK: lgat %r2, 0(%r2)
; CHECK: br %r14
entry:
  %ptr = getelementptr i64, i64 *%src, i64 65536
  %val = load i64, i64 *%ptr
  %cmp = icmp eq i64 %val, 0
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  tail call void @llvm.trap()
  unreachable

if.end:                                           ; preds = %entry
  ret i64 %val
}

; Check that LGAT allows an index.
define i64 @f8(i64 %src, i64 %index) {
; CHECK-LABEL: f8:
; CHECK: lgat %r2, 524287(%r3,%r2)
; CHECK: br %r14
entry:
  %add1 = add i64 %src, %index
  %add2 = add i64 %add1, 524287
  %ptr = inttoptr i64 %add2 to i64 *
  %val = load i64, i64 *%ptr
  %cmp = icmp eq i64 %val, 0
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  tail call void @llvm.trap()
  unreachable

if.end:                                           ; preds = %entry
  ret i64 %val
}
Loading

0 comments on commit 2d9e3d9

Please sign in to comment.