Skip to content

Commit

Permalink
[SystemZ] Temporarily revert memcmp and memcpy patches
Browse files Browse the repository at this point in the history
These patches seem to cause test failures in compiler-rt.

Revert "[SystemZ] Implement memcmp of variable length with CLC."
This reverts commit 7a4e9a0.

Revert "[SystemZ] Implement memcpy of variable length with MVC."
This reverts commit c6c13c5.
  • Loading branch information
JonPsson committed Oct 6, 2021
1 parent d2b9d0f commit 3562076
Show file tree
Hide file tree
Showing 7 changed files with 37 additions and 105 deletions.
33 changes: 12 additions & 21 deletions llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
Expand Up @@ -7836,11 +7836,9 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(

// When generating more than one CLC, all but the last will need to
// branch to the end when a difference is found.
MachineBasicBlock *EndMBB =
(Opcode == SystemZ::CLC &&
(ImmLength > 256 || LenMinus1Reg != SystemZ::NoRegister)
? SystemZ::splitBlockAfter(MI, MBB)
: nullptr);
MachineBasicBlock *EndMBB = (ImmLength > 256 && Opcode == SystemZ::CLC
? SystemZ::splitBlockAfter(MI, MBB)
: nullptr);

// Check for the loop form, in which operand 5 is the trip count.
if (MI.getNumExplicitOperands() > 5) {
Expand Down Expand Up @@ -7882,8 +7880,8 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
StartMBB = SystemZ::emitBlockAfter(MBB);
LoopMBB = SystemZ::emitBlockAfter(StartMBB);
NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
DoneMBB = SystemZ::emitBlockAfter(NextMBB);
NextMBB = LoopMBB;
DoneMBB = SystemZ::emitBlockAfter(LoopMBB);

// MBB:
// # Jump to AllDoneMBB if LenMinus1Reg is -1, or fall thru to StartMBB.
Expand Down Expand Up @@ -8002,24 +8000,19 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
: MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
.addReg(StartDestReg).addMBB(StartMBB)
.addReg(NextDestReg).addMBB(NextMBB);
.addReg(NextDestReg).addMBB(LoopMBB);
if (!HaveSingleBase)
BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
.addReg(StartSrcReg).addMBB(StartMBB)
.addReg(NextSrcReg).addMBB(NextMBB);
.addReg(NextSrcReg).addMBB(LoopMBB);
MRI.constrainRegClass(LenMinus1Reg, &SystemZ::ADDR64BitRegClass);
MachineInstrBuilder EXRL_MIB =
BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
.addImm(Opcode)
.addReg(LenMinus1Reg)
.addReg(RemDestReg).addImm(DestDisp)
.addReg(RemSrcReg).addImm(SrcDisp);
BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
.addImm(Opcode)
.addReg(LenMinus1Reg)
.addReg(RemDestReg).addImm(DestDisp)
.addReg(RemSrcReg).addImm(SrcDisp);
MBB->addSuccessor(AllDoneMBB);
MBB = AllDoneMBB;
if (EndMBB) {
EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
MBB->addLiveIn(SystemZ::CC);
}
}
}

Expand Down Expand Up @@ -8539,7 +8532,6 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
return emitAtomicCmpSwapW(MI, MBB);
case SystemZ::MVCSequence:
case SystemZ::MVCLoop:
case SystemZ::MVCLoopVarLen:
return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
case SystemZ::NCSequence:
case SystemZ::NCLoop:
Expand All @@ -8553,7 +8545,6 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
return emitMemMemWrapper(MI, MBB, SystemZ::XC);
case SystemZ::CLCSequence:
case SystemZ::CLCLoop:
case SystemZ::CLCLoopVarLen:
return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
case SystemZ::CLSTLoop:
return emitStringWrapper(MI, MBB, SystemZ::CLST);
Expand Down
4 changes: 0 additions & 4 deletions llvm/lib/Target/SystemZ/SystemZInstrFormats.td
Expand Up @@ -5365,10 +5365,6 @@ multiclass CompareMemorySS<string mnemonic, bits<8> opcode,
imm64:$length, GR64:$count256),
[(set CC, (loop bdaddr12only:$dest, bdaddr12only:$src,
imm64:$length, GR64:$count256))]>;
def LoopVarLen : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
GR64:$length, GR64:$count256),
[(set CC, (loop bdaddr12only:$dest, bdaddr12only:$src,
GR64:$length, GR64:$count256))]>;
}
}

Expand Down
43 changes: 16 additions & 27 deletions llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
Expand Up @@ -45,20 +45,6 @@ static SDValue emitMemMem(SelectionDAG &DAG, const SDLoc &DL, unsigned Sequence,
DAG.getConstant(Size, DL, PtrVT));
}

// Build a memory-to-memory loop node (opcode Loop) for a length that is only
// known at run time.
//
// The node receives Chain, Dst and Src plus two derived i64 operands:
//   - LenMinus1: Size (zero-extended or truncated to i64) with -1 added;
//   - TripC:     LenMinus1 logically shifted right by 8.
// For SystemZISD::CLC_LOOP the node produces an i32 result in addition to the
// chain (presumably the condition code -- confirm against the callers); every
// other opcode produces only a chain.
static SDValue emitMemMemVarLen(SelectionDAG &DAG, const SDLoc &DL,
unsigned Loop, SDValue Chain, SDValue Dst,
SDValue Src, SDValue Size) {
// LenMinus1 = (i64)Size - 1, expressed as an ADD of the constant -1.
SDValue LenMinus1 = DAG.getNode(ISD::ADD, DL, MVT::i64,
DAG.getZExtOrTrunc(Size, DL, MVT::i64),
DAG.getConstant(-1, DL, MVT::i64));
// TripC = LenMinus1 >> 8 (logical shift), passed as the loop's count operand.
SDValue TripC = DAG.getNode(ISD::SRL, DL, MVT::i64, LenMinus1,
DAG.getConstant(8, DL, MVT::i64));
// Only the compare loop has a value result besides the chain.
SDVTList VTs = Loop == SystemZISD::CLC_LOOP
? DAG.getVTList(MVT::i32, MVT::Other)
: DAG.getVTList(MVT::Other);
return DAG.getNode(Loop, DL, VTs, Chain, Dst, Src, LenMinus1, TripC);
}

SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemcpy(
SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src,
SDValue Size, Align Alignment, bool IsVolatile, bool AlwaysInline,
Expand All @@ -69,8 +55,7 @@ SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemcpy(
if (auto *CSize = dyn_cast<ConstantSDNode>(Size))
return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP,
Chain, Dst, Src, CSize->getZExtValue());

return emitMemMemVarLen(DAG, DL, SystemZISD::MVC_LOOP, Chain, Dst, Src, Size);
return SDValue();
}

// Handle a memset of 1, 2, 4 or 8 bytes with the operands given by
Expand Down Expand Up @@ -155,10 +140,16 @@ SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset(
}

// Variable length
if (CByte && CByte->getZExtValue() == 0)
if (CByte && CByte->getZExtValue() == 0) {
// Handle the special case of a variable length memset of 0 with XC.
return emitMemMemVarLen(DAG, DL, SystemZISD::XC_LOOP, Chain, Dst, Dst, Size);

SDValue LenMinus1 = DAG.getNode(ISD::ADD, DL, MVT::i64,
DAG.getZExtOrTrunc(Size, DL, MVT::i64),
DAG.getConstant(-1, DL, MVT::i64));
SDValue TripC = DAG.getNode(ISD::SRL, DL, MVT::i64, LenMinus1,
DAG.getConstant(8, DL, MVT::i64));
return DAG.getNode(SystemZISD::XC_LOOP, DL, MVT::Other, Chain, Dst, Dst,
LenMinus1, TripC);
}
return SDValue();
}

Expand Down Expand Up @@ -202,17 +193,15 @@ std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemcmp(
SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1,
SDValue Src2, SDValue Size, MachinePointerInfo Op1PtrInfo,
MachinePointerInfo Op2PtrInfo) const {
SDValue CCReg;
// Swap operands to invert CC == 1 vs. CC == 2 cases.
if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) {
uint64_t Bytes = CSize->getZExtValue();
assert(Bytes > 0 && "Caller should have handled 0-size case");
CCReg = emitCLC(DAG, DL, Chain, Src2, Src1, Bytes);
} else
CCReg = emitMemMemVarLen(DAG, DL, SystemZISD::CLC_LOOP, Chain, Src2, Src1,
Size);
Chain = CCReg.getValue(1);
return std::make_pair(addIPMSequence(DL, CCReg, DAG), Chain);
// Swap operands to invert CC == 1 vs. CC == 2 cases.
SDValue CCReg = emitCLC(DAG, DL, Chain, Src2, Src1, Bytes);
Chain = CCReg.getValue(1);
return std::make_pair(addIPMSequence(DL, CCReg, DAG), Chain);
}
return std::make_pair(SDValue(), SDValue());
}

std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemchr(
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/SystemZ/loop-03.ll
Expand Up @@ -19,7 +19,7 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture r

define void @fun0(%0*) {
; CHECK-LABEL: .LBB0_4
; CHECK: => This Inner Loop Header
; CHECK: => This Inner Loop Header: Depth=2
; CHECK-NOT: 16-byte Folded Spill
; CHECK-NOT: 16-byte Folded Reload

Expand Down
27 changes: 0 additions & 27 deletions llvm/test/CodeGen/SystemZ/memcmp-01.ll
Expand Up @@ -219,30 +219,3 @@ define i32 @f13(i8 *%src1, i8 *%src2) {
%res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 769)
ret i32 %res
}

define i32 @f14(i8 *%src1, i8 *%src2, i64 %Len) {
; CHECK-LABEL: f14:
; CHECK: # %bb.0:
; CHECK-NEXT: aghi %r4, -1
; CHECK-NEXT: cghi %r4, -1
; CHECK-NEXT: je .LBB13_5
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: srlg %r0, %r4, 8
; CHECK-NEXT: cgije %r0, 0, .LBB13_4
; CHECK-NEXT: .LBB13_2: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: clc 0(256,%r3), 0(%r2)
; CHECK-NEXT: jlh .LBB13_5
; CHECK-NEXT: # %bb.3: # in Loop: Header=BB13_2 Depth=1
; CHECK-NEXT: la %r3, 256(%r3)
; CHECK-NEXT: la %r2, 256(%r2)
; CHECK-NEXT: brctg %r0, .LBB13_2
; CHECK-NEXT: .LBB13_4:
; CHECK-NEXT: exrl %r4, .Ltmp0
; CHECK-NEXT: .LBB13_5:
; CHECK-NEXT: ipm %r2
; CHECK-NEXT: sll %r2, 2
; CHECK-NEXT: sra %r2, 30
; CHECK-NEXT: br %r14
%res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 %Len)
ret i32 %res
}
25 changes: 0 additions & 25 deletions llvm/test/CodeGen/SystemZ/memcpy-01.ll
Expand Up @@ -217,28 +217,3 @@ define void @f16() {
call void @foo(i8* %dest, i8* %src)
ret void
}

; Test a variable length loop.
define void @f17(i8* %dest, i8* %src, i64 %Len) {
; CHECK-LABEL: f17:
; CHECK: # %bb.0:
; CHECK-NEXT: aghi %r4, -1
; CHECK-NEXT: cgibe %r4, -1, 0(%r14)
; CHECK-NEXT: .LBB16_1:
; CHECK-NEXT: srlg %r0, %r4, 8
; CHECK-NEXT: cgije %r0, 0, .LBB16_3
; CHECK-NEXT: .LBB16_2: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: pfd 2, 768(%r2)
; CHECK-NEXT: mvc 0(256,%r2), 0(%r3)
; CHECK-NEXT: la %r2, 256(%r2)
; CHECK-NEXT: la %r3, 256(%r3)
; CHECK-NEXT: brctg %r0, .LBB16_2
; CHECK-NEXT: .LBB16_3:
; CHECK-NEXT: exrl %r4, .Ltmp0
; CHECK-NEXT: br %r14
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 %Len, i1 false)
ret void
}

; CHECK: .Ltmp0:
; CHECK-NEXT: mvc 0(1,%r2), 0(%r3)
8 changes: 8 additions & 0 deletions llvm/test/CodeGen/SystemZ/tail-call-mem-intrinsics.ll
@@ -1,5 +1,13 @@
; RUN: llc -mtriple=s390x-linux-gnu < %s | FileCheck %s

; CHECK-LABEL: tail_memcpy:
; CHECK: jg memcpy
define void @tail_memcpy(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 {
entry:
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i1 false)
ret void
}

; CHECK-LABEL: tail_memmove:
; CHECK: jg memmove
define void @tail_memmove(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 {
Expand Down

0 comments on commit 3562076

Please sign in to comment.