@@ -5714,6 +5714,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
5714
5714
OPCODE (OC);
5715
5715
OPCODE (XC);
5716
5716
OPCODE (CLC);
5717
+ OPCODE (MEMSET_MVC);
5717
5718
OPCODE (STPCPY);
5718
5719
OPCODE (STRCMP);
5719
5720
OPCODE (SEARCH_STRING);
@@ -7860,8 +7861,10 @@ MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
7860
7861
return MBB;
7861
7862
}
7862
7863
7863
- MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper (
7864
- MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
7864
+ MachineBasicBlock *
7865
+ SystemZTargetLowering::emitMemMemWrapper (MachineInstr &MI,
7866
+ MachineBasicBlock *MBB,
7867
+ unsigned Opcode, bool IsMemset) const {
7865
7868
MachineFunction &MF = *MBB->getParent ();
7866
7869
const SystemZInstrInfo *TII =
7867
7870
static_cast <const SystemZInstrInfo *>(Subtarget.getInstrInfo ());
@@ -7870,18 +7873,64 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
7870
7873
7871
7874
MachineOperand DestBase = earlyUseOperand (MI.getOperand (0 ));
7872
7875
uint64_t DestDisp = MI.getOperand (1 ).getImm ();
7873
- MachineOperand SrcBase = earlyUseOperand (MI.getOperand (2 ));
7874
- uint64_t SrcDisp = MI.getOperand (3 ).getImm ();
7875
- MachineOperand &LengthMO = MI.getOperand (4 );
7876
+ MachineOperand SrcBase = MachineOperand::CreateReg (0U , false );
7877
+ uint64_t SrcDisp;
7878
+
7879
+ // Fold the displacement Disp if it is out of range.
7880
+ auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void {
7881
+ if (!isUInt<12 >(Disp)) {
7882
+ Register Reg = MRI.createVirtualRegister (&SystemZ::ADDR64BitRegClass);
7883
+ unsigned Opcode = TII->getOpcodeForOffset (SystemZ::LA, Disp);
7884
+ BuildMI (*MI.getParent (), MI, MI.getDebugLoc (), TII->get (Opcode), Reg)
7885
+ .add (Base).addImm (Disp).addReg (0 );
7886
+ Base = MachineOperand::CreateReg (Reg, false );
7887
+ Disp = 0 ;
7888
+ }
7889
+ };
7890
+
7891
+ if (!IsMemset) {
7892
+ SrcBase = earlyUseOperand (MI.getOperand (2 ));
7893
+ SrcDisp = MI.getOperand (3 ).getImm ();
7894
+ } else {
7895
+ SrcBase = DestBase;
7896
+ SrcDisp = DestDisp++;
7897
+ foldDisplIfNeeded (DestBase, DestDisp);
7898
+ }
7899
+
7900
+ MachineOperand &LengthMO = MI.getOperand (IsMemset ? 2 : 4 );
7876
7901
bool IsImmForm = LengthMO.isImm ();
7877
7902
bool IsRegForm = !IsImmForm;
7878
7903
7904
+ // Build and insert one Opcode of Length, with special treatment for memset.
7905
+ auto insertMemMemOp = [&](MachineBasicBlock *InsMBB,
7906
+ MachineBasicBlock::iterator InsPos,
7907
+ MachineOperand DBase, uint64_t DDisp,
7908
+ MachineOperand SBase, uint64_t SDisp,
7909
+ unsigned Length) -> void {
7910
+ assert (Length > 0 && Length <= 256 && " Building memory op with bad length." );
7911
+ if (IsMemset) {
7912
+ MachineOperand ByteMO = earlyUseOperand (MI.getOperand (3 ));
7913
+ if (ByteMO.isImm ())
7914
+ BuildMI (*InsMBB, InsPos, DL, TII->get (SystemZ::MVI))
7915
+ .add (SBase).addImm (SDisp).add (ByteMO);
7916
+ else
7917
+ BuildMI (*InsMBB, InsPos, DL, TII->get (SystemZ::STC))
7918
+ .add (ByteMO).add (SBase).addImm (SDisp).addReg (0 );
7919
+ if (--Length == 0 )
7920
+ return ;
7921
+ }
7922
+ BuildMI (*MBB, InsPos, DL, TII->get (Opcode))
7923
+ .add (DBase).addImm (DDisp).addImm (Length)
7924
+ .add (SBase).addImm (SDisp)
7925
+ .setMemRefs (MI.memoperands ());
7926
+ };
7927
+
7879
7928
bool NeedsLoop = false ;
7880
7929
uint64_t ImmLength = 0 ;
7881
- Register LenMinus1Reg = SystemZ::NoRegister;
7930
+ Register LenAdjReg = SystemZ::NoRegister;
7882
7931
if (IsImmForm) {
7883
7932
ImmLength = LengthMO.getImm ();
7884
- ImmLength++ ; // Add back the '1' subtracted originally .
7933
+ ImmLength += IsMemset ? 2 : 1 ; // Add back the subtracted adjustment .
7885
7934
if (ImmLength == 0 ) {
7886
7935
MI.eraseFromParent ();
7887
7936
return MBB;
@@ -7905,7 +7954,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
7905
7954
NeedsLoop = true ;
7906
7955
} else {
7907
7956
NeedsLoop = true ;
7908
- LenMinus1Reg = LengthMO.getReg ();
7957
+ LenAdjReg = LengthMO.getReg ();
7909
7958
}
7910
7959
7911
7960
// When generating more than one CLC, all but the last will need to
@@ -7923,17 +7972,17 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
7923
7972
ImmLength &= 255 ;
7924
7973
} else {
7925
7974
BuildMI (*MBB, MI, DL, TII->get (SystemZ::SRLG), StartCountReg)
7926
- .addReg (LenMinus1Reg )
7975
+ .addReg (LenAdjReg )
7927
7976
.addReg (0 )
7928
7977
.addImm (8 );
7929
7978
}
7930
7979
7980
+ bool HaveSingleBase = DestBase.isIdenticalTo (SrcBase);
7931
7981
// Emit an LGHI of 0 into a new address register, inserted before MI in MBB,
// and hand that register back as a register operand.
auto loadZeroAddress = [&]() -> MachineOperand {
  Register ZeroReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
  BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), ZeroReg).addImm(0);
  return MachineOperand::CreateReg(ZeroReg, false);
};
7936
- bool HaveSingleBase = DestBase.isIdenticalTo (SrcBase);
7937
7986
if (DestBase.isReg () && DestBase.getReg () == SystemZ::NoRegister)
7938
7987
DestBase = loadZeroAddress ();
7939
7988
if (SrcBase.isReg () && SrcBase.getReg () == SystemZ::NoRegister)
@@ -7968,14 +8017,41 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
7968
8017
DoneMBB = SystemZ::emitBlockAfter (NextMBB);
7969
8018
7970
8019
// MBB:
7971
- // # Jump to AllDoneMBB if LenMinus1Reg is -1 , or fall thru to StartMBB.
8020
+ // # Jump to AllDoneMBB if LenAdjReg encodes a length of 0 (i.e. it equals
+ // # -1, or -2 for memset), or fall thru to StartMBB.
7972
8021
BuildMI (MBB, DL, TII->get (SystemZ::CGHI))
7973
- .addReg (LenMinus1Reg ).addImm (-1 );
8022
+ .addReg (LenAdjReg ).addImm (IsMemset ? - 2 : -1 );
7974
8023
BuildMI (MBB, DL, TII->get (SystemZ::BRC))
7975
8024
.addImm (SystemZ::CCMASK_ICMP).addImm (SystemZ::CCMASK_CMP_EQ)
7976
8025
.addMBB (AllDoneMBB);
7977
8026
MBB->addSuccessor (AllDoneMBB);
7978
- MBB->addSuccessor (StartMBB);
8027
+ if (!IsMemset)
8028
+ MBB->addSuccessor (StartMBB);
8029
+ else {
8030
+ // MemsetOneCheckMBB:
8031
+ // # Jump to MemsetOneMBB for a memset of length 1, or
8032
+ // # fall thru to StartMBB.
8033
+ MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter (MBB);
8034
+ MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter (&*MF.rbegin ());
8035
+ MBB->addSuccessor (MemsetOneCheckMBB);
8036
+ MBB = MemsetOneCheckMBB;
8037
+ BuildMI (MBB, DL, TII->get (SystemZ::CGHI))
8038
+ .addReg (LenAdjReg).addImm (-1 );
8039
+ BuildMI (MBB, DL, TII->get (SystemZ::BRC))
8040
+ .addImm (SystemZ::CCMASK_ICMP).addImm (SystemZ::CCMASK_CMP_EQ)
8041
+ .addMBB (MemsetOneMBB);
8042
+ MBB->addSuccessor (MemsetOneMBB, {10 , 100 });
8043
+ MBB->addSuccessor (StartMBB, {90 , 100 });
8044
+
8045
+ // MemsetOneMBB:
8046
+ // # Jump back to AllDoneMBB after a single MVI or STC.
8047
+ MBB = MemsetOneMBB;
8048
+ insertMemMemOp (MBB, MBB->end (),
8049
+ MachineOperand::CreateReg (StartDestReg, false ), DestDisp,
8050
+ MachineOperand::CreateReg (StartSrcReg, false ), SrcDisp,
8051
+ 1 );
8052
+ BuildMI (MBB, DL, TII->get (SystemZ::J)).addMBB (AllDoneMBB);
8053
+ MBB->addSuccessor (AllDoneMBB);
8054
+ }
7979
8055
7980
8056
// StartMBB:
7981
8057
// # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
@@ -8032,10 +8108,10 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
8032
8108
if (Opcode == SystemZ::MVC)
8033
8109
BuildMI (MBB, DL, TII->get (SystemZ::PFD))
8034
8110
.addImm (SystemZ::PFD_WRITE)
8035
- .addReg (ThisDestReg).addImm (DestDisp + 768 ).addReg (0 );
8036
- BuildMI (MBB, DL, TII-> get (Opcode))
8037
- . addReg (ThisDestReg). addImm ( DestDisp). addImm ( 256 )
8038
- . addReg (ThisSrcReg). addImm ( SrcDisp);
8111
+ .addReg (ThisDestReg).addImm (DestDisp - IsMemset + 768 ).addReg (0 );
8112
+ insertMemMemOp (MBB, MBB-> end (),
8113
+ MachineOperand::CreateReg (ThisDestReg, false ), DestDisp,
8114
+ MachineOperand::CreateReg (ThisSrcReg, false ), SrcDisp, 256 );
8039
8115
if (EndMBB) {
8040
8116
BuildMI (MBB, DL, TII->get (SystemZ::BRC))
8041
8117
.addImm (SystemZ::CCMASK_ICMP).addImm (SystemZ::CCMASK_CMP_NE)
@@ -8075,7 +8151,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
8075
8151
// # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
8076
8152
// # Use EXecute Relative Long for the remainder of the bytes. The target
8077
8153
// instruction of the EXRL will have a length field of 1 since 0 is an
8078
- // illegal value. The number of bytes processed becomes (%LenMinus1Reg &
8154
+ // illegal value. The number of bytes processed becomes (%LenAdjReg &
8079
8155
// 0xff) + 1.
8080
8156
// # Fall through to AllDoneMBB.
8081
8157
Register RemSrcReg = MRI.createVirtualRegister (&SystemZ::ADDR64BitRegClass);
@@ -8088,10 +8164,14 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
8088
8164
BuildMI (MBB, DL, TII->get (SystemZ::PHI), RemSrcReg)
8089
8165
.addReg (StartSrcReg).addMBB (StartMBB)
8090
8166
.addReg (NextSrcReg).addMBB (NextMBB);
8167
+ if (IsMemset)
8168
+ insertMemMemOp (MBB, MBB->end (),
8169
+ MachineOperand::CreateReg (RemDestReg, false ), DestDisp,
8170
+ MachineOperand::CreateReg (RemSrcReg, false ), SrcDisp, 1 );
8091
8171
MachineInstrBuilder EXRL_MIB =
8092
8172
BuildMI (MBB, DL, TII->get (SystemZ::EXRL_Pseudo))
8093
8173
.addImm (Opcode)
8094
- .addReg (LenMinus1Reg )
8174
+ .addReg (LenAdjReg )
8095
8175
.addReg (RemDestReg).addImm (DestDisp)
8096
8176
.addReg (RemSrcReg).addImm (SrcDisp);
8097
8177
MBB->addSuccessor (AllDoneMBB);
@@ -8107,32 +8187,10 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
8107
8187
while (ImmLength > 0 ) {
8108
8188
uint64_t ThisLength = std::min (ImmLength, uint64_t (256 ));
8109
8189
// The previous iteration might have created out-of-range displacements.
8110
- // Apply them using LAY if so.
8111
- if (!isUInt<12 >(DestDisp)) {
8112
- Register Reg = MRI.createVirtualRegister (&SystemZ::ADDR64BitRegClass);
8113
- BuildMI (*MBB, MI, MI.getDebugLoc (), TII->get (SystemZ::LAY), Reg)
8114
- .add (DestBase)
8115
- .addImm (DestDisp)
8116
- .addReg (0 );
8117
- DestBase = MachineOperand::CreateReg (Reg, false );
8118
- DestDisp = 0 ;
8119
- }
8120
- if (!isUInt<12 >(SrcDisp)) {
8121
- Register Reg = MRI.createVirtualRegister (&SystemZ::ADDR64BitRegClass);
8122
- BuildMI (*MBB, MI, MI.getDebugLoc (), TII->get (SystemZ::LAY), Reg)
8123
- .add (SrcBase)
8124
- .addImm (SrcDisp)
8125
- .addReg (0 );
8126
- SrcBase = MachineOperand::CreateReg (Reg, false );
8127
- SrcDisp = 0 ;
8128
- }
8129
- BuildMI (*MBB, MI, DL, TII->get (Opcode))
8130
- .add (DestBase)
8131
- .addImm (DestDisp)
8132
- .addImm (ThisLength)
8133
- .add (SrcBase)
8134
- .addImm (SrcDisp)
8135
- .setMemRefs (MI.memoperands ());
8190
+ // Apply them using LA/LAY if so.
8191
+ foldDisplIfNeeded (DestBase, DestDisp);
8192
+ foldDisplIfNeeded (SrcBase, SrcDisp);
8193
+ insertMemMemOp (MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
8136
8194
DestDisp += ThisLength;
8137
8195
SrcDisp += ThisLength;
8138
8196
ImmLength -= ThisLength;
@@ -8630,6 +8688,11 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
8630
8688
case SystemZ::CLCImm:
8631
8689
case SystemZ::CLCReg:
8632
8690
return emitMemMemWrapper (MI, MBB, SystemZ::CLC);
8691
+ case SystemZ::MemsetImmImm:
8692
+ case SystemZ::MemsetImmReg:
8693
+ case SystemZ::MemsetRegImm:
8694
+ case SystemZ::MemsetRegReg:
8695
+ return emitMemMemWrapper (MI, MBB, SystemZ::MVC, true /* IsMemset*/ );
8633
8696
case SystemZ::CLSTLoop:
8634
8697
return emitStringWrapper (MI, MBB, SystemZ::CLST);
8635
8698
case SystemZ::MVSTLoop:
0 commit comments