-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SystemZ] Simplify handling of AtomicRMW instructions. #74789
Conversation
You can test this locally with the following command: git-clang-format --diff 0808be47b8fbf0307d0b6f2eb45ba9bfe1b3ae65 08b7f348b361f13db5f5cd6bd6888f4448fbc4df -- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp llvm/lib/Target/SystemZ/SystemZISelLowering.h View the diff from clang-format here. diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index f79787d4ba..4fc97150c3 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -4367,20 +4367,19 @@ static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL,
EVT WideVT = MVT::i32;
// Get the address of the containing word.
- AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
- DAG.getConstant(-4, DL, PtrVT));
+ AlignedAddr =
+ DAG.getNode(ISD::AND, DL, PtrVT, Addr, DAG.getConstant(-4, DL, PtrVT));
// Get the number of bits that the word must be rotated left in order
// to bring the field to the top bits of a GR32.
- BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
- DAG.getConstant(3, DL, PtrVT));
+ BitShift =
+ DAG.getNode(ISD::SHL, DL, PtrVT, Addr, DAG.getConstant(3, DL, PtrVT));
BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
// Get the complementing shift amount, for rotating a field in the top
// bits back to its proper position.
NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
DAG.getConstant(0, DL, WideVT), BitShift);
-
}
// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
@@ -4457,10 +4456,9 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
SDValue Src2 = Node->getVal();
SDLoc DL(Src2);
SDValue NegSrc2 =
- DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2);
- return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
- Node->getChain(), Node->getBasePtr(), NegSrc2,
- Node->getMemOperand());
+ DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2);
+ return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT, Node->getChain(),
+ Node->getBasePtr(), NegSrc2, Node->getMemOperand());
}
return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
@@ -7968,14 +7966,14 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
DebugLoc DL = MI.getDebugLoc();
// Get the right opcodes for the displacement.
- unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
+ unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
assert(LOpcode && CSOpcode && "Displacement out of range");
// Create virtual registers for temporary results.
- Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
- Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
- Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
+ Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
+ Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
+ Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
@@ -8005,14 +8003,17 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
.addReg(OrigVal).addMBB(StartMBB)
.addReg(Dest).addMBB(LoopMBB);
BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
- .addReg(OldVal).addReg(BitShift).addImm(0);
+ .addReg(OldVal)
+ .addReg(BitShift)
+ .addImm(0);
if (Invert) {
// Perform the operation normally and then invert every bit of the field.
Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
// XILF with the upper BitSize bits set.
BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
- .addReg(Tmp).addImm(-1U << (32 - BitSize));
+ .addReg(Tmp)
+ .addImm(-1U << (32 - BitSize));
} else if (BinOpcode)
// A simply binary operation.
BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
@@ -8025,7 +8026,9 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
.addReg(RotatedOldVal).addReg(Src2.getReg())
.addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
- .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
+ .addReg(RotatedNewVal)
+ .addReg(NegBitShift)
+ .addImm(0);
BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
.addReg(OldVal)
.addReg(NewVal)
@@ -8063,14 +8066,14 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
DebugLoc DL = MI.getDebugLoc();
// Get the right opcodes for the displacement.
- unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
+ unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
assert(LOpcode && CSOpcode && "Displacement out of range");
// Create virtual registers for temporary results.
- Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
- Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
- Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
+ Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
+ Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
+ Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
@@ -8100,7 +8103,9 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
.addReg(OrigVal).addMBB(StartMBB)
.addReg(Dest).addMBB(UpdateMBB);
BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
- .addReg(OldVal).addReg(BitShift).addImm(0);
+ .addReg(OldVal)
+ .addReg(BitShift)
+ .addImm(0);
BuildMI(MBB, DL, TII->get(CompareOpcode))
.addReg(RotatedOldVal).addReg(Src2);
BuildMI(MBB, DL, TII->get(SystemZ::BRC))
@@ -8113,8 +8118,11 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
// # fall through to UpdateMBB
MBB = UseAltMBB;
BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
- .addReg(RotatedOldVal).addReg(Src2)
- .addImm(32).addImm(31 + BitSize).addImm(0);
+ .addReg(RotatedOldVal)
+ .addReg(Src2)
+ .addImm(32)
+ .addImm(31 + BitSize)
+ .addImm(0);
MBB->addSuccessor(UpdateMBB);
// UpdateMBB:
@@ -8129,7 +8137,9 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
.addReg(RotatedOldVal).addMBB(LoopMBB)
.addReg(RotatedAltVal).addMBB(UseAltMBB);
BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
- .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
+ .addReg(RotatedNewVal)
+ .addReg(NegBitShift)
+ .addImm(0);
BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
.addReg(OldVal)
.addReg(NewVal)
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See inline comments for possible follow-on actions. Otherwise LGTM.
RMW->getType()->isIntegerTy(128)) | ||
? AtomicExpansionKind::CmpXChg | ||
: AtomicExpansionKind::None; | ||
// Don't expand subword operations as they require special treatment. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I understand the AtomicExpand pass does have support for subword operations - not sure if this is sufficient for what we need. In any case, that could be looked at as a follow-on.
.addReg(Tmp).addImm(-1U << (32 - BitSize)); | ||
else { | ||
// Use LCGR and add -1 to the result, which is more compact than | ||
// an XILF, XILH pair. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As an aside, if LCGR/AGHI is really a more efficient way to implement a 64-bit NOT, we should do this generally (not just special-cased inside atomic ops). This can be looked at as a follow-on.
Let the AtomicExpand pass do more of the job of expanding AtomicRMWInst instances in order to simplify the handling in the backend.
The only cases that the backend needs to handle itself are those of subword size (8/16 bits) and those directly corresponding to a target instruction.
Tests updated with some codegen changes for z10 that are hopefully ok. The min/max and nand tests are not run for later CPUs, but should also be relevant for them. One thing noted is that the IfConverter now fails to produce a CondReturn in the min/max tests. Some tests that seemed to exercise different immediate instructions now produce different codegen, which should be correct, even though perhaps some of the testing is now redundant.