Skip to content

Commit

Permalink
[X86] Use lock or/and/xor for cases that we only care about the EFLAGS
Browse files Browse the repository at this point in the history
This is a follow-up to D137711 that fixes the rest of #58685.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D138294
  • Loading branch information
phoebewang committed Nov 20, 2022
1 parent 27998d9 commit 510e5fb
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 68 deletions.
6 changes: 6 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsX86.td
Expand Up @@ -78,6 +78,12 @@ let TargetPrefix = "x86" in {
[ImmArg<ArgIndex<2>>]>;
def int_x86_atomic_sub_cc : Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_anyint_ty, llvm_i32_ty],
[ImmArg<ArgIndex<2>>]>;
// Atomic OR / AND / XOR variants with the same signature as
// int_x86_atomic_sub_cc: each returns an i8 and takes a pointer, an
// overloaded integer value, and an i32 that must be an immediate
// (ImmArg on argument index 2).
// X86ISelLowering.cpp lowers them to X86ISD::LOR / LAND / LXOR respectively.
// NOTE(review): presumably the i32 immediate selects the condition code that
// is evaluated on EFLAGS to produce the i8 result -- confirm against
// LowerINTRINSIC_W_CHAIN.
def int_x86_atomic_or_cc : Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_anyint_ty, llvm_i32_ty],
[ImmArg<ArgIndex<2>>]>;
def int_x86_atomic_and_cc : Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_anyint_ty, llvm_i32_ty],
[ImmArg<ArgIndex<2>>]>;
def int_x86_atomic_xor_cc : Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_anyint_ty, llvm_i32_ty],
[ImmArg<ArgIndex<2>>]>;
}

// Read Processor Register.
Expand Down
47 changes: 45 additions & 2 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -5661,7 +5661,10 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::x86_axor32:
case Intrinsic::x86_axor64:
case Intrinsic::x86_atomic_add_cc:
case Intrinsic::x86_atomic_sub_cc: {
case Intrinsic::x86_atomic_sub_cc:
case Intrinsic::x86_atomic_or_cc:
case Intrinsic::x86_atomic_and_cc:
case Intrinsic::x86_atomic_xor_cc: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.ptrVal = I.getArgOperand(0);
unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits();
Expand Down Expand Up @@ -28385,7 +28388,10 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
{Chain, Op1, Op2}, VT, MMO);
}
case Intrinsic::x86_atomic_add_cc:
case Intrinsic::x86_atomic_sub_cc: {
case Intrinsic::x86_atomic_sub_cc:
case Intrinsic::x86_atomic_or_cc:
case Intrinsic::x86_atomic_and_cc:
case Intrinsic::x86_atomic_xor_cc: {
SDLoc DL(Op);
SDValue Chain = Op.getOperand(0);
SDValue Op1 = Op.getOperand(2);
Expand All @@ -28402,6 +28408,15 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
case Intrinsic::x86_atomic_sub_cc:
Opc = X86ISD::LSUB;
break;
case Intrinsic::x86_atomic_or_cc:
Opc = X86ISD::LOR;
break;
case Intrinsic::x86_atomic_and_cc:
Opc = X86ISD::LAND;
break;
case Intrinsic::x86_atomic_xor_cc:
Opc = X86ISD::LXOR;
break;
}
MachineMemOperand *MMO = cast<MemIntrinsicSDNode>(Op)->getMemOperand();
SDValue LockArith =
Expand Down Expand Up @@ -31417,6 +31432,23 @@ static bool shouldExpandCmpArithRMWInIR(AtomicRMWInst *AI) {
return Pred == CmpInst::ICMP_SLT;
return false;
}
if (Opc == AtomicRMWInst::Or) {
if (match(I, m_OneUse(m_c_Or(m_Specific(Op), m_Value()))) &&
match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))
return Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SLT;
}
if (Opc == AtomicRMWInst::And) {
if (match(I, m_OneUse(m_c_And(m_Specific(Op), m_Value()))) &&
match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))
return Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SLT;
}
if (Opc == AtomicRMWInst::Xor) {
if (match(I, m_c_ICmp(Pred, m_Specific(Op), m_Value())))
return Pred == CmpInst::ICMP_EQ;
if (match(I, m_OneUse(m_c_Xor(m_Specific(Op), m_Value()))) &&
match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))
return Pred == CmpInst::ICMP_SLT;
}

return false;
}
Expand Down Expand Up @@ -31446,6 +31478,15 @@ void X86TargetLowering::emitCmpArithAtomicRMWIntrinsic(
case AtomicRMWInst::Sub:
IID = Intrinsic::x86_atomic_sub_cc;
break;
case AtomicRMWInst::Or:
IID = Intrinsic::x86_atomic_or_cc;
break;
case AtomicRMWInst::And:
IID = Intrinsic::x86_atomic_and_cc;
break;
case AtomicRMWInst::Xor:
IID = Intrinsic::x86_atomic_xor_cc;
break;
}
Function *CmpArith =
Intrinsic::getDeclaration(AI->getModule(), IID, AI->getType());
Expand Down Expand Up @@ -31487,6 +31528,8 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
case AtomicRMWInst::Or:
case AtomicRMWInst::And:
case AtomicRMWInst::Xor:
if (shouldExpandCmpArithRMWInIR(AI))
return AtomicExpansionKind::CmpArithIntrinsic;
return shouldExpandLogicAtomicRMWInIR(AI);
case AtomicRMWInst::Nand:
case AtomicRMWInst::Max:
Expand Down
75 changes: 9 additions & 66 deletions llvm/test/CodeGen/X86/pr58685.ll
Expand Up @@ -51,16 +51,7 @@ define i1 @lock_sub_sets(ptr %0, i32 %1) nounwind {
define i1 @lock_or_sete(ptr %0, i32 %1) nounwind {
; CHECK-LABEL: lock_or_sete:
; CHECK: # %bb.0:
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB4_1: # %atomicrmw.start
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: orl %esi, %ecx
; CHECK-NEXT: lock cmpxchgl %ecx, (%rdi)
; CHECK-NEXT: jne .LBB4_1
; CHECK-NEXT: # %bb.2: # %atomicrmw.end
; CHECK-NEXT: orl %esi, %eax
; CHECK-NEXT: lock orl %esi, (%rdi)
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%3 = atomicrmw or ptr %0, i32 %1 seq_cst, align 4
Expand All @@ -72,18 +63,8 @@ define i1 @lock_or_sete(ptr %0, i32 %1) nounwind {
define i1 @lock_or_sets(ptr %0, i32 %1) nounwind {
; CHECK-LABEL: lock_or_sets:
; CHECK: # %bb.0:
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB5_1: # %atomicrmw.start
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: orl %esi, %ecx
; CHECK-NEXT: lock cmpxchgl %ecx, (%rdi)
; CHECK-NEXT: jne .LBB5_1
; CHECK-NEXT: # %bb.2: # %atomicrmw.end
; CHECK-NEXT: orl %esi, %eax
; CHECK-NEXT: shrl $31, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: lock orl %esi, (%rdi)
; CHECK-NEXT: sets %al
; CHECK-NEXT: retq
%3 = atomicrmw or ptr %0, i32 %1 seq_cst, align 4
%4 = or i32 %3, %1
Expand All @@ -94,16 +75,7 @@ define i1 @lock_or_sets(ptr %0, i32 %1) nounwind {
define i1 @lock_and_sete(ptr %0, i32 %1) nounwind {
; CHECK-LABEL: lock_and_sete:
; CHECK: # %bb.0:
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB6_1: # %atomicrmw.start
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: andl %esi, %ecx
; CHECK-NEXT: lock cmpxchgl %ecx, (%rdi)
; CHECK-NEXT: jne .LBB6_1
; CHECK-NEXT: # %bb.2: # %atomicrmw.end
; CHECK-NEXT: testl %esi, %eax
; CHECK-NEXT: lock andl %esi, (%rdi)
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%3 = atomicrmw and ptr %0, i32 %1 seq_cst, align 4
Expand All @@ -115,18 +87,8 @@ define i1 @lock_and_sete(ptr %0, i32 %1) nounwind {
define i1 @lock_and_sets(ptr %0, i32 %1) nounwind {
; CHECK-LABEL: lock_and_sets:
; CHECK: # %bb.0:
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB7_1: # %atomicrmw.start
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: andl %esi, %ecx
; CHECK-NEXT: lock cmpxchgl %ecx, (%rdi)
; CHECK-NEXT: jne .LBB7_1
; CHECK-NEXT: # %bb.2: # %atomicrmw.end
; CHECK-NEXT: andl %esi, %eax
; CHECK-NEXT: shrl $31, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: lock andl %esi, (%rdi)
; CHECK-NEXT: sets %al
; CHECK-NEXT: retq
%3 = atomicrmw and ptr %0, i32 %1 seq_cst, align 4
%4 = and i32 %3, %1
Expand All @@ -137,16 +99,7 @@ define i1 @lock_and_sets(ptr %0, i32 %1) nounwind {
define i1 @lock_xor_sete(ptr %0, i32 %1) nounwind {
; CHECK-LABEL: lock_xor_sete:
; CHECK: # %bb.0:
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB8_1: # %atomicrmw.start
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: xorl %esi, %ecx
; CHECK-NEXT: lock cmpxchgl %ecx, (%rdi)
; CHECK-NEXT: jne .LBB8_1
; CHECK-NEXT: # %bb.2: # %atomicrmw.end
; CHECK-NEXT: cmpl %esi, %eax
; CHECK-NEXT: lock xorl %esi, (%rdi)
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%3 = atomicrmw xor ptr %0, i32 %1 seq_cst, align 4
Expand All @@ -157,18 +110,8 @@ define i1 @lock_xor_sete(ptr %0, i32 %1) nounwind {
define i1 @lock_xor_sets(ptr %0, i32 %1) nounwind {
; CHECK-LABEL: lock_xor_sets:
; CHECK: # %bb.0:
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB9_1: # %atomicrmw.start
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: xorl %esi, %ecx
; CHECK-NEXT: lock cmpxchgl %ecx, (%rdi)
; CHECK-NEXT: jne .LBB9_1
; CHECK-NEXT: # %bb.2: # %atomicrmw.end
; CHECK-NEXT: xorl %esi, %eax
; CHECK-NEXT: shrl $31, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: lock xorl %esi, (%rdi)
; CHECK-NEXT: sets %al
; CHECK-NEXT: retq
%3 = atomicrmw xor ptr %0, i32 %1 seq_cst, align 4
%4 = xor i32 %3, %1
Expand Down

0 comments on commit 510e5fb

Please sign in to comment.