Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions llvm/include/llvm/CodeGen/TargetLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -527,7 +527,10 @@ class LLVM_ABI TargetLoweringBase {
/// and conditional branches. With multiple condition registers, the code
/// generator will not aggressively sink comparisons into the blocks of their
/// users.
virtual bool hasMultipleConditionRegisters(EVT VT) const { return false; }
virtual bool hasMultipleConditionRegisters(EVT ResVT,
std::optional<EVT> CmpVT) const {
// Default implementation: neither ResVT nor CmpVT is inspected and the
// answer is always false. Targets override this hook to opt in.
return false;
}

/// Return true if the target has BitExtract instructions.
bool hasExtractBitsInsn() const { return HasExtractBitsInsn; }
Expand Down Expand Up @@ -2493,7 +2496,7 @@ class LLVM_ABI TargetLoweringBase {
EVT VT) const {
// If a target has multiple condition registers, then it likely has logical
// operations on those registers.
if (hasMultipleConditionRegisters(VT))
if (hasMultipleConditionRegisters(VT, std::nullopt))
return false;
// Only do the transform if the value won't be split into multiple
// registers.
Expand Down
6 changes: 5 additions & 1 deletion llvm/lib/CodeGen/CodeGenPrepare.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1840,7 +1840,11 @@ bool CodeGenPrepare::unfoldPowerOf2Test(CmpInst *Cmp) {
///
/// Return true if any changes are made.
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
if (TLI.hasMultipleConditionRegisters(EVT::getEVT(Cmp->getType())))
std::optional<EVT> CmpVT;
if (Cmp->getOperand(0)->getType()->isIntegerTy())
CmpVT = EVT::getEVT(Cmp->getOperand(0)->getType());

if (TLI.hasMultipleConditionRegisters(EVT::getEVT(Cmp->getType()), CmpVT))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this equivalent to just checking if the compared type is legal?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very similar, we don't account for CondCode or anything like that.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not just check whether the type is legal, then? Maybe limited to scalars. I think it applies to other targets as well.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had looked at that, but the regressions on other targets weren't something I wanted to start diving into - but I'll raise a PR for reference.

return false;

// Avoid sinking soft-FP comparisons, since this can move them into a loop.
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -915,8 +915,9 @@ class AArch64TargetLowering : public TargetLowering {
return VecOp.getOpcode() == ISD::SETCC;
}

bool hasMultipleConditionRegisters(EVT VT) const override {
return VT.isScalableVector();
/// Reports true exactly when the result type \p ResVT is a scalable vector.
/// \p CmpVT is accepted to match the base-class signature but is not used.
bool hasMultipleConditionRegisters(EVT ResVT,
std::optional<EVT> CmpVT) const override {
return ResVT.isScalableVector();
}
};

Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,8 @@ class AMDGPUTargetLowering : public TargetLowering {
return MVT::i32;
}

bool hasMultipleConditionRegisters(EVT VT) const override {
bool hasMultipleConditionRegisters(EVT ResVT,
std::optional<EVT> CmpVT) const override {
// FIXME: This is only partially true. If we have to do vector compares, any
// SGPR pair can be a condition register. If we have a uniform condition, we
// are better off doing SALU operations, where there is only one SCC. For
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20116,6 +20116,7 @@ Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
}

bool PPCTargetLowering::hasMultipleConditionRegisters(EVT VT) const {
/// Returns whatever Subtarget.useCRBits() reports; neither \p ResVT nor
/// \p CmpVT is consulted.
bool PPCTargetLowering::hasMultipleConditionRegisters(
EVT ResVT, std::optional<EVT> CmpVT) const {
return Subtarget.useCRBits();
}
3 changes: 2 additions & 1 deletion llvm/lib/Target/PowerPC/PPCISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -1194,7 +1194,8 @@ namespace llvm {
bool IsVarArg) const;
bool supportsTailCallFor(const CallBase *CB) const;

bool hasMultipleConditionRegisters(EVT VT) const override;
bool hasMultipleConditionRegisters(EVT ResVT,
std::optional<EVT> CmpVT) const override;

private:
struct ReuseLoadInfo {
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3403,6 +3403,14 @@ bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
}

/// Decide whether comparisons should be kept out of aggressive sinking.
///
/// When the compared operand type \p CmpVT is known, the answer is true only
/// for scalar integers wider than 64 bits on 64-bit subtargets, or wider than
/// 32 bits otherwise. When \p CmpVT is absent, the decision is delegated to
/// the TargetLowering base implementation.
bool X86TargetLowering::hasMultipleConditionRegisters(
    EVT ResVT, std::optional<EVT> CmpVT) const {
  // No compare type supplied: fall back to the generic answer.
  if (!CmpVT)
    return TargetLowering::hasMultipleConditionRegisters(ResVT, CmpVT);

  // Check scalar-ness first so the size query only runs on scalar integers.
  if (!CmpVT->isScalarInteger())
    return false;

  return CmpVT->getSizeInBits() > (Subtarget.is64Bit() ? 64 : 32);
}

bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,
bool) const {
// TODO: Allow vectors?
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -1542,6 +1542,11 @@ namespace llvm {
/// supported.
bool shouldScalarizeBinop(SDValue) const override;

/// If this returns true, the code generator will not aggressively sink
/// comparisons into the blocks of their users.
bool hasMultipleConditionRegisters(EVT ResVT,
std::optional<EVT> CmpVT) const override;

/// Extract of a scalar FP value from index 0 of a vector is free.
bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
EVT EltVT = VT.getScalarType();
Expand Down
55 changes: 25 additions & 30 deletions llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,16 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
; CHECK-NEXT: .cfi_offset %edi, -16
; CHECK-NEXT: .cfi_offset %ebx, -12
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: Ltmp0:
; CHECK-NEXT: ## implicit-def: $ebx
; CHECK-NEXT: Ltmp0: ## EH_LABEL
; CHECK-NEXT: ## implicit-def: $edi
; CHECK-NEXT: calll __Znam
; CHECK-NEXT: Ltmp1:
; CHECK-NEXT: Ltmp1: ## EH_LABEL
; CHECK-NEXT: ## %bb.1: ## %bb11
; CHECK-NEXT: movl %eax, %esi
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: movb $1, %bl
; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: jne LBB0_2
; CHECK-NEXT: ## %bb.7: ## %bb31
; CHECK-NEXT: ## implicit-def: $eax
Expand All @@ -53,23 +53,20 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
; CHECK-NEXT: ## Child Loop BB0_13 Depth 2
; CHECK-NEXT: ## Child Loop BB0_16 Depth 3
; CHECK-NEXT: ## Child Loop BB0_21 Depth 2
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: jne LBB0_9
; CHECK-NEXT: ## %bb.10: ## %bb41
; CHECK-NEXT: ## in Loop: Header=BB0_8 Depth=1
; CHECK-NEXT: Ltmp2:
; CHECK-NEXT: Ltmp2: ## EH_LABEL
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl %esi, (%esp)
; CHECK-NEXT: calll _Pjii
; CHECK-NEXT: Ltmp3:
; CHECK-NEXT: Ltmp3: ## EH_LABEL
; CHECK-NEXT: ## %bb.11: ## %bb42
; CHECK-NEXT: ## in Loop: Header=BB0_8 Depth=1
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: decl %eax
; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: jne LBB0_18
; CHECK-NEXT: ## %bb.12: ## %bb45.preheader
; CHECK-NEXT: ## in Loop: Header=BB0_8 Depth=1
Expand All @@ -78,8 +75,7 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
; CHECK-NEXT: ## Parent Loop BB0_8 Depth=1
; CHECK-NEXT: ## => This Loop Header: Depth=2
; CHECK-NEXT: ## Child Loop BB0_16 Depth 3
; CHECK-NEXT: movb $1, %cl
; CHECK-NEXT: testb %cl, %cl
; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: jne LBB0_19
; CHECK-NEXT: ## %bb.14: ## %bb48
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=2
Expand All @@ -88,14 +84,14 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=2
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: movl %esi, %edx
; CHECK-NEXT: movl %edi, %ebx
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
; CHECK-NEXT: LBB0_16: ## %bb49
; CHECK-NEXT: ## Parent Loop BB0_8 Depth=1
; CHECK-NEXT: ## Parent Loop BB0_13 Depth=2
; CHECK-NEXT: ## => This Inner Loop Header: Depth=3
; CHECK-NEXT: incl %ecx
; CHECK-NEXT: addl $4, %edx
; CHECK-NEXT: decl %ebx
; CHECK-NEXT: decl %edi
; CHECK-NEXT: jne LBB0_16
; CHECK-NEXT: LBB0_17: ## %bb57
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=2
Expand All @@ -107,13 +103,12 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $0, (%esp)
; CHECK-NEXT: calll ___bzero
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: jne LBB0_22
; CHECK-NEXT: ## %bb.20: ## %bb61.preheader
; CHECK-NEXT: ## in Loop: Header=BB0_8 Depth=1
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: movl %edi, %ecx
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
; CHECK-NEXT: LBB0_21: ## %bb61
; CHECK-NEXT: ## Parent Loop BB0_8 Depth=1
; CHECK-NEXT: ## => This Inner Loop Header: Depth=2
Expand All @@ -126,32 +121,32 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
; CHECK-NEXT: decl {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; CHECK-NEXT: jmp LBB0_8
; CHECK-NEXT: LBB0_18: ## %bb43
; CHECK-NEXT: Ltmp5:
; CHECK-NEXT: movl %esi, %ebx
; CHECK-NEXT: Ltmp5: ## EH_LABEL
; CHECK-NEXT: movl %esi, %edi
; CHECK-NEXT: calll _OnOverFlow
; CHECK-NEXT: Ltmp6:
; CHECK-NEXT: Ltmp6: ## EH_LABEL
; CHECK-NEXT: jmp LBB0_3
; CHECK-NEXT: LBB0_2: ## %bb29
; CHECK-NEXT: Ltmp7:
; CHECK-NEXT: movl %esi, %ebx
; CHECK-NEXT: Ltmp7: ## EH_LABEL
; CHECK-NEXT: movl %esi, %edi
; CHECK-NEXT: calll _OnOverFlow
; CHECK-NEXT: Ltmp8:
; CHECK-NEXT: Ltmp8: ## EH_LABEL
; CHECK-NEXT: LBB0_3: ## %bb30
; CHECK-NEXT: ud2
; CHECK-NEXT: LBB0_4: ## %bb20.loopexit
; CHECK-NEXT: Ltmp4:
; CHECK-NEXT: Ltmp4: ## EH_LABEL
; CHECK-NEXT: LBB0_9:
; CHECK-NEXT: movl %esi, %ebx
; CHECK-NEXT: movl %esi, %edi
; CHECK-NEXT: LBB0_6: ## %bb23
; CHECK-NEXT: testl %ebx, %ebx
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: addl $28, %esp
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %edi
; CHECK-NEXT: popl %ebx
; CHECK-NEXT: popl %ebp
; CHECK-NEXT: retl
; CHECK-NEXT: LBB0_5: ## %bb20.loopexit.split-lp
; CHECK-NEXT: Ltmp9:
; CHECK-NEXT: Ltmp9: ## EH_LABEL
; CHECK-NEXT: jmp LBB0_6
; CHECK-NEXT: Lfunc_end0:
bb:
Expand Down
99 changes: 24 additions & 75 deletions llvm/test/CodeGen/X86/pr166534.ll
Original file line number Diff line number Diff line change
@@ -1,108 +1,57 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=SSE2
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE4
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX2
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX512
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX

define void @pr166534(ptr %pa, ptr %pb, ptr %pc, ptr %pd) {
; SSE2-LABEL: pr166534:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movq (%rdi), %rax
; SSE2-NEXT: movq 8(%rdi), %r8
; SSE2-NEXT: movdqu (%rdi), %xmm0
; SSE2-NEXT: movq (%rsi), %r9
; SSE2-NEXT: movq 8(%rsi), %rdi
; SSE2-NEXT: movdqu (%rsi), %xmm1
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; SSE2-NEXT: pmovmskb %xmm1, %esi
; SSE2-NEXT: xorl %r10d, %r10d
; SSE2-NEXT: xorl %eax, %eax
; SSE2-NEXT: cmpl $65535, %esi # imm = 0xFFFF
; SSE2-NEXT: sete %r10b
; SSE2-NEXT: orq %r10, (%rdx)
; SSE2-NEXT: sete %al
; SSE2-NEXT: orq %rax, (%rdx)
; SSE2-NEXT: cmpl $65535, %esi # imm = 0xFFFF
; SSE2-NEXT: jne .LBB0_2
; SSE2-NEXT: # %bb.1: # %if.then
; SSE2-NEXT: xorq %r9, %rax
; SSE2-NEXT: xorq %rdi, %r8
; SSE2-NEXT: xorl %edx, %edx
; SSE2-NEXT: orq %rax, %r8
; SSE2-NEXT: sete %dl
; SSE2-NEXT: orq %rdx, (%rcx)
; SSE2-NEXT: orq %rax, (%rcx)
; SSE2-NEXT: .LBB0_2: # %if.end
; SSE2-NEXT: retq
;
; SSE4-LABEL: pr166534:
; SSE4: # %bb.0: # %entry
; SSE4-NEXT: movq (%rdi), %rax
; SSE4-NEXT: movq 8(%rdi), %r8
; SSE4-NEXT: movdqu (%rdi), %xmm0
; SSE4-NEXT: movq (%rsi), %r9
; SSE4-NEXT: movq 8(%rsi), %rdi
; SSE4-NEXT: movdqu (%rsi), %xmm1
; SSE4-NEXT: pxor %xmm0, %xmm1
; SSE4-NEXT: xorl %esi, %esi
; SSE4-NEXT: xorl %eax, %eax
; SSE4-NEXT: ptest %xmm1, %xmm1
; SSE4-NEXT: sete %sil
; SSE4-NEXT: orq %rsi, (%rdx)
; SSE4-NEXT: sete %al
; SSE4-NEXT: orq %rax, (%rdx)
; SSE4-NEXT: ptest %xmm1, %xmm1
; SSE4-NEXT: jne .LBB0_2
; SSE4-NEXT: # %bb.1: # %if.then
; SSE4-NEXT: xorq %r9, %rax
; SSE4-NEXT: xorq %rdi, %r8
; SSE4-NEXT: xorl %edx, %edx
; SSE4-NEXT: orq %rax, %r8
; SSE4-NEXT: sete %dl
; SSE4-NEXT: orq %rdx, (%rcx)
; SSE4-NEXT: orq %rax, (%rcx)
; SSE4-NEXT: .LBB0_2: # %if.end
; SSE4-NEXT: retq
;
; AVX2-LABEL: pr166534:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: movq (%rdi), %rax
; AVX2-NEXT: movq 8(%rdi), %r8
; AVX2-NEXT: vmovdqu (%rdi), %xmm0
; AVX2-NEXT: movq (%rsi), %rdi
; AVX2-NEXT: vpxor (%rsi), %xmm0, %xmm0
; AVX2-NEXT: movq 8(%rsi), %rsi
; AVX2-NEXT: xorl %r9d, %r9d
; AVX2-NEXT: vptest %xmm0, %xmm0
; AVX2-NEXT: sete %r9b
; AVX2-NEXT: orq %r9, (%rdx)
; AVX2-NEXT: vptest %xmm0, %xmm0
; AVX2-NEXT: jne .LBB0_2
; AVX2-NEXT: # %bb.1: # %if.then
; AVX2-NEXT: xorq %rdi, %rax
; AVX2-NEXT: xorq %rsi, %r8
; AVX2-NEXT: xorl %edx, %edx
; AVX2-NEXT: orq %rax, %r8
; AVX2-NEXT: sete %dl
; AVX2-NEXT: orq %rdx, (%rcx)
; AVX2-NEXT: .LBB0_2: # %if.end
; AVX2-NEXT: retq
;
; AVX512-LABEL: pr166534:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: movq (%rdi), %rax
; AVX512-NEXT: movq 8(%rdi), %r8
; AVX512-NEXT: vmovdqu (%rdi), %xmm0
; AVX512-NEXT: movq (%rsi), %r9
; AVX512-NEXT: movq 8(%rsi), %rdi
; AVX512-NEXT: vpxor (%rsi), %xmm0, %xmm0
; AVX512-NEXT: xorl %esi, %esi
; AVX512-NEXT: vptest %xmm0, %xmm0
; AVX512-NEXT: sete %sil
; AVX512-NEXT: orq %rsi, (%rdx)
; AVX512-NEXT: vptest %xmm0, %xmm0
; AVX512-NEXT: jne .LBB0_2
; AVX512-NEXT: # %bb.1: # %if.then
; AVX512-NEXT: xorq %r9, %rax
; AVX512-NEXT: xorq %rdi, %r8
; AVX512-NEXT: xorl %edx, %edx
; AVX512-NEXT: orq %rax, %r8
; AVX512-NEXT: sete %dl
; AVX512-NEXT: orq %rdx, (%rcx)
; AVX512-NEXT: .LBB0_2: # %if.end
; AVX512-NEXT: retq
; AVX-LABEL: pr166534:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovdqu (%rdi), %xmm0
; AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
; AVX-NEXT: xorl %eax, %eax
; AVX-NEXT: vptest %xmm0, %xmm0
; AVX-NEXT: sete %al
; AVX-NEXT: orq %rax, (%rdx)
; AVX-NEXT: vptest %xmm0, %xmm0
; AVX-NEXT: jne .LBB0_2
; AVX-NEXT: # %bb.1: # %if.then
; AVX-NEXT: orq %rax, (%rcx)
; AVX-NEXT: .LBB0_2: # %if.end
; AVX-NEXT: retq
entry:
%a = load i128, ptr %pa, align 8
%b = load i128, ptr %pb, align 8
Expand Down