diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index b229659415d55..62ded89a7d5de 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -527,7 +527,10 @@ class LLVM_ABI TargetLoweringBase {
   /// and conditional branches. With multiple condition registers, the code
   /// generator will not aggressively sink comparisons into the blocks of their
   /// users.
-  virtual bool hasMultipleConditionRegisters(EVT VT) const { return false; }
+  virtual bool hasMultipleConditionRegisters(EVT ResVT,
+                                             std::optional<EVT> CmpVT) const {
+    return false;
+  }
 
   /// Return true if the target has BitExtract instructions.
   bool hasExtractBitsInsn() const { return HasExtractBitsInsn; }
@@ -2493,7 +2496,7 @@ class LLVM_ABI TargetLoweringBase {
                                        EVT VT) const {
     // If a target has multiple condition registers, then it likely has logical
     // operations on those registers.
-    if (hasMultipleConditionRegisters(VT))
+    if (hasMultipleConditionRegisters(VT, std::nullopt))
       return false;
     // Only do the transform if the value won't be split into multiple
     // registers.
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 0309e225d9df4..3f2c6bf18105b 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1840,7 +1840,11 @@ bool CodeGenPrepare::unfoldPowerOf2Test(CmpInst *Cmp) {
 ///
 /// Return true if any changes are made.
 static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
-  if (TLI.hasMultipleConditionRegisters(EVT::getEVT(Cmp->getType())))
+  std::optional<EVT> CmpVT;
+  if (Cmp->getOperand(0)->getType()->isIntegerTy())
+    CmpVT = EVT::getEVT(Cmp->getOperand(0)->getType());
+
+  if (TLI.hasMultipleConditionRegisters(EVT::getEVT(Cmp->getType()), CmpVT))
     return false;
 
   // Avoid sinking soft-FP comparisons, since this can move them into a loop.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 70bfae717fb76..9111c8320d698 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -915,8 +915,9 @@ class AArch64TargetLowering : public TargetLowering {
     return VecOp.getOpcode() == ISD::SETCC;
   }
 
-  bool hasMultipleConditionRegisters(EVT VT) const override {
-    return VT.isScalableVector();
+  bool hasMultipleConditionRegisters(EVT ResVT,
+                                     std::optional<EVT> CmpVT) const override {
+    return ResVT.isScalableVector();
   }
 };
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index bdaf48652d107..d49e38fae8147 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -395,7 +395,8 @@ class AMDGPUTargetLowering : public TargetLowering {
     return MVT::i32;
   }
 
-  bool hasMultipleConditionRegisters(EVT VT) const override {
+  bool hasMultipleConditionRegisters(EVT ResVT,
+                                     std::optional<EVT> CmpVT) const override {
    // FIXME: This is only partially true. If we have to do vector compares, any
    // SGPR pair can be a condition register. If we have a uniform condition, we
    // are better off doing SALU operations, where there is only one SCC.
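
Reading of the new contract (annotation, not part of the patch): the hook now
receives both the comparison result type and, when the compared operands are
scalar integers, their type; every other call site passes std::nullopt and
keeps the old behaviour. A minimal self-contained sketch of that wiring, where
VT, TLIBase, and maySinkCmp are illustrative stand-ins, not LLVM names:

  // Toy model of the new hook wiring; `VT` stands in for llvm::EVT and
  // `TLIBase` for TargetLoweringBase. Sketch only, not LLVM code.
  #include <optional>

  struct VT {
    unsigned Bits = 0;
    bool ScalarInt = false;
  };

  struct TLIBase {
    // Default, as in TargetLoweringBase: no multiple condition registers.
    virtual bool hasMultipleConditionRegisters(VT ResVT,
                                               std::optional<VT> CmpVT) const {
      return false;
    }
    virtual ~TLIBase() = default;
  };

  // Caller shaped like the sinkCmpExpression change: only integer compare
  // operands populate CmpVT; FP/vector operands keep passing std::nullopt.
  bool maySinkCmp(const TLIBase &TLI, VT ResVT, bool OpIsInt, unsigned OpBits) {
    std::optional<VT> CmpVT;
    if (OpIsInt)
      CmpVT = VT{OpBits, true};
    return !TLI.hasMultipleConditionRegisters(ResVT, CmpVT);
  }
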
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 20fc849ea4aa5..9f7ce41e399d5 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -20116,6 +20116,7 @@ Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
       Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
 }
 
-bool PPCTargetLowering::hasMultipleConditionRegisters(EVT VT) const {
+bool PPCTargetLowering::hasMultipleConditionRegisters(
+    EVT ResVT, std::optional<EVT> CmpVT) const {
   return Subtarget.useCRBits();
 }
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 880aca751d7d6..68ce19ac75b31 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1194,7 +1194,8 @@ namespace llvm {
                             bool IsVarArg) const;
     bool supportsTailCallFor(const CallBase *CB) const;
 
-    bool hasMultipleConditionRegisters(EVT VT) const override;
+    bool hasMultipleConditionRegisters(EVT ResVT,
+                                       std::optional<EVT> CmpVT) const override;
 
   private:
     struct ReuseLoadInfo {
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4d44227b3ecd4..d7d3fab496364 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3403,6 +3403,14 @@ bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
   return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
 }
 
+bool X86TargetLowering::hasMultipleConditionRegisters(
+    EVT ResVT, std::optional<EVT> CmpVT) const {
+  if (CmpVT.has_value())
+    return CmpVT->isScalarInteger() &&
+           CmpVT->getSizeInBits() > (Subtarget.is64Bit() ? 64 : 32);
+  return TargetLowering::hasMultipleConditionRegisters(ResVT, CmpVT);
+}
+
 bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,
                                              bool) const {
   // TODO: Allow vectors?
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index b7151f65942b4..612ca589ee139 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1542,6 +1542,11 @@ namespace llvm {
     /// supported.
     bool shouldScalarizeBinop(SDValue) const override;
 
+    /// If this returns true, the code generator will not aggressively sink
+    /// comparisons into the blocks of their users.
+    bool hasMultipleConditionRegisters(EVT ResVT,
+                                       std::optional<EVT> CmpVT) const override;
+
     /// Extract of a scalar FP value from index 0 of a vector is free.
     bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
       EVT EltVT = VT.getScalarType();
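
The apparent rationale for the X86 override above: a scalar integer compare
wider than one general-purpose register (i128 on 64-bit, i64 on 32-bit) must
be assembled from a register pair with extra XOR/OR instructions, so
duplicating it into every user block is a loss; reporting multiple condition
registers makes CodeGenPrepare leave the compare in place. A compilable sketch
of just that predicate, with plain structs in place of EVT and Subtarget
(CmpTy and x86HasMultipleCondRegs are illustrative names):

  // Sketch of the X86 policy; assumptions: CmpTy stands in for llvm::EVT,
  // the bool flag for X86Subtarget::is64Bit().
  #include <cassert>
  #include <optional>

  struct CmpTy {
    unsigned Bits;
    bool ScalarInt;
  };

  static bool x86HasMultipleCondRegs(std::optional<CmpTy> CmpVT, bool Is64Bit) {
    if (CmpVT.has_value())
      return CmpVT->ScalarInt && CmpVT->Bits > (Is64Bit ? 64u : 32u);
    return false; // falls back to the TargetLowering default
  }

  int main() {
    // i128 compare on x86-64: wider than one GPR, so do not sink/duplicate.
    assert(x86HasMultipleCondRegs(CmpTy{128, true}, /*Is64Bit=*/true));
    // i64 fits a single GPR on x86-64: old sinking behaviour is kept.
    assert(!x86HasMultipleCondRegs(CmpTy{64, true}, /*Is64Bit=*/true));
    // No operand info (e.g. the logic-op query passing std::nullopt).
    assert(!x86HasMultipleCondRegs(std::nullopt, /*Is64Bit=*/true));
    return 0;
  }
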
diff --git a/llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll b/llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
index 1962ddebc2115..5a5feaa7734e9 100644
--- a/llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
+++ b/llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
@@ -34,16 +34,16 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
 ; CHECK-NEXT:    .cfi_offset %edi, -16
 ; CHECK-NEXT:    .cfi_offset %ebx, -12
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
 ; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:  Ltmp0:
-; CHECK-NEXT:    ## implicit-def: $ebx
+; CHECK-NEXT:  Ltmp0: ## EH_LABEL
+; CHECK-NEXT:    ## implicit-def: $edi
 ; CHECK-NEXT:    calll __Znam
-; CHECK-NEXT:  Ltmp1:
+; CHECK-NEXT:  Ltmp1: ## EH_LABEL
 ; CHECK-NEXT:  ## %bb.1: ## %bb11
 ; CHECK-NEXT:    movl %eax, %esi
-; CHECK-NEXT:    movb $1, %al
-; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    movb $1, %bl
+; CHECK-NEXT:    testb %bl, %bl
 ; CHECK-NEXT:    jne LBB0_2
 ; CHECK-NEXT:  ## %bb.7: ## %bb31
 ; CHECK-NEXT:    ## implicit-def: $eax
@@ -53,23 +53,20 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
 ; CHECK-NEXT:    ## Child Loop BB0_13 Depth 2
 ; CHECK-NEXT:    ## Child Loop BB0_16 Depth 3
 ; CHECK-NEXT:    ## Child Loop BB0_21 Depth 2
-; CHECK-NEXT:    movb $1, %al
-; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    testb %bl, %bl
 ; CHECK-NEXT:    jne LBB0_9
 ; CHECK-NEXT:  ## %bb.10: ## %bb41
 ; CHECK-NEXT:    ## in Loop: Header=BB0_8 Depth=1
-; CHECK-NEXT:  Ltmp2:
+; CHECK-NEXT:  Ltmp2: ## EH_LABEL
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    movl %esi, (%esp)
 ; CHECK-NEXT:    calll _Pjii
-; CHECK-NEXT:  Ltmp3:
+; CHECK-NEXT:  Ltmp3: ## EH_LABEL
 ; CHECK-NEXT:  ## %bb.11: ## %bb42
 ; CHECK-NEXT:    ## in Loop: Header=BB0_8 Depth=1
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    decl %eax
-; CHECK-NEXT:    testl %eax, %eax
+; CHECK-NEXT:    testb %bl, %bl
 ; CHECK-NEXT:    jne LBB0_18
 ; CHECK-NEXT:  ## %bb.12: ## %bb45.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_8 Depth=1
@@ -78,8 +75,7 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
 ; CHECK-NEXT:    ## Parent Loop BB0_8 Depth=1
 ; CHECK-NEXT:    ## => This Loop Header: Depth=2
 ; CHECK-NEXT:    ## Child Loop BB0_16 Depth 3
-; CHECK-NEXT:    movb $1, %cl
-; CHECK-NEXT:    testb %cl, %cl
+; CHECK-NEXT:    testb %bl, %bl
 ; CHECK-NEXT:    jne LBB0_19
 ; CHECK-NEXT:  ## %bb.14: ## %bb48
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=2
@@ -88,14 +84,14 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=2
 ; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    movl %esi, %edx
-; CHECK-NEXT:    movl %edi, %ebx
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
 ; CHECK-NEXT:  LBB0_16: ## %bb49
 ; CHECK-NEXT:    ## Parent Loop BB0_8 Depth=1
 ; CHECK-NEXT:    ## Parent Loop BB0_13 Depth=2
 ; CHECK-NEXT:    ## => This Inner Loop Header: Depth=3
 ; CHECK-NEXT:    incl %ecx
 ; CHECK-NEXT:    addl $4, %edx
-; CHECK-NEXT:    decl %ebx
+; CHECK-NEXT:    decl %edi
 ; CHECK-NEXT:    jne LBB0_16
 ; CHECK-NEXT:  LBB0_17: ## %bb57
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=2
@@ -107,13 +103,12 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
 ; CHECK-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    movl $0, (%esp)
 ; CHECK-NEXT:    calll ___bzero
-; CHECK-NEXT:    movb $1, %al
-; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    testb %bl, %bl
 ; CHECK-NEXT:    jne LBB0_22
 ; CHECK-NEXT:  ## %bb.20: ## %bb61.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_8 Depth=1
 ; CHECK-NEXT:    movl %esi, %eax
-; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
 ; CHECK-NEXT:  LBB0_21: ## %bb61
 ; CHECK-NEXT:    ## Parent Loop BB0_8 Depth=1
 ; CHECK-NEXT:    ## => This Inner Loop Header: Depth=2
@@ -126,24 +121,24 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
 ; CHECK-NEXT:    decl {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
 ; CHECK-NEXT:    jmp LBB0_8
 ; CHECK-NEXT:  LBB0_18: ## %bb43
-; CHECK-NEXT:  Ltmp5:
-; CHECK-NEXT:    movl %esi, %ebx
+; CHECK-NEXT:  Ltmp5: ## EH_LABEL
+; CHECK-NEXT:    movl %esi, %edi
 ; CHECK-NEXT:    calll _OnOverFlow
-; CHECK-NEXT:  Ltmp6:
+; CHECK-NEXT:  Ltmp6: ## EH_LABEL
 ; CHECK-NEXT:    jmp LBB0_3
 ; CHECK-NEXT:  LBB0_2: ## %bb29
-; CHECK-NEXT:  Ltmp7:
-; CHECK-NEXT:    movl %esi, %ebx
+; CHECK-NEXT:  Ltmp7: ## EH_LABEL
+; CHECK-NEXT:    movl %esi, %edi
 ; CHECK-NEXT:    calll _OnOverFlow
-; CHECK-NEXT:  Ltmp8:
+; CHECK-NEXT:  Ltmp8: ## EH_LABEL
 ; CHECK-NEXT:  LBB0_3: ## %bb30
 ; CHECK-NEXT:    ud2
 ; CHECK-NEXT:  LBB0_4: ## %bb20.loopexit
-; CHECK-NEXT:  Ltmp4:
+; CHECK-NEXT:  Ltmp4: ## EH_LABEL
 ; CHECK-NEXT:  LBB0_9:
-; CHECK-NEXT:    movl %esi, %ebx
+; CHECK-NEXT:    movl %esi, %edi
 ; CHECK-NEXT:  LBB0_6: ## %bb23
-; CHECK-NEXT:    testl %ebx, %ebx
+; CHECK-NEXT:    testl %edi, %edi
 ; CHECK-NEXT:    addl $28, %esp
 ; CHECK-NEXT:    popl %esi
 ; CHECK-NEXT:    popl %edi
@@ -151,7 +146,7 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
 ; CHECK-NEXT:    popl %ebp
 ; CHECK-NEXT:    retl
 ; CHECK-NEXT:  LBB0_5: ## %bb20.loopexit.split-lp
-; CHECK-NEXT:  Ltmp9:
+; CHECK-NEXT:  Ltmp9: ## EH_LABEL
 ; CHECK-NEXT:    jmp LBB0_6
 ; CHECK-NEXT:  Lfunc_end0:
 bb:
diff --git a/llvm/test/CodeGen/X86/pr166534.ll b/llvm/test/CodeGen/X86/pr166534.ll
index aef44cc3e40d0..a311acb4c0409 100644
--- a/llvm/test/CodeGen/X86/pr166534.ll
+++ b/llvm/test/CodeGen/X86/pr166534.ll
@@ -1,108 +1,57 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=SSE2
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE4
-; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX2
-; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX512
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX
 
 define void @pr166534(ptr %pa, ptr %pb, ptr %pc, ptr %pd) {
 ; SSE2-LABEL: pr166534:
 ; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    movq (%rdi), %rax
-; SSE2-NEXT:    movq 8(%rdi), %r8
 ; SSE2-NEXT:    movdqu (%rdi), %xmm0
-; SSE2-NEXT:    movq (%rsi), %r9
-; SSE2-NEXT:    movq 8(%rsi), %rdi
 ; SSE2-NEXT:    movdqu (%rsi), %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
 ; SSE2-NEXT:    pmovmskb %xmm1, %esi
-; SSE2-NEXT:    xorl %r10d, %r10d
+; SSE2-NEXT:    xorl %eax, %eax
 ; SSE2-NEXT:    cmpl $65535, %esi # imm = 0xFFFF
-; SSE2-NEXT:    sete %r10b
-; SSE2-NEXT:    orq %r10, (%rdx)
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    orq %rax, (%rdx)
 ; SSE2-NEXT:    cmpl $65535, %esi # imm = 0xFFFF
 ; SSE2-NEXT:    jne .LBB0_2
 ; SSE2-NEXT:  # %bb.1: # %if.then
-; SSE2-NEXT:    xorq %r9, %rax
-; SSE2-NEXT:    xorq %rdi, %r8
-; SSE2-NEXT:    xorl %edx, %edx
-; SSE2-NEXT:    orq %rax, %r8
-; SSE2-NEXT:    sete %dl
-; SSE2-NEXT:    orq %rdx, (%rcx)
+; SSE2-NEXT:    orq %rax, (%rcx)
 ; SSE2-NEXT:  .LBB0_2: # %if.end
 ; SSE2-NEXT:    retq
 ;
 ; SSE4-LABEL: pr166534:
 ; SSE4:       # %bb.0: # %entry
-; SSE4-NEXT:    movq (%rdi), %rax
-; SSE4-NEXT:    movq 8(%rdi), %r8
 ; SSE4-NEXT:    movdqu (%rdi), %xmm0
-; SSE4-NEXT:    movq (%rsi), %r9
-; SSE4-NEXT:    movq 8(%rsi), %rdi
 ; SSE4-NEXT:    movdqu (%rsi), %xmm1
 ; SSE4-NEXT:    pxor %xmm0, %xmm1
-; SSE4-NEXT:    xorl %esi, %esi
+; SSE4-NEXT:    xorl %eax, %eax
 ; SSE4-NEXT:    ptest %xmm1, %xmm1
-; SSE4-NEXT:    sete %sil
-; SSE4-NEXT:    orq %rsi, (%rdx)
+; SSE4-NEXT:    sete %al
+; SSE4-NEXT:    orq %rax, (%rdx)
 ; SSE4-NEXT:    ptest %xmm1, %xmm1
 ; SSE4-NEXT:    jne .LBB0_2
 ; SSE4-NEXT:  # %bb.1: # %if.then
-; SSE4-NEXT:    xorq %r9, %rax
-; SSE4-NEXT:    xorq %rdi, %r8
-; SSE4-NEXT:    xorl %edx, %edx
-; SSE4-NEXT:    orq %rax, %r8
-; SSE4-NEXT:    sete %dl
-; SSE4-NEXT:    orq %rdx, (%rcx)
+; SSE4-NEXT:    orq %rax, (%rcx)
 ; SSE4-NEXT:  .LBB0_2: # %if.end
 ; SSE4-NEXT:    retq
 ;
-; AVX2-LABEL: pr166534:
-; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    movq (%rdi), %rax
-; AVX2-NEXT:    movq 8(%rdi), %r8
-; AVX2-NEXT:    vmovdqu (%rdi), %xmm0
-; AVX2-NEXT:    movq (%rsi), %rdi
-; AVX2-NEXT:    vpxor (%rsi), %xmm0, %xmm0
-; AVX2-NEXT:    movq 8(%rsi), %rsi
-; AVX2-NEXT:    xorl %r9d, %r9d
-; AVX2-NEXT:    vptest %xmm0, %xmm0
-; AVX2-NEXT:    sete %r9b
-; AVX2-NEXT:    orq %r9, (%rdx)
-; AVX2-NEXT:    vptest %xmm0, %xmm0
-; AVX2-NEXT:    jne .LBB0_2
-; AVX2-NEXT:  # %bb.1: # %if.then
-; AVX2-NEXT:    xorq %rdi, %rax
-; AVX2-NEXT:    xorq %rsi, %r8
-; AVX2-NEXT:    xorl %edx, %edx
-; AVX2-NEXT:    orq %rax, %r8
-; AVX2-NEXT:    sete %dl
-; AVX2-NEXT:    orq %rdx, (%rcx)
-; AVX2-NEXT:  .LBB0_2: # %if.end
-; AVX2-NEXT:    retq
-;
-; AVX512-LABEL: pr166534:
-; AVX512:       # %bb.0: # %entry
-; AVX512-NEXT:    movq (%rdi), %rax
-; AVX512-NEXT:    movq 8(%rdi), %r8
-; AVX512-NEXT:    vmovdqu (%rdi), %xmm0
-; AVX512-NEXT:    movq (%rsi), %r9
-; AVX512-NEXT:    movq 8(%rsi), %rdi
-; AVX512-NEXT:    vpxor (%rsi), %xmm0, %xmm0
-; AVX512-NEXT:    xorl %esi, %esi
-; AVX512-NEXT:    vptest %xmm0, %xmm0
-; AVX512-NEXT:    sete %sil
-; AVX512-NEXT:    orq %rsi, (%rdx)
-; AVX512-NEXT:    vptest %xmm0, %xmm0
-; AVX512-NEXT:    jne .LBB0_2
-; AVX512-NEXT:  # %bb.1: # %if.then
-; AVX512-NEXT:    xorq %r9, %rax
-; AVX512-NEXT:    xorq %rdi, %r8
-; AVX512-NEXT:    xorl %edx, %edx
-; AVX512-NEXT:    orq %rax, %r8
-; AVX512-NEXT:    sete %dl
-; AVX512-NEXT:    orq %rdx, (%rcx)
-; AVX512-NEXT:  .LBB0_2: # %if.end
-; AVX512-NEXT:    retq
+; AVX-LABEL: pr166534:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vmovdqu (%rdi), %xmm0
+; AVX-NEXT:    vpxor (%rsi), %xmm0, %xmm0
+; AVX-NEXT:    xorl %eax, %eax
+; AVX-NEXT:    vptest %xmm0, %xmm0
+; AVX-NEXT:    sete %al
+; AVX-NEXT:    orq %rax, (%rdx)
+; AVX-NEXT:    vptest %xmm0, %xmm0
+; AVX-NEXT:    jne .LBB0_2
+; AVX-NEXT:  # %bb.1: # %if.then
+; AVX-NEXT:    orq %rax, (%rcx)
+; AVX-NEXT:  .LBB0_2: # %if.end
+; AVX-NEXT:    retq
 entry:
   %a = load i128, ptr %pa, align 8
   %b = load i128, ptr %pb, align 8