Skip to content

Commit

Permalink
Fix bug 30945- [AVX512] Failure to flip vector comparison to remove n…
Browse files Browse the repository at this point in the history
…ot mask instruction

adding new optimization opportunity by adding new X86ISelLowering pattern. The test case was shown in https://llvm.org/bugs/show_bug.cgi?id=30945.

Test explanation:
Select gets three arguments mask, op and op2. In this case, the Mask is a result of ICMP. The ICMP instruction compares (with equal operand) the zero initializer vector and the result of the first ICMP.

In general, The result of "cmp eq, op1, zero initializers" is "not(op1)" where op1 is a mask. By rearranging of the two arguments inside the Select instruction, we can get the same result. Without the necessary of the middle phase ("cmp eq, op1, zero initializers").

Missed optimization opportunity: 
vpcmpled %zmm0, %zmm1, %k0
knotw %k0, %k1

can be combine to 
vpcmpgtd %zmm0, %zmm2, %k1

Reviewers: 
1. delena
2. igorb 

Commited after check all 
Differential Revision: https://reviews.llvm.org/D27160

llvm-svn: 289653
  • Loading branch information
Michael Zuckerman authored and Michael Zuckerman committed Dec 14, 2016
1 parent ebe5819 commit 1ce2a23
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 11 deletions.
17 changes: 14 additions & 3 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -28231,7 +28231,8 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,

/// If a vector select has an operand that is -1 or 0, simplify the select to a
/// bitwise logic operation.
static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG) {
static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue Cond = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
Expand All @@ -28243,6 +28244,16 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG) {
if (N->getOpcode() != ISD::VSELECT)
return SDValue();

bool FValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
// Check if the first operand is all zeros.This situation only
// applies to avx512.
if (FValIsAllZeros && Subtarget.hasAVX512() && Cond.hasOneUse()) {
//Invert the cond to not(cond) : xor(op,allones)=not(op)
SDValue CondNew = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
DAG.getConstant(1, DL, Cond.getValueType()));
//Vselect cond, op1, op2 = Vselect not(cond), op2, op1
return DAG.getNode(ISD::VSELECT, DL, VT, CondNew, RHS, LHS);
}
assert(CondVT.isVector() && "Vector select expects a vector selector!");

// To use the condition operand as a bitwise mask, it must have elements that
Expand All @@ -28254,7 +28265,7 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG) {
return SDValue();

bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode());
bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());

// Try to invert the condition if true value is not all 1s and false value is
// not all 0s.
Expand Down Expand Up @@ -28736,7 +28747,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
}
}

if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG))
if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, Subtarget))
return V;

// If this is a *dynamic* select (non-constant condition) and we can match
Expand Down
14 changes: 6 additions & 8 deletions llvm/test/CodeGen/X86/avx512-vec-cmp.ll
Expand Up @@ -658,10 +658,9 @@ define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
; CHECK-LABEL: test14:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm1
; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k0
; CHECK-NEXT: knotw %k0, %k1
; CHECK-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm2
; CHECK-NEXT: vpcmpgtd %zmm0, %zmm2, %k1
; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%sub_r = sub <16 x i32> %a, %b
%cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
Expand All @@ -674,10 +673,9 @@ define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
; CHECK-LABEL: test15:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm1
; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0
; CHECK-NEXT: knotw %k0, %k1
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm2
; CHECK-NEXT: vpcmpgtq %zmm0, %zmm2, %k1
; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%sub_r = sub <8 x i64> %a, %b
%cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a
Expand Down

0 comments on commit 1ce2a23

Please sign in to comment.