Skip to content

Commit

Permalink
Merging r324576:
Browse files Browse the repository at this point in the history
------------------------------------------------------------------------
r324576 | ctopper | 2018-02-08 08:45:55 +0100 (Thu, 08 Feb 2018) | 20 lines

[X86] Don't emit KTEST instructions unless only the Z flag is being used

Summary:
KTEST has weird flag behavior. The Z flag is set for all bits in the AND of the k-registers being 0, and the C flag is set for all bits being 1. All other flags are cleared.

We currently emit this instruction in EmitTEST and don't check the condition code. This can lead to strange things like using the S flag after a KTEST for a signed compare.

The domain reassignment pass can also transform TEST instructions into KTEST and is not protected against the flag usage either. For now I've disabled this part of the domain reassignment pass. I tried to comment out the checks in the mir test so that we could recover them later, but I couldn't figure out how to get that to work.

This patch moves the KTEST handling into LowerSETCC and now creates a ktest+x86setcc. I've chosen this approach because I'd like to add support for the C flag for all ones in a followup patch. To do that requires that I can rewrite the condition code going in the x86setcc to be different than the original SETCC condition code.

This fixes PR36182. I'll file a PR to fix domain reassignment once this goes in. Should this be merged to 6.0?

Reviewers: spatel, guyblank, RKSimon, zvi

Reviewed By: guyblank

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D42770
------------------------------------------------------------------------

llvm-svn: 325106
  • Loading branch information
zmodem committed Feb 14, 2018
1 parent c1957e6 commit 34684f1
Show file tree
Hide file tree
Showing 4 changed files with 156 additions and 57 deletions.
12 changes: 8 additions & 4 deletions llvm/lib/Target/X86/X86DomainReassignment.cpp
Expand Up @@ -663,8 +663,10 @@ void X86DomainReassignment::initConverters() {
createReplacer(X86::XOR32rr, X86::KXORDrr);
createReplacer(X86::XOR64rr, X86::KXORQrr);

createReplacer(X86::TEST32rr, X86::KTESTDrr);
createReplacer(X86::TEST64rr, X86::KTESTQrr);
// TODO: KTEST is not a replacement for TEST due to flag differences. Need
// to prove only Z flag is used.
//createReplacer(X86::TEST32rr, X86::KTESTDrr);
//createReplacer(X86::TEST64rr, X86::KTESTQrr);
}

if (STI->hasDQI()) {
Expand All @@ -684,8 +686,10 @@ void X86DomainReassignment::initConverters() {
createReplacer(X86::SHR8ri, X86::KSHIFTRBri);
createReplacer(X86::SHL8ri, X86::KSHIFTLBri);

createReplacer(X86::TEST8rr, X86::KTESTBrr);
createReplacer(X86::TEST16rr, X86::KTESTWrr);
// TODO: KTEST is not a replacement for TEST due to flag differences. Need
// to prove only Z flag is used.
//createReplacer(X86::TEST8rr, X86::KTESTBrr);
//createReplacer(X86::TEST16rr, X86::KTESTWrr);

createReplacer(X86::XOR8rr, X86::KXORBrr);
}
Expand Down
57 changes: 32 additions & 25 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -17017,24 +17017,6 @@ static bool hasNonFlagsUse(SDValue Op) {
return false;
}

// Emit KTEST instruction for bit vectors on AVX-512
static SDValue EmitKTEST(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (Op.getOpcode() == ISD::BITCAST) {
auto hasKTEST = [&](MVT VT) {
unsigned SizeInBits = VT.getSizeInBits();
return (Subtarget.hasDQI() && (SizeInBits == 8 || SizeInBits == 16)) ||
(Subtarget.hasBWI() && (SizeInBits == 32 || SizeInBits == 64));
};
SDValue Op0 = Op.getOperand(0);
MVT Op0VT = Op0.getValueType().getSimpleVT();
if (Op0VT.isVector() && Op0VT.getVectorElementType() == MVT::i1 &&
hasKTEST(Op0VT))
return DAG.getNode(X86ISD::KTEST, SDLoc(Op), Op0VT, Op0, Op0);
}
return SDValue();
}

/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent.
SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
Expand Down Expand Up @@ -17079,9 +17061,6 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
// doing a separate TEST. TEST always sets OF and CF to 0, so unless
// we prove that the arithmetic won't overflow, we can't use OF or CF.
if (Op.getResNo() != 0 || NeedOF || NeedCF) {
// Emit KTEST for bit vectors
if (auto Node = EmitKTEST(Op, DAG, Subtarget))
return Node;
// Emit a CMP with 0, which is the TEST pattern.
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
DAG.getConstant(0, dl, Op.getValueType()));
Expand Down Expand Up @@ -17310,10 +17289,6 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
}

if (Opcode == 0) {
// Emit KTEST for bit vectors
if (auto Node = EmitKTEST(Op, DAG, Subtarget))
return Node;

// Emit a CMP with 0, which is the TEST pattern.
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
DAG.getConstant(0, dl, Op.getValueType()));
Expand Down Expand Up @@ -18093,6 +18068,34 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
return Result;
}

// Try to select this as a KTEST+SETCC if possible.
static SDValue EmitKTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC,
const SDLoc &dl, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// Only support equality comparisons.
if (CC != ISD::SETEQ && CC != ISD::SETNE)
return SDValue();

// Must be a bitcast from vXi1.
if (Op0.getOpcode() != ISD::BITCAST)
return SDValue();

Op0 = Op0.getOperand(0);
MVT VT = Op0.getSimpleValueType();
if (!(Subtarget.hasDQI() && (VT == MVT::v8i1 || VT == MVT::v16i1)) &&
!(Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1)))
return SDValue();

X86::CondCode X86CC;
if (isNullConstant(Op1)) {
X86CC = CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE;
} else
return SDValue();

SDValue KTEST = DAG.getNode(X86ISD::KTEST, dl, MVT::i32, Op0, Op0);
return getSETCC(X86CC, KTEST, dl, DAG);
}

SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {

MVT VT = Op.getSimpleValueType();
Expand All @@ -18115,6 +18118,10 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
return NewSetCC;
}

// Try to lower using KTEST.
if (SDValue NewSetCC = EmitKTEST(Op0, Op1, CC, dl, DAG, Subtarget))
return NewSetCC;

// Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
// these.
if ((isOneConstant(Op1) || isNullConstant(Op1)) &&
Expand Down
96 changes: 96 additions & 0 deletions llvm/test/CodeGen/X86/avx512-mask-op.ll
Expand Up @@ -2775,3 +2775,99 @@ define i8 @test_v8i1_mul(i8 %x, i8 %y) {
%ret = bitcast <8 x i1> %m2 to i8
ret i8 %ret
}

; Make sure we don't emit a ktest for signed comparisons.
define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) {
; KNL-LABEL: ktest_signed:
; KNL: ## %bb.0:
; KNL-NEXT: pushq %rax
; KNL-NEXT: .cfi_def_cfa_offset 16
; KNL-NEXT: vporq %zmm1, %zmm0, %zmm0
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: testw %ax, %ax
; KNL-NEXT: jle LBB63_1
; KNL-NEXT: ## %bb.2: ## %bb.2
; KNL-NEXT: popq %rax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
; KNL-NEXT: LBB63_1: ## %bb.1
; KNL-NEXT: vzeroupper
; KNL-NEXT: callq _foo
; KNL-NEXT: popq %rax
; KNL-NEXT: retq
;
; SKX-LABEL: ktest_signed:
; SKX: ## %bb.0:
; SKX-NEXT: pushq %rax
; SKX-NEXT: .cfi_def_cfa_offset 16
; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; SKX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: testw %ax, %ax
; SKX-NEXT: jle LBB63_1
; SKX-NEXT: ## %bb.2: ## %bb.2
; SKX-NEXT: popq %rax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; SKX-NEXT: LBB63_1: ## %bb.1
; SKX-NEXT: vzeroupper
; SKX-NEXT: callq _foo
; SKX-NEXT: popq %rax
; SKX-NEXT: retq
;
; AVX512BW-LABEL: ktest_signed:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: pushq %rax
; AVX512BW-NEXT: .cfi_def_cfa_offset 16
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: testw %ax, %ax
; AVX512BW-NEXT: jle LBB63_1
; AVX512BW-NEXT: ## %bb.2: ## %bb.2
; AVX512BW-NEXT: popq %rax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512BW-NEXT: LBB63_1: ## %bb.1
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: callq _foo
; AVX512BW-NEXT: popq %rax
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: ktest_signed:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: pushq %rax
; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512DQ-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: testw %ax, %ax
; AVX512DQ-NEXT: jle LBB63_1
; AVX512DQ-NEXT: ## %bb.2: ## %bb.2
; AVX512DQ-NEXT: popq %rax
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
; AVX512DQ-NEXT: LBB63_1: ## %bb.1
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: callq _foo
; AVX512DQ-NEXT: popq %rax
; AVX512DQ-NEXT: retq
%a = icmp eq <16 x i32> %x, zeroinitializer
%b = icmp eq <16 x i32> %y, zeroinitializer
%c = and <16 x i1> %a, %b
%d = bitcast <16 x i1> %c to i16
%e = icmp sgt i16 %d, 0
br i1 %e, label %bb.2, label %bb.1
bb.1:
call void @foo()
br label %bb.2
bb.2:
ret void
}
declare void @foo()

48 changes: 20 additions & 28 deletions llvm/test/CodeGen/X86/domain-reassignment.mir
Expand Up @@ -256,7 +256,7 @@ constants:
body: |
; CHECK-LABEL: name: test_8bitops
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK: successors: %bb.1(0x80000000)
; CHECK: liveins: %rdi, %zmm0, %zmm1, %zmm2, %zmm3
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY %rdi
; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY %zmm0
Expand All @@ -277,9 +277,6 @@ body: |
; CHECK: [[COPY8:%[0-9]+]]:vk8wm = COPY [[COPY7]]
; CHECK: [[VMOVAPDZrrk:%[0-9]+]]:vr512 = VMOVAPDZrrk [[COPY2]], killed [[COPY8]], [[COPY1]]
; CHECK: VMOVAPDZmr [[COPY]], 1, %noreg, 0, %noreg, killed [[VMOVAPDZrrk]]
; CHECK: KTESTBrr [[KADDBrr]], [[KADDBrr]], implicit-def %eflags
; CHECK: JE_1 %bb.1, implicit %eflags
; CHECK: JMP_1 %bb.2
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: bb.2:
Expand Down Expand Up @@ -311,9 +308,10 @@ body: |
%11 = VMOVAPDZrrk %2, killed %10, %1
VMOVAPDZmr %0, 1, %noreg, 0, %noreg, killed %11
TEST8rr %18, %18, implicit-def %eflags
JE_1 %bb.1, implicit %eflags
JMP_1 %bb.2
; FIXME We can't replace TEST with KTEST due to flag differences
; TEST8rr %18, %18, implicit-def %eflags
; JE_1 %bb.1, implicit %eflags
; JMP_1 %bb.2
bb.1:
Expand Down Expand Up @@ -377,7 +375,7 @@ constants:
body: |
; CHECK-LABEL: name: test_16bitops
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK: successors: %bb.1(0x80000000)
; CHECK: liveins: %rdi, %zmm0, %zmm1, %zmm2, %zmm3
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY %rdi
; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY %zmm0
Expand All @@ -397,9 +395,6 @@ body: |
; CHECK: [[COPY8:%[0-9]+]]:vk16wm = COPY [[COPY7]]
; CHECK: [[VMOVAPSZrrk:%[0-9]+]]:vr512 = VMOVAPSZrrk [[COPY2]], killed [[COPY8]], [[COPY1]]
; CHECK: VMOVAPSZmr [[COPY]], 1, %noreg, 0, %noreg, killed [[VMOVAPSZrrk]]
; CHECK: KTESTWrr [[KXORWrr]], [[KXORWrr]], implicit-def %eflags
; CHECK: JE_1 %bb.1, implicit %eflags
; CHECK: JMP_1 %bb.2
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: bb.2:
Expand Down Expand Up @@ -430,9 +425,10 @@ body: |
%11 = VMOVAPSZrrk %2, killed %10, %1
VMOVAPSZmr %0, 1, %noreg, 0, %noreg, killed %11
TEST16rr %17, %17, implicit-def %eflags
JE_1 %bb.1, implicit %eflags
JMP_1 %bb.2
; FIXME We can't replace TEST with KTEST due to flag differences
; TEST16rr %17, %17, implicit-def %eflags
; JE_1 %bb.1, implicit %eflags
; JMP_1 %bb.2
bb.1:
Expand Down Expand Up @@ -490,7 +486,7 @@ constants:
body: |
; CHECK-LABEL: name: test_32bitops
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK: successors: %bb.1(0x80000000)
; CHECK: liveins: %rdi, %zmm0, %zmm1
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY %rdi
; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY %zmm0
Expand All @@ -507,9 +503,6 @@ body: |
; CHECK: [[COPY3:%[0-9]+]]:vk32wm = COPY [[KADDDrr]]
; CHECK: [[VMOVDQU16Zrrk:%[0-9]+]]:vr512 = VMOVDQU16Zrrk [[COPY2]], killed [[COPY3]], [[COPY1]]
; CHECK: VMOVDQA32Zmr [[COPY]], 1, %noreg, 0, %noreg, killed [[VMOVDQU16Zrrk]]
; CHECK: KTESTDrr [[KADDDrr]], [[KADDDrr]], implicit-def %eflags
; CHECK: JE_1 %bb.1, implicit %eflags
; CHECK: JMP_1 %bb.2
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: bb.2:
Expand All @@ -535,9 +528,10 @@ body: |
%4 = VMOVDQU16Zrrk %2, killed %3, %1
VMOVDQA32Zmr %0, 1, %noreg, 0, %noreg, killed %4
TEST32rr %13, %13, implicit-def %eflags
JE_1 %bb.1, implicit %eflags
JMP_1 %bb.2
; FIXME We can't replace TEST with KTEST due to flag differences
; TEST32rr %13, %13, implicit-def %eflags
; JE_1 %bb.1, implicit %eflags
; JMP_1 %bb.2
bb.1:
Expand Down Expand Up @@ -595,7 +589,7 @@ constants:
body: |
; CHECK-LABEL: name: test_64bitops
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK: successors: %bb.1(0x80000000)
; CHECK: liveins: %rdi, %zmm0, %zmm1
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY %rdi
; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY %zmm0
Expand All @@ -612,9 +606,6 @@ body: |
; CHECK: [[COPY3:%[0-9]+]]:vk64wm = COPY [[KADDQrr]]
; CHECK: [[VMOVDQU8Zrrk:%[0-9]+]]:vr512 = VMOVDQU8Zrrk [[COPY2]], killed [[COPY3]], [[COPY1]]
; CHECK: VMOVDQA32Zmr [[COPY]], 1, %noreg, 0, %noreg, killed [[VMOVDQU8Zrrk]]
; CHECK: KTESTQrr [[KADDQrr]], [[KADDQrr]], implicit-def %eflags
; CHECK: JE_1 %bb.1, implicit %eflags
; CHECK: JMP_1 %bb.2
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: bb.2:
Expand All @@ -640,9 +631,10 @@ body: |
%4 = VMOVDQU8Zrrk %2, killed %3, %1
VMOVDQA32Zmr %0, 1, %noreg, 0, %noreg, killed %4
TEST64rr %13, %13, implicit-def %eflags
JE_1 %bb.1, implicit %eflags
JMP_1 %bb.2
; FIXME We can't replace TEST with KTEST due to flag differences
; TEST64rr %13, %13, implicit-def %eflags
; JE_1 %bb.1, implicit %eflags
; JMP_1 %bb.2
bb.1:
Expand Down

0 comments on commit 34684f1

Please sign in to comment.