Skip to content

Commit

Permalink
[X86] Add post-isel peephole to fold KAND+KORTEST into KTEST if only …
Browse files Browse the repository at this point in the history
…the zero flag is used.

Doing this late so we will prefer to fold the AND into a masked comparison first. That can be better for the live range of the mask register.

Differential Revision: https://reviews.llvm.org/D56246

llvm-svn: 350374
  • Loading branch information
topperc committed Jan 4, 2019
1 parent 26ce9c3 commit 6265a15
Show file tree
Hide file tree
Showing 2 changed files with 136 additions and 46 deletions.
35 changes: 35 additions & 0 deletions llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
Expand Up @@ -948,6 +948,41 @@ void X86DAGToDAGISel::PostprocessISelDAG() {
}
}

// Look for a KAND+KORTEST and turn it into KTEST if only the zero flag is
// used. We're doing this late so we can prefer to fold the AND into masked
// comparisons. Doing that can be better for the live range of the mask
// register.
//
// KORTEST of a mask with itself sets ZF when that mask is all zeros, and
// KTEST sets ZF when the AND of its two operands is all zeros, so
// KORTEST(KAND(a, b), KAND(a, b)) and KTEST(a, b) agree on ZF. The remaining
// flags are computed differently by the two instructions, which is why the
// fold is only valid when nothing but ZF is consumed.
//
// Preconditions checked below:
//  - both KORTEST operands are the same value,
//  - this KORTEST is the only user of the node producing that value,
//  - that operand is an already-selected machine node (so its machine opcode
//    can be inspected),
//  - onlyUsesZeroFlag() accepts result 0 of the KORTEST.
if ((Opc == X86::KORTESTBrr || Opc == X86::KORTESTWrr ||
Opc == X86::KORTESTDrr || Opc == X86::KORTESTQrr) &&
N->getOperand(0) == N->getOperand(1) &&
N->isOnlyUserOf(N->getOperand(0).getNode()) &&
N->getOperand(0).isMachineOpcode() &&
onlyUsesZeroFlag(SDValue(N, 0))) {
// Candidate AND feeding both inputs of the KORTEST; it is only folded if its
// opcode turns out to be one of the KAND forms checked next.
SDValue And = N->getOperand(0);
unsigned N0Opc = And.getMachineOpcode();
// KANDW is legal with AVX512F, but KTESTW requires AVX512DQ. The other
// KAND instructions and KTEST use the same ISA feature.
if (N0Opc == X86::KANDBrr ||
(N0Opc == X86::KANDWrr && Subtarget->hasDQI()) ||
N0Opc == X86::KANDDrr || N0Opc == X86::KANDQrr) {
// Pick the KTEST of the same width as the KORTEST being replaced. The
// default case is unreachable because the outer condition already limited
// Opc to the four KORTEST opcodes.
unsigned NewOpc;
switch (Opc) {
default: llvm_unreachable("Unexpected opcode!");
case X86::KORTESTBrr: NewOpc = X86::KTESTBrr; break;
case X86::KORTESTWrr: NewOpc = X86::KTESTWrr; break;
case X86::KORTESTDrr: NewOpc = X86::KTESTDrr; break;
case X86::KORTESTQrr: NewOpc = X86::KTESTQrr; break;
}
// Build the KTEST directly on the two inputs of the AND; it produces a
// single i32 result, which takes the place of the KORTEST's result.
MachineSDNode *KTest = CurDAG->getMachineNode(NewOpc, SDLoc(N),
MVT::i32,
And.getOperand(0),
And.getOperand(1));
// Redirect every user of the KORTEST to the new KTEST and record that the
// DAG was modified.
ReplaceUses(N, KTest);
MadeChange = true;
// N has been replaced, so skip the remaining peepholes for this node.
continue;
}
}

// Attempt to remove vector moves that were inserted to zero upper bits.
if (Opc != TargetOpcode::SUBREG_TO_REG)
continue;
Expand Down
147 changes: 101 additions & 46 deletions llvm/test/CodeGen/X86/avx512-mask-op.ll
Expand Up @@ -3519,8 +3519,7 @@ define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1
; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k2
; SKX-NEXT: korb %k2, %k1, %k1
; SKX-NEXT: kandb %k1, %k0, %k0
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: ktestb %k1, %k0
; SKX-NEXT: je LBB71_1
; SKX-NEXT: ## %bb.2: ## %exit
; SKX-NEXT: popq %rax
Expand Down Expand Up @@ -3574,8 +3573,7 @@ define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; AVX512DQ-NEXT: vptestnmd %zmm3, %zmm3, %k3
; AVX512DQ-NEXT: korb %k1, %k0, %k0
; AVX512DQ-NEXT: korb %k3, %k2, %k1
; AVX512DQ-NEXT: kandb %k1, %k0, %k0
; AVX512DQ-NEXT: kortestb %k0, %k0
; AVX512DQ-NEXT: ktestb %k1, %k0
; AVX512DQ-NEXT: je LBB71_1
; AVX512DQ-NEXT: ## %bb.2: ## %exit
; AVX512DQ-NEXT: popq %rax
Expand All @@ -3597,8 +3595,7 @@ define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; X86-NEXT: vptestnmd %ymm2, %ymm2, %k1
; X86-NEXT: vptestnmd %ymm3, %ymm3, %k2
; X86-NEXT: korb %k2, %k1, %k1
; X86-NEXT: kandb %k1, %k0, %k0
; X86-NEXT: kortestb %k0, %k0
; X86-NEXT: ktestb %k1, %k0
; X86-NEXT: je LBB71_1
; X86-NEXT: ## %bb.2: ## %exit
; X86-NEXT: addl $12, %esp
Expand Down Expand Up @@ -3663,8 +3660,7 @@ define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1
; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k2
; SKX-NEXT: korb %k2, %k1, %k1
; SKX-NEXT: kandb %k1, %k0, %k0
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: ktestb %k1, %k0
; SKX-NEXT: je LBB72_1
; SKX-NEXT: ## %bb.2: ## %exit
; SKX-NEXT: popq %rax
Expand Down Expand Up @@ -3710,8 +3706,7 @@ define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
; AVX512DQ-NEXT: vptestnmq %zmm2, %zmm2, %k1
; AVX512DQ-NEXT: vptestnmq %zmm3, %zmm3, %k2
; AVX512DQ-NEXT: korb %k2, %k1, %k1
; AVX512DQ-NEXT: kandb %k1, %k0, %k0
; AVX512DQ-NEXT: kortestb %k0, %k0
; AVX512DQ-NEXT: ktestb %k1, %k0
; AVX512DQ-NEXT: je LBB72_1
; AVX512DQ-NEXT: ## %bb.2: ## %exit
; AVX512DQ-NEXT: popq %rax
Expand All @@ -3733,8 +3728,7 @@ define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
; X86-NEXT: vptestnmq %zmm2, %zmm2, %k1
; X86-NEXT: vptestnmq %zmm3, %zmm3, %k2
; X86-NEXT: korb %k2, %k1, %k1
; X86-NEXT: kandb %k1, %k0, %k0
; X86-NEXT: kortestb %k0, %k0
; X86-NEXT: ktestb %k1, %k0
; X86-NEXT: je LBB72_1
; X86-NEXT: ## %bb.2: ## %exit
; X86-NEXT: addl $12, %esp
Expand Down Expand Up @@ -3765,28 +3759,95 @@ exit:
}

define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z) {
; CHECK-LABEL: ktest_5:
; CHECK: ## %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k0
; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1
; CHECK-NEXT: vptestnmd %zmm3, %zmm3, %k2
; CHECK-NEXT: korw %k2, %k1, %k1
; CHECK-NEXT: kandw %k1, %k0, %k0
; CHECK-NEXT: kortestw %k0, %k0
; CHECK-NEXT: je LBB73_1
; CHECK-NEXT: ## %bb.2: ## %exit
; CHECK-NEXT: popq %rax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
; CHECK-NEXT: LBB73_1: ## %bar
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq _foo
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
; KNL-LABEL: ktest_5:
; KNL: ## %bb.0:
; KNL-NEXT: pushq %rax
; KNL-NEXT: .cfi_def_cfa_offset 16
; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
; KNL-NEXT: vptestnmd %zmm1, %zmm1, %k1
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: vptestnmd %zmm2, %zmm2, %k1
; KNL-NEXT: vptestnmd %zmm3, %zmm3, %k2
; KNL-NEXT: korw %k2, %k1, %k1
; KNL-NEXT: kandw %k1, %k0, %k0
; KNL-NEXT: kortestw %k0, %k0
; KNL-NEXT: je LBB73_1
; KNL-NEXT: ## %bb.2: ## %exit
; KNL-NEXT: popq %rax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
; KNL-NEXT: LBB73_1: ## %bar
; KNL-NEXT: vzeroupper
; KNL-NEXT: callq _foo
; KNL-NEXT: popq %rax
; KNL-NEXT: retq
;
; SKX-LABEL: ktest_5:
; SKX: ## %bb.0:
; SKX-NEXT: pushq %rax
; SKX-NEXT: .cfi_def_cfa_offset 16
; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0
; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1
; SKX-NEXT: korw %k1, %k0, %k0
; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1
; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k2
; SKX-NEXT: korw %k2, %k1, %k1
; SKX-NEXT: ktestw %k1, %k0
; SKX-NEXT: je LBB73_1
; SKX-NEXT: ## %bb.2: ## %exit
; SKX-NEXT: popq %rax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; SKX-NEXT: LBB73_1: ## %bar
; SKX-NEXT: vzeroupper
; SKX-NEXT: callq _foo
; SKX-NEXT: popq %rax
; SKX-NEXT: retq
;
; AVX512BW-LABEL: ktest_5:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: pushq %rax
; AVX512BW-NEXT: .cfi_def_cfa_offset 16
; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
; AVX512BW-NEXT: vptestnmd %zmm1, %zmm1, %k1
; AVX512BW-NEXT: korw %k1, %k0, %k0
; AVX512BW-NEXT: vptestnmd %zmm2, %zmm2, %k1
; AVX512BW-NEXT: vptestnmd %zmm3, %zmm3, %k2
; AVX512BW-NEXT: korw %k2, %k1, %k1
; AVX512BW-NEXT: kandw %k1, %k0, %k0
; AVX512BW-NEXT: kortestw %k0, %k0
; AVX512BW-NEXT: je LBB73_1
; AVX512BW-NEXT: ## %bb.2: ## %exit
; AVX512BW-NEXT: popq %rax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512BW-NEXT: LBB73_1: ## %bar
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: callq _foo
; AVX512BW-NEXT: popq %rax
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: ktest_5:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: pushq %rax
; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
; AVX512DQ-NEXT: vptestnmd %zmm1, %zmm1, %k1
; AVX512DQ-NEXT: korw %k1, %k0, %k0
; AVX512DQ-NEXT: vptestnmd %zmm2, %zmm2, %k1
; AVX512DQ-NEXT: vptestnmd %zmm3, %zmm3, %k2
; AVX512DQ-NEXT: korw %k2, %k1, %k1
; AVX512DQ-NEXT: ktestw %k1, %k0
; AVX512DQ-NEXT: je LBB73_1
; AVX512DQ-NEXT: ## %bb.2: ## %exit
; AVX512DQ-NEXT: popq %rax
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
; AVX512DQ-NEXT: LBB73_1: ## %bar
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: callq _foo
; AVX512DQ-NEXT: popq %rax
; AVX512DQ-NEXT: retq
;
; X86-LABEL: ktest_5:
; X86: ## %bb.0:
Expand All @@ -3798,8 +3859,7 @@ define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z)
; X86-NEXT: vptestnmd %zmm2, %zmm2, %k1
; X86-NEXT: vptestnmd %zmm3, %zmm3, %k2
; X86-NEXT: korw %k2, %k1, %k1
; X86-NEXT: kandw %k1, %k0, %k0
; X86-NEXT: kortestw %k0, %k0
; X86-NEXT: ktestw %k1, %k0
; X86-NEXT: je LBB73_1
; X86-NEXT: ## %bb.2: ## %exit
; X86-NEXT: addl $12, %esp
Expand Down Expand Up @@ -3878,8 +3938,7 @@ define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z)
; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1
; SKX-NEXT: vptestnmw %zmm3, %zmm3, %k2
; SKX-NEXT: kord %k2, %k1, %k1
; SKX-NEXT: kandd %k1, %k0, %k0
; SKX-NEXT: kortestd %k0, %k0
; SKX-NEXT: ktestd %k1, %k0
; SKX-NEXT: je LBB74_1
; SKX-NEXT: ## %bb.2: ## %exit
; SKX-NEXT: popq %rax
Expand All @@ -3901,8 +3960,7 @@ define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z)
; AVX512BW-NEXT: vptestnmw %zmm2, %zmm2, %k1
; AVX512BW-NEXT: vptestnmw %zmm3, %zmm3, %k2
; AVX512BW-NEXT: kord %k2, %k1, %k1
; AVX512BW-NEXT: kandd %k1, %k0, %k0
; AVX512BW-NEXT: kortestd %k0, %k0
; AVX512BW-NEXT: ktestd %k1, %k0
; AVX512BW-NEXT: je LBB74_1
; AVX512BW-NEXT: ## %bb.2: ## %exit
; AVX512BW-NEXT: popq %rax
Expand Down Expand Up @@ -3962,8 +4020,7 @@ define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z)
; X86-NEXT: vptestnmw %zmm2, %zmm2, %k1
; X86-NEXT: vptestnmw %zmm3, %zmm3, %k2
; X86-NEXT: kord %k2, %k1, %k1
; X86-NEXT: kandd %k1, %k0, %k0
; X86-NEXT: kortestd %k0, %k0
; X86-NEXT: ktestd %k1, %k0
; X86-NEXT: je LBB74_1
; X86-NEXT: ## %bb.2: ## %exit
; X86-NEXT: addl $12, %esp
Expand Down Expand Up @@ -4066,8 +4123,7 @@ define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
; SKX-NEXT: vptestnmb %zmm2, %zmm2, %k1
; SKX-NEXT: vptestnmb %zmm3, %zmm3, %k2
; SKX-NEXT: korq %k2, %k1, %k1
; SKX-NEXT: kandq %k1, %k0, %k0
; SKX-NEXT: kortestq %k0, %k0
; SKX-NEXT: ktestq %k1, %k0
; SKX-NEXT: je LBB75_1
; SKX-NEXT: ## %bb.2: ## %exit
; SKX-NEXT: popq %rax
Expand All @@ -4089,8 +4145,7 @@ define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
; AVX512BW-NEXT: vptestnmb %zmm2, %zmm2, %k1
; AVX512BW-NEXT: vptestnmb %zmm3, %zmm3, %k2
; AVX512BW-NEXT: korq %k2, %k1, %k1
; AVX512BW-NEXT: kandq %k1, %k0, %k0
; AVX512BW-NEXT: kortestq %k0, %k0
; AVX512BW-NEXT: ktestq %k1, %k0
; AVX512BW-NEXT: je LBB75_1
; AVX512BW-NEXT: ## %bb.2: ## %exit
; AVX512BW-NEXT: popq %rax
Expand Down

0 comments on commit 6265a15

Please sign in to comment.