[X86][FastISel] Use a COPY from K register to a GPR instead of a K operation

The KORTEST was introduced due to a bug where a TEST instruction used a K register, but it turns out that the opposite case, a KORTEST using a GPR, is now happening.

The change removes the KORTEST flow and instead adds a COPY instruction from the K register to a GPR, followed by a TEST of that GPR.

Differential Revision: https://reviews.llvm.org/D24953

llvm-svn: 282580
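As a sketch of the effect on emitted code (register names taken from the select_cmov_i16 test update below), the i1 condition held in a mask register is now copied to a GPR and its low bit tested, instead of KORTESTing the mask against itself:

    # before: KORTEST the mask register against itself
    kmovw    %edi, %k0
    kortestw %k0, %k0
    cmovew   %dx, %si

    # after: copy the mask to a GPR and test bit 0
    kmovw    %edi, %k0
    kmovw    %k0, %eax
    testb    $1, %al
    cmovew   %dx, %si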
Guy Blank committed Sep 28, 2016
1 parent 55b8eaa commit 2bdc74a
Showing 4 changed files with 54 additions and 31 deletions.
58 changes: 31 additions & 27 deletions llvm/lib/Target/X86/X86FastISel.cpp
@@ -1731,15 +1731,17 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
   unsigned OpReg = getRegForValue(BI->getCondition());
   if (OpReg == 0) return false;
 
-  // In case OpReg is a K register, kortest against itself.
-  if (MRI.getRegClass(OpReg) == &X86::VK1RegClass)
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::KORTESTWrr))
-      .addReg(OpReg)
-      .addReg(OpReg);
-  else
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
-      .addReg(OpReg)
-      .addImm(1);
+  // In case OpReg is a K register, COPY to a GPR
+  if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
+    unsigned KOpReg = OpReg;
+    OpReg = createResultReg(&X86::GR8RegClass);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+            TII.get(TargetOpcode::COPY), OpReg)
+      .addReg(KOpReg);
+  }
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
+    .addReg(OpReg)
+    .addImm(1);
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1))
     .addMBB(TrueMBB);
   finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
@@ -2073,16 +2075,17 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
       return false;
     bool CondIsKill = hasTrivialKill(Cond);
 
-    // In case OpReg is a K register, kortest against itself.
-    if (MRI.getRegClass(CondReg) == &X86::VK1RegClass)
+    // In case OpReg is a K register, COPY to a GPR
+    if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
+      unsigned KCondReg = CondReg;
+      CondReg = createResultReg(&X86::GR8RegClass);
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-              TII.get(X86::KORTESTWrr))
-        .addReg(CondReg, getKillRegState(CondIsKill))
-        .addReg(CondReg);
-    else
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
-        .addReg(CondReg, getKillRegState(CondIsKill))
-        .addImm(1);
+              TII.get(TargetOpcode::COPY), CondReg)
+        .addReg(KCondReg, getKillRegState(CondIsKill));
+    }
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
+      .addReg(CondReg, getKillRegState(CondIsKill))
+      .addImm(1);
   }
 
   const Value *LHS = I->getOperand(1);
@@ -2254,16 +2257,17 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
       return false;
     bool CondIsKill = hasTrivialKill(Cond);
 
-    // In case OpReg is a K register, kortest against itself.
-    if (MRI.getRegClass(CondReg) == &X86::VK1RegClass)
+    // In case OpReg is a K register, COPY to a GPR
+    if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
+      unsigned KCondReg = CondReg;
+      CondReg = createResultReg(&X86::GR8RegClass);
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-              TII.get(X86::KORTESTWrr))
-        .addReg(CondReg, getKillRegState(CondIsKill))
-        .addReg(CondReg);
-    else
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
-        .addReg(CondReg, getKillRegState(CondIsKill))
-        .addImm(1);
+              TII.get(TargetOpcode::COPY), CondReg)
+        .addReg(KCondReg, getKillRegState(CondIsKill));
+    }
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
+      .addReg(CondReg, getKillRegState(CondIsKill))
+      .addImm(1);
   }
 
   const Value *LHS = I->getOperand(1);
3 changes: 2 additions & 1 deletion llvm/test/CodeGen/X86/avx512-fsel.ll
@@ -26,7 +26,8 @@ define i32 @test(float %a, float %b) {
 ; CHECK-NEXT: movb %dil, %r8b
 ; CHECK-NEXT: andl $1, %r8d
 ; CHECK-NEXT: kmovw %r8d, %k1
-; CHECK-NEXT: kortestw %k1, %k1
+; CHECK-NEXT: kmovw %k1, %ecx
+; CHECK-NEXT: testb $1, %cl
 ; CHECK-NEXT: movb %al, {{[0-9]+}}(%rsp) ## 1-byte Spill
 ; CHECK-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Spill
 ; CHECK-NEXT: jne LBB0_1
15 changes: 15 additions & 0 deletions llvm/test/CodeGen/X86/fast-isel-load-i1.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s
+
+define i1 @test_i1(i1* %b) {
+; CHECK-LABEL: test_i1:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: testb $1, (%rdi)
+entry:
+  %0 = load i1, i1* %b, align 1
+  br i1 %0, label %in, label %out
+in:
+  ret i1 0
+out:
+  ret i1 1
+}
+
9 changes: 6 additions & 3 deletions llvm/test/CodeGen/X86/fast-isel-select-cmov.ll
@@ -16,7 +16,8 @@ define zeroext i16 @select_cmov_i16(i1 zeroext %cond, i16 zeroext %a, i16 zeroex
 ; AVX512-LABEL: select_cmov_i16:
 ; AVX512: ## BB#0:
 ; AVX512-NEXT: kmovw %edi, %k0
-; AVX512-NEXT: kortestw %k0, %k0
+; AVX512-NEXT: kmovw %k0, %eax
+; AVX512-NEXT: testb $1, %al
 ; AVX512-NEXT: cmovew %dx, %si
 ; AVX512-NEXT: movzwl %si, %eax
 ; AVX512-NEXT: retq
@@ -47,7 +48,8 @@ define i32 @select_cmov_i32(i1 zeroext %cond, i32 %a, i32 %b) {
 ; AVX512-LABEL: select_cmov_i32:
 ; AVX512: ## BB#0:
 ; AVX512-NEXT: kmovw %edi, %k0
-; AVX512-NEXT: kortestw %k0, %k0
+; AVX512-NEXT: kmovw %k0, %eax
+; AVX512-NEXT: testb $1, %al
 ; AVX512-NEXT: cmovel %edx, %esi
 ; AVX512-NEXT: movl %esi, %eax
 ; AVX512-NEXT: retq
@@ -78,7 +80,8 @@ define i64 @select_cmov_i64(i1 zeroext %cond, i64 %a, i64 %b) {
 ; AVX512-LABEL: select_cmov_i64:
 ; AVX512: ## BB#0:
 ; AVX512-NEXT: kmovw %edi, %k0
-; AVX512-NEXT: kortestw %k0, %k0
+; AVX512-NEXT: kmovw %k0, %eax
+; AVX512-NEXT: testb $1, %al
 ; AVX512-NEXT: cmoveq %rdx, %rsi
 ; AVX512-NEXT: movq %rsi, %rax
 ; AVX512-NEXT: retq
