diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
index 3090ad313b90d..7561a7df5c138 100644
--- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
@@ -1877,28 +1877,34 @@ bool X86InstructionSelector::selectSelect(MachineInstr &I,
   unsigned OpCmp;
   LLT Ty = MRI.getType(DstReg);
-  switch (Ty.getSizeInBits()) {
-  default:
-    return false;
-  case 8:
-    OpCmp = X86::CMOV_GR8;
-    break;
-  case 16:
-    OpCmp = STI.canUseCMOV() ? X86::CMOV16rr : X86::CMOV_GR16;
-    break;
-  case 32:
-    OpCmp = STI.canUseCMOV() ? X86::CMOV32rr : X86::CMOV_GR32;
-    break;
-  case 64:
-    assert(STI.is64Bit() && STI.canUseCMOV());
-    OpCmp = X86::CMOV64rr;
-    break;
+  if (Ty.getSizeInBits() == 80) {
+    BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(X86::CMOVE_Fp80),
+            DstReg)
+        .addReg(Sel.getTrueReg())
+        .addReg(Sel.getFalseReg());
+  } else {
+    switch (Ty.getSizeInBits()) {
+    default:
+      return false;
+    case 8:
+      OpCmp = X86::CMOV_GR8;
+      break;
+    case 16:
+      OpCmp = STI.canUseCMOV() ? X86::CMOV16rr : X86::CMOV_GR16;
+      break;
+    case 32:
+      OpCmp = STI.canUseCMOV() ? X86::CMOV32rr : X86::CMOV_GR32;
+      break;
+    case 64:
+      assert(STI.is64Bit() && STI.canUseCMOV());
+      OpCmp = X86::CMOV64rr;
+      break;
+    }
+    BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(OpCmp), DstReg)
+        .addReg(Sel.getTrueReg())
+        .addReg(Sel.getFalseReg())
+        .addImm(X86::COND_E);
   }
-  BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(OpCmp), DstReg)
-      .addReg(Sel.getTrueReg())
-      .addReg(Sel.getFalseReg())
-      .addImm(X86::COND_E);
-
   const TargetRegisterClass *DstRC = getRegClass(Ty, DstReg, MRI);
   if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
     LLVM_DEBUG(dbgs() << "Failed to constrain CMOV\n");
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index ee9760f881ae9..6006eeaf44093 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -571,10 +571,13 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
 
   // todo: vectors and address spaces
   getActionDefinitionsBuilder(G_SELECT)
-      .legalFor({{s8, s32}, {s16, s32}, {s32, s32}, {s64, s32}, {p0, s32}})
+      .legalFor({{s16, s32}, {s32, s32}, {p0, s32}})
+      .legalFor(!HasCMOV, {{s8, s32}})
+      .legalFor(Is64Bit, {{s64, s32}})
+      .legalFor(UseX87, {{s80, s32}})
+      .clampScalar(1, s32, s32)
       .widenScalarToNextPow2(0, /*Min=*/8)
-      .clampScalar(0, HasCMOV ? s16 : s8, sMaxScalar)
-      .clampScalar(1, s32, s32);
+      .clampScalar(0, HasCMOV ? s16 : s8, sMaxScalar);
 
   // memory intrinsics
   getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir
index 19fe5b84c73ce..85f7c77cbdb60 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir
+++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir
@@ -25,6 +25,7 @@ body: |
     ; X64-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[CTLZ]], [[C1]]
     ; X64-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C]]
     ; X64-NEXT: RET 0, implicit [[AND1]](s64)
+    ;
     ; X86-LABEL: name: test_ctlz35
     ; X86: [[COPY:%[0-9]+]]:_(s64) = COPY $rdx
     ; X86-NEXT: [[TRUNC:%[0-9]+]]:_(s35) = G_TRUNC [[COPY]](s64)
@@ -36,9 +37,9 @@ body: |
     ; X86-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
     ; X86-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[CTLZ]], [[C1]]
     ; X86-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s64) = G_CTLZ_ZERO_UNDEF [[UV1]](s32)
+    ; X86-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
     ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ADD]](s64)
     ; X86-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[CTLZ_ZERO_UNDEF]](s64)
-    ; X86-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
     ; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT1]](s32), [[UV2]], [[UV4]]
     ; X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT1]](s32), [[UV3]], [[UV5]]
     ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32)
@@ -97,6 +98,7 @@ body: |
     ; X64-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ [[DEF]](s64)
     ; X64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[CTLZ]](s64)
     ; X64-NEXT: RET 0, implicit [[COPY]](s64)
+    ;
     ; X86-LABEL: name: test_ctlz64
     ; X86: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF
     ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64)
@@ -106,9 +108,9 @@ body: |
     ; X86-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
     ; X86-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[CTLZ]], [[C1]]
     ; X86-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s64) = G_CTLZ_ZERO_UNDEF [[UV1]](s32)
+    ; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
     ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ADD]](s64)
     ; X86-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[CTLZ_ZERO_UNDEF]](s64)
-    ; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
     ; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV2]], [[UV4]]
     ; X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV3]], [[UV5]]
     ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32)
diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-select.mir
index a7cbb35e3f74c..6ab424eeaa780 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/legalize-select.mir
+++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-select.mir
@@ -33,9 +33,9 @@ body: |
     ; X86: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF
     ; X86-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF
     ; X86-NEXT: [[DEF2:%[0-9]+]]:_(s1) = IMPLICIT_DEF
+    ; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1)
     ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
     ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64)
-    ; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1)
     ; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV]], [[UV2]]
     ; X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV1]], [[UV3]]
     ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32)
@@ -115,9 +115,9 @@ body: |
     ; X64: [[DEF:%[0-9]+]]:_(s8) = IMPLICIT_DEF
     ; X64-NEXT: [[DEF1:%[0-9]+]]:_(s8) = IMPLICIT_DEF
     ; X64-NEXT: [[DEF2:%[0-9]+]]:_(s1) = IMPLICIT_DEF
+    ; X64-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1)
     ; X64-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[DEF1]](s8)
     ; X64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[DEF]](s8)
-    ; X64-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1)
     ; X64-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ZEXT]](s32), [[ANYEXT]], [[ANYEXT1]]
     ; X64-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT]](s16)
     ; X64-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8)
diff --git a/llvm/test/CodeGen/X86/fcmove.ll b/llvm/test/CodeGen/X86/fcmove.ll
deleted file mode 100644
index 6bb014858d048..0000000000000
--- a/llvm/test/CodeGen/X86/fcmove.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc %s -o - -verify-machineinstrs | FileCheck %s
-
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-unknown"
-
-; Test that we can generate an fcmove, and also that it passes verification.
-
-; CHECK-LABEL: cmove_f
-; CHECK: fcmove %st({{[0-7]}}), %st
-define x86_fp80 @cmove_f(x86_fp80 %a, x86_fp80 %b, i32 %c) {
-  %test = icmp eq i32 %c, 0
-  %add = fadd x86_fp80 %a, %b
-  %ret = select i1 %test, x86_fp80 %add, x86_fp80 %b
-  ret x86_fp80 %ret
-}
diff --git a/llvm/test/CodeGen/X86/isel-select-fcmov.ll b/llvm/test/CodeGen/X86/isel-select-fcmov.ll
new file mode 100644
index 0000000000000..cb441b860bb56
--- /dev/null
+++ b/llvm/test/CodeGen/X86/isel-select-fcmov.ll
@@ -0,0 +1,175 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=i686-linux-gnu -mattr=+cmov -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=i686-linux-gnu -mattr=+cmov -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=X86-GISEL
+; RUN: llc < %s -mtriple=i686-linux-gnu -mattr=+cmov -fast-isel=0 -global-isel=0 | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=X64-GISEL
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel=0 -global-isel=0 | FileCheck %s --check-prefix=X64
+
+; Test that we can generate an fcmove, and also that it passes verification.
+
+define x86_fp80 @cmove_cmp(x86_fp80 %a, x86_fp80 %b, i32 %c) {
+; X86-LABEL: cmove_cmp:
+; X86: # %bb.0:
+; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: fadd %st(1), %st
+; X86-NEXT: fxch %st(1)
+; X86-NEXT: fcmove %st(1), %st
+; X86-NEXT: fstp %st(1)
+; X86-NEXT: retl
+;
+; X86-GISEL-LABEL: cmove_cmp:
+; X86-GISEL: # %bb.0:
+; X86-GISEL-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-GISEL-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-GISEL-NEXT: xorl %eax, %eax
+; X86-GISEL-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X86-GISEL-NEXT: sete %al
+; X86-GISEL-NEXT: fadd %st, %st(1)
+; X86-GISEL-NEXT: andl $1, %eax
+; X86-GISEL-NEXT: testl %eax, %eax
+; X86-GISEL-NEXT: fxch %st(1)
+; X86-GISEL-NEXT: fcmove %st(1), %st
+; X86-GISEL-NEXT: fstp %st(1)
+; X86-GISEL-NEXT: retl
+;
+; X64-LABEL: cmove_cmp:
+; X64: # %bb.0:
+; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: testl %edi, %edi
+; X64-NEXT: fadd %st(1), %st
+; X64-NEXT: fxch %st(1)
+; X64-NEXT: fcmove %st(1), %st
+; X64-NEXT: fstp %st(1)
+; X64-NEXT: retq
+;
+; X64-GISEL-LABEL: cmove_cmp:
+; X64-GISEL: # %bb.0:
+; X64-GISEL-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-GISEL-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-GISEL-NEXT: xorl %eax, %eax
+; X64-GISEL-NEXT: cmpl $0, %edi
+; X64-GISEL-NEXT: sete %al
+; X64-GISEL-NEXT: fadd %st, %st(1)
+; X64-GISEL-NEXT: andl $1, %eax
+; X64-GISEL-NEXT: testl %eax, %eax
+; X64-GISEL-NEXT: fxch %st(1)
+; X64-GISEL-NEXT: fcmove %st(1), %st
+; X64-GISEL-NEXT: fstp %st(1)
+; X64-GISEL-NEXT: retq
+  %test = icmp eq i32 %c, 0
+  %add = fadd x86_fp80 %a, %b
+  %ret = select i1 %test, x86_fp80 %add, x86_fp80 %b
+  ret x86_fp80 %ret
+}
+
+define x86_fp80 @cmove_arg(x86_fp80 %a, x86_fp80 %b, i1 %test) {
+; X86-LABEL: cmove_arg:
+; X86: # %bb.0:
+; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: fadd %st(1), %st
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: fxch %st(1)
+; X86-NEXT: fcmovne %st(1), %st
+; X86-NEXT: fstp %st(1)
+; X86-NEXT: retl
+;
+; X86-GISEL-LABEL: cmove_arg:
+; X86-GISEL: # %bb.0:
+; X86-GISEL-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-GISEL-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-GISEL-NEXT: fadd %st, %st(1)
+; X86-GISEL-NEXT: movl $1, %eax
+; X86-GISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-GISEL-NEXT: testl %eax, %eax
+; X86-GISEL-NEXT: fxch %st(1)
+; X86-GISEL-NEXT: fcmove %st(1), %st
+; X86-GISEL-NEXT: fstp %st(1)
+; X86-GISEL-NEXT: retl
+;
+; X64-LABEL: cmove_arg:
+; X64: # %bb.0:
+; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: fadd %st(1), %st
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: fxch %st(1)
+; X64-NEXT: fcmovne %st(1), %st
+; X64-NEXT: fstp %st(1)
+; X64-NEXT: retq
+;
+; X64-GISEL-LABEL: cmove_arg:
+; X64-GISEL: # %bb.0:
+; X64-GISEL-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-GISEL-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-GISEL-NEXT: fadd %st, %st(1)
+; X64-GISEL-NEXT: andl $1, %edi
+; X64-GISEL-NEXT: testl %edi, %edi
+; X64-GISEL-NEXT: fxch %st(1)
+; X64-GISEL-NEXT: fcmove %st(1), %st
+; X64-GISEL-NEXT: fstp %st(1)
+; X64-GISEL-NEXT: retq
+  %add = fadd x86_fp80 %a, %b
+  %ret = select i1 %test, x86_fp80 %add, x86_fp80 %b
+  ret x86_fp80 %ret
+}
+
+define x86_fp80 @cmove_load(x86_fp80 %a, x86_fp80 %b, ptr %p) {
+; X86-LABEL: cmove_load:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: fadd %st(1), %st
+; X86-NEXT: cmpb $0, (%eax)
+; X86-NEXT: fxch %st(1)
+; X86-NEXT: fcmovne %st(1), %st
+; X86-NEXT: fstp %st(1)
+; X86-NEXT: retl
+;
+; X86-GISEL-LABEL: cmove_load:
+; X86-GISEL: # %bb.0:
+; X86-GISEL-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-GISEL-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-GISEL-NEXT: fadd %st, %st(1)
+; X86-GISEL-NEXT: movzbl (%eax), %eax
+; X86-GISEL-NEXT: andl $1, %eax
+; X86-GISEL-NEXT: testl %eax, %eax
+; X86-GISEL-NEXT: fxch %st(1)
+; X86-GISEL-NEXT: fcmove %st(1), %st
+; X86-GISEL-NEXT: fstp %st(1)
+; X86-GISEL-NEXT: retl
+;
+; X64-LABEL: cmove_load:
+; X64: # %bb.0:
+; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: fadd %st(1), %st
+; X64-NEXT: cmpb $0, (%rdi)
+; X64-NEXT: fxch %st(1)
+; X64-NEXT: fcmovne %st(1), %st
+; X64-NEXT: fstp %st(1)
+; X64-NEXT: retq
+;
+; X64-GISEL-LABEL: cmove_load:
+; X64-GISEL: # %bb.0:
+; X64-GISEL-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-GISEL-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-GISEL-NEXT: fadd %st, %st(1)
+; X64-GISEL-NEXT: movzbl (%rdi), %eax
+; X64-GISEL-NEXT: andl $1, %eax
+; X64-GISEL-NEXT: testl %eax, %eax
+; X64-GISEL-NEXT: fxch %st(1)
+; X64-GISEL-NEXT: fcmove %st(1), %st
+; X64-GISEL-NEXT: fstp %st(1)
+; X64-GISEL-NEXT: retq
+  %test = load i1, ptr %p
+  %add = fadd x86_fp80 %a, %b
+  %ret = select i1 %test, x86_fp80 %add, x86_fp80 %b
+  ret x86_fp80 %ret
+}