From 7e927698621f476e698eea14e6954057c81ccf4b Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Thu, 20 Feb 2020 13:46:56 -0800
Subject: [PATCH] [X86] Expand vselect of v1i1 under avx512.

We already do this for v2i1, v4i1, etc.
---
 llvm/lib/Target/X86/X86ISelLowering.cpp |  2 +-
 llvm/test/CodeGen/X86/avx512-select.ll  | 86 +++++++++++++++++++++++++
 2 files changed, 87 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index bf304fc0060be..c56dd33c98aef 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1449,6 +1449,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::SADDSAT, VT, Custom);
       setOperationAction(ISD::USUBSAT, VT, Custom);
       setOperationAction(ISD::SSUBSAT, VT, Custom);
+      setOperationAction(ISD::VSELECT, VT, Expand);
     }

     for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
@@ -1464,7 +1465,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
-      setOperationAction(ISD::VSELECT, VT, Expand);
     }

     for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
diff --git a/llvm/test/CodeGen/X86/avx512-select.ll b/llvm/test/CodeGen/X86/avx512-select.ll
index cd4a70c4f73c4..5cbca00501776 100644
--- a/llvm/test/CodeGen/X86/avx512-select.ll
+++ b/llvm/test/CodeGen/X86/avx512-select.ll
@@ -591,3 +591,89 @@ define <16 x i64> @narrowExtractedVectorSelect_crash(<16 x i64> %arg, <16 x i16>
   %tmp3 = zext <16 x i16> %tmp2 to <16 x i64>
   ret <16 x i64> %tmp3
 }
+
+define void @vselect_v1i1(<1 x i1>* %w, <1 x i1>* %x, <1 x i1>* %y) nounwind {
+; X86-AVX512F-LABEL: vselect_v1i1:
+; X86-AVX512F:       # %bb.0:
+; X86-AVX512F-NEXT:    pushl %esi
+; X86-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-AVX512F-NEXT:    movzbl (%edx), %esi
+; X86-AVX512F-NEXT:    kmovw %esi, %k0
+; X86-AVX512F-NEXT:    movzbl (%ecx), %ecx
+; X86-AVX512F-NEXT:    kmovw %ecx, %k1
+; X86-AVX512F-NEXT:    movzbl (%eax), %eax
+; X86-AVX512F-NEXT:    kmovw %eax, %k2
+; X86-AVX512F-NEXT:    kandw %k2, %k0, %k0
+; X86-AVX512F-NEXT:    kxnorw %k0, %k0, %k3
+; X86-AVX512F-NEXT:    kxorw %k3, %k2, %k2
+; X86-AVX512F-NEXT:    kandw %k2, %k1, %k1
+; X86-AVX512F-NEXT:    korw %k1, %k0, %k0
+; X86-AVX512F-NEXT:    kmovw %k0, %eax
+; X86-AVX512F-NEXT:    movb %al, (%edx)
+; X86-AVX512F-NEXT:    popl %esi
+; X86-AVX512F-NEXT:    retl
+;
+; X64-AVX512F-LABEL: vselect_v1i1:
+; X64-AVX512F:       # %bb.0:
+; X64-AVX512F-NEXT:    movzbl (%rsi), %eax
+; X64-AVX512F-NEXT:    kmovw %eax, %k0
+; X64-AVX512F-NEXT:    movzbl (%rdx), %eax
+; X64-AVX512F-NEXT:    kmovw %eax, %k1
+; X64-AVX512F-NEXT:    movzbl (%rdi), %eax
+; X64-AVX512F-NEXT:    kmovw %eax, %k2
+; X64-AVX512F-NEXT:    kandw %k2, %k0, %k0
+; X64-AVX512F-NEXT:    kxnorw %k0, %k0, %k3
+; X64-AVX512F-NEXT:    kxorw %k3, %k2, %k2
+; X64-AVX512F-NEXT:    kandw %k2, %k1, %k1
+; X64-AVX512F-NEXT:    korw %k1, %k0, %k0
+; X64-AVX512F-NEXT:    kmovw %k0, %eax
+; X64-AVX512F-NEXT:    movb %al, (%rsi)
+; X64-AVX512F-NEXT:    retq
+;
+; X86-AVX512BW-LABEL: vselect_v1i1:
+; X86-AVX512BW:       # %bb.0:
+; X86-AVX512BW-NEXT:    pushl %esi
+; X86-AVX512BW-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX512BW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX512BW-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-AVX512BW-NEXT:    movzbl (%edx), %esi
+; X86-AVX512BW-NEXT:    kmovd %esi, %k0
+; X86-AVX512BW-NEXT:    movzbl (%ecx), %ecx
+; X86-AVX512BW-NEXT:    kmovd %ecx, %k1
+; X86-AVX512BW-NEXT:    movzbl (%eax), %eax
+; X86-AVX512BW-NEXT:    kmovd %eax, %k2
+; X86-AVX512BW-NEXT:    kandw %k2, %k0, %k0
+; X86-AVX512BW-NEXT:    kxnorw %k0, %k0, %k3
+; X86-AVX512BW-NEXT:    kxorw %k3, %k2, %k2
+; X86-AVX512BW-NEXT:    kandw %k2, %k1, %k1
+; X86-AVX512BW-NEXT:    korw %k1, %k0, %k0
+; X86-AVX512BW-NEXT:    kmovd %k0, %eax
+; X86-AVX512BW-NEXT:    movb %al, (%edx)
+; X86-AVX512BW-NEXT:    popl %esi
+; X86-AVX512BW-NEXT:    retl
+;
+; X64-AVX512BW-LABEL: vselect_v1i1:
+; X64-AVX512BW:       # %bb.0:
+; X64-AVX512BW-NEXT:    movzbl (%rsi), %eax
+; X64-AVX512BW-NEXT:    kmovd %eax, %k0
+; X64-AVX512BW-NEXT:    movzbl (%rdx), %eax
+; X64-AVX512BW-NEXT:    kmovd %eax, %k1
+; X64-AVX512BW-NEXT:    movzbl (%rdi), %eax
+; X64-AVX512BW-NEXT:    kmovd %eax, %k2
+; X64-AVX512BW-NEXT:    kandw %k2, %k0, %k0
+; X64-AVX512BW-NEXT:    kxnorw %k0, %k0, %k3
+; X64-AVX512BW-NEXT:    kxorw %k3, %k2, %k2
+; X64-AVX512BW-NEXT:    kandw %k2, %k1, %k1
+; X64-AVX512BW-NEXT:    korw %k1, %k0, %k0
+; X64-AVX512BW-NEXT:    kmovd %k0, %eax
+; X64-AVX512BW-NEXT:    movb %al, (%rsi)
+; X64-AVX512BW-NEXT:    retq
+  %a = load <1 x i1>, <1 x i1>* %x
+  %b = load <1 x i1>, <1 x i1>* %y
+  %b2 = load <1 x i1>, <1 x i1>* %w
+  %c = select <1 x i1> %b2, <1 x i1> %a, <1 x i1> %b
+  store <1 x i1> %c, <1 x i1>* %x
+  ret void
+}
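
Note, not part of the patch: marking ISD::VSELECT as Expand asks the generic legalizer to rewrite the select as bitwise mask arithmetic, result = (Cond & A) | (~Cond & B), which is the kandw/kxnorw/kxorw/korw sequence in the CHECK lines above (kxnorw of a register with itself yields all-ones; the following kxorw inverts the condition mask). Below is a minimal C++ sketch of that expansion on 16-bit k-register values, assuming this bitwise form; the helper name is hypothetical, not an LLVM API.

// Illustrative sketch only: emulates the expanded v1i1 vselect on 16-bit
// mask values. Only bit 0 is meaningful for v1i1.
#include <cassert>
#include <cstdint>

static uint16_t expandVSelectV1i1(uint16_t Cond, uint16_t A, uint16_t B) {
  uint16_t T = Cond & A;             // kandw  %k2, %k0, %k0  -- A & Cond
  uint16_t AllOnes = 0xFFFF;         // kxnorw %k0, %k0, %k3  -- k ^~ k = all ones
  uint16_t NotCond = Cond ^ AllOnes; // kxorw  %k3, %k2, %k2  -- invert Cond
  uint16_t F = NotCond & B;          // kandw  %k2, %k1, %k1  -- B & ~Cond
  return T | F;                      // korw   %k1, %k0, %k0  -- merge halves
}

int main() {
  assert((expandVSelectV1i1(1, 1, 0) & 1) == 1); // Cond bit set: take A's bit
  assert((expandVSelectV1i1(0, 1, 0) & 1) == 0); // Cond bit clear: take B's bit
  return 0;
}

As the commit message notes, Expand here simply brings v1i1 in line with the existing handling of v2i1 through v16i1, rather than adding a custom lowering for the 1-element case.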