diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 8827bff111c22..1b15a207a2d37 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5702,6 +5702,9 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
     return false;
   }
 
+  case ISD::VECTOR_COMPRESS:
+    return false;
+
   default:
     // Allow the target to implement this method for its nodes.
     if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN ||
diff --git a/llvm/test/CodeGen/X86/vector-compress-freeze.ll b/llvm/test/CodeGen/X86/vector-compress-freeze.ll
new file mode 100644
index 0000000000000..981557f9b56cf
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vector-compress-freeze.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl | FileCheck %s
+
+declare <16 x i32> @llvm.experimental.vector.compress.v16i32(<16 x i32>, <16 x i1>, <16 x i32>)
+; Passthru is splat(15): the expected output has no vpandd, i.e. the 'and 255' applied after the freeze is not emitted.
+define <16 x i32> @test_compress_freeze_elimination(<16 x i32> %a0, <16 x i32> %a1, <16 x i8> %a3) {
+; CHECK-LABEL: test_compress_freeze_elimination:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
+; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
+; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; CHECK-NEXT: vpcompressd %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: retq
+  %cmp = icmp sgt <16 x i32> %a0, %a1
+  %ext = zext <16 x i8> %a3 to <16 x i32>
+  %cpr = call <16 x i32> @llvm.experimental.vector.compress.v16i32(<16 x i32> %ext, <16 x i1> %cmp, <16 x i32> splat(i32 15))
+  %fr = freeze <16 x i32> %cpr
+  %and = and <16 x i32> %fr, splat(i32 255)
+  ret <16 x i32> %and
+}
+; Passthru is poison: the expected output still applies the 'and 255' (vpandd) after the compress.
+define <16 x i32> @test_compress_freeze(<16 x i32> %a0, <16 x i32> %a1, <16 x i8> %a3) {
+; CHECK-LABEL: test_compress_freeze:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
+; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
+; CHECK-NEXT: vpcompressd %zmm0, %zmm0 {%k1}
+; CHECK-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
+; CHECK-NEXT: retq
+  %cmp = icmp sgt <16 x i32> %a0, %a1
+  %ext = zext <16 x i8> %a3 to <16 x i32>
+  %cpr = call <16 x i32> @llvm.experimental.vector.compress.v16i32(<16 x i32> %ext, <16 x i1> %cmp, <16 x i32> poison)
+  %fr = freeze <16 x i32> %cpr
+  %and = and <16 x i32> %fr, splat(i32 255)
+  ret <16 x i32> %and
+}