From 97988f62e48ad5f16711c19aaaa85a995ca2ab21 Mon Sep 17 00:00:00 2001 From: Michael-Chen-NJU <2802328816@qq.com> Date: Fri, 14 Nov 2025 12:19:52 +0800 Subject: [PATCH 1/5] [SelectionDAG] Update canCreateUndefOrPoison to handle VECTOR_COMPRESS case --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index c2b4c19846316..38fee7c83e1c0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5702,6 +5702,11 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, return false; } + case ISD::VECTOR_COMPRESS: + // Return true only if undef is checked and at least one element is + // demanded. + return !PoisonOnly && !DemandedElts.isZero(); + default: // Allow the target to implement this method for its nodes. if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN || From a4e05742f04a3b3e9bf343e45504d3fde659fa88 Mon Sep 17 00:00:00 2001 From: Michael-Chen-NJU <2802328816@qq.com> Date: Mon, 17 Nov 2025 13:50:32 +0800 Subject: [PATCH 2/5] [DAG] Enhance canCreateUndefOrPoison for VECTOR_COMPRESS and add tests for freeze elimination --- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 +- .../CodeGen/X86/vector-compress-freeze.ll | 39 +++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/X86/vector-compress-freeze.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 38fee7c83e1c0..413698b824884 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5705,7 +5705,9 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, case ISD::VECTOR_COMPRESS: // Return true only if undef is checked and at least one element is // demanded. - return !PoisonOnly && !DemandedElts.isZero(); + if (Op.getOperand(2).isUndef()) + return !PoisonOnly && !DemandedElts.isZero(); + return false; default: // Allow the target to implement this method for its nodes. diff --git a/llvm/test/CodeGen/X86/vector-compress-freeze.ll b/llvm/test/CodeGen/X86/vector-compress-freeze.ll new file mode 100644 index 0000000000000..8f77e19cf085e --- /dev/null +++ b/llvm/test/CodeGen/X86/vector-compress-freeze.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl | FileCheck %s + +declare <16 x i32> @llvm.experimental.vector.compress.v16i32(<16 x i32>, <16 x i1>, <16 x i32>) + +define <16 x i32> @test_compress_freeze_elimination(<16 x i32> %a0, <16 x i32> %a1, <16 x i8> %a3) { +; CHECK-LABEL: test_compress_freeze_elimination: +; CHECK: # %bb.0: +; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 +; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; CHECK-NEXT: vpcompressd %zmm1, %zmm0 {%k1} +; CHECK-NEXT: retq + %cmp = icmp sgt <16 x i32> %a0, %a1 + %ext = zext <16 x i8> %a3 to <16 x i32> + %cpr = call <16 x i32> @llvm.experimental.vector.compress.v16i32(<16 x i32> %ext, <16 x i1> %cmp, <16 x i32> splat(i32 15)) + %fr = freeze <16 x i32> %cpr + %and = and <16 x i32> %fr, splat(i32 255) + ret <16 x i32> %and +} + +; Test 2: Negative Case (Optimization must NOT happen) +; PassThru is 'undef', so compress *can* generate undefs. +; The freeze must remain to define those lanes. +define <16 x i32> @test_compress_freeze_must_remain(<16 x i32> %a0, <16 x i32> %a1, <16 x i8> %a3) { +; CHECK-LABEL: test_compress_freeze_must_remain: +; CHECK: # %bb.0: +; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 +; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; CHECK-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z} +; CHECK-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 +; CHECK-NEXT: retq + %cmp = icmp sgt <16 x i32> %a0, %a1 + %ext = zext <16 x i8> %a3 to <16 x i32> + %cpr = call <16 x i32> @llvm.experimental.vector.compress.v16i32(<16 x i32> %ext, <16 x i1> %cmp, <16 x i32> undef) + %fr = freeze <16 x i32> %cpr + %and = and <16 x i32> %fr, splat(i32 255) + ret <16 x i32> %and +} From 1e3bc70116d6bb7ea433581c5ec844529f583448 Mon Sep 17 00:00:00 2001 From: Michael-Chen-NJU <2802328816@qq.com> Date: Mon, 17 Nov 2025 13:59:46 +0800 Subject: [PATCH 3/5] [DAG] Removing undef --- .../CodeGen/X86/vector-compress-freeze.ll | 21 +------------------ 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/llvm/test/CodeGen/X86/vector-compress-freeze.ll b/llvm/test/CodeGen/X86/vector-compress-freeze.ll index 8f77e19cf085e..66b76ac4ebadb 100644 --- a/llvm/test/CodeGen/X86/vector-compress-freeze.ll +++ b/llvm/test/CodeGen/X86/vector-compress-freeze.ll @@ -17,23 +17,4 @@ define <16 x i32> @test_compress_freeze_elimination(<16 x i32> %a0, <16 x i32> % %fr = freeze <16 x i32> %cpr %and = and <16 x i32> %fr, splat(i32 255) ret <16 x i32> %and -} - -; Test 2: Negative Case (Optimization must NOT happen) -; PassThru is 'undef', so compress *can* generate undefs. -; The freeze must remain to define those lanes. -define <16 x i32> @test_compress_freeze_must_remain(<16 x i32> %a0, <16 x i32> %a1, <16 x i8> %a3) { -; CHECK-LABEL: test_compress_freeze_must_remain: -; CHECK: # %bb.0: -; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 -; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero -; CHECK-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 -; CHECK-NEXT: retq - %cmp = icmp sgt <16 x i32> %a0, %a1 - %ext = zext <16 x i8> %a3 to <16 x i32> - %cpr = call <16 x i32> @llvm.experimental.vector.compress.v16i32(<16 x i32> %ext, <16 x i1> %cmp, <16 x i32> undef) - %fr = freeze <16 x i32> %cpr - %and = and <16 x i32> %fr, splat(i32 255) - ret <16 x i32> %and -} +} \ No newline at end of file From d67fee962efc1255374b717d8cc9c00f87823b5e Mon Sep 17 00:00:00 2001 From: Michael-Chen-NJU <2802328816@qq.com> Date: Mon, 17 Nov 2025 23:53:10 +0800 Subject: [PATCH 4/5] [DAG] Add negative test case for freeze elimination in vector.compress --- .../CodeGen/X86/vector-compress-freeze.ll | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/X86/vector-compress-freeze.ll b/llvm/test/CodeGen/X86/vector-compress-freeze.ll index 66b76ac4ebadb..84301e802a348 100644 --- a/llvm/test/CodeGen/X86/vector-compress-freeze.ll +++ b/llvm/test/CodeGen/X86/vector-compress-freeze.ll @@ -17,4 +17,21 @@ define <16 x i32> @test_compress_freeze_elimination(<16 x i32> %a0, <16 x i32> % %fr = freeze <16 x i32> %cpr %and = and <16 x i32> %fr, splat(i32 255) ret <16 x i32> %and -} \ No newline at end of file +} + +; Negative Case +define <16 x i32> @test_compress_freeze_must_remain(<16 x i32> %a0, <16 x i32> %a1, <16 x i8> %a3) { +; CHECK-LABEL: test_compress_freeze_must_remain: +; CHECK: # %bb.0: +; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 +; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; CHECK-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z} +; CHECK-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 +; CHECK-NEXT: retq + %cmp = icmp sgt <16 x i32> %a0, %a1 + %ext = zext <16 x i8> %a3 to <16 x i32> + %cpr = call <16 x i32> @llvm.experimental.vector.compress.v16i32(<16 x i32> %ext, <16 x i1> %cmp, <16 x i32> poison) + %fr = freeze <16 x i32> %cpr + %and = and <16 x i32> %fr, splat(i32 255) + ret <16 x i32> %and +} From b4d8f82c189f0d191682e72fb583d9816dacad32 Mon Sep 17 00:00:00 2001 From: Michael-Chen-NJU <2802328816@qq.com> Date: Tue, 18 Nov 2025 23:24:46 +0800 Subject: [PATCH 5/5] [DAG] Refactor canCreateUndefOrPoison for VECTOR_COMPRESS and update freeze elimination test --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 ---- llvm/test/CodeGen/X86/vector-compress-freeze.ll | 7 +++---- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 413698b824884..df80d894a01ad 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5703,10 +5703,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, } case ISD::VECTOR_COMPRESS: - // Return true only if undef is checked and at least one element is - // demanded. - if (Op.getOperand(2).isUndef()) - return !PoisonOnly && !DemandedElts.isZero(); return false; default: diff --git a/llvm/test/CodeGen/X86/vector-compress-freeze.ll b/llvm/test/CodeGen/X86/vector-compress-freeze.ll index 84301e802a348..981557f9b56cf 100644 --- a/llvm/test/CodeGen/X86/vector-compress-freeze.ll +++ b/llvm/test/CodeGen/X86/vector-compress-freeze.ll @@ -19,13 +19,12 @@ define <16 x i32> @test_compress_freeze_elimination(<16 x i32> %a0, <16 x i32> % ret <16 x i32> %and } -; Negative Case -define <16 x i32> @test_compress_freeze_must_remain(<16 x i32> %a0, <16 x i32> %a1, <16 x i8> %a3) { -; CHECK-LABEL: test_compress_freeze_must_remain: +define <16 x i32> @test_compress_freeze(<16 x i32> %a0, <16 x i32> %a1, <16 x i8> %a3) { +; CHECK-LABEL: test_compress_freeze: ; CHECK: # %bb.0: ; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero -; CHECK-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z} +; CHECK-NEXT: vpcompressd %zmm0, %zmm0 {%k1} ; CHECK-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 ; CHECK-NEXT: retq %cmp = icmp sgt <16 x i32> %a0, %a1