-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[X86] Handle X86ISD::EXPAND/COMPRESS nodes as target shuffles #171119
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Allows for shuffle simplification. Required a minor fix to the overly reduced compress-undef-float-passthrough.ll regression test.
|
@llvm/pr-subscribers-backend-x86 Author: Simon Pilgrim (RKSimon) Changes: Allows for shuffle simplification. Required a minor fix to the overly reduced compress-undef-float-passthrough.ll regression test. Full diff: https://github.com/llvm/llvm-project/pull/171119.diff — 3 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d0ae75b2e6785..b71878ae1434c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2888,6 +2888,8 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::VPERMV:
case X86ISD::VPERMV3:
case X86ISD::VZEXT_MOVL:
+ case X86ISD::COMPRESS:
+ case X86ISD::EXPAND:
return true;
}
}
@@ -5839,6 +5841,48 @@ static bool getTargetShuffleMask(SDValue N, bool AllowSentinelZero,
}
return false;
}
+ case X86ISD::COMPRESS: {
+ SDValue CmpVec = N.getOperand(0);
+ SDValue PassThru = N.getOperand(1);
+ SDValue CmpMask = N.getOperand(2);
+ APInt UndefElts;
+ SmallVector<APInt> EltBits;
+ if (!getTargetConstantBitsFromNode(CmpMask, 1, UndefElts, EltBits))
+ return false;
+ assert(UndefElts.getBitWidth() == NumElems && EltBits.size() == NumElems &&
+ "Illegal compression mask");
+ for (unsigned I = 0; I != NumElems; ++I) {
+ if (!EltBits[I].isZero())
+ Mask.push_back(I);
+ }
+ while (Mask.size() != NumElems) {
+ Mask.push_back(NumElems + Mask.size());
+ }
+ Ops.push_back(CmpVec);
+ Ops.push_back(PassThru);
+ return true;
+ }
+ case X86ISD::EXPAND: {
+ SDValue ExpVec = N.getOperand(0);
+ SDValue PassThru = N.getOperand(1);
+ SDValue ExpMask = N.getOperand(2);
+ APInt UndefElts;
+ SmallVector<APInt> EltBits;
+ if (!getTargetConstantBitsFromNode(ExpMask, 1, UndefElts, EltBits))
+ return false;
+ assert(UndefElts.getBitWidth() == NumElems && EltBits.size() == NumElems &&
+ "Illegal expansion mask");
+ unsigned ExpIndex = 0;
+ for (unsigned I = 0; I != NumElems; ++I) {
+ if (EltBits[I].isZero())
+ Mask.push_back(I + NumElems);
+ else
+ Mask.push_back(ExpIndex++);
+ }
+ Ops.push_back(ExpVec);
+ Ops.push_back(PassThru);
+ return true;
+ }
default:
llvm_unreachable("unknown target shuffle node");
}
@@ -61325,6 +61369,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::VPERM2X128:
case X86ISD::SHUF128:
case X86ISD::VZEXT_MOVL:
+ case X86ISD::COMPRESS:
+ case X86ISD::EXPAND:
case ISD::VECTOR_SHUFFLE: return combineShuffle(N, DAG, DCI,Subtarget);
case X86ISD::FMADD_RND:
case X86ISD::FMSUB:
diff --git a/llvm/test/CodeGen/X86/compress-undef-float-passthrough.ll b/llvm/test/CodeGen/X86/compress-undef-float-passthrough.ll
index 47331db7261b3..b19112c02c085 100644
--- a/llvm/test/CodeGen/X86/compress-undef-float-passthrough.ll
+++ b/llvm/test/CodeGen/X86/compress-undef-float-passthrough.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx512f,avx512vl | FileCheck %s --check-prefix=CHECK
-define void @test_compress_undef_float_passthrough() {
+define void @test_compress_undef_float_passthrough(<4 x double> %a0) {
; CHECK-LABEL: test_compress_undef_float_passthrough:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movb $5, %al
@@ -12,7 +12,7 @@ define void @test_compress_undef_float_passthrough() {
; CHECK-NEXT: retq
entry: ; preds = %loop.50
%0 = bitcast i4 undef to <4 x i1>
- %1 = call <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> <i1 1, i1 0, i1 1, i1 0>)
+ %1 = call <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double> %a0, <4 x double> undef, <4 x i1> <i1 1, i1 0, i1 1, i1 0>)
call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %1, <4 x ptr> undef, i32 0, <4 x i1> %0)
ret void
}
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
index 3ea95eeaedfc7..b79d9e8ce47e9 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
@@ -1035,68 +1035,23 @@ define <8 x double> @concat_vpermilvar_v8f64_v4f64(<4 x double> %a0, <4 x double
ret <8 x double> %res
}
-; TODO - shift elements up by one
+; shift elements up by one
define <16 x i32> @combine_vexpandd_as_valignd(<16 x i32> %x) {
-; X86-AVX512F-LABEL: combine_vexpandd_as_valignd:
-; X86-AVX512F: # %bb.0:
-; X86-AVX512F-NEXT: movw $-2, %ax
-; X86-AVX512F-NEXT: kmovw %eax, %k1
-; X86-AVX512F-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z}
-; X86-AVX512F-NEXT: retl
-;
-; X86-AVX512BW-LABEL: combine_vexpandd_as_valignd:
-; X86-AVX512BW: # %bb.0:
-; X86-AVX512BW-NEXT: movw $-2, %ax
-; X86-AVX512BW-NEXT: kmovd %eax, %k1
-; X86-AVX512BW-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z}
-; X86-AVX512BW-NEXT: retl
-;
-; X64-AVX512F-LABEL: combine_vexpandd_as_valignd:
-; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: movw $-2, %ax
-; X64-AVX512F-NEXT: kmovw %eax, %k1
-; X64-AVX512F-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z}
-; X64-AVX512F-NEXT: retq
-;
-; X64-AVX512BW-LABEL: combine_vexpandd_as_valignd:
-; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: movw $-2, %ax
-; X64-AVX512BW-NEXT: kmovd %eax, %k1
-; X64-AVX512BW-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z}
-; X64-AVX512BW-NEXT: retq
+; CHECK-LABEL: combine_vexpandd_as_valignd:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: valignd {{.*#+}} zmm0 = zmm1[15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; CHECK-NEXT: ret{{[l|q]}}
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> %x, <16 x i32> zeroinitializer, <16 x i1> <i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
ret <16 x i32> %res
}
-; TODO - zero upper half of vector
+; zero upper half of vector
define <16 x i32> @combine_vcompressd_as_vmov(<16 x i32> %x) {
-; X86-AVX512F-LABEL: combine_vcompressd_as_vmov:
-; X86-AVX512F: # %bb.0:
-; X86-AVX512F-NEXT: movw $255, %ax
-; X86-AVX512F-NEXT: kmovw %eax, %k1
-; X86-AVX512F-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z}
-; X86-AVX512F-NEXT: retl
-;
-; X86-AVX512BW-LABEL: combine_vcompressd_as_vmov:
-; X86-AVX512BW: # %bb.0:
-; X86-AVX512BW-NEXT: movw $255, %ax
-; X86-AVX512BW-NEXT: kmovd %eax, %k1
-; X86-AVX512BW-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z}
-; X86-AVX512BW-NEXT: retl
-;
-; X64-AVX512F-LABEL: combine_vcompressd_as_vmov:
-; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: movw $255, %ax
-; X64-AVX512F-NEXT: kmovw %eax, %k1
-; X64-AVX512F-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z}
-; X64-AVX512F-NEXT: retq
-;
-; X64-AVX512BW-LABEL: combine_vcompressd_as_vmov:
-; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: movw $255, %ax
-; X64-AVX512BW-NEXT: kmovd %eax, %k1
-; X64-AVX512BW-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z}
-; X64-AVX512BW-NEXT: retq
+; CHECK-LABEL: combine_vcompressd_as_vmov:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovaps %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
%res = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> %x, <16 x i32> zeroinitializer, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>)
ret <16 x i32> %res
}
|
You can test this locally with the following command:

git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef([^a-zA-Z0-9_-]|$)|UndefValue::get)' 'HEAD~1' HEAD llvm/lib/Target/X86/X86ISelLowering.cpp llvm/test/CodeGen/X86/compress-undef-float-passthrough.ll llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll

The following files introduce new uses of undef:

Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields undef. In tests, avoid using undef where possible. For example, this is considered a bad practice:

define void @fn() {
  ...
  br i1 undef, ...
}

Please use the following instead:

define void @fn(i1 %cond) {
  ...
  br i1 %cond, ...
}

Please refer to the Undefined Behavior Manual for more information.
phoebewang
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
…71119) Allows for shuffle simplification. Required a minor fix to the overly reduced compress-undef-float-passthrough.ll regression test.
Allows for shuffle simplification
Required a minor fix to the overly reduced compress-undef-float-passthrough.ll regression test