Skip to content

Conversation

@RKSimon
Copy link
Collaborator

@RKSimon RKSimon commented Dec 8, 2025

Allows for shuffle simplification

Required a minor fix to the overly reduced compress-undef-float-passthrough.ll regression test

Allows for shuffle simplification

Required a minor fix to the overly reduced compress-undef-float-passthrough.ll regression test
@llvmbot
Copy link
Member

llvmbot commented Dec 8, 2025

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes

Allows for shuffle simplification

Required a minor fix to the overly reduced compress-undef-float-passthrough.ll regression test


Full diff: https://github.com/llvm/llvm-project/pull/171119.diff

3 Files Affected:

  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+46)
  • (modified) llvm/test/CodeGen/X86/compress-undef-float-passthrough.ll (+2-2)
  • (modified) llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll (+11-56)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d0ae75b2e6785..b71878ae1434c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2888,6 +2888,8 @@ static bool isTargetShuffle(unsigned Opcode) {
   case X86ISD::VPERMV:
   case X86ISD::VPERMV3:
   case X86ISD::VZEXT_MOVL:
+  case X86ISD::COMPRESS:
+  case X86ISD::EXPAND:
     return true;
   }
 }
@@ -5839,6 +5841,48 @@ static bool getTargetShuffleMask(SDValue N, bool AllowSentinelZero,
     }
     return false;
   }
+  case X86ISD::COMPRESS: {
+    SDValue CmpVec = N.getOperand(0);
+    SDValue PassThru = N.getOperand(1);
+    SDValue CmpMask = N.getOperand(2);
+    APInt UndefElts;
+    SmallVector<APInt> EltBits;
+    if (!getTargetConstantBitsFromNode(CmpMask, 1, UndefElts, EltBits))
+      return false;
+    assert(UndefElts.getBitWidth() == NumElems && EltBits.size() == NumElems &&
+           "Illegal compression mask");
+    for (unsigned I = 0; I != NumElems; ++I) {
+      if (!EltBits[I].isZero())
+        Mask.push_back(I);
+    }
+    while (Mask.size() != NumElems) {
+      Mask.push_back(NumElems + Mask.size());
+    }
+    Ops.push_back(CmpVec);
+    Ops.push_back(PassThru);
+    return true;
+  }
+  case X86ISD::EXPAND: {
+    SDValue ExpVec = N.getOperand(0);
+    SDValue PassThru = N.getOperand(1);
+    SDValue ExpMask = N.getOperand(2);
+    APInt UndefElts;
+    SmallVector<APInt> EltBits;
+    if (!getTargetConstantBitsFromNode(ExpMask, 1, UndefElts, EltBits))
+      return false;
+    assert(UndefElts.getBitWidth() == NumElems && EltBits.size() == NumElems &&
+           "Illegal expansion mask");
+    unsigned ExpIndex = 0;
+    for (unsigned I = 0; I != NumElems; ++I) {
+      if (EltBits[I].isZero())
+        Mask.push_back(I + NumElems);
+      else
+        Mask.push_back(ExpIndex++);
+    }
+    Ops.push_back(ExpVec);
+    Ops.push_back(PassThru);
+    return true;
+  }
   default:
     llvm_unreachable("unknown target shuffle node");
   }
@@ -61325,6 +61369,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::VPERM2X128:
   case X86ISD::SHUF128:
   case X86ISD::VZEXT_MOVL:
+  case X86ISD::COMPRESS:
+  case X86ISD::EXPAND:
   case ISD::VECTOR_SHUFFLE: return combineShuffle(N, DAG, DCI,Subtarget);
   case X86ISD::FMADD_RND:
   case X86ISD::FMSUB:
diff --git a/llvm/test/CodeGen/X86/compress-undef-float-passthrough.ll b/llvm/test/CodeGen/X86/compress-undef-float-passthrough.ll
index 47331db7261b3..b19112c02c085 100644
--- a/llvm/test/CodeGen/X86/compress-undef-float-passthrough.ll
+++ b/llvm/test/CodeGen/X86/compress-undef-float-passthrough.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-- -mattr=avx512f,avx512vl | FileCheck %s --check-prefix=CHECK
 
-define void @test_compress_undef_float_passthrough() {
+define void @test_compress_undef_float_passthrough(<4 x double> %a0) {
 ; CHECK-LABEL: test_compress_undef_float_passthrough:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movb $5, %al
@@ -12,7 +12,7 @@ define void @test_compress_undef_float_passthrough() {
 ; CHECK-NEXT:    retq
 entry:                                          ; preds = %loop.50
   %0 = bitcast i4 undef to <4 x i1>
-  %1 = call <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> <i1 1, i1 0, i1 1, i1 0>)
+  %1 = call <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double> %a0, <4 x double> undef, <4 x i1> <i1 1, i1 0, i1 1, i1 0>)
   call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %1, <4 x ptr> undef, i32 0, <4 x i1> %0)
   ret void
 }
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
index 3ea95eeaedfc7..b79d9e8ce47e9 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
@@ -1035,68 +1035,23 @@ define <8 x double> @concat_vpermilvar_v8f64_v4f64(<4 x double> %a0, <4 x double
   ret <8 x double> %res
 }
 
-; TODO - shift elements up by one
+; shift elements up by one
 define <16 x i32> @combine_vexpandd_as_valignd(<16 x i32>  %x) {
-; X86-AVX512F-LABEL: combine_vexpandd_as_valignd:
-; X86-AVX512F:       # %bb.0:
-; X86-AVX512F-NEXT:    movw $-2, %ax
-; X86-AVX512F-NEXT:    kmovw %eax, %k1
-; X86-AVX512F-NEXT:    vpexpandd %zmm0, %zmm0 {%k1} {z}
-; X86-AVX512F-NEXT:    retl
-;
-; X86-AVX512BW-LABEL: combine_vexpandd_as_valignd:
-; X86-AVX512BW:       # %bb.0:
-; X86-AVX512BW-NEXT:    movw $-2, %ax
-; X86-AVX512BW-NEXT:    kmovd %eax, %k1
-; X86-AVX512BW-NEXT:    vpexpandd %zmm0, %zmm0 {%k1} {z}
-; X86-AVX512BW-NEXT:    retl
-;
-; X64-AVX512F-LABEL: combine_vexpandd_as_valignd:
-; X64-AVX512F:       # %bb.0:
-; X64-AVX512F-NEXT:    movw $-2, %ax
-; X64-AVX512F-NEXT:    kmovw %eax, %k1
-; X64-AVX512F-NEXT:    vpexpandd %zmm0, %zmm0 {%k1} {z}
-; X64-AVX512F-NEXT:    retq
-;
-; X64-AVX512BW-LABEL: combine_vexpandd_as_valignd:
-; X64-AVX512BW:       # %bb.0:
-; X64-AVX512BW-NEXT:    movw $-2, %ax
-; X64-AVX512BW-NEXT:    kmovd %eax, %k1
-; X64-AVX512BW-NEXT:    vpexpandd %zmm0, %zmm0 {%k1} {z}
-; X64-AVX512BW-NEXT:    retq
+; CHECK-LABEL: combine_vexpandd_as_valignd:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    valignd {{.*#+}} zmm0 = zmm1[15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; CHECK-NEXT:    ret{{[l|q]}}
   %res = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> %x, <16 x i32> zeroinitializer, <16 x i1> <i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
   ret <16 x i32> %res
 }
 
-; TODO - zero upper half of vector
+; zero upper half of vector
 define <16 x i32> @combine_vcompressd_as_vmov(<16 x i32> %x) {
-; X86-AVX512F-LABEL: combine_vcompressd_as_vmov:
-; X86-AVX512F:       # %bb.0:
-; X86-AVX512F-NEXT:    movw $255, %ax
-; X86-AVX512F-NEXT:    kmovw %eax, %k1
-; X86-AVX512F-NEXT:    vpcompressd %zmm0, %zmm0 {%k1} {z}
-; X86-AVX512F-NEXT:    retl
-;
-; X86-AVX512BW-LABEL: combine_vcompressd_as_vmov:
-; X86-AVX512BW:       # %bb.0:
-; X86-AVX512BW-NEXT:    movw $255, %ax
-; X86-AVX512BW-NEXT:    kmovd %eax, %k1
-; X86-AVX512BW-NEXT:    vpcompressd %zmm0, %zmm0 {%k1} {z}
-; X86-AVX512BW-NEXT:    retl
-;
-; X64-AVX512F-LABEL: combine_vcompressd_as_vmov:
-; X64-AVX512F:       # %bb.0:
-; X64-AVX512F-NEXT:    movw $255, %ax
-; X64-AVX512F-NEXT:    kmovw %eax, %k1
-; X64-AVX512F-NEXT:    vpcompressd %zmm0, %zmm0 {%k1} {z}
-; X64-AVX512F-NEXT:    retq
-;
-; X64-AVX512BW-LABEL: combine_vcompressd_as_vmov:
-; X64-AVX512BW:       # %bb.0:
-; X64-AVX512BW-NEXT:    movw $255, %ax
-; X64-AVX512BW-NEXT:    kmovd %eax, %k1
-; X64-AVX512BW-NEXT:    vpcompressd %zmm0, %zmm0 {%k1} {z}
-; X64-AVX512BW-NEXT:    retq
+; CHECK-LABEL: combine_vcompressd_as_vmov:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovaps %ymm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %res = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> %x, <16 x i32> zeroinitializer, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>)
   ret <16 x i32> %res
 }

@github-actions
Copy link

github-actions bot commented Dec 8, 2025

⚠️ undef deprecator found issues in your code. ⚠️

You can test this locally with the following command:
git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef([^a-zA-Z0-9_-]|$)|UndefValue::get)' 'HEAD~1' HEAD llvm/lib/Target/X86/X86ISelLowering.cpp llvm/test/CodeGen/X86/compress-undef-float-passthrough.ll llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll

The following files introduce new uses of undef:

  • llvm/test/CodeGen/X86/compress-undef-float-passthrough.ll

Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields undef. You should use poison values for placeholders instead.

In tests, avoid using undef and having tests that trigger undefined behavior. If you need an operand with some unimportant value, you can add a new argument to the function and use that instead.

For example, this is considered a bad practice:

define void @fn() {
  ...
  br i1 undef, ...
}

Please use the following instead:

define void @fn(i1 %cond) {
  ...
  br i1 %cond, ...
}

Please refer to the Undefined Behavior Manual for more information.

Copy link
Contributor

@phoebewang phoebewang left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

@RKSimon RKSimon enabled auto-merge (squash) December 8, 2025 15:12
@RKSimon RKSimon merged commit ebdb903 into llvm:main Dec 8, 2025
8 of 10 checks passed
@RKSimon RKSimon deleted the x86-expand-compress-shuffle-decode branch December 8, 2025 16:00
honeygoyal pushed a commit to honeygoyal/llvm-project that referenced this pull request Dec 9, 2025
…71119)

Allows for shuffle simplification

Required a minor fix to the overly reduced compress-undef-float-passthrough.ll regression test
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants