-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[X86] Handle X86ISD::EXPAND/COMPRESS nodes as target shuffles #171119
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Allows for shuffle simplification. Required a minor fix to the overly reduced compress-undef-float-passthrough.ll regression test.
|
@llvm/pr-subscribers-backend-x86 Author: Simon Pilgrim (RKSimon) Changes: Allows for shuffle simplification. Required a minor fix to the overly reduced compress-undef-float-passthrough.ll regression test. Full diff: https://github.com/llvm/llvm-project/pull/171119.diff — 3 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d0ae75b2e6785..b71878ae1434c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2888,6 +2888,8 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::VPERMV:
case X86ISD::VPERMV3:
case X86ISD::VZEXT_MOVL:
+ case X86ISD::COMPRESS:
+ case X86ISD::EXPAND:
return true;
}
}
@@ -5839,6 +5841,48 @@ static bool getTargetShuffleMask(SDValue N, bool AllowSentinelZero,
}
return false;
}
+ case X86ISD::COMPRESS: {
+ SDValue CmpVec = N.getOperand(0);
+ SDValue PassThru = N.getOperand(1);
+ SDValue CmpMask = N.getOperand(2);
+ APInt UndefElts;
+ SmallVector<APInt> EltBits;
+ if (!getTargetConstantBitsFromNode(CmpMask, 1, UndefElts, EltBits))
+ return false;
+ assert(UndefElts.getBitWidth() == NumElems && EltBits.size() == NumElems &&
+ "Illegal compression mask");
+ for (unsigned I = 0; I != NumElems; ++I) {
+ if (!EltBits[I].isZero())
+ Mask.push_back(I);
+ }
+ while (Mask.size() != NumElems) {
+ Mask.push_back(NumElems + Mask.size());
+ }
+ Ops.push_back(CmpVec);
+ Ops.push_back(PassThru);
+ return true;
+ }
+ case X86ISD::EXPAND: {
+ SDValue ExpVec = N.getOperand(0);
+ SDValue PassThru = N.getOperand(1);
+ SDValue ExpMask = N.getOperand(2);
+ APInt UndefElts;
+ SmallVector<APInt> EltBits;
+ if (!getTargetConstantBitsFromNode(ExpMask, 1, UndefElts, EltBits))
+ return false;
+ assert(UndefElts.getBitWidth() == NumElems && EltBits.size() == NumElems &&
+ "Illegal expansion mask");
+ unsigned ExpIndex = 0;
+ for (unsigned I = 0; I != NumElems; ++I) {
+ if (EltBits[I].isZero())
+ Mask.push_back(I + NumElems);
+ else
+ Mask.push_back(ExpIndex++);
+ }
+ Ops.push_back(ExpVec);
+ Ops.push_back(PassThru);
+ return true;
+ }
default:
llvm_unreachable("unknown target shuffle node");
}
@@ -61325,6 +61369,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::VPERM2X128:
case X86ISD::SHUF128:
case X86ISD::VZEXT_MOVL:
+ case X86ISD::COMPRESS:
+ case X86ISD::EXPAND:
case ISD::VECTOR_SHUFFLE: return combineShuffle(N, DAG, DCI,Subtarget);
case X86ISD::FMADD_RND:
case X86ISD::FMSUB:
diff --git a/llvm/test/CodeGen/X86/compress-undef-float-passthrough.ll b/llvm/test/CodeGen/X86/compress-undef-float-passthrough.ll
index 47331db7261b3..b19112c02c085 100644
--- a/llvm/test/CodeGen/X86/compress-undef-float-passthrough.ll
+++ b/llvm/test/CodeGen/X86/compress-undef-float-passthrough.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx512f,avx512vl | FileCheck %s --check-prefix=CHECK
-define void @test_compress_undef_float_passthrough() {
+define void @test_compress_undef_float_passthrough(<4 x double> %a0) {
; CHECK-LABEL: test_compress_undef_float_passthrough:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movb $5, %al
@@ -12,7 +12,7 @@ define void @test_compress_undef_float_passthrough() {
; CHECK-NEXT: retq
entry: ; preds = %loop.50
%0 = bitcast i4 undef to <4 x i1>
- %1 = call <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> <i1 1, i1 0, i1 1, i1 0>)
+ %1 = call <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double> %a0, <4 x double> undef, <4 x i1> <i1 1, i1 0, i1 1, i1 0>)
call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %1, <4 x ptr> undef, i32 0, <4 x i1> %0)
ret void
}
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
index 3ea95eeaedfc7..b79d9e8ce47e9 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
@@ -1035,68 +1035,23 @@ define <8 x double> @concat_vpermilvar_v8f64_v4f64(<4 x double> %a0, <4 x double
ret <8 x double> %res
}
-; TODO - shift elements up by one
+; shift elements up by one
define <16 x i32> @combine_vexpandd_as_valignd(<16 x i32> %x) {
-; X86-AVX512F-LABEL: combine_vexpandd_as_valignd:
-; X86-AVX512F: # %bb.0:
-; X86-AVX512F-NEXT: movw $-2, %ax
-; X86-AVX512F-NEXT: kmovw %eax, %k1
-; X86-AVX512F-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z}
-; X86-AVX512F-NEXT: retl
-;
-; X86-AVX512BW-LABEL: combine_vexpandd_as_valignd:
-; X86-AVX512BW: # %bb.0:
-; X86-AVX512BW-NEXT: movw $-2, %ax
-; X86-AVX512BW-NEXT: kmovd %eax, %k1
-; X86-AVX512BW-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z}
-; X86-AVX512BW-NEXT: retl
-;
-; X64-AVX512F-LABEL: combine_vexpandd_as_valignd:
-; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: movw $-2, %ax
-; X64-AVX512F-NEXT: kmovw %eax, %k1
-; X64-AVX512F-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z}
-; X64-AVX512F-NEXT: retq
-;
-; X64-AVX512BW-LABEL: combine_vexpandd_as_valignd:
-; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: movw $-2, %ax
-; X64-AVX512BW-NEXT: kmovd %eax, %k1
-; X64-AVX512BW-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z}
-; X64-AVX512BW-NEXT: retq
+; CHECK-LABEL: combine_vexpandd_as_valignd:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: valignd {{.*#+}} zmm0 = zmm1[15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; CHECK-NEXT: ret{{[l|q]}}
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> %x, <16 x i32> zeroinitializer, <16 x i1> <i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
ret <16 x i32> %res
}
-; TODO - zero upper half of vector
+; zero upper half of vector
define <16 x i32> @combine_vcompressd_as_vmov(<16 x i32> %x) {
-; X86-AVX512F-LABEL: combine_vcompressd_as_vmov:
-; X86-AVX512F: # %bb.0:
-; X86-AVX512F-NEXT: movw $255, %ax
-; X86-AVX512F-NEXT: kmovw %eax, %k1
-; X86-AVX512F-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z}
-; X86-AVX512F-NEXT: retl
-;
-; X86-AVX512BW-LABEL: combine_vcompressd_as_vmov:
-; X86-AVX512BW: # %bb.0:
-; X86-AVX512BW-NEXT: movw $255, %ax
-; X86-AVX512BW-NEXT: kmovd %eax, %k1
-; X86-AVX512BW-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z}
-; X86-AVX512BW-NEXT: retl
-;
-; X64-AVX512F-LABEL: combine_vcompressd_as_vmov:
-; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: movw $255, %ax
-; X64-AVX512F-NEXT: kmovw %eax, %k1
-; X64-AVX512F-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z}
-; X64-AVX512F-NEXT: retq
-;
-; X64-AVX512BW-LABEL: combine_vcompressd_as_vmov:
-; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: movw $255, %ax
-; X64-AVX512BW-NEXT: kmovd %eax, %k1
-; X64-AVX512BW-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z}
-; X64-AVX512BW-NEXT: retq
+; CHECK-LABEL: combine_vcompressd_as_vmov:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovaps %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
%res = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> %x, <16 x i32> zeroinitializer, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>)
ret <16 x i32> %res
}
|
You can test this locally with the following command:

git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef([^a-zA-Z0-9_-]|$)|UndefValue::get)' 'HEAD~1' HEAD llvm/lib/Target/X86/X86ISelLowering.cpp llvm/test/CodeGen/X86/compress-undef-float-passthrough.ll llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll

The following files introduce new uses of undef:

Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields undef. In tests, avoid using undef where possible. For example, this is considered a bad practice:

define void @fn() {
  ...
  br i1 undef, ...
}

Please use the following instead:

define void @fn(i1 %cond) {
  ...
  br i1 %cond, ...
}

Please refer to the Undefined Behavior Manual for more information.
phoebewang
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
…71119) Allows for shuffle simplification. Required a minor fix to the overly reduced compress-undef-float-passthrough.ll regression test.
Allows for shuffle simplification
Required a minor fix to the overly reduced compress-undef-float-passthrough.ll regression test