-
Notifications
You must be signed in to change notification settings - Fork 11k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[X86][EVEX512] Add HasEVEX512
when NoVLX
used for 512-bit patterns
#91106
Conversation
With KNL/KNC being deprecated, we don't need to care about such no VLX cases anymore. We may remove such patterns in the future. Fixes llvm#90844
@llvm/pr-subscribers-backend-x86 Author: Phoebe Wang (phoebewang) ChangesWith KNL/KNC being deprecated, we don't need to care about such no VLX cases anymore. We may remove such patterns in the future. Fixes #90844 Full diff: https://github.com/llvm/llvm-project/pull/91106.diff 3 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index cf4a64ffded2e8..1bdb79b493f9a3 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -30058,7 +30058,9 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
return R;
// AVX512 implicitly uses modulo rotation amounts.
- if (Subtarget.hasAVX512() && 32 <= EltSizeInBits) {
+ if ((Subtarget.hasVLX() ||
+ (Subtarget.hasAVX512() && Subtarget.hasEVEX512())) &&
+ 32 <= EltSizeInBits) {
// Attempt to rotate by immediate.
if (IsCstSplat) {
unsigned RotOpc = IsROTL ? X86ISD::VROTLI : X86ISD::VROTRI;
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index ec2a5f52a7b6aa..0723328d40e3ed 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -826,7 +826,7 @@ defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32bf16_info, v16bf16x_info,
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
-let Predicates = [NoVLX] in {
+let Predicates = [NoVLX, HasEVEX512] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
(v2i64 (VEXTRACTI128rr
(v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
@@ -3080,7 +3080,7 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
}
-let Predicates = [HasAVX512, NoVLX] in {
+let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
@@ -3111,7 +3111,7 @@ let Predicates = [HasAVX512, NoVLX] in {
defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
}
-let Predicates = [HasBWI, NoVLX] in {
+let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
@@ -3505,7 +3505,7 @@ multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
// Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
// available. Use a 512-bit operation and extract.
-let Predicates = [HasAVX512, NoVLX] in {
+let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
@@ -3517,7 +3517,7 @@ let Predicates = [HasAVX512, NoVLX] in {
defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
}
-let Predicates = [HasBWI, NoVLX] in {
+let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
@@ -5010,8 +5010,8 @@ defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
SchedWriteVecALU, HasAVX512, 1>, T8;
-// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
-let Predicates = [HasDQI, NoVLX] in {
+// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX, HasEVEX512.
+let Predicates = [HasDQI, NoVLX, HasEVEX512] in {
def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
(EXTRACT_SUBREG
(VPMULLQZrr
@@ -5067,7 +5067,7 @@ multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
sub_xmm)>;
}
-let Predicates = [HasAVX512, NoVLX] in {
+let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
@@ -6044,7 +6044,7 @@ defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
SchedWriteVecShift>;
// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
-let Predicates = [HasAVX512, NoVLX] in {
+let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
(EXTRACT_SUBREG (v8i64
(VPSRAQZrr
@@ -6173,14 +6173,14 @@ defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecS
defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
-defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
-defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
-defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
-defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
+defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX, HasEVEX512]>;
+defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX, HasEVEX512]>;
+defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX, HasEVEX512]>;
+defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX, HasEVEX512]>;
// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
-let Predicates = [HasAVX512, NoVLX] in {
+let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
(EXTRACT_SUBREG (v8i64
(VPROLVQZrr
@@ -6231,7 +6231,7 @@ let Predicates = [HasAVX512, NoVLX] in {
}
// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
-let Predicates = [HasAVX512, NoVLX] in {
+let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
(EXTRACT_SUBREG (v8i64
(VPRORVQZrr
@@ -9863,7 +9863,7 @@ defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
truncstore_us_vi8, masked_truncstore_us_vi8,
X86vtruncus, X86vmtruncus>;
-let Predicates = [HasAVX512, NoVLX] in {
+let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
(v8i16 (EXTRACT_SUBREG
(v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
@@ -9874,7 +9874,7 @@ def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
VR256X:$src, sub_ymm)))), sub_xmm))>;
}
-let Predicates = [HasBWI, NoVLX] in {
+let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
(v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src, sub_ymm))), sub_xmm))>;
@@ -10417,7 +10417,7 @@ multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
EVEX_V128;
}
- let Predicates = [prd, NoVLX] in {
+ let Predicates = [prd, NoVLX, HasEVEX512] in {
defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
}
@@ -11204,7 +11204,7 @@ defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
SchedWriteVecALU>;
// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
-let Predicates = [HasAVX512, NoVLX] in {
+let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
def : Pat<(v4i64 (abs VR256X:$src)),
(EXTRACT_SUBREG
(VPABSQZrr
@@ -11220,7 +11220,7 @@ let Predicates = [HasAVX512, NoVLX] in {
// Use 512bit version to implement 128/256 bit.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
AVX512VLVectorVTInfo _, Predicate prd> {
- let Predicates = [prd, NoVLX] in {
+ let Predicates = [prd, NoVLX, HasEVEX512] in {
def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
(EXTRACT_SUBREG
(!cast<Instruction>(InstrStr # "Zrr")
@@ -11839,7 +11839,7 @@ let Predicates = [HasAVX512] in {
(VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}
-let Predicates = [HasAVX512, NoVLX] in {
+let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
def : Pat<(v16i8 (vnot VR128X:$src)),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
diff --git a/llvm/test/CodeGen/X86/pr90844.ll b/llvm/test/CodeGen/X86/pr90844.ll
new file mode 100644
index 00000000000000..6feece7f66d877
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr90844.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx512f,-evex512 < %s | FileCheck %s
+
+define void @PR90844() {
+; CHECK-LABEL: PR90844:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vmovaps %xmm0, (%rax)
+; CHECK-NEXT: retq
+entry:
+ %0 = tail call <2 x i32> @llvm.fshl.v2i32(<2 x i32> poison, <2 x i32> poison, <2 x i32> <i32 8, i32 24>)
+ %1 = and <2 x i32> %0, <i32 16711935, i32 -134152448>
+ %2 = or disjoint <2 x i32> zeroinitializer, %1
+ %3 = zext <2 x i32> %2 to <2 x i64>
+ %4 = shl nuw <2 x i64> %3, <i64 32, i64 32>
+ %5 = or disjoint <2 x i64> %4, zeroinitializer
+ store <2 x i64> %5, ptr poison, align 16
+ ret void
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers
/cherry-pick 7963d9a |
llvm#91106) With KNL/KNC being deprecated, we don't need to care about such no VLX cases anymore. We may remove such patterns in the future. Fixes llvm#90844 (cherry picked from commit 7963d9a)
/pull-request #91118 |
llvm#91106) With KNL/KNC being deprecated, we don't need to care about such no VLX cases anymore. We may remove such patterns in the future. Fixes llvm#90844 (cherry picked from commit 7963d9a)
With KNL/KNC being deprecated, we don't need to care about such no VLX cases anymore. We may remove such patterns in the future.
Fixes #90844