Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions llvm/lib/Target/X86/X86InstrAVX512.td
Original file line number Diff line number Diff line change
Expand Up @@ -3161,6 +3161,12 @@ multiclass avx512_mask_setop_w<SDPatternOperator Val> {
defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;

// 8-bit mask set operations for AVX512DQ
let Predicates = [HasDQI] in {
defm KSET0B : avx512_mask_setop<VK8, v8i1, immAllZerosV>;
defm KSET1B : avx512_mask_setop<VK8, v8i1, immAllOnesV>;
}

// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
let Predicates = [HasAVX512] in {
def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
Expand All @@ -3173,6 +3179,34 @@ let Predicates = [HasAVX512] in {
def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>;
}

// With AVX512DQ, use 8-bit mask operations for 8-bit masks so that the upper
// bits of the mask register are left clear.
let Predicates = [HasDQI] in {
def : Pat<(v8i1 immAllZerosV), (KSET0B)>;
def : Pat<(v8i1 immAllOnesV), (KSET1B)>;
}

// Optimize bitconvert of constants whose set bits exactly fill a mask width to
// use kxnor instructions (e.g. i16 255 only needs the low 8 bits, so KSET1B).
let Predicates = [HasDQI] in {
def : Pat<(v8i1(bitconvert(i8 255))), (KSET1B)>;
def : Pat<(v16i1(bitconvert(i16 255))), (COPY_TO_REGCLASS(KSET1B), VK16)>;
}
let Predicates = [HasBWI] in {
def : Pat<(v32i1(bitconvert(i32 -1))), (KSET1D)>;
def : Pat<(v64i1(bitconvert(i64 -1))), (KSET1Q)>;
}
// Submask patterns: lower N bits set in larger mask registers
let Predicates = [HasBWI, HasDQI] in {
// v32i1 submasks
def : Pat<(v32i1(bitconvert(i32 255))), (COPY_TO_REGCLASS(KSET1B), VK32)>;
def : Pat<(v32i1(bitconvert(i32 65535))), (COPY_TO_REGCLASS(KSET1W), VK32)>;
// v64i1 submasks
def : Pat<(v64i1(bitconvert(i64 255))), (COPY_TO_REGCLASS(KSET1B), VK64)>;
def : Pat<(v64i1(bitconvert(i64 65535))), (COPY_TO_REGCLASS(KSET1W), VK64)>;
def : Pat<(v64i1(bitconvert(i64 4294967295))), (COPY_TO_REGCLASS(KSET1D),
VK64)>;
}

// Patterns for kmask insert_subvector/extract_subvector to/from index=0
multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
RegisterClass RC, ValueType VT> {
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/X86/X86InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -788,9 +788,11 @@ bool X86InstrInfo::isReMaterializableImpl(
case X86::FsFLD0SS:
case X86::FsFLD0SH:
case X86::FsFLD0F128:
case X86::KSET0B:
case X86::KSET0D:
case X86::KSET0Q:
case X86::KSET0W:
case X86::KSET1B:
case X86::KSET1D:
case X86::KSET1Q:
case X86::KSET1W:
Expand Down Expand Up @@ -6352,12 +6354,16 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
// registers, since it is not usable as a write mask.
// FIXME: A more advanced approach would be to choose the best input mask
// register based on context.
case X86::KSET0B:
return Expand2AddrKreg(MIB, get(X86::KXORBkk), X86::K0);
case X86::KSET0W:
return Expand2AddrKreg(MIB, get(X86::KXORWkk), X86::K0);
case X86::KSET0D:
return Expand2AddrKreg(MIB, get(X86::KXORDkk), X86::K0);
case X86::KSET0Q:
return Expand2AddrKreg(MIB, get(X86::KXORQkk), X86::K0);
case X86::KSET1B:
return Expand2AddrKreg(MIB, get(X86::KXNORBkk), X86::K0);
case X86::KSET1W:
return Expand2AddrKreg(MIB, get(X86::KXNORWkk), X86::K0);
case X86::KSET1D:
Expand Down
14 changes: 7 additions & 7 deletions llvm/test/CodeGen/X86/avx512-gather-scatter-intrin-deprecated.ll
Original file line number Diff line number Diff line change
Expand Up @@ -255,8 +255,8 @@ define void @gather_qps(<8 x i64> %ind, <8 x float> %src, ptr %base, ptr %stbuf)
; CHECK-LABEL: gather_qps:
; CHECK: ## %bb.0:
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: kxnorw %k0, %k0, %k2
; CHECK-NEXT: kxnorb %k0, %k0, %k1
; CHECK-NEXT: kxnorb %k0, %k0, %k2
; CHECK-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vscatterqps %ymm1, (%rsi,%zmm0,4) {%k1}
Expand Down Expand Up @@ -520,7 +520,7 @@ define <8 x float>@test_int_x86_avx512_gather3siv8_sf(<8 x float> %x0, ptr %x1,
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vgatherdps (%rdi,%ymm1,4), %ymm0 {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: kxnorb %k0, %k0, %k1
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vgatherdps (%rdi,%ymm1,2), %ymm2 {%k1}
; CHECK-NEXT: vaddps %ymm2, %ymm0, %ymm0
Expand Down Expand Up @@ -772,7 +772,7 @@ define void@test_int_x86_avx512_scattersiv8_sf(ptr %x0, i8 %x1, <8 x i32> %x2, <
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: kxnorb %k0, %k0, %k1
; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
Expand All @@ -788,7 +788,7 @@ define void@test_int_x86_avx512_scattersiv8_si(ptr %x0, i8 %x1, <8 x i32> %x2, <
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: kxnorb %k0, %k0, %k1
; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
Expand All @@ -800,9 +800,9 @@ define void@test_int_x86_avx512_scattersiv8_si(ptr %x0, i8 %x1, <8 x i32> %x2, <
define void @scatter_mask_test(ptr %x0, <8 x i32> %x2, <8 x i32> %x3) {
; CHECK-LABEL: scatter_mask_test:
; CHECK: ## %bb.0:
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: kxnorb %k0, %k0, %k1
; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT: kxorw %k0, %k0, %k1
; CHECK-NEXT: kxorb %k0, %k0, %k1
; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
Original file line number Diff line number Diff line change
Expand Up @@ -251,9 +251,9 @@ define dso_local void @scatter_mask_qps_execdomain(<8 x i64> %ind, ptr %src, i8
define dso_local void @gather_qps(<8 x i64> %ind, <8 x float> %src, ptr %base, ptr %stbuf) {
; CHECK-LABEL: gather_qps:
; CHECK: # %bb.0:
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: kxnorb %k0, %k0, %k1
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: kxnorw %k0, %k0, %k2
; CHECK-NEXT: kxnorb %k0, %k0, %k2
; CHECK-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vscatterqps %ymm1, (%rsi,%zmm0,4) {%k1}
Expand Down Expand Up @@ -523,7 +523,7 @@ define <8 x float> @test_int_x86_avx512_mask_gather3siv8_sf(<8 x float> %x0, ptr
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vgatherdps (%rdi,%ymm1,4), %ymm0 {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: kxnorb %k0, %k0, %k1
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vgatherdps (%rdi,%ymm1,2), %ymm2 {%k1}
; CHECK-NEXT: vaddps %ymm2, %ymm0, %ymm0
Expand Down Expand Up @@ -774,7 +774,7 @@ define dso_local void@test_int_x86_avx512_scattersiv8_sf(ptr %x0, i8 %x1, <8 x i
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: kxnorb %k0, %k0, %k1
; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
Expand All @@ -789,7 +789,7 @@ define dso_local void@test_int_x86_avx512_scattersiv8_si(ptr %x0, i8 %x1, <8 x i
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: kxnorb %k0, %k0, %k1
; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
Expand All @@ -802,9 +802,9 @@ define dso_local void@test_int_x86_avx512_scattersiv8_si(ptr %x0, i8 %x1, <8 x i
define dso_local void @scatter_mask_test(ptr %x0, <8 x i32> %x2, <8 x i32> %x3) {
; CHECK-LABEL: scatter_mask_test:
; CHECK: # %bb.0:
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: kxnorb %k0, %k0, %k1
; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT: kxorw %k0, %k0, %k1
; CHECK-NEXT: kxorb %k0, %k0, %k1
; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
Expand Down Expand Up @@ -856,7 +856,7 @@ define <16 x float> @gather_mask_test(<16 x i32> %ind, <16 x float> %src, ptr %b
define <8 x float> @gather_global(<8 x i64>, ptr nocapture readnone) {
; CHECK-LABEL: gather_global:
; CHECK: # %bb.0:
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: kxnorb %k0, %k0, %k1
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vgatherqps x(,%zmm0,4), %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
Expand Down
Loading
Loading