Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions llvm/lib/Target/X86/X86InstrAVX512.td
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,12 @@ def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
[(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
[(set VR512:$dst, (v16i32 immAllOnesV))]>;
// All-ones pseudo instructions for 128-bit and 256-bit vectors allocated in
// the VR128X / VR256X register classes. Only available when HasVLX holds.
// AddedComplexity = 1 raises the priority of these selection patterns
// (presumably so they are preferred over the other all-ones pseudos when VLX
// is available -- confirm against the SSE/AVX definitions).
let AddedComplexity = 1, Predicates = [HasVLX] in {
// Matches a v4i32 all-ones vector; takes no inputs and has an empty asm
// string because it is a Pseudo that must be expanded before emission.
def AVX512_128_SETALLONES : I<0, Pseudo, (outs VR128X:$dst), (ins),
"", [(set VR128X:$dst, (v4i32 immAllOnesV))]>;
// Same as above for a v8i32 all-ones vector in VR256X.
def AVX512_256_SETALLONES : I<0, Pseudo, (outs VR256X:$dst), (ins),
"", [(set VR256X:$dst, (v8i32 immAllOnesV))]>;
}
}

let Predicates = [HasAVX512] in {
Expand Down
32 changes: 31 additions & 1 deletion llvm/lib/Target/X86/X86InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -778,6 +778,8 @@ bool X86InstrInfo::isReMaterializableImpl(
case X86::AVX512_128_SET0:
case X86::AVX512_256_SET0:
case X86::AVX512_512_SET0:
case X86::AVX512_128_SETALLONES:
case X86::AVX512_256_SETALLONES:
case X86::AVX512_512_SETALLONES:
case X86::AVX512_FsFLD0SD:
case X86::AVX512_FsFLD0SH:
Expand Down Expand Up @@ -6246,9 +6248,31 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xf);
return true;
}
case X86::AVX512_128_SETALLONES:
case X86::AVX512_256_SETALLONES:
case X86::AVX512_512_SETALLONES: {
Register Reg = MIB.getReg(0);
MIB->setDesc(get(X86::VPTERNLOGDZrri));
unsigned Opc;
switch (MI.getOpcode()) {
case X86::AVX512_128_SETALLONES: {
if (X86::VR128RegClass.contains(Reg))
return Expand2AddrUndef(MIB, get(X86::VPCMPEQDrr));

Opc = X86::VPTERNLOGDZ128rri;
break;
}
case X86::AVX512_256_SETALLONES: {
if (X86::VR256RegClass.contains(Reg))
return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr));

Opc = X86::VPTERNLOGDZ256rri;
break;
}
case X86::AVX512_512_SETALLONES:
Opc = X86::VPTERNLOGDZrri;
break;
}
MIB->setDesc(get(Opc));
// VPTERNLOGD needs 3 register inputs and an immediate.
// 0xff will return 1s for any input.
MIB.addReg(Reg, RegState::Undef)
Expand Down Expand Up @@ -8190,13 +8214,15 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
case X86::AVX1_SETALLONES:
case X86::AVX_SET0:
case X86::AVX512_256_SET0:
case X86::AVX512_256_SETALLONES:
Alignment = Align(32);
break;
case X86::V_SET0:
case X86::V_SETALLONES:
case X86::AVX512_128_SET0:
case X86::FsFLD0F128:
case X86::AVX512_FsFLD0F128:
case X86::AVX512_128_SETALLONES:
Alignment = Align(16);
break;
case X86::MMX_SET0:
Expand Down Expand Up @@ -8255,6 +8281,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
case X86::AVX512_128_SET0:
case X86::AVX512_256_SET0:
case X86::AVX512_512_SET0:
case X86::AVX512_128_SETALLONES:
case X86::AVX512_256_SETALLONES:
case X86::AVX512_512_SETALLONES:
case X86::FsFLD0SH:
case X86::AVX512_FsFLD0SH:
Expand Down Expand Up @@ -8315,6 +8343,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
break;
case X86::AVX1_SETALLONES:
case X86::AVX2_SETALLONES:
case X86::AVX512_256_SETALLONES:
IsAllOnes = true;
[[fallthrough]];
case X86::AVX512_256_SET0:
Expand All @@ -8328,6 +8357,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
2);
break;
case X86::V_SETALLONES:
case X86::AVX512_128_SETALLONES:
IsAllOnes = true;
[[fallthrough]];
case X86::V_SET0:
Expand Down
26 changes: 26 additions & 0 deletions llvm/test/CodeGen/X86/avx512-i386-setallones-pseudo.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
# RUN: llc %s -mtriple=i386-- -start-before=postrapseudos -o - | FileCheck %s
#
# Post-RA pseudo expansion test for the AVX512 all-ones pseudos on a 32-bit
# target with +avx512f,+avx512vl: AVX512_128_SETALLONES on $xmm0 and
# AVX512_256_SETALLONES on $ymm1 are both expected to be emitted as vpcmpeqd.

--- |
target triple = "i386-unknown-unknown"

define void @setallones() #0 {
; CHECK-LABEL: setallones:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
entry:
unreachable
}

attributes #0 = { "target-features"="+avx512f,+avx512vl" }
---
name: setallones
tracksRegLiveness: true
liveins: []
body: |
bb.0:
$xmm0 = AVX512_128_SETALLONES
$ymm1 = AVX512_256_SETALLONES
...
30 changes: 30 additions & 0 deletions llvm/test/CodeGen/X86/avx512-setallones-pseudo.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
# RUN: llc %s -mtriple=x86_64-- -start-before=postrapseudos -o - | FileCheck %s
#
# Post-RA pseudo expansion test for the AVX512 all-ones pseudos on x86_64 with
# +avx512f,+avx512vl. The expected output depends on the destination register:
#   - $xmm14 / $ymm15 are expected to be emitted as vpcmpeqd;
#   - $xmm16 / $ymm17 are expected to be emitted as vpternlogd producing -1
#     (presumably because these registers are outside VR128/VR256 and need an
#     EVEX-encoded instruction -- confirm against expandPostRAPseudo).

--- |
target triple = "x86_64-unknown-unknown"

define void @setallones() #0 {
; CHECK-LABEL: setallones:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcmpeqd %xmm14, %xmm14, %xmm14
; CHECK-NEXT: vpternlogd {{.*#+}} xmm16 = -1
; CHECK-NEXT: vpcmpeqd %ymm15, %ymm15, %ymm15
; CHECK-NEXT: vpternlogd {{.*#+}} ymm17 = -1
entry:
unreachable
}

attributes #0 = { "target-features"="+avx512f,+avx512vl" }
---
name: setallones
tracksRegLiveness: true
liveins: []
body: |
bb.0:
$xmm14 = AVX512_128_SETALLONES
$xmm16 = AVX512_128_SETALLONES
$ymm15 = AVX512_256_SETALLONES
$ymm17 = AVX512_256_SETALLONES
...
3 changes: 2 additions & 1 deletion llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@ define <4 x i32> @eq_or_eq_ult_2_fail_multiuse(<4 x i32> %x) {
; AVX512: # %bb.0:
; AVX512-NEXT: subq $24, %rsp
; AVX512-NEXT: .cfi_def_cfa_offset 32
; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
; AVX512-NEXT: callq use.v4.i32@PLT
; AVX512-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
Expand Down