Skip to content

Commit

Permalink
[X86] Mutate fceil/ffloor/ftrunc/fnearbyint/frint into X86ISD::RNDSCA…
Browse files Browse the repository at this point in the history
…LE during PreProcessIselDAG to cut down on pattern permutations

We already need to have patterns for X86ISD::RNDSCALE to support software intrinsics. But we currently have 5 sets of patterns for the 5 rounding operations. For each of these 6 patterns we have to support 3 vector widths, 2 element sizes, sse/vex/evex encodings, load folding, and broadcast load folding. This results in a fair amount of bytes in the isel table.

This patch adds code to PreprocessISelDAG to morph the fceil/ffloor/ftrunc/fnearbyint/frint to X86ISD::RNDSCALE. This way we can remove everything but the intrinsic pattern while still allowing the operations to be considered Legal for DAGCombine and Legalization. This shrinks the DAGISel by somewhere between 9K and 10K.

There is one complication to this: the STRICT versions of these nodes are currently mutated to their non-strict equivalents at isel time when the node is visited. This won't be true in the future since that loses the chain ordering information. For now I've also added support for the non-STRICT nodes to Select so we can change the STRICT versions there after they've been mutated to their non-STRICT versions. We'll probably need a STRICT version of RNDSCALE or something to handle this in the future, which will take us back to needing 2 sets of patterns for strict and non-strict, but that's still better than the 11 or 12 sets of patterns we'd need.

We can probably do something similar for scalar, but I haven't looked at it yet.

Differential Revision: https://reviews.llvm.org/D62757

llvm-svn: 362535
  • Loading branch information
topperc committed Jun 4, 2019
1 parent 03ff1b3 commit 137de38
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 357 deletions.
100 changes: 82 additions & 18 deletions llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
Expand Up @@ -790,28 +790,60 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
continue;
}

// Replace vector shifts with their X86 specific equivalent so we don't
// need 2 sets of patterns.
switch (N->getOpcode()) {
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
if (N->getValueType(0).isVector()) {
unsigned NewOpc;
switch (N->getOpcode()) {
default: llvm_unreachable("Unexpected opcode!");
case ISD::SHL: NewOpc = X86ISD::VSHLV; break;
case ISD::SRA: NewOpc = X86ISD::VSRAV; break;
case ISD::SRL: NewOpc = X86ISD::VSRLV; break;
}
SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
N->getOperand(0), N->getOperand(1));
--I;
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
++I;
CurDAG->DeleteNode(N);
continue;
case ISD::SRL: {
// Replace vector shifts with their X86 specific equivalent so we don't
// need 2 sets of patterns.
if (!N->getValueType(0).isVector())
break;

unsigned NewOpc;
switch (N->getOpcode()) {
default: llvm_unreachable("Unexpected opcode!");
case ISD::SHL: NewOpc = X86ISD::VSHLV; break;
case ISD::SRA: NewOpc = X86ISD::VSRAV; break;
case ISD::SRL: NewOpc = X86ISD::VSRLV; break;
}
SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
N->getOperand(0), N->getOperand(1));
--I;
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
++I;
CurDAG->DeleteNode(N);
continue;
}
case ISD::FCEIL:
case ISD::FFLOOR:
case ISD::FTRUNC:
case ISD::FNEARBYINT:
case ISD::FRINT: {
// Replace vector rounding with their X86 specific equivalent so we don't
// need 2 sets of patterns.
if (!N->getValueType(0).isVector())
break;

unsigned Imm;
switch (N->getOpcode()) {
default: llvm_unreachable("Unexpected opcode!");
case ISD::FCEIL: Imm = 0xA; break;
case ISD::FFLOOR: Imm = 0x9; break;
case ISD::FTRUNC: Imm = 0xB; break;
case ISD::FNEARBYINT: Imm = 0xC; break;
case ISD::FRINT: Imm = 0x4; break;
}
SDLoc dl(N);
SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl,
N->getValueType(0),
N->getOperand(0),
CurDAG->getConstant(Imm, dl, MVT::i8));
--I;
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
++I;
CurDAG->DeleteNode(N);
continue;
}
}

if (OptLevel != CodeGenOpt::None &&
Expand Down Expand Up @@ -4672,6 +4704,38 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
if (foldLoadStoreIntoMemOperand(Node))
return;
break;
case ISD::FCEIL:
case ISD::FFLOOR:
case ISD::FTRUNC:
case ISD::FNEARBYINT:
case ISD::FRINT: {
// Replace vector rounding with their X86 specific equivalent so we don't
// need 2 sets of patterns.
// FIXME: This can only happen when the nodes started as STRICT_* and have
// been mutated into their non-STRICT equivalents. Eventually this
// mutation will be removed and we should switch the STRICT_ nodes to a
// strict version of RNDSCALE in PreProcessISelDAG.
if (!Node->getValueType(0).isVector())
break;

unsigned Imm;
switch (Node->getOpcode()) {
default: llvm_unreachable("Unexpected opcode!");
case ISD::FCEIL: Imm = 0xA; break;
case ISD::FFLOOR: Imm = 0x9; break;
case ISD::FTRUNC: Imm = 0xB; break;
case ISD::FNEARBYINT: Imm = 0xC; break;
case ISD::FRINT: Imm = 0x4; break;
}
SDLoc dl(Node);
SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl,
Node->getValueType(0),
Node->getOperand(0),
CurDAG->getConstant(Imm, dl, MVT::i8));
ReplaceNode(Node, Res.getNode());
SelectCode(Res.getNode());
return;
}
}

SelectCode(Node);
Expand Down
203 changes: 0 additions & 203 deletions llvm/lib/Target/X86/X86InstrAVX512.td
Expand Up @@ -10694,209 +10694,6 @@ defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;


// Selection patterns that map the generic rounding nodes ffloor, fnearbyint,
// fceil, frint and ftrunc on the vector type _.VT onto the
// "VRNDSCALE"#Suffix#_.ZSuffix instruction family, using one fixed immediate
// per node:
//   ffloor -> 0x9, fnearbyint -> 0xC, fceil -> 0xA, frint -> 0x4, ftrunc -> 0xB
//
// Parameters:
//   _      - X86VectorVTInfo providing the value type (VT), register class
//            (RC), write-mask register class (KRCWM), load fragments (LdFrag,
//            ScalarLdFrag), the all-zeros vector (ImmAllZerosV) and the
//            instruction-name suffix (ZSuffix) used below.
//   Suffix - string spliced into the instruction name right after
//            "VRNDSCALE" (e.g. "PS" or "PD").
//
// Each of the nine groups below repeats the same five node->immediate mappings
// for one operand/masking shape; only the instruction-name tail
// (rri, rrik, rrikz, rmi, rmik, rmikz, rmbi, rmbik, rmbikz) differs.
multiclass AVX512_rndscale_lowering<X86VectorVTInfo _, string Suffix> {
// Register: unmasked, register source -> "rri" form.
def : Pat<(_.VT (ffloor _.RC:$src)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
_.RC:$src, (i32 0x9))>;
def : Pat<(_.VT (fnearbyint _.RC:$src)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
_.RC:$src, (i32 0xC))>;
def : Pat<(_.VT (fceil _.RC:$src)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
_.RC:$src, (i32 0xA))>;
def : Pat<(_.VT (frint _.RC:$src)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
_.RC:$src, (i32 0x4))>;
def : Pat<(_.VT (ftrunc _.RC:$src)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
_.RC:$src, (i32 0xB))>;

// Merge-masking: vselect of the rounded register source against $dst under
// $mask -> "rrik" form, with $dst passed as the first instruction operand.
def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor _.RC:$src), _.RC:$dst)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
_.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0x9))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint _.RC:$src), _.RC:$dst)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
_.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xC))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil _.RC:$src), _.RC:$dst)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
_.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xA))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint _.RC:$src), _.RC:$dst)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
_.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0x4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc _.RC:$src), _.RC:$dst)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
_.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xB))>;

// Zero-masking: vselect of the rounded register source against
// _.ImmAllZerosV under $mask -> "rrikz" form (no pass-through operand).
def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor _.RC:$src),
_.ImmAllZerosV)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
_.KRCWM:$mask, _.RC:$src, (i32 0x9))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint _.RC:$src),
_.ImmAllZerosV)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
_.KRCWM:$mask, _.RC:$src, (i32 0xC))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil _.RC:$src),
_.ImmAllZerosV)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
_.KRCWM:$mask, _.RC:$src, (i32 0xA))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint _.RC:$src),
_.ImmAllZerosV)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
_.KRCWM:$mask, _.RC:$src, (i32 0x4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc _.RC:$src),
_.ImmAllZerosV)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
_.KRCWM:$mask, _.RC:$src, (i32 0xB))>;

// Load: unmasked, source is a full-vector load (_.LdFrag) -> "rmi" form, so
// the load is folded into the instruction's memory operand.
def : Pat<(_.VT (ffloor (_.LdFrag addr:$src))),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
addr:$src, (i32 0x9))>;
def : Pat<(_.VT (fnearbyint (_.LdFrag addr:$src))),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
addr:$src, (i32 0xC))>;
def : Pat<(_.VT (fceil (_.LdFrag addr:$src))),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
addr:$src, (i32 0xA))>;
def : Pat<(_.VT (frint (_.LdFrag addr:$src))),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
addr:$src, (i32 0x4))>;
def : Pat<(_.VT (ftrunc (_.LdFrag addr:$src))),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
addr:$src, (i32 0xB))>;

// Merge-masking + load: same as merge-masking, but with a full-vector load
// as the rounded source -> "rmik" form.
def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor (_.LdFrag addr:$src)),
_.RC:$dst)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
_.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x9))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint (_.LdFrag addr:$src)),
_.RC:$dst)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
_.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xC))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil (_.LdFrag addr:$src)),
_.RC:$dst)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
_.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xA))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint (_.LdFrag addr:$src)),
_.RC:$dst)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
_.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc (_.LdFrag addr:$src)),
_.RC:$dst)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
_.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xB))>;

// Zero-masking + load: same as zero-masking, but with a full-vector load as
// the rounded source -> "rmikz" form.
def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor (_.LdFrag addr:$src)),
_.ImmAllZerosV)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
_.KRCWM:$mask, addr:$src, (i32 0x9))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint (_.LdFrag addr:$src)),
_.ImmAllZerosV)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
_.KRCWM:$mask, addr:$src, (i32 0xC))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil (_.LdFrag addr:$src)),
_.ImmAllZerosV)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
_.KRCWM:$mask, addr:$src, (i32 0xA))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint (_.LdFrag addr:$src)),
_.ImmAllZerosV)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
_.KRCWM:$mask, addr:$src, (i32 0x4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc (_.LdFrag addr:$src)),
_.ImmAllZerosV)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
_.KRCWM:$mask, addr:$src, (i32 0xB))>;

// Broadcast load: unmasked, source is an X86VBroadcast of a scalar load
// (_.ScalarLdFrag) -> "rmbi" form.
def : Pat<(_.VT (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
addr:$src, (i32 0x9))>;
def : Pat<(_.VT (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
addr:$src, (i32 0xC))>;
def : Pat<(_.VT (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
addr:$src, (i32 0xA))>;
def : Pat<(_.VT (frint (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
addr:$src, (i32 0x4))>;
def : Pat<(_.VT (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
addr:$src, (i32 0xB))>;

// Merge-masking + broadcast load: same as merge-masking, but with a
// broadcast scalar load as the rounded source -> "rmbik" form.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src))),
_.RC:$dst)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
_.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x9))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
_.RC:$dst)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
_.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xC))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(fceil (X86VBroadcast (_.ScalarLdFrag addr:$src))),
_.RC:$dst)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
_.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xA))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(frint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
_.RC:$dst)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
_.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src))),
_.RC:$dst)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
_.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xB))>;

// Zero-masking + broadcast load: same as zero-masking, but with a broadcast
// scalar load as the rounded source -> "rmbikz" form.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src))),
_.ImmAllZerosV)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
_.KRCWM:$mask, addr:$src, (i32 0x9))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
_.ImmAllZerosV)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
_.KRCWM:$mask, addr:$src, (i32 0xC))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(fceil (X86VBroadcast (_.ScalarLdFrag addr:$src))),
_.ImmAllZerosV)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
_.KRCWM:$mask, addr:$src, (i32 0xA))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(frint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
_.ImmAllZerosV)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
_.KRCWM:$mask, addr:$src, (i32 0x4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src))),
_.ImmAllZerosV)),
(!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
_.KRCWM:$mask, addr:$src, (i32 0xB))>;
}

// Instantiate the rounding-to-VRNDSCALE patterns for the v16f32 ("PS") and
// v8f64 ("PD") vector infos; these patterns are guarded by HasAVX512.
let Predicates = [HasAVX512] in {
defm : AVX512_rndscale_lowering<v16f32_info, "PS">;
defm : AVX512_rndscale_lowering<v8f64_info, "PD">;
}

// Instantiate the same patterns for the v8f32x/v4f32x ("PS") and
// v4f64x/v2f64x ("PD") vector infos; these patterns are guarded by HasVLX.
let Predicates = [HasVLX] in {
defm : AVX512_rndscale_lowering<v8f32x_info, "PS">;
defm : AVX512_rndscale_lowering<v4f64x_info, "PD">;
defm : AVX512_rndscale_lowering<v4f32x_info, "PS">;
defm : AVX512_rndscale_lowering<v2f64x_info, "PD">;
}

multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched,
X86VectorVTInfo _,
Expand Down

0 comments on commit 137de38

Please sign in to comment.