Skip to content

Commit

Permalink
[DAG] Restrict (fp_round (copysign X, Y)) -> (copysign (fp_round X), …
Browse files Browse the repository at this point in the history
…Y) combine

This transformation creates an copysign node whose argument types do not match. RISCV does not handle such a case which results in a crash today. Looking at the relevant code in DAG, it looks like the process of enabling the non-matching types case was never completed for vectors at all. The transformation which triggered the RISCV crash is a specialization of another transform (specifically due to one use for profitability) which isn't enabled by default. Given that, I chose to match the preconditions for that other transform.

Other options here include:
* Updating RISCV codegen to handle the mismatched argument type case for vectors. This is slightly tricky as I don't see an obvious profitable lowering for this case which doesn't involve simply adding back in the round/trunc.
* Disabling the transform via a target hook.

This patch does involve two changes for AArch64 codegen. These could be called regressions, but well, the code after actually looks better than the code before.

Differential Revision: https://reviews.llvm.org/D148638
  • Loading branch information
preames committed Apr 18, 2023
1 parent 790c9ac commit c37c9f2
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 37 deletions.
47 changes: 28 additions & 19 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16611,27 +16611,30 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {

/// copysign(x, fp_extend(y)) -> copysign(x, y)
/// copysign(x, fp_round(y)) -> copysign(x, y)
static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
SDValue N1 = N->getOperand(1);
if ((N1.getOpcode() == ISD::FP_EXTEND ||
N1.getOpcode() == ISD::FP_ROUND)) {
EVT N1VT = N1->getValueType(0);
EVT N1Op0VT = N1->getOperand(0).getValueType();
/// Operands to the functions are the type of X and Y respectively.
static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
// Always fold no-op FP casts.
if (XTy == YTy)
return true;

// Always fold no-op FP casts.
if (N1VT == N1Op0VT)
return true;
// Do not optimize out type conversion of f128 type yet.
// For some targets like x86_64, configuration is changed to keep one f128
// value in one SSE register, but instruction selection cannot handle
// FCOPYSIGN on SSE registers yet.
if (YTy == MVT::f128)
return false;

// Do not optimize out type conversion of f128 type yet.
// For some targets like x86_64, configuration is changed to keep one f128
// value in one SSE register, but instruction selection cannot handle
// FCOPYSIGN on SSE registers yet.
if (N1Op0VT == MVT::f128)
return false;
return !YTy.isVector() || EnableVectorFCopySignExtendRound;
}

return !N1Op0VT.isVector() || EnableVectorFCopySignExtendRound;
}
return false;
static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
SDValue N1 = N->getOperand(1);
if (N1.getOpcode() != ISD::FP_EXTEND &&
N1.getOpcode() != ISD::FP_ROUND)
return false;
EVT N1VT = N1->getValueType(0);
EVT N1Op0VT = N1->getOperand(0).getValueType();
return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT);
}

SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
Expand Down Expand Up @@ -16991,7 +16994,13 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
}

// fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse()) {
// Note: From a legality perspective, this is a two step transform. First,
// we duplicate the fp_round to the arguments of the copysign, then we
// eliminate the fp_round on Y. The second step requires an additional
// predicate to match the implementation above.
if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
CanCombineFCOPYSIGN_EXTEND_ROUND(VT,
N0.getValueType())) {
SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
N0.getOperand(0), N1);
AddToWorklist(Tmp.getNode());
Expand Down
54 changes: 36 additions & 18 deletions llvm/test/CodeGen/AArch64/sve-fcopysign.ll
Original file line number Diff line number Diff line change
Expand Up @@ -259,30 +259,48 @@ define <vscale x 8 x half> @test_copysign_v8f16_v8f32(<vscale x 8 x half> %a, <v
;========== FCOPYSIGN_EXTEND_ROUND

define <vscale x 4 x half> @test_copysign_nxv4f32_nxv4f16(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: test_copysign_nxv4f32_nxv4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcvt z0.h, p0/m, z0.s
; CHECK-NEXT: fcvt z1.h, p0/m, z1.s
; CHECK-NEXT: and z1.h, z1.h, #0x8000
; CHECK-NEXT: and z0.h, z0.h, #0x7fff
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: ret
; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_nxv4f32_nxv4f16:
; CHECK-NO-EXTEND-ROUND: // %bb.0:
; CHECK-NO-EXTEND-ROUND-NEXT: and z1.s, z1.s, #0x80000000
; CHECK-NO-EXTEND-ROUND-NEXT: and z0.s, z0.s, #0x7fffffff
; CHECK-NO-EXTEND-ROUND-NEXT: ptrue p0.s
; CHECK-NO-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z0.h, p0/m, z0.s
; CHECK-NO-EXTEND-ROUND-NEXT: ret
;
; CHECK-EXTEND-ROUND-LABEL: test_copysign_nxv4f32_nxv4f16:
; CHECK-EXTEND-ROUND: // %bb.0:
; CHECK-EXTEND-ROUND-NEXT: ptrue p0.s
; CHECK-EXTEND-ROUND-NEXT: fcvt z0.h, p0/m, z0.s
; CHECK-EXTEND-ROUND-NEXT: fcvt z1.h, p0/m, z1.s
; CHECK-EXTEND-ROUND-NEXT: and z1.h, z1.h, #0x8000
; CHECK-EXTEND-ROUND-NEXT: and z0.h, z0.h, #0x7fff
; CHECK-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
; CHECK-EXTEND-ROUND-NEXT: ret
%t1 = call <vscale x 4 x float> @llvm.copysign.v4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
%t2 = fptrunc <vscale x 4 x float> %t1 to <vscale x 4 x half>
ret <vscale x 4 x half> %t2
}

define <vscale x 2 x float> @test_copysign_nxv2f64_nxv2f32(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: test_copysign_nxv2f64_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fcvt z0.s, p0/m, z0.d
; CHECK-NEXT: fcvt z1.s, p0/m, z1.d
; CHECK-NEXT: and z1.s, z1.s, #0x80000000
; CHECK-NEXT: and z0.s, z0.s, #0x7fffffff
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: ret
; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_nxv2f64_nxv2f32:
; CHECK-NO-EXTEND-ROUND: // %bb.0:
; CHECK-NO-EXTEND-ROUND-NEXT: and z1.d, z1.d, #0x8000000000000000
; CHECK-NO-EXTEND-ROUND-NEXT: and z0.d, z0.d, #0x7fffffffffffffff
; CHECK-NO-EXTEND-ROUND-NEXT: ptrue p0.d
; CHECK-NO-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z0.s, p0/m, z0.d
; CHECK-NO-EXTEND-ROUND-NEXT: ret
;
; CHECK-EXTEND-ROUND-LABEL: test_copysign_nxv2f64_nxv2f32:
; CHECK-EXTEND-ROUND: // %bb.0:
; CHECK-EXTEND-ROUND-NEXT: ptrue p0.d
; CHECK-EXTEND-ROUND-NEXT: fcvt z0.s, p0/m, z0.d
; CHECK-EXTEND-ROUND-NEXT: fcvt z1.s, p0/m, z1.d
; CHECK-EXTEND-ROUND-NEXT: and z1.s, z1.s, #0x80000000
; CHECK-EXTEND-ROUND-NEXT: and z0.s, z0.s, #0x7fffffff
; CHECK-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
; CHECK-EXTEND-ROUND-NEXT: ret
%t1 = call <vscale x 2 x double> @llvm.copysign.v2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
%t2 = fptrunc <vscale x 2 x double> %t1 to <vscale x 2 x float>
ret <vscale x 2 x float> %t2
Expand Down
13 changes: 13 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1431,3 +1431,16 @@ define <vscale x 8 x double> @vfcopynsign_exttrunc_vf_nxv8f64_nxv8f32(<vscale x
%r = call <vscale x 8 x double> @llvm.copysign.nxv8f64(<vscale x 8 x double> %vm, <vscale x 8 x double> %eneg)
ret <vscale x 8 x double> %r
}

define <vscale x 2 x float> @fptrunc_of_copysign_nxv2f32_nxv2f64(<vscale x 2 x double> %X, <vscale x 2 x double> %Y) {
; CHECK-LABEL: fptrunc_of_copysign_nxv2f32_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT: vfsgnj.vv v10, v8, v10
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vfncvt.f.f.w v8, v10
; CHECK-NEXT: ret
%copy = call fast <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> %X, <vscale x 2 x double> %Y)
%trunc = fptrunc <vscale x 2 x double> %copy to <vscale x 2 x float>
ret <vscale x 2 x float> %trunc
}

0 comments on commit c37c9f2

Please sign in to comment.