diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h
index c468f2f676281..21dec19e3cb9d 100644
--- a/llvm/include/llvm/CodeGen/SDPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h
@@ -1076,6 +1076,10 @@ template <typename Opnd> inline UnaryOpc_match<Opnd> m_Cttz(const Opnd &Op) {
   return UnaryOpc_match<Opnd>(ISD::CTTZ, Op);
 }
 
+template <typename Opnd> inline UnaryOpc_match<Opnd> m_FNeg(const Opnd &Op) {
+  return UnaryOpc_match<Opnd>(ISD::FNEG, Op);
+}
+
 // === Constants ===
 struct ConstantInt_match {
   APInt *BindVal;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5f01633126c7b..46f544c0d4df5 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -20248,6 +20248,17 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
       return V;
     break;
   case ISD::FMUL: {
+    using namespace SDPatternMatch;
+    SDLoc DL(N);
+    EVT VT = N->getValueType(0);
+    SDValue X, Y;
+    // InstCombine canonicalizes fneg (fmul x, y) -> fmul x, (fneg y), see
+    // hoistFNegAboveFMulFDiv.
+    // Undo this and sink the fneg so we match more fmsub/fnmadd patterns.
+    if (sd_match(N, m_FMul(m_Value(X), m_OneUse(m_FNeg(m_Value(Y))))))
+      return DAG.getNode(ISD::FNEG, DL, VT,
+                         DAG.getNode(ISD::FMUL, DL, VT, X, Y));
+
     // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
     SDValue N0 = N->getOperand(0);
     SDValue N1 = N->getOperand(1);
@@ -20258,13 +20269,12 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
     ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0->getOperand(0));
    if (!C || !C->getValueAPF().isExactlyValue(+1.0))
       return SDValue();
-    EVT VT = N->getValueType(0);
     if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
       return SDValue();
     SDValue Sign = N0->getOperand(1);
     if (Sign.getValueType() != VT)
       return SDValue();
-    return DAG.getNode(RISCVISD::FSGNJX, SDLoc(N), VT, N1, N0->getOperand(1));
+    return DAG.getNode(RISCVISD::FSGNJX, DL, VT, N1, N0->getOperand(1));
   }
   case ISD::FADD:
   case ISD::UMAX:
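The combine is easiest to see on the IR shape that the tests below exercise. The following sketch is illustrative (the function name is not from the patch): InstCombine's hoistFNegAboveFMulFDiv leaves the fneg on an fmul operand, which hides the fma(a, b, fneg(mul)) shape from the fmsub selection patterns, and sinking the fneg back under the fmul restores it.

; Illustrative IR, mirroring the fmsub_d_fmul_fneg test below.  The DAG for
; the fma addend starts as fmul(c, fneg(d)); the new combine rewrites it to
; fneg(fmul(c, d)), so instruction selection can fold the whole expression
; into fmsub.d, computing a*b - c*d with one fused instruction.
define double @sink_fneg(double %a, double %b, double %c, double %d) {
  %negd = fneg double %d
  %mul = fmul double %c, %negd
  %r = call double @llvm.fma.f64(double %a, double %b, double %mul)
  ret double %r
}
declare double @llvm.fma.f64(double, double, double)

The m_OneUse guard restricts the rewrite to fnegs whose only user is this multiply, so no other user of the negated value is left recomputing it.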
diff --git a/llvm/test/CodeGen/RISCV/double-arith.ll b/llvm/test/CodeGen/RISCV/double-arith.ll
index d6c4f8d5f350f..911692ec32fb6 100644
--- a/llvm/test/CodeGen/RISCV/double-arith.ll
+++ b/llvm/test/CodeGen/RISCV/double-arith.ll
@@ -610,6 +610,86 @@ define double @fmsub_d(double %a, double %b, double %c) nounwind {
   ret double %1
 }
 
+define double @fmsub_d_fmul_fneg(double %a, double %b, double %c, double %d) nounwind {
+; CHECKIFD-LABEL: fmsub_d_fmul_fneg:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    fmul.d fa5, fa2, fa3
+; CHECKIFD-NEXT:    fmsub.d fa0, fa0, fa1, fa5
+; CHECKIFD-NEXT:    ret
+;
+; RV32IZFINXZDINX-LABEL: fmsub_d_fmul_fneg:
+; RV32IZFINXZDINX:       # %bb.0:
+; RV32IZFINXZDINX-NEXT:    fmul.d a4, a4, a6
+; RV32IZFINXZDINX-NEXT:    fmsub.d a0, a0, a2, a4
+; RV32IZFINXZDINX-NEXT:    ret
+;
+; RV64IZFINXZDINX-LABEL: fmsub_d_fmul_fneg:
+; RV64IZFINXZDINX:       # %bb.0:
+; RV64IZFINXZDINX-NEXT:    fmul.d a2, a2, a3
+; RV64IZFINXZDINX-NEXT:    fmsub.d a0, a0, a1, a2
+; RV64IZFINXZDINX-NEXT:    ret
+;
+; RV32I-LABEL: fmsub_d_fmul_fneg:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -32
+; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s0, a3
+; RV32I-NEXT:    mv s1, a2
+; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s3, a0
+; RV32I-NEXT:    lui a0, 524288
+; RV32I-NEXT:    xor a3, a7, a0
+; RV32I-NEXT:    mv a0, a4
+; RV32I-NEXT:    mv a1, a5
+; RV32I-NEXT:    mv a2, a6
+; RV32I-NEXT:    call __muldf3
+; RV32I-NEXT:    mv a4, a0
+; RV32I-NEXT:    mv a5, a1
+; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a2, s1
+; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    call fma
+; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 32
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fmsub_d_fmul_fneg:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a1
+; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    li a0, -1
+; RV64I-NEXT:    slli a0, a0, 63
+; RV64I-NEXT:    xor a1, a3, a0
+; RV64I-NEXT:    mv a0, a2
+; RV64I-NEXT:    call __muldf3
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    mv a0, s1
+; RV64I-NEXT:    mv a1, s0
+; RV64I-NEXT:    call fma
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
+  %negd = fneg double %d
+  %fmul = fmul double %c, %negd
+  %1 = call double @llvm.fma.f64(double %a, double %b, double %fmul)
+  ret double %1
+}
+
 define double @fnmadd_d(double %a, double %b, double %c) nounwind {
 ; RV32IFD-LABEL: fnmadd_d:
 ; RV32IFD:       # %bb.0:
@@ -877,6 +957,89 @@ define double @fnmadd_d_3(double %a, double %b, double %c) nounwind {
   ret double %neg
 }
 
+define double @fnmadd_d_fmul_fneg(double %a, double %b, double %c, double %d) nounwind {
+; CHECKIFD-LABEL: fnmadd_d_fmul_fneg:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    fmul.d fa5, fa1, fa0
+; CHECKIFD-NEXT:    fmsub.d fa0, fa2, fa3, fa5
+; CHECKIFD-NEXT:    ret
+;
+; RV32IZFINXZDINX-LABEL: fnmadd_d_fmul_fneg:
+; RV32IZFINXZDINX:       # %bb.0:
+; RV32IZFINXZDINX-NEXT:    fmul.d a0, a2, a0
+; RV32IZFINXZDINX-NEXT:    fmsub.d a0, a4, a6, a0
+; RV32IZFINXZDINX-NEXT:    ret
+;
+; RV64IZFINXZDINX-LABEL: fnmadd_d_fmul_fneg:
+; RV64IZFINXZDINX:       # %bb.0:
+; RV64IZFINXZDINX-NEXT:    fmul.d a0, a1, a0
+; RV64IZFINXZDINX-NEXT:    fmsub.d a0, a2, a3, a0
+; RV64IZFINXZDINX-NEXT:    ret
+;
+; RV32I-LABEL: fnmadd_d_fmul_fneg:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -32
+; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s0, a7
+; RV32I-NEXT:    mv s1, a6
+; RV32I-NEXT:    mv s2, a5
+; RV32I-NEXT:    mv s3, a4
+; RV32I-NEXT:    mv a5, a3
+; RV32I-NEXT:    mv a4, a0
+; RV32I-NEXT:    lui a3, 524288
+; RV32I-NEXT:    xor a3, a1, a3
+; RV32I-NEXT:    mv a0, a2
+; RV32I-NEXT:    mv a1, a5
+; RV32I-NEXT:    mv a2, a4
+; RV32I-NEXT:    call __muldf3
+; RV32I-NEXT:    mv a4, a0
+; RV32I-NEXT:    mv a5, a1
+; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a2, s1
+; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    call fma
+; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 32
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fnmadd_d_fmul_fneg:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a3
+; RV64I-NEXT:    mv s1, a2
+; RV64I-NEXT:    mv a2, a1
+; RV64I-NEXT:    li a1, -1
+; RV64I-NEXT:    slli a1, a1, 63
+; RV64I-NEXT:    xor a1, a0, a1
+; RV64I-NEXT:    mv a0, a2
+; RV64I-NEXT:    call __muldf3
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    mv a0, s1
+; RV64I-NEXT:    mv a1, s0
+; RV64I-NEXT:    call fma
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
+  %nega = fneg double %a
+  %mul = fmul double %b, %nega
+  %1 = call double @llvm.fma.f64(double %c, double %d, double %mul)
+  ret double %1
+}
+
 define double @fnmadd_nsz(double %a, double %b, double %c) nounwind {
 ; CHECKIFD-LABEL: fnmadd_nsz:
 ; CHECKIFD:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/float-arith.ll b/llvm/test/CodeGen/RISCV/float-arith.ll
index 57b3423da69a6..95f1fc6899206 100644
--- a/llvm/test/CodeGen/RISCV/float-arith.ll
+++ b/llvm/test/CodeGen/RISCV/float-arith.ll
@@ -529,6 +529,68 @@ define float @fmsub_s(float %a, float %b, float %c) nounwind {
   ret float %1
 }
 
+define float @fmsub_s_fmul_fneg(float %a, float %b, float %c, float %d) nounwind {
+; CHECKIF-LABEL: fmsub_s_fmul_fneg:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    fmul.s fa5, fa2, fa3
+; CHECKIF-NEXT:    fmsub.s fa0, fa0, fa1, fa5
+; CHECKIF-NEXT:    ret
+;
+; CHECKIZFINX-LABEL: fmsub_s_fmul_fneg:
+; CHECKIZFINX:       # %bb.0:
+; CHECKIZFINX-NEXT:    fmul.s a2, a2, a3
+; CHECKIZFINX-NEXT:    fmsub.s a0, a0, a1, a2
+; CHECKIZFINX-NEXT:    ret
+;
+; RV32I-LABEL: fmsub_s_fmul_fneg:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s0, a1
+; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    lui a1, 524288
+; RV32I-NEXT:    xor a1, a3, a1
+; RV32I-NEXT:    mv a0, a2
+; RV32I-NEXT:    call __mulsf3
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    mv a0, s1
+; RV32I-NEXT:    mv a1, s0
+; RV32I-NEXT:    call fmaf
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fmsub_s_fmul_fneg:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a1
+; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    lui a1, 524288
+; RV64I-NEXT:    xor a1, a3, a1
+; RV64I-NEXT:    mv a0, a2
+; RV64I-NEXT:    call __mulsf3
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    mv a0, s1
+; RV64I-NEXT:    mv a1, s0
+; RV64I-NEXT:    call fmaf
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
+  %negd = fneg float %d
+  %fmul = fmul float %c, %negd
+  %1 = call float @llvm.fma.f32(float %a, float %b, float %fmul)
+  ret float %1
+}
+
 define float @fnmadd_s(float %a, float %b, float %c) nounwind {
 ; CHECKIF-LABEL: fnmadd_s:
 ; CHECKIF:       # %bb.0:
@@ -738,6 +800,70 @@ define float @fnmadd_s_3(float %a, float %b, float %c) nounwind {
   ret float %neg
 }
 
+define float @fnmadd_s_fmul_fneg(float %a, float %b, float %c, float %d) nounwind {
+; CHECKIF-LABEL: fnmadd_s_fmul_fneg:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    fmul.s fa5, fa1, fa0
+; CHECKIF-NEXT:    fmsub.s fa0, fa2, fa3, fa5
+; CHECKIF-NEXT:    ret
+;
+; CHECKIZFINX-LABEL: fnmadd_s_fmul_fneg:
+; CHECKIZFINX:       # %bb.0:
+; CHECKIZFINX-NEXT:    fmul.s a0, a1, a0
+; CHECKIZFINX-NEXT:    fmsub.s a0, a2, a3, a0
+; CHECKIZFINX-NEXT:    ret
+;
+; RV32I-LABEL: fnmadd_s_fmul_fneg:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s0, a3
+; RV32I-NEXT:    mv s1, a2
+; RV32I-NEXT:    mv a2, a1
+; RV32I-NEXT:    lui a1, 524288
+; RV32I-NEXT:    xor a1, a0, a1
+; RV32I-NEXT:    mv a0, a2
+; RV32I-NEXT:    call __mulsf3
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    mv a0, s1
+; RV32I-NEXT:    mv a1, s0
+; RV32I-NEXT:    call fmaf
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fnmadd_s_fmul_fneg:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a3
+; RV64I-NEXT:    mv s1, a2
+; RV64I-NEXT:    mv a2, a1
+; RV64I-NEXT:    lui a1, 524288
+; RV64I-NEXT:    xor a1, a0, a1
+; RV64I-NEXT:    mv a0, a2
+; RV64I-NEXT:    call __mulsf3
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    mv a0, s1
+; RV64I-NEXT:    mv a1, s0
+; RV64I-NEXT:    call fmaf
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
+  %nega = fneg float %a
+  %mul = fmul float %b, %nega
+  %1 = call float @llvm.fma.f32(float %c, float %d, float %mul)
+  ret float %1
+}
+
 define float @fnmadd_nsz(float %a, float %b, float %c) nounwind {
 ; RV32IF-LABEL: fnmadd_nsz:
 ; RV32IF:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/float-bit-preserving-dagcombines.ll b/llvm/test/CodeGen/RISCV/float-bit-preserving-dagcombines.ll
index 6aa6dedba548f..acf533a6c3e27 100644
--- a/llvm/test/CodeGen/RISCV/float-bit-preserving-dagcombines.ll
+++ b/llvm/test/CodeGen/RISCV/float-bit-preserving-dagcombines.ll
@@ -197,16 +197,17 @@ define float @bitcast_xor(float %a1, float %a2) nounwind {
 ; RV32F-NEXT:    fmv.w.x fa5, a1
 ; RV32F-NEXT:    fmv.w.x fa4, a0
 ; RV32F-NEXT:    fmul.s fa5, fa4, fa5
-; RV32F-NEXT:    fneg.s fa5, fa5
 ; RV32F-NEXT:    fmul.s fa5, fa4, fa5
 ; RV32F-NEXT:    fmv.x.w a0, fa5
+; RV32F-NEXT:    lui a1, 524288
+; RV32F-NEXT:    xor a0, a0, a1
 ; RV32F-NEXT:    ret
 ;
 ; RV32ZFINX-LABEL: bitcast_xor:
 ; RV32ZFINX:       # %bb.0:
 ; RV32ZFINX-NEXT:    fmul.s a1, a0, a1
-; RV32ZFINX-NEXT:    fneg.s a1, a1
 ; RV32ZFINX-NEXT:    fmul.s a0, a0, a1
+; RV32ZFINX-NEXT:    fneg.s a0, a0
 ; RV32ZFINX-NEXT:    ret
 ;
 ; RV32FD-LABEL: bitcast_xor:
@@ -214,9 +215,10 @@ define float @bitcast_xor(float %a1, float %a2) nounwind {
 ; RV32FD-NEXT:    fmv.w.x fa5, a1
 ; RV32FD-NEXT:    fmv.w.x fa4, a0
 ; RV32FD-NEXT:    fmul.s fa5, fa4, fa5
-; RV32FD-NEXT:    fneg.s fa5, fa5
 ; RV32FD-NEXT:    fmul.s fa5, fa4, fa5
 ; RV32FD-NEXT:    fmv.x.w a0, fa5
+; RV32FD-NEXT:    lui a1, 524288
+; RV32FD-NEXT:    xor a0, a0, a1
 ; RV32FD-NEXT:    ret
 ;
 ; RV64F-LABEL: bitcast_xor:
@@ -224,16 +226,17 @@ define float @bitcast_xor(float %a1, float %a2) nounwind {
 ; RV64F-NEXT:    fmv.w.x fa5, a1
 ; RV64F-NEXT:    fmv.w.x fa4, a0
 ; RV64F-NEXT:    fmul.s fa5, fa4, fa5
-; RV64F-NEXT:    fneg.s fa5, fa5
 ; RV64F-NEXT:    fmul.s fa5, fa4, fa5
 ; RV64F-NEXT:    fmv.x.w a0, fa5
+; RV64F-NEXT:    lui a1, 524288
+; RV64F-NEXT:    xor a0, a0, a1
 ; RV64F-NEXT:    ret
 ;
 ; RV64ZFINX-LABEL: bitcast_xor:
 ; RV64ZFINX:       # %bb.0:
 ; RV64ZFINX-NEXT:    fmul.s a1, a0, a1
-; RV64ZFINX-NEXT:    fneg.s a1, a1
 ; RV64ZFINX-NEXT:    fmul.s a0, a0, a1
+; RV64ZFINX-NEXT:    fneg.s a0, a0
 ; RV64ZFINX-NEXT:    ret
 ;
 ; RV64FD-LABEL: bitcast_xor:
@@ -241,9 +244,10 @@ define float @bitcast_xor(float %a1, float %a2) nounwind {
 ; RV64FD-NEXT:    fmv.w.x fa5, a1
 ; RV64FD-NEXT:    fmv.w.x fa4, a0
 ; RV64FD-NEXT:    fmul.s fa5, fa4, fa5
-; RV64FD-NEXT:    fneg.s fa5, fa5
 ; RV64FD-NEXT:    fmul.s fa5, fa4, fa5
 ; RV64FD-NEXT:    fmv.x.w a0, fa5
+; RV64FD-NEXT:    lui a1, 524288
+; RV64FD-NEXT:    xor a0, a0, a1
 ; RV64FD-NEXT:    ret
   %a3 = fmul float %a1, %a2
   %bc1 = bitcast float %a3 to i32
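The bit-preserving tests change for the same reason. They build fneg and fabs out of integer bit operations on a bitcast value, and the DAG combiner already turns a sign-bit xor back into an fneg of the multiply result; the new combine then sinks that fneg below the second fmul, so it lowers as a trailing fneg.s (Zfinx) or a final sign-bit xor on the integer result instead of a negation between the two multiplies. A sketch of the IR shape these tests use, with illustrative names since the full test bodies are truncated in this view:

; The xor flips only the sign bit, so after the bitcasts the second multiply
; is effectively fmul(%a1, fneg(%a3)); the combine rewrites it to
; fneg(fmul(%a1, %a3)), pushing the sign flip past the last multiply.
define float @bitcast_xor_shape(float %a1, float %a2) {
  %a3 = fmul float %a1, %a2
  %bc1 = bitcast float %a3 to i32
  %flip = xor i32 %bc1, 2147483648  ; 0x80000000, the f32 sign bit
  %bc2 = bitcast i32 %flip to float
  %a4 = fmul float %a1, %bc2
  ret float %a4
}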
@@ -264,11 +268,12 @@ define double @bitcast_double_xor(double %a1, double %a2) nounwind {
 ; RV32F-NEXT:    mv s1, a0
 ; RV32F-NEXT:    call __muldf3
 ; RV32F-NEXT:    mv a2, a0
-; RV32F-NEXT:    lui a3, 524288
-; RV32F-NEXT:    xor a3, a1, a3
+; RV32F-NEXT:    mv a3, a1
 ; RV32F-NEXT:    mv a0, s1
 ; RV32F-NEXT:    mv a1, s0
 ; RV32F-NEXT:    call __muldf3
+; RV32F-NEXT:    lui a2, 524288
+; RV32F-NEXT:    xor a1, a1, a2
 ; RV32F-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32F-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32F-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
@@ -285,11 +290,12 @@ define double @bitcast_double_xor(double %a1, double %a2) nounwind {
 ; RV32ZFINX-NEXT:    mv s1, a0
 ; RV32ZFINX-NEXT:    call __muldf3
 ; RV32ZFINX-NEXT:    mv a2, a0
-; RV32ZFINX-NEXT:    lui a3, 524288
-; RV32ZFINX-NEXT:    xor a3, a1, a3
+; RV32ZFINX-NEXT:    mv a3, a1
 ; RV32ZFINX-NEXT:    mv a0, s1
 ; RV32ZFINX-NEXT:    mv a1, s0
 ; RV32ZFINX-NEXT:    call __muldf3
+; RV32ZFINX-NEXT:    lui a2, 524288
+; RV32ZFINX-NEXT:    xor a1, a1, a2
 ; RV32ZFINX-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32ZFINX-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32ZFINX-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
@@ -306,11 +312,12 @@ define double @bitcast_double_xor(double %a1, double %a2) nounwind {
 ; RV32FD-NEXT:    sw a1, 12(sp)
 ; RV32FD-NEXT:    fld fa4, 8(sp)
 ; RV32FD-NEXT:    fmul.d fa5, fa4, fa5
-; RV32FD-NEXT:    fneg.d fa5, fa5
 ; RV32FD-NEXT:    fmul.d fa5, fa4, fa5
 ; RV32FD-NEXT:    fsd fa5, 8(sp)
-; RV32FD-NEXT:    lw a0, 8(sp)
 ; RV32FD-NEXT:    lw a1, 12(sp)
+; RV32FD-NEXT:    lw a0, 8(sp)
+; RV32FD-NEXT:    lui a2, 524288
+; RV32FD-NEXT:    xor a1, a1, a2
 ; RV32FD-NEXT:    addi sp, sp, 16
 ; RV32FD-NEXT:    ret
 ;
@@ -321,11 +328,12 @@ define double @bitcast_double_xor(double %a1, double %a2) nounwind {
 ; RV64F-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
 ; RV64F-NEXT:    mv s0, a0
 ; RV64F-NEXT:    call __muldf3
-; RV64F-NEXT:    li a1, -1
-; RV64F-NEXT:    slli a1, a1, 63
-; RV64F-NEXT:    xor a1, a0, a1
+; RV64F-NEXT:    mv a1, a0
 ; RV64F-NEXT:    mv a0, s0
 ; RV64F-NEXT:    call __muldf3
+; RV64F-NEXT:    li a1, -1
+; RV64F-NEXT:    slli a1, a1, 63
+; RV64F-NEXT:    xor a0, a0, a1
 ; RV64F-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64F-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64F-NEXT:    addi sp, sp, 16
@@ -338,11 +346,12 @@ define double @bitcast_double_xor(double %a1, double %a2) nounwind {
 ; RV64ZFINX-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
 ; RV64ZFINX-NEXT:    mv s0, a0
 ; RV64ZFINX-NEXT:    call __muldf3
-; RV64ZFINX-NEXT:    li a1, -1
-; RV64ZFINX-NEXT:    slli a1, a1, 63
-; RV64ZFINX-NEXT:    xor a1, a0, a1
+; RV64ZFINX-NEXT:    mv a1, a0
 ; RV64ZFINX-NEXT:    mv a0, s0
 ; RV64ZFINX-NEXT:    call __muldf3
+; RV64ZFINX-NEXT:    li a1, -1
+; RV64ZFINX-NEXT:    slli a1, a1, 63
+; RV64ZFINX-NEXT:    xor a0, a0, a1
 ; RV64ZFINX-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64ZFINX-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64ZFINX-NEXT:    addi sp, sp, 16
@@ -352,10 +361,12 @@ define double @bitcast_double_xor(double %a1, double %a2) nounwind {
 ; RV64FD:       # %bb.0:
 ; RV64FD-NEXT:    fmv.d.x fa5, a1
 ; RV64FD-NEXT:    fmv.d.x fa4, a0
+; RV64FD-NEXT:    li a0, -1
 ; RV64FD-NEXT:    fmul.d fa5, fa4, fa5
-; RV64FD-NEXT:    fneg.d fa5, fa5
 ; RV64FD-NEXT:    fmul.d fa5, fa4, fa5
-; RV64FD-NEXT:    fmv.x.d a0, fa5
+; RV64FD-NEXT:    fmv.x.d a1, fa5
+; RV64FD-NEXT:    slli a0, a0, 63
+; RV64FD-NEXT:    xor a0, a1, a0
 ; RV64FD-NEXT:    ret
   %a3 = fmul double %a1, %a2
   %bc1 = bitcast double %a3 to i64
@@ -372,17 +383,18 @@ define float @bitcast_or(float %a1, float %a2) nounwind {
 ; RV32F-NEXT:    fmv.w.x fa4, a0
 ; RV32F-NEXT:    fmul.s fa5, fa4, fa5
 ; RV32F-NEXT:    fabs.s fa5, fa5
-; RV32F-NEXT:    fneg.s fa5, fa5
 ; RV32F-NEXT:    fmul.s fa5, fa4, fa5
 ; RV32F-NEXT:    fmv.x.w a0, fa5
+; RV32F-NEXT:    lui a1, 524288
+; RV32F-NEXT:    xor a0, a0, a1
 ; RV32F-NEXT:    ret
 ;
 ; RV32ZFINX-LABEL: bitcast_or:
 ; RV32ZFINX:       # %bb.0:
 ; RV32ZFINX-NEXT:    fmul.s a1, a0, a1
 ; RV32ZFINX-NEXT:    fabs.s a1, a1
-; RV32ZFINX-NEXT:    fneg.s a1, a1
 ; RV32ZFINX-NEXT:    fmul.s a0, a0, a1
+; RV32ZFINX-NEXT:    fneg.s a0, a0
 ; RV32ZFINX-NEXT:    ret
 ;
 ; RV32FD-LABEL: bitcast_or:
@@ -391,9 +403,10 @@ define float @bitcast_or(float %a1, float %a2) nounwind {
 ; RV32FD-NEXT:    fmv.w.x fa4, a0
 ; RV32FD-NEXT:    fmul.s fa5, fa4, fa5
 ; RV32FD-NEXT:    fabs.s fa5, fa5
-; RV32FD-NEXT:    fneg.s fa5, fa5
 ; RV32FD-NEXT:    fmul.s fa5, fa4, fa5
 ; RV32FD-NEXT:    fmv.x.w a0, fa5
+; RV32FD-NEXT:    lui a1, 524288
+; RV32FD-NEXT:    xor a0, a0, a1
 ; RV32FD-NEXT:    ret
 ;
 ; RV64F-LABEL: bitcast_or:
@@ -402,17 +415,18 @@ define float @bitcast_or(float %a1, float %a2) nounwind {
 ; RV64F-NEXT:    fmv.w.x fa4, a0
 ; RV64F-NEXT:    fmul.s fa5, fa4, fa5
 ; RV64F-NEXT:    fabs.s fa5, fa5
-; RV64F-NEXT:    fneg.s fa5, fa5
 ; RV64F-NEXT:    fmul.s fa5, fa4, fa5
 ; RV64F-NEXT:    fmv.x.w a0, fa5
+; RV64F-NEXT:    lui a1, 524288
+; RV64F-NEXT:    xor a0, a0, a1
 ; RV64F-NEXT:    ret
 ;
 ; RV64ZFINX-LABEL: bitcast_or:
 ; RV64ZFINX:       # %bb.0:
 ; RV64ZFINX-NEXT:    fmul.s a1, a0, a1
 ; RV64ZFINX-NEXT:    fabs.s a1, a1
-; RV64ZFINX-NEXT:    fneg.s a1, a1
 ; RV64ZFINX-NEXT:    fmul.s a0, a0, a1
+; RV64ZFINX-NEXT:    fneg.s a0, a0
 ; RV64ZFINX-NEXT:    ret
 ;
 ; RV64FD-LABEL: bitcast_or:
@@ -421,9 +435,10 @@ define float @bitcast_or(float %a1, float %a2) nounwind {
 ; RV64FD-NEXT:    fmv.w.x fa4, a0
 ; RV64FD-NEXT:    fmul.s fa5, fa4, fa5
 ; RV64FD-NEXT:    fabs.s fa5, fa5
-; RV64FD-NEXT:    fneg.s fa5, fa5
 ; RV64FD-NEXT:    fmul.s fa5, fa4, fa5
 ; RV64FD-NEXT:    fmv.x.w a0, fa5
+; RV64FD-NEXT:    lui a1, 524288
+; RV64FD-NEXT:    xor a0, a0, a1
 ; RV64FD-NEXT:    ret
   %a3 = fmul float %a1, %a2
   %bc1 = bitcast float %a3 to i32
@@ -444,11 +459,13 @@ define double @bitcast_double_or(double %a1, double %a2) nounwind {
 ; RV32F-NEXT:    mv s1, a0
 ; RV32F-NEXT:    call __muldf3
 ; RV32F-NEXT:    mv a2, a0
-; RV32F-NEXT:    lui a3, 524288
-; RV32F-NEXT:    or a3, a1, a3
+; RV32F-NEXT:    slli a1, a1, 1
+; RV32F-NEXT:    srli a3, a1, 1
 ; RV32F-NEXT:    mv a0, s1
 ; RV32F-NEXT:    mv a1, s0
 ; RV32F-NEXT:    call __muldf3
+; RV32F-NEXT:    lui a2, 524288
+; RV32F-NEXT:    xor a1, a1, a2
 ; RV32F-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32F-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32F-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
@@ -465,11 +482,13 @@ define double @bitcast_double_or(double %a1, double %a2) nounwind {
 ; RV32ZFINX-NEXT:    mv s1, a0
 ; RV32ZFINX-NEXT:    call __muldf3
 ; RV32ZFINX-NEXT:    mv a2, a0
-; RV32ZFINX-NEXT:    lui a3, 524288
-; RV32ZFINX-NEXT:    or a3, a1, a3
+; RV32ZFINX-NEXT:    slli a1, a1, 1
+; RV32ZFINX-NEXT:    srli a3, a1, 1
 ; RV32ZFINX-NEXT:    mv a0, s1
 ; RV32ZFINX-NEXT:    mv a1, s0
 ; RV32ZFINX-NEXT:    call __muldf3
+; RV32ZFINX-NEXT:    lui a2, 524288
+; RV32ZFINX-NEXT:    xor a1, a1, a2
 ; RV32ZFINX-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32ZFINX-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32ZFINX-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
@@ -487,11 +506,12 @@ define double @bitcast_double_or(double %a1, double %a2) nounwind {
 ; RV32FD-NEXT:    fld fa4, 8(sp)
 ; RV32FD-NEXT:    fmul.d fa5, fa4, fa5
 ; RV32FD-NEXT:    fabs.d fa5, fa5
-; RV32FD-NEXT:    fneg.d fa5, fa5
 ; RV32FD-NEXT:    fmul.d fa5, fa4, fa5
 ; RV32FD-NEXT:    fsd fa5, 8(sp)
-; RV32FD-NEXT:    lw a0, 8(sp)
 ; RV32FD-NEXT:    lw a1, 12(sp)
+; RV32FD-NEXT:    lw a0, 8(sp)
+; RV32FD-NEXT:    lui a2, 524288
+; RV32FD-NEXT:    xor a1, a1, a2
 ; RV32FD-NEXT:    addi sp, sp, 16
 ; RV32FD-NEXT:    ret
 ;
@@ -502,11 +522,13 @@ define double @bitcast_double_or(double %a1, double %a2) nounwind {
 ; RV64F-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
 ; RV64F-NEXT:    mv s0, a0
 ; RV64F-NEXT:    call __muldf3
-; RV64F-NEXT:    li a1, -1
-; RV64F-NEXT:    slli a1, a1, 63
-; RV64F-NEXT:    or a1, a0, a1
+; RV64F-NEXT:    slli a0, a0, 1
+; RV64F-NEXT:    srli a1, a0, 1
 ; RV64F-NEXT:    mv a0, s0
 ; RV64F-NEXT:    call __muldf3
+; RV64F-NEXT:    li a1, -1
+; RV64F-NEXT:    slli a1, a1, 63
+; RV64F-NEXT:    xor a0, a0, a1
 ; RV64F-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64F-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64F-NEXT:    addi sp, sp, 16
@@ -519,11 +541,13 @@ define double @bitcast_double_or(double %a1, double %a2) nounwind {
 ; RV64ZFINX-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
 ; RV64ZFINX-NEXT:    mv s0, a0
 ; RV64ZFINX-NEXT:    call __muldf3
-; RV64ZFINX-NEXT:    li a1, -1
-; RV64ZFINX-NEXT:    slli a1, a1, 63
-; RV64ZFINX-NEXT:    or a1, a0, a1
+; RV64ZFINX-NEXT:    slli a0, a0, 1
+; RV64ZFINX-NEXT:    srli a1, a0, 1
 ; RV64ZFINX-NEXT:    mv a0, s0
 ; RV64ZFINX-NEXT:    call __muldf3
+; RV64ZFINX-NEXT:    li a1, -1
+; RV64ZFINX-NEXT:    slli a1, a1, 63
+; RV64ZFINX-NEXT:    xor a0, a0, a1
 ; RV64ZFINX-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64ZFINX-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64ZFINX-NEXT:    addi sp, sp, 16
@@ -533,11 +557,13 @@ define double @bitcast_double_or(double %a1, double %a2) nounwind {
 ; RV64FD:       # %bb.0:
 ; RV64FD-NEXT:    fmv.d.x fa5, a1
 ; RV64FD-NEXT:    fmv.d.x fa4, a0
+; RV64FD-NEXT:    li a0, -1
 ; RV64FD-NEXT:    fmul.d fa5, fa4, fa5
 ; RV64FD-NEXT:    fabs.d fa5, fa5
-; RV64FD-NEXT:    fneg.d fa5, fa5
 ; RV64FD-NEXT:    fmul.d fa5, fa4, fa5
-; RV64FD-NEXT:    fmv.x.d a0, fa5
+; RV64FD-NEXT:    fmv.x.d a1, fa5
+; RV64FD-NEXT:    slli a0, a0, 63
+; RV64FD-NEXT:    xor a0, a1, a0
 ; RV64FD-NEXT:    ret
   %a3 = fmul double %a1, %a2
   %bc1 = bitcast double %a3 to i64