diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5485b916c2031..c3f39caeb4293 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -15137,6 +15137,10 @@ static unsigned getVecReduceOpcode(unsigned Opc) {
   case ISD::FADD:
     // Note: This is the associative form of the generic reduction opcode.
     return ISD::VECREDUCE_FADD;
+  case ISD::FMAXNUM:
+    return ISD::VECREDUCE_FMAX;
+  case ISD::FMINNUM:
+    return ISD::VECREDUCE_FMIN;
   }
 }
 
@@ -15165,13 +15169,22 @@ combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
   const EVT VT = N->getValueType(0);
   const unsigned Opc = N->getOpcode();
 
-  // For FADD, we only handle the case with reassociation allowed.  We
-  // could handle strict reduction order, but at the moment, there's no
-  // known reason to, and the complexity isn't worth it.
-  // TODO: Handle fminnum and fmaxnum here
-  if (!VT.isInteger() &&
-      (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
-    return SDValue();
+  if (!VT.isInteger()) {
+    switch (Opc) {
+    default:
+      return SDValue();
+    case ISD::FADD:
+      // For FADD, we only handle the case with reassociation allowed.  We
+      // could handle strict reduction order, but at the moment, there's no
+      // known reason to, and the complexity isn't worth it.
+      if (!N->getFlags().hasAllowReassociation())
+        return SDValue();
+      break;
+    case ISD::FMAXNUM:
+    case ISD::FMINNUM:
+      break;
+    }
+  }
 
   const unsigned ReduceOpc = getVecReduceOpcode(Opc);
   assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
index bf8baafc4a25d..754941eb93e01 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
@@ -904,3 +904,33 @@ define float @reduce_fadd_4xi32_non_associative2(ptr %p) {
   %fadd2 = fadd fast float %fadd1, %e3
   ret float %fadd2
 }
+
+define float @reduce_fmaxnum_16xf32_prefix2(ptr %p) {
+; CHECK-LABEL: reduce_fmaxnum_16xf32_prefix2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vfredmax.vs v8, v8, v8
+; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    ret
+  %v = load <16 x float>, ptr %p, align 256
+  %e0 = extractelement <16 x float> %v, i32 0
+  %e1 = extractelement <16 x float> %v, i32 1
+  %fmax0 = call float @llvm.maxnum.f32(float %e0, float %e1)
+  ret float %fmax0
+}
+
+define float @reduce_fminnum_16xf32_prefix2(ptr %p) {
+; CHECK-LABEL: reduce_fminnum_16xf32_prefix2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vfredmin.vs v8, v8, v8
+; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    ret
+  %v = load <16 x float>, ptr %p, align 256
+  %e0 = extractelement <16 x float> %v, i32 0
+  %e1 = extractelement <16 x float> %v, i32 1
+  %fmin0 = call float @llvm.minnum.f32(float %e0, float %e1)
+  ret float %fmin0
+}