diff --git a/flang/runtime/sum.cpp b/flang/runtime/sum.cpp index db808b2b4c329..fb32505419533 100644 --- a/flang/runtime/sum.cpp +++ b/flang/runtime/sum.cpp @@ -9,8 +9,8 @@ // Implements SUM for all required operand types and shapes. // // Real and complex SUM reductions attempt to reduce floating-point -// cancellation on intermediate results by adding up partial sums -// for positive and negative elements independently. +// cancellation on intermediate results by using "Kahan summation" +// (basically the same as manual "double-double"). #include "reduction-templates.h" #include "reduction.h" @@ -40,24 +40,17 @@ template class IntegerSumAccumulator { template class RealSumAccumulator { public: explicit RealSumAccumulator(const Descriptor &array) : array_{array} {} - void Reinitialize() { positives_ = negatives_ = inOrder_ = 0; } - template A Result() const { - auto sum{static_cast(positives_ + negatives_)}; - return std::isfinite(sum) ? sum : static_cast(inOrder_); - } + void Reinitialize() { sum_ = correction_ = 0; } + template A Result() const { return sum_; } template void GetResult(A *p, int /*zeroBasedDim*/ = -1) const { *p = Result(); } template bool Accumulate(A x) { - // Accumulate the nonnegative and negative elements independently - // to reduce cancellation; also record an in-order sum for use - // in case of overflow. - if (x >= 0) { - positives_ += x; - } else { - negatives_ += x; - } - inOrder_ += x; + // Kahan summation + auto next{x + correction_}; + auto oldSum{sum_}; + sum_ += next; + correction_ = (sum_ - oldSum) - next; // algebraically zero return true; } template bool AccumulateAt(const SubscriptValue at[]) { @@ -66,7 +59,7 @@ template class RealSumAccumulator { private: const Descriptor &array_; - INTERMEDIATE positives_{0.0}, negatives_{0.0}, inOrder_{0.0}; + INTERMEDIATE sum_{0.0}, correction_{0.0}; }; template class ComplexSumAccumulator {