Skip to content

Commit

Permalink
[flang] Runtime performance improvements to real formatted input
Browse files Browse the repository at this point in the history
Profiling a basic internal real input read benchmark shows some
hot spots in the code used to prepare input for decimal-to-binary
conversion, which is of course where the time should be spent.
The library that implements decimal to/from binary conversions has
been optimized, but not the code in the Fortran runtime that calls it,
and there are some obvious light changes worth making here.

Move some member functions from *.cpp files into the class definitions
of Descriptor and IoStatementState to enable inlining and specialization.

Make GetNextInputBytes() the new basic input API within the
runtime, replacing GetCurrentChar() -- which is rewritten in terms of
GetNextInputBytes -- so that input routines can have the
ability to acquire more than one input character at a time
and amortize overhead.

These changes speed up the time to read 1M random reals
using internal I/O from a character array from 1.29s to 0.54s
on my machine, which is on par with Intel Fortran and much faster than
GNU Fortran.

Differential Revision: https://reviews.llvm.org/D113697
  • Loading branch information
klausler committed Nov 12, 2021
1 parent eb6f9f3 commit da25f96
Show file tree
Hide file tree
Showing 13 changed files with 354 additions and 225 deletions.
16 changes: 8 additions & 8 deletions flang/include/flang/Decimal/decimal.h
Expand Up @@ -101,21 +101,21 @@ template <int PREC> struct ConversionToBinaryResult {
};

template <int PREC>
ConversionToBinaryResult<PREC> ConvertToBinary(
const char *&, enum FortranRounding = RoundNearest);
ConversionToBinaryResult<PREC> ConvertToBinary(const char *&,
enum FortranRounding = RoundNearest, const char *end = nullptr);

extern template ConversionToBinaryResult<8> ConvertToBinary<8>(
const char *&, enum FortranRounding);
const char *&, enum FortranRounding, const char *end = nullptr);
extern template ConversionToBinaryResult<11> ConvertToBinary<11>(
const char *&, enum FortranRounding);
const char *&, enum FortranRounding, const char *end = nullptr);
extern template ConversionToBinaryResult<24> ConvertToBinary<24>(
const char *&, enum FortranRounding);
const char *&, enum FortranRounding, const char *end = nullptr);
extern template ConversionToBinaryResult<53> ConvertToBinary<53>(
const char *&, enum FortranRounding);
const char *&, enum FortranRounding, const char *end = nullptr);
extern template ConversionToBinaryResult<64> ConvertToBinary<64>(
const char *&, enum FortranRounding);
const char *&, enum FortranRounding, const char *end = nullptr);
extern template ConversionToBinaryResult<113> ConvertToBinary<113>(
const char *&, enum FortranRounding);
const char *&, enum FortranRounding, const char *end = nullptr);
} // namespace Fortran::decimal
extern "C" {
#define NS(x) Fortran::decimal::x
Expand Down
45 changes: 42 additions & 3 deletions flang/include/flang/Runtime/descriptor.h
Expand Up @@ -247,12 +247,51 @@ class Descriptor {
// subscripts of the array, these wrap the subscripts around to
// their first (or last) values and return false.
bool IncrementSubscripts(
SubscriptValue[], const int *permutation = nullptr) const;
SubscriptValue subscript[], const int *permutation = nullptr) const {
for (int j{0}; j < raw_.rank; ++j) {
int k{permutation ? permutation[j] : j};
const Dimension &dim{GetDimension(k)};
if (subscript[k]++ < dim.UpperBound()) {
return true;
}
subscript[k] = dim.LowerBound();
}
return false;
}

bool DecrementSubscripts(
SubscriptValue[], const int *permutation = nullptr) const;

// False when out of range.
bool SubscriptsForZeroBasedElementNumber(SubscriptValue *,
std::size_t elementNumber, const int *permutation = nullptr) const;
// Converts a zero-based element number into a set of subscripts.
// When "permutation" is non-null, permutation[j] names the dimension at
// position j, and position 0 is the one whose subscript varies fastest;
// otherwise dimensions are used in their natural order.  On success each
// subscript[] entry holds its dimension's lower bound plus the
// zero-based index within that dimension.
// Returns false (leaving subscript[] unmodified) when elementNumber is
// not less than the product of the extents; for rank 0, only element
// number zero is accepted.
bool SubscriptsForZeroBasedElementNumber(SubscriptValue subscript[],
    std::size_t elementNumber, const int *permutation = nullptr) const {
  if (raw_.rank == 0) {
    return elementNumber == 0;
  }
  // multiplier[j] is the product of the extents of positions 0..j-1.
  std::size_t multiplier[maxRank];
  const int fastestDim{permutation ? permutation[0] : 0};
  multiplier[0] = 1;
  auto elementCount{
      static_cast<std::size_t>(GetDimension(fastestDim).Extent())};
  for (int pos{1}; pos < raw_.rank; ++pos) {
    const int dimIndex{permutation ? permutation[pos] : pos};
    multiplier[pos] = elementCount;
    elementCount *= GetDimension(dimIndex).Extent();
  }
  if (elementNumber >= elementCount) {
    return false; // out of range
  }
  // Peel off indices from the slowest-varying position down to position 1;
  // whatever remains is the index for position 0.
  std::size_t remaining{elementNumber};
  for (int pos{raw_.rank - 1}; pos >= 1; --pos) {
    const int dimIndex{permutation ? permutation[pos] : pos};
    const std::size_t index{remaining / multiplier[pos]};
    subscript[dimIndex] = index + GetDimension(dimIndex).LowerBound();
    remaining -= index * multiplier[pos];
  }
  subscript[fastestDim] = remaining + GetDimension(fastestDim).LowerBound();
  return true;
}

std::size_t ZeroBasedElementNumber(
const SubscriptValue *, const int *permutation = nullptr) const;

Expand Down
10 changes: 8 additions & 2 deletions flang/lib/Decimal/big-radix-floating-point.h
Expand Up @@ -87,7 +87,8 @@ template <int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber {
// spaces.
// The argument is a reference to a pointer that is left
// pointing to the first character that wasn't parsed.
ConversionToBinaryResult<PREC> ConvertToBinary(const char *&);
ConversionToBinaryResult<PREC> ConvertToBinary(
const char *&, const char *end = nullptr);

// Formats a decimal floating-point number to a user buffer.
// May emit "NaN" or "Inf", or a possibly-signed integer.
Expand Down Expand Up @@ -337,7 +338,12 @@ template <int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber {
// Returns true when the result has effectively been rounded down.
bool Mean(const BigRadixFloatingPointNumber &);

bool ParseNumber(const char *&, bool &inexact);
// Parses a floating-point number; leaves the pointer reference
// argument pointing at the next character after what was recognized.
// The "end" argument can be left null if the caller is sure that the
// string is properly terminated with an addressable character that
// can't be part of a valid floating-point number.
bool ParseNumber(const char *&, bool &inexact, const char *end);

using Raw = typename Real::RawType;
// Returns a Raw value holding isNegative_ shifted left into bit position
// Real::bits - 1; all other bits are zero.
constexpr Raw SignBit() const { return Raw{isNegative_} << (Real::bits - 1); }
Expand Down
75 changes: 43 additions & 32 deletions flang/lib/Decimal/decimal-to-binary.cpp
Expand Up @@ -19,34 +19,39 @@ namespace Fortran::decimal {

template <int PREC, int LOG10RADIX>
bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber(
const char *&p, bool &inexact) {
const char *&p, bool &inexact, const char *end) {
SetToZero();
while (*p == ' ') {
++p;
if (end && p >= end) {
return false;
}
// Skip leading spaces
for (; p != end && *p == ' '; ++p) {
}
if (p == end) {
return false;
}
const char *q{p};
isNegative_ = *q == '-';
if (*q == '-' || *q == '+') {
++q;
}
const char *start{q};
while (*q == '0') {
++q;
for (; q != end && *q == '0'; ++q) {
}
const char *first{q};
for (; *q >= '0' && *q <= '9'; ++q) {
const char *firstDigit{q};
for (; q != end && *q >= '0' && *q <= '9'; ++q) {
}
const char *point{nullptr};
if (*q == '.') {
if (q != end && *q == '.') {
point = q;
for (++q; *q >= '0' && *q <= '9'; ++q) {
for (++q; q != end && *q >= '0' && *q <= '9'; ++q) {
}
}
if (q == start || (q == start + 1 && *start == '.')) {
if (q == start || (q == start + 1 && start == point)) {
return false; // require at least one digit
}
// There's a valid number here; set the reference argument to point to
// the first character afterward.
// the first character afterward, which might be an exponent part.
p = q;
// Strip off trailing zeroes
if (point) {
Expand All @@ -59,13 +64,13 @@ bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber(
}
}
if (!point) {
while (q > first && q[-1] == '0') {
while (q > firstDigit && q[-1] == '0') {
--q;
++exponent_;
}
}
// Trim any excess digits
const char *limit{first + maxDigits * log10Radix + (point != nullptr)};
const char *limit{firstDigit + maxDigits * log10Radix + (point != nullptr)};
if (q > limit) {
inexact = true;
if (point >= limit) {
Expand All @@ -80,11 +85,11 @@ bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber(
if (point) {
exponent_ -= static_cast<int>(q - point - 1);
}
if (q == first) {
if (q == firstDigit) {
exponent_ = 0; // all zeros
}
// Rack the decimal digits up into big Digits.
for (auto times{radix}; q-- > first;) {
for (auto times{radix}; q-- > firstDigit;) {
if (*q != '.') {
if (times == radix) {
digit_[digits_++] = *q - '0';
Expand All @@ -96,6 +101,9 @@ bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber(
}
}
// Look for an optional exponent field.
if (p == end) {
return true;
}
q = p;
switch (*q) {
case 'e':
Expand All @@ -104,18 +112,20 @@ bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber(
case 'D':
case 'q':
case 'Q': {
bool negExpo{*++q == '-'};
if (++q == end) {
break;
}
bool negExpo{*q == '-'};
if (*q == '-' || *q == '+') {
++q;
}
if (*q >= '0' && *q <= '9') {
if (q != end && *q >= '0' && *q <= '9') {
int expo{0};
while (*q == '0') {
++q;
for (; q != end && *q == '0'; ++q) {
}
const char *expDig{q};
while (*q >= '0' && *q <= '9') {
expo = 10 * expo + *q++ - '0';
for (; q != end && *q >= '0' && *q <= '9'; ++q) {
expo = 10 * expo + *q - '0';
}
if (q >= expDig + 8) {
// There's a ridiculous number of nonzero exponent digits.
Expand All @@ -125,7 +135,7 @@ bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber(
expo = 10 * Real::decimalRange;
exponent_ = 0;
}
p = q; // exponent was valid
p = q; // exponent is valid; advance the termination pointer
if (negExpo) {
exponent_ -= expo;
} else {
Expand Down Expand Up @@ -385,9 +395,10 @@ BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary() {

template <int PREC, int LOG10RADIX>
ConversionToBinaryResult<PREC>
BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary(const char *&p) {
BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary(
const char *&p, const char *limit) {
bool inexact{false};
if (ParseNumber(p, inexact)) {
if (ParseNumber(p, inexact, limit)) {
auto result{ConvertToBinary()};
if (inexact) {
result.flags =
Expand Down Expand Up @@ -422,22 +433,22 @@ BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary(const char *&p) {

template <int PREC>
ConversionToBinaryResult<PREC> ConvertToBinary(
const char *&p, enum FortranRounding rounding) {
return BigRadixFloatingPointNumber<PREC>{rounding}.ConvertToBinary(p);
const char *&p, enum FortranRounding rounding, const char *end) {
return BigRadixFloatingPointNumber<PREC>{rounding}.ConvertToBinary(p, end);
}

template ConversionToBinaryResult<8> ConvertToBinary<8>(
const char *&, enum FortranRounding);
const char *&, enum FortranRounding, const char *end);
template ConversionToBinaryResult<11> ConvertToBinary<11>(
const char *&, enum FortranRounding);
const char *&, enum FortranRounding, const char *end);
template ConversionToBinaryResult<24> ConvertToBinary<24>(
const char *&, enum FortranRounding);
const char *&, enum FortranRounding, const char *end);
template ConversionToBinaryResult<53> ConvertToBinary<53>(
const char *&, enum FortranRounding);
const char *&, enum FortranRounding, const char *end);
template ConversionToBinaryResult<64> ConvertToBinary<64>(
const char *&, enum FortranRounding);
const char *&, enum FortranRounding, const char *end);
template ConversionToBinaryResult<113> ConvertToBinary<113>(
const char *&, enum FortranRounding);
const char *&, enum FortranRounding, const char *end);

extern "C" {
enum ConversionResultFlags ConvertDecimalToFloat(
Expand Down
36 changes: 0 additions & 36 deletions flang/runtime/descriptor.cpp
Expand Up @@ -163,19 +163,6 @@ int Descriptor::Destroy(bool finalize) {

// Forwards to ISO::CFI_deallocate() on this descriptor's raw_ member and
// returns that call's result unchanged.
int Descriptor::Deallocate() {
  const auto result{ISO::CFI_deallocate(&raw_)};
  return result;
}

// Advances subscript[] to the next element.  Positions are visited in
// order 0, 1, ...; when "permutation" is non-null, permutation[j] names
// the dimension at position j.  A subscript that was below its dimension's
// upper bound is incremented and the function returns true.  Otherwise
// that subscript is reset to the dimension's lower bound and the next
// position is tried.  Returns false once every position has wrapped.
bool Descriptor::IncrementSubscripts(
    SubscriptValue *subscript, const int *permutation) const {
  int pos{0};
  while (pos < raw_.rank) {
    const int dimIndex{permutation ? permutation[pos] : pos};
    const Dimension &dimension{GetDimension(dimIndex)};
    SubscriptValue &current{subscript[dimIndex]};
    // Compare the value before the increment, as a post-increment would.
    const bool wasBelowUpper{current < dimension.UpperBound()};
    ++current;
    if (wasBelowUpper) {
      return true;
    }
    current = dimension.LowerBound();
    ++pos;
  }
  return false;
}

bool Descriptor::DecrementSubscripts(
SubscriptValue *subscript, const int *permutation) const {
for (int j{raw_.rank - 1}; j >= 0; --j) {
Expand All @@ -202,29 +189,6 @@ std::size_t Descriptor::ZeroBasedElementNumber(
return result;
}

// Converts a zero-based element number into a set of subscripts.
// When "permutation" is non-null, permutation[j] names the dimension at
// position j, and position 0 is the one whose subscript varies fastest.
// Returns false, without writing to subscript[], when elementNumber is
// not less than the product of all extents (which is 1 for rank 0).
bool Descriptor::SubscriptsForZeroBasedElementNumber(SubscriptValue *subscript,
    std::size_t elementNumber, const int *permutation) const {
  // multiplier[j] is the product of the extents of positions 0..j-1.
  std::size_t multiplier[maxRank];
  std::size_t elementCount{1};
  for (int pos{0}; pos < raw_.rank; ++pos) {
    const int dimIndex{permutation ? permutation[pos] : pos};
    multiplier[pos] = elementCount;
    elementCount *= GetDimension(dimIndex).Extent();
  }
  if (elementNumber >= elementCount) {
    return false; // out of range
  }
  // Peel off indices starting with the slowest-varying position.
  int pos{raw_.rank - 1};
  while (pos >= 0) {
    const int dimIndex{permutation ? permutation[pos] : pos};
    const Dimension &dimension{GetDimension(dimIndex)};
    const std::size_t index{elementNumber / multiplier[pos]};
    subscript[dimIndex] = index + dimension.LowerBound();
    elementNumber -= index * multiplier[pos];
    --pos;
  }
  return true;
}

bool Descriptor::EstablishPointerSection(const Descriptor &source,
const SubscriptValue *lower, const SubscriptValue *upper,
const SubscriptValue *stride) {
Expand Down

0 comments on commit da25f96

Please sign in to comment.