-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[flang][runtime] Return +/-HUGE() for some real input roundings #75525
Conversation
@llvm/pr-subscribers-flang-runtime Author: Peter Klausler (klausler) Changes: The Fortran standard says that overflow input cases in some rounding modes (RZ, RD, RU) should round to a "representable" number. Some Fortran compilers interpret this to mean +/-HUGE(), some as +/-Inf. Follow the precedent of gfortran and the Intel compilers. Full diff: https://github.com/llvm/llvm-project/pull/75525.diff 4 Files Affected:
diff --git a/flang/lib/Decimal/big-radix-floating-point.h b/flang/lib/Decimal/big-radix-floating-point.h
index 7d5d31b7788d76..2143d1d9b3f776 100644
--- a/flang/lib/Decimal/big-radix-floating-point.h
+++ b/flang/lib/Decimal/big-radix-floating-point.h
@@ -369,6 +369,12 @@ template <int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber {
}
return result;
}
+ constexpr Raw HUGE() const { // raw encoding of +/-HUGE(), the max finite value
+ Raw result{static_cast<Raw>(Real::maxExponent)}; // exponent value used for Inf
+ result <<= Real::significandBits; // move it above an all-zero significand
+ result |= SignBit(); // presumably the sign of this number -- confirm
+ return result - 1; // decrement exponent, set all significand bits
+ }
Digit digit_[maxDigits]; // in little-endian order: digit_[0] is LSD
int digits_{0}; // # of elements in digit_[] array; zero when zero
diff --git a/flang/lib/Decimal/decimal-to-binary.cpp b/flang/lib/Decimal/decimal-to-binary.cpp
index d5b66b9fb93388..7ab78400d9f6b2 100644
--- a/flang/lib/Decimal/decimal-to-binary.cpp
+++ b/flang/lib/Decimal/decimal-to-binary.cpp
@@ -237,6 +237,15 @@ template <int PREC> class IntermediateFloat {
int exponent_{0};
};
+// Decides whether an overflowing input rounds to +/-HUGE() instead of +/-Inf:
+// true for RoundToZero, RoundDown when positive, RoundUp when negative. The
+// standard says "representable"; some popular compilers read that as HUGE().
+static inline constexpr bool RoundOverflowToHuge(
+ enum FortranRounding rounding, bool isNegative) {
+ return rounding == RoundToZero || (!isNegative && rounding == RoundDown) ||
+ (isNegative && rounding == RoundUp);
+}
+
template <int PREC>
ConversionToBinaryResult<PREC> IntermediateFloat<PREC>::ToBinary(
bool isNegative, FortranRounding rounding) const {
@@ -259,8 +268,8 @@ ConversionToBinaryResult<PREC> IntermediateFloat<PREC>::ToBinary(
if (fraction == 0 && guard <= oneHalf) {
if ((!isNegative && rounding == RoundUp) ||
(isNegative && rounding == RoundDown)) {
- // round to minimum nonzero value
- } else {
+ // round to least nonzero value
+ } else { // zero
return {Binary{}, static_cast<enum ConversionResultFlags>(flags)};
}
} else {
@@ -303,12 +312,17 @@ ConversionToBinaryResult<PREC> IntermediateFloat<PREC>::ToBinary(
expo = 0; // subnormal
}
if (expo >= Binary::maxExponent) {
- expo = Binary::maxExponent; // Inf
- flags |= Overflow;
- if constexpr (Binary::bits == 80) { // x87
- fraction = IntType{1} << 63;
- } else {
- fraction = 0;
+ if (RoundOverflowToHuge(rounding, isNegative)) {
+ expo = Binary::maxExponent - 1;
+ fraction = mask;
+ } else { // Inf
+ expo = Binary::maxExponent;
+ flags |= Overflow;
+ if constexpr (Binary::bits == 80) { // x87
+ fraction = IntType{1} << 63;
+ } else {
+ fraction = 0;
+ }
}
}
using Raw = typename Binary::RawType;
@@ -344,8 +358,12 @@ BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary() {
} else { // underflow to +/-0.
return {Real{SignBit()}, Inexact};
}
- } else if (exponent_ > crazy) { // overflow to +/-Inf.
- return {Real{Infinity()}, Overflow};
+ } else if (exponent_ > crazy) { // overflow to +/-HUGE() or +/-Inf
+ if (RoundOverflowToHuge(rounding_, isNegative_)) {
+ return {Real{HUGE()}};
+ } else {
+ return {Real{Infinity()}, Overflow};
+ }
}
// Apply any negative decimal exponent by multiplication
// by a power of two, adjusting the binary exponent to compensate.
diff --git a/flang/runtime/edit-input.cpp b/flang/runtime/edit-input.cpp
index 822099b5141b1b..79c156a43501df 100644
--- a/flang/runtime/edit-input.cpp
+++ b/flang/runtime/edit-input.cpp
@@ -640,20 +640,29 @@ decimal::ConversionToBinaryResult<binaryPrecision> ConvertHexadecimal(
}
// Package & return result
constexpr RawType significandMask{(one << RealType::significandBits) - 1};
+ int flags{(roundingBit | guardBit) ? decimal::Inexact : decimal::Exact};
if (!fraction) {
expo = 0;
} else if (expo == 1 && !(fraction >> (binaryPrecision - 1))) {
expo = 0; // subnormal
} else if (expo >= RealType::maxExponent) {
- expo = RealType::maxExponent; // +/-Inf
- fraction = 0;
+ if (rounding == decimal::RoundToZero ||
+ (rounding == decimal::RoundDown && !isNegative) ||
+ (rounding == decimal::RoundUp && isNegative)) {
+ expo = RealType::maxExponent - 1; // +/-HUGE()
+ fraction = significandMask;
+ } else {
+ expo = RealType::maxExponent; // +/-Inf
+ fraction = 0;
+ flags |= decimal::Overflow;
+ }
} else {
fraction &= significandMask; // remove explicit normalization unless x87
}
return decimal::ConversionToBinaryResult<binaryPrecision>{
RealType{static_cast<RawType>(signBit |
static_cast<RawType>(expo) << RealType::significandBits | fraction)},
- (roundingBit | guardBit) ? decimal::Inexact : decimal::Exact};
+ static_cast<decimal::ConversionResultFlags>(flags)};
}
template <int KIND>
diff --git a/flang/unittests/Runtime/NumericalFormatTest.cpp b/flang/unittests/Runtime/NumericalFormatTest.cpp
index b5b8eb05943732..f5c19d2bd71ced 100644
--- a/flang/unittests/Runtime/NumericalFormatTest.cpp
+++ b/flang/unittests/Runtime/NumericalFormatTest.cpp
@@ -840,49 +840,66 @@ TEST(IOApiTests, FormatIntegerValues) {
// Ensure double input values correctly map to raw uint64 values
TEST(IOApiTests, EditDoubleInputValues) {
- using TestCaseTy = std::tuple<const char *, const char *, std::uint64_t>;
+ using TestCaseTy = std::tuple<const char *, const char *, std::uint64_t, int>;
+ int ovf{IostatRealInputOverflow};
static const std::vector<TestCaseTy> testCases{
- {"(F18.0)", " 0", 0x0},
- {"(F18.0)", " ", 0x0},
- {"(F18.0)", " -0", 0x8000000000000000},
- {"(F18.0)", " 01", 0x3ff0000000000000},
- {"(F18.0)", " 1", 0x3ff0000000000000},
- {"(F18.0)", " 125.", 0x405f400000000000},
- {"(F18.0)", " 12.5", 0x4029000000000000},
- {"(F18.0)", " 1.25", 0x3ff4000000000000},
- {"(F18.0)", " 01.25", 0x3ff4000000000000},
- {"(F18.0)", " .125", 0x3fc0000000000000},
- {"(F18.0)", " 0.125", 0x3fc0000000000000},
- {"(F18.0)", " .0625", 0x3fb0000000000000},
- {"(F18.0)", " 0.0625", 0x3fb0000000000000},
- {"(F18.0)", " 125", 0x405f400000000000},
- {"(F18.1)", " 125", 0x4029000000000000},
- {"(F18.2)", " 125", 0x3ff4000000000000},
- {"(F18.3)", " 125", 0x3fc0000000000000},
- {"(-1P,F18.0)", " 125", 0x4093880000000000}, // 1250
- {"(1P,F18.0)", " 125", 0x4029000000000000}, // 12.5
- {"(BZ,F18.0)", " 125 ", 0x4093880000000000}, // 1250
- {"(BZ,F18.0)", " 125 . e +1 ", 0x42a6bcc41e900000}, // 1.25e13
- {"(BZ,F18.0)", " . ", 0x0},
- {"(BZ,F18.0)", " . e +1 ", 0x0},
- {"(DC,F18.0)", " 12,5", 0x4029000000000000},
- {"(EX22.0)", "0X0P0 ", 0x0}, // +0.
- {"(EX22.0)", "-0X0P0 ", 0x8000000000000000}, // -0.
- {"(EX22.0)", "0X.8P1 ", 0x3ff0000000000000}, // 1.0
- {"(EX22.0)", "0X8.P-3 ", 0x3ff0000000000000}, // 1.0
- {"(EX22.0)", "0X.1P4 ", 0x3ff0000000000000}, // 1.0
- {"(EX22.0)", "0X10.P-4 ", 0x3ff0000000000000}, // 1.0
- {"(EX22.0)", "0X8.00P-3 ", 0x3ff0000000000000}, // 1.0
- {"(EX22.0)", "0X80.0P-6 ", 0x4000000000000000}, // 2.0
- {"(EX22.0)", "0XC.CCCCCCCCCCCDP-7 ", 0x3fb999999999999a}, // 0.1
- {"(EX22.0)", "0X.8P-1021 ", 0x0010000000000000}, // min normal
- {"(EX22.0)", "0X.8P-1022 ", 0x0008000000000000}, // subnormal
- {"(EX22.0)", "0X.8P-1073 ", 0x0000000000000001}, // min subn.
- {"(EX22.0)", "0X.FFFFFFFFFFFFF8P1024", 0x7fefffffffffffff}, // max finite
- {"(EX22.0)", "0X.8P1025 ", 0x7ff0000000000000}, // +Inf
- {"(EX22.0)", "-0X.8P1025 ", 0xfff0000000000000}, // -Inf
+ {"(F18.0)", " 0", 0x0, 0},
+ {"(F18.0)", " ", 0x0, 0},
+ {"(F18.0)", " -0", 0x8000000000000000, 0},
+ {"(F18.0)", " 01", 0x3ff0000000000000, 0},
+ {"(F18.0)", " 1", 0x3ff0000000000000, 0},
+ {"(F18.0)", " 125.", 0x405f400000000000, 0},
+ {"(F18.0)", " 12.5", 0x4029000000000000, 0},
+ {"(F18.0)", " 1.25", 0x3ff4000000000000, 0},
+ {"(F18.0)", " 01.25", 0x3ff4000000000000, 0},
+ {"(F18.0)", " .125", 0x3fc0000000000000, 0},
+ {"(F18.0)", " 0.125", 0x3fc0000000000000, 0},
+ {"(F18.0)", " .0625", 0x3fb0000000000000, 0},
+ {"(F18.0)", " 0.0625", 0x3fb0000000000000, 0},
+ {"(F18.0)", " 125", 0x405f400000000000, 0},
+ {"(F18.1)", " 125", 0x4029000000000000, 0},
+ {"(F18.2)", " 125", 0x3ff4000000000000, 0},
+ {"(F18.3)", " 125", 0x3fc0000000000000, 0},
+ {"(-1P,F18.0)", " 125", 0x4093880000000000, 0}, // 1250
+ {"(1P,F18.0)", " 125", 0x4029000000000000, 0}, // 12.5
+ {"(BZ,F18.0)", " 125 ", 0x4093880000000000, 0}, // 1250
+ {"(BZ,F18.0)", " 125 . e +1 ", 0x42a6bcc41e900000, 0}, // 1.25e13
+ {"(BZ,F18.0)", " . ", 0x0, 0},
+ {"(BZ,F18.0)", " . e +1 ", 0x0, 0},
+ {"(DC,F18.0)", " 12,5", 0x4029000000000000, 0},
+ {"(EX22.0)", "0X0P0 ", 0x0, 0}, // +0.
+ {"(EX22.0)", "-0X0P0 ", 0x8000000000000000, 0}, // -0.
+ {"(EX22.0)", "0X.8P1 ", 0x3ff0000000000000, 0}, // 1.0
+ {"(EX22.0)", "0X8.P-3 ", 0x3ff0000000000000, 0}, // 1.0
+ {"(EX22.0)", "0X.1P4 ", 0x3ff0000000000000, 0}, // 1.0
+ {"(EX22.0)", "0X10.P-4 ", 0x3ff0000000000000, 0}, // 1.0
+ {"(EX22.0)", "0X8.00P-3 ", 0x3ff0000000000000, 0}, // 1.0
+ {"(EX22.0)", "0X80.0P-6 ", 0x4000000000000000, 0}, // 2.0
+ {"(EX22.0)", "0XC.CCCCCCCCCCCDP-7 ", 0x3fb999999999999a, 0}, // 0.1
+ {"(EX22.0)", "0X.8P-1021 ", 0x0010000000000000,
+ 0}, // min normal
+ {"(EX22.0)", "0X.8P-1022 ", 0x0008000000000000,
+ 0}, // subnormal
+ {"(EX22.0)", "0X.8P-1073 ", 0x0000000000000001,
+ 0}, // min subn.
+ {"(EX22.0)", "0X.FFFFFFFFFFFFF8P1024", 0x7fefffffffffffff,
+ 0}, // max finite
+ {"(EX22.0)", "0X.8P1025 ", 0x7ff0000000000000, ovf}, // +Inf
+ {"(EX22.0)", "-0X.8P1025 ", 0xfff0000000000000, ovf}, // -Inf
+ {"(RZ,F7.0)", " 2.e308", 0x7fefffffffffffff, 0}, // +HUGE()
+ {"(RD,F7.0)", " 2.e308", 0x7fefffffffffffff, 0}, // +HUGE()
+ {"(RU,F7.0)", " 2.e308", 0x7ff0000000000000, ovf}, // +Inf
+ {"(RZ,F7.0)", "-2.e308", 0xffefffffffffffff, 0}, // -HUGE()
+ {"(RD,F7.0)", "-2.e308", 0xfff0000000000000, ovf}, // -Inf
+ {"(RU,F7.0)", "-2.e308", 0xffefffffffffffff, 0}, // -HUGE()
+ {"(RZ,F7.0)", " 1.e999", 0x7fefffffffffffff, 0}, // +HUGE()
+ {"(RD,F7.0)", " 1.e999", 0x7fefffffffffffff, 0}, // +HUGE()
+ {"(RU,F7.0)", " 1.e999", 0x7ff0000000000000, ovf}, // +Inf
+ {"(RZ,F7.0)", "-1.e999", 0xffefffffffffffff, 0}, // -HUGE()
+ {"(RD,F7.0)", "-1.e999", 0xfff0000000000000, ovf}, // -Inf
+ {"(RU,F7.0)", "-1.e999", 0xffefffffffffffff, 0}, // -HUGE()
};
- for (auto const &[format, data, want] : testCases) {
+ for (auto const &[format, data, want, iostat] : testCases) {
auto cookie{IONAME(BeginInternalFormattedInput)(
data, std::strlen(data), format, std::strlen(format))};
union {
@@ -899,12 +916,14 @@ TEST(IOApiTests, EditDoubleInputValues) {
char iomsg[bufferSize];
std::memset(iomsg, '\0', bufferSize - 1);
- // Ensure no errors were encountered reading input buffer into union value
+ // Ensure no unexpected errors were encountered reading input buffer into
+ // union value
IONAME(GetIoMsg)(cookie, iomsg, bufferSize - 1);
auto status{IONAME(EndIoStatement)(cookie)};
- ASSERT_EQ(status, 0) << '\'' << format << "' failed reading '" << data
- << "', status " << static_cast<int>(status)
- << " iomsg '" << iomsg << "'";
+ ASSERT_EQ(status, iostat)
+ << '\'' << format << "' failed reading '" << data << "', status "
+ << static_cast<int>(status) << " != expected " << iostat << " iomsg '"
+ << iomsg << "'";
// Ensure raw uint64 value matches expected conversion from double
ASSERT_EQ(u.raw, want) << '\'' << format << "' failed reading '" << data
|
The Fortran standard says that overflow input cases in some rounding modes (RZ, RD, RU) should round to a "representable" number. Some Fortran compilers interpret this to mean +/-HUGE(), some as +/-Inf. Follow the precedent of gfortran and the Intel compilers.
The Fortran standard says that overflow input cases in some rounding modes (RZ, RD, RU) should round to a "representable" number. Some Fortran compilers interpret this to mean +/-HUGE(), some as +/-Inf. Follow the precedent of gfortran and the Intel compilers.