Skip to content

Commit

Permalink
[APFloat] Fix truncation of certain subnormal numbers
Browse files Browse the repository at this point in the history
Certain subnormals would be incorrectly rounded away from zero, instead of to zero, when converted to a narrower format (e.g. double to float).

Fixes #55838

Differential Revision: https://reviews.llvm.org/D127140
  • Loading branch information
danilaml committed Jun 8, 2022
1 parent d897a14 commit ed6c309
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 10 deletions.
9 changes: 8 additions & 1 deletion llvm/lib/Support/APFloat.cpp
Expand Up @@ -2213,15 +2213,22 @@ IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
// when truncating from PowerPC double-double to double format), the
// right shift could lose result mantissa bits. Adjust exponent instead
// of performing excessive shift.
// Also do a similar trick in case shifting a denormal would produce a zero
// significand, as this case isn't handled correctly by normalize.
if (shift < 0 && isFiniteNonZero()) {
int exponentChange = significandMSB() + 1 - fromSemantics.precision;
int omsb = significandMSB() + 1;
int exponentChange = omsb - fromSemantics.precision;
if (exponent + exponentChange < toSemantics.minExponent)
exponentChange = toSemantics.minExponent - exponent;
if (exponentChange < shift)
exponentChange = shift;
if (exponentChange < 0) {
shift -= exponentChange;
exponent += exponentChange;
} else if (omsb <= -shift) {
exponentChange = omsb + shift - 1; // leave at least one bit set
shift -= exponentChange;
exponent += exponentChange;
}
}

Expand Down
12 changes: 3 additions & 9 deletions llvm/test/Transforms/InstSimplify/ConstProp/cast.ll
Expand Up @@ -79,21 +79,17 @@ define float @trunc_denorm_lost_fraction0() {
ret float %b
}

; FIXME: This should be 0.0.

; Truncates the double constant 0x0000000010000001 (all exponent bits zero,
; i.e. a subnormal double) to float and returns the result; the CHECK lines
; pin the constant value the test expects the `ret` to carry.
; NOTE(review): two CHECK-NEXT lines appear below, which looks like an
; unmarked diff view: presumably the first (0x36A0000000000000) is the old,
; removed expectation and the second (0.0) is the new one. Confirm against
; the actual cast.ll before relying on either.
define float @trunc_denorm_lost_fraction1() {
; CHECK-LABEL: @trunc_denorm_lost_fraction1(
; CHECK-NEXT: ret float 0x36A0000000000000
; CHECK-NEXT: ret float 0.000000e+00
;
%b = fptrunc double 0x0000000010000001 to float
ret float %b
}

; FIXME: This should be 0.0.

define float @trunc_denorm_lost_fraction2() {
; CHECK-LABEL: @trunc_denorm_lost_fraction2(
; CHECK-NEXT: ret float 0x36A0000000000000
; CHECK-NEXT: ret float 0.000000e+00
;
%b = fptrunc double 0x000000001fffffff to float
ret float %b
Expand All @@ -107,11 +103,9 @@ define float @trunc_denorm_lost_fraction3() {
ret float %b
}

; FIXME: This should be -0.0.

define float @trunc_denorm_lost_fraction4() {
; CHECK-LABEL: @trunc_denorm_lost_fraction4(
; CHECK-NEXT: ret float 0xB6A0000000000000
; CHECK-NEXT: ret float -0.000000e+00
;
%b = fptrunc double 0x8000000010000001 to float
ret float %b
Expand Down
42 changes: 42 additions & 0 deletions llvm/unittests/ADT/APFloatTest.cpp
Expand Up @@ -1859,6 +1859,48 @@ TEST(APFloatTest, convert) {
EXPECT_EQ(0x7fc00000, test.bitcastToAPInt());
EXPECT_TRUE(losesInfo);
EXPECT_EQ(status, APFloat::opOK);

// Test that subnormals are handled correctly in double to float conversion
test = APFloat(APFloat::IEEEdouble(), "0x0.0000010000000p-1022");
test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);
EXPECT_EQ(0.0f, test.convertToFloat());
EXPECT_TRUE(losesInfo);

test = APFloat(APFloat::IEEEdouble(), "0x0.0000010000001p-1022");
test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);
EXPECT_EQ(0.0f, test.convertToFloat());
EXPECT_TRUE(losesInfo);

test = APFloat(APFloat::IEEEdouble(), "-0x0.0000010000001p-1022");
test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);
EXPECT_EQ(0.0f, test.convertToFloat());
EXPECT_TRUE(losesInfo);

test = APFloat(APFloat::IEEEdouble(), "0x0.0000020000000p-1022");
test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);
EXPECT_EQ(0.0f, test.convertToFloat());
EXPECT_TRUE(losesInfo);

test = APFloat(APFloat::IEEEdouble(), "0x0.0000020000001p-1022");
test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);
EXPECT_EQ(0.0f, test.convertToFloat());
EXPECT_TRUE(losesInfo);

// Test subnormal conversion to bfloat
test = APFloat(APFloat::IEEEsingle(), "0x0.01p-126");
test.convert(APFloat::BFloat(), APFloat::rmNearestTiesToEven, &losesInfo);
EXPECT_EQ(0.0f, test.convertToFloat());
EXPECT_TRUE(losesInfo);

test = APFloat(APFloat::IEEEsingle(), "0x0.02p-126");
test.convert(APFloat::BFloat(), APFloat::rmNearestTiesToEven, &losesInfo);
EXPECT_EQ(0x01, test.bitcastToAPInt());
EXPECT_FALSE(losesInfo);

test = APFloat(APFloat::IEEEsingle(), "0x0.01p-126");
test.convert(APFloat::BFloat(), APFloat::rmNearestTiesToAway, &losesInfo);
EXPECT_EQ(0x01, test.bitcastToAPInt());
EXPECT_TRUE(losesInfo);
}

TEST(APFloatTest, PPCDoubleDouble) {
Expand Down

0 comments on commit ed6c309

Please sign in to comment.