Permalink
Cannot retrieve contributors at this time
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
950 lines (891 sloc)
26.6 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// efficient double to text conversion using the GRISU-1 algorithm | |
// - as a complement to SynCommons, which tended to increase too much | |
// - licensed under a MPL/GPL/LGPL tri-license; version 1.18 | |
{ | |
Implement 64-bit floating point (double) to ASCII conversion using the | |
GRISU-1 efficient algorithm. | |
Original Code in flt_core.inc flt_conv.inc flt_pack.inc from FPC RTL. | |
Copyright (C) 2013 by Max Nazhalov | |
Licenced with LGPL 2 with the linking exception. | |
If you don't agree with these License terms, disable this feature | |
by undefining DOUBLETOSHORT_USEGRISU in Synopse.inc | |
GRISU Original Algorithm | |
Copyright (c) 2009 Florian Loitsch | |
We extracted a double-to-ascii only cut-down version of those files, | |
and made a huge refactoring to reach the best performance, especially | |
tuning the Intel target with some dedicated asm and code rewrite. | |
With Delphi 10.3 on Win32: (no benefit) | |
100000 FloatToText in 38.11ms i.e. 2,623,570/s, aver. 0us, 47.5 MB/s | |
100000 str in 43.19ms i.e. 2,315,082/s, aver. 0us, 50.7 MB/s | |
100000 DoubleToShort in 45.50ms i.e. 2,197,367/s, aver. 0us, 43.8 MB/s | |
100000 DoubleToAscii in 42.44ms i.e. 2,356,045/s, aver. 0us, 47.8 MB/s | |
With Delphi 10.3 on Win64: | |
100000 FloatToText in 61.83ms i.e. 1,617,233/s, aver. 0us, 29.3 MB/s | |
100000 str in 53.20ms i.e. 1,879,663/s, aver. 0us, 41.2 MB/s | |
100000 DoubleToShort in 18.45ms i.e. 5,417,998/s, aver. 0us, 108 MB/s | |
100000 DoubleToAscii in 18.19ms i.e. 5,496,921/s, aver. 0us, 111.5 MB/s | |
With FPC on Win32: | |
100000 FloatToText in 115.62ms i.e. 864,842/s, aver. 1us, 15.6 MB/s | |
100000 str in 57.30ms i.e. 1,745,109/s, aver. 0us, 39.9 MB/s | |
100000 DoubleToShort in 23.88ms i.e. 4,187,078/s, aver. 0us, 83.5 MB/s | |
100000 DoubleToAscii in 23.34ms i.e. 4,284,490/s, aver. 0us, 86.9 MB/s | |
With FPC on Win64: | |
100000 FloatToText in 76.92ms i.e. 1,300,052/s, aver. 0us, 23.5 MB/s | |
100000 str in 27.70ms i.e. 3,609,456/s, aver. 0us, 82.6 MB/s | |
100000 DoubleToShort in 14.73ms i.e. 6,787,944/s, aver. 0us, 135.4 MB/s | |
100000 DoubleToAscii in 13.78ms i.e. 7,253,735/s, aver. 0us, 147.2 MB/s | |
With FPC on Linux x86_64: | |
100000 FloatToText in 81.48ms i.e. 1,227,249/s, aver. 0us, 22.2 MB/s | |
100000 str in 36.98ms i.e. 2,703,871/s, aver. 0us, 61.8 MB/s | |
100000 DoubleToShort in 13.11ms i.e. 7,626,601/s, aver. 0us, 152.1 MB/s | |
100000 DoubleToAscii in 12.59ms i.e. 7,942,180/s, aver. 0us, 161.2 MB/s | |
- Our rewrite is twice faster than original flt_conv.inc from FPC RTL (str) | |
- Delphi Win32 has trouble making 64-bit computation - no benefit since it | |
has good optimized i87 asm (but slower than our code with FPC/Win32) | |
- FPC is more efficient when compiling integer arithmetic; we avoided slow | |
division by calling our Div100(), but Delphi Win64 is still far behind | |
- Delphi Win64 has very slow FloatToText and str() | |
} | |
// Controls printing of NaN-sign. | |
// Undefine to print NaN sign during float->ASCII conversion. | |
// IEEE does not interpret the sign of a NaN, so leave it defined. | |
{$define GRISU1_F2A_NAN_SIGNLESS} | |
// Controls rounding of generated digits when formatting with narrowed | |
// width (either fixed or exponential notation). | |
// Traditionally, FPC and BP7/Delphi use "roundTiesToAway" mode. | |
// Undefine to use "roundTiesToEven" approach. | |
{$define GRISU1_F2A_HALF_ROUNDUP} | |
// This one is a hack against Grusu sub-optimality. | |
// It may be used only strictly together with GRISU1_F2A_HALF_ROUNDUP. | |
// It does not violate most general rules due to the fact that it is | |
// applicable only when formatting with narrowed width, where the fine | |
// view is more desirable, and the precision is already lost, so it can | |
// be used in general-purpose applications. | |
// Refer to its implementation. | |
{$define GRISU1_F2A_AGRESSIVE_ROUNDUP} // Defining this fixes several tests. | |
// Undefine to enable SNaN support. | |
// Note: IEEE [754-2008, page 31] requires (1) to recognize "SNaN" during | |
// ASCII->float, and (2) to generate the "invalid FP operation" exception | |
// either when SNaN is printed as "NaN", or "SNaN" is evaluated to QNaN, | |
// so it would be preferable to undefine these settings, | |
// but the FPC RTL is not ready for this right now.. | |
{$define GRISU1_F2A_NO_SNAN} | |
/// If Value=0 would just store '0', whatever frac_digits is supplied. | |
{$define GRISU1_F2A_ZERONOFRACT} | |
{$ifndef FPC} | |
// those functions are intrinsics with FPC :) | |
function BSRdword(c: cardinal): cardinal; | |
asm | |
{$ifdef CPU64} | |
.noframe | |
mov eax, c | |
{$endif} | |
bsr eax, eax | |
end; // in our code below, we are sure that c<>0 | |
function BSRqword(const q: qword): cardinal; | |
asm | |
{$ifdef CPU32} | |
bsr eax, [esp + 8] | |
jz @1 | |
add eax, 32 | |
ret | |
@1: bsr eax, [esp + 4] | |
@2: {$else} | |
.noframe | |
mov rax, q | |
bsr rax, rax | |
{$endif} | |
end; // in our code below, we are sure that q<>0 | |
{$endif FPC} | |
const | |
// TFloatFormatProfile for double | |
nDig_mantissa = 17; | |
nDig_exp10 = 3; | |
type | |
// "Do-It-Yourself Floating Point" structures | |
TDIY_FP = record | |
f: qword; | |
e: integer; | |
end; | |
TDIY_FP_Power_of_10 = record | |
c: TDIY_FP; | |
e10: integer; | |
end; | |
PDIY_FP_Power_of_10 = ^TDIY_FP_Power_of_10; | |
const | |
ROUNDER = $80000000; | |
{$ifdef CPUINTEL} // our faster version using 128-bit x86_64 multiplication | |
procedure d2a_diy_fp_multiply(var x, y: TDIY_FP; normalize: boolean; | |
out result: TDIY_FP); {$ifdef HASINLINE} inline; {$endif} | |
var | |
p: THash128Rec; | |
begin | |
mul64x64(x.f, y.f, p); // fast x86_64 / i386 asm | |
if (p.c1 and ROUNDER) <> 0 then | |
inc(p.h); | |
result.f := p.h; | |
result.e := PtrInt(x.e) + PtrInt(y.e) + 64; | |
if normalize then | |
if (PQWordRec(@result.f)^.h and ROUNDER) = 0 then | |
begin | |
result.f := result.f * 2; | |
dec(result.e); | |
end; | |
end; | |
{$else} // regular Grisu method - optimized for 32-bit CPUs | |
procedure d2a_diy_fp_multiply(var x, y: TDIY_FP; normalize: boolean; out result: TDIY_FP); | |
var | |
_x: TQWordRec absolute x; | |
_y: TQWordRec absolute y; | |
r: TQWordRec absolute result; | |
ac, bc, ad, bd, t1: TQWordRec; | |
begin | |
ac.v := qword(_x.h) * _y.h; | |
bc.v := qword(_x.l) * _y.h; | |
ad.v := qword(_x.h) * _y.l; | |
bd.v := qword(_x.l) * _y.l; | |
t1.v := qword(ROUNDER) + bd.h + bc.l + ad.l; | |
result.f := ac.v + ad.h + bc.h + t1.h; | |
result.e := x.e + y.e + 64; | |
if normalize then | |
if (r.h and ROUNDER) = 0 then | |
begin | |
inc(result.f, result.f); | |
dec(result.e); | |
end; | |
end; | |
{$endif CPUINTEL} | |
const | |
// alpha =-61; gamma = 0 | |
// full cache: 1E-450 .. 1E+432, step = 1E+18 | |
// sparse = 1/10 | |
C_PWR10_DELTA = 18; | |
C_PWR10_COUNT = 50; | |
type | |
TDIY_FP_Cached_Power10 = record | |
base: array [ 0 .. 9 ] of TDIY_FP_Power_of_10; | |
factor_plus: array [ 0 .. 1 ] of TDIY_FP_Power_of_10; | |
factor_minus: array [ 0 .. 1 ] of TDIY_FP_Power_of_10; | |
// extra mantissa correction [ulp; signed] | |
corrector: array [ 0 .. C_PWR10_COUNT - 1 ] of shortint; | |
end; | |
const | |
CACHED_POWER10: TDIY_FP_Cached_Power10 = ( | |
base: ( | |
( c: ( f: qword($825ECC24C8737830); e: -362 ); e10: -90 ), | |
( c: ( f: qword($E2280B6C20DD5232); e: -303 ); e10: -72 ), | |
( c: ( f: qword($C428D05AA4751E4D); e: -243 ); e10: -54 ), | |
( c: ( f: qword($AA242499697392D3); e: -183 ); e10: -36 ), | |
( c: ( f: qword($9392EE8E921D5D07); e: -123 ); e10: -18 ), | |
( c: ( f: qword($8000000000000000); e: -63 ); e10: 0 ), | |
( c: ( f: qword($DE0B6B3A76400000); e: -4 ); e10: 18 ), | |
( c: ( f: qword($C097CE7BC90715B3); e: 56 ); e10: 36 ), | |
( c: ( f: qword($A70C3C40A64E6C52); e: 116 ); e10: 54 ), | |
( c: ( f: qword($90E40FBEEA1D3A4B); e: 176 ); e10: 72 ) | |
); | |
factor_plus: ( | |
( c: ( f: qword($F6C69A72A3989F5C); e: 534 ); e10: 180 ), | |
( c: ( f: qword($EDE24AE798EC8284); e: 1132 ); e10: 360 ) | |
); | |
factor_minus: ( | |
( c: ( f: qword($84C8D4DFD2C63F3B); e: -661 ); e10: -180 ), | |
( c: ( f: qword($89BF722840327F82); e: -1259 ); e10: -360 ) | |
); | |
corrector: ( | |
0, 0, 0, 0, 1, 0, 0, 0, 1, -1, | |
0, 1, 1, 1, -1, 0, 0, 1, 0, -1, | |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
-1, 0, 0, -1, 0, 0, 0, 0, 0, -1, | |
0, 0, 0, 0, 1, 0, 0, 0, -1, 0 | |
)); | |
CACHED_POWER10_MIN10 = -90 -360; | |
// = ref.base[low(ref.base)].e10 + ref.factor_minus[high(ref.factor_minus)].e10 | |
// return normalized correctly rounded approximation of the power of 10 | |
// scaling factor, intended to shift a binary exponent of the original number | |
// into selected [ alpha .. gamma ] range | |
procedure d2a_diy_fp_cached_power10(exp10: integer; out factor: TDIY_FP_Power_of_10); | |
var | |
i, xmul: integer; | |
A, B: PDIY_FP_Power_of_10; | |
cx: PtrInt; | |
ref: ^TDIY_FP_Cached_Power10; | |
begin | |
ref := @CACHED_POWER10; // much better code generation on PIC/x86_64 | |
// find non-sparse index | |
if exp10 <= CACHED_POWER10_MIN10 then | |
i := 0 | |
else | |
begin | |
i := (exp10 - CACHED_POWER10_MIN10) div C_PWR10_DELTA; | |
if i * C_PWR10_DELTA + CACHED_POWER10_MIN10 <> exp10 then | |
inc(i); // round-up | |
if i > C_PWR10_COUNT - 1 then | |
i := C_PWR10_COUNT - 1; | |
end; | |
// generate result | |
xmul := i div length(ref.base); | |
A := @ref.base[i - (xmul * length(ref.base))]; // fast mod | |
dec(xmul, length(ref.factor_minus)); | |
if xmul = 0 then | |
begin | |
// base | |
factor := A^; | |
exit; | |
end; | |
// surrogate | |
if xmul > 0 then | |
begin | |
dec(xmul); | |
B := @ref.factor_plus[xmul]; | |
end | |
else | |
begin | |
xmul := -(xmul + 1); | |
B := @ref.factor_minus[xmul]; | |
end; | |
factor.e10 := A.e10 + B.e10; | |
if A.e10 <> 0 then | |
begin | |
d2a_diy_fp_multiply(A.c, B.c, true, factor.c); | |
// adjust mantissa | |
cx := ref.corrector[i]; | |
if cx <> 0 then | |
inc(int64(factor.c.f), int64(cx)); | |
end | |
else | |
// exact | |
factor.c := B^.c; | |
end; | |
procedure d2a_unpack_float(const f: double; out minus: boolean; out result: TDIY_FP); | |
{$ifdef HASINLINE} inline;{$endif} | |
type | |
TSplitFloat = packed record | |
case byte of | |
0: (f: double); | |
1: (b: array[0..7] of byte); | |
2: (w: array[0..3] of word); | |
3: (d: array[0..1] of cardinal); | |
4: (l: qword); | |
end; | |
var | |
doublebits: TSplitFloat; | |
begin | |
{$ifdef FPC_DOUBLE_HILO_SWAPPED} | |
// high and low cardinal are swapped when using the arm fpa | |
doublebits.d[0] := TSplitFloat(f).d[1]; | |
doublebits.d[1] := TSplitFloat(f).d[0]; | |
{$else not FPC_DOUBLE_HILO_SWAPPED} | |
doublebits.f := f; | |
{$endif FPC_DOUBLE_HILO_SWAPPED} | |
{$ifdef endian_big} | |
minus := (doublebits.b[0] and $80 <> 0); | |
result.e := (doublebits.w[0] shr 4) and $7FF; | |
{$else endian_little} | |
minus := (doublebits.b[7] and $80 <> 0); | |
result.e := (doublebits.w[3] shr 4) and $7FF; | |
{$endif endian} | |
result.f := doublebits.l and $000FFFFFFFFFFFFF; | |
end; | |
const | |
C_FRAC2_BITS = 52; | |
C_EXP2_BIAS = 1023; | |
C_DIY_FP_Q = 64; | |
C_GRISU_ALPHA = -61; | |
C_GRISU_GAMMA = 0; | |
C_EXP2_SPECIAL = C_EXP2_BIAS * 2 + 1; | |
C_MANT2_INTEGER = qword(1) shl C_FRAC2_BITS; | |
type | |
TAsciiDigits = array[0..47] of byte; | |
PAsciiDigits = ^TAsciiDigits; | |
// convert unsigned integers into decimal digits | |
{$ifdef FPC_64} // leverage efficient FPC 64-bit division as mul reciprocal | |
function d2a_gen_digits_64(buf: PAsciiDigits; x: qword): PtrInt; | |
var | |
tab: PWordArray; | |
P: PAnsiChar; | |
c100: qword; | |
begin | |
tab := @TwoDigitByteLookupW; // 0..99 value -> two byte digits (0..9) | |
P := PAnsiChar(@buf[24]); // append backwards | |
repeat | |
if x >= 100 then | |
begin | |
dec(P, 2); | |
c100 := x div 100; | |
dec(x, c100 * 100); | |
PWord(P)^ := tab[x]; | |
if c100 = 0 then | |
break; | |
x := c100; | |
continue; | |
end; | |
if x < 10 then | |
begin | |
dec(P); | |
P^ := AnsiChar(x); | |
break; | |
end; | |
dec(P, 2); | |
PWord(P)^ := tab[x]; | |
break; | |
until false; | |
PQWordArray(buf)[0] := PQWordArray(P)[0]; // faster than MoveSmall(P,buf,result) | |
PQWordArray(buf)[1] := PQWordArray(P)[1]; | |
PQWordArray(buf)[2] := PQWordArray(P)[2]; | |
result := PAnsiChar(@buf[24]) - P; | |
end; | |
{$else not FPC_64} // use three 32-bit groups of digit | |
function d2a_gen_digits_32(buf: PAsciiDigits; x: dword; pad_9zero: boolean): PtrInt; | |
const | |
digits: array[0..9] of cardinal = ( | |
0, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000); | |
var | |
n: PtrInt; | |
m: cardinal; | |
{$ifdef FPC} | |
z: cardinal; | |
{$else} | |
d100: TDiv100Rec; | |
{$endif FPC} | |
tab: PWordArray; | |
begin | |
// Calculate amount of digits | |
if x = 0 then | |
n := 0 // emit nothing if padding is not required | |
else | |
begin | |
n := integer((BSRdword(x) + 1) * 1233) shr 12; | |
if x >= digits[n] then | |
inc(n); | |
end; | |
if pad_9zero and (n < 9) then | |
n := 9; | |
result := n; | |
if n = 0 then | |
exit; | |
// Emit digits | |
dec(PByte(buf)); | |
tab := @TwoDigitByteLookupW; | |
m := x; | |
while (n >= 2) and (m <> 0) do | |
begin | |
dec(n); | |
{$ifdef FPC} // FPC will use fast mul reciprocal | |
z := m div 100; // compute two 0..9 digits | |
PWord(@buf[n])^ := tab^[m - z * 100]; | |
m := z; | |
{$else} | |
Div100(m, d100); // our asm is faster than Delphi div operation | |
PWord(@buf[n])^ := tab^[d100.M]; | |
m := d100.D; | |
{$endif FPC} | |
dec(n); | |
end; | |
if n = 0 then | |
exit; | |
if m <> 0 then | |
begin | |
if m > 9 then | |
m := m mod 10; // compute last 0..9 digit | |
buf[n] := m; | |
dec(n); | |
if n = 0 then | |
exit; | |
end; | |
repeat | |
buf[n] := 0; // padding with 0 | |
dec(n); | |
until n = 0; | |
end; | |
function d2a_gen_digits_64(buf: PAsciiDigits; const x: qword): PtrInt; | |
var | |
n_digits: PtrInt; | |
temp: qword; | |
splitl, splitm, splith: cardinal; | |
begin | |
// Split X into 3 unsigned 32-bit integers; lower two should be < 10 digits long | |
n_digits := 0; | |
if x < 1000000000 then | |
splitl := x | |
else | |
begin | |
temp := x div 1000000000; | |
splitl := x - temp * 1000000000; | |
if temp < 1000000000 then | |
splitm := temp | |
else | |
begin | |
splith := temp div 1000000000; | |
splitm := cardinal(temp) - splith * 1000000000; | |
n_digits := d2a_gen_digits_32(buf, splith, false); // Generate hi digits | |
end; | |
inc(n_digits, d2a_gen_digits_32(@buf[n_digits], splitm, n_digits <> 0)); | |
end; | |
// Generate digits | |
inc(n_digits, d2a_gen_digits_32(@buf[n_digits], splitl, n_digits <> 0)); | |
result := n_digits; | |
end; | |
{$endif FPC_64} | |
// Performs digit sequence rounding, returns decimal point correction | |
function d2a_round_digits(var buf: TAsciiDigits; var n_current: integer; | |
n_max: PtrInt; half_round_to_even: boolean = true): PtrInt; | |
var | |
n: PtrInt; | |
dig_round, dig_sticky: byte; | |
{$ifdef GRISU1_F2A_AGRESSIVE_ROUNDUP} | |
i: PtrInt; | |
{$endif} | |
begin | |
result := 0; | |
n := n_current; | |
n_current := n_max; | |
// Get round digit | |
dig_round := buf[n_max]; | |
{$ifdef GRISU1_F2A_AGRESSIVE_ROUNDUP} | |
// Detect if rounding-up the second last digit turns the "dig_round" | |
// into "5"; also make sure we have at least 1 digit between "dig_round" | |
// and the second last. | |
if not half_round_to_even then | |
if (dig_round = 4) and (n_max < n - 3) then | |
if buf[n - 2] >= 8 then // somewhat arbitrary... | |
begin | |
// check for only "9" are in between | |
i := n - 2; | |
repeat | |
dec(i); | |
until (i = n_max) or (buf[i] <> 9); | |
if i = n_max then | |
// force round-up | |
dig_round := 9; // any value ">=5" | |
end; | |
{$endif GRISU1_F2A_AGRESSIVE_ROUNDUP} | |
if dig_round < 5 then | |
exit; | |
// Handle "round half to even" case | |
if (dig_round = 5) and half_round_to_even and | |
((n_max = 0) or (buf[n_max - 1] and 1 = 0)) then | |
begin | |
// even and a half: check if exactly the half | |
dig_sticky := 0; | |
while (n > n_max + 1) and (dig_sticky = 0) do | |
begin | |
dec(n); | |
dig_sticky := buf[n]; | |
end; | |
if dig_sticky = 0 then | |
exit; // exactly a half -> no rounding is required | |
end; | |
// Round-up | |
while n_max > 0 do | |
begin | |
dec(n_max); | |
inc(buf[n_max]); | |
if buf[n_max] < 10 then | |
begin | |
// no more overflow: stop now | |
n_current := n_max + 1; | |
exit; | |
end; | |
// continue rounding | |
end; | |
// Overflow out of the 1st digit, all n_max digits became 0 | |
buf[0] := 1; | |
n_current := 1; | |
result := 1; | |
end; | |
// format the number in the fixed-point representation | |
procedure d2a_return_fixed(str: PAnsiChar; minus: boolean; var digits: TAsciiDigits; | |
n_digits_have, fixed_dot_pos, frac_digits: integer); | |
var | |
p: PAnsiChar; | |
d: PByte; | |
cut_digits_at, n_before_dot, n_before_dot_pad0, n_after_dot_pad0, | |
n_after_dot, n_tail_pad0: integer; | |
begin | |
// Round digits if necessary | |
cut_digits_at := fixed_dot_pos + frac_digits; | |
if cut_digits_at < 0 then | |
// zero | |
n_digits_have := 0 | |
else if cut_digits_at < n_digits_have then | |
// round digits | |
inc(fixed_dot_pos, d2a_round_digits(digits, n_digits_have, cut_digits_at | |
{$ifdef GRISU1_F2A_HALF_ROUNDUP}, false {$endif} )); | |
// Before dot: digits, pad0 | |
if (fixed_dot_pos <= 0) or (n_digits_have = 0) then | |
begin | |
n_before_dot := 0; | |
n_before_dot_pad0 := 1; | |
end | |
else if fixed_dot_pos > n_digits_have then | |
begin | |
n_before_dot := n_digits_have; | |
n_before_dot_pad0 := fixed_dot_pos - n_digits_have; | |
end | |
else | |
begin | |
n_before_dot := fixed_dot_pos; | |
n_before_dot_pad0 := 0; | |
end; | |
// After dot: pad0, digits, pad0 | |
if fixed_dot_pos < 0 then | |
n_after_dot_pad0 := -fixed_dot_pos | |
else | |
n_after_dot_pad0 := 0; | |
if n_after_dot_pad0 > frac_digits then | |
n_after_dot_pad0 := frac_digits; | |
n_after_dot := n_digits_have - n_before_dot; | |
n_tail_pad0 := frac_digits - n_after_dot - n_after_dot_pad0; | |
p := str + 1; | |
// Sign | |
if minus then | |
begin | |
p^ := '-'; | |
inc(p); | |
end; | |
// Integer significant digits | |
d := @digits; | |
if n_before_dot > 0 then | |
repeat | |
p^ := AnsiChar(d^ + ord('0')); | |
inc(p); | |
inc(d); | |
dec(n_before_dot); | |
until n_before_dot = 0; | |
// Integer 0-padding | |
if n_before_dot_pad0 > 0 then | |
repeat | |
p^ := '0'; | |
inc(p); | |
dec(n_before_dot_pad0); | |
until n_before_dot_pad0 = 0; | |
// | |
if frac_digits <> 0 then | |
begin | |
// Dot | |
p^ := '.'; | |
inc(p); | |
// Pre-fraction 0-padding | |
if n_after_dot_pad0 > 0 then | |
repeat | |
p^ := '0'; | |
inc(p); | |
dec(n_after_dot_pad0); | |
until n_after_dot_pad0 = 0; | |
// Fraction significant digits | |
if n_after_dot > 0 then | |
repeat | |
p^ := AnsiChar(d^ + ord('0')); | |
inc(p); | |
inc(d); | |
dec(n_after_dot); | |
until n_after_dot = 0; | |
// Tail 0-padding | |
if n_tail_pad0 > 0 then | |
repeat | |
p^ := '0'; | |
inc(p); | |
dec(n_tail_pad0); | |
until n_tail_pad0 = 0; | |
end; | |
// Store length | |
str[0] := AnsiChar(p - str - 1); | |
end; | |
// formats the number as exponential representation | |
procedure d2a_return_exponential(str: PAnsiChar; minus: boolean; | |
digits: PByte; n_digits_have, n_digits_req, d_exp: PtrInt); | |
var | |
p, exp: PAnsiChar; | |
begin | |
p := str + 1; | |
// Sign | |
if minus then | |
begin | |
p^ := '-'; | |
inc(p); | |
end; | |
// Integer part | |
if n_digits_have > 0 then | |
begin | |
p^ := AnsiChar(digits^ + ord('0')); | |
dec(n_digits_have); | |
end | |
else | |
p^ := '0'; | |
inc(p); | |
// Dot | |
if n_digits_req > 1 then | |
begin | |
p^ := '.'; | |
inc(p); | |
end; | |
// Fraction significant digits | |
if n_digits_req < n_digits_have then | |
n_digits_have := n_digits_req; | |
if n_digits_have > 0 then | |
begin | |
repeat | |
inc(digits); | |
p^ := AnsiChar(digits^ + ord('0')); | |
inc(p); | |
dec(n_digits_have); | |
until n_digits_have = 0; | |
while p[-1] = '0' do | |
dec(p); // trim #.###00000 -> #.### | |
if p[-1] = '.' then | |
dec(p); // #.0 -> # | |
end; | |
// Exponent designator | |
p^ := 'E'; | |
inc(p); | |
// Exponent sign (+ is not stored, as in Delphi) | |
if d_exp < 0 then | |
begin | |
p^ := '-'; | |
d_exp := -d_exp; | |
inc(p); | |
end; | |
// Exponent digits | |
exp := pointer(SmallUInt32UTF8[d_exp]); // 0..999 range is fine | |
PCardinal(p)^ := PCardinal(exp)^; | |
inc(p, PStrLen(exp - _STRLEN)^); | |
// Store length | |
str[0] := AnsiChar(p - str - 1); | |
end; | |
/// set one of special results with proper sign | |
procedure d2a_return_special(str: PAnsiChar; sign: integer; const spec: shortstring); | |
begin | |
// Compute length | |
str[0] := spec[0]; | |
if sign <> 0 then | |
inc(str[0]); | |
inc(str); | |
// Sign | |
if sign <> 0 then | |
begin | |
if sign > 0 then | |
str^ := '+' | |
else | |
str^ := '-'; | |
inc(str); | |
end; | |
// Special text (3 chars) | |
PCardinal(str)^ := PCardinal(@spec[1])^; | |
end; | |
// Calculates the exp10 of a factor required to bring the binary exponent | |
// of the original number into selected [ alpha .. gamma ] range: | |
// result := ceiling[ ( alpha - e ) * log10(2) ] | |
function d2a_k_comp(e, alpha{, gamma}: integer): integer; | |
var | |
dexp: double; | |
const | |
D_LOG10_2: double = 0.301029995663981195213738894724493027; // log10(2) | |
var | |
x, n: integer; | |
begin | |
x := alpha - e; | |
dexp := x * D_LOG10_2; | |
// ceil( dexp ) | |
n := trunc(dexp); | |
if x > 0 then | |
if dexp <> n then | |
inc(n); // round-up | |
result := n; | |
end; | |
/// raw function to convert a 64-bit double into a shortstring, stored in str | |
// - implements Fabian Loitsch's Grisu algorithm dedicated to double values | |
// - currently, SynCommnons only set min_width=0 (for DoubleToShortNoExp to avoid | |
// any scientific notation ) or min_width=C_NO_MIN_WIDTH (for DoubleToShort to | |
// force the scientific notation when the double cannot be represented as | |
// a simple fractinal number) | |
procedure DoubleToAscii(min_width, frac_digits: integer; const v: double; str: PAnsiChar); | |
var | |
w, D: TDIY_FP; | |
c_mk: TDIY_FP_Power_of_10; | |
n, mk, dot_pos, n_digits_need, n_digits_have: integer; | |
n_digits_req, n_digits_sci: integer; | |
minus: boolean; | |
fl, one_maskl: qword; | |
one_e: integer; | |
{$ifdef CPU32} | |
one_mask, f: cardinal; // run a 2nd loop with 32-bit range | |
{$endif CPU32} | |
buf: TAsciiDigits; | |
begin | |
// Limit parameters | |
if frac_digits > 216 then | |
frac_digits := 216; // Delphi compatible | |
if min_width <= C_NO_MIN_WIDTH then | |
min_width := -1 // no minimal width | |
else if min_width < 0 then | |
min_width := 0; // minimal width is as short as possible | |
// Format profile: select "n_digits_need" (and "n_digits_exp") | |
n_digits_req := nDig_mantissa; | |
// number of digits to be calculated by Grisu | |
n_digits_need := nDig_mantissa; | |
if n_digits_req < n_digits_need then | |
n_digits_need := n_digits_req; | |
// number of mantissa digits to be printed in exponential notation | |
if min_width < 0 then | |
n_digits_sci := n_digits_req | |
else | |
begin | |
n_digits_sci := min_width -1 {sign} -1 {dot} -1 {E} -1 {E-sign} - nDig_exp10; | |
if n_digits_sci < 2 then | |
n_digits_sci := 2; // at least 2 digits | |
if n_digits_sci > n_digits_req then | |
n_digits_sci := n_digits_req; // at most requested by real_type | |
end; | |
// Float -> DIY_FP | |
d2a_unpack_float(v, minus, w); | |
// Handle Zero | |
if (w.e = 0) and (w.f = 0) then | |
begin | |
{$ifdef GRISU1_F2A_ZERONOFRACT} | |
PWord(str)^ := 1 + ord('0') shl 8; // just return '0' | |
{$else} | |
if frac_digits >= 0 then | |
d2a_return_fixed(str, minus, buf, 0, 1, frac_digits) | |
else | |
d2a_return_exponential(str, minus, @buf, 0, n_digits_sci, 0); | |
{$endif GRISU1_F2A_ZERONOFRACT} | |
exit; | |
end; | |
// Handle specials | |
if w.e = C_EXP2_SPECIAL then | |
begin | |
n := 1 - ord(minus) * 2; // default special sign [-1|+1] | |
if w.f = 0 then | |
d2a_return_special(str, n, C_STR_INF) | |
else | |
begin | |
// NaN [also pseudo-NaN, pseudo-Inf, non-normal for floatx80] | |
{$ifdef GRISU1_F2A_NAN_SIGNLESS} | |
n := 0; | |
{$endif} | |
{$ifndef GRISU1_F2A_NO_SNAN} | |
if (w.f and (C_MANT2_INTEGER shr 1)) = 0 then | |
return_special(str, n, C_STR_SNAN) | |
else | |
{$endif GRISU1_F2A_NO_SNAN} | |
d2a_return_special(str, n, C_STR_QNAN); | |
end; | |
exit; | |
end; | |
// Handle denormals | |
if w.e <> 0 then | |
begin | |
// normal | |
w.f := w.f or C_MANT2_INTEGER; | |
n := C_DIY_FP_Q - C_FRAC2_BITS - 1; | |
end | |
else | |
begin | |
// denormal (w.e=0) | |
n := 63 - BSRqword(w.f); // we are sure that w.f<>0 - see Handle Zero above | |
inc(w.e); | |
end; | |
// Final normalization | |
w.f := w.f shl n; | |
dec(w.e, C_EXP2_BIAS + n + C_FRAC2_BITS); | |
// 1. Find the normalized "c_mk = f_c * 2^e_c" such that | |
// "alpha <= e_c + e_w + q <= gamma" | |
// 2. Define "V = D * 10^k": multiply the input number by "c_mk", do not | |
// normalize to land into [ alpha .. gamma ] | |
// 3. Generate digits ( n_digits_need + "round" ) | |
if (C_GRISU_ALPHA <= w.e) and (w.e <= C_GRISU_GAMMA) then | |
begin | |
// no scaling required | |
D := w; | |
c_mk.e10 := 0; | |
end | |
else | |
begin | |
mk := d2a_k_comp(w.e, C_GRISU_ALPHA{, C_GRISU_GAMMA} ); | |
d2a_diy_fp_cached_power10(mk, c_mk); | |
// Let "D = f_D * 2^e_D := w (*) c_mk" | |
if c_mk.e10 = 0 then | |
D := w | |
else | |
d2a_diy_fp_multiply(w, c_mk.c, false, D); | |
end; | |
// Generate digits: integer part | |
n_digits_have := d2a_gen_digits_64(@buf, D.f shr (-D.e)); | |
dot_pos := n_digits_have; | |
// Generate digits: fractional part | |
{$ifdef CPU32} | |
f := 0; // "sticky" digit | |
{$endif CPU32} | |
if D.e < 0 then | |
repeat | |
// MOD by ONE | |
one_e := D.e; | |
one_maskl := qword(1) shl (-D.e) - 1; | |
fl := D.f and one_maskl; | |
// 64-bit loop (very efficient on x86_64, slower on i386) | |
while {$ifdef CPU32} (one_e < -29) and {$endif} | |
(n_digits_have < n_digits_need + 1) and (fl <> 0) do | |
begin | |
// f := f * 5; | |
inc(fl, fl shl 2); | |
// one := one / 2 | |
one_maskl := one_maskl shr 1; | |
inc(one_e); | |
// DIV by one | |
buf[n_digits_have] := fl shr (-one_e); | |
// MOD by one | |
fl := fl and one_maskl; | |
// next | |
inc(n_digits_have); | |
end; | |
{$ifdef CPU32} | |
if n_digits_have >= n_digits_need + 1 then | |
begin | |
// only "sticky" digit remains | |
f := ord(fl <> 0); | |
break; | |
end; | |
one_mask := cardinal(one_maskl); | |
f := cardinal(fl); | |
// 32-bit loop | |
while (n_digits_have < n_digits_need + 1) and (f <> 0) do | |
begin | |
// f := f * 5; | |
inc(f, f shl 2); | |
// one := one / 2 | |
one_mask := one_mask shr 1; | |
inc(one_e); | |
// DIV by one | |
buf[n_digits_have] := f shr (-one_e); | |
// MOD by one | |
f := f and one_mask; | |
// next | |
inc(n_digits_have); | |
end; | |
{$endif CPU32} | |
until true; | |
{$ifdef CPU32} | |
// Append "sticky" digit if any | |
if (f <> 0) and (n_digits_have >= n_digits_need + 1) then | |
begin | |
// single "<>0" digit is enough | |
n_digits_have := n_digits_need + 2; | |
buf[n_digits_need + 1] := 1; | |
end; | |
{$endif CPU32} | |
// Round to n_digits_need using "roundTiesToEven" | |
if n_digits_have > n_digits_need then | |
inc(dot_pos, d2a_round_digits(buf, n_digits_have, n_digits_need)); | |
// Generate output | |
if frac_digits >= 0 then | |
begin | |
d2a_return_fixed(str, minus, buf, n_digits_have, dot_pos - c_mk.e10, | |
frac_digits); | |
exit; | |
end; | |
if n_digits_have > n_digits_sci then | |
inc(dot_pos, d2a_round_digits(buf, n_digits_have, n_digits_sci | |
{$ifdef GRISU1_F2A_HALF_ROUNDUP}, false {$endif} )); | |
d2a_return_exponential(str, minus, @buf, n_digits_have, n_digits_sci, | |
dot_pos - c_mk.e10 - 1); | |
end; | |