Merge pull request #61 from milankl/mk/float64sr

Float64sr via Double64
milankl · Jul 11, 2023 · cd05692 · cd05692
2 parents 97bdf0d + 1e7fae5
commit cd05692
Show file tree

Hide file tree

Showing 5 changed files with 197 additions and 5 deletions.
diff --git a/Project.toml b/Project.toml
@@ -5,11 +5,13 @@ version = "0.6.3"
 
 [deps]
 BFloat16s = "ab4f0b2a-ad5b-11e8-123f-65d77653426b"
+DoubleFloats = "497a8b3b-efae-58df-a0af-a86822472b78"
 RandomNumbers = "e6cf234a-135c-5ec9-84dd-332b85af5143"
 
 [compat]
 BFloat16s = "0.1, 0.2"
-RandomNumbers = "1.4,1.5"
+RandomNumbers = "^1.4"
+DoubleFloats = "1"
 julia = "1"
 
 [extras]

diff --git a/README.md b/README.md
@@ -6,7 +6,7 @@
 [![CI](https://github.com/milankl/StochasticRounding.jl/actions/workflows/CI.yml/badge.svg)](https://github.com/milankl/StochasticRounding.jl/actions/workflows/CI.yml)   
 Stochastic rounding for floating-point arithmetic.
 
-This package exports `Float32sr`,`Float16sr`, and `BFloat16sr`, three number formats that behave
+This package exports `Float64sr`, Float32sr`,`Float16sr`, and `BFloat16sr`, three number formats that behave
 like their deterministic counterparts but with stochastic rounding that is proportional to the
 distance of the next representable numbers and therefore
 [exact in expectation](https://en.wikipedia.org/wiki/Rounding#Stochastic_rounding)
@@ -22,6 +22,12 @@ slower, but e.g. Float32+stochastic rounding is only about 2x slower than Float6
 a random number generator from the [Xorshift family](https://en.wikipedia.org/wiki/Xorshift), is used through the 
 [RandomNumbers.jl](https://github.com/sunoru/RandomNumbers.jl) package, due to its speed and statistical properties.
 
+Ever format of `Float64sr`, Float32sr`,`Float16sr`, and `BFloat16sr` uses a higher precision format
+to obtain the "exact" arithmetic result which is then stochastically rounded to the respective
+lower precision format. `Float16sr` and `BFloat16sr` use `Float32` for this,
+`Float32sr` uses `Float64`, and `Float64sr` uses `Double64` from
+[DoubleFloats.jl](https://github.com/JuliaMath/DoubleFloats.jl)
+
 You are welcome to raise [issues](https://github.com/milankl/StochasticRounding.jl/issues),
 ask questions or suggest any changes or new features.
 
@@ -108,9 +114,10 @@ And similarly for the other number types. Then with Julia 1.6 on an Intel(R) Cor
 | rounding mode         | Float64    | Float32    | Float16   | BFloat16    |
 | --------------------- | ---------- | ---------- | --------- | ----------- |
 | round to nearest      | 1132 μs    |  452 μs    | 1588 μs   |  315 μs     |
-| stochastic rounding   | n/a        | 2650 μs    | 3310 μs   | 1850 μs     |
+| stochastic rounding   | 11,368 μs  | 2650 μs    | 3310 μs   | 1850 μs     |
 
-Stochastic rounding imposes an about x5 performance decrease for Float32 and BFloat16, but only x2 for Float16.
+Stochastic rounding imposes an about x5 performance decrease for Float32 and BFloat16, but only x2 for Float16,
+however, 10x for Float64 due to the use of Double64.
 For more complicated benchmarks the performance decrease is usually within x10.
 About 50% of the time is spend on the random number generation with Xoroshiro128+.
 

diff --git a/src/StochasticRounding.jl b/src/StochasticRounding.jl
@@ -5,14 +5,20 @@ module StochasticRounding
         Float16sr,Float16_stochastic_round,             # Float16 + SR
         Float16_chance_roundup,NaN16sr,Inf16sr,
         Float32sr,Float32_stochastic_round,             # Float32 + SR
-        Float32_chance_roundup,NaN32sr,Inf32sr
+        Float32_chance_roundup,NaN32sr,Inf32sr,
+        Float64sr,Float64_stochastic_round,             # Float64 + SR
+        Float64_chance_roundup,
+        NaNsr,Infsr
 
+    # use BFloat16 from BFloat16s.jl
     import BFloat16s: BFloat16
 
     # faster random number generator
     import RandomNumbers.Xorshifts.Xoroshiro128Plus
     const Xor128 = Ref{Xoroshiro128Plus}(Xoroshiro128Plus())
 
+    import DoubleFloats: DoubleFloats, Double64
+
     """Reseed the PRNG randomly by recalling."""
     function __init__()
         Xor128[] = Xoroshiro128Plus()
@@ -27,5 +33,7 @@ module StochasticRounding
     include("bfloat16sr.jl")
     include("float16sr.jl")
     include("float32sr.jl")
+    include("float64sr.jl")
     include("conversions.jl")
+
 end
diff --git a/src/float64sr.jl b/src/float64sr.jl
@@ -0,0 +1,129 @@
+"""The Float64 + stochastic rounding type."""
+primitive type Float64sr <: AbstractFloat 64 end
+
+# basic properties
+Base.sign_mask(::Type{Float64sr}) = Base.sign_mask(Float64)
+Base.exponent_mask(::Type{Float64sr}) = Base.exponent_mask(Float64)
+Base.significand_mask(::Type{Float64sr}) = Base.significand_mask(Float64)
+Base.precision(::Type{Float64sr}) = Base.precision(Float64)
+
+Base.one(::Type{Float64sr}) = reinterpret(Float64sr,one(Float64))
+Base.zero(::Type{Float64sr}) = reinterpret(Float64sr,zero(Float64))
+Base.one(::Float64sr) = one(Float64sr)
+Base.zero(::Float64sr) = zero(Float64sr)
+
+Base.typemin(::Type{Float64sr}) = Float64sr(typemin(Float64))
+Base.typemax(::Type{Float64sr}) = Float64sr(typemax(Float64))
+Base.floatmin(::Type{Float64sr}) = Float64sr(floatmin(Float64))
+Base.floatmax(::Type{Float64sr}) = Float64sr(floatmax(Float64))
+Base.maxintfloat(::Type{Float64sr}) = Float64sr(maxintfloat(Float64))
+
+Base.typemin(::Float64sr) = typemin(Float64sr)
+Base.typemax(::Float64sr) = typemax(Float64sr)
+Base.floatmin(::Float64sr) = floatmin(Float64sr)
+Base.floatmax(::Float64sr) = floatmax(Float64sr)
+
+Base.eps(::Type{Float64sr}) = Float64sr(eps(Float64))
+Base.eps(x::Float64sr) = Float64sr(eps(Float64(x)))
+
+const Infsr = reinterpret(Float64sr, Inf)
+const NaNsr = reinterpret(Float64sr, NaN)
+
+# basic operations
+Base.abs(x::Float64sr) = reinterpret(Float64sr, abs(reinterpret(Float64,x)))
+Base.isnan(x::Float64sr) = isnan(reinterpret(Float64,x))
+Base.isfinite(x::Float64sr) = isfinite(reinterpret(Float64,x))
+
+Base.uinttype(::Type{Float64sr}) = UInt64
+Base.nextfloat(x::Float64sr) = Float64sr(nextfloat(Float64(x)))
+Base.prevfloat(x::Float64sr) = Float64sr(prevfloat(Float64(x)))
+
+Base.:(-)(x::Float64sr) = reinterpret(Float64sr, reinterpret(UInt64, x) ⊻ Base.sign_mask(Float64sr))
+
+# conversions
+Base.Float64(x::Float64sr) = reinterpret(Float64,x)
+Float64sr(x::Float64) = reinterpret(Float64sr,x)
+Float64sr(x::Float16) = Float64sr(Float64(x))
+Float64sr(x::Float32) = Float64sr(Float64(x))
+Base.Float16(x::Float64sr) = Float16(Float64(x))
+Base.Float32(x::Float64sr) = Float32(Float64(x))
+Float64sr(x::Irrational) = Float64sr(Float64(x))
+
+DoubleFloats.Double64(x::Float64sr) = Double64(Float64(x))
+Float64sr(x::Double64) = Float64sr(Float64(x))
+
+Float64sr(x::Integer) = Float64sr(Float64(x))
+(::Type{T})(x::Float64sr) where {T<:Integer} = T(Float64(x))
+
+"""Stochastically round x::Double64 to Float64 with distance-proportional probabilities."""
+function Float64_stochastic_round(x::Double64)
+    rbits = rand(Xor128[],UInt64)   # create random bits
+
+    # create [1,2)-1.5 = [-0.5,0.5)
+    r = reinterpret(Float64,reinterpret(UInt64,one(Float64)) + (rbits >> 12)) - 1.5
+    a = x.hi    # the more significant float64 in x
+    b = x.lo    # the less significant float64 in x
+    u = eps(a)  # = ulp
+
+    return Float64sr(a + (b+u*r))    # (b+u*r) first as a+b would be rounded to a
+end
+
+function Float64_chance_roundup(x::Real)
+    xround = Float64(x)
+    xround == x && return zero(Float64)
+    xround_down, xround_up = xround < x ? (xround,nextfloat(xround)) :
+        (prevfloat(xround),xround)
+
+    return Float64((x-xround_down)/(xround_up-xround_down))
+end
+
+Base.promote_rule(::Type{Float16}, ::Type{Float64sr}) = Float64sr
+Base.promote_rule(::Type{Float32}, ::Type{Float64sr}) = Float64sr
+Base.promote_rule(::Type{Float64}, ::Type{Float64sr}) = Float64sr
+
+for t in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UInt128)
+    @eval Base.promote_rule(::Type{Float64sr}, ::Type{$t}) = Float64sr
+end
+
+# Rounding
+Base.round(x::Float64sr, r::RoundingMode{:ToZero}) = Float64sr(round(Float64(x), r))
+Base.round(x::Float64sr, r::RoundingMode{:Down}) = Float64sr(round(Float64(x), r))
+Base.round(x::Float64sr, r::RoundingMode{:Up}) = Float64sr(round(Float64(x), r))
+Base.round(x::Float64sr, r::RoundingMode{:Nearest}) = Float64sr(round(Float64(x), r))
+
+# Comparison
+for op in (:(==), :<, :<=, :isless)
+    @eval Base.$op(a::Float64sr, b::Float64sr) = ($op)(Float64(a), Float64(b))
+end
+
+# Arithmetic
+for f in (:+, :-, :*, :/, :^, :mod)
+    @eval Base.$f(x::Float64sr, y::Float64sr) = Float64_stochastic_round($(f)(Double64(x), Double64(y)))
+end
+
+for func in (:sin,:cos,:tan,:asin,:acos,:atan,:sinh,:cosh,:tanh,:asinh,:acosh,
+             :atanh,:exp,:exp2,:exp10,:expm1,:log,:log2,:log10,:sqrt,:cbrt,:log1p)
+    @eval begin
+        Base.$func(a::Float64sr) = Float64_stochastic_round($func(Double64(a)))
+    end
+end
+
+for func in (:atan,:hypot)
+    @eval begin
+        Base.$func(a::Float64sr,b::Float64sr) = Float64_stochastic_round($func(Double64(a),Double64(b)))
+    end
+end
+
+
+# Showing
+Base.show(io::IO, x::Float64sr) = show(io,Float64(x))
+Base.bitstring(x::Float64sr) = bitstring(reinterpret(Float64,x))
+
+function Base.bitstring(x::Float64sr,mode::Symbol)
+    if mode == :split	# split into sign, exponent, signficand
+        s = bitstring(x)
+        return "$(s[1]) $(s[2:12]) $(s[13:end])"
+    else
+        return bitstring(x)
+    end
+end
diff --git a/test/float64sr.jl b/test/float64sr.jl
@@ -0,0 +1,46 @@
+using DoubleFloats
+
+function test_chances_round(f128::Double64;N::Int=100_000)
+    p = Float64_chance_roundup(f128)
+
+    f64_round = Float64sr(f128)
+    if Double64(f64_round) == f128
+        f64_roundup = f64_round
+        f64_rounddown = f64_round
+    elseif Double64(f64_round) < f128
+        f64_rounddown = f64_round
+        f64_roundup = nextfloat(f64_round)
+    else
+        f64_roundup = f64_round
+        f64_rounddown = prevfloat(f64_round)
+    end
+
+    Ndown = 0
+    Nup = 0 
+    for _ in 1:N
+        f64 = Float64_stochastic_round(f128)
+        if f64 == f64_rounddown
+            Ndown += 1
+        elseif f64 == f64_roundup
+            Nup += 1
+        end
+    end
+
+    test1 = Ndown + Nup == N
+    test2 = isapprox(Ndown/N,1-p,atol=1e-2)
+    test3 = isapprox(Nup/N,p,atol=1e-2)
+
+    return test1 && test2 && test3
+end
+
+@testset "Test for N(0,1)" begin
+    for x in randn(Double64,10_000)
+        @test test_chances_round(x)
+    end
+end
+
+@testset "Test for U(0,1)" begin
+    for x in rand(Double64,10_000)
+        @test test_chances_round(x)
+    end
+end