From e61394139668221132e455942e237318bca45ff4 Mon Sep 17 00:00:00 2001 From: Milan Date: Thu, 21 Dec 2023 19:47:28 +0100 Subject: [PATCH 01/23] Complete restructure --- src/StochasticRounding.jl | 23 +--- src/bfloat16sr.jl | 191 ---------------------------- src/conversions.jl | 11 -- src/float16sr.jl | 186 --------------------------- src/float32sr.jl | 185 --------------------------- src/float64sr.jl | 136 -------------------- src/general.jl | 154 +++++++++++++++++++--- src/{promotion.jl => promotions.jl} | 6 +- src/types.jl | 124 ++++++++++++++++++ test/bfloat16sr.jl | 55 ++++---- test/differential_equations.jl | 2 +- test/runtests.jl | 1 - test/seeding.jl | 40 ++---- 13 files changed, 310 insertions(+), 804 deletions(-) delete mode 100644 src/bfloat16sr.jl delete mode 100644 src/conversions.jl delete mode 100644 src/float16sr.jl delete mode 100644 src/float32sr.jl delete mode 100644 src/float64sr.jl rename src/{promotion.jl => promotions.jl} (54%) create mode 100644 src/types.jl diff --git a/src/StochasticRounding.jl b/src/StochasticRounding.jl index 471f011..53f6343 100644 --- a/src/StochasticRounding.jl +++ b/src/StochasticRounding.jl @@ -1,15 +1,5 @@ module StochasticRounding - export BFloat16sr,BFloat16_stochastic_round, # BFloat16 + SR - BFloat16_chance_roundup,NaNB16sr,InfB16sr, - Float16sr,Float16_stochastic_round, # Float16 + SR - Float16_chance_roundup,NaN16sr,Inf16sr, - Float32sr,Float32_stochastic_round, # Float32 + SR - Float32_chance_roundup,NaN32sr,Inf32sr, - Float64sr,Float64_stochastic_round, # Float64 + SR - Float64_chance_roundup, - NaNsr,Infsr - # use BFloat16 from BFloat16s.jl import BFloat16s: BFloat16 @@ -30,12 +20,11 @@ module StochasticRounding return nothing end - include("bfloat16sr.jl") - include("float16sr.jl") - include("float32sr.jl") - include("float64sr.jl") + # define abstract type + export AbstractStochasticFloat + abstract type AbstractStochasticFloat <: AbstractFloat end - include("general.jl") - include("promotion.jl") - include("conversions.jl") + include("types.jl") # define concrete types + include("promotions.jl") # their promotions + include("general.jl") # and general functions end diff --git a/src/bfloat16sr.jl b/src/bfloat16sr.jl deleted file mode 100644 index 0670d73..0000000 --- a/src/bfloat16sr.jl +++ /dev/null @@ -1,191 +0,0 @@ -"""The BFloat16 + stochastic rounding type.""" -primitive type BFloat16sr <: AbstractFloat 16 end - -Base.sign_mask(::Type{BFloat16sr}) = 0x8000 -Base.exponent_mask(::Type{BFloat16sr}) = 0x7f80 -Base.significand_mask(::Type{BFloat16sr}) = 0x007f - -Base.iszero(x::BFloat16sr) = iszero(Float32(x)) -Base.isfinite(x::BFloat16sr) = isfinite(Float32(x)) -Base.isnan(x::BFloat16sr) = isnan(Float32(x)) - -Base.precision(::Type{BFloat16sr}) = 8 -Base.one(::Type{BFloat16sr}) = reinterpret(BFloat16sr,0x3f80) -Base.zero(::Type{BFloat16sr}) = reinterpret(BFloat16sr,0x0000) -Base.one(::BFloat16sr) = one(BFloat16sr) -Base.zero(::BFloat16sr) = zero(BFloat16sr) -Base.rand(::Type{BFloat16sr}) = reinterpret(BFloat16sr,rand(BFloat16)) -Base.randn(::Type{BFloat16sr}) = reinterpret(BFloat16sr,randn(BFloat16)) - -const InfB16sr = reinterpret(BFloat16sr, 0x7f80) -const NaNB16sr = reinterpret(BFloat16sr, 0x7fc0) - -Base.typemin(::Type{BFloat16sr}) = -InfB16sr -Base.typemax(::Type{BFloat16sr}) = InfB16sr -Base.floatmin(::Type{BFloat16sr}) = reinterpret(BFloat16sr,0x0080) -Base.floatmax(::Type{BFloat16sr}) = reinterpret(BFloat16sr,0x7f7f) -minpos(::Type{BFloat16sr}) = reinterpret(BFloat16sr,0x0001) -Base.maxintfloat(::Type{BFloat16sr}) = BFloat16sr(maxintfloat(BFloat16)) - -Base.typemin(::BFloat16sr) = typemin(BFloat16sr) -Base.typemax(::BFloat16sr) = typemax(BFloat16sr) -Base.floatmin(::BFloat16sr) = floatmin(BFloat16sr) -Base.floatmax(::BFloat16sr) = floatmax(BFloat16sr) -minpos(::BFloat16sr) = minpos(BFloat16sr) - -# Truncation from Float32 -Base.uinttype(::Type{BFloat16sr}) = UInt16 -Base.trunc(::Type{BFloat16sr}, x::Float32) = reinterpret(BFloat16sr, - (reinterpret(UInt32, x) >> 16) % UInt16) - -# same for BFloat16sr, but do not apply stochastic rounding to avoid InexactError -Base.round(x::BFloat16sr, r::RoundingMode{:Up}) = BFloat16sr(ceil(Float32(x))) -Base.round(x::BFloat16sr, r::RoundingMode{:Down}) = BFloat16sr(floor(Float32(x))) -Base.round(x::BFloat16sr, r::RoundingMode{:Nearest}) = BFloat16sr(round(Float32(x))) - -# Conversions -"""Convert BFloat16sr to Float32 by padding trailing zeros.""" -Base.Float32(x::BFloat16sr) = reinterpret(Float32, UInt32(reinterpret(UInt16, x)) << 16) - -BFloat16sr(x::Float64) = BFloat16sr(Float32(x)) -BFloat16sr(x::Float16) = BFloat16sr(Float32(x)) -BFloat16sr(x::Integer) = BFloat16sr(Float32(x)) - -Base.Float64(x::BFloat16sr) = Float64(Float32(x)) -Base.Float16(x::BFloat16sr) = Float16(Float32(x)) -BFloat16sr(x::Irrational) = reinterpret(BFloat16sr,BFloat16(x)) - -# conversion between BFloat16 and BFloat16sr -BFloat16(x::BFloat16sr) = reinterpret(BFloat16,x) -BFloat16sr(x::BFloat16) = reinterpret(BFloat16sr,x) - -# conversion to integer -(::Type{T})(x::BFloat16sr) where {T<:Integer} = T(Float32(x)) - -"""Convert to BFloat16sr from Float32 via round-to-nearest -and tie to even. Identical to BFloat16(::Float32).""" -function BFloat16sr(x::Float32) - isnan(x) && return NaNB16sr - h = reinterpret(UInt32, x) - h += 0x7fff + ((h >> 16) & 1) - return reinterpret(BFloat16sr, (h >> 16) % UInt16) -end - -"""Stochastically round x::Float32 to BFloat16 with distance-proportional probabilities.""" -function BFloat16_stochastic_round(x::Float32) - ui = reinterpret(UInt32,x) - ui += rand(Xor128[],UInt16) # add stochastic perturbation in [0,u) - return reinterpret(BFloat16sr,(ui >> 16) % UInt16) # round to zero and convert to BFloat16 -end - -"""Chance that x::Float32 is round up when converted to BFloat16sr.""" -function BFloat16_chance_roundup(x::Float32) - xround = Float32(BFloat16(x)) - xround == x && return zero(Float32) - xround_down, xround_up = xround < x ? (xround,Float32(nextfloat(BFloat16sr(xround)))) : - (Float32(prevfloat(BFloat16sr(xround))),xround) - - return (x-xround_down)/(xround_up-xround_down) -end - -# Basic arithmetic -for f in (:+, :-, :*, :/, :^, :mod) - @eval Base.$f(x::BFloat16sr, y::BFloat16sr) = BFloat16_stochastic_round($(f)(Float32(x), Float32(y))) -end - -# negation via signbit flip -Base.:(-)(x::BFloat16sr) = reinterpret(BFloat16sr, reinterpret(UInt16, x) ⊻ 0x8000) - -# absolute value by setting the signbit to zero -Base.abs(x::BFloat16sr) = reinterpret(BFloat16sr, reinterpret(UInt16, x) & 0x7fff) - -for func in (:sin,:cos,:tan,:asin,:acos,:atan,:sinh,:cosh,:tanh,:asinh,:acosh, - :atanh,:exp,:exp2,:exp10,:expm1,:log,:log2,:log10,:sqrt,:cbrt,:log1p) - @eval begin - Base.$func(a::BFloat16sr) = BFloat16_stochastic_round($func(Float32(a))) - end -end - -function Base.sincos(x::BFloat16sr) - s,c = sincos(Float32(x)) - return (BFloat16_stochastic_round(s),BFloat16_stochastic_round(c)) -end - -for func in (:atan,:hypot) - @eval begin - Base.$func(a::BFloat16sr,b::BFloat16sr) = BFloat16_stochastic_round($func(Float32(a),Float32(b))) - end -end - -# Floating point comparison -function Base.:(==)(x::BFloat16sr, y::BFloat16sr) - return BFloat16(x) == BFloat16(y) -end - -for op in (:<, :<=, :isless) - @eval Base.$op(a::BFloat16sr, b::BFloat16sr) = ($op)(Float32(a), Float32(b)) -end - -# Promotion, always to the deterministic format that contains both -Base.promote_rule(::Type{Float16}, ::Type{BFloat16sr}) = Float32 -Base.promote_rule(::Type{Float32}, ::Type{BFloat16sr}) = Float32 -Base.promote_rule(::Type{Float64}, ::Type{BFloat16sr}) = Float64 - -for t in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UInt128) - @eval Base.promote_rule(::Type{BFloat16sr}, ::Type{$t}) = BFloat16sr -end - -Base.rand(::Type{BFloat16sr},dims::Integer...) = reinterpret.(BFloat16sr,rand(BFloat16,dims...)) -Base.randn(::Type{BFloat16sr},dims::Integer...) = reinterpret.(BFloat16sr,randn(BFloat16,dims...)) -Base.zeros(::Type{BFloat16sr},dims::Integer...) = reinterpret.(BFloat16sr,zeros(BFloat16,dims...)) -Base.ones(::Type{BFloat16sr},dims::Integer...) = reinterpret.(BFloat16sr,ones(BFloat16,dims...)) - -Base.show(io::IO, x::BFloat16sr) = show(io,BFloat16(x)) -Base.bitstring(x::BFloat16sr) = bitstring(reinterpret(UInt16,x)) - -function Base.bitstring(x::BFloat16sr,mode::Symbol) - if mode == :split # split into sign, exponent, signficand - s = bitstring(x) - return "$(s[1]) $(s[2:9]) $(s[10:end])" - else - return bitstring(x) - end -end - -function Base.nextfloat(x::BFloat16sr) - if isfinite(x) - ui = reinterpret(UInt16,x) - if ui < 0x8000 # positive numbers - return reinterpret(BFloat16sr,ui+0x0001) - elseif ui == 0x8000 # =-zero(T) - return reinterpret(BFloat16sr,0x0001) - else # negative numbers - return reinterpret(BFloat16sr,ui-0x0001) - end - else # NaN / Inf case - return x - end -end - -function Base.prevfloat(x::BFloat16sr) - if isfinite(x) - ui = reinterpret(UInt16,x) - if ui == 0x0000 # =zero(T) - return reinterpret(BFloat16sr,0x8001) - elseif ui < 0x8000 # positive numbers - return reinterpret(BFloat16sr,ui-0x0001) - else # negative numbers - return reinterpret(BFloat16sr,ui+0x0001) - end - else # NaN / Inf case - return x - end -end - -#BigFloat -BFloat16sr(x::BigFloat) = BFloat16sr(Float64(x)) -Base.decompose(x::BFloat16sr) = Base.decompose(BFloat16(x)) - -#eps -Base.eps(::Type{BFloat16sr}) = BFloat16sr(eps(BFloat16)) -Base.eps(x::BFloat16sr) = BFloat16sr(eps(BFloat16(x))) diff --git a/src/conversions.jl b/src/conversions.jl deleted file mode 100644 index e7a9df3..0000000 --- a/src/conversions.jl +++ /dev/null @@ -1,11 +0,0 @@ -# conversion from other stochastic floating points to BFloat16sr -BFloat16sr(x::Float16sr) = BFloat16sr(Float32(x)) -BFloat16sr(x::Float32sr) = BFloat16sr(Float32(x)) - -# Conversions from other stochastic floating points to Float16sr -Float16sr(x::BFloat16sr) = Float16sr(Float32(x)) -Float16sr(x::Float32sr) = Float16sr(Float32(x)) - -# Conversions from other stochastic floating points to Float32sr -Float32sr(x::Float16sr) = Float32sr(Float32(x)) -Float32sr(x::BFloat16sr) = Float32sr(Float32(x)) diff --git a/src/float16sr.jl b/src/float16sr.jl deleted file mode 100644 index 6dba892..0000000 --- a/src/float16sr.jl +++ /dev/null @@ -1,186 +0,0 @@ -import BFloat16s.BFloat16 -"""Float16 + stochastic rounding type.""" -primitive type Float16sr <: AbstractFloat 16 end - -# basic properties (same as for Float16) -Base.sign_mask(::Type{Float16sr}) = 0x8000 -Base.exponent_mask(::Type{Float16sr}) = 0x7c00 -Base.significand_mask(::Type{Float16sr}) = 0x03ff -Base.precision(::Type{Float16sr}) = 11 - -Base.one(::Type{Float16sr}) = reinterpret(Float16sr,0x3c00) -Base.zero(::Type{Float16sr}) = reinterpret(Float16sr,0x0000) -Base.one(::Float16sr) = one(Float16sr) -Base.zero(::Float16sr) = zero(Float16sr) -Base.rand(::Type{Float16sr}) = reinterpret(Float16sr,rand(Float16)) -Base.randn(::Type{Float16sr}) = reinterpret(Float16sr,randn(Float16)) - -Base.typemin(::Type{Float16sr}) = Float16sr(typemin(Float16)) -Base.typemax(::Type{Float16sr}) = Float16sr(typemax(Float16)) -Base.floatmin(::Type{Float16sr}) = Float16sr(floatmin(Float16)) -Base.floatmax(::Type{Float16sr}) = Float16sr(floatmax(Float16)) -Base.maxintfloat(::Type{Float16sr}) = Float16sr(maxintfloat(Float16)) - -Base.typemin(::Float16sr) = typemin(Float16sr) -Base.typemax(::Float16sr) = typemax(Float16sr) -Base.floatmin(::Float16sr) = floatmin(Float16sr) -Base.floatmax(::Float16sr) = floatmax(Float16sr) - -Base.eps(::Type{Float16sr}) = Float16sr(eps(Float16)) -Base.eps(x::Float16sr) = Float16sr(eps(Float16(x))) - -const Inf16sr = reinterpret(Float16sr, Inf16) -const NaN16sr = reinterpret(Float16sr, NaN16) - -# basic operations -Base.abs(x::Float16sr) = reinterpret(Float16sr, reinterpret(UInt16, x) & 0x7fff) -Base.isnan(x::Float16sr) = isnan(Float16(x)) -Base.isfinite(x::Float16sr) = isfinite(Float16(x)) - -Base.uinttype(::Type{Float16sr}) = UInt16 -Base.nextfloat(x::Float16sr) = Float16sr(nextfloat(Float16(x))) -Base.prevfloat(x::Float16sr) = Float16sr(prevfloat(Float16(x))) - -Base.:(-)(x::Float16sr) = reinterpret(Float16sr, reinterpret(UInt16, x) ⊻ Base.sign_mask(Float16sr)) - -# conversions via deterministic round-to-nearest -Base.Float16(x::Float16sr) = reinterpret(Float16,x) -Float16sr(x::Float16) = reinterpret(Float16sr,x) -Float16sr(x::Float32) = Float16sr(Float16(x)) -Float16sr(x::Float64) = Float16sr(Float32(x)) -Base.Float32(x::Float16sr) = Float32(Float16(x)) -Base.Float64(x::Float16sr) = Float64(Float16(x)) - -#irrationals -Float16sr(x::Irrational) = reinterpret(Float16sr,Float16(x)) - -# converting to and from BFloat16 -Float16sr(x::BFloat16) = Float16sr(Float32(x)) -BFloat16(x::Float16sr) = BFloat16(Float16(x)) - -Float16sr(x::Integer) = Float16sr(Float32(x)) -(::Type{T})(x::Float16sr) where {T<:Integer} = T(Float32(x)) - -""" - rand_subnormal(rbits::UInt32) -> Float32 - -Create a random perturbation for the Float16 subnormals for -stochastic rounding of Float32 -> Float16. -This function samples uniformly from [-2.980232f-8,2.9802319f-8]. -This function is algorithmically similar to randfloat from RandomNumbers.jl""" -function rand_subnormal(rbits::UInt32) - lz = leading_zeros(rbits) # count leading zeros for correct probabilities of exponent - e = ((101 - lz) % UInt32) << 23 - e |= (rbits << 31) # use last bit for sign - - # combine exponent with random mantissa - return reinterpret(Float32,e | (rbits & 0x007f_ffff)) -end - -const eps_F16 = prevfloat(Float32(nextfloat(zero(Float16))),1) -const floatmin_F16 = Float32(floatmin(Float16)) -const oneF32 = reinterpret(Int32,one(Float32)) - -# # old version -# function rand_subnormal(rbits::UInt32) -# return eps_F16*(reinterpret(Float32,oneF32 | (rbits >> 9))-1.5f0) -# end - -""" - Float16_stochastic_round(x::Float32) -> Float16sr - -Stochastically round `x` to Float16 with distance-proportional probabilities.""" -function Float16_stochastic_round(x::Float32) - rbits = rand(Xor128[],UInt32) # create random bits - - # subnormals are rounded with float-arithmetic for uniform stoch perturbation - abs(x) < floatmin_F16 && return Float16sr(x+rand_subnormal(rbits)) - - # normals are stochastically rounded with integer arithmetic - ui = reinterpret(UInt32,x) - mask = 0x0000_1fff # only mantissa bit 11-23 (the non-Float16 ones) - ui += (rbits & mask) # add perturbation in [0,u) - ui &= ~mask # round to zero - - # via conversion to Float16 to adjust exponent bits - return Float16sr(reinterpret(Float32,ui)) -end - -""" - Float16_chance_roundup(x::Float32) - -Chance that x::Float32 is round up when converted to Float16sr.""" -function Float16_chance_roundup(x::Float32) - xround = Float32(Float16(x)) - xround == x && return zero(Float32) - xround_down, xround_up = xround < x ? (xround,Float32(nextfloat(Float16(xround)))) : - (Float32(prevfloat(Float16(xround))),xround) - - return (x-xround_down)/(xround_up-xround_down) -end - -# Promotion, always to the deterministic format that contains both -Base.promote_rule(::Type{Float16}, ::Type{Float16sr}) = Float16 -Base.promote_rule(::Type{Float32}, ::Type{Float16sr}) = Float32 -Base.promote_rule(::Type{Float64}, ::Type{Float16sr}) = Float64 - -for t in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UInt128) - @eval Base.promote_rule(::Type{Float16sr}, ::Type{$t}) = Float16sr -end - -# Rounding -Base.round(x::Float16sr, r::RoundingMode{:ToZero}) = Float16sr(round(Float32(x), r)) -Base.round(x::Float16sr, r::RoundingMode{:Down}) = Float16sr(round(Float32(x), r)) -Base.round(x::Float16sr, r::RoundingMode{:Up}) = Float16sr(round(Float32(x), r)) -Base.round(x::Float16sr, r::RoundingMode{:Nearest}) = Float16sr(round(Float32(x), r)) - -# Comparison -for op in (:(==), :<, :<=, :isless) - @eval Base.$op(a::Float16sr, b::Float16sr) = ($op)(Float16(a), Float16(b)) -end - -# Arithmetic -for f in (:+, :-, :*, :/, :^, :mod) - @eval Base.$f(x::Float16sr, y::Float16sr) = Float16_stochastic_round($(f)(Float32(x), Float32(y))) -end - -for func in (:sin,:cos,:tan,:asin,:acos,:atan,:sinh,:cosh,:tanh,:asinh,:acosh, - :atanh,:exp,:exp2,:exp10,:expm1,:log,:log2,:log10,:sqrt,:cbrt,:log1p) - @eval begin - Base.$func(a::Float16sr) = Float16_stochastic_round($func(Float32(a))) - end -end - -for func in (:atan,:hypot) - @eval begin - Base.$func(a::Float16sr,b::Float16sr) = Float16_stochastic_round($func(Float32(a),Float32(b))) - end -end - -#sincos function -function Base.sincos(x::Float16sr) - s,c = sincos(Float32(x)) - return (Float16_stochastic_round(s),Float16_stochastic_round(c)) -end - -# array generators -Base.rand(::Type{Float16sr},dims::Integer...) = reinterpret.(Float16sr,rand(Float16,dims...)) -Base.randn(::Type{Float16sr},dims::Integer...) = reinterpret.(Float16sr,randn(Float16,dims...)) -Base.zeros(::Type{Float16sr},dims::Integer...) = reinterpret.(Float16sr,zeros(Float16,dims...)) -Base.ones(::Type{Float16sr},dims::Integer...) = reinterpret.(Float16sr,ones(Float16,dims...)) - -Base.show(io::IO, x::Float16sr) = show(io,Float16(x)) -Base.bitstring(x::Float16sr) = bitstring(reinterpret(UInt16,x)) - -function Base.bitstring(x::Float16sr,mode::Symbol) - if mode == :split # split into sign, exponent, signficand - s = bitstring(x) - return "$(s[1]) $(s[2:6]) $(s[7:end])" - else - return bitstring(x) - end -end - -# BIGFLOAT -Float16sr(x::BigFloat) = Float16sr(Float64(x)) -Base.decompose(x::Float16sr) = Base.decompose(Float16(x)) diff --git a/src/float32sr.jl b/src/float32sr.jl deleted file mode 100644 index fe7e4cc..0000000 --- a/src/float32sr.jl +++ /dev/null @@ -1,185 +0,0 @@ -import BFloat16s.BFloat16 -"""The Float32 + stochastic rounding type.""" -primitive type Float32sr <: AbstractFloat 32 end - -# basic properties -Base.sign_mask(::Type{Float32sr}) = 0x8000_0000 -Base.exponent_mask(::Type{Float32sr}) = 0x7f80_0000 -Base.significand_mask(::Type{Float32sr}) = 0x007f_ffff -Base.precision(::Type{Float32sr}) = 24 - -"""Mask for both sign and exponent bits. Equiv to ~significand_mask(Float64).""" -signexp_mask(::Type{Float64}) = 0xfff0_0000_0000_0000 - -Base.one(::Type{Float32sr}) = reinterpret(Float32sr,one(Float32)) -Base.zero(::Type{Float32sr}) = reinterpret(Float32sr,0x0000_0000) -Base.one(::Float32sr) = one(Float32sr) -Base.zero(::Float32sr) = zero(Float32sr) -Base.rand(::Type{Float32sr}) = reinterpret(Float32sr,rand(Float32)) -Base.randn(::Type{Float32sr}) = reinterpret(Float32sr,randn(Float32)) - -Base.typemin(::Type{Float32sr}) = Float32sr(typemin(Float32)) -Base.typemax(::Type{Float32sr}) = Float32sr(typemax(Float32)) -Base.floatmin(::Type{Float32sr}) = Float32sr(floatmin(Float32)) -Base.floatmax(::Type{Float32sr}) = Float32sr(floatmax(Float32)) -Base.maxintfloat(::Type{Float32sr}) = Float32sr(maxintfloat(Float32)) - -Base.typemin(::Float32sr) = typemin(Float32sr) -Base.typemax(::Float32sr) = typemax(Float32sr) -Base.floatmin(::Float32sr) = floatmin(Float32sr) -Base.floatmax(::Float32sr) = floatmax(Float32sr) - -Base.eps(::Type{Float32sr}) = Float32sr(eps(Float32)) -Base.eps(x::Float32sr) = Float32sr(eps(Float32(x))) - -const Inf32sr = reinterpret(Float32sr, Inf32) -const NaN32sr = reinterpret(Float32sr, NaN32) - -# basic operations -Base.abs(x::Float32sr) = reinterpret(Float32sr, abs(reinterpret(Float32,x))) -Base.isnan(x::Float32sr) = isnan(reinterpret(Float32,x)) -Base.isfinite(x::Float32sr) = isfinite(reinterpret(Float32,x)) - -Base.uinttype(::Type{Float32sr}) = UInt32 -Base.nextfloat(x::Float32sr) = Float32sr(nextfloat(Float32(x))) -Base.prevfloat(x::Float32sr) = Float32sr(prevfloat(Float32(x))) - -Base.:(-)(x::Float32sr) = reinterpret(Float32sr, reinterpret(UInt32, x) ⊻ Base.sign_mask(Float32sr)) - -# conversions -Base.Float32(x::Float32sr) = reinterpret(Float32,x) -Float32sr(x::Float32) = reinterpret(Float32sr,x) -Float32sr(x::Float16) = Float32sr(Float32(x)) -Float32sr(x::Float64) = Float32sr(Float32(x)) -Base.Float16(x::Float32sr) = Float16(Float32(x)) -Base.Float64(x::Float32sr) = Float64(Float32(x)) -#irrationals -Float32sr(x::Irrational) = reinterpret(Float32sr,Float32(x)) - -Float32sr(x::Integer) = Float32sr(Float32(x)) -(::Type{T})(x::Float32sr) where {T<:Integer} = T(Float32(x)) - -# converting to and from BFloat16 -Float32sr(x::BFloat16) = Float32sr(Float64(x)) -BFloat16(x::Float32sr) = BFloat16(Float32(x)) - -""" - rand_subnormal(rbits::UInt64) -> Float64 - -Create a random perturbation for the Float16 subnormals for -stochastic rounding of Float32 -> Float16. -This function samples uniformly from [-7.0064923216240846e-46,7.006492321624084e-46]. -This function is algorithmically similar to randfloat from RandomNumbers.jl""" -function rand_subnormal(rbits::UInt64) - lz = leading_zeros(rbits) # count leading zeros for probabilities of exponent - e = ((872 - lz) % UInt64) << 52 - e |= (rbits << 63) # use last bit for sign - - # combine exponent with random mantissa - return reinterpret(Float64,e | (rbits & Base.significand_mask(Float64))) -end - -const eps_F32 = prevfloat(Float64(nextfloat(zero(Float32)))) -const floatmin_F32 = Float64(floatmin(Float32)) -const oneF64 = reinterpret(Int64,one(Float64)) - -# old version -# function rand_subnormal(rbits::UInt64) -# return eps_F32*(reinterpret(Float64,oneF64 | (rbits >> 12))-1.5) -# end - -""" - Float32_stochastic_round(x::Float64) -> Float32sr - -Stochastically round x::Float64 to Float32 with distance-proportional probabilities.""" -function Float32_stochastic_round(x::Float64) - rbits = rand(Xor128[],UInt64) # create random bits - - # subnormals are rounded with float-arithmetic for uniform stoch perturbation - abs(x) < floatmin_F32 && return Float32sr(x+rand_subnormal(rbits)) - - # normals with integer arithmetic - ui = reinterpret(UInt64,x) - mask = 0x0000_0000_1fff_ffff - ui += (rbits & mask) # add stochastic perturbation in [0,u) - ui &= ~mask # round to zero - return Float32sr(reinterpret(Float64,ui)) -end - -""" - Float32_chance_roundup(x::Float64) - -Chance that `x` is round up when converted to Float32sr.""" -function Float32_chance_roundup(x::Float64) - xround = Float64(Float32(x)) - xround == x && return zero(Float64) - xround_down, xround_up = xround < x ? (xround,Float64(nextfloat(Float32(xround)))) : - (Float64(prevfloat(Float32(xround))),xround) - - return (x-xround_down)/(xround_up-xround_down) -end - -# Promotion, always to the deterministic format that contains both -Base.promote_rule(::Type{Float16}, ::Type{Float32sr}) = Float32 -Base.promote_rule(::Type{Float32}, ::Type{Float32sr}) = Float32 -Base.promote_rule(::Type{Float64}, ::Type{Float32sr}) = Float64 - -for t in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UInt128) - @eval Base.promote_rule(::Type{Float32sr}, ::Type{$t}) = Float32sr -end - -# Rounding -Base.round(x::Float32sr, r::RoundingMode{:ToZero}) = Float32sr(round(Float32(x), r)) -Base.round(x::Float32sr, r::RoundingMode{:Down}) = Float32sr(round(Float32(x), r)) -Base.round(x::Float32sr, r::RoundingMode{:Up}) = Float32sr(round(Float32(x), r)) -Base.round(x::Float32sr, r::RoundingMode{:Nearest}) = Float32sr(round(Float32(x), r)) - -# Comparison -for op in (:(==), :<, :<=, :isless) - @eval Base.$op(a::Float32sr, b::Float32sr) = ($op)(Float32(a), Float32(b)) -end - -# Arithmetic -for f in (:+, :-, :*, :/, :^, :mod) - @eval Base.$f(x::Float32sr, y::Float32sr) = Float32_stochastic_round($(f)(Float64(x), Float64(y))) -end - -for func in (:sin,:cos,:tan,:asin,:acos,:atan,:sinh,:cosh,:tanh,:asinh,:acosh, - :atanh,:exp,:exp2,:exp10,:expm1,:log,:log2,:log10,:sqrt,:cbrt,:log1p) - @eval begin - Base.$func(a::Float32sr) = Float32_stochastic_round($func(Float64(a))) - end -end - -for func in (:atan,:hypot) - @eval begin - Base.$func(a::Float32sr,b::Float32sr) = Float32_stochastic_round($func(Float64(a),Float64(b))) - end -end - -function Base.sincos(x::Float32sr) - s,c = sincos(Float64(x)) - return (Float32_stochastic_round(s),Float32_stochastic_round(c)) -end - -# array generators -Base.rand(::Type{Float32sr},dims::Integer...) = reinterpret.(Float32sr,rand(Float32,dims...)) -Base.randn(::Type{Float32sr},dims::Integer...) = reinterpret.(Float32sr,randn(Float32,dims...)) -Base.zeros(::Type{Float32sr},dims::Integer...) = reinterpret.(Float32sr,zeros(Float32,dims...)) -Base.ones(::Type{Float32sr},dims::Integer...) = reinterpret.(Float32sr,ones(Float32,dims...)) - -Base.show(io::IO, x::Float32sr) = show(io,Float32(x)) -Base.bitstring(x::Float32sr) = bitstring(reinterpret(UInt32,x)) - -function Base.bitstring(x::Float32sr,mode::Symbol) - if mode == :split # split into sign, exponent, signficand - s = bitstring(x) - return "$(s[1]) $(s[2:9]) $(s[10:end])" - else - return bitstring(x) - end -end - -# BIGFLOAT -Float32sr(x::BigFloat) = Float32sr(Float64(x)) -Base.decompose(x::Float32sr) = Base.decompose(Float32(x)) diff --git a/src/float64sr.jl b/src/float64sr.jl deleted file mode 100644 index d62b193..0000000 --- a/src/float64sr.jl +++ /dev/null @@ -1,136 +0,0 @@ -"""The Float64 + stochastic rounding type.""" -primitive type Float64sr <: AbstractFloat 64 end - -# basic properties -Base.sign_mask(::Type{Float64sr}) = Base.sign_mask(Float64) -Base.exponent_mask(::Type{Float64sr}) = Base.exponent_mask(Float64) -Base.significand_mask(::Type{Float64sr}) = Base.significand_mask(Float64) -Base.precision(::Type{Float64sr}) = Base.precision(Float64) - -Base.one(::Type{Float64sr}) = reinterpret(Float64sr,one(Float64)) -Base.zero(::Type{Float64sr}) = reinterpret(Float64sr,zero(Float64)) -Base.one(::Float64sr) = one(Float64sr) -Base.zero(::Float64sr) = zero(Float64sr) -Base.rand(::Type{Float64sr}) = reinterpret(Float64sr,rand(Float64)) -Base.randn(::Type{Float64sr}) = reinterpret(Float64sr,randn(Float64)) - -Base.typemin(::Type{Float64sr}) = Float64sr(typemin(Float64)) -Base.typemax(::Type{Float64sr}) = Float64sr(typemax(Float64)) -Base.floatmin(::Type{Float64sr}) = Float64sr(floatmin(Float64)) -Base.floatmax(::Type{Float64sr}) = Float64sr(floatmax(Float64)) -Base.maxintfloat(::Type{Float64sr}) = Float64sr(maxintfloat(Float64)) - -Base.typemin(::Float64sr) = typemin(Float64sr) -Base.typemax(::Float64sr) = typemax(Float64sr) -Base.floatmin(::Float64sr) = floatmin(Float64sr) -Base.floatmax(::Float64sr) = floatmax(Float64sr) - -Base.eps(::Type{Float64sr}) = Float64sr(eps(Float64)) -Base.eps(x::Float64sr) = Float64sr(eps(Float64(x))) - -const Infsr = reinterpret(Float64sr, Inf) -const NaNsr = reinterpret(Float64sr, NaN) - -# basic operations -Base.abs(x::Float64sr) = reinterpret(Float64sr, abs(reinterpret(Float64,x))) -Base.isnan(x::Float64sr) = isnan(reinterpret(Float64,x)) -Base.isfinite(x::Float64sr) = isfinite(reinterpret(Float64,x)) - -Base.uinttype(::Type{Float64sr}) = UInt64 -Base.nextfloat(x::Float64sr) = Float64sr(nextfloat(Float64(x))) -Base.prevfloat(x::Float64sr) = Float64sr(prevfloat(Float64(x))) - -Base.:(-)(x::Float64sr) = reinterpret(Float64sr, reinterpret(UInt64, x) ⊻ Base.sign_mask(Float64sr)) - -# conversions -Base.Float64(x::Float64sr) = reinterpret(Float64,x) -Float64sr(x::Float64) = reinterpret(Float64sr,x) -Float64sr(x::Float16) = Float64sr(Float64(x)) -Float64sr(x::Float32) = Float64sr(Float64(x)) -Base.Float16(x::Float64sr) = Float16(Float64(x)) -Base.Float32(x::Float64sr) = Float32(Float64(x)) -Float64sr(x::Irrational) = Float64sr(Float64(x)) - -DoubleFloats.Double64(x::Float64sr) = Double64(Float64(x)) -Float64sr(x::Double64) = Float64sr(Float64(x)) - -Float64sr(x::Integer) = Float64sr(Float64(x)) -(::Type{T})(x::Float64sr) where {T<:Integer} = T(Float64(x)) - -"""Stochastically round x::Double64 to Float64 with distance-proportional probabilities.""" -function Float64_stochastic_round(x::Double64) - rbits = rand(Xor128[],UInt64) # create random bits - - # create [1,2)-1.5 = [-0.5,0.5) - r = reinterpret(Float64,reinterpret(UInt64,one(Float64)) + (rbits >> 12)) - 1.5 - a = x.hi # the more significant float64 in x - b = x.lo # the less significant float64 in x - u = eps(a) # = ulp - - return Float64sr(a + (b+u*r)) # (b+u*r) first as a+b would be rounded to a -end - -function Float64_chance_roundup(x::Real) - xround = Float64(x) - xround == x && return zero(Float64) - xround_down, xround_up = xround < x ? (xround,nextfloat(xround)) : - (prevfloat(xround),xround) - - return Float64((x-xround_down)/(xround_up-xround_down)) -end - -Base.promote_rule(::Type{Float16}, ::Type{Float64sr}) = Float64sr -Base.promote_rule(::Type{Float32}, ::Type{Float64sr}) = Float64sr -Base.promote_rule(::Type{Float64}, ::Type{Float64sr}) = Float64sr - -for t in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UInt128) - @eval Base.promote_rule(::Type{Float64sr}, ::Type{$t}) = Float64sr -end - -# Rounding -Base.round(x::Float64sr, r::RoundingMode{:ToZero}) = Float64sr(round(Float64(x), r)) -Base.round(x::Float64sr, r::RoundingMode{:Down}) = Float64sr(round(Float64(x), r)) -Base.round(x::Float64sr, r::RoundingMode{:Up}) = Float64sr(round(Float64(x), r)) -Base.round(x::Float64sr, r::RoundingMode{:Nearest}) = Float64sr(round(Float64(x), r)) - -# Comparison -for op in (:(==), :<, :<=, :isless) - @eval Base.$op(a::Float64sr, b::Float64sr) = ($op)(Float64(a), Float64(b)) -end - -# Arithmetic -for f in (:+, :-, :*, :/, :^, :mod) - @eval Base.$f(x::Float64sr, y::Float64sr) = Float64_stochastic_round($(f)(Double64(x), Double64(y))) -end - -for func in (:sin,:cos,:tan,:asin,:acos,:atan,:sinh,:cosh,:tanh,:asinh,:acosh, - :atanh,:exp,:exp2,:exp10,:expm1,:log,:log2,:log10,:sqrt,:cbrt,:log1p) - @eval begin - Base.$func(a::Float64sr) = Float64_stochastic_round($func(Double64(a))) - end -end - -for func in (:atan,:hypot) - @eval begin - Base.$func(a::Float64sr,b::Float64sr) = Float64_stochastic_round($func(Double64(a),Double64(b))) - end -end - -# array generators -Base.rand(::Type{Float64sr},dims::Integer...) = reinterpret.(Float64sr,rand(Float64,dims...)) -Base.randn(::Type{Float64sr},dims::Integer...) = reinterpret.(Float64sr,randn(Float64,dims...)) -Base.zeros(::Type{Float64sr},dims::Integer...) = reinterpret.(Float64sr,zeros(Float64,dims...)) -Base.ones(::Type{Float64sr},dims::Integer...) = reinterpret.(Float64sr,ones(Float64,dims...)) - -# Showing -Base.show(io::IO, x::Float64sr) = show(io,Float64(x)) -Base.bitstring(x::Float64sr) = bitstring(reinterpret(Float64,x)) - -function Base.bitstring(x::Float64sr,mode::Symbol) - if mode == :split # split into sign, exponent, signficand - s = bitstring(x) - return "$(s[1]) $(s[2:12]) $(s[13:end])" - else - return bitstring(x) - end -end diff --git a/src/general.jl b/src/general.jl index 8bdf27f..3bf0ac5 100644 --- a/src/general.jl +++ b/src/general.jl @@ -1,26 +1,138 @@ -# TODO this should be implemented upstream for various number formats -Base.ldexp(x::AbstractFloat,n::Integer) = x*2^n - -# TODO remove here once implemented in BFloat16s.jl -function Base.nextfloat(x::BFloat16,n::Integer) - n < 0 && return prevfloat(x,-n) - n == 0 && return x - nextfloat(nextfloat(x),n-1) +# conversions +export stochastic_float +stochastic_float(x::AbstractFloat) = reinterpret(stochastic_float(typeof(x)),x) +Base.float(x::AbstractStochasticFloat) = reinterpret(float(typeof(x)),x) +uint(x::AbstractStochasticFloat) = reinterpret(Base.uinttype(typeof(x)),x) +Base.widen(x::AbstractStochasticFloat) = widen(typeof(x))(float(x)) + +# integer conversions +(::Type{T})(x::AbstractStochasticFloat) where {T<:Integer} = T(widen(x)) +for t in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UInt128) + @eval Base.promote_rule(::Type{T}, ::Type{$t}) where {T<:AbstractStochasticFloat} = T end -function Base.prevfloat(x::BFloat16,n::Integer) - n < 0 && return nextfloat(x,-n) - n == 0 && return x - prevfloat(prevfloat(x),n-1) +# other floats, irrational and rationals +(::Type{T})(x::Real) where {T<:AbstractStochasticFloat} = stochastic_float(float(T)(x)) +(::Type{T})(x::Rational) where {T<:AbstractStochasticFloat} = stochastic_float(float(T)(x)) +(::Type{T})(x::AbstractStochasticFloat) where {T<:AbstractFloat} = convert(T,float(x)) +(::Type{T})(x::AbstractStochasticFloat) where {T<:AbstractStochasticFloat} = stochastic_float(convert(float(T),float(x))) + +# masks same as for deterministic floats +Base.sign_mask(T::Type{<:AbstractStochasticFloat}) = Base.sign_mask(float(T)) +Base.exponent_mask(T::Type{<:AbstractStochasticFloat}) = Base.exponent_mask(float(T)) +Base.significand_mask(T::Type{<:AbstractStochasticFloat}) = Base.significand_mask(float(T)) +Base.precision(T::Type{<:AbstractStochasticFloat}) = precision(float(T)) +Base.exponent_bits(T::Type{<:AbstractStochasticFloat}) = Base.exponent_bits(float(T)) +Base.significand_bits(T::Type{<:AbstractStochasticFloat}) = Base.significand_bits(float(T)) + +# one, zero and rand(n) elements +Base.one(T::Type{<:AbstractStochasticFloat}) = stochastic_float(one(float(T))) +Base.zero(T::Type{<:AbstractStochasticFloat}) = stochastic_float(zero(float(T))) +Base.one(x::AbstractStochasticFloat) = stochastic_float(one(float(x))) +Base.zero(x::AbstractStochasticFloat) = stochastic_float(zero(float(x))) +Base.rand(T::Type{<:AbstractStochasticFloat}) = stochastic_float(rand(float(T))) +Base.randn(T::Type{<:AbstractStochasticFloat}) = stochastic_float(randn(float(T))) + +# array generators +Base.rand(::Type{T},dims::Integer...) where {T<:AbstractStochasticFloat} = reinterpret.(T,rand(float(T),dims...)) +Base.randn(::Type{T},dims::Integer...) where {T<:AbstractStochasticFloat} = reinterpret.(T,randn(float(T),dims...)) +Base.zeros(::Type{T},dims::Integer...) where {T<:AbstractStochasticFloat} = reinterpret.(T,zeros(float(T),dims...)) +Base.ones(::Type{T},dims::Integer...) where {T<:AbstractStochasticFloat} = reinterpret.(T,ones(float(T),dims...)) + +Base.iszero(x::AbstractStochasticFloat) = iszero(float(x)) +Base.isfinite(x::AbstractStochasticFloat) = isfinite(float(x)) +Base.isnan(x::AbstractStochasticFloat) = isnan(float(x)) + +nan(T::Type{<:AbstractStochasticFloat}) = T(NaN) +infinity(T::Type{<:AbstractStochasticFloat}) = T(Inf) + +# dynamic range +Base.typemin(T::Type{<:AbstractStochasticFloat}) = stochastic_float(typemin(float(T))) +Base.typemax(T::Type{<:AbstractStochasticFloat}) = stochastic_float(typemax(float(T))) +Base.floatmin(T::Type{<:AbstractStochasticFloat}) = stochastic_float(floatmin(float(T))) +Base.floatmax(T::Type{<:AbstractStochasticFloat}) = stochastic_float(floatmax(float(T))) +Base.maxintfloat(T::Type{<:AbstractStochasticFloat}) = stochastic_float(maxintfloat(float(T))) + +# smallest positive number (subnormal) +export minpos +minpos(T::Type{<:AbstractStochasticFloat}) = stochastic_float(reinterpret(float(T),Base.uinttype(T)(0x1))) +minpos(x::AbstractStochasticFloat) = minpos(typeof(x)) + +# dynamic range called with instance +Base.typemin(x::AbstractStochasticFloat) = typemin(typeof(x)) +Base.typemax(x::AbstractStochasticFloat) = typemax(typeof(x)) +Base.floatmin(x::AbstractStochasticFloat) = floatmin(typeof(x)) +Base.floatmax(x::AbstractStochasticFloat) = floatmax(typeof(x)) + +Base.eps(T::Type{<:AbstractStochasticFloat}) = stochastic_float(eps(float(T))) +Base.eps(x::AbstractStochasticFloat) = stochastic_float(eps(float(x))) + +# show +Base.show(io::IO, x::AbstractStochasticFloat) = show(io,float(x)) +Base.bitstring(x::AbstractStochasticFloat) = bitstring(uint(x)) + +"""Like bitstring(x) but with bitstring(x,:split) showing the bits split +into sign, exponent and mantissa bits.""" +function Base.bitstring(x::AbstractStochasticFloat,mode::Symbol) + if mode == :split # split into sign, exponent, signficand + s = bitstring(x) + ne = Base.exponent_bits(typeof(x)) + return "$(s[1]) $(s[2:ne+1]) $(s[ne+2:end])" + else + return bitstring(x) + end end -function Base.decompose(x::BFloat16)::NTuple{3,Int} - isnan(x) && return 0, 0, 0 - isinf(x) && return ifelse(x < 0, -1, 1), 0, 0 - n = reinterpret(UInt16, x) - s = (n & 0x007f) % Int16 - e = ((n & 0x7f80) >> 7) % Int - s |= Int16(e != 0) << 7 - d = ifelse(signbit(x), -1, 1) - s, e - 134 + (e == 0), d +# same for BFloat16sr, but do not apply stochastic rounding to avoid InexactError +Base.round(x::AbstractStochasticFloat, r::RoundingMode{:Up}) = stochastic_float(ceil(float(x))) +Base.round(x::AbstractStochasticFloat, r::RoundingMode{:Down}) = stochastic_float(floor(float(x))) +Base.round(x::AbstractStochasticFloat, r::RoundingMode{:Nearest}) = stochastic_float(round(floor(float(x)))) +Base.round(x::AbstractStochasticFloat, r::RoundingMode{:ToZero}) = stochastic_float(round(float(x),RoundToZero)) + +# negation, and absolute +Base.:(-)(x::AbstractStochasticFloat) = stochastic_float(-float(x)) +Base.abs(x::AbstractStochasticFloat) = stochastic_float(abs(float(x))) + +# stochastic rounding +export stochastic_round +stochastic_round(T::Type{<:AbstractFloat},x::AbstractFloat) = stochastic_round(T,widen(stochastic_float(T))(x)) +stochastic_round(T::Type{<:AbstractStochasticFloat},x::AbstractFloat) = stochastic_float(stochastic_round(float(T),x)) + +# Comparison +for op in (:(==), :<, :<=, :isless) + @eval Base.$op(a::AbstractStochasticFloat, b::AbstractStochasticFloat) = ($op)(float(a), float(b)) +end + +# Arithmetic +for f in (:+, :-, :*, :/, :^, :mod, :atan, :hypot) + @eval Base.$f(x::T, y::T) where {T<:AbstractStochasticFloat} = stochastic_round(T,$(f)(widen(x), widen(y))) +end + +for func in (:sin,:cos,:tan,:asin,:acos,:atan,:sinh,:cosh,:tanh,:asinh,:acosh, + :atanh,:exp,:exp2,:exp10,:expm1,:log,:log2,:log10,:sqrt,:cbrt,:log1p) + @eval begin + Base.$func(a::T) where {T<:AbstractStochasticFloat} = stochastic_round(T,$func(widen(a))) + end +end + +function Base.sincos(x::T) where {T<:AbstractStochasticFloat} + s,c = sincos(widen(x)) + return (stochastic_round(T,s),stochastic_round(T,c)) +end + +Base.decompose(x::AbstractStochasticFloat) = Base.decompose(float(x)) +Base.ldexp(x::AbstractStochasticFloat,n::Integer) = stochastic_float(Base.ldexp(float(x),n)) + +Base.nextfloat(x::AbstractStochasticFloat,n::Integer) = stochastic_float(nextfloat(float(x),n)) +Base.prevfloat(x::AbstractStochasticFloat,n::Integer) = stochastic_float(prevfloat(float(x),n)) + +export chance_roundup +function chance_roundup(T::Type{<:AbstractStochasticFloat},x::Real) + x = widen(T)(x) + xround = float(T)(x) + (xround == x || isnan(x)) && return zero(float(T)) + xround_down, xround_up = xround < x ? (xround,nextfloat(xround)) : + (prevfloat(xround),xround) + + return (x-xround_down)/(xround_up-xround_down) end \ No newline at end of file diff --git a/src/promotion.jl b/src/promotions.jl similarity index 54% rename from src/promotion.jl rename to src/promotions.jl index 9cb9518..9cdb5f7 100644 --- a/src/promotion.jl +++ b/src/promotions.jl @@ -1,4 +1,8 @@ # always promote to the format that contains both Base.promote_rule(::Type{Float16sr}, ::Type{Float32sr}) = Float32sr Base.promote_rule(::Type{BFloat16sr}, ::Type{Float32sr}) = Float32sr -Base.promote_rule(::Type{Float16sr}, ::Type{BFloat16sr}) = Float32sr \ No newline at end of file +Base.promote_rule(::Type{Float16sr}, ::Type{BFloat16sr}) = Float32sr + +Base.promote_rule(::Type{Float16sr}, ::Type{Float64sr}) = Float64sr +Base.promote_rule(::Type{BFloat16sr}, ::Type{Float64sr}) = Float64sr +Base.promote_rule(::Type{Float32sr}, ::Type{Float64sr}) = Float64sr \ No newline at end of file diff --git a/src/types.jl b/src/types.jl new file mode 100644 index 0000000..c4b3504 --- /dev/null +++ b/src/types.jl @@ -0,0 +1,124 @@ +# BFLOAT16 + STOCHASTIC ROUNDING, DEFINE EVERYTHING SPECIFIC +export BFloat16sr +primitive type BFloat16sr <: AbstractStochasticFloat 16 end +Base.float(::Type{BFloat16sr}) = BFloat16 # corresponding deterministic float +stochastic_float(::Type{BFloat16}) = BFloat16sr # and stochastic float +BFloat16sr(x::BFloat16) = stochastic_float(x) # direct conversion +Base.uinttype(::Type{BFloat16sr}) = UInt16 # corresponding uint +Base.widen(::Type{BFloat16sr}) = Float32 # higher precision format for exact arithmetic + +"""Stochastically round x::Float32 to BFloat16 with distance-proportional probabilities.""" +function stochastic_round(T::Type{BFloat16},x::Float32) + ui = reinterpret(UInt32,x) + ui += rand(Xor128[],UInt16) # add stochastic perturbation in [0,u) + return reinterpret(BFloat16,(ui >> 16) % UInt16) # round to zero and convert to BFloat16 +end + +# FLOAT16 + STOCHASTIC ROUNDING, DEFINE EVERYTHING SPECIFIC +export Float16sr +primitive type Float16sr <: AbstractStochasticFloat 16 end +Base.float(::Type{Float16sr}) = Float16 # corresponding deterministic float +stochastic_float(::Type{Float16}) = Float16sr # and stochastic float +Float16sr(x::Float16) = stochastic_float(x) # direct conversion +Base.uinttype(::Type{Float16sr}) = UInt16 # corresponding uint +Base.widen(::Type{Float16sr}) = Float32 # higher precision format for exact arithmetic + +const FLOATMIN_F16 = Float32(floatmin(Float16)) + +""" +Stochastically round x::Float32 to Float16 with distance-proportional probabilities.""" +function stochastic_round(T::Type{Float16},x::Float32) + rbits = rand(Xor128[],UInt32) # create random bits + + # subnormals are rounded with float-arithmetic for uniform stoch perturbation + abs(x) < FLOATMIN_F16 && return Float16(x+rand_subnormal(rbits)) + + # normals are stochastically rounded with integer arithmetic + ui = reinterpret(UInt32,x) + mask = 0x0000_1fff # only mantissa bit 11-23 (the non-Float16 ones) + ui += (rbits & mask) # add perturbation in [0,u) + ui &= ~mask # round to zero + + # via conversion to Float16 to adjust exponent bits + return Float16(reinterpret(Float32,ui)) +end + +""" + rand_subnormal(rbits::UInt32) -> Float32 + +Create a random perturbation for the Float16 subnormals for +stochastic rounding of Float32 -> Float16. +This function samples uniformly from [-2.980232f-8,2.9802319f-8]. +This function is algorithmically similar to randfloat from RandomNumbers.jl""" +function rand_subnormal(rbits::UInt32) + lz = leading_zeros(rbits) # count leading zeros for correct probabilities of exponent + e = ((101 - lz) % UInt32) << 23 + e |= (rbits << 31) # use last bit for sign + + # combine exponent with random mantissa + return reinterpret(Float32,e | (rbits & 0x007f_ffff)) +end + +# FLOAT32 + STOCHASTIC ROUNDING, DEFINE EVERYTHING SPECIFIC +export Float32sr +primitive type Float32sr <: AbstractStochasticFloat 32 end +Base.float(::Type{Float32sr}) = Float32 +stochastic_float(::Type{Float32}) = Float32sr +Float32sr(x::Float32) = stochastic_float(x) +Base.uinttype(::Type{Float32sr}) = UInt32 +Base.widen(::Type{Float32sr}) = Float64 + +const FLOATMIN_F32 = Float64(floatmin(Float32)) + +"""Stochastically round x::Float64 to Float32 with distance-proportional probabilities.""" +function stochastic_round(T::Type{Float32},x::Float64) + rbits = rand(Xor128[],UInt64) # create random bits + + # subnormals are rounded with float-arithmetic for uniform stoch perturbation + abs(x) < FLOATMIN_F32 && return Float32(x+rand_subnormal(rbits)) + + # normals with integer arithmetic + ui = reinterpret(UInt64,x) + mask = 0x0000_0000_1fff_ffff + ui += (rbits & mask) # add stochastic perturbation in [0,u) + ui &= ~mask # round to zero + return Float32(reinterpret(Float64,ui)) +end + +""" + rand_subnormal(rbits::UInt64) -> Float64 + +Create a random perturbation for the Float16 subnormals for +stochastic rounding of Float32 -> Float16. +This function samples uniformly from [-7.0064923216240846e-46,7.006492321624084e-46]. +This function is algorithmically similar to randfloat from RandomNumbers.jl""" +function rand_subnormal(rbits::UInt64) + lz = leading_zeros(rbits) # count leading zeros for probabilities of exponent + e = ((872 - lz) % UInt64) << 52 + e |= (rbits << 63) # use last bit for sign + + # combine exponent with random mantissa + return reinterpret(Float64,e | (rbits & Base.significand_mask(Float64))) +end + +# FLOAT32 + STOCHASTIC ROUNDING, DEFINE EVERYTHING SPECIFIC +export Float64sr +primitive type Float64sr <: AbstractStochasticFloat 64 end +Base.float(::Type{Float64sr}) = Float64 +stochastic_float(::Type{Float64}) = Float64sr +Float64sr(x::Float64) = stochastic_float(x) +Base.uinttype(::Type{Float64sr}) = UInt64 +Base.widen(::Type{Float64sr}) = Double64 + +"""Stochastically round x::Double64 to Float64 with distance-proportional probabilities.""" +function stochastic_round(T::Type{Float64},x::Double64) + rbits = rand(Xor128[],UInt64) # create random bits + + # create [1,2)-1.5 = [-0.5,0.5) + r = reinterpret(Float64,reinterpret(UInt64,one(Float64)) + (rbits >> 12)) - 1.5 + a = x.hi # the more significant float64 in x + b = x.lo # the less significant float64 in x + u = eps(a) # = ulp + + return Float64(a + (b+u*r)) # (b+u*r) first as a+b would be rounded to a +end \ No newline at end of file diff --git a/test/bfloat16sr.jl b/test/bfloat16sr.jl index 3de7e55..a43d798 100644 --- a/test/bfloat16sr.jl +++ b/test/bfloat16sr.jl @@ -1,33 +1,38 @@ -@testset "Sign flip" begin - @test one(BFloat16sr) == -(-(one(BFloat16sr))) - @test zero(BFloat16sr) == -(zero(BFloat16sr)) -end +all_types = (BFloat16sr, Float16sr, Float32sr) -@testset "Integer promotion" begin - f = BFloat16sr(1) - @test 2f == BFloat16sr(2) - @test 0f == BFloat16sr(0) -end +@testset for T in all_types -@testset "NaN and Inf" begin - @test isnan(NaNB16sr) - @test ~isfinite(NaNB16sr) - @test ~isfinite(InfB16sr) + @testset "Sign flip" begin + @test one(T) == -(-(one(T))) + @test zero(T) == -(zero(T)) + end - N = 1000 - for i in 1:N - @test InfB16sr == BFloat16_stochastic_round(Inf32) - @test -InfB16sr == BFloat16_stochastic_round(-Inf32) - @test isnan(BFloat16_stochastic_round(NaN32)) + @testset "Integer promotion" begin + f = T(1) + @test 2f == T(2) + @test 0f == T(0) end -end -@testset "No stochastic round to NaN" begin - f1 = nextfloat(0f0) - f2 = prevfloat(0f0) - for i in 1:100 - @test isfinite(BFloat16_stochastic_round(f1)) - @test isfinite(BFloat16_stochastic_round(f2)) + @testset "NaN and Inf" begin + @test isnan(nan(T)) + @test ~isfinite(nan(T)) + @test ~isfinite(infinity(T)) + + N = 1000 + for i in 1:N + @test stochastic_round(T,Inf) == stochastic_round(T,Inf) + @test stochastic_round(T,-Inf) == stochastic_round(T,-Inf) + @test isnan(stochastic_round(T,NaN)) + end + end + + @testset "No stochastic round to NaN" begin + f1 = nextfloat(zero(T)) + f2 = prevfloat(zero(T)) + for i in 1:100 + @test isfinite(stochastic_round(T,f1)) + @test isfinite(stochastic_round(T,f2)) + end end end diff --git a/test/differential_equations.jl b/test/differential_equations.jl index a6da155..c8f6206 100644 --- a/test/differential_equations.jl +++ b/test/differential_equations.jl @@ -1,5 +1,5 @@ @testset "Successful run with DE.jl" begin - for SR in [Float32sr, Float16sr, BFloat16sr] + for SR in [Float64sr, Float32sr, Float16sr, BFloat16sr] f = (u,p,t) -> (p*u) prob_ode_linear = ODEProblem(f,SR(1.0)/SR(2.0),(SR(0.0),SR(1.0)),SR(1.01)); sol = solve(prob_ode_linear,Tsit5()) diff --git a/test/runtests.jl b/test/runtests.jl index b1616db..0d9ba4b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,7 +2,6 @@ using StochasticRounding using Test using DifferentialEquations - import BFloat16s: BFloat16 include("bfloat16sr.jl") diff --git a/test/seeding.jl b/test/seeding.jl index 6cb331d..14f238a 100644 --- a/test/seeding.jl +++ b/test/seeding.jl @@ -1,29 +1,11 @@ -@testset "BFloat16sr seeding" begin - StochasticRounding.seed(12312311) - O = ones(BFloat16sr,10000) - one_third = O/3 - - # now reseed - StochasticRounding.seed(12312311) - @test O/3 == one_third -end - -@testset "Float16sr seeding" begin - StochasticRounding.seed(12312311) - O = ones(Float16sr,10000) - one_third = O/3 - - # now reseed - StochasticRounding.seed(12312311) - @test O/3 == one_third -end - -@testset "Float32sr seeding" begin - StochasticRounding.seed(12312311) - O = ones(Float32sr,10000) - one_third = O/3 - - # now reseed - StochasticRounding.seed(12312311) - @test O/3 == one_third -end +@testset "Seeding" begin + @testset for T in (BFloat16sr, Float16sr, Float32sr, Float64sr) + StochasticRounding.seed(12312311) + O = ones(T,10000) + one_third = O/3 + + # now reseed + StochasticRounding.seed(12312311) + @test O/3 == one_third + end +end \ No newline at end of file From 226e561f0c816abc726c3dc5e6f486092d71a485 Mon Sep 17 00:00:00 2001 From: Milan Date: Fri, 22 Dec 2023 12:27:43 +0100 Subject: [PATCH 02/23] add trunc --- src/general.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/general.jl b/src/general.jl index 3bf0ac5..7441442 100644 --- a/src/general.jl +++ b/src/general.jl @@ -86,8 +86,9 @@ end # same for BFloat16sr, but do not apply stochastic rounding to avoid InexactError Base.round(x::AbstractStochasticFloat, r::RoundingMode{:Up}) = stochastic_float(ceil(float(x))) Base.round(x::AbstractStochasticFloat, r::RoundingMode{:Down}) = stochastic_float(floor(float(x))) -Base.round(x::AbstractStochasticFloat, r::RoundingMode{:Nearest}) = stochastic_float(round(floor(float(x)))) +Base.round(x::AbstractStochasticFloat, r::RoundingMode{:Nearest}) = stochastic_float(round(float(x))) Base.round(x::AbstractStochasticFloat, r::RoundingMode{:ToZero}) = stochastic_float(round(float(x),RoundToZero)) +Base.trunc(::Type{T},x::AbstractStochasticFloat) where T = trunc(T,float(x)) # negation, and absolute Base.:(-)(x::AbstractStochasticFloat) = stochastic_float(-float(x)) From ce95495ae371b8062ea5a4ec594584eef0b4af59 Mon Sep 17 00:00:00 2001 From: Milan Date: Fri, 22 Dec 2023 12:28:03 +0100 Subject: [PATCH 03/23] promotion between stochastic and deterministic yields deterministic --- src/promotions.jl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/promotions.jl b/src/promotions.jl index 9cdb5f7..0c762be 100644 --- a/src/promotions.jl +++ b/src/promotions.jl @@ -5,4 +5,10 @@ Base.promote_rule(::Type{Float16sr}, ::Type{BFloat16sr}) = Float32sr Base.promote_rule(::Type{Float16sr}, ::Type{Float64sr}) = Float64sr Base.promote_rule(::Type{BFloat16sr}, ::Type{Float64sr}) = Float64sr -Base.promote_rule(::Type{Float32sr}, ::Type{Float64sr}) = Float64sr \ No newline at end of file +Base.promote_rule(::Type{Float32sr}, ::Type{Float64sr}) = Float64sr + +# (to be extended for new formats) + +# promotion between stochastic and deterministic floats yields deterministic +Base.promote_rule(::Type{S},::Type{D}) where {S<:AbstractStochasticFloat,D<:AbstractFloat} = + promote_type(float(S),D) \ No newline at end of file From b0b7763fe2dde15ed1649def2eeea3be088bcbb5 Mon Sep 17 00:00:00 2001 From: Milan Date: Fri, 22 Dec 2023 15:10:33 +0100 Subject: [PATCH 04/23] temporarily add nextfloat(::BFloat16) as BFloat16s.jl not released yet --- src/bfloat16.jl | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 src/bfloat16.jl diff --git a/src/bfloat16.jl b/src/bfloat16.jl new file mode 100644 index 0000000..8e4c4fe --- /dev/null +++ b/src/bfloat16.jl @@ -0,0 +1,39 @@ +# from BFloat16s.jl#main (22/12/23) to be removed with next release of BFloat16s.jl +function Base.nextfloat(f::BFloat16, d::Integer) + + F = typeof(f) + fumax = reinterpret(Base.uinttype(F), F(Inf)) + U = typeof(fumax) + + isnan(f) && return f + fi = reinterpret(Int16, f) + fneg = fi < 0 + fu = unsigned(fi & typemax(fi)) + + dneg = d < 0 + da = Base.abs(d) + if da > typemax(U) + fneg = dneg + fu = fumax + else + du = da % U + if fneg ⊻ dneg + if du > fu + fu = min(fumax, du - fu) + fneg = !fneg + else + fu = fu - du + end + else + if fumax - fu < du + fu = fumax + else + fu = fu + du + end + end + end + if fneg + fu |= Base.sign_mask(F) + end + reinterpret(F, fu) +end \ No newline at end of file From aec588a7acbb99dee87b818dd5b54fbf85fb4759 Mon Sep 17 00:00:00 2001 From: Milan Date: Fri, 22 Dec 2023 15:10:55 +0100 Subject: [PATCH 05/23] add Double64 to resolve ambiguity --- src/general.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/general.jl b/src/general.jl index 7441442..bddb21f 100644 --- a/src/general.jl +++ b/src/general.jl @@ -16,6 +16,7 @@ end (::Type{T})(x::Rational) where {T<:AbstractStochasticFloat} = stochastic_float(float(T)(x)) (::Type{T})(x::AbstractStochasticFloat) where {T<:AbstractFloat} = convert(T,float(x)) (::Type{T})(x::AbstractStochasticFloat) where {T<:AbstractStochasticFloat} = stochastic_float(convert(float(T),float(x))) +DoubleFloats.Double64(x::T) where T<:AbstractStochasticFloat = Double64(float(x)) # masks same as for deterministic floats Base.sign_mask(T::Type{<:AbstractStochasticFloat}) = Base.sign_mask(float(T)) From 196092bf0655bd7059e2688d55e226d25361b044 Mon Sep 17 00:00:00 2001 From: Milan Date: Fri, 22 Dec 2023 15:11:31 +0100 Subject: [PATCH 06/23] all tests also for BFloat16sr --- src/StochasticRounding.jl | 1 + test/bfloat16sr.jl | 192 ------------------------------ test/float16sr.jl | 217 --------------------------------- test/float32sr.jl | 188 ----------------------------- test/float64sr.jl | 46 ------- test/general_tests.jl | 244 ++++++++++++++++++++++++++++++++++++++ test/runtests.jl | 4 +- 7 files changed, 246 insertions(+), 646 deletions(-) delete mode 100644 test/bfloat16sr.jl delete mode 100644 test/float16sr.jl delete mode 100644 test/float32sr.jl delete mode 100644 test/float64sr.jl create mode 100644 test/general_tests.jl diff --git a/src/StochasticRounding.jl b/src/StochasticRounding.jl index 53f6343..c4e61da 100644 --- a/src/StochasticRounding.jl +++ b/src/StochasticRounding.jl @@ -27,4 +27,5 @@ module StochasticRounding include("types.jl") # define concrete types include("promotions.jl") # their promotions include("general.jl") # and general functions + include("bfloat16.jl") # some functions that aren't yet in BFloat16s.jl end diff --git a/test/bfloat16sr.jl b/test/bfloat16sr.jl deleted file mode 100644 index a43d798..0000000 --- a/test/bfloat16sr.jl +++ /dev/null @@ -1,192 +0,0 @@ -all_types = (BFloat16sr, Float16sr, Float32sr) - -@testset for T in all_types - - @testset "Sign flip" begin - @test one(T) == -(-(one(T))) - @test zero(T) == -(zero(T)) - end - - @testset "Integer promotion" begin - f = T(1) - @test 2f == T(2) - @test 0f == T(0) - end - - @testset "NaN and Inf" begin - @test isnan(nan(T)) - @test ~isfinite(nan(T)) - @test ~isfinite(infinity(T)) - - N = 1000 - for i in 1:N - @test stochastic_round(T,Inf) == stochastic_round(T,Inf) - @test stochastic_round(T,-Inf) == stochastic_round(T,-Inf) - @test isnan(stochastic_round(T,NaN)) - end - end - - @testset "No stochastic round to NaN" begin - f1 = nextfloat(zero(T)) - f2 = prevfloat(zero(T)) - for i in 1:100 - @test isfinite(stochastic_round(T,f1)) - @test isfinite(stochastic_round(T,f2)) - end - end -end - -@testset "Odd floats are never round away" begin - N = 100_000 - - f_odd = prevfloat(one(BFloat16sr)) - f_odd_f32 = Float32(f_odd) - for _ = 1:N - @test f_odd == BFloat16sr(f_odd_f32) - @test f_odd == BFloat16_stochastic_round(f_odd_f32) - end -end - -@testset "Even floats are never round away" begin - N = 100_000 - - f_odd = prevfloat(prevfloat(one(BFloat16sr))) - f_odd_f32 = Float32(f_odd) - for _ = 1:N - @test f_odd == BFloat16sr(f_odd_f32) - @test f_odd == BFloat16_stochastic_round(f_odd_f32) - end -end - -@testset "Rounding" begin - @test 1 == Int(round(BFloat16sr(1.2))) - @test 1 == Int(floor(BFloat16sr(1.2))) - @test 2 == Int(ceil(BFloat16sr(1.2))) - - @test -1 == Int(round(BFloat16sr(-1.2))) - @test -2 == Int(floor(BFloat16sr(-1.2))) - @test -1 == Int(ceil(BFloat16sr(-1.2))) -end - -@testset "Nextfloat prevfloat" begin - o = one(BFloat16sr) - @test o == nextfloat(prevfloat(o)) - @test o == prevfloat(nextfloat(o)) -end - -@testset "Comparisons" begin - @test BFloat16sr(1) < BFloat16sr(2) - @test BFloat16sr(1f0) < BFloat16sr(2f0) - @test BFloat16sr(1.0) < BFloat16sr(2.0) - @test BFloat16sr(1) <= BFloat16sr(2) - @test BFloat16sr(1f0) <= BFloat16sr(2f0) - @test BFloat16sr(1.0) <= BFloat16sr(2.0) - @test BFloat16sr(2) > BFloat16sr(1) - @test BFloat16sr(2f0) > BFloat16sr(1f0) - @test BFloat16sr(2) >= BFloat16sr(1) - @test BFloat16sr(2f0) >= BFloat16sr(1f0) - @test BFloat16sr(2.0) >= BFloat16sr(1.0) -end - -@testset "1.0 always to 1.0" begin - for i = 1:10000 - @test 1.0f0 == Float32(BFloat16sr(1.0f0)) - @test 1.0f0 == Float32(BFloat16_stochastic_round(1.0f0)) - end -end - -function test_chances_round_bf16(f32::Float32;N::Int=100_000) - p = BFloat16_chance_roundup(f32) - - f16_round = BFloat16sr(f32) - if Float32(f16_round) <= f32 - f16_rounddown = f16_round - f16_roundup = nextfloat(f16_round) - else - f16_roundup = f16_round - f16_rounddown = prevfloat(f16_round) - end - - Ndown = 0 - Nup = 0 - for _ in 1:N - f16 = BFloat16_stochastic_round(f32) - if f16 == f16_rounddown - Ndown += 1 - elseif f16 == f16_roundup - Nup += 1 - end - end - - return Ndown,Nup,N,p -end - -@testset "Test for N(0,1)" begin - for x in randn(Float32,10_000) - Ndown,Nup,N,p = test_chances_round_bf16(x) - @test Ndown + Nup == N - @test isapprox(Ndown/N,1-p,atol=2e-2) - @test isapprox(Nup/N,p,atol=2e-2) - end -end - -@testset "Test for U(0,1)" begin - for x in rand(Float32,10_000) - Ndown,Nup,N,p = test_chances_round_bf16(x) - @test Ndown + Nup == N - @test isapprox(Ndown/N,1-p,atol=2e-2) - @test isapprox(Nup/N,p,atol=2e-2) - end -end - -@testset "Test for powers of two" begin - for x in Float32[2,4,8,16,32,64,128,256] - Ndown,Nup,N,p = test_chances_round_bf16(x) - @test Ndown + Nup == N - @test isapprox(Ndown/N,1-p,atol=2e-2) - @test isapprox(Nup/N,p,atol=2e-2) - end - - for x in -Float32[2,4,8,16,32,64,128,256] - Ndown,Nup,N,p = test_chances_round_bf16(x) - @test Ndown + Nup == N - @test isapprox(Ndown/N,1-p,atol=2e-2) - @test isapprox(Nup/N,p,atol=2e-2) - end - - for x in Float32[1/2,1/4,1/8,1/16,1/32,1/64,1/128,1/256] - Ndown,Nup,N,p = test_chances_round_bf16(x) - @test Ndown + Nup == N - @test isapprox(Ndown/N,1-p,atol=2e-2) - @test isapprox(Nup/N,p,atol=2e-2) - end - - for x in -Float32[1/2,1/4,1/8,1/16,1/32,1/64,1/128,1/256] - Ndown,Nup,N,p = test_chances_round_bf16(x) - @test Ndown + Nup == N - @test isapprox(Ndown/N,1-p,atol=2e-2) - @test isapprox(Nup/N,p,atol=2e-2) - end -end - -@testset "Test for subnormals" begin - - floatminBF16 = reinterpret(UInt32,Float32(floatmin(BFloat16sr))) - minposBF16 = reinterpret(UInt32,Float32(nextfloat(zero(BFloat16sr)))) - - N = 10_000 - subnormals = reinterpret.(Float32,rand(minposBF16:floatminBF16,N)) - for x in subnormals - Ndown,Nup,N,p = test_chances_round_bf16(x) - @test Ndown + Nup == N - @test isapprox(Ndown/N,1-p,atol=2e-2) - @test isapprox(Nup/N,p,atol=2e-2) - end - - for x in -subnormals - Ndown,Nup,N,p = test_chances_round_bf16(x) - @test Ndown + Nup == N - @test isapprox(Ndown/N,1-p,atol=2e-2) - @test isapprox(Nup/N,p,atol=2e-2) - end -end diff --git a/test/float16sr.jl b/test/float16sr.jl deleted file mode 100644 index 049852f..0000000 --- a/test/float16sr.jl +++ /dev/null @@ -1,217 +0,0 @@ -@testset "Sign flip" begin - @test one(Float16sr) == -(-(one(Float16sr))) - @test zero(Float16sr) == -(zero(Float16sr)) -end - -@testset "NaN and Inf" begin - @test isnan(NaN16sr) - @test ~isfinite(NaN16sr) - @test ~isfinite(Inf16sr) - - N = 1000 - for i in 1:N - @test Inf16sr == Float16_stochastic_round(Inf32) - @test -Inf16sr == Float16_stochastic_round(-Inf32) - @test isnan(Float16_stochastic_round(NaN32)) - end -end - -@testset "No stochastic round to NaN" begin - f1 = nextfloat(0f0) - f2 = prevfloat(0f0) - for i in 1:100 - @test isfinite(Float16_stochastic_round(f1)) - @test isfinite(Float16_stochastic_round(f2)) - end -end - -@testset "Integer promotion" begin - f = Float16sr(1) - @test 2f == Float16sr(2) - @test 0 == Float16sr(0) -end - -@testset "Rounding" begin - @test 1 == Int(round(Float16sr(1.2))) - @test 1 == Int(floor(Float16sr(1.2))) - @test 2 == Int(ceil(Float16sr(1.2))) - - @test -1 == Int(round(Float16sr(-1.2))) - @test -2 == Int(floor(Float16sr(-1.2))) - @test -1 == Int(ceil(Float16sr(-1.2))) -end - -@testset "Nextfloat prevfloat" begin - o = one(Float16sr) - @test o == nextfloat(prevfloat(o)) - @test o == prevfloat(nextfloat(o)) -end - -@testset "Odd floats are never rounded away" begin - # chances are 2^-29 = 2e-9 - # so technically one should use N = 100_000_000 - N = 100_000 - - f_odd = prevfloat(one(Float16sr)) - f_odd_f32 = Float32(f_odd) - for _ = 1:N - @test f_odd == Float16sr(f_odd_f32) - @test f_odd == Float16_stochastic_round(f_odd_f32) - end -end - -@testset "Even floats are never round away" begin - # chances are 2^-29 = 2e-9 - # so technically one should use N = 100_000_000 - N = 100_000 - - f_odd = prevfloat(prevfloat(one(Float16sr))) - f_odd_f32 = Float32(f_odd) - for _ = 1:N - @test f_odd == Float16sr(f_odd_f32) - @test f_odd == Float16_stochastic_round(f_odd_f32) - end -end - -function test_chances_round(f32::Float32;N::Int=100_000) - p = Float16_chance_roundup(f32) - - f16_round = Float16sr(f32) - - if Float32(f16_round) == f32 - f16_roundup=f16_round - f16_rounddown=f16_round - elseif Float32(f16_round) < f32 - f16_rounddown = f16_round - f16_roundup = nextfloat(f16_round) - else - f16_roundup = f16_round - f16_rounddown = prevfloat(f16_round) - end - - Ndown = 0 - Nup = 0 - for _ in 1:N - f16 = Float16_stochastic_round(f32) - if f16 == f16_rounddown - Ndown += 1 - elseif f16 == f16_roundup - Nup += 1 - end - end - - return Ndown,Nup,N,p -end - -@testset "Test for N(0,1)" begin - for x in randn(Float32,10_000) - # exclude subnormals from testing, they are tested further down - if ~issubnormal(Float16(x)) - Ndown,Nup,N,p = test_chances_round(x) - @test Ndown + Nup == N - Ndown + Nup != N && @info "Test failed for $x, $(bitstring(Float16(x)))" - @test isapprox(Ndown/N,1-p,atol=2e-2) - @test isapprox(Nup/N,p,atol=2e-2) - end - end -end - -@testset "Test for U(0,1)" begin - for x in rand(Float32,10_000) - # exclude subnormals from testing, they are tested further down - if ~issubnormal(Float16(x)) - Ndown,Nup,N,p = test_chances_round(x) - @test Ndown + Nup == N - Ndown + Nup != N && @info "Test failed for $x, $(bitstring(Float16(x)))" - @test isapprox(Ndown/N,1-p,atol=2e-2) - @test isapprox(Nup/N,p,atol=2e-2) - end - end -end - -@testset "Test for powers of two" begin - for x in Float32[2,4,8,16,32,64,128,256] - Ndown,Nup,N,p = test_chances_round(x) - @test Ndown + Nup == N - @test isapprox(Ndown/N,1-p,atol=2e-2) - @test isapprox(Nup/N,p,atol=2e-2) - end - - for x in -Float32[2,4,8,16,32,64,128,256] - Ndown,Nup,N,p = test_chances_round(x) - @test Ndown + Nup == N - @test isapprox(Ndown/N,1-p,atol=2e-2) - @test isapprox(Nup/N,p,atol=2e-2) - end - - for x in Float32[1/2,1/4,1/8,1/16,1/32,1/64,1/128,1/256] - Ndown,Nup,N,p = test_chances_round(x) - @test Ndown + Nup == N - @test isapprox(Ndown/N,1-p,atol=2e-2) - @test isapprox(Nup/N,p,atol=2e-2) - end - - for x in -Float32[1/2,1/4,1/8,1/16,1/32,1/64,1/128,1/256] - Ndown,Nup,N,p = test_chances_round(x) - @test Ndown + Nup == N - @test isapprox(Ndown/N,1-p,atol=2e-2) - @test isapprox(Nup/N,p,atol=2e-2) - end -end - -@testset "Test for subnormals" begin - - floatminF16 = reinterpret(UInt32,Float32(floatmin(Float16))) - minposF16 = reinterpret(UInt32,Float32(nextfloat(zero(Float16)))) - - N = 10_000 - subnormals = reinterpret.(Float32,rand(minposF16:floatminF16,N)) - for x in subnormals - Ndown,Nup,N,p = test_chances_round(x) - @test Ndown + Nup == N - Ndown + Nup != N && @info "Test failed for $x, $(bitstring(Float16(x)))" - @test isapprox(Ndown/N,1-p,atol=2e-2) - @test isapprox(Nup/N,p,atol=2e-2) - end - - for x in -subnormals - Ndown,Nup,N,p = test_chances_round(x) - @test Ndown + Nup == N - Ndown + Nup != N && @info "Test failed for $x, $(bitstring(Float16(x)))" - @test isapprox(Ndown/N,1-p,atol=2e-2) - @test isapprox(Nup/N,p,atol=2e-2) - end -end - -# USED TO TEST SOME SUBNORMALS MANUALLY -# const ϵ = prevfloat(Float32(nextfloat(zero(Float16))),1) - -# function g(x::Float32,r::Float32) -# # x + ϵ*(r-1.5f0) -# x + randfloat(reinterpret(UInt32,r)) -# end - -# function randfloat(ui::UInt32) - -# lz = leading_zeros(ui) -# e = ((101 - lz) % UInt32) << 23 -# e |= (ui << 31) - -# # combine exponent and significand -# return reinterpret(Float32,e | (ui & 0x007f_ffff)) -# end - - -# function h(x::Float32) -# # deterministically rounded correct value in uint16 -# c = Int(reinterpret(UInt16,Float16(x))) - -# v = zeros(Int16,2^23) -# r = 1f0 - -# for i in 0:2^23-1 -# v[i+1] = Int(reinterpret(UInt16,Float16(g(x,r)))) - c -# r = nextfloat(r) -# end -# return v -# end \ No newline at end of file diff --git a/test/float32sr.jl b/test/float32sr.jl deleted file mode 100644 index d4bc02e..0000000 --- a/test/float32sr.jl +++ /dev/null @@ -1,188 +0,0 @@ -@testset "1.0 always to 1.0" begin - N = 10000 - - for i = 1:N - @test 1.0f0 == Float32(Float32sr(1.0)) - @test 1.0f0 == Float32(Float32_stochastic_round(1.0)) - end -end - -@testset "Odd floats are never round away" begin - # chances are 2^-28 = 4e-9 - # so technically one should use N > 100_000_000 but takes too long - N = 100_000 - - f_odd = prevfloat(one(Float32sr)) - f_odd_f64 = Float64(f_odd) - for _ = 1:N - @test f_odd == Float32sr(f_odd_f64) - @test f_odd == Float32_stochastic_round(f_odd_f64) - end -end - -@testset "Even floats are never round away" begin - # chances are 2^-28 = 4e-9 - # so technically one should use N > 100_000_000 but takes too long - N = 100_000 - - f_odd = prevfloat(prevfloat(one(Float32sr))) - f_odd_f64 = Float64(f_odd) - for _ = 1:N - @test f_odd == Float32sr(f_odd_f64) - @test f_odd == Float32_stochastic_round(f_odd_f64) - end -end - -@testset "Sign flip" begin - @test one(Float32sr) == -(-(one(Float32sr))) - @test zero(Float32sr) == -(zero(Float32sr)) -end - -@testset "Integer promotion" begin - f = Float32sr(1) - @test 2f == Float32sr(2) - @test 0 == Float32sr(0) -end - -@testset "Rounding" begin - @test 1 == Int(round(Float32sr(1.2))) - @test 1 == Int(floor(Float32sr(1.2))) - @test 2 == Int(ceil(Float32sr(1.2))) - - @test -1 == Int(round(Float32sr(-1.2))) - @test -2 == Int(floor(Float32sr(-1.2))) - @test -1 == Int(ceil(Float32sr(-1.2))) -end - -@testset "Nextfloat prevfloat" begin - o = one(Float32sr) - @test o == nextfloat(prevfloat(o)) - @test o == prevfloat(nextfloat(o)) -end - -@testset "Comparisons" begin - @test Float32sr(1f0) < Float32sr(2f0) - @test Float32sr(1) < Float32sr(2) - @test Float32sr(1.0) < Float32sr(2.0) - @test Float32sr(1) <= Float32sr(2) - @test Float32sr(1f0) <= Float32sr(2f0) - @test Float32sr(1.0) <= Float32sr(2.0) - @test Float32sr(2) > Float32sr(1) - @test Float32sr(2f0) > Float32sr(1f0) - @test Float32sr(2) >= Float32sr(1) - @test Float32sr(2f0) >= Float32sr(1f0) - @test Float32sr(2.0) >= Float32sr(1.0) -end - -@testset "NaN and Inf" begin - @test isnan(NaN32sr) - @test ~isfinite(NaN32sr) - @test ~isfinite(Inf32sr) - @test isnan(Float32_stochastic_round(NaN)) - @test Inf32 == Float32(Float32_stochastic_round(Inf)) - @test -Inf32 == Float32(Float32_stochastic_round(-Inf)) - - N = 1000 - for i in 1:N - @test Inf32sr == Float32_stochastic_round(Inf) - @test -Inf32sr == Float32_stochastic_round(-Inf) - @test isnan(Float32_stochastic_round(NaN)) - end -end - -@testset "No stochastic round to NaN" begin - f1 = nextfloat(0.0) - f2 = prevfloat(0.0) - for i in 1:1000 - @test isfinite(Float32_stochastic_round(f1)) - @test isfinite(Float32_stochastic_round(f2)) - end -end - -@testset "No stochastic round to NaN" begin - f1 = Float64(floatmax(Float32)) # larger can map to Inf though! - f2 = -f1 - for i in 1:1000 - @test isfinite(Float32_stochastic_round(f1)) - @test isfinite(Float32_stochastic_round(f2)) - end -end - -function test_chances_round(f64::Float64;N::Int=100_000) - p = Float32_chance_roundup(f64) - - f32_round = Float32sr(f64) - if Float64(f32_round) == f64 - f32_roundup = f32_round - f32_rounddown = f32_round - elseif Float64(f32_round) < f64 - f32_rounddown = f32_round - f32_roundup = nextfloat(f32_round) - else - f32_roundup = f32_round - f32_rounddown = prevfloat(f32_round) - end - - Ndown = 0 - Nup = 0 - for _ in 1:N - f32 = Float32_stochastic_round(f64) - if f32 == f32_rounddown - Ndown += 1 - elseif f32 == f32_roundup - Nup += 1 - end - end - - test1 = Ndown + Nup == N - test2 = isapprox(Ndown/N,1-p,atol=1e-2) - test3 = isapprox(Nup/N,p,atol=1e-2) - - return test1 && test2 && test3 -end - -@testset "Test for N(0,1)" begin - for x in randn(10_000) - @test test_chances_round(x) - end -end - -@testset "Test for U(0,1)" begin - for x in rand(10_000) - @test test_chances_round(x) - end -end - -@testset "Test for powers of two" begin - for x in Float64[2,4,8,16,32,64,128,256] - @test test_chances_round(x) - end - - for x in -Float64[2,4,8,16,32,64,128,256] - @test test_chances_round(x) - end - - for x in Float64[1/2,1/4,1/8,1/16,1/32,1/64,1/128,1/256] - @test test_chances_round(x) - end - - for x in -Float64[1/2,1/4,1/8,1/16,1/32,1/64,1/128,1/256] - @test test_chances_round(x) - end -end - -@testset "Test for subnormals" begin - - floatminF32 = reinterpret(UInt64,Float64(floatmin(Float32))) - minposF32 = reinterpret(UInt64,Float64(nextfloat(zero(Float32)))) - - N = 5000 - subnormals = reinterpret.(Float64,rand(minposF32:floatminF32,N)) - for x in subnormals - @test test_chances_round(x) - end - - for x in -subnormals - @test test_chances_round(x) - end -end \ No newline at end of file diff --git a/test/float64sr.jl b/test/float64sr.jl deleted file mode 100644 index 6a42c0b..0000000 --- a/test/float64sr.jl +++ /dev/null @@ -1,46 +0,0 @@ -using DoubleFloats - -function test_chances_round(f128::Double64;N::Int=100_000) - p = Float64_chance_roundup(f128) - - f64_round = Float64sr(f128) - if Double64(f64_round) == f128 - f64_roundup = f64_round - f64_rounddown = f64_round - elseif Double64(f64_round) < f128 - f64_rounddown = f64_round - f64_roundup = nextfloat(f64_round) - else - f64_roundup = f64_round - f64_rounddown = prevfloat(f64_round) - end - - Ndown = 0 - Nup = 0 - for _ in 1:N - f64 = Float64_stochastic_round(f128) - if f64 == f64_rounddown - Ndown += 1 - elseif f64 == f64_roundup - Nup += 1 - end - end - - test1 = Ndown + Nup == N - test2 = isapprox(Ndown/N,1-p,atol=1e-2) - test3 = isapprox(Nup/N,p,atol=1e-2) - - return test1 && test2 && test3 -end - -@testset "Test for N(0,1)" begin - for x in randn(Double64,10_000) - @test test_chances_round(x) - end -end - -@testset "Test for U(0,1)" begin - for x in rand(Double64,10_000) - @test test_chances_round(x) - end -end \ No newline at end of file diff --git a/test/general_tests.jl b/test/general_tests.jl new file mode 100644 index 0000000..118442c --- /dev/null +++ b/test/general_tests.jl @@ -0,0 +1,244 @@ +@testset for T in (BFloat16sr,Float16sr, Float32sr,Float64sr) + @testset "Sign flip" begin + @test one(T) == -(-(one(T))) + @test zero(T) == -(zero(T)) + end +end + +#TODO Float64sr can round down for powers of two +@testset for T in (BFloat16sr, Float16sr, Float32sr) + @testset "Integer promotion" begin + f = T(1) + @test 2f == T(2) + @test 0f == T(0) + end +end + +#TODO for Float64sr, stochastic_round(Float64sr,Inf) = NaN atm +@testset for T in (BFloat16sr, Float16sr, Float32sr) + @testset "NaN and Inf" begin + @test isnan(StochasticRounding.nan(T)) + @test ~isfinite(StochasticRounding.nan(T)) + @test ~isfinite(StochasticRounding.infinity(T)) + + N = 1000 + for i in 1:N + @test stochastic_round(T,Inf) == stochastic_round(T,Inf) + @test stochastic_round(T,-Inf) == stochastic_round(T,-Inf) + @test isnan(stochastic_round(T,NaN)) + end + end +end + +@testset for T in (BFloat16sr, Float16sr, Float32sr, Float64sr) + @testset "No stochastic round to NaN" begin + f1 = nextfloat(zero(T)) + f2 = prevfloat(zero(T)) + for i in 1:100 + @test isfinite(stochastic_round(T,f1)) + @test isfinite(stochastic_round(T,f2)) + end + end +end + +@testset for T in (BFloat16sr, Float16sr, Float32sr,Float64sr) + @testset "Odd floats are never rounded away" begin + N = 100_000 + + f_odd = prevfloat(one(T)) + f_odd_widened = widen(f_odd) + for _ = 1:N + @test f_odd == T(f_odd_widened) + @test f_odd == stochastic_round(T,f_odd_widened) + end + end +end + +#TODO Float64sr can round down for powers of two +@testset for T in (BFloat16sr, Float16sr, Float32sr) + @testset "Even floats are never round away" begin + N = 100_000 + + f_even = prevfloat(one(T),2) + f_even_widened = Float32(f_even) + for _ = 1:N + @test f_even == T(f_even_widened) + @test f_even == stochastic_round(T,f_even_widened) + end + end +end + +@testset for T in (BFloat16sr, Float16sr, Float32sr, Float64sr) + @testset "Rounding" begin + @test 1 == round(Int,T(1.2)) + @test 1 == floor(Int,T(1.2)) + @test 2 == ceil(Int,T(1.2)) + + @test -1 == round(Int,T(-1.2)) + @test -2 == floor(Int,T(-1.2)) + @test -1 == ceil(Int,T(-1.2)) + end +end + +@testset for T in (BFloat16sr, Float16sr, Float32sr, Float64sr) + @testset "Nextfloat prevfloat" begin + o = one(T) + @test o == nextfloat(prevfloat(o)) + @test o == prevfloat(nextfloat(o)) + end +end + +@testset for T in (BFloat16sr, Float16sr, Float32sr, Float64sr) + @testset "Comparisons" begin + @test T(1) < T(2) + @test T(1f0) < T(2f0) + @test T(1.0) < T(2.0) + @test T(1) <= T(2) + @test T(1f0) <= T(2f0) + @test T(1.0) <= T(2.0) + @test T(2) > T(1) + @test T(2f0) > T(1f0) + @test T(2) >= T(1) + @test T(2f0) >= T(1f0) + @test T(2.0) >= T(1.0) + end +end + +#TODO Float64sr can round down for powers of two +@testset for T in (BFloat16sr, Float16sr, Float32sr) + @testset "1.0 always to 1.0" begin + for i = 1:10000 + @test 1 == T(1) + @test 1 == stochastic_round(T,1.0f0) + end + end +end + +function test_chances_round( + T::Type{<:AbstractStochasticFloat}, + f::AbstractFloat; + N::Int=100_000 +) + + p = chance_roundup(T,f) + f_round = T(f) + + if f_round <= f + f_rounddown = f_round + f_roundup = nextfloat(f_round) + else + f_roundup = f_round + f_rounddown = prevfloat(f_round) + end + + Ndown = 0 + Nup = 0 + for _ in 1:N + f_sr = stochastic_round(T,f) + if f_sr == f_rounddown + Ndown += 1 + elseif f_sr == f_roundup + Nup += 1 + end + end + + return Ndown,Nup,N,p +end + +@testset for T in (BFloat16sr, Float16sr, Float32sr, Float64sr) + @testset "Test for N(0,1)" begin + for x in randn(widen(T),10_000) + Ndown,Nup,N,p = test_chances_round(T,x) + @test Ndown + Nup == N + @test isapprox(Ndown/N,1-p,atol=2e-2) + @test isapprox(Nup/N,p,atol=2e-2) + end + end +end + +@testset for T in (BFloat16sr, Float16sr, Float32sr, Float64sr) + @testset "Test for U(0,1)" begin + for x in rand(widen(T),10_000) + Ndown,Nup,N,p = test_chances_round(T,x) + @test Ndown + Nup == N + @test isapprox(Ndown/N,1-p,atol=2e-2) + @test isapprox(Nup/N,p,atol=2e-2) + end + end +end + +#TODO Float64sr can round down for powers of two +@testset for T in (BFloat16sr, Float16sr, Float32sr) + @testset "Test for powers of two" begin + for x in [2,4,8,16,32,64,128,256] + Ndown,Nup,N,p = test_chances_round(T,widen(T)(x)) + @test Ndown + Nup == N + @test isapprox(Ndown/N,1-p,atol=2e-2) + @test isapprox(Nup/N,p,atol=2e-2) + + # negative + Ndown,Nup,N,p = test_chances_round(T,widen(T)(-x)) + @test Ndown + Nup == N + @test isapprox(Ndown/N,1-p,atol=2e-2) + @test isapprox(Nup/N,p,atol=2e-2) + + # inverse + Ndown,Nup,N,p = test_chances_round(T,widen(T)(inv(x))) + @test Ndown + Nup == N + @test isapprox(Ndown/N,1-p,atol=2e-2) + @test isapprox(Nup/N,p,atol=2e-2) + + # negative inverse + Ndown,Nup,N,p = test_chances_round(T,widen(T)(-inv(x))) + @test Ndown + Nup == N + @test isapprox(Ndown/N,1-p,atol=2e-2) + @test isapprox(Nup/N,p,atol=2e-2) + end + end +end + +# skip Float64sr because Double64 subnormals are the same as Float64 subnormals +# = deterministic rounding in those cases anyway +@testset for T in (BFloat16sr, Float16sr, Float32sr) + @testset "Test for subnormals" begin + + N = 10_000 + + if T == Float64sr + ui = Base.uinttype(Float64) + floatminT = reinterpret(ui,floatmin(Float64sr)) + minposT = reinterpret(ui,minpos(Float64sr)) + lows = reinterpret.(Float64,rand(minposT:floatminT,N)) + subnormals = [Double64(low) for low in lows] + else + ui = Base.uinttype(widen(T)) + floatminT = reinterpret(ui,widen(floatmin(T))) + minposT = reinterpret(ui,widen(minpos(T))) + subnormals = reinterpret.(widen(T),rand(minposT:floatminT,N)) + end + + for x in subnormals + Ndown,Nup,N,p = test_chances_round(T,x) + @test Ndown + Nup == N + @test isapprox(Ndown/N,1-p,atol=2e-2) + @test isapprox(Nup/N,p,atol=2e-2) + end + + for x in -subnormals + Ndown,Nup,N,p = test_chances_round(T,x) + @test Ndown + Nup == N + @test isapprox(Ndown/N,1-p,atol=2e-2) + @test isapprox(Nup/N,p,atol=2e-2) + end + end +end + + + + + + + + + + diff --git a/test/runtests.jl b/test/runtests.jl index 0d9ba4b..d7dfbe0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,9 +4,7 @@ using DifferentialEquations import BFloat16s: BFloat16 -include("bfloat16sr.jl") -include("float16sr.jl") -include("float32sr.jl") +include("general_tests.jl") include("conversions.jl") include("seeding.jl") include("differential_equations.jl") \ No newline at end of file From 6247df2f351075059427927608906bc610e0edb9 Mon Sep 17 00:00:00 2001 From: Milan Date: Fri, 22 Dec 2023 15:24:39 +0100 Subject: [PATCH 07/23] decompose added for BFloat16 (not yet in BFloat16s.jl) --- src/bfloat16.jl | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/bfloat16.jl b/src/bfloat16.jl index 8e4c4fe..e9a1716 100644 --- a/src/bfloat16.jl +++ b/src/bfloat16.jl @@ -36,4 +36,15 @@ function Base.nextfloat(f::BFloat16, d::Integer) fu |= Base.sign_mask(F) end reinterpret(F, fu) +end + +function Base.decompose(x::BFloat16)::NTuple{3,Int} + isnan(x) && return 0, 0, 0 + isinf(x) && return ifelse(x < 0, -1, 1), 0, 0 + n = reinterpret(UInt16, x) + s = (n & 0x007f) % Int16 + e = ((n & 0x7f80) >> 7) % Int + s |= Int16(e != 0) << 7 + d = ifelse(signbit(x), -1, 1) + s, e - 134 + (e == 0), d end \ No newline at end of file From 98d624908b272ecff0716b80ef89cdd0f9f5513a Mon Sep 17 00:00:00 2001 From: Milan Date: Fri, 22 Dec 2023 16:05:46 +0100 Subject: [PATCH 08/23] add test failed feedback for Float16sr --- src/bfloat16.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bfloat16.jl b/src/bfloat16.jl index e9a1716..24fd921 100644 --- a/src/bfloat16.jl +++ b/src/bfloat16.jl @@ -11,7 +11,7 @@ function Base.nextfloat(f::BFloat16, d::Integer) fu = unsigned(fi & typemax(fi)) dneg = d < 0 - da = Base.abs(d) + da = Base.uabs(d) if da > typemax(U) fneg = dneg fu = fumax From 20aa6782674afa4b80db6b66bcc627156632262f Mon Sep 17 00:00:00 2001 From: Milan Date: Fri, 22 Dec 2023 18:41:29 +0100 Subject: [PATCH 09/23] Resolve Float16sr round away subnormal error --- src/types.jl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/types.jl b/src/types.jl index c4b3504..25321c8 100644 --- a/src/types.jl +++ b/src/types.jl @@ -49,14 +49,18 @@ end Create a random perturbation for the Float16 subnormals for stochastic rounding of Float32 -> Float16. This function samples uniformly from [-2.980232f-8,2.9802319f-8]. += [-u/2,u/2]. This function is algorithmically similar to randfloat from RandomNumbers.jl""" function rand_subnormal(rbits::UInt32) lz = leading_zeros(rbits) # count leading zeros for correct probabilities of exponent e = ((101 - lz) % UInt32) << 23 e |= (rbits << 31) # use last bit for sign - + # combine exponent with random mantissa - return reinterpret(Float32,e | (rbits & 0x007f_ffff)) + # the mask should be 0x007f_ffff but that can create a + # Float16-representable number to be rounded away at very low chance + # hence tweak the mask so that the largest perturbation is tiny bit smaller + return reinterpret(Float32,e | (rbits & 0x007f_fdff)) end # FLOAT32 + STOCHASTIC ROUNDING, DEFINE EVERYTHING SPECIFIC From 5028a54eb620f8730ad7bcdc09365d473245723f Mon Sep 17 00:00:00 2001 From: Milan Date: Fri, 22 Dec 2023 18:41:43 +0100 Subject: [PATCH 10/23] tests with more feedback if fail --- test/general_tests.jl | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/test/general_tests.jl b/test/general_tests.jl index 118442c..96db828 100644 --- a/test/general_tests.jl +++ b/test/general_tests.jl @@ -139,6 +139,8 @@ function test_chances_round( Ndown += 1 elseif f_sr == f_roundup Nup += 1 + else + @info "$f although in [$f_rounddown,$f_roundup] was rounded to $f_sr" end end @@ -147,12 +149,20 @@ end @testset for T in (BFloat16sr, Float16sr, Float32sr, Float64sr) @testset "Test for N(0,1)" begin - for x in randn(widen(T),10_000) + N = 10_000 + # p_ave = 0 # average absolute error in rounding chances + # p_max = 0 # max abs error in rounding chances + for x in randn(widen(T),N) Ndown,Nup,N,p = test_chances_round(T,x) @test Ndown + Nup == N + Ndown + Nup != N && @info "Test failed for $x, $(bitstring(x))" @test isapprox(Ndown/N,1-p,atol=2e-2) @test isapprox(Nup/N,p,atol=2e-2) + p_ave += abs(Nup/N - p) + p_max = max(p_max,abs(Nup/N - p)) end + # @info p_ave/N + # @info p_max end end @@ -161,6 +171,7 @@ end for x in rand(widen(T),10_000) Ndown,Nup,N,p = test_chances_round(T,x) @test Ndown + Nup == N + Ndown + Nup != N && @info "Test failed for $x, $(bitstring(x))" @test isapprox(Ndown/N,1-p,atol=2e-2) @test isapprox(Nup/N,p,atol=2e-2) end From a5e48d7b91ca0806efe8c5adbed4655adc12dd84 Mon Sep 17 00:00:00 2001 From: Milan Date: Fri, 22 Dec 2023 18:50:41 +0100 Subject: [PATCH 11/23] forgot to comment checks in tests --- test/general_tests.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/general_tests.jl b/test/general_tests.jl index 96db828..8fcaa32 100644 --- a/test/general_tests.jl +++ b/test/general_tests.jl @@ -158,8 +158,8 @@ end Ndown + Nup != N && @info "Test failed for $x, $(bitstring(x))" @test isapprox(Ndown/N,1-p,atol=2e-2) @test isapprox(Nup/N,p,atol=2e-2) - p_ave += abs(Nup/N - p) - p_max = max(p_max,abs(Nup/N - p)) + # p_ave += abs(Nup/N - p) + # p_max = max(p_max,abs(Nup/N - p)) end # @info p_ave/N # @info p_max From 3dae9347e1afa75cd6c3f43bafe2a0da1d62c565 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Milan=20Kl=C3=B6wer?= Date: Fri, 22 Dec 2023 18:53:38 +0100 Subject: [PATCH 12/23] bump to v0.8 --- Project.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index f201f5d..74fbd84 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "StochasticRounding" uuid = "3843c9a1-1f18-49ff-9d99-1b4c8a8e97ed" -authors = ["Milan Kloewer"] -version = "0.7" +authors = ["Milan Kloewer and StochasticRounding.jl contributors"] +version = "0.8" [deps] BFloat16s = "ab4f0b2a-ad5b-11e8-123f-65d77653426b" From 18166be2e1b67123c506dec0899c441cea9b6ca2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Milan=20Kl=C3=B6wer?= Date: Fri, 22 Dec 2023 19:12:43 +0100 Subject: [PATCH 13/23] Update README.md --- README.md | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 0816731..fad166f 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ Stochastic rounding for floating-point arithmetic. -This package exports `Float64sr`, `Float32sr`,`Float16sr`, and `BFloat16sr`, three number formats that behave +This package exports `Float64sr`, `Float32sr`,`Float16sr`, and `BFloat16sr`, four number formats that behave like their deterministic counterparts but with stochastic rounding that is proportional to the distance of the next representable numbers and therefore [exact in expectation](https://en.wikipedia.org/wiki/Rounding#Stochastic_rounding) @@ -35,8 +35,25 @@ ask questions or suggest any changes or new features. ## Usage -`Float64sr`, `Float32sr`, `Float16sr` and `BFloat16sr` are supposed to be drop-in replacements for their -deterministically rounded counterparts. You can create data of those types as expected +StochasticRounding.jl defines `stochastic_round` as a general interface to round stochastically to +a floating-point number, e.g. +```julia +julia> stochastic_round(Float32,π) +3.1415927f0 + +julia> stochastic_round(Float32,π) +3.1415925f0 +``` +Here we round π (which is first converted to Float64) to Float32 where it is not exactly representable, +hence we have results that differ in the last bit. The chance here is +```julia +julia> chance_roundup(Float32,π) +0.6333222836256027 +``` +about 63% to obtain the first (which therefore would be round to nearest, the default rounding mode), +and hence 37% to obtain the latter (round away from nearest). The `stochastic_round` function is wrapped +into the floating-point formats `Float64sr`, `Float32sr`, `Float16sr` and `BFloat16sr` are supposed to be drop-in +replacements for their deterministically rounded counterparts. You can create data of those types as expected (which is bitwise identical to the deterministic formats respectively) and the type will trigger stochastic rounding on every arithmetic operation. @@ -50,7 +67,8 @@ BFloat16sr(0.33203125) ``` As `1/3` is not exactly representable the rounding will be at 66.6% chance towards 0.33398438 and at 33.3% towards 0.33203125 such that in expectation the result is 0.33333... and therefore exact. -You can use `BFloat16_chance_roundup(x::Float32)` to get the chance that `x` will be rounded up. + +## Example Solving a linear equation system with LU decomposition and stochastic rounding: ```julia From 7654e3aa2408ccab3ecb66018e4ac91ce774613b Mon Sep 17 00:00:00 2001 From: Milan Date: Fri, 22 Dec 2023 19:15:29 +0100 Subject: [PATCH 14/23] BFloat16(::BigFloat) added --- src/bfloat16.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/bfloat16.jl b/src/bfloat16.jl index 24fd921..aad6e4b 100644 --- a/src/bfloat16.jl +++ b/src/bfloat16.jl @@ -47,4 +47,6 @@ function Base.decompose(x::BFloat16)::NTuple{3,Int} s |= Int16(e != 0) << 7 d = ifelse(signbit(x), -1, 1) s, e - 134 + (e == 0), d -end \ No newline at end of file +end + +BFloat16s.BFloat16(x::BigFloat) = Float32(x) \ No newline at end of file From ab686f34f5423a3d5f52f0c1ee21230dbcbfdad6 Mon Sep 17 00:00:00 2001 From: Milan Date: Fri, 22 Dec 2023 19:15:47 +0100 Subject: [PATCH 15/23] stochastic_round, chance_roundup also for ::Real --- src/general.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/general.jl b/src/general.jl index bddb21f..8991b3e 100644 --- a/src/general.jl +++ b/src/general.jl @@ -97,7 +97,7 @@ Base.abs(x::AbstractStochasticFloat) = stochastic_float(abs(float(x))) # stochastic rounding export stochastic_round -stochastic_round(T::Type{<:AbstractFloat},x::AbstractFloat) = stochastic_round(T,widen(stochastic_float(T))(x)) +stochastic_round(T::Type{<:AbstractFloat},x::Real) = stochastic_round(T,widen(stochastic_float(T))(x)) stochastic_round(T::Type{<:AbstractStochasticFloat},x::AbstractFloat) = stochastic_float(stochastic_round(float(T),x)) # Comparison @@ -129,6 +129,7 @@ Base.nextfloat(x::AbstractStochasticFloat,n::Integer) = stochastic_float(nextflo Base.prevfloat(x::AbstractStochasticFloat,n::Integer) = stochastic_float(prevfloat(float(x),n)) export chance_roundup +chance_roundup(T::Type{<:AbstractFloat},x::Real) = chance_roundup(stochastic_float(T),x) function chance_roundup(T::Type{<:AbstractStochasticFloat},x::Real) x = widen(T)(x) xround = float(T)(x) @@ -137,4 +138,5 @@ function chance_roundup(T::Type{<:AbstractStochasticFloat},x::Real) (prevfloat(xround),xround) return (x-xround_down)/(xround_up-xround_down) -end \ No newline at end of file +end + From 234e2dab38de8592f5f5ac187cbb0bad2d322786 Mon Sep 17 00:00:00 2001 From: Milan Date: Sun, 24 Dec 2023 13:56:19 +0100 Subject: [PATCH 16/23] import BFloat16s was missing --- src/StochasticRounding.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/StochasticRounding.jl b/src/StochasticRounding.jl index c4e61da..e75cbc1 100644 --- a/src/StochasticRounding.jl +++ b/src/StochasticRounding.jl @@ -1,7 +1,7 @@ module StochasticRounding # use BFloat16 from BFloat16s.jl - import BFloat16s: BFloat16 + import BFloat16s: BFloat16s, BFloat16 # faster random number generator import RandomNumbers.Xorshifts.Xoroshiro128Plus From 17f631eb75439c487829fbd37941028ba0f75e10 Mon Sep 17 00:00:00 2001 From: Milan Date: Thu, 28 Dec 2023 15:17:08 +0100 Subject: [PATCH 17/23] add BFloat16(::BigFloat) --- src/bfloat16.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bfloat16.jl b/src/bfloat16.jl index aad6e4b..4f1e069 100644 --- a/src/bfloat16.jl +++ b/src/bfloat16.jl @@ -49,4 +49,4 @@ function Base.decompose(x::BFloat16)::NTuple{3,Int} s, e - 134 + (e == 0), d end -BFloat16s.BFloat16(x::BigFloat) = Float32(x) \ No newline at end of file +BFloat16s.BFloat16(x::BigFloat) = BFloat16(Float32(x)) \ No newline at end of file From 035ef104cc8eeee12de15cd3094185c1ebb76586 Mon Sep 17 00:00:00 2001 From: Milan Date: Thu, 28 Dec 2023 16:32:24 +0100 Subject: [PATCH 18/23] BigFloat(::BFloat16) added --- src/bfloat16.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/bfloat16.jl b/src/bfloat16.jl index 4f1e069..2cba2e5 100644 --- a/src/bfloat16.jl +++ b/src/bfloat16.jl @@ -49,4 +49,5 @@ function Base.decompose(x::BFloat16)::NTuple{3,Int} s, e - 134 + (e == 0), d end -BFloat16s.BFloat16(x::BigFloat) = BFloat16(Float32(x)) \ No newline at end of file +BFloat16s.BFloat16(x::BigFloat) = BFloat16(Float32(x)) +Base.BigFloat(x::BFloat16) = BigFloat(Float32(x)) \ No newline at end of file From f5dde71a81cc80a97c0d700c7fd7dd166ae18162 Mon Sep 17 00:00:00 2001 From: Milan Date: Thu, 28 Dec 2023 16:33:05 +0100 Subject: [PATCH 19/23] Random samplers added for Complex{<:AbstractStochasticFloat} rand, randn --- src/general.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/general.jl b/src/general.jl index 8991b3e..b1d6069 100644 --- a/src/general.jl +++ b/src/general.jl @@ -40,6 +40,11 @@ Base.randn(::Type{T},dims::Integer...) where {T<:AbstractStochasticFloat} = rein Base.zeros(::Type{T},dims::Integer...) where {T<:AbstractStochasticFloat} = reinterpret.(T,zeros(float(T),dims...)) Base.ones(::Type{T},dims::Integer...) where {T<:AbstractStochasticFloat} = reinterpret.(T,ones(float(T),dims...)) +import Random: rand, randn, randexp, AbstractRNG, Sampler +rand( rng::AbstractRNG, ::Sampler{T}) where {T<:AbstractStochasticFloat} = convert(T, rand(rng)) +randn( rng::AbstractRNG, ::Type{T}) where {T<:AbstractStochasticFloat} = convert(T, randn(rng)) +randexp(rng::AbstractRNG, ::Type{T}) where {T<:AbstractStochasticFloat} = convert(T, randexp(rng)) + Base.iszero(x::AbstractStochasticFloat) = iszero(float(x)) Base.isfinite(x::AbstractStochasticFloat) = isfinite(float(x)) Base.isnan(x::AbstractStochasticFloat) = isnan(float(x)) From 937a10dd9a9c53647f49be5d22c36e6d46a273ee Mon Sep 17 00:00:00 2001 From: Milan Date: Thu, 28 Dec 2023 16:33:14 +0100 Subject: [PATCH 20/23] random sampler tests --- test/general_tests.jl | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/test/general_tests.jl b/test/general_tests.jl index 8fcaa32..e707b5f 100644 --- a/test/general_tests.jl +++ b/test/general_tests.jl @@ -80,6 +80,17 @@ end end end +@testset for T in (BFloat16sr, Float16sr, Float32sr, Float64sr) + @testset "Random" begin + for r in (rand,randn) + @test r(T) isa T + @test r(Complex{T}) isa Complex{T} + @test r(T,2,3,4) isa Array{T} + @test r(Complex{T},2,3,4) isa Array{Complex{T}} + end + end +end + @testset for T in (BFloat16sr, Float16sr, Float32sr, Float64sr) @testset "Nextfloat prevfloat" begin o = one(T) From 8bc9278622853f840e7339aa7d24110508bd29f7 Mon Sep 17 00:00:00 2001 From: Milan Date: Thu, 28 Dec 2023 16:43:30 +0100 Subject: [PATCH 21/23] add Random to Project.toml --- Project.toml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Project.toml b/Project.toml index 74fbd84..49d1eaa 100644 --- a/Project.toml +++ b/Project.toml @@ -1,22 +1,24 @@ name = "StochasticRounding" uuid = "3843c9a1-1f18-49ff-9d99-1b4c8a8e97ed" authors = ["Milan Kloewer and StochasticRounding.jl contributors"] -version = "0.8" +version = "0.8.0" [deps] BFloat16s = "ab4f0b2a-ad5b-11e8-123f-65d77653426b" DoubleFloats = "497a8b3b-efae-58df-a0af-a86822472b78" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" RandomNumbers = "e6cf234a-135c-5ec9-84dd-332b85af5143" [compat] BFloat16s = "0.1, 0.2, 0.3, 0.4" -RandomNumbers = "^1.4" DoubleFloats = "1" +Random = "1" +RandomNumbers = "^1.4" julia = "1" [extras] -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" DifferentialEquations = "0c46a032-eb83-5123-abaf-570d42b7fbaa" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test","DifferentialEquations"] +test = ["Test", "DifferentialEquations"] From 9d274dc75354de491b89ae7118d5945d19bfc38f Mon Sep 17 00:00:00 2001 From: Milan Date: Thu, 28 Dec 2023 20:08:56 +0100 Subject: [PATCH 22/23] add maxintfloat(::BFloat16) until new BFloat16s version --- src/bfloat16.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/bfloat16.jl b/src/bfloat16.jl index 2cba2e5..372a48b 100644 --- a/src/bfloat16.jl +++ b/src/bfloat16.jl @@ -50,4 +50,6 @@ function Base.decompose(x::BFloat16)::NTuple{3,Int} end BFloat16s.BFloat16(x::BigFloat) = BFloat16(Float32(x)) -Base.BigFloat(x::BFloat16) = BigFloat(Float32(x)) \ No newline at end of file +Base.BigFloat(x::BFloat16) = BigFloat(Float32(x)) + +Base.maxintfloat(::Type{BFloat16}) = reinterpret(BFloat16,0x4380) \ No newline at end of file From 40d8758bfebe13aa08d6b3dd289d05a4cc81c19a Mon Sep 17 00:00:00 2001 From: Milan Date: Thu, 28 Dec 2023 20:24:33 +0100 Subject: [PATCH 23/23] add round ToZero till released with BFloat16s v0.5 --- src/bfloat16.jl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/bfloat16.jl b/src/bfloat16.jl index 372a48b..1de6ee8 100644 --- a/src/bfloat16.jl +++ b/src/bfloat16.jl @@ -52,4 +52,7 @@ end BFloat16s.BFloat16(x::BigFloat) = BFloat16(Float32(x)) Base.BigFloat(x::BFloat16) = BigFloat(Float32(x)) -Base.maxintfloat(::Type{BFloat16}) = reinterpret(BFloat16,0x4380) \ No newline at end of file +Base.maxintfloat(::Type{BFloat16}) = reinterpret(BFloat16,0x4380) + +Base.round(x::BFloat16, r::RoundingMode{:ToZero}) = BFloat16(trunc(Float32(x))) +Base.round(x::BFloat16, r::RoundingMode{:Down}) = BFloat16(floor(Float32(x))) \ No newline at end of file