Skip to content

Commit

Permalink
Merge pull request #61 from milankl/mk/float64sr
Browse files Browse the repository at this point in the history
Float64sr via Double64
  • Loading branch information
milankl committed Jul 11, 2023
2 parents 97bdf0d + 1e7fae5 commit cd05692
Show file tree
Hide file tree
Showing 5 changed files with 197 additions and 5 deletions.
4 changes: 3 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@ version = "0.6.3"

[deps]
BFloat16s = "ab4f0b2a-ad5b-11e8-123f-65d77653426b"
DoubleFloats = "497a8b3b-efae-58df-a0af-a86822472b78"
RandomNumbers = "e6cf234a-135c-5ec9-84dd-332b85af5143"

[compat]
BFloat16s = "0.1, 0.2"
RandomNumbers = "1.4,1.5"
RandomNumbers = "^1.4"
DoubleFloats = "1"
julia = "1"

[extras]
Expand Down
13 changes: 10 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
[![CI](https://github.com/milankl/StochasticRounding.jl/actions/workflows/CI.yml/badge.svg)](https://github.com/milankl/StochasticRounding.jl/actions/workflows/CI.yml)
Stochastic rounding for floating-point arithmetic.

This package exports `Float32sr`,`Float16sr`, and `BFloat16sr`, three number formats that behave
This package exports `Float64sr`, Float32sr`,`Float16sr`, and `BFloat16sr`, three number formats that behave
like their deterministic counterparts but with stochastic rounding that is proportional to the
distance of the next representable numbers and therefore
[exact in expectation](https://en.wikipedia.org/wiki/Rounding#Stochastic_rounding)
Expand All @@ -22,6 +22,12 @@ slower, but e.g. Float32+stochastic rounding is only about 2x slower than Float6
a random number generator from the [Xorshift family](https://en.wikipedia.org/wiki/Xorshift), is used through the
[RandomNumbers.jl](https://github.com/sunoru/RandomNumbers.jl) package, due to its speed and statistical properties.

Ever format of `Float64sr`, Float32sr`,`Float16sr`, and `BFloat16sr` uses a higher precision format
to obtain the "exact" arithmetic result which is then stochastically rounded to the respective
lower precision format. `Float16sr` and `BFloat16sr` use `Float32` for this,
`Float32sr` uses `Float64`, and `Float64sr` uses `Double64` from
[DoubleFloats.jl](https://github.com/JuliaMath/DoubleFloats.jl)

You are welcome to raise [issues](https://github.com/milankl/StochasticRounding.jl/issues),
ask questions or suggest any changes or new features.

Expand Down Expand Up @@ -108,9 +114,10 @@ And similarly for the other number types. Then with Julia 1.6 on an Intel(R) Cor
| rounding mode | Float64 | Float32 | Float16 | BFloat16 |
| --------------------- | ---------- | ---------- | --------- | ----------- |
| round to nearest | 1132 μs | 452 μs | 1588 μs | 315 μs |
| stochastic rounding | n/a | 2650 μs | 3310 μs | 1850 μs |
| stochastic rounding | 11,368 μs | 2650 μs | 3310 μs | 1850 μs |

Stochastic rounding imposes an about x5 performance decrease for Float32 and BFloat16, but only x2 for Float16.
Stochastic rounding imposes an about x5 performance decrease for Float32 and BFloat16, but only x2 for Float16,
however, 10x for Float64 due to the use of Double64.
For more complicated benchmarks the performance decrease is usually within x10.
About 50% of the time is spend on the random number generation with Xoroshiro128+.

Expand Down
10 changes: 9 additions & 1 deletion src/StochasticRounding.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,20 @@ module StochasticRounding
Float16sr,Float16_stochastic_round, # Float16 + SR
Float16_chance_roundup,NaN16sr,Inf16sr,
Float32sr,Float32_stochastic_round, # Float32 + SR
Float32_chance_roundup,NaN32sr,Inf32sr
Float32_chance_roundup,NaN32sr,Inf32sr,
Float64sr,Float64_stochastic_round, # Float64 + SR
Float64_chance_roundup,
NaNsr,Infsr

# use BFloat16 from BFloat16s.jl
import BFloat16s: BFloat16

# faster random number generator
import RandomNumbers.Xorshifts.Xoroshiro128Plus
const Xor128 = Ref{Xoroshiro128Plus}(Xoroshiro128Plus())

import DoubleFloats: DoubleFloats, Double64

"""Reseed the PRNG randomly by recalling."""
function __init__()
Xor128[] = Xoroshiro128Plus()
Expand All @@ -27,5 +33,7 @@ module StochasticRounding
include("bfloat16sr.jl")
include("float16sr.jl")
include("float32sr.jl")
include("float64sr.jl")
include("conversions.jl")

end
129 changes: 129 additions & 0 deletions src/float64sr.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
"""The Float64 + stochastic rounding type."""
primitive type Float64sr <: AbstractFloat 64 end

# basic properties
Base.sign_mask(::Type{Float64sr}) = Base.sign_mask(Float64)
Base.exponent_mask(::Type{Float64sr}) = Base.exponent_mask(Float64)
Base.significand_mask(::Type{Float64sr}) = Base.significand_mask(Float64)
Base.precision(::Type{Float64sr}) = Base.precision(Float64)

Base.one(::Type{Float64sr}) = reinterpret(Float64sr,one(Float64))
Base.zero(::Type{Float64sr}) = reinterpret(Float64sr,zero(Float64))
Base.one(::Float64sr) = one(Float64sr)
Base.zero(::Float64sr) = zero(Float64sr)

Base.typemin(::Type{Float64sr}) = Float64sr(typemin(Float64))
Base.typemax(::Type{Float64sr}) = Float64sr(typemax(Float64))
Base.floatmin(::Type{Float64sr}) = Float64sr(floatmin(Float64))
Base.floatmax(::Type{Float64sr}) = Float64sr(floatmax(Float64))
Base.maxintfloat(::Type{Float64sr}) = Float64sr(maxintfloat(Float64))

Base.typemin(::Float64sr) = typemin(Float64sr)
Base.typemax(::Float64sr) = typemax(Float64sr)
Base.floatmin(::Float64sr) = floatmin(Float64sr)
Base.floatmax(::Float64sr) = floatmax(Float64sr)

Base.eps(::Type{Float64sr}) = Float64sr(eps(Float64))
Base.eps(x::Float64sr) = Float64sr(eps(Float64(x)))

const Infsr = reinterpret(Float64sr, Inf)
const NaNsr = reinterpret(Float64sr, NaN)

# basic operations
Base.abs(x::Float64sr) = reinterpret(Float64sr, abs(reinterpret(Float64,x)))
Base.isnan(x::Float64sr) = isnan(reinterpret(Float64,x))
Base.isfinite(x::Float64sr) = isfinite(reinterpret(Float64,x))

Base.uinttype(::Type{Float64sr}) = UInt64
Base.nextfloat(x::Float64sr) = Float64sr(nextfloat(Float64(x)))
Base.prevfloat(x::Float64sr) = Float64sr(prevfloat(Float64(x)))

Base.:(-)(x::Float64sr) = reinterpret(Float64sr, reinterpret(UInt64, x) Base.sign_mask(Float64sr))

# conversions
Base.Float64(x::Float64sr) = reinterpret(Float64,x)
Float64sr(x::Float64) = reinterpret(Float64sr,x)
Float64sr(x::Float16) = Float64sr(Float64(x))
Float64sr(x::Float32) = Float64sr(Float64(x))
Base.Float16(x::Float64sr) = Float16(Float64(x))
Base.Float32(x::Float64sr) = Float32(Float64(x))
Float64sr(x::Irrational) = Float64sr(Float64(x))

DoubleFloats.Double64(x::Float64sr) = Double64(Float64(x))
Float64sr(x::Double64) = Float64sr(Float64(x))

Float64sr(x::Integer) = Float64sr(Float64(x))
(::Type{T})(x::Float64sr) where {T<:Integer} = T(Float64(x))

"""Stochastically round x::Double64 to Float64 with distance-proportional probabilities."""
function Float64_stochastic_round(x::Double64)
rbits = rand(Xor128[],UInt64) # create random bits

# create [1,2)-1.5 = [-0.5,0.5)
r = reinterpret(Float64,reinterpret(UInt64,one(Float64)) + (rbits >> 12)) - 1.5
a = x.hi # the more significant float64 in x
b = x.lo # the less significant float64 in x
u = eps(a) # = ulp

return Float64sr(a + (b+u*r)) # (b+u*r) first as a+b would be rounded to a
end

function Float64_chance_roundup(x::Real)
xround = Float64(x)
xround == x && return zero(Float64)
xround_down, xround_up = xround < x ? (xround,nextfloat(xround)) :
(prevfloat(xround),xround)

return Float64((x-xround_down)/(xround_up-xround_down))
end

Base.promote_rule(::Type{Float16}, ::Type{Float64sr}) = Float64sr
Base.promote_rule(::Type{Float32}, ::Type{Float64sr}) = Float64sr
Base.promote_rule(::Type{Float64}, ::Type{Float64sr}) = Float64sr

for t in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UInt128)
@eval Base.promote_rule(::Type{Float64sr}, ::Type{$t}) = Float64sr
end

# Rounding
Base.round(x::Float64sr, r::RoundingMode{:ToZero}) = Float64sr(round(Float64(x), r))
Base.round(x::Float64sr, r::RoundingMode{:Down}) = Float64sr(round(Float64(x), r))
Base.round(x::Float64sr, r::RoundingMode{:Up}) = Float64sr(round(Float64(x), r))
Base.round(x::Float64sr, r::RoundingMode{:Nearest}) = Float64sr(round(Float64(x), r))

# Comparison
for op in (:(==), :<, :<=, :isless)
@eval Base.$op(a::Float64sr, b::Float64sr) = ($op)(Float64(a), Float64(b))
end

# Arithmetic
for f in (:+, :-, :*, :/, :^, :mod)
@eval Base.$f(x::Float64sr, y::Float64sr) = Float64_stochastic_round($(f)(Double64(x), Double64(y)))
end

for func in (:sin,:cos,:tan,:asin,:acos,:atan,:sinh,:cosh,:tanh,:asinh,:acosh,
:atanh,:exp,:exp2,:exp10,:expm1,:log,:log2,:log10,:sqrt,:cbrt,:log1p)
@eval begin
Base.$func(a::Float64sr) = Float64_stochastic_round($func(Double64(a)))
end
end

for func in (:atan,:hypot)
@eval begin
Base.$func(a::Float64sr,b::Float64sr) = Float64_stochastic_round($func(Double64(a),Double64(b)))
end
end


# Showing
Base.show(io::IO, x::Float64sr) = show(io,Float64(x))
Base.bitstring(x::Float64sr) = bitstring(reinterpret(Float64,x))

function Base.bitstring(x::Float64sr,mode::Symbol)
if mode == :split # split into sign, exponent, signficand
s = bitstring(x)
return "$(s[1]) $(s[2:12]) $(s[13:end])"
else
return bitstring(x)
end
end
46 changes: 46 additions & 0 deletions test/float64sr.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
using DoubleFloats

function test_chances_round(f128::Double64;N::Int=100_000)
p = Float64_chance_roundup(f128)

f64_round = Float64sr(f128)
if Double64(f64_round) == f128
f64_roundup = f64_round
f64_rounddown = f64_round
elseif Double64(f64_round) < f128
f64_rounddown = f64_round
f64_roundup = nextfloat(f64_round)
else
f64_roundup = f64_round
f64_rounddown = prevfloat(f64_round)
end

Ndown = 0
Nup = 0
for _ in 1:N
f64 = Float64_stochastic_round(f128)
if f64 == f64_rounddown
Ndown += 1
elseif f64 == f64_roundup
Nup += 1
end
end

test1 = Ndown + Nup == N
test2 = isapprox(Ndown/N,1-p,atol=1e-2)
test3 = isapprox(Nup/N,p,atol=1e-2)

return test1 && test2 && test3
end

@testset "Test for N(0,1)" begin
for x in randn(Double64,10_000)
@test test_chances_round(x)
end
end

@testset "Test for U(0,1)" begin
for x in rand(Double64,10_000)
@test test_chances_round(x)
end
end

0 comments on commit cd05692

Please sign in to comment.