Skip to content

Commit

Permalink
Merge pull request #67 from milankl/mk/subnormals
Browse files Browse the repository at this point in the history
Float16 stochastic round of subnormals
  • Loading branch information
milankl committed Jul 10, 2023
2 parents c801a4f + f8aef51 commit 8033db4
Show file tree
Hide file tree
Showing 4 changed files with 134 additions and 27 deletions.
47 changes: 38 additions & 9 deletions src/float16sr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -55,26 +55,55 @@ BFloat16(x::Float16sr) = BFloat16(Float16(x))
Float16sr(x::Integer) = Float16sr(Float32(x))
(::Type{T})(x::Float16sr) where {T<:Integer} = T(Float32(x))

const eps_F16 = prevfloat(Float32(nextfloat(zero(Float16))))
"""
rand_subnormal(rbits::UInt32) -> Float32
Create a random perturbation for the Float16 subnormals for
stochastic rounding of Float32 -> Float16.
This function samples uniformly from [-2.980232f-8,2.9802319f-8].
This function is algorithmically similar to randfloat from RandomNumbers.jl"""
function rand_subnormal(rbits::UInt32)
lz = leading_zeros(rbits) # count leading zeros for correct probabilities of exponent
e = ((101 - lz) % UInt32) << 23
e |= (rbits << 31) # use last bit for sign

# combine exponent with random mantissa
return reinterpret(Float32,e | (rbits & 0x007f_ffff))
end

const eps_F16 = prevfloat(Float32(nextfloat(zero(Float16))),1)
const floatmin_F16 = Float32(floatmin(Float16))
const oneF32 = reinterpret(Int32,one(Float32))

"""Stochastically round x::Float32 to Float16 with distance-proportional probabilities."""
# # old version
# function rand_subnormal(rbits::UInt32)
# return eps_F16*(reinterpret(Float32,oneF32 | (rbits >> 9))-1.5f0)
# end

"""
Float16_stochastic_round(x::Float32) -> Float16sr
Stochastically round `x` to Float16 with distance-proportional probabilities."""
function Float16_stochastic_round(x::Float32)
rbits = rand(Xor128[],UInt32) # create random bits

# subnormals are rounded with float-arithmetic for uniform stoch perturbation
abs(x) < floatmin_F16 && return Float16sr(x+eps_F16*(reinterpret(Float32,oneF32 | (rand(Xor128[],UInt32) >> 9))-1.5f0))

abs(x) < floatmin_F16 && return Float16sr(x+rand_subnormal(rbits))

# normals are stochastically rounded with integer arithmetic
ui = reinterpret(UInt32,x)
ui += (rbits & 0x0000_1fff) # add perturbation in [0,u)
ui &= 0xffff_e000 # round to zero
mask = 0x0000_1fff # only mantissa bit 11-23 (the non-Float16 ones)
ui += (rbits & mask) # add perturbation in [0,u)
ui &= ~mask # round to zero

# convert to Float16 to adjust exponent bits
return reinterpret(Float16sr,Float16(reinterpret(Float32,ui)))
# via conversion to Float16 to adjust exponent bits
return Float16sr(reinterpret(Float32,ui))
end

"""Chance that x::Float32 is round up when converted to Float16sr."""
"""
Float16_chance_roundup(x::Float32)
Chance that x::Float32 is round up when converted to Float16sr."""
function Float16_chance_roundup(x::Float32)
xround = Float32(Float16(x))
xround == x && return zero(Float32)
Expand Down
41 changes: 35 additions & 6 deletions src/float32sr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -58,24 +58,53 @@ Float32sr(x::Integer) = Float32sr(Float32(x))
Float32sr(x::BFloat16) = Float32sr(Float64(x))
BFloat16(x::Float32sr) = BFloat16(Float32(x))

"""
rand_subnormal(rbits::UInt64) -> Float64
Create a random perturbation for the Float16 subnormals for
stochastic rounding of Float32 -> Float16.
This function samples uniformly from [-7.0064923216240846e-46,7.006492321624084e-46].
This function is algorithmically similar to randfloat from RandomNumbers.jl"""
function rand_subnormal(rbits::UInt64)
lz = leading_zeros(rbits) # count leading zeros for probabilities of exponent
e = ((872 - lz) % UInt64) << 52
e |= (rbits << 63) # use last bit for sign

# combine exponent with random mantissa
return reinterpret(Float64,e | (rbits & Base.significand_mask(Float64)))
end

const eps_F32 = prevfloat(Float64(nextfloat(zero(Float32))))
const floatmin_F32 = Float64(floatmin(Float32))
const oneF64 = reinterpret(Int64,one(Float64))

"""Stochastically round x::Float64 to Float32 with distance-proportional probabilities."""
# old version
# function rand_subnormal(rbits::UInt64)
# return eps_F32*(reinterpret(Float64,oneF64 | (rbits >> 12))-1.5)
# end

"""
Float32_stochastic_round(x::Float64) -> Float32sr
Stochastically round x::Float64 to Float32 with distance-proportional probabilities."""
function Float32_stochastic_round(x::Float64)
rbits = rand(Xor128[],UInt64) # create random bits

# subnormals are rounded with float-arithmetic for uniform stoch perturbation
abs(x) < floatmin_F32 && return Float32sr(x+eps_F32*(reinterpret(Float64,oneF64 | (rbits >> 12))-1.5))
abs(x) < floatmin_F32 && return Float32sr(x+rand_subnormal(rbits))

# normals with integer arithmetic
ui = reinterpret(UInt64,x)
ui += (rbits & 0x0000_0000_1fff_ffff) # add stochastic perturbation in [0,u)
ui &= 0xffff_ffff_e000_0000 # round to zero
return reinterpret(Float32sr,Float32(reinterpret(Float64,ui)))
mask = 0x0000_0000_1fff_ffff
ui += (rbits & mask) # add stochastic perturbation in [0,u)
ui &= ~mask # round to zero
return Float32sr(reinterpret(Float64,ui))
end

"""Chance that x::Float64 is round up when converted to Float32sr."""
"""
Float32_chance_roundup(x::Float64)
Chance that `x` is round up when converted to Float32sr."""
function Float32_chance_roundup(x::Float64)
xround = Float64(Float32(x))
xround == x && return zero(Float64)
Expand Down
66 changes: 56 additions & 10 deletions test/float16sr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ end
@test o == prevfloat(nextfloat(o))
end

@testset "Odd floats are never round away" begin
@testset "Odd floats are never rounded away" begin
# chances are 2^-29 = 2e-9
# so technically one should use N = 100_000_000
N = 100_000
Expand Down Expand Up @@ -77,7 +77,11 @@ function test_chances_round(f32::Float32;N::Int=100_000)
p = Float16_chance_roundup(f32)

f16_round = Float16sr(f32)
if Float32(f16_round) <= f32

if Float32(f16_round) == f32
f16_roundup=f16_round
f16_rounddown=f16_round
elseif Float32(f16_round) < f32
f16_rounddown = f16_round
f16_roundup = nextfloat(f16_round)
else
Expand All @@ -101,19 +105,27 @@ end

@testset "Test for N(0,1)" begin
for x in randn(Float32,10_000)
Ndown,Nup,N,p = test_chances_round(x)
@test Ndown + Nup == N
@test isapprox(Ndown/N,1-p,atol=2e-2)
@test isapprox(Nup/N,p,atol=2e-2)
# exclude subnormals from testing, they are tested further down
if ~issubnormal(Float16(x))
Ndown,Nup,N,p = test_chances_round(x)
@test Ndown + Nup == N
Ndown + Nup != N && @info "Test failed for $x, $(bitstring(Float16(x)))"
@test isapprox(Ndown/N,1-p,atol=2e-2)
@test isapprox(Nup/N,p,atol=2e-2)
end
end
end

@testset "Test for U(0,1)" begin
for x in rand(Float32,10_000)
Ndown,Nup,N,p = test_chances_round(x)
@test Ndown + Nup == N
@test isapprox(Ndown/N,1-p,atol=2e-2)
@test isapprox(Nup/N,p,atol=2e-2)
# exclude subnormals from testing, they are tested further down
if ~issubnormal(Float16(x))
Ndown,Nup,N,p = test_chances_round(x)
@test Ndown + Nup == N
Ndown + Nup != N && @info "Test failed for $x, $(bitstring(Float16(x)))"
@test isapprox(Ndown/N,1-p,atol=2e-2)
@test isapprox(Nup/N,p,atol=2e-2)
end
end
end

Expand Down Expand Up @@ -157,15 +169,49 @@ end
for x in subnormals
Ndown,Nup,N,p = test_chances_round(x)
@test Ndown + Nup == N
Ndown + Nup != N && @info "Test failed for $x, $(bitstring(Float16(x)))"
@test isapprox(Ndown/N,1-p,atol=2e-2)
@test isapprox(Nup/N,p,atol=2e-2)
end

for x in -subnormals
Ndown,Nup,N,p = test_chances_round(x)
@test Ndown + Nup == N
Ndown + Nup != N && @info "Test failed for $x, $(bitstring(Float16(x)))"
@test isapprox(Ndown/N,1-p,atol=2e-2)
@test isapprox(Nup/N,p,atol=2e-2)
end
end

# USED TO TEST SOME SUBNORMALS MANUALLY
# const ϵ = prevfloat(Float32(nextfloat(zero(Float16))),1)

# function g(x::Float32,r::Float32)
# # x + ϵ*(r-1.5f0)
# x + randfloat(reinterpret(UInt32,r))
# end

# function randfloat(ui::UInt32)

# lz = leading_zeros(ui)
# e = ((101 - lz) % UInt32) << 23
# e |= (ui << 31)

# # combine exponent and significand
# return reinterpret(Float32,e | (ui & 0x007f_ffff))
# end


# function h(x::Float32)
# # deterministically rounded correct value in uint16
# c = Int(reinterpret(UInt16,Float16(x)))

# v = zeros(Int16,2^23)
# r = 1f0

# for i in 0:2^23-1
# v[i+1] = Int(reinterpret(UInt16,Float16(g(x,r)))) - c
# r = nextfloat(r)
# end
# return v
# end
7 changes: 5 additions & 2 deletions test/float32sr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,10 @@ function test_chances_round(f64::Float64;N::Int=100_000)
p = Float32_chance_roundup(f64)

f32_round = Float32sr(f64)
if Float64(f32_round) <= f64
if Float64(f32_round) == f64
f32_roundup = f32_round
f32_rounddown = f32_round
elseif Float64(f32_round) < f64
f32_rounddown = f32_round
f32_roundup = nextfloat(f32_round)
else
Expand Down Expand Up @@ -173,7 +176,7 @@ end
floatminF32 = reinterpret(UInt64,Float64(floatmin(Float32)))
minposF32 = reinterpret(UInt64,Float64(nextfloat(zero(Float32))))

N = 100
N = 5000
subnormals = reinterpret.(Float64,rand(minposF32:floatminF32,N))
for x in subnormals
@test test_chances_round(x)
Expand Down

0 comments on commit 8033db4

Please sign in to comment.