Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

make new conversion default #61

Merged
merged 2 commits into from
Oct 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/SoftPosit.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ module SoftPosit
Posit8_2, Posit16_2, Posit24_2,
notareal, minusone,
AbstractQuire, Quire8, Quire16, Quire32, fms,
Posit16_new, Float32_new
Posit16_old, Float32_old

import Base: Float64, Float32, Float16, Int32, Int64,
UInt8, UInt16, UInt32,
Expand Down
60 changes: 33 additions & 27 deletions src/conversionFloatToPosit.jl
Original file line number Diff line number Diff line change
@@ -1,39 +1,16 @@
# from Float64 to Posit8,16,32
Posit8(x::Float64) = ccall((:convertDoubleToP8, SoftPositPath), Posit8, (Float64,),x)
Posit16(x::Float64) = ccall((:convertDoubleToP16, SoftPositPath), Posit16, (Float64,),x)
Posit16_old(x::Float64) = ccall((:convertDoubleToP16, SoftPositPath), Posit16, (Float64,),x)
Posit32(x::Float64) = ccall((:convertDoubleToP32, SoftPositPath), Posit32, (Float64,),x)

# from Float32/16 to Posit8,16,32
Posit8(x::T where {T <: Float16or32}) = Posit8(Float64(x))
Posit16(x::T where {T <: Float16or32}) = Posit16(Float64(x))
Posit32(x::T where {T <: Float16or32}) = Posit32(Float64(x))

# from Float64 to Posit_1 (convertDoubleToPX1 currently not available)
# Posit8_1(x::Float64) = ccall((:convertDoubleToPX1, SoftPositPath), Posit8_1, (Float64,Int64),x,8)
# Posit16_1(x::Float64) = ccall((:convertDoubleToPX1, SoftPositPath), Posit16_1, (Float64,Int64),x,16)
# Posit24_1(x::Float64) = ccall((:convertDoubleToPX1, SoftPositPath), Posit24_1, (Float64,Int64),x,24)
# Float16 widens to Float32 without loss, so the Float32 conversion is reused as is.
Posit16(x::Float16) = Posit16(Float32(x))

# Float64 is converted directly by the library routine rather than being narrowed
# to Float32 first. Narrowing would turn finite |x| > floatmax(Float32) into ±Inf32
# (which the Float32 conversion maps to NaR), flush tiny nonzero x to zero, and
# round the value once more before the posit rounding takes place.
Posit16(x::Float64) = Posit16_old(x)

# use detour
Posit8_1(x::Float64) = Posit8_1(Posit32(x))
Posit16_1(x::Float64) = Posit16_1(Posit32(x))
Posit24_1(x::Float64) = Posit24_1(Posit32(x))

# from Float32/16 to Posit_1
Posit8_1(x::T where {T <: Float16or32}) = Posit8_1(Float64(x))
Posit16_1(x::T where {T <: Float16or32}) = Posit16_1(Float64(x))
Posit24_1(x::T where {T <: Float16or32}) = Posit24_1(Float64(x))

# from Float64 to Posit_2
Posit8_2(x::Float64) = ccall((:convertDoubleToPX2, SoftPositPath), Posit8_2, (Float64,Int64),x,8)
Posit16_2(x::Float64) = ccall((:convertDoubleToPX2, SoftPositPath), Posit16_2, (Float64,Int64),x,16)
Posit24_2(x::Float64) = ccall((:convertDoubleToPX2, SoftPositPath), Posit24_2, (Float64,Int64),x,24)

# from Float32/16 to Posit_2
Posit8_2(x::T where {T <: Float16or32}) = Posit8_2(Float64(x))
Posit16_2(x::T where {T <: Float16or32}) = Posit16_2(Float64(x))
Posit24_2(x::T where {T <: Float16or32}) = Posit24_2(Float64(x))

function Posit16_new(x::Float32)
function Posit16(x::Float32)
ui = reinterpret(UInt32,x)

# REGIME AND EXPONENT BITS
Expand Down Expand Up @@ -64,3 +41,32 @@ function Posit16_new(x::Float32)
p16 = signbit(x) ? -p16 : p16
return reinterpret(Posit16,p16)
end

# legacy: narrower floats are widened to Float64 and handed to the
# library-based Posit16_old(::Float64) conversion
function Posit16_old(x::Float32)
    return Posit16_old(Float64(x))
end

function Posit16_old(x::Float16)
    return Posit16_old(Float64(x))
end

# from Float64 to Posit_1 (convertDoubleToPX1 currently not available)
# Posit8_1(x::Float64) = ccall((:convertDoubleToPX1, SoftPositPath), Posit8_1, (Float64,Int64),x,8)
# Posit16_1(x::Float64) = ccall((:convertDoubleToPX1, SoftPositPath), Posit16_1, (Float64,Int64),x,16)
# Posit24_1(x::Float64) = ccall((:convertDoubleToPX1, SoftPositPath), Posit24_1, (Float64,Int64),x,24)

# use detour: go through Posit32 first, then convert that to the Posit_1 format
function Posit8_1(x::Float64)
    return Posit8_1(Posit32(x))
end

function Posit16_1(x::Float64)
    return Posit16_1(Posit32(x))
end

function Posit24_1(x::Float64)
    return Posit24_1(Posit32(x))
end

# from Float32/16 to Posit_1: widen to Float64 and reuse the methods above
Posit8_1(x::Float16or32) = Posit8_1(Float64(x))
Posit16_1(x::Float16or32) = Posit16_1(Float64(x))
Posit24_1(x::Float16or32) = Posit24_1(Float64(x))

# from Float64 to Posit_2: one library routine serves all widths, the trailing
# integer argument (8, 16 or 24) is passed alongside the value
function Posit8_2(x::Float64)
    return ccall((:convertDoubleToPX2, SoftPositPath), Posit8_2, (Float64, Int64), x, 8)
end

function Posit16_2(x::Float64)
    return ccall((:convertDoubleToPX2, SoftPositPath), Posit16_2, (Float64, Int64), x, 16)
end

function Posit24_2(x::Float64)
    return ccall((:convertDoubleToPX2, SoftPositPath), Posit24_2, (Float64, Int64), x, 24)
end

# from Float32/16 to Posit_2: widen to Float64 and reuse the methods above
Posit8_2(x::Float16or32) = Posit8_2(Float64(x))
Posit16_2(x::Float16or32) = Posit16_2(Float64(x))
Posit24_2(x::Float16or32) = Posit24_2(Float64(x))
10 changes: 7 additions & 3 deletions src/conversionPositToFloat.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# from Posit to Float64,32,16
Float64(x::Posit8) = ccall((:convertP8ToDouble, SoftPositPath), Float64, (Posit8,),x)
Float64(x::Posit16) = ccall((:convertP16ToDouble, SoftPositPath), Float64, (Posit16,),x)
Float64(x::Posit32) = ccall((:convertP32ToDouble, SoftPositPath), Float64, (Posit32,),x)

# conversion from PX2 to Float64,32,16
Expand All @@ -14,10 +13,12 @@ Float64(x::Posit16_1) = ccall((:convertPX1ToDouble, SoftPositPath), Float64, (Po
Float64(x::Posit24_1) = ccall((:convertPX1ToDouble, SoftPositPath), Float64, (Posit24_1,),x)

# conversion to Float32,16
Float32(x::AbstractPosit) = Float32(Float64(x))
Float16(x::AbstractPosit) = Float16(Float64(x))
Float32(x::AbstractPosit) = Float32(Float64(x))
Float16(x::Posit16) = Float16(Float32(x))
Float64(x::Posit16) = Float64(Float32(x))

function Float32_new(x::Posit16)
function Float32(x::Posit16)
ui = reinterpret(UInt16,x)

signbitx = signbit(x) # sign of number
Expand All @@ -44,3 +45,6 @@ function Float32_new(x::Posit16)
return reinterpret(Float32,f32)
end

# legacy: Float32 obtained by rounding the library-based Float64 conversion
Float32_old(x::AbstractPosit) = Float32(Float64_old(x))

# Posit16 keeps its original library conversion under the _old name.
Float64_old(x::Posit16) = ccall((:convertP16ToDouble, SoftPositPath), Float64, (Posit16,),x)

# Fallback so that Float32_old, which accepts any AbstractPosit, does not end in a
# MethodError for posit types other than Posit16: those types have no separate
# legacy conversion, so their regular Float64 method is used.
Float64_old(x::AbstractPosit) = Float64(x)
64 changes: 32 additions & 32 deletions test/test_conversions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,16 @@ end

@testset "0,±1,±2,±4" begin
for f in Float32[-4,-2,-1,0,1,2,4]
@test Posit16(f) == Posit16_new(f)
@test f == Float32_new(Posit16_new(f))
@test Posit16(f) == Posit16_old(f)
@test f == Float32(Posit16(f))
end
end


@testset "0,±1/2,±1/4,±1/8" begin
for f in Float32[-1/8,-1/4,-1/2,0,1/2,1/4,1/8]
@test Posit16(f) == Posit16_new(f)
@test f == Float32_new(Posit16_new(f))
@test Posit16(f) == Posit16_old(f)
@test f == Float32(Posit16(f))
end
end

Expand All @@ -63,73 +63,73 @@ end
0.125,
0.25,
0.5]
@test Posit16(f) == Posit16_new(f)
@test Posit16(-f) == Posit16_new(-f)
@test Posit16(1/f) == Posit16_new(1/f)
@test Posit16(-1/f) == Posit16_new(-1/f)

@test f == Float32_new(Posit16_new(f))
@test -f == Float32_new(Posit16_new(-f))
@test 1/f == Float32_new(Posit16_new(1/f))
@test -1/f == Float32_new(Posit16_new(-1/f))
@test Posit16(f) == Posit16_old(f)
@test Posit16(-f) == Posit16_old(-f)
@test Posit16(1/f) == Posit16_old(1/f)
@test Posit16(-1/f) == Posit16_old(-1/f)

@test f == Float32(Posit16(f))
@test -f == Float32(Posit16(-f))
@test 1/f == Float32(Posit16(1/f))
@test -1/f == Float32(Posit16(-1/f))
end
end

@testset "NaN, Inf" begin
for f in [NaN32,Inf32,-Inf32]
@test Posit16(f) == Posit16_new(f)
@test isnan(Float32_new(Posit16_new(f)))
@test Posit16(f) == Posit16_old(f)
@test isnan(Float32(Posit16(f)))
end

for _ in 1:10
# create various NaNs
f = reinterpret(Float32,reinterpret(UInt32,NaN32)+(rand(UInt32)>>10))
@test Posit16(f) == Posit16_new(f)
@test isnan(Posit16_new(f))
@test Posit16(f) == Posit16_old(f)
@test isnan(Posit16(f))
end
end

@testset "U(0,1)" begin
for f in rand(Float32,100)
@test Posit16(f) == Posit16_new(f)
@test Float32(Posit16(f)) == Float32_new(Posit16_new(f))
@test Posit16(f) == Posit16_old(f)
@test Float32(Posit16(f)) == Float32_old(Posit16_old(f))

# idempotence
f32 = Float32(Posit16(f))
@test f32 == Float32_new(Posit16_new(f32))
@test f32 == Float32(Posit16(f32))
end
end

@testset "U(1,21)" begin
for f in 1 .+ 20*rand(Float32,100)
@test Posit16(-f) == Posit16_new(-f)
@test Posit16(f) == Posit16_new(f)
@test Float32(Posit16(f)) == Float32_new(Posit16_new(f))
@test Posit16(-f) == Posit16_old(-f)
@test Posit16(f) == Posit16_old(f)
@test Float32(Posit16(f)) == Float32_old(Posit16_old(f))

# idempotence of the default conversion: a Float32 that came out of a Posit16
# must survive another round trip unchanged (same check as in the U(0,1) and
# N(0,1) testsets, which also exercise Float32(Posit16(...)) here)
f32 = Float32(Posit16(f))
@test f32 == Float32_new(Posit16_new(f32))
@test f32 == Float32(Posit16(f32))
end
end

@testset "N(0,1)" begin
for f in randn(Float32,100)
@test Posit16(f) == Posit16_new(f)
@test Float32(Posit16(f)) == Float32_new(Posit16_new(f))
@test Posit16(f) == Posit16_old(f)
@test Float32(Posit16(f)) == Float32_old(Posit16_old(f))

# idempotence
f32 = Float32(Posit16(f))
@test f32 == Float32_new(Posit16_new(f32))
@test f32 == Float32(Posit16(f32))
end
end

@testset "Overflow and underflow" begin
for f in Float32[2f8,3f8,4f8,5f8,1f9,1f10,1f15,1f20,1f25,1f30,1f35,floatmax(Float32)/4]
@test floatmax(Posit16) == Posit16_new(f)
@test -floatmax(Posit16) == Posit16_new(-f)
@test floatmax(Posit16) == Posit16(f)
@test -floatmax(Posit16) == Posit16(-f)
end

@test isnan(Posit16_new(NaN32))
@test isnan(Posit16_new(Inf32))
@test isnan(Posit16_new(-Inf32))
@test isnan(Posit16(NaN32))
@test isnan(Posit16(Inf32))
@test isnan(Posit16(-Inf32))
end