Skip to content

Commit

Permalink
Merge e57ed3c into b089af5
Browse files Browse the repository at this point in the history
  • Loading branch information
chriselrod committed Dec 26, 2018
2 parents b089af5 + e57ed3c commit 6694583
Show file tree
Hide file tree
Showing 10 changed files with 622 additions and 459 deletions.
9 changes: 6 additions & 3 deletions Project.toml
Expand Up @@ -2,11 +2,14 @@ name = "SLEEF"
uuid = "3e6341c9-01f6-5312-b33f-f8894a2e817a"
license = "MIT"
repo = "https://github.com/musm/SLEEF.jl.git"
version = "v0.5.1"
version = "0.5.1"

[deps]
SIMD = "fdea26ae-647d-5447-a871-4b548cad5224"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test","Printf"]
test = ["Test", "Printf"]
26 changes: 26 additions & 0 deletions src/SLEEF.jl
Expand Up @@ -8,6 +8,32 @@ module SLEEF

using Base.Math: uinttype, @horner, exponent_bias, exponent_mask, significand_bits, IEEEFloat, exponent_raw_max

using SIMD
using SIMD: vifelse

const FloatType64 = Union{Float64,Vec{<:Any,Float64}}
const FloatType32 = Union{Float32,Vec{<:Any,Float32}}
const FloatType = Union{FloatType64,FloatType32}
const IntegerType64 = Union{Int64,Vec{<:Any,Int64}}
const IntegerType32 = Union{Int32,Vec{<:Any,Int32}}
const IntegerType = Union{IntegerType64,IntegerType32}

fpinttype(::Type{Float64}) = Int == Int32 ? Int32 : Int64
fpinttype(::Type{Float32}) = Int32
function fpinttype(::Type{Vec{N,Float64}}) where {N}
Int == Int32 ? Vec{N,Int32} : Vec{N,Int64}
end
function fpinttype(::Type{Vec{N,Float32}}) where {N}
Vec{N,Int32}
end

@generated function Base.unsafe_trunc(::Type{I}, x::Vec{N,T}) where {N,T,I}
quote
$(Expr(:meta,:inline))
Vec{$N,$I}($(Expr(:tuple, [:(Core.VecElement(unsafe_trunc($I, x[$n]))) for n 1:N]...)))
end
end

## constants

const MLN2 = 6.931471805599453094172321214581765680755001343602552541206800094933936219696955e-01 # log(2)
Expand Down
111 changes: 62 additions & 49 deletions src/double.jl
@@ -1,18 +1,27 @@
import Base: -, <, copysign, flipsign, convert

struct Double{T<:IEEEFloat} <: Number
const vIEEEFloat = Union{IEEEFloat,Vec{<:Any,<:IEEEFloat}}

struct Double{T<:vIEEEFloat} <: Number
hi::T
lo::T
end
Double(x::T) where {T<:IEEEFloat} = Double(x, zero(T))
Double(x::T) where {T<:vIEEEFloat} = Double(x, zero(T))

(::Type{T})(x::Double{T}) where {T<:IEEEFloat} = x.hi + x.lo
(::Type{T})(x::Double{T}) where {T<:vIEEEFloat} = x.hi + x.lo


@inline trunclo(x::Float64) = reinterpret(Float64, reinterpret(UInt64, x) & 0xffff_ffff_f800_0000) # clear lower 27 bits (leave upper 26 bits)
@inline trunclo(x::Float32) = reinterpret(Float32, reinterpret(UInt32, x) & 0xffff_f000) # clear lowest 12 bits (leave upper 12 bits)

@inline function splitprec(x::IEEEFloat)
@inline function trunclo(x::Vec{N,Float64}) where {N}
reinterpret(Vec{N,Float64}, reinterpret(Vec{N,UInt64}, x) & 0xffff_ffff_f800_0000) # clear lower 27 bits (leave upper 26 bits)
end
@inline function trunclo(x::Vec{N,Float32}) where {N}
reinterpret(Vec{N,Float32}, reinterpret(Vec{N,UInt32}, x) & 0xffff_f000) # clear lowest 12 bits (leave upper 12 bits)
end

@inline function splitprec(x::vIEEEFloat)
hx = trunclo(x)
hx, x - hx
end
Expand All @@ -22,230 +31,234 @@ end
Double(r, (x.hi - r) + x.lo)
end

@inline flipsign(x::Double{T}, y::T) where {T<:IEEEFloat} = Double(flipsign(x.hi, y), flipsign(x.lo, y))
@inline flipsign(x::Double{<:vIEEEFloat}, y::vIEEEFloat) = Double(flipsign(x.hi, y), flipsign(x.lo, y))

@inline scale(x::Double{T}, s::T) where {T<:IEEEFloat} = Double(s * x.hi, s * x.lo)
@inline scale(x::Double{<:vIEEEFloat}, s::vIEEEFloat) = Double(s * x.hi, s * x.lo)


@inline (-)(x::Double{T}) where {T<:IEEEFloat} = Double(-x.hi, -x.lo)
@inline (-)(x::Double{T}) where {T<:vIEEEFloat} = Double(-x.hi, -x.lo)

@inline function (<)(x::Double{T}, y::Double{T}) where {T<:IEEEFloat}
@inline function (<)(x::Double{<:vIEEEFloat}, y::Double{<:vIEEEFloat})
x.hi < y.hi
end

@inline function (<)(x::Double{T}, y::Number) where {T<:IEEEFloat}
@inline function (<)(x::Double{<:vIEEEFloat}, y::Union{Number,Vec})
x.hi < y
end

@inline function (<)(x::Number, y::Double{T}) where {T<:IEEEFloat}
@inline function (<)(x::Union{Number,Vec}, y::Double{<:vIEEEFloat})
x < y.hi
end

# quick-two-sum x+y
@inline function dadd(x::T, y::T) where {T<:IEEEFloat} #WARNING |x| >= |y|
@inline function dadd(x::vIEEEFloat, y::vIEEEFloat) #WARNING |x| >= |y|
s = x + y
Double(s, (x - s) + y)
end

@inline function dadd(x::T, y::Double{T}) where {T<:IEEEFloat} #WARNING |x| >= |y|
@inline function dadd(x::vIEEEFloat, y::Double{<:vIEEEFloat}) #WARNING |x| >= |y|
s = x + y.hi
Double(s, (x - s) + y.hi + y.lo)
end

@inline function dadd(x::Double{T}, y::T) where {T<:IEEEFloat} #WARNING |x| >= |y|
@inline function dadd(x::Double{<:vIEEEFloat}, y::vIEEEFloat) #WARNING |x| >= |y|
s = x.hi + y
Double(s, (x.hi - s) + y + x.lo)
end

@inline function dadd(x::Double{T}, y::Double{T}) where {T<:IEEEFloat} #WARNING |x| >= |y|
@inline function dadd(x::Double{<:vIEEEFloat}, y::Double{<:vIEEEFloat}) #WARNING |x| >= |y|
s = x.hi + y.hi
Double(s, (x.hi - s) + y.hi + y.lo + x.lo)
end

@inline function dsub(x::Double{T}, y::Double{T}) where {T<:IEEEFloat} #WARNING |x| >= |y|
@inline function dsub(x::Double{<:vIEEEFloat}, y::Double{<:vIEEEFloat}) #WARNING |x| >= |y|
s = x.hi - y.hi
Double(s, (x.hi - s) - y.hi - y.lo + x.lo)
end

@inline function dsub(x::Double{T}, y::T) where {T<:IEEEFloat} #WARNING |x| >= |y|
@inline function dsub(x::Double{<:vIEEEFloat}, y::vIEEEFloat) #WARNING |x| >= |y|
s = x.hi - y
Double(s, (x.hi - s) - y + x.lo)
end

@inline function dsub(x::T, y::Double{T}) where {T<:IEEEFloat} #WARNING |x| >= |y|
@inline function dsub(x::vIEEEFloat, y::Double{<:vIEEEFloat}) #WARNING |x| >= |y|
s = x - y.hi
Double(s, (x - s) - y.hi - y.lo)
end

@inline function dsub(x::T, y::T) where {T<:IEEEFloat} #WARNING |x| >= |y|
@inline function dsub(x::vIEEEFloat, y::vIEEEFloat) #WARNING |x| >= |y|
s = x - y
Double(s, (x - s) - y)
end


# two-sum x+y NO BRANCH
@inline function dadd2(x::T, y::T) where {T<:IEEEFloat}
@inline function dadd2(x::vIEEEFloat, y::vIEEEFloat)
s = x + y
v = s - x
Double(s, (x - (s - v)) + (y - v))
end

@inline function dadd2(x::T, y::Double{T}) where {T<:IEEEFloat}
@inline function dadd2(x::vIEEEFloat, y::Double{<:vIEEEFloat})
s = x + y.hi
v = s - x
Double(s, (x - (s - v)) + (y.hi - v) + y.lo)
end

@inline dadd2(x::Double{T}, y::T) where {T<:IEEEFloat} = dadd2(y, x)
@inline dadd2(x::Double{<:vIEEEFloat}, y::vIEEEFloat) = dadd2(y, x)

@inline function dadd2(x::Double{T}, y::Double{T}) where {T<:IEEEFloat}
@inline function dadd2(x::Double{<:vIEEEFloat}, y::Double{<:vIEEEFloat}) where {T<:vIEEEFloat}
s = x.hi + y.hi
v = s - x.hi
Double(s, (x.hi - (s - v)) + (y.hi - v) + x.lo + y.lo)
end

@inline function dsub2(x::T, y::T) where {T<:IEEEFloat}
@inline function dsub2(x::vIEEEFloat, y::vIEEEFloat)
s = x - y
v = s - x
Double(s, (x - (s - v)) + (-y - v))
end

@inline function dsub2(x::T, y::Double{T}) where {T<:IEEEFloat}
@inline function dsub2(x::vIEEEFloat, y::Double{<:vIEEEFloat})
s = x - y.hi
v = s - x
Double(s, (x - (s - v)) + (-y.hi - v) - y.lo)
end

@inline function dsub2(x::Double{T}, y::T) where {T<:IEEEFloat}
@inline function dsub2(x::Double{<:vIEEEFloat}, y::vIEEEFloat)
s = x.hi - y
v = s - x.hi
Double(s, (x.hi - (s - v)) + (-y - v) + x.lo)
end

@inline function dsub2(x::Double{T}, y::Double{T}) where {T<:IEEEFloat}
@inline function dsub2(x::Double{<:vIEEEFloat}, y::Double{<:vIEEEFloat})
s = x.hi - y.hi
v = s - x.hi
Double(s, (x.hi - (s - v)) + (-y.hi - v) + x.lo - y.lo)
end


@inline (::Type{Vec{N,T}})(x::Vec{N,T}) where {N,T} = x
@inline function SIMD.vifelse(b::Vec{N,Bool}, x::Double{T1}, y::Double{T2}) where {N,T<:Union{Float32,Float64},T1<:Union{T,Vec{N,T}},T2<:Union{T,Vec{N,T}}}
V = Vec{N,T}
Double(vifelse(b, V(x.hi), V(y.hi)), vifelse(b, V(x.lo), V(y.lo)))
end

if FMA_FAST

# two-prod-fma
@inline function dmul(x::T, y::T) where {T<:IEEEFloat}
@inline function dmul(x::vIEEEFloat, y::vIEEEFloat)
z = x * y
Double(z, fma(x, y, -z))
end

@inline function dmul(x::Double{T}, y::T) where {T<:IEEEFloat}
@inline function dmul(x::Double{<:vIEEEFloat}, y::vIEEEFloat)
z = x.hi * y
Double(z, fma(x.hi, y, -z) + x.lo * y)
end

@inline dmul(x::T, y::Double{T}) where {T<:IEEEFloat} = dmul(y, x)
@inline dmul(x::vIEEEFloat, y::Double{<:vIEEEFloat}) = dmul(y, x)

@inline function dmul(x::Double{T}, y::Double{T}) where {T<:IEEEFloat}
@inline function dmul(x::Double{<:vIEEEFloat}, y::Double{<:vIEEEFloat})
z = x.hi * y.hi
Double(z, fma(x.hi, y.hi, -z) + x.hi * y.lo + x.lo * y.hi)
end

# x^2
@inline function dsqu(x::T) where {T<:IEEEFloat}
@inline function dsqu(x::T) where {T<:vIEEEFloat}
z = x * x
Double(z, fma(x, x, -z))
end

@inline function dsqu(x::Double{T}) where {T<:IEEEFloat}
@inline function dsqu(x::Double{T}) where {T<:vIEEEFloat}
z = x.hi * x.hi
Double(z, fma(x.hi, x.hi, -z) + x.hi * (x.lo + x.lo))
end

# sqrt(x)
@inline function dsqrt(x::Double{T}) where {T<:IEEEFloat}
@inline function dsqrt(x::Double{T}) where {T<:vIEEEFloat}
zhi = _sqrt(x.hi)
Double(zhi, (x.lo + fma(-zhi, zhi, x.hi)) / (zhi + zhi))
end

# x/y
@inline function ddiv(x::Double{T}, y::Double{T}) where {T<:IEEEFloat}
@inline function ddiv(x::Double{<:vIEEEFloat}, y::Double{<:vIEEEFloat})
invy = 1 / y.hi
zhi = x.hi * invy
Double(zhi, (fma(-zhi, y.hi, x.hi) + fma(-zhi, y.lo, x.lo)) * invy)
end

@inline function ddiv(x::T, y::T) where {T<:IEEEFloat}
@inline function ddiv(x::vIEEEFloat, y::vIEEEFloat)
ry = 1 / y
r = x * ry
Double(r, fma(-r, y, x) * ry)
end

# 1/x
@inline function drec(x::T) where {T<:IEEEFloat}
@inline function drec(x::vIEEEFloat)
zhi = 1 / x
Double(zhi, fma(-zhi, x, one(T)) * zhi)
end

@inline function drec(x::Double{T}) where {T<:IEEEFloat}
@inline function drec(x::Double{<:vIEEEFloat})
zhi = 1 / x.hi
Double(zhi, (fma(-zhi, x.hi, one(T)) + -zhi * x.lo) * zhi)
end

else

#two-prod x*y
@inline function dmul(x::T, y::T) where {T<:IEEEFloat}
@inline function dmul(x::vIEEEFloat, y::vIEEEFloat)
hx, lx = splitprec(x)
hy, ly = splitprec(y)
z = x * y
Double(z, ((hx * hy - z) + lx * hy + hx * ly) + lx * ly)
end

@inline function dmul(x::Double{T}, y::T) where {T<:IEEEFloat}
@inline function dmul(x::Double{<:vIEEEFloat}, y::vIEEEFloat)
hx, lx = splitprec(x.hi)
hy, ly = splitprec(y)
z = x.hi * y
Double(z, (hx * hy - z) + lx * hy + hx * ly + lx * ly + x.lo * y)
end

@inline dmul(x::T, y::Double{T}) where {T<:IEEEFloat} = dmul(y, x)
@inline dmul(x::vIEEEFloat, y::Double{<:vIEEEFloat}) = dmul(y, x)

@inline function dmul(x::Double{T}, y::Double{T}) where {T<:IEEEFloat}
@inline function dmul(x::Double{<:vIEEEFloat}, y::Double{<:vIEEEFloat})
hx, lx = splitprec(x.hi)
hy, ly = splitprec(y.hi)
z = x.hi * y.hi
Double(z, (((hx * hy - z) + lx * hy + hx * ly) + lx * ly) + x.hi * y.lo + x.lo * y.hi)
end

# x^2
@inline function dsqu(x::T) where {T<:IEEEFloat}
@inline function dsqu(x::T) where {T<:vIEEEFloat}
hx, lx = splitprec(x)
z = x * x
Double(z, (hx * hx - z) + lx * (hx + hx) + lx * lx)
end

@inline function dsqu(x::Double{T}) where {T<:IEEEFloat}
@inline function dsqu(x::Double{T}) where {T<:vIEEEFloat}
hx, lx = splitprec(x.hi)
z = x.hi * x.hi
Double(z, (hx * hx - z) + lx * (hx + hx) + lx * lx + x.hi * (x.lo + x.lo))
end

# sqrt(x)
@inline function dsqrt(x::Double{T}) where {T<:IEEEFloat}
@inline function dsqrt(x::Double{T}) where {T<:vIEEEFloat}
c = _sqrt(x.hi)
u = dsqu(c)
Double(c, (x.hi - u.hi - u.lo + x.lo) / (c + c))
end

# x/y
@inline function ddiv(x::Double{T}, y::Double{T}) where {T<:IEEEFloat}
@inline function ddiv(x::Double{<:vIEEEFloat}, y::Double{<:vIEEEFloat})
invy = 1 / y.hi
c = x.hi * invy
u = dmul(c, y.hi)
Double(c, ((((x.hi - u.hi) - u.lo) + x.lo) - c * y.lo) * invy)
end

@inline function ddiv(x::T, y::T) where {T<:IEEEFloat}
@inline function ddiv(x::vIEEEFloat, y::vIEEEFloat)
ry = 1 / y
r = x * ry
hx, lx = splitprec(r)
Expand All @@ -255,13 +268,13 @@ else


# 1/x
@inline function drec(x::T) where {T<:IEEEFloat}
@inline function drec(x::T) where {T<:vIEEEFloat}
c = 1 / x
u = dmul(c, x)
Double(c, (one(T) - u.hi - u.lo) * c)
end

@inline function drec(x::Double{T}) where {T<:IEEEFloat}
@inline function drec(x::Double{T}) where {T<:vIEEEFloat}
c = 1 / x.hi
u = dmul(c, x.hi)
Double(c, (one(T) - u.hi - u.lo - c * x.lo) * c)
Expand Down

0 comments on commit 6694583

Please sign in to comment.