diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c143632
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+*.jl.cov
+*.jl.*.cov
+*.jl.mem
+
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..c7b3940
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,17 @@
+# Documentation: http://docs.travis-ci.com/user/languages/julia/
+language: julia
+os:
+ - linux
+ - osx
+julia:
+ - 0.6
+ - nightly
+notifications:
+ email: false
+# uncomment the following lines to override the default test script
+#script:
+# - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi
+ # - julia -e 'versioninfo(); Pkg.clone(pwd()); Pkg.build("Sleef"); Pkg.test("Sleef"; coverage=true)'
+after_success:
+ # push coverage results
+ - julia -e 'cd(Pkg.dir("Sleef")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder()); Codecov.submit(Codecov.process_folder())'
diff --git a/LICENSE.md b/LICENSE.md
new file mode 100644
index 0000000..bac3d25
--- /dev/null
+++ b/LICENSE.md
@@ -0,0 +1,28 @@
+The Sleef.jl package is licensed under the MIT "Expat" License:
+
+> Copyright (c) 2016: Mustafa Mohamad and other contributors:
+>
+> https://github.com/musm/Sleef.jl/graphs/contributors
+>
+> Permission is hereby granted, free of charge, to any person obtaining a copy
+> of this software and associated documentation files (the "Software"), to deal
+> in the Software without restriction, including without limitation the rights
+> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+> copies of the Software, and to permit persons to whom the Software is
+> furnished to do so, subject to the following conditions:
+>
+> The above copyright notice and this permission notice shall be included in all
+> copies or substantial portions of the Software.
+>
+> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+> SOFTWARE.
+>
+
+Sleef.jl includes ported code from the following project
+
+- [sleef](https://github.com/shibatch/sleef) [public domain] Author Naoki Shibata
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..1e85d58
--- /dev/null
+++ b/README.md
@@ -0,0 +1,61 @@
+
+
+[![Travis Build Status](https://travis-ci.org/musm/Sleef.jl.svg?branch=master)](https://travis-ci.org/musm/Sleef.jl)
+[![Appveyor Build Status](https://ci.appveyor.com/api/projects/status/j7lpafn4uf1trlfi/branch/master?svg=true)](https://ci.appveyor.com/project/musm/sleef-jl/branch/master)
+[![Coverage Status](https://coveralls.io/repos/github/musm/Sleef.jl/badge.svg?branch=master)](https://coveralls.io/github/musm/Sleef.jl?branch=master)
+[![codecov.io](http://codecov.io/github/musm/Sleef.jl/coverage.svg?branch=master)](http://codecov.io/github/musm/Sleef.jl?branch=master)
+
+A port of the [SLEEF math library](https://github.com/shibatch/sleef) (author Naoki Shibata) in pure Julia. This port includes a few extras including an `exp10` function and many bug fixes over the original code ([see](https://github.com/JuliaMath/Sleef.jl/issues) for a list of remanining issues that have not been fixed upstream). The library supports `Float32` and `Float64` types.
+
+Based on SLEEF version 2.80 (with additional bugfixes).
+
+# Usage
+
+We recommend running Julia with `-O3` for maximal performance using `Sleef.jl` and to also build a custom system image by running
+```julia
+# Pkg.add("WinRPM"); WinRPM.install("gcc") # on Windows please first run this line
+julia> include(joinpath(dirname(JULIA_HOME), "share", "julia", "build_sysimg.jl"))
+julia> build_sysimg(force=true)
+```
+and then to restart `julia`; this will ensure you are taking full advantage of hardware [FMA](https://en.wikipedia.org/wiki/FMA_instruction_set) if your CPU supports it.
+
+
+To use `Sleef.jl`
+```julia
+julia> Pkg.clone("https://github.com/JuliaMath/Sleef.jl.git")
+
+julia> using Sleef
+
+julia> Sleef.sin(2.3)
+0.7457052121767203
+
+julia> Sleef.sin(2.3f0)
+0.74570525f0
+
+julia> Sleef.exp(3.0)
+20.085536923187668
+
+julia> Sleef.exp(3f0)
+20.085537f0
+```
+
+The available functions include (within 1 ulp)
+```julia
+sin, cos, tan, asin, acos, atan, atan2, sincos, sinh, cosh, tanh,
+ asinh, acosh, atanh, log, log2, log10, log1p, ilog2, exp, exp2, exp10, expm1, ldexp, cbrt, pow
+ ```
+ Faster variants include (within 3 ulp)
+
+ ```julia
+sin_fast, cos_fast, tan_fast, sincos_fast, asin_fast, acos_fast, atan_fast, atan2_fast, log_fast, cbrt_fast
+```
+
+# Benchmarks
+
+You can benchmark the performance of the Sleef.jl math library on your machine by running
+
+```julia
+include(joinpath(Pkg.dir("Sleef"), "benchmark", "benchmark.jl"))
+```
diff --git a/REQUIRE b/REQUIRE
new file mode 100644
index 0000000..137767a
--- /dev/null
+++ b/REQUIRE
@@ -0,0 +1 @@
+julia 0.6
diff --git a/appveyor.yml b/appveyor.yml
new file mode 100644
index 0000000..ca76fcf
--- /dev/null
+++ b/appveyor.yml
@@ -0,0 +1,32 @@
+environment:
+ matrix:
+ - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x86/0.6/julia-0.6-latest-win32.exe"
+ - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x64/0.6/julia-0.6-latest-win64.exe"
+ - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x86/julia-latest-win32.exe"
+ - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x64/julia-latest-win64.exe"
+
+branches:
+ only:
+ - master
+ - /release-.*/
+
+notifications:
+ - provider: Email
+ on_build_success: false
+ on_build_failure: false
+ on_build_status_changed: false
+
+install:
+ - ps: "[System.Net.ServicePointManager]::SecurityProtocol = [System.Net.SecurityProtocolType]::Tls12"
+# Download most recent Julia Windows binary
+ - ps: (new-object net.webclient).DownloadFile($env:JULIA_URL, "C:\projects\julia-binary.exe")
+# Run installer silently, output to C:\projects\julia
+ - C:\projects\julia-binary.exe /S /D=C:\projects\julia
+
+build_script:
+# Need to convert from shallow to complete for Pkg.clone to work
+ - IF EXIST .git\shallow (git fetch --unshallow)
+ - C:\projects\julia\bin\julia -e "versioninfo(); Pkg.clone(pwd(), \"Sleef\"); Pkg.build(\"Sleef\")"
+
+test_script:
+ - C:\projects\julia\bin\julia -e "Pkg.test(\"Sleef\")"
\ No newline at end of file
diff --git a/benchmark/benchmark.jl b/benchmark/benchmark.jl
new file mode 100644
index 0000000..3c53c1a
--- /dev/null
+++ b/benchmark/benchmark.jl
@@ -0,0 +1,132 @@
+using Libm
+using BenchmarkTools
+using JLD, DataStructures
+
+RETUNE = false
+VERBOSE = true
+DETAILS = false
+
+test_types = (Float64, Float32) # Which types do you want to bench?
+
+const bench = ("Base","Libm")
+const suite = BenchmarkGroup()
+for n in bench
+ suite[n] = BenchmarkGroup([n])
+end
+
+
+bench_reduce(f::Function, X) = mapreduce(x -> reinterpret(Unsigned,x), |, f(x) for x in X)
+
+typealias IEEEFloat Union{Float32,Float64}
+MRANGE(::Type{Float64}) = 10000000
+MRANGE(::Type{Float32}) = 10000
+IntF(::Type{Float64}) = Int64
+IntF(::Type{Float32}) = Int32
+x_trig{T<:IEEEFloat}(::Type{T}) = begin
+ x_trig = T[]
+ for i = 1:10000
+ s = reinterpret(T, reinterpret(IntF(T), T(pi)/4 * i) - IntF(T)(20))
+ e = reinterpret(T, reinterpret(IntF(T), T(pi)/4 * i) + IntF(T)(20))
+ d = s
+ while d <= e
+ append!(x_trig, d)
+ d = reinterpret(T, reinterpret(IntF(T), d) + IntF(T)(1))
+ end
+ end
+ x_trig = append!(x_trig, -10:0.0002:10)
+ x_trig = append!(x_trig, -MRANGE(T):200.1:MRANGE(T))
+end
+x_exp{T<:IEEEFloat}(::Type{T}) = map(T, vcat(-10:0.0002:10, -1000:0.1:1000))
+x_exp2{T<:IEEEFloat}(::Type{T}) = map(T, vcat(-10:0.0002:10, -120:0.023:1000, -1000:0.02:2000))
+x_exp10{T<:IEEEFloat}(::Type{T}) = map(T, vcat(-10:0.0002:10, -35:0.023:1000, -300:0.01:300))
+x_expm1{T<:IEEEFloat}(::Type{T}) = map(T, vcat(-10:0.0002:10, -1000:0.021:1000, -1000:0.023:1000, 10.0.^-(0:0.02:300), -10.0.^-(0:0.02:300), 10.0.^(0:0.021:300), -10.0.^-(0:0.021:300)))
+x_log{T<:IEEEFloat}(::Type{T}) = map(T, vcat(0.0001:0.0001:10, 0.001:0.1:10000, 1.1.^(-1000:1000), 2.1.^(-1000:1000)))
+x_log10{T<:IEEEFloat}(::Type{T}) = map(T, vcat(0.0001:0.0001:10, 0.0001:0.1:10000))
+x_log1p{T<:IEEEFloat}(::Type{T}) = map(T, vcat(0.0001:0.0001:10, 0.0001:0.1:10000, 10.0.^-(0:0.02:300), -10.0.^-(0:0.02:300)))
+x_atrig{T<:IEEEFloat}(::Type{T}) = map(T, vcat(-1:0.00002:1))
+x_atan{T<:IEEEFloat}(::Type{T}) = map(T, vcat(-10:0.0002:10, -10000:0.2:10000, -10000:0.201:10000))
+x_cbrt{T<:IEEEFloat}(::Type{T}) = map(T, vcat(-10000:0.2:10000, 1.1.^(-1000:1000), 2.1.^(-1000:1000)))
+x_trigh{T<:IEEEFloat}(::Type{T}) = map(T, vcat(-10:0.0002:10, -1000:0.02:1000))
+x_asinhatanh{T<:IEEEFloat}(::Type{T}) = map(T, vcat(-10:0.0002:10, -1000:0.02:1000))
+x_acosh{T<:IEEEFloat}(::Type{T}) = map(T, vcat(1:0.0002:10, 1:0.02:1000))
+x_pow{T<:IEEEFloat}(::Type{T}) = begin
+ xx1 = map(Tuple{T,T}, [(x,y) for x = -100:0.20:100, y = 0.1:0.20:100])[:]
+ xx2 = map(Tuple{T,T}, [(x,y) for x = -100:0.21:100, y = 0.1:0.22:100])[:]
+ xx3 = map(Tuple{T,T}, [(x,y) for x = 2.1, y = -1000:0.1:1000])
+ xx = vcat(xx1, xx2, xx2)
+end
+
+import Base.atanh
+for f in (:atanh,)
+ @eval begin
+ ($f)(x::Float64) = ccall(($(string(f)),Base.libm_name), Float64, (Float64,), x)
+ ($f)(x::Float32) = ccall(($(string(f,"f")),Base.libm_name), Float32, (Float32,), x)
+ end
+end
+
+const micros = OrderedDict(
+ "sin" => x_trig,
+ "cos" => x_trig,
+ "tan" => x_trig,
+ "asin" => x_atrig,
+ "acos" => x_atrig,
+ "atan" => x_atan,
+ "exp" => x_exp,
+ "exp2" => x_exp2,
+ "exp10" => x_exp10,
+ "expm1" => x_expm1,
+ "log" => x_log,
+ "log2" => x_log10,
+ "log10" => x_log10,
+ "log1p" => x_log1p,
+ "sinh" => x_trigh,
+ "cosh" => x_trigh,
+ "tanh" => x_trigh,
+ "asinh" => x_asinhatanh,
+ "acosh" => x_acosh,
+ "atanh" => x_asinhatanh,
+ "cbrt" => x_cbrt
+ )
+
+for n in ("Base","Libm")
+ for (f,x) in micros
+ suite[n][f] = BenchmarkGroup([f])
+ for T in test_types
+ fex = Expr(:., Symbol(n), QuoteNode(Symbol(f)))
+ suite[n][f][string(T)] = @benchmarkable bench_reduce($fex, $(x(T)))
+ end
+ end
+end
+
+
+tune_params = joinpath(dirname(@__FILE__), "params.jld")
+if !isfile(tune_params) || RETUNE
+ tune!(suite; verbose=VERBOSE, seconds = 2)
+ save(tune_params, "suite", params(suite))
+ println("Saving tuned parameters.")
+else
+ println("Loading pretuned parameters.")
+ loadparams!(suite, load(tune_params, "suite"), :evals, :samples)
+end
+
+println("Running micro benchmarks...")
+results = run(suite; verbose=VERBOSE, seconds = 2)
+
+print_with_color(:blue, "Benchmarks: median ratio Libm/Base\n")
+for f in keys(micros)
+ print_with_color(:magenta, string(f))
+ for T in test_types
+ println()
+ print("time: ", )
+ tratio = ratio(median(results["Libm"][f][string(T)]), median(results["Base"][f][string(T)])).time
+ tcolor = tratio > 3 ? :red : tratio < 1.5 ? :green : :blue
+ print_with_color(tcolor, @sprintf("%.2f",tratio), " ", string(T))
+ if DETAILS
+ print_with_color(:blue, "details Libm/Base\n")
+ println(results["Libm"][f][string(T)])
+ println(results["Base"][f][string(T)])
+ println()
+ end
+ end
+ println("\n")
+end
diff --git a/doc/src/assets/logo.svg b/doc/src/assets/logo.svg
new file mode 100644
index 0000000..7eb3f3a
--- /dev/null
+++ b/doc/src/assets/logo.svg
@@ -0,0 +1,34 @@
+
+
+
diff --git a/src/Sleef.jl b/src/Sleef.jl
new file mode 100644
index 0000000..3c17936
--- /dev/null
+++ b/src/Sleef.jl
@@ -0,0 +1,88 @@
+__precompile__()
+module Sleef
+
+# export sin, cos, tan, asin, acos, atan, atan2, sincos, sinh, cosh, tanh,
+# asinh, acosh, atanh, log, log2, log10, log1p, ilog2, exp, exp2, exp10, expm1, ldexp, cbrt, pow
+
+# fast variants (within 3 ulp)
+# export sin_fast, cos_fast, tan_fast, sincos_fast, asin_fast, acos_fast, atan_fast, atan2_fast, log_fast, cbrt_fast
+
+using Base: Math.@horner, Math.exponent_bias, exponent_mask, Math.significand_bits, Math.IEEEFloat
+
+## constants (refactor later to all use dispatch)
+
+const MLN2 = 6.931471805599453094172321214581765680755001343602552541206800094933936219696955e-01 # log(2)
+const MLN2E = 1.442695040888963407359924681001892137426645954152985934135449406931109219181187 # log2(e)
+
+const MPI = 3.141592653589793238462643383279502884197169399375105820974944592307816406286198 # pi
+const MPI2 = 1.570796326794896619231321691639751442098584699687552910487472296153908203143099 # pi/2
+const MPI4 = 7.853981633974483096156608458198757210492923498437764552437361480769541015715495e-01 # pi/4
+const M1PI = 3.183098861837906715377675267450287240689192914809128974953346881177935952684543e-01 # 1/pi
+const M2PI = 6.366197723675813430755350534900574481378385829618257949906693762355871905369086e-01 # 2/pi
+const M4PI = 1.273239544735162686151070106980114896275677165923651589981338752471174381073817 # 4/pi
+
+const M1SQRT2 = 7.07106781186547524400844362104849039284835937688474036588339868995366239231051e-01 # 1/sqrt(2)
+
+const M2P13 = 1.259921049894873164767210607278228350570251464701507980081975112155299676513956 # 2^1/3
+const M2P23 = 1.587401051968199474751705639272308260391493327899853009808285761825216505624206 # 2^2/3
+
+## constants (dispatch)
+
+MDLN10E(::Type{Float64}) = Double(0.4342944819032518, 1.098319650216765e-17) # log10(e)
+MDLN10E(::Type{Float32}) = Double(0.4342945f0, -1.010305f-8)
+
+MDLN2E(::Type{Float64}) = Double(1.4426950408889634, 2.0355273740931033e-17) # log2(e)
+MDLN2E(::Type{Float32}) = Double(1.442695f0, 1.925963f-8)
+
+MDLN10(::Type{Float64}) = Double(2.302585092994046, -2.1707562233822494e-16) # log(10)
+MDLN10(::Type{Float32}) = Double(2.3025851f0, -3.1975436f-8)
+
+MDLN2(::Type{Float64}) = Double(0.6931471805599453, 2.3190468138462996e-17) # log(2)
+MDLN2(::Type{Float32}) = Double(0.6931472f0, -1.9046542f-9)
+
+MDPI(::Type{Float64}) = Double(3.141592653589793, 1.2246467991473532e-16) # pi
+MDPI(::Type{Float32}) = Double(3.1415927f0, -8.742278f-8)
+MDPI2(::Type{Float64}) = Double(1.5707963267948966, 6.123233995736766e-17) # pi/2
+MDPI2(::Type{Float32}) = Double(1.5707964f0, -4.371139f-8)
+
+MD2P13(::Type{Float64}) = Double(1.2599210498948732, -2.589933375300507e-17) # 2^1/3
+MD2P13(::Type{Float32}) = Double(1.2599211f0, -2.4018702f-8)
+
+MD2P23(::Type{Float64}) = Double(1.5874010519681996, -1.0869008194197823e-16) # 2^2/3
+MD2P23(::Type{Float32}) = Double(1.587401f0, 1.9520385f-8)
+
+# Split 4/pi into four parts (each is 26 bits)
+PI4A(::Type{Float64}) = 0.78539816290140151978
+PI4B(::Type{Float64}) = 4.9604678871439933374e-10
+PI4C(::Type{Float64}) = 1.1258708853173288931e-18
+PI4D(::Type{Float64}) = 1.7607799325916000908e-27
+
+PI4A(::Type{Float32}) = 0.78515625f0
+PI4B(::Type{Float32}) = 0.00024187564849853515625f0
+PI4C(::Type{Float32}) = 3.7747668102383613586f-08
+PI4D(::Type{Float32}) = 1.2816720341285448015f-12
+
+# Split log(2) into upper and lower parts
+LN2U(::Type{Float64}) = 0.69314718055966295651160180568695068359375
+LN2L(::Type{Float64}) = 0.28235290563031577122588448175013436025525412068e-12
+
+LN2U(::Type{Float32}) = 0.693145751953125f0
+LN2L(::Type{Float32}) = 1.428606765330187045f-06
+
+include("utils.jl") # utility functions
+include("double.jl") # Dekker style Double implementation
+include("priv.jl") # private math functions
+include("exp.jl") # exponential functions
+include("log.jl") # logarithmic functions
+include("trig.jl") # trigonometric and inverse trigonometric functions
+include("hyp.jl") # hyperbolic and inverse hyperbolic functions
+include("misc.jl") # miscallenous math functions including pow and cbrt
+
+# fallback definitions #FIXME add all
+for func in (:sin, :cos, :tan, :asin, :acos, :atan, :sinh, :cosh, :tanh, :log, :log2, :log10, :log1p, :exp, :exp2, :exp10, :expm1)
+ @eval begin
+ $func(x::Real) = $func(float(x))
+ end
+end
+
+end
diff --git a/src/double.jl b/src/double.jl
new file mode 100644
index 0000000..ccf4dd3
--- /dev/null
+++ b/src/double.jl
@@ -0,0 +1,271 @@
+import Base: -, <, copysign, flipsign, normalize, convert
+
+immutable Double{T<:IEEEFloat} <: Number
+ hi::T
+ lo::T
+end
+Double{T<:IEEEFloat}(x::T) = Double(x, zero(T))
+
+convert{T<:IEEEFloat}(::Type{Tuple{T,T}}, x::Double{T}) = (x.hi, x.lo)
+convert{T<:IEEEFloat}(::Type{T}, x::Double) = x.hi + x.lo
+
+
+@inline trunclo(x::Float64) = reinterpret(Float64, reinterpret(UInt64, x) & 0xffff_ffff_f800_0000) # clear lower 27 bits (leave upper 26 bits)
+@inline trunclo(x::Float32) = reinterpret(Float32, reinterpret(UInt32, x) & 0xffff_f000) # clear lowest 12 bits (leave upper 12 bits)
+
+@inline function splitprec(x::IEEEFloat)
+ hx = trunclo(x)
+ hx, x-hx
+end
+
+@inline function normalize{T}(x::Double{T})
+ r = x.hi + x.lo
+ Double(r, (x.hi - r) + x.lo)
+end
+
+@inline flipsign{T<:IEEEFloat}(x::Double{T}, y::T) = Double(flipsign(x.hi, y), flipsign(x.lo, y))
+
+@inline scale{T<:IEEEFloat}(x::Double{T}, s::T) = Double(s*x.hi, s*x.lo)
+
+
+@inline -{T<:IEEEFloat}(x::Double{T}) = Double(-x.hi,-x.lo)
+
+@inline function <{T<:IEEEFloat}(x::Double{T}, y::Double{T})
+ x.hi < y.hi
+end
+
+@inline function <{T<:IEEEFloat}(x::Double{T}, y::Number)
+ x.hi < y
+end
+
+@inline function <{T<:IEEEFloat}(x::Number, y::Double{T})
+ x < y.hi
+end
+
+# quick-two-sum x+y
+@inline function dadd{T<:IEEEFloat}(x::T, y::T) #WARNING |x| >= |y|
+ s = x + y
+ Double(s, (x - s) + y)
+end
+
+@inline function dadd{T<:IEEEFloat}(x::T, y::Double{T}) #WARNING |x| >= |y|
+ s = x + y.hi
+ Double(s, (x - s) + y.hi + y.lo)
+end
+
+@inline function dadd{T<:IEEEFloat}(x::Double{T}, y::T) #WARNING |x| >= |y|
+ s = x.hi + y
+ Double(s, (x.hi - s) + y + x.lo)
+end
+
+@inline function dadd{T<:IEEEFloat}(x::Double{T}, y::Double{T}) #WARNING |x| >= |y|
+ s = x.hi + y.hi
+ Double(s, (x.hi - s) + y.hi + y.lo + x.lo)
+end
+
+@inline function dsub{T<:IEEEFloat}(x::Double{T}, y::Double{T}) #WARNING |x| >= |y|
+ s = x.hi - y.hi
+ Double(s, (x.hi - s) - y.hi - y.lo + x.lo)
+end
+
+@inline function dsub{T<:IEEEFloat}(x::Double{T}, y::T) #WARNING |x| >= |y|
+ s = x.hi - y
+ Double(s, (x.hi - s) - y + x.lo)
+end
+
+@inline function dsub{T<:IEEEFloat}(x::T, y::Double{T}) #WARNING |x| >= |y|
+ s = x - y.hi
+ Double(s, (x - s) - y.hi - y.lo)
+end
+
+@inline function dsub{T<:IEEEFloat}(x::T, y::T) #WARNING |x| >= |y|
+ s = x - y
+ Double(s, (x - s) - y)
+end
+
+
+# two-sum x+y NO BRANCH
+@inline function dadd2{T<:IEEEFloat}(x::T, y::T)
+ s = x + y
+ v = s - x
+ Double(s, (x - (s - v)) + (y - v))
+end
+
+@inline function dadd2{T<:IEEEFloat}(x::T, y::Double{T})
+ s = x + y.hi
+ v = s - x
+ Double(s, (x - (s - v)) + (y.hi - v) + y.lo)
+end
+
+@inline dadd2{T<:IEEEFloat}(x::Double{T}, y::T) = dadd2(y,x)
+
+@inline function dadd2{T<:IEEEFloat}(x::Double{T}, y::Double{T})
+ s = x.hi + y.hi
+ v = s - x.hi
+ Double(s,(x.hi - (s - v)) + (y.hi - v) + x.lo + y.lo)
+end
+
+@inline function dsub2{T<:IEEEFloat}(x::T, y::T)
+ s = x - y
+ v = s - x
+ Double(s, (x - (s - v)) + (-y - v))
+end
+
+@inline function dsub2{T<:IEEEFloat}(x::T, y::Double{T})
+ s = x - y.hi
+ v = s - x
+ Double(s, (x - (s - v)) + (-y.hi - v) - y.lo)
+end
+
+@inline function dsub2{T<:IEEEFloat}(x::Double{T}, y::T)
+ s = x.hi - y
+ v = s - x.hi
+ Double(s,(x.hi - (s - v)) + (-y - v) + x.lo)
+end
+
+@inline function dsub2{T<:IEEEFloat}(x::Double{T}, y::Double{T})
+ s = x.hi - y.hi
+ v = s - x.hi
+ Double(s,(x.hi - (s - v)) + (-y.hi - v) + x.lo - y.lo)
+end
+
+
+
+if FMA_FAST
+
+ # two-prod-fma
+ @inline function dmul{T<:IEEEFloat}(x::T, y::T)
+ z = x*y
+ Double(z, fma(x, y, -z))
+ end
+
+ @inline function dmul{T<:IEEEFloat}(x::Double{T}, y::T)
+ z = x.hi*y
+ Double(z, fma(x.hi, y, -z) + x.lo*y)
+ end
+
+ @inline dmul{T<:IEEEFloat}(x::T, y::Double{T}) = dmul(y,x)
+
+ @inline function dmul{T<:IEEEFloat}(x::Double{T}, y::Double{T})
+ z = x.hi*y.hi
+ Double(z, fma(x.hi, y.hi, -z) + x.hi*y.lo + x.lo*y.hi)
+ end
+
+ # x^2
+ @inline function dsqu{T<:IEEEFloat}(x::T)
+ z = x*x
+ Double(z, fma(x,x,-z))
+ end
+
+ @inline function dsqu{T<:IEEEFloat}(x::Double{T})
+ z = x.hi*x.hi
+ Double(z, fma(x.hi, x.hi, -z) + x.hi*(x.lo+x.lo))
+ end
+
+ # sqrt(x)
+ @inline function dsqrt{T<:IEEEFloat}(x::Double{T})
+ zhi = _sqrt(x.hi)
+ Double(zhi, (x.lo + fma(-zhi, zhi, x.hi))/(zhi+zhi))
+ end
+
+ # x/y
+ @inline function ddiv{T<:IEEEFloat}(x::Double{T}, y::Double{T})
+ invy = 1/y.hi
+ zhi = x.hi*invy
+ Double(zhi, (fma(-zhi, y.hi, x.hi) + fma(-zhi, y.lo, x.lo))*invy)
+ end
+
+ @inline function ddiv{T<:IEEEFloat}(x::T, y::T)
+ ry = 1/y
+ r = x*ry
+ Double(r, fma(-r,y,x)*ry)
+ end
+
+ # 1/x
+ @inline function ddrec{T<:IEEEFloat}(x::T)
+ zhi = 1/x
+ Double(zhi, fma(-zhi, x, one(T))*zhi)
+ end
+
+ @inline function ddrec{T<:IEEEFloat}(x::Double{T})
+ zhi = 1/x.hi
+ Double(zhi, (fma(-zhi, x.hi, one(T)) + -zhi*x.lo)*zhi)
+ end
+
+else
+
+ #two-prod x*y
+ @inline function dmul{T<:IEEEFloat}(x::T, y::T)
+ hx, lx = splitprec(x)
+ hy, ly = splitprec(y)
+ z = x*y
+ Double(z, ((hx*hy-z) + lx*hy + hx*ly) + lx*ly)
+ end
+
+ @inline function dmul{T<:IEEEFloat}(x::Double{T}, y::T)
+ hx, lx = splitprec(x.hi)
+ hy, ly = splitprec(y)
+ z = x.hi*y
+ Double(z, (hx*hy-z) + lx*hy + hx*ly + lx*ly + x.lo*y)
+ end
+
+ @inline dmul{T<:IEEEFloat}(x::T, y::Double{T}) = dmul(y,x)
+
+ @inline function dmul{T<:IEEEFloat}(x::Double{T}, y::Double{T})
+ hx, lx = splitprec(x.hi)
+ hy, ly = splitprec(y.hi)
+ z = x.hi*y.hi
+ Double(z, (((hx*hy-z) + lx*hy + hx*ly) + lx*ly) + x.hi*y.lo + x.lo*y.hi)
+ end
+
+ # x^2
+ @inline function dsqu{T<:IEEEFloat}(x::T)
+ hx, lx = splitprec(x)
+ z = x*x
+ Double(z, (hx*hx-z) + lx*(hx + hx) + lx*lx)
+ end
+
+ @inline function dsqu{T<:IEEEFloat}(x::Double{T})
+ hx, lx = splitprec(x.hi)
+ z = x.hi*x.hi
+ Double(z, (hx*hx-z) + lx*(hx+hx) + lx*lx + x.hi*(x.lo+x.lo))
+ end
+
+ # sqrt(x)
+ @inline function dsqrt{T<:IEEEFloat}(x::Double{T})
+ c = _sqrt(x.hi)
+ u = dsqu(c)
+ Double(c, (x.hi - u.hi - u.lo + x.lo)/(c+c))
+ end
+
+ # x/y
+ @inline function ddiv{T<:IEEEFloat}(x::Double{T}, y::Double{T})
+ invy = 1/y.hi
+ c = x.hi*invy
+ u = dmul(c, y.hi)
+ Double(c,((((x.hi - u.hi) - u.lo) + x.lo) - c*y.lo)*invy)
+ end
+
+ @inline function ddiv{T<:IEEEFloat}(x::T, y::T)
+ ry = 1/y
+ r = x*ry
+ hx, lx = splitprec(r)
+ hy, ly = splitprec(y)
+ Double(r, (((-hx*hy+r*y) - lx*hy - hx*ly) - lx*ly)*ry)
+ end
+
+
+ # 1/x
+ @inline function ddrec{T<:IEEEFloat}(x::T)
+ c = 1/x
+ u = dmul(c,x)
+ Double(c, (one(T) - u.hi - u.lo)*c)
+ end
+
+ @inline function ddrec{T<:IEEEFloat}(x::Double{T})
+ c = 1/x.hi
+ u = dmul(c,x.hi)
+ Double(c, (one(T) - u.hi - u.lo - c*x.lo)*c)
+ end
+
+end
diff --git a/src/exp.jl b/src/exp.jl
new file mode 100644
index 0000000..f81a21b
--- /dev/null
+++ b/src/exp.jl
@@ -0,0 +1,106 @@
+# exported exponential functions
+
+"""
+ ldexp(a, n::Int) -> IEEEFloat
+
+Computes `a × 2^n`
+"""
+ldexp(x::IEEEFloat, q::Int) = ldexpk(x,q)
+
+
+
+over_e2(::Type{Float64}) = 1024
+over_e2(::Type{Float32}) = 128f0
+
+"""
+ exp2(x)
+
+Compute the base-`2` exponential of `x`, that is `2ˣ`.
+"""
+function exp2{T<:IEEEFloat}(x::T)
+ u = expk(dmul(MDLN2(T), x))
+ x > over_e2(T) && (u = T(Inf))
+ isninf(x) && (u = T(0))
+ return u
+end
+
+
+
+over_e10(::Type{Float64}) = 308
+over_e10(::Type{Float32}) = 38f0
+
+"""
+ exp10(x)
+
+Compute the base-`10` exponential of `x`, that is `10ˣ`.
+"""
+function exp10{T<:IEEEFloat}(x::T)
+ u = expk(dmul(MDLN10(T), x))
+ x > over_e10(T) && (u = T(Inf))
+ isninf(x) && (u = T(0))
+ return u
+end
+
+
+
+over_em1(::Type{Float64}) = 700.0
+over_em1(::Type{Float32}) = 88f0
+under_em1(::Type{Float64}) = -0.36043653389117156089696070315825181539851971360337e2
+under_em1(::Type{Float32}) = -0.15942385152878742116596338793538061065739925620174f2
+
+"""
+ expm1(x)
+
+Compute `eˣ- 1` accurately for small values of `x`.
+"""
+function expm1{T<:IEEEFloat}(x::T)
+ u = T(dadd2(expk2(Double(x)), -T(1)))
+ x > over_em1(T) && (u = T(Inf))
+ x < under_em1(T) && (u = -T(1))
+ return u
+end
+
+
+
+"""
+ exp(x)
+
+Compute the base-`e` exponential of `x`, that is `eˣ`.
+"""
+function exp end
+
+let
+global exp
+
+const c11d = 2.08860621107283687536341e-09
+const c10d = 2.51112930892876518610661e-08
+const c9d = 2.75573911234900471893338e-07
+const c8d = 2.75572362911928827629423e-06
+const c7d = 2.4801587159235472998791e-05
+const c6d = 0.000198412698960509205564975
+const c5d = 0.00138888888889774492207962
+const c4d = 0.00833333333331652721664984
+const c3d = 0.0416666666666665047591422
+const c2d = 0.166666666666666851703837
+const c1d = 0.50
+
+const c5f = 0.00136324646882712841033936f0
+const c4f = 0.00836596917361021041870117f0
+const c3f = 0.0416710823774337768554688f0
+const c2f = 0.166665524244308471679688f0
+const c1f = 0.499999850988388061523438f0
+
+global @inline _exp(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d
+global @inline _exp(x::Float32) = @horner x c1f c2f c3f c4f c5f
+
+function exp{T<:IEEEFloat}(x::T)
+ q = roundi(T(MLN2E)*x)
+ s = muladd(q, -LN2U(T), x)
+ s = muladd(q, -LN2L(T), s)
+ u =_exp(s)
+ u = s*s*u + s + 1
+ u = ldexpk(u,q)
+ isninf(x) && (u = T(0))
+ return u
+end
+end
diff --git a/src/hyp.jl b/src/hyp.jl
new file mode 100644
index 0000000..8c30aa7
--- /dev/null
+++ b/src/hyp.jl
@@ -0,0 +1,113 @@
+# exported hyperbolic functions
+
+over_sch(::Type{Float64}) = 710.0
+over_sch(::Type{Float32}) = 89f0
+
+"""
+ sinh(x)
+
+Compute hyperbolic sine of `x`.
+"""
+function sinh{T<:IEEEFloat}(x::T)
+ u = abs(x)
+ d = expk2(Double(u))
+ d = dsub(d, ddrec(d))
+ u = T(d)*T(0.5)
+ u = abs(x) > over_sch(T) ? T(Inf) : u
+ u = isnan(u) ? T(Inf) : u
+ u = flipsign(u,x)
+ u = isnan(x) ? T(NaN) : u
+ return u
+end
+
+
+
+"""
+ cosh(x)
+
+Compute hyperbolic cosine of `x`.
+"""
+function cosh{T<:IEEEFloat}(x::T)
+ u = abs(x)
+ d = expk2(Double(u))
+ d = dadd(d, ddrec(d))
+ u = T(d)*T(0.5)
+ u = abs(x) > over_sch(T) ? T(Inf) : u
+ u = isnan(u) ? T(Inf) : u
+ u = isnan(x) ? T(NaN) : u
+ return u
+end
+
+
+
+over_th(::Type{Float64}) = 18.714973875
+over_th(::Type{Float32}) = 8.664339742f0
+
+"""
+ tanh(x)
+
+Compute hyperbolic tangent of `x`.
+"""
+function tanh{T<:IEEEFloat}(x::T)
+ u = abs(x)
+ d = expk2(Double(u))
+ e = ddrec(d)
+ d = ddiv(dsub(d,e), dadd(d,e))
+ u = T(d)
+ u = abs(x) > over_th(T) ? T(1) : u
+ u = isnan(u) ? T(1) : u
+ u = flipsign(u,x)
+ u = isnan(x) ? T(NaN) : u
+ return u
+end
+
+
+
+"""
+ asinh(x)
+
+Compute the inverse hyperbolic sine of `x`.
+"""
+function asinh{T<:IEEEFloat}(x::T)
+ u = abs(x)
+ d = logk2(dadd(dsqrt(dadd2(dsqu(u), T(1))), u))
+ u = T(d)
+ u = isinf(x) || isnan(u) ? T(Inf) : u
+ u = flipsign(u,x)
+ u = isnan(x) ? T(NaN) : u
+ return u
+end
+
+
+
+"""
+ acosh(x)
+
+Compute the inverse hyperbolic cosine of `x`.
+"""
+function acosh{T<:IEEEFloat}(x::T)
+ d = logk2(dadd2(dsqrt(dsub2(dsqu(x), T(1))), x))
+ u = T(d)
+ u = isinf(x) || isnan(u) ? T(Inf) : u
+ u = x == T(1) ? T(0) : u
+ u = x < T(1) ? T(NaN) : u
+ u = isnan(x) ? T(NaN) : u
+ return u
+end
+
+
+
+"""
+ atanh(x)
+
+Compute the inverse hyperbolic tangent of `x`.
+"""
+function atanh{T<:IEEEFloat}(x::T)
+ u = abs(x)
+ d = logk2(ddiv(dadd2(T(1), u), dsub2(T(1), u)))
+ u = u > T(1) ? T(NaN) : (u == T(1) ? T(Inf) : T(d)*T(0.5))
+ u = isinf(x) || isnan(u) ? T(NaN) : u
+ u = flipsign(u,x)
+ u = isnan(x) ? T(NaN) : u
+ return u
+end
diff --git a/src/log.jl b/src/log.jl
new file mode 100644
index 0000000..d2708e8
--- /dev/null
+++ b/src/log.jl
@@ -0,0 +1,139 @@
+# exported logarithmic functions
+
+"""
+ ilog2(x)
+
+Returns the integral part of the logarithm of `abs(x)`, using base 2 for the
+logarithm. In other words, this computes the binary exponent of `x` such that
+
+ x = significand × 2^exponent,
+
+where `significand ∈ [1, 2)`.
+
+* Exceptional cases (where `Int` is the machine wordsize)
+ * `x = 0` returns `typemin(Int)`
+ * `x = Inf` returns `typemax(Int)`
+ * `x = NaN` returns `typemax(Int)`
+"""
+function ilog2{T<:IEEEFloat}(x::T)
+ e = ilog2k(abs(x)) - 1
+ isnan(x) && (e = typemax(Int))
+ isinf(x) && (e = typemax(Int))
+ x == 0 && (e = typemin(Int))
+ return e
+end
+
+
+"""
+ log10(x)
+
+Returns the base `10` logarithm of `x`.
+"""
+function log10{T<:IEEEFloat}(x::T)
+ u = T(dmul(logk(x), MDLN10E(T)))
+ isinf(x) && (u = T(Inf))
+ x < 0 && (u = T(NaN))
+ x == 0 && (u = T(-Inf))
+ return u
+end
+
+
+"""
+ log2(x)
+
+Returns the base `2` logarithm of `x`.
+"""
+function log2{T<:IEEEFloat}(x::T)
+ u = T(dmul(logk(x), MDLN2E(T)))
+ isinf(x) && (u = T(Inf))
+ x < 0 && (u = T(NaN))
+ x == 0 && (u = T(-Inf))
+ return u
+end
+
+
+"""
+ log1p(x)
+
+Accurately compute the natural logarithm of 1+x.
+"""
+function log1p{T<:IEEEFloat}(x::T)
+ u = T(logk2(dadd2(x, T(1))))
+ isinf(x) && (u = T(Inf))
+ x < -1 && (u = T(NaN))
+ x == -1 && (u = -T(Inf))
+ return copysign(u,x) # return correct sign for -0.0
+end
+
+
+"""
+ log(x)
+
+Compute the natural logarithm of `x`. The inverse of the natural logarithm is
+the natural expoenential function `exp(x)`
+"""
+function log{T<:IEEEFloat}(x::T)
+ u = T(logk(x))
+ isinf(x) && (u = T(Inf))
+ x < 0 && (u = T(NaN))
+ x == 0 && (u = -T(Inf))
+ return u
+end
+
+# First we split the argument to its mantissa `m` and integer exponent `e` so
+# that `d = m \times 2^e`, where `m \in [0.5, 1)` then we apply the polynomial
+# approximant on this reduced argument `m` before putting back the exponent
+# in. This first part is done with the help of the private function
+# `ilog2k(x)` and we put the exponent back using
+
+# `\log(m \times 2^e) = \log(m) + \log 2^e = \log(m) + e\times MLN2
+
+# The polynomial we evaluate is based on coefficients from
+
+# `log_2(x) = 2\sum_{n=0}^\infty \frac{1}{2n+1} \bigl(\frac{x-1}{x+1}^{2n+1}\bigr)`
+
+# That being said, since this converges faster when the argument is close to
+# 1, we multiply `m` by `2` and subtract 1 for the exponent `e` when `m` is
+# less than `sqrt(2)/2`
+"""
+ log_fast(x)
+
+Compute the natural logarithm of `x`. The inverse of the natural logarithm is
+the natural expoenential function `exp(x)`
+"""
+function log_fast end
+
+let
+global log_fast
+
+const c8d = 0.148197055177935105296783
+const c7d = 0.153108178020442575739679
+const c6d = 0.181837339521549679055568
+const c5d = 0.22222194152736701733275
+const c4d = 0.285714288030134544449368
+const c3d = 0.399999999989941956712869
+const c2d = 0.666666666666685503450651
+const c1d = 2.0
+
+const c5f = 0.2371599674224853515625f0
+const c4f = 0.285279005765914916992188f0
+const c3f = 0.400005519390106201171875f0
+const c2f = 0.666666567325592041015625f0
+const c1f = 2f0
+
+global @inline _log_fast(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d
+global @inline _log_fast(x::Float32) = @horner x c1f c2f c3f c4f c5f
+
+function log_fast{T<:IEEEFloat}(x::T)
+ e = ilog2k(T(M1SQRT2)*x)
+ m = ldexpk(x,-e)
+ u = (m-1)/(m+1)
+ u2 = u*u
+ t =_log_fast(u2)
+ u = muladd(u, t, T(MLN2)*e)
+ isinf(x) && (u = T(Inf))
+ x < 0 && (u = T(NaN))
+ x == 0 && (u = -T(Inf))
+ return u
+end
+end
diff --git a/src/misc.jl b/src/misc.jl
new file mode 100644
index 0000000..0a20ea4
--- /dev/null
+++ b/src/misc.jl
@@ -0,0 +1,119 @@
+
+"""
+ pow(x, y)
+
+Exponentiation operator, returns `x` raised to the power `y`.
+"""
+function pow{T<:IEEEFloat}(x::T, y::T)
+ yi = unsafe_trunc(Int,y)
+ yint = yi == y
+ yodd = isodd(yi) && yint
+ z = expk(dmul(logk(abs(x)), y))
+ z = isnan(z) ? T(Inf) : z
+ z *= (x >= 0 ? T(1) : (!yint ? T(NaN) : (yodd ? -T(1) : T(1))))
+ efx = flipsign(abs(x) - 1, y)
+ isinf(y) && (z = efx < 0 ? T(0) : (efx == 0 ? T(1) : T(Inf)))
+ if isinf(x) || x == 0
+ z = (yodd ? _sign(x) : T(1)) * ((x == 0 ? -y : y) < 0 ? T(0) : T(Inf))
+ end
+ (isnan(x) || isnan(y)) && (z = T(NaN))
+ (y == 0 || x == 1) && (z = T(1))
+ return z
+end
+
+
+
+let
+global cbrt_fast
+global cbrt
+
+const c6d = -0.640245898480692909870982
+const c5d = 2.96155103020039511818595
+const c4d = -5.73353060922947843636166
+const c3d = 6.03990368989458747961407
+const c2d = -3.85841935510444988821632
+const c1d = 2.2307275302496609725722
+
+const c6f = -0.601564466953277587890625f0
+const c5f = 2.8208892345428466796875f0
+const c4f = -5.532182216644287109375f0
+const c3f = 5.898262500762939453125f0
+const c2f = -3.8095417022705078125f0
+const c1f = 2.2241256237030029296875f0
+
+global @inline _cbrt(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d
+global @inline _cbrt(x::Float32) = @horner x c1f c2f c3f c4f c5f c6f
+
+"""
+ cbrt_fast(x)
+
+Return `x^{1/3}`.
+"""
+function cbrt_fast{T<:IEEEFloat}(d::T)
+ e = ilog2k(d)
+ d = ldexpk(d,-e)
+ r = (e + 6144) % 3
+ q = r == 1 ? T(M2P13) : T(1)
+ q = r == 2 ? T(M2P23) : q
+ q = ldexpk(q, (e + 6144) ÷ 3 - 2048)
+ q = flipsign(q,d)
+ d = abs(d)
+ x =_cbrt(d)
+ y = x*x
+ y = y*y
+ x -= (d*y - x)*T(1/3)
+ y = d*x*x
+ y = (y - T(2/3)*y*(y*x - 1))*q
+end
+
+
+"""
+ cbrt(x)
+
+Return `x^{1/3}`. The prefix operator `∛` is equivalent to `cbrt`.
+"""
+function cbrt{T<:IEEEFloat}(d::T)
+ e = ilog2k(d)
+ d = ldexpk(d,-e)
+ r = (e + 6144) % 3
+ q2 = r == 1 ? MD2P13(T) : Double(T(1))
+ q2 = r == 2 ? MD2P23(T) : q2
+ q2 = flipsign(q2,d)
+ d = abs(d)
+ x =_cbrt(d)
+ y = x*x
+ y = y*y
+ x -= (d*y - x)*T(1/3)
+ z = x
+ u = dsqu(x)
+ u = dsqu(u)
+ u = dmul(u, d)
+ u = dsub(u,x)
+ y = T(u)
+ y = -T(2/3)*y*z
+ v = dadd(dsqu(z), y)
+ v = dmul(v,d)
+ v = dmul(v,q2)
+ z = ldexp(T(v), (e + 6144) ÷ 3 - 2048)
+ isinf(d) && (z = flipsign(T(Inf), q2.hi))
+ d == 0 && (z = flipsign(T(0), q2.hi))
+ return z
+end
+end
+
+
+
+"""
+ hypot(x,y)
+
+Compute the hypotenuse `\sqrt{x^2+y^2}` avoiding overflow and underflow.
+"""
+function hypot{T<:IEEEFloat}(x::T, y::T)
+ x = abs(x)
+ y = abs(y)
+ if x < y
+ x, y = y, x
+ end
+ r = (x == 0) ? y : y/x
+ x*sqrt(T(1) + r*r)
+end
diff --git a/src/priv.jl b/src/priv.jl
new file mode 100644
index 0000000..17fae2c
--- /dev/null
+++ b/src/priv.jl
@@ -0,0 +1,231 @@
+# private math functions
+
+"""
+A helper function for `ldexpk`
+
+First note that `r = (q >> n) << n` clears the lowest n bits of q, i.e. returns 2^n where n is the
+largest integer such that q >= 2^n
+
+For numbers q less than 2^m the following code does the same as the above snippet
+ `r = ( (q>>v + q) >> n - q>>v ) << n`
+For numbers larger than or equal to 2^v this subtracts 2^n from q for q>>n times.
+
+The function returns q(input) := q(output) + offset*r
+
+In the code for ldexpk we actually use
+ `m = ( (m>>n + m) >> n - m>>m ) << (n-2)`.
+So that x has to be multplied by u four times `x = x*u*u*u*u` to put the value of the offset
+exponent amount back in.
+"""
+@inline function _split_exponent(q, n, v, offset)
+ m = q >> v
+ m = (((m + q) >> n) - m) << (n-offset)
+ q = q - (m << offset)
+ m, q
+end
+@inline split_exponent(::Type{Float64}, q::Int) = _split_exponent(q, UInt(9), UInt(31), UInt(2))
+@inline split_exponent(::Type{Float32}, q::Int) = _split_exponent(q, UInt(6), UInt(31), UInt(2))
+
+"""
+ ldexpk(a::IEEEFloat, n::Int) -> IEEEFloat
+
+Computes `a × 2^n`.
+"""
+@inline function ldexpk{T<:IEEEFloat}(x::T, q::Int)
+ bias = exponent_bias(T)
+ emax = exponent_max(T)
+ m, q = split_exponent(T,q)
+ m += bias
+ m = ifelse(m < 0, 0, m)
+ m = ifelse(m > emax, emax, m)
+ q += bias
+ u = integer2float(T, m)
+ x = x*u*u*u*u
+ u = integer2float(T, q)
+ x*u
+end
+
+@inline ldexpk2{T<:IEEEFloat}(x::T, e::Int) = x * pow2i(T, e >> UInt(1)) * pow2i(T, e - (e >> UInt(1)))
+
+
+
+# The following define threshold values for `ilog2k`
+threshold_exponent(::Type{Float64}) = 300
+threshold_exponent(::Type{Float32}) = 64
+
+"""
+ ilog2k(x::IEEEFloat) -> Int
+
+Returns the integral part of the logarithm of `|x|`, using 2 as base for the logarithm; in other
+words this returns the binary exponent of `x` so that
+
+ x = significand × 2^exponent
+
+where `significand ∈ [0.5, 1)`.
+"""
+@inline function ilog2k{T<:IEEEFloat}(d::T)
+ m = d < T(2)^-threshold_exponent(T)
+ d = ifelse(m, d*T(2)^threshold_exponent(T), d)
+ q = float2integer(d) & exponent_max(T)
+ q = ifelse(m, q - (threshold_exponent(T) + exponent_bias(T) - 1), q - (exponent_bias(T) - 1)) # subtract 1 since we want 2^q
+end
+
+
+let
+const c20d = 1.06298484191448746607415e-05
+const c19d = -0.000125620649967286867384336
+const c18d = 0.00070557664296393412389774
+const c17d = -0.00251865614498713360352999
+const c16d = 0.00646262899036991172313504
+const c15d = -0.0128281333663399031014274
+const c14d = 0.0208024799924145797902497
+const c13d = -0.0289002344784740315686289
+const c12d = 0.0359785005035104590853656
+const c11d = -0.041848579703592507506027
+const c10d = 0.0470843011653283988193763
+const c9d = -0.0524914210588448421068719
+const c8d = 0.0587946590969581003860434
+const c7d = -0.0666620884778795497194182
+const c6d = 0.0769225330296203768654095
+const c5d = -0.0909090442773387574781907
+const c4d = 0.111111108376896236538123
+const c3d = -0.142857142756268568062339
+const c2d = 0.199999999997977351284817
+const c1d = -0.333333333333317605173818
+
+const c9f = -0.00176397908944636583328247f0
+const c8f = 0.0107900900766253471374512f0
+const c7f = -0.0309564601629972457885742f0
+const c6f = 0.0577365085482597351074219f0
+const c5f = -0.0838950723409652709960938f0
+const c4f = 0.109463557600975036621094f0
+const c3f = -0.142626821994781494140625f0
+const c2f = 0.199983194470405578613281f0
+const c1f = -0.333332866430282592773438f0
+
+global @inline _atan2k_fast(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d c12d c13d c14d c15d c16d c17d c18d c19d c20d
+global @inline _atan2k_fast(x::Float32) = @horner x c1f c2f c3f c4f c5f c6f c7f c8f c9f
+
+global @inline function atan2k_fast{T<:IEEEFloat}(y::T, x::T)
+ q = 0
+ if x < 0
+ x = -x
+ q = -2
+ end
+ if y > x
+ t = x; x = y
+ y = -t
+ q += 1
+ end
+ s = y/x
+ t = s*s
+ u =_atan2k_fast(t)
+ t = u*t*s + s
+ q*T(MPI2) + t
+end
+
+
+global @inline _atan2k(x::Double{Float64}) = @horner x.hi c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d c12d c13d c14d c15d c16d c17d c18d c19d c20d
+global @inline _atan2k(x::Double{Float32}) = dadd(c1f, x.hi*(@horner x.hi c2f c3f c4f c5f c6f c7f c8f c9f))
+
+global @inline function atan2k{T<:IEEEFloat}(y::Double{T}, x::Double{T})
+ q = 0
+ if x < 0
+ x = -x
+ q = -2
+ end
+ if y > x
+ t = x; x = y
+ y = -t
+ q += 1
+ end
+ s = ddiv(y,x)
+ t = dsqu(s)
+ u =_atan2k(t)
+ t = dmul(s, dadd(T(1), dmul(t, u)))
+ dadd(dmul(T(q), MDPI2(T)), t)
+end
+end
+
+
+let
+const c10d = 2.51069683420950419527139e-08
+const c9d = 2.76286166770270649116855e-07
+const c8d = 2.75572496725023574143864e-06
+const c7d = 2.48014973989819794114153e-05
+const c6d = 0.000198412698809069797676111
+const c5d = 0.0013888888939977128960529
+const c4d = 0.00833333333332371417601081
+const c3d = 0.0416666666665409524128449
+const c2d = 0.166666666666666740681535
+const c1d = 0.500000000000000999200722
+
+const c5f = 0.00136324646882712841033936f0
+const c4f = 0.00836596917361021041870117f0
+const c3f = 0.0416710823774337768554688f0
+const c2f = 0.166665524244308471679688f0
+const c1f = 0.499999850988388061523438f0
+
+global @inline _expk(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d
+global @inline _expk(x::Float32) = @horner x c1f c2f c3f c4f c5f
+
+global @inline function expk{T<:IEEEFloat}(d::Double{T})
+ q = round(T(d)*T(MLN2E))
+ s = dadd(d, -q*LN2U(T))
+ s = dadd(s, -q*LN2L(T))
+ u =_expk(T(s))
+ t = dadd(s, dmul(dsqu(s), u))
+ t = dadd(T(1), t)
+ ldexpk(T(t), unsafe_trunc(Int,q))
+end
+
+
+global @inline function expk2{T<:IEEEFloat}(d::Double{T})
+ q = round(T(d)*T(MLN2E))
+ s = dadd(d, -q*LN2U(T))
+ s = dadd(s, -q*LN2L(T))
+ u =_expk(s.hi)
+ t = dadd(s, dmul(dsqu(s), u))
+ t = dadd(T(1), t)
+ scale(t, pow2i(T, unsafe_trunc(Int,q)))
+end
+end
+
+
+let
+const c8d = 0.134601987501262130076155
+const c7d = 0.132248509032032670243288
+const c6d = 0.153883458318096079652524
+const c5d = 0.181817427573705403298686
+const c4d = 0.222222231326187414840781
+const c3d = 0.285714285651261412873718
+const c2d = 0.400000000000222439910458
+const c1d = 0.666666666666666371239645
+
+const c4f = 0.2371599674224853515625f0
+const c3f = 0.285279005765914916992188f0
+const c2f = 0.400005519390106201171875f0
+const c1f = 0.666666567325592041015625f0
+
+global @inline _logk(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d
+global @inline _logk(x::Float32) = @horner x c1f c2f c3f c4f
+
+global @inline function logk{T<:IEEEFloat}(d::T)
+ e = ilog2k(d*T(M1SQRT2))
+ m = ldexpk(d,-e)
+ x = ddiv(dsub2(m, T(1)), dadd2(T(1), m))
+ x2 = dsqu(x)
+ t =_logk(x2.hi)
+ dadd(dmul(MDLN2(T), T(e)), dadd(scale(x, T(2)), dmul(dmul(x2, x), t)))
+end
+
+
+global @inline function logk2{T<:IEEEFloat}(d::Double{T})
+ e = ilog2k(d.hi*T(M1SQRT2))
+ m = scale(d, pow2i(T,-e))
+ x = ddiv(dsub2(m, T(1)), dadd2(m, T(1)))
+ x2 = dsqu(x)
+ t =_logk(x2.hi)
+ dadd(dmul(MDLN2(T), T(e)), dadd(scale(x, T(2)), dmul(dmul(x2, x), t)))
+end
+end
diff --git a/src/trig.jl b/src/trig.jl
new file mode 100644
index 0000000..0621572
--- /dev/null
+++ b/src/trig.jl
@@ -0,0 +1,493 @@
+# exported trigonometric functions
+
+"""
+ sin(x)
+
+Compute the sine of `x`, where the output is in radians.
+"""
+function sin end
+
+"""
+ cos(x)
+
+Compute the cosine of `x`, where the output is in radians.
+"""
+function cos end
+
+let
+global sin
+global cos
+
+const c8d = 2.72052416138529567917983e-15
+const c7d = -7.64292594113954471900203e-13
+const c6d = 1.60589370117277896211623e-10
+const c5d = -2.5052106814843123359368e-08
+const c4d = 2.75573192104428224777379e-06
+const c3d = -0.000198412698412046454654947
+const c2d = 0.00833333333333318056201922
+const c1d = -0.166666666666666657414808
+
+const c4f = 2.6083159809786593541503f-06
+const c3f = -0.0001981069071916863322258f0
+const c2f = 0.00833307858556509017944336f0
+const c1f = -0.166666597127914428710938f0
+
+global @inline _sincos(x::Double{Float64}) = dadd(c1d, x.hi*(@horner x.hi c2d c3d c4d c5d c6d c7d c8d))
+global @inline _sincos(x::Double{Float32}) = dadd(c1f, x.hi*(@horner x.hi c2f c3f c4f))
+
+function sin{T<:IEEEFloat}(x::T)
+ d = abs(x)
+ q = round(d*T(M1PI))
+ s = dsub2(d, q*PI4A(T)*4)
+ s = dsub2(s, q*PI4B(T)*4)
+ s = dsub2(s, q*PI4C(T)*4)
+ s = dsub2(s, q*PI4D(T)*4)
+ t = s
+ s = dsqu(s)
+ w =_sincos(s)
+ v = dmul(t, dadd(T(1), dmul(w,s)))
+ u = T(v)
+ qi = unsafe_trunc(Int,q)
+ qi & 1 != 0 && (u = -u)
+ return flipsign(u,x)
+end
+
+function cos{T<:IEEEFloat}(x::T)
+ x = abs(x)
+ q = muladd(T(2), round(x*T(M1PI) - T(0.5)), T(1))
+ s = dsub2(x, q*PI4A(T)*2)
+ s = dsub2(s, q*PI4B(T)*2)
+ s = dsub2(s, q*PI4C(T)*2)
+ s = dsub2(s, q*PI4D(T)*2)
+ t = s
+ s = dsqu(s)
+ w =_sincos(s)
+ v = dmul(t, dadd(T(1), dmul(w,s)))
+ u = T(v)
+ qi = unsafe_trunc(Int,q)
+ qi & 2 == 0 && (u = -u)
+ return u
+end
+end
+
+
+"""
+ sin_fast(x)
+
+Compute the sine of `x`, where the output is in radians.
+"""
+function sin_fast end
+
+"""
+ cos_fast(x)
+
+Compute the cosine of `x`, where the output is in radians.
+"""
+function cos_fast end
+
+let
+global sin_fast
+global cos_fast
+
+const c9d = -7.97255955009037868891952e-18
+const c8d = 2.81009972710863200091251e-15
+const c7d = -7.64712219118158833288484e-13
+const c6d = 1.60590430605664501629054e-10
+const c5d = -2.50521083763502045810755e-08
+const c4d = 2.75573192239198747630416e-06
+const c3d = -0.000198412698412696162806809
+const c2d = 0.00833333333333332974823815
+const c1d = -0.166666666666666657414808
+
+# c5f is 0f0 to handle Inf32 case, Float64 doesn't need this since it comes
+# out properly (add another neg constant and remove this zero constant)
+const c5f = 0f0
+const c4f = 2.6083159809786593541503f-06
+const c3f = -0.0001981069071916863322258f0
+const c2f = 0.00833307858556509017944336f0
+const c1f = -0.166666597127914428710938f0
+
+# Argument is first reduced to the domain 0 < s < π/4
+
+# We return the correct sign using `q & 1 != 0` i.e. q is odd (this works for
+# positive and negative q) and if this condition is true we flip the sign since
+# we are now in the negative branch of sin(x). Recall that q is just the integer
+# part of d/π and thus we can determine the correct sign using this information.
+
+global @inline _sincos_fast(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d
+global @inline _sincos_fast(x::Float32) = @horner x c1f c2f c3f c4f c5f
+
+function sin_fast{T<:IEEEFloat}(x::T)
+ d = abs(x)
+ q = roundi(d*T(M1PI))
+ d = muladd(q, -PI4A(T)*4, d)
+ d = muladd(q, -PI4B(T)*4, d)
+ d = muladd(q, -PI4C(T)*4, d)
+ d = muladd(q, -PI4D(T)*4, d)
+ s = d*d
+ q & 1 != 0 && (d = -d)
+ u =_sincos_fast(s)
+ u = muladd(s, u*d, d)
+ flipsign(u,x)
+end
+
+function cos_fast{T<:IEEEFloat}(x::T)
+ q = muladd(2, roundi(x*T(M1PI)-T(0.5)), 1)
+ x = muladd(q, -PI4A(T)*2, x)
+ x = muladd(q, -PI4B(T)*2, x)
+ x = muladd(q, -PI4C(T)*2, x)
+ x = muladd(q, -PI4D(T)*2, x)
+ s = x*x
+ q & 2 == 0 && (x = -x)
+ u =_sincos_fast(s)
+ muladd(s, u*x, x)
+end
+end
+
+
+
+"""
+ sincos(x)
+
+Compute the sin and cosine of `x` simultaneously, where the output is in
+radians, returning a tuple.
+"""
+function sincos end
+
+"""
+ sincos_fast(x)
+
+Compute the sin and cosine of `x` simultaneously, where the output is in
+radians, returning a tuple.
+"""
+function sincos_fast end
+
+let
+global sincos
+global sincos_fast
+
+const a6d = 1.58938307283228937328511e-10
+const a5d = -2.50506943502539773349318e-08
+const a4d = 2.75573131776846360512547e-06
+const a3d = -0.000198412698278911770864914
+const a2d = 0.0083333333333191845961746
+const a1d = -0.166666666666666130709393
+
+const a3f = -0.000195169282960705459117889f0
+const a2f = 0.00833215750753879547119141f0
+const a1f = -0.166666537523269653320312f0
+
+const b7d = -1.13615350239097429531523e-11
+const b6d = 2.08757471207040055479366e-09
+const b5d = -2.75573144028847567498567e-07
+const b4d = 2.48015872890001867311915e-05
+const b3d = -0.00138888888888714019282329
+const b2d = 0.0416666666666665519592062
+const b1d = -0.50
+
+const b5f = -2.71811842367242206819355f-07
+const b4f = 2.47990446951007470488548f-05
+const b3f = -0.00138888787478208541870117f0
+const b2f = 0.0416666641831398010253906f0
+const b1f = -0.5f0
+
+global @inline _sincos_a(x::Float64) = @horner x a1d a2d a3d a4d a5d a6d
+global @inline _sincos_a(x::Float32) = @horner x a1f a2f a3f
+global @inline _sincos_b(x::Float64) = @horner x b1d b2d b3d b4d b5d b6d b7d
+global @inline _sincos_b(x::Float32) = @horner x b1f b2f b3f b4f b5f
+
+function sincos_fast{T<:IEEEFloat}(x::T)
+ d = abs(x)
+ q = roundi(d*T(M2PI))
+ s = d
+ s = muladd(q, -PI4A(T)*2, s)
+ s = muladd(q, -PI4B(T)*2, s)
+ s = muladd(q, -PI4C(T)*2, s)
+ s = muladd(q, -PI4D(T)*2, s)
+ t = s
+ s = s*s
+ u =_sincos_a(s)
+ u = u * s * t
+ rx = t + u
+ u =_sincos_b(s)
+ ry = u * s + T(1)
+ q & 1 != 0 && (s = ry; ry = rx; rx = s)
+ q & 2 != 0 && (rx = -rx)
+ (q+1) & 2 != 0 && (ry = -ry)
+ isinf(d) && (rx = ry = T(NaN))
+ convert(Tuple{T,T}, Double(flipsign(rx,x), ry))
+end
+
+function sincos{T<:IEEEFloat}(x::T)
+ d = abs(x)
+ q = roundi(d*2*T(M1PI))
+ s = dsub2(d, q*PI4A(T)*2)
+ s = dsub2(s, q*PI4B(T)*2)
+ s = dsub2(s, q*PI4C(T)*2)
+ s = dsub2(s, q*PI4D(T)*2)
+ t = s
+ s = dsqu(s)
+ sx = T(s)
+ u =_sincos_a(sx)
+ u *= sx * t.hi
+ v = dadd(t, u)
+ rx = T(v)
+ u =_sincos_b(sx)
+ v = dadd(T(1), dmul(sx, u))
+ ry = T(v)
+ q & 1 != 0 && (u = ry; ry = rx; rx = u)
+ q & 2 != 0 && (rx = -rx)
+ (q+1) & 2 != 0 && (ry = -ry)
+ isinf(d) && (rx = ry = T(NaN))
+ convert(Tuple{T,T}, Double(flipsign(rx, x), ry))
+end
+end
+
+
+"""
+ tan(x)
+
+Compute the tangent of `x`, where the output is in radians.
+"""
+function tan end
+
+"""
+ tan_fast(x)
+
+Compute the tangent of `x`, where the output is in radians.
+"""
+function tan_fast end
+
+let
+global tan
+global tan_fast
+
+const c15d = 1.01419718511083373224408e-05
+const c14d = -2.59519791585924697698614e-05
+const c13d = 5.23388081915899855325186e-05
+const c12d = -3.05033014433946488225616e-05
+const c11d = 7.14707504084242744267497e-05
+const c10d = 8.09674518280159187045078e-05
+const c9d = 0.000244884931879331847054404
+const c8d = 0.000588505168743587154904506
+const c7d = 0.001456127889228124279788480
+const c6d = 0.003592087438369066191429240
+const c5d = 0.008863239443624016181133560
+const c4d = 0.021869488285384638959207800
+const c3d = 0.053968253978129841763600200
+const c2d = 0.133333333333125941821962000
+const c1d = 0.333333333333334980164153000
+
+const c7f = 0.00446636462584137916564941f0
+const c6f = -8.3920182078145444393158f-05
+const c5f = 0.0109639242291450500488281f0
+const c4f = 0.0212360303848981857299805f0
+const c3f = 0.0540687143802642822265625f0
+const c2f = 0.133325666189193725585938f0
+const c1f = 0.33333361148834228515625f0
+
+global @inline _tan_fast(x::Float64) = @horner_split x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d c12d c13d c14d c15d
+global @inline _tan_fast(x::Float32) = @horner x c1f c2f c3f c4f c5f c6f c7f
+
+function tan_fast{T<:IEEEFloat}(d::T)
+ w = abs(d)
+ q = roundi(w*T(M2PI))
+ x = muladd(q, -PI4A(T)*2, w)
+ x = muladd(q, -PI4B(T)*2, x)
+ x = muladd(q, -PI4C(T)*2, x)
+ x = muladd(q, -PI4D(T)*2, x)
+ q & 1 != 0 && (x = -x)
+ s = x*x
+ u =_tan_fast(s)
+ u = muladd(s, u*x, x)
+ q & 1 != 0 && (u = 1/u)
+ isinf(w) && (u = T(NaN))
+ return flipsign(u,d)
+end
+
+global @inline _tan(x::Double{Float64}) = dadd(c1d, x.hi*(@horner_split x.hi c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d c12d c13d c14d c15d))
+global @inline _tan(x::Double{Float32}) = dadd(c1f, dmul(x, @horner x.hi c2f c3f c4f c5f c6f c7f))
+# global @inline _tan(x::Double{Float32}) = dadd(c1f, dmul(x.hi, dadd(c2f, x.hi*(@horner x.hi c3f c4f c5f c6f c7f))))
+
+function tan{T<:IEEEFloat}(d::T)
+ w = abs(d)
+ q = round(w*T(M2PI))
+ x = dsub2(w, q*PI4A(T)*2)
+ x = dsub2(x, q*PI4B(T)*2)
+ x = dsub2(x, q*PI4C(T)*2)
+ x = dsub2(x, q*PI4D(T)*2)
+ qi = unsafe_trunc(Int,q)
+ qi & 1 != 0 && (x = -x)
+ s = dsqu(x)
+ u =_tan(s)
+ u = dmul(x, dadd(T(1), dmul(u, s)))
+ qi & 1 != 0 && (u = ddrec(u))
+ return flipsign(T(u),d)
+end
+end
+
+
+
+"""
+ atan(x)
+
+Compute the inverse tangent of `x`, where the output is in radians.
+"""
+function atan{T<:IEEEFloat}(x::T)
+ u = T(atan2k(Double(abs(x)), Double(T(1))))
+ isinf(x) && (u = T(MPI2))
+ flipsign(u,x)
+end
+
+
+"""
+ atan_fast(x)
+
+Compute the inverse tangent of `x`, where the output is in radians.
+"""
+function atan_fast end
+let
+global atan_fast
+
+const c19d = -1.88796008463073496563746e-05
+const c18d = 0.000209850076645816976906797
+const c17d = -0.00110611831486672482563471
+const c16d = 0.00370026744188713119232403
+const c15d = -0.00889896195887655491740809
+const c14d = 0.016599329773529201970117
+const c13d = -0.0254517624932312641616861
+const c12d = 0.0337852580001353069993897
+const c11d = -0.0407629191276836500001934
+const c10d = 0.0466667150077840625632675
+const c9d = -0.0523674852303482457616113
+const c8d = 0.0587666392926673580854313
+const c7d = -0.0666573579361080525984562
+const c6d = 0.0769219538311769618355029
+const c5d = -0.090908995008245008229153
+const c4d = 0.111111105648261418443745
+const c3d = -0.14285714266771329383765
+const c2d = 0.199999999996591265594148
+const c1d = -0.333333333333311110369124
+
+const c8f = 0.00282363896258175373077393f0
+const c7f = -0.0159569028764963150024414f0
+const c6f = 0.0425049886107444763183594f0
+const c5f = -0.0748900920152664184570312f0
+const c4f = 0.106347933411598205566406f0
+const c3f = -0.142027363181114196777344f0
+const c2f = 0.199926957488059997558594f0
+const c1f = -0.333331018686294555664062f0
+
+global @inline _atan_fast(x::Float64) = @horner_split x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d c12d c13d c14d c15d c16d c17d c18d c19d
+global @inline _atan_fast(x::Float32) = @horner x c1f c2f c3f c4f c5f c6f c7f c8f
+
+function atan_fast{T<:IEEEFloat}(x::T)
+ q = 0
+ if x < 0
+ x = -x
+ q = 2
+ end
+ if x > 1
+ x = 1/x
+ q |= 1
+ end
+ t = x*x
+ u =_atan_fast(t)
+ t = x + x*t*u
+ q & 1 != 0 && (t = T(MPI2) - t)
+ q & 2 != 0 && (t = -t)
+ return t
+end
+end
+
+
+
+"""
+ atan2(x, y)
+
+Compute the inverse tangent of `x/y`, using the signs of both `x` and `y` to determine the quadrant of the return value.
+"""
+function atan2{T<:IEEEFloat}(x::T, y::T)
+ r = T(atan2k(Double(abs(x)), Double(y)))
+ r = flipsign(r,y)
+ if isinf(y) || y == 0
+ r = T(MPI2) - (isinf(y) ? _sign(y)*T(MPI2) : T(0))
+ end
+ if isinf(x)
+ r = T(MPI2) - (isinf(y) ? _sign(y)*T(MPI4) : T(0))
+ end
+ if x == 0
+ r = _sign(y) == -1 ? T(MPI) : T(0)
+ end
+ return isnan(y) || isnan(x) ? T(NaN) : flipsign(r,x)
+end
+
+
+"""
+ atan2_fast(x, y)
+
+Compute the inverse tangent of `x/y`, using the signs of both `x` and `y` to determine the quadrant of the return value.
+"""
+function atan2_fast{T<:IEEEFloat}(x::T, y::T)
+ r = atan2k_fast(abs(x), y)
+ r = flipsign(r,y)
+ if isinf(y) || y == 0
+ r = T(MPI2) - (isinf(y) ? _sign(y)*T(MPI2) : T(0))
+ end
+ if isinf(x)
+ r = T(MPI2) - (isinf(y) ? _sign(y)*T(MPI4) : T(0))
+ end
+ if x == 0
+ r = _sign(y) == -1 ? T(MPI) : T(0)
+ end
+ return isnan(y) || isnan(x) ? T(NaN) : flipsign(r,x)
+end
+
+
+
+"""
+ asin(x)
+
+Compute the inverse sine of `x`, where the output is in radians.
+"""
+function asin{T<:IEEEFloat}(x::T)
+ d = atan2k(Double(abs(x)), dsqrt(dmul(dadd(T(1), x), dsub(T(1), x))))
+ u = T(d)
+ abs(x) == 1 && (u = T(MPI2))
+ flipsign(u,x)
+end
+
+
+"""
+ asin_fast(x)
+
+Compute the inverse sine of `x`, where the output is in radians.
+"""
+function asin_fast{T<:IEEEFloat}(x::T)
+ flipsign(atan2k_fast(abs(x), _sqrt((1+x)*(1-x))), x)
+end
+
+
+
+"""
+ acos(x)
+
+Compute the inverse cosine of `x`, where the output is in radians.
+"""
+function acos{T<:IEEEFloat}(x::T)
+ d = atan2k(dsqrt(dmul(dadd(T(1), x), dsub(T(1), x))), Double(abs(x)))
+ d = flipsign(d,x)
+ abs(x) == 1 && (d = Double(T(0)))
+ x < 0 && (d = dadd(MDPI(T), d))
+ return T(d)
+end
+
+
+"""
+ acos_fast(x)
+
+Compute the inverse cosine of `x`, where the output is in radians.
+"""
+function acos_fast{T<:IEEEFloat}(x::T)
+ flipsign(atan2k_fast(_sqrt((1+x)*(1-x)), abs(x)), x) + (x < 0 ? T(MPI) : T(0))
+end
diff --git a/src/utils.jl b/src/utils.jl
new file mode 100644
index 0000000..9f76db7
--- /dev/null
+++ b/src/utils.jl
@@ -0,0 +1,63 @@
+## utility functions mainly used by the private math functions in priv.jl
+
+function is_fma_fast end
+for T in (Float32, Float64)
+ @eval is_fma_fast(::Type{$T}) = $(muladd(nextfloat(one(T)),nextfloat(one(T)),-nextfloat(one(T),2)) != zero(T))
+end
+const FMA_FAST = is_fma_fast(Float64) && is_fma_fast(Float32)
+
+
+@inline exponent_max{T<:IEEEFloat}(::Type{T}) = Int(exponent_mask(T) >> significand_bits(T))
+
+# _sign emits better native code than sign but does not properly handle the Inf/NaN cases
+@inline _sign{T<:IEEEFloat}(d::T) = flipsign(one(T), d)
+
+@inline roundi{T<:IEEEFloat}(x::T) = unsafe_trunc(Int, round(x))
+
+@inline integer2float(::Type{Float64}, m::Int) = reinterpret(Float64, (m % Int64) << significand_bits(Float64))
+@inline integer2float(::Type{Float32}, m::Int) = reinterpret(Float32, (m % Int32) << significand_bits(Float32))
+
+@inline float2integer(d::Float64) = (reinterpret(Int64, d) >> significand_bits(Float64)) % Int
+@inline float2integer(d::Float32) = (reinterpret(Int32, d) >> significand_bits(Float32)) % Int
+
+@inline pow2i{T<:IEEEFloat}(::Type{T}, q::Int) = integer2float(T, q + exponent_bias(T))
+
+# sqrt without the domain checks which we don't need since we handle the checks ourselves
+_sqrt{T<:IEEEFloat}(x::T) = Base.sqrt_llvm_fast(x)
+
+@inline ispinf{T<:IEEEFloat}(x::T) = x == T(Inf)
+@inline isninf{T<:IEEEFloat}(x::T) = x == -T(Inf)
+
+# Similar to @horner, but split into even and odd coefficients. This is typically less
+# accurate, but faster due to out of order execution.
+macro horner_split(x,p...)
+ t1 = gensym("x1")
+ t2 = gensym("x2")
+ blk = quote
+ $t1 = $(esc(x))
+ $t2 = $(esc(x)) * $(esc(x))
+ end
+ n = length(p)
+ p0 = esc(p[1])
+ if isodd(n)
+ ex_o = esc(p[end-1])
+ ex_e = esc(p[end])
+ for i = n-3:-2:2
+ ex_o = :(muladd($(t2), $ex_o, $(esc(p[i]))))
+ end
+ for i = n-2:-2:2
+ ex_e = :(muladd($(t2), $ex_e, $(esc(p[i]))))
+ end
+ elseif iseven(n)
+ ex_o = esc(p[end])
+ ex_e = esc(p[end-1])
+ for i = n-2:-2:2
+ ex_o = :(muladd($(t2), $ex_o, $(esc(p[i]))))
+ end
+ for i = n-3:-2:2
+ ex_e = :(muladd($(t2), $ex_e, $(esc(p[i]))))
+ end
+ end
+ push!(blk.args,:($(p0) + $(t1)*$(ex_o) + $(t2)*$(ex_e)))
+ blk
+end
diff --git a/test/accuracy.jl b/test/accuracy.jl
new file mode 100644
index 0000000..c7e335b
--- /dev/null
+++ b/test/accuracy.jl
@@ -0,0 +1,177 @@
+
+MRANGE(::Type{Float64}) = 10000000
+MRANGE(::Type{Float32}) = 10000
+IntF(::Type{Float64}) = Int64
+IntF(::Type{Float32}) = Int32
+
+
+@testset "Accuracy (max error in ulp) for $T" for T in (Float64, Float32)
+ println("Accuracy tests for $T")
+
+
+ xx = map(T, vcat(-10:0.0002:10, -1000:0.1:1000))
+ fun_table = Dict(Sleef.exp => Base.exp)
+ tol = 1
+ test_acc(T, fun_table, xx, tol)
+
+
+
+ # xx = map(T, vcat(-10:0.0002:10, -1000:0.02:1000))
+ # fun_table = Dict(Sleef.asinh => Base.asinh, Sleef.atanh => Base.atanh)
+ # tol = 1
+ # test_acc(T, fun_table, xx, tol)
+
+
+ # xx = map(T, vcat(1:0.0002:10, 1:0.02:1000))
+ # fun_table = Dict(Sleef.acosh => Base.acosh)
+ # tol = 1
+ # test_acc(T, fun_table, xx, tol)
+
+
+ # xx = T[]
+ # for i = 1:10000
+ # s = reinterpret(T, reinterpret(IntF(T), T(pi)/4 * i) - IntF(T)(20))
+ # e = reinterpret(T, reinterpret(IntF(T), T(pi)/4 * i) + IntF(T)(20))
+ # d = s
+ # while d <= e
+ # append!(xx, d)
+ # d = reinterpret(T, reinterpret(IntF(T), d) + IntF(T)(1))
+ # end
+ # end
+ # xx = append!(xx, -10:0.0002:10)
+ # xx = append!(xx, -MRANGE(T):200.1:MRANGE(T))
+
+ # fun_table = Dict(Sleef.sin => Base.sin, Sleef.cos => Base.cos, Sleef.tan => Base.tan)
+ # tol = 1
+ # test_acc(T, fun_table, xx, tol)
+
+ # fun_table = Dict(Sleef.sin_fast => Base.sin, Sleef.cos_fast => Base.cos, Sleef.tan_fast => Base.tan)
+ # tol = 4
+ # test_acc(T, fun_table, xx, tol)
+
+
+ # sin_sincos_fast(x) = Sleef.sincos_fast(x)[1]
+ # cos_sincos_fast(x) = Sleef.sincos_fast(x)[2]
+ # fun_table = Dict(sin_sincos_fast => Base.sin, cos_sincos_fast => Base.cos)
+ # tol = 4
+ # test_acc(T, fun_table, xx, tol)
+
+ # sin_sincos(x) = Sleef.sincos(x)[1]
+ # cos_sincos(x) = Sleef.sincos(x)[2]
+ # fun_table = Dict(sin_sincos => Base.sin, cos_sincos => Base.cos)
+ # tol = 1
+ # test_acc(T, fun_table, xx, tol)
+
+
+ # xx = map(T, vcat(-1:0.00002:1))
+ # fun_table = Dict(Sleef.asin_fast => Base.asin, Sleef.acos_fast => Base.acos)
+ # tol = 3
+ # test_acc(T, fun_table, xx, tol)
+
+ # fun_table = Dict(Sleef.asin => asin, Sleef.acos => Base.acos)
+ # tol = 1
+ # test_acc(T, fun_table, xx, tol)
+
+
+ # xx = map(T, vcat(-10:0.0002:10, -10000:0.2:10000, -10000:0.201:10000))
+ # fun_table = Dict(Sleef.atan_fast => Base.atan)
+ # tol = 3
+ # test_acc(T, fun_table, xx, tol)
+
+ # fun_table = Dict(Sleef.atan => Base.atan)
+ # tol = 1
+ # test_acc(T, fun_table, xx, tol)
+
+
+ # xx1 = map(Tuple{T,T}, [zip(-10:0.050:10, -10:0.050:10)...])
+ # xx2 = map(Tuple{T,T}, [zip(-10:0.051:10, -10:0.052:10)...])
+ # xx3 = map(Tuple{T,T}, [zip(-100:0.51:100, -100:0.51:100)...])
+ # xx4 = map(Tuple{T,T}, [zip(-100:0.51:100, -100:0.52:100)...])
+ # xx = vcat(xx1, xx2, xx3, xx4)
+
+ # fun_table = Dict(Sleef.atan2_fast => Base.atan2)
+ # tol = 2.5
+ # test_acc(T, fun_table, xx, tol)
+
+ # fun_table = Dict(Sleef.atan2 => Base.atan2)
+ # tol = 1
+ # test_acc(T, fun_table, xx, tol)
+
+
+ # xx = map(T, vcat(0.0001:0.0001:10, 0.001:0.1:10000, 1.1.^(-1000:1000), 2.1.^(-1000:1000)))
+ # fun_table = Dict(Sleef.log_fast => Base.log)
+ # tol = 3
+ # test_acc(T, fun_table, xx, tol)
+
+ # fun_table = Dict(Sleef.log => Base.log)
+ # tol = 1
+ # test_acc(T, fun_table, xx, tol)
+
+
+ # xx = map(T, vcat(0.0001:0.0001:10, 0.0001:0.1:10000))
+ # fun_table = Dict(Sleef.log10 => Base.log10, Sleef.log2 => Base.log2)
+ # tol = 1
+ # test_acc(T, fun_table, xx, tol)
+
+
+ # xx = map(T, vcat(0.0001:0.0001:10, 0.0001:0.1:10000, 10.0.^-(0:0.02:300), -10.0.^-(0:0.02:300)))
+ # fun_table = Dict(Sleef.log1p => Base.log1p)
+ # tol = 1
+ # test_acc(T, fun_table, xx, tol)
+
+
+ # xx1 = map(Tuple{T,T}, [(x,y) for x = -100:0.20:100, y = 0.1:0.20:100])[:]
+ # xx2 = map(Tuple{T,T}, [(x,y) for x = -100:0.21:100, y = 0.1:0.22:100])[:]
+ # xx3 = map(Tuple{T,T}, [(x,y) for x = 2.1, y = -1000:0.1:1000])
+ # xx = vcat(xx1, xx2, xx2)
+ # fun_table = Dict(Sleef.pow => Base.:^)
+ # tol = 1
+ # test_acc(T, fun_table, xx, tol)
+
+
+ # xx = map(T, vcat(-10000:0.2:10000, 1.1.^(-1000:1000), 2.1.^(-1000:1000)))
+ # fun_table = Dict(Sleef.cbrt_fast => Base.cbrt)
+ # tol = 2
+ # test_acc(T, fun_table, xx, tol)
+
+ # fun_table = Dict(Sleef.cbrt => Base.cbrt)
+ # tol = 1
+ # test_acc(T, fun_table, xx, tol)
+
+
+
+ # xx = map(T, vcat(-10:0.0002:10, -120:0.023:1000, -1000:0.02:2000))
+ # fun_table = Dict(Sleef.exp2 => Base.exp2)
+ # tol = 1
+ # test_acc(T, fun_table, xx, tol)
+
+
+ # xx = map(T, vcat(-10:0.0002:10, -35:0.023:1000, -300:0.01:300))
+ # fun_table = Dict(Sleef.exp10 => Base.exp10)
+ # tol = 1
+ # test_acc(T, fun_table, xx, tol)
+
+
+ # xx = map(T, vcat(-10:0.0002:10, -1000:0.021:1000, -1000:0.023:1000,
+ # 10.0.^-(0:0.02:300), -10.0.^-(0:0.02:300), 10.0.^(0:0.021:300), -10.0.^-(0:0.021:300)))
+ # fun_table = Dict(Sleef.expm1 => Base.expm1)
+ # tol = 2
+ # test_acc(T, fun_table, xx, tol)
+
+
+ # xx = map(T, vcat(-10:0.0002:10, -1000:0.02:1000))
+ # fun_table = Dict(Sleef.sinh => Base.sinh, Sleef.cosh => Base.cosh, Sleef.tanh => Base.tanh)
+ # tol = 1
+ # test_acc(T, fun_table, xx, tol)
+
+
+
+ @testset "xilog2 at arbitrary values" begin
+ xd = Dict{T,Int}(T(1e-30) => -100, T(2.31e-11) => -36, T(-1.0) => 0, T(1.0) => 0,
+ T(2.31e11) => 37, T(1e30) => 99)
+ for (i,j) in xd
+ @test Sleef.ilog2(i) === j
+ end
+ end
+
+end
diff --git a/test/dnml_nan.jl b/test/dnml_nan.jl
new file mode 100644
index 0000000..4dd1a24
--- /dev/null
+++ b/test/dnml_nan.jl
@@ -0,0 +1,437 @@
+@testset "denormal/nonnumber $T" for T in (Float32, Float64)
+
+@testset "denormal/nonnumber $xatan2" for xatan2 in (Sleef.atan2_fast, Sleef.atan2)
+
+ @test xatan2(T(0.0), T(-0.0)) === T(pi)
+ @test xatan2(T(-0.0), T(-0.0)) === -T(pi)
+ @test ispzero(xatan2(T(0.0), T(0.0)))
+ @test isnzero(xatan2(T(-0.0), T(0.0)))
+ @test xatan2( T(Inf), -T(Inf)) === T(3*pi/4)
+ @test xatan2(-T(Inf), -T(Inf)) === T(-3*pi/4)
+ @test xatan2( T(Inf), T(Inf)) === T(pi/4)
+ @test xatan2(-T(Inf), T(Inf)) === T(-pi/4)
+
+
+ y = T(0.0)
+ xa = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5]
+ for x in xa
+ @test xatan2(y,x) === T(pi)
+ end
+
+
+ y = T(-0.0)
+ xa = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5]
+ for x in xa
+ @test xatan2(y,x) === T(-pi)
+ end
+
+
+ ya = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5]
+ xa = T[T(0.0), T(-0.0)]
+ for x in xa, y in ya
+ @test xatan2(y,x) === T(-pi/2)
+ end
+
+
+ ya = T[100000.5, 100000, 3, 2.5, 2, 1.5, 1.0, 0.5]
+ xa = T[T(0.0), T(-0.0)]
+ for x in xa, y in ya
+ @test xatan2(y,x) === T(pi/2)
+ end
+
+
+ y = T(Inf)
+ xa = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5]
+ for x in xa
+ @test xatan2(y,x) === T(pi/2)
+ end
+
+
+ y = T(-Inf)
+ xa = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5]
+ for x in xa
+ @test xatan2(y,x) === T(-pi/2)
+ end
+
+
+ ya = T[0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5]
+ x = T(Inf)
+ for y in ya
+ @test ispzero(xatan2(y,x))
+ end
+
+
+ ya = T[-0.5, -1.5, -2.0, -2.5, -3.0, -100000, -100000.5]
+ x = T(Inf)
+ for y in ya
+ @test isnzero(xatan2(y,x))
+ end
+
+
+ ya = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5, NaN]
+ x = T(NaN)
+ for y in ya
+ @test isnan(xatan2(y,x))
+ end
+
+
+ y = T(NaN)
+ xa = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5, NaN]
+ for x in xa
+ @test isnan(xatan2(y,x))
+ end
+
+end # denormal/nonumber atan2
+
+
+@testset "denormal/nonnumber xpow" begin
+
+ @test Sleef.pow(T(1), T(NaN)) === T(1)
+ @test Sleef.pow( T(NaN), T(0)) === T(1)
+ @test Sleef.pow(-T(1), T(Inf)) === T(1)
+ @test Sleef.pow(-T(1), T(-Inf)) === T(1)
+
+
+ xa = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5]
+ ya = T[-100000.5, -2.5, -1.5, -0.5, 0.5, 1.5, 2.5, 100000.5]
+ for x in xa, y in ya
+ @test isnan(Sleef.pow(x,y))
+ end
+
+
+ x = T(NaN)
+ ya = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5]
+ for y in ya
+ @test isnan(Sleef.pow(x,y))
+ end
+
+
+ xa = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5]
+ y = T(NaN)
+ for x in xa
+ @test isnan(Sleef.pow(x,y))
+ end
+
+
+ x = T(0.0)
+ ya = T[1, 3, 5, 7, 100001]
+ for y in ya
+ @test ispzero(Sleef.pow(x,y))
+ end
+
+
+ x = T(-0.0)
+ ya = T[1, 3, 5, 7, 100001]
+ for y in ya
+ @test isnzero(Sleef.pow(x,y))
+ end
+
+
+ xa = T[0.0, -0.0]
+ ya = T[0.5, 1.5, 2.0, 2.5, 4.0, 100000, 100000.5]
+ for x in xa, y in ya
+ @test ispzero(Sleef.pow(x,y))
+ end
+
+
+ xa = T[-0.999, -0.5, -0.0, +0.0, +0.5, +0.999]
+ y = T(-Inf)
+ for x in xa
+ @test Sleef.pow(x,y) === T(Inf)
+ end
+
+
+ xa = T[-100000.5, -100000, -3, -2.5, -2, -1.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5]
+ y = T(-Inf)
+ for x in xa
+ @test ispzero(Sleef.pow(x,y))
+ end
+
+
+ xa = T[-0.999, -0.5, -0.0, +0.0, +0.5, +0.999]
+ y = T(Inf)
+ for x in xa
+ @test ispzero(Sleef.pow(x,y))
+ end
+
+
+ xa = T[-100000.5, -100000, -3, -2.5, -2, -1.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5]
+ y = T(Inf)
+ for x in xa
+ @test Sleef.pow(x,y) === T(Inf)
+ end
+
+
+ x = T(-Inf)
+ ya = T[-100001, -5, -3, -1]
+ for y in ya
+ @test isnzero(Sleef.pow(x,y))
+ end
+
+
+ x = T(-Inf)
+ ya = T[-100000.5, -100000, -4, -2.5, -2, -1.5, -0.5]
+ for y in ya
+ @test ispzero(Sleef.pow(x,y))
+ end
+
+
+ x = T(-Inf)
+ ya = T[1, 3, 5, 7, 100001]
+ for y in ya
+ @test Sleef.pow(x,y) === T(-Inf)
+ end
+
+
+ x = T(-Inf)
+ ya = T[0.5, 1.5, 2, 2.5, 3.5, 4, 100000, 100000.5]
+ for y in ya
+ @test Sleef.pow(x,y) === T(Inf)
+ end
+
+
+ x = T(Inf)
+ ya = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5]
+ for y in ya
+ @test ispzero(Sleef.pow(x,y))
+ end
+
+
+ x = T(Inf)
+ ya = T[0.5, 1, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5]
+ for y in ya
+ @test Sleef.pow(x,y) === T(Inf)
+ end
+
+
+ x = T(0.0)
+ ya = T[-100001, -5, -3, -1]
+ for y in ya
+ @test Sleef.pow(x,y) === T(Inf)
+ end
+
+
+ x = T(-0.0)
+ ya = T[-100001, -5, -3, -1]
+ for y in ya
+ @test Sleef.pow(x,y) === T(-Inf)
+ end
+
+
+ xa = T[0.0, -0.0]
+ ya = T[-100000.5, -100000, -4, -2.5, -2, -1.5, -0.5]
+ for x in xa, y in ya
+ @test Sleef.pow(x,y) === T(Inf)
+ end
+
+
+ xa = T[1000, -1000]
+ ya = T[1000, 1000.5, 1001]
+ for x in xa, y in ya
+ @test cmpdenorm(Sleef.pow(x,y), Base.:^(BigFloat(x),BigFloat(y)))
+ end
+
+end # denormal/nonumber pow
+
+
+fun_table = Dict(Sleef.sin_fast => Base.sin, Sleef.sin => Base.sin)
+@testset "denormal/nonnumber $xtrig" for (xtrig, trig) in fun_table
+ xa = T[NaN, -0.0, 0.0, Inf, -Inf]
+ for x in xa
+ @test cmpdenorm(xtrig(x), trig(BigFloat(x)))
+ end
+end
+
+
+fun_table = Dict(Sleef.cos_fast => Base.cos, Sleef.cos => Base.cos)
+@testset "denormal/nonnumber $xtrig" for (xtrig, trig) in fun_table
+ xa = T[NaN, Inf, -Inf]
+ for x in xa
+ @test cmpdenorm(xtrig(x), trig(BigFloat(x)))
+ end
+end
+
+
+@testset "denormal/nonnumber sin in $xsincos"for xsincos in (Sleef.sincos_fast, Sleef.sincos)
+ xa = T[NaN, -0.0, 0.0, Inf, -Inf]
+ for x in xa
+ q = xsincos(x)[1]
+ @test cmpdenorm(q, Base.sin(BigFloat(x)))
+ end
+end
+
+
+@testset "denormal/nonnumber cos in $xsincos"for xsincos in (Sleef.sincos_fast, Sleef.sincos)
+ xa = T[NaN, Inf, -Inf]
+ for x in xa
+ q = xsincos(x)[2]
+ @test cmpdenorm(q, Base.cos(BigFloat(x)))
+ end
+end
+
+
+@testset "denormal/nonnumber $xtan" for xtan in (Sleef.tan_fast, Sleef.tan)
+ xa = T[NaN, Inf, -Inf, -0.0, 0.0, pi/2, -pi/2]
+ for x in xa
+ @test cmpdenorm(xtan(x), Base.tan(BigFloat(x)))
+ end
+end
+
+
+fun_table = Dict(Sleef.asin => Base.asin, Sleef.asin_fast => Base.asin, Sleef.acos => Base.acos, Sleef.acos_fast => Base.acos)
+@testset "denormal/nonnumber $xatrig" for (xatrig, atrig) in fun_table
+ xa = T[NaN, Inf, -Inf, 2, -2, 1, -1]
+ for x in xa
+ @test cmpdenorm(xatrig(x), atrig(BigFloat(x)))
+ end
+end
+
+
+@testset "denormal/nonnumber $xatan" for xatan in (Sleef.atan, Sleef.atan_fast)
+ xa = T[NaN, Inf, -Inf]
+ for x in xa
+ @test cmpdenorm(xatan(x), Base.atan(BigFloat(x)))
+ end
+end
+
+
+@testset "denormal/nonnumber exp" begin
+ xa = T[NaN, Inf, -Inf, 10000, -10000]
+ for x in xa
+ @test cmpdenorm(Sleef.exp(x), Base.exp(BigFloat(x)))
+ end
+end
+
+
+@testset "denormal/nonnumber sinh" begin
+ xa = T[NaN, 0.0, -0.0, Inf, -Inf, 10000, -10000]
+ for x in xa
+ @test cmpdenorm(Sleef.sinh(x), Base.sinh(BigFloat(x)))
+ end
+end
+
+
+@testset "denormal/nonnumber cosh" begin
+ xa = T[NaN, 0.0, -0.0, Inf, -Inf, 10000, -10000]
+ for x in xa
+ @test cmpdenorm(Sleef.cosh(x), Base.cosh(BigFloat(x)))
+ end
+end
+
+
+@testset "denormal/nonnumber tanh" begin
+ xa = T[NaN, 0.0, -0.0, Inf, -Inf, 10000, -10000]
+ for x in xa
+ @test cmpdenorm(Sleef.tanh(x), Base.tanh(BigFloat(x)))
+ end
+end
+
+
+@testset "denormal/nonnumber asinh" begin
+ xa = T[NaN, 0.0, -0.0, Inf, -Inf, 10000, -10000]
+ for x in xa
+ @test cmpdenorm(Sleef.asinh(x), Base.asinh(BigFloat(x)))
+ end
+end
+
+
+@testset "denormal/nonnumber acosh" begin
+ xa = T[NaN, 0.0, -0.0, 1.0, Inf, -Inf, 10000, -10000]
+ for x in xa
+ @test cmpdenorm(Sleef.acosh(x), Base.acosh(BigFloat(x)))
+ end
+end
+
+
+@testset "denormal/nonnumber atanh" begin
+ xa = T[NaN, 0.0, -0.0, 1.0, -1.0, Inf, -Inf, 10000, -10000]
+ for x in xa
+ @test cmpdenorm(Sleef.atanh(x), Base.atanh(BigFloat(x)))
+ end
+end
+
+
+@testset "denormal/nonnumber $xcbrt" for xcbrt = (Sleef.cbrt, Sleef.cbrt_fast)
+ xa = T[NaN, Inf, -Inf, 0.0, -0.0]
+ for x in xa
+ @test cmpdenorm(Sleef.cbrt(x), Base.cbrt(BigFloat(x)))
+ end
+end
+
+
+@testset "denormal/nonnumber exp2" begin
+ xa = T[NaN, Inf, -Inf]
+ for x in xa
+ @test cmpdenorm(Sleef.exp2(x), Base.exp2(BigFloat(x)))
+ end
+end
+
+
+@testset "denormal/nonnumber exp10" begin
+ xa = T[NaN, Inf, -Inf]
+ for x in xa
+ @test cmpdenorm(Sleef.exp10(x), Base.exp10(BigFloat(x)))
+ end
+end
+
+
+@testset "denormal/nonnumber expm1" begin
+ xa = T[NaN, Inf, -Inf]
+ for x in xa
+ @test cmpdenorm(Sleef.expm1(x), Base.expm1(BigFloat(x)))
+ end
+end
+
+
+
+@testset "denormal/nonnumber $xlog" for xlog in (Sleef.log, Sleef.log_fast)
+ xa = T[NaN, Inf, -Inf, 0, -1]
+ for x in xa
+ @test cmpdenorm(xlog(x), Base.log(BigFloat(x)))
+ end
+end
+
+
+@testset "denormal/nonnumber log10" begin
+ xa = T[NaN, Inf, -Inf, 0, -1]
+ for x in xa
+ @test cmpdenorm(Sleef.log10(x), Base.log10(BigFloat(x)))
+ end
+end
+
+
+@testset "denormal/nonnumber log2" begin
+ xa = T[NaN, Inf, -Inf, 0, -1]
+ for x in xa
+ @test cmpdenorm(Sleef.log2(x), Base.log2(BigFloat(x)))
+ end
+end
+
+
+@testset "denormal/nonnumber log1p" begin
+ xa = T[NaN, Inf, -Inf, 0.0, -0.0, -1.0, -2.0]
+ for x in xa
+ @test cmpdenorm(Sleef.log1p(x), Base.log1p(BigFloat(x)))
+ end
+end
+
+
+@testset "denormal/nonnumber ldexp" begin
+ for i = -10000:10000
+ a = Sleef.ldexp(T(1.0), i)
+ b = Base.ldexp(BigFloat(1.0), i)
+ @test (isfinite(b) && a == b || cmpdenorm(a,b))
+ end
+end
+
+
+@testset "denormal/nonnumber ilog2" begin
+ @test Sleef.ilog2(+T(Inf)) == typemax(Int)
+ @test Sleef.ilog2(-T(Inf)) == typemax(Int)
+ @test Sleef.ilog2(+T(0.0)) == typemin(Int)
+ @test Sleef.ilog2(-T(0.0)) == typemin(Int)
+ @test Sleef.ilog2( T(NaN)) == typemax(Int)
+end
+
+
+end #denormal/nonnumber
diff --git a/test/runtests.jl b/test/runtests.jl
new file mode 100644
index 0000000..166f6c9
--- /dev/null
+++ b/test/runtests.jl
@@ -0,0 +1,115 @@
+using Sleef
+using Base.Test
+
+isnzero{T<:AbstractFloat}(x::T) = signbit(x)
+ispzero{T<:AbstractFloat}(x::T) = !signbit(x)
+
+function cmpdenorm{Tx<:AbstractFloat, Ty<:AbstractFloat}(x::Tx, y::Ty)
+ sizeof(Tx) < sizeof(Ty) ? y = Tx(y) : x = Ty(x) # cast larger type to smaller type
+ (isnan(x) && isnan(y)) && return true
+ (isnan(x) || isnan(y)) && return false
+ (isinf(x) != isinf(y)) && return false
+ (x == Tx(Inf) && y == Ty(Inf)) && return true
+ (x == Tx(-Inf) && y == Ty(-Inf)) && return true
+ if y == 0
+ (ispzero(x) && ispzero(y)) && return true
+ (isnzero(x) && isnzero(y)) && return true
+ return false
+ end
+ (!isnan(x) && !isnan(y) && !isinf(x) && !isinf(y)) && return sign(x) == sign(y)
+ return false
+end
+
+# the following compares the ulp between x and y.
+# First it promotes them to the larger of the two types x,y
+infh(::Type{Float64}) = 1e+300
+infh(::Type{Float32}) = 1e+37
+function countulp(T, x::AbstractFloat, y::AbstractFloat)
+ X,Y = promote(x,y)
+ x, y = T(X), T(Y) # Cast to smaller type
+ (isnan(x) && isnan(y)) && return 0
+ (isnan(x) || isnan(y)) && return 10000
+ if isinf(x)
+ (sign(x) == sign(y) && abs(y) > infh(T)) && return 0 # Relaxed infinity handling
+ return 10001
+ end
+ (x == Inf && y == Inf) && return 0
+ (x == -Inf && y == -Inf) && return 0
+ if y == 0
+ (x == 0) && return 0
+ return 10002
+ end
+ if isfinite(x) && isfinite(y)
+ return T(abs(X - Y)/eps(y))
+ end
+ return 10003
+end
+countulp{T<:AbstractFloat}(x::T, y::T) = countulp(T,x,y)
+
+# get rid off annoying warnings from overwritten function
+macro nowarn(expr)
+ quote
+ stderr = STDERR
+ stream = open("null", "w")
+ redirect_stderr(stream)
+ result = $(esc(expr))
+ redirect_stderr(stderr)
+ close(stream)
+ result
+ end
+end
+
+# overide domain checking that base adheres to
+using Base.MPFR.ROUNDING_MODE
+for f in (:sin, :cos, :tan, :asin, :acos, :atan, :asinh, :acosh, :atanh, :log, :log10, :log2, :log1p)
+ @eval begin
+ import Base.$f
+ @nowarn function ($f)(x::BigFloat)
+ z = BigFloat()
+ ccall($(string(:mpfr_,f), :libmpfr), Int32, (Ptr{BigFloat}, Ptr{BigFloat}, Int32), &z, &x, ROUNDING_MODE[])
+ return z
+ end
+ end
+end
+
+strip_module_name(f::Function) = last(split(string(f), '.')) # strip module name from function f
+
+# test the accuracy of a function where fun_table is a Dict mapping the function you want
+# to test to a reference function
+# xx is an array of values (which may be tuples for multiple arugment functions)
+# tol is the acceptable tolerance to test against
+function test_acc(T, fun_table, xx, tol; debug=false, tol_debug=5)
+ @testset "accuracy $(strip_module_name(xfun))" for (xfun, fun) in fun_table
+ rmax = 0.0
+ rmean = 0.0
+ xmax = map(zero, first(xx))
+ for x in xx
+ q = xfun(x...)
+ c = fun(map(BigFloat,x)...)
+ u = countulp(T,q,c)
+ rmax = max(rmax, u)
+ xmax = rmax == u ? x : xmax
+ rmean += u
+ if debug && u > tol_debug
+ @printf("%s = %.20g\n%s = %.20g\nx = %.20g\nulp = %g\n", strip_module_name(xfun), q, strip_module_name(fun), T(c), x, ulp(T(c)))
+ end
+ end
+ rmean = rmean/length(xx)
+
+ t = @test trunc(rmax,1) <= tol
+
+ fmtxloc = isa(xmax, Tuple) ? string('(', join((@sprintf("%.5f", x) for x in xmax), ", "), ')') : @sprintf("%.5f", xmax)
+ println(rpad(strip_module_name(xfun), 18, " "), ": max ", @sprintf("%f", rmax),
+ rpad(" at x = "*fmtxloc, 40, " "),
+ ": mean ", @sprintf("%f", rmean))
+ end
+end
+
+function runtests()
+ @testset "Sleef" begin
+ include(joinpath(@__DIR__,"dnml_nan.jl"))
+ include(joinpath(@__DIR__,"accuracy.jl"))
+ end
+end
+
+runtests()