diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c143632 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*.jl.cov +*.jl.*.cov +*.jl.mem + diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..c7b3940 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,17 @@ +# Documentation: http://docs.travis-ci.com/user/languages/julia/ +language: julia +os: + - linux + - osx +julia: + - 0.6 + - nightly +notifications: + email: false +# uncomment the following lines to override the default test script +#script: +# - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi + # - julia -e 'versioninfo(); Pkg.clone(pwd()); Pkg.build("Sleef"); Pkg.test("Sleef"; coverage=true)' +after_success: + # push coverage results + - julia -e 'cd(Pkg.dir("Sleef")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder()); Codecov.submit(Codecov.process_folder())' diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..bac3d25 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,28 @@ +The Sleef.jl package is licensed under the MIT "Expat" License: + +> Copyright (c) 2016: Mustafa Mohamad and other contributors: +> +> https://github.com/musm/Sleef.jl/graphs/contributors +> +> Permission is hereby granted, free of charge, to any person obtaining a copy +> of this software and associated documentation files (the "Software"), to deal +> in the Software without restriction, including without limitation the rights +> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +> copies of the Software, and to permit persons to whom the Software is +> furnished to do so, subject to the following conditions: +> +> The above copyright notice and this permission notice shall be included in all +> copies or substantial portions of the Software. +> +> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +> SOFTWARE. +> + +Sleef.jl includes ported code from the following project + +- [sleef](https://github.com/shibatch/sleef) [public domain] Author Naoki Shibata \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..1e85d58 --- /dev/null +++ b/README.md @@ -0,0 +1,61 @@ +
+ +[![Travis Build Status](https://travis-ci.org/musm/Sleef.jl.svg?branch=master)](https://travis-ci.org/musm/Sleef.jl) +[![Appveyor Build Status](https://ci.appveyor.com/api/projects/status/j7lpafn4uf1trlfi/branch/master?svg=true)](https://ci.appveyor.com/project/musm/sleef-jl/branch/master) +[![Coverage Status](https://coveralls.io/repos/github/musm/Sleef.jl/badge.svg?branch=master)](https://coveralls.io/github/musm/Sleef.jl?branch=master) +[![codecov.io](http://codecov.io/github/musm/Sleef.jl/coverage.svg?branch=master)](http://codecov.io/github/musm/Sleef.jl?branch=master) + +A port of the [SLEEF math library](https://github.com/shibatch/sleef) (author Naoki Shibata) in pure Julia. This port includes a few extras including an `exp10` function and many bug fixes over the original code ([see](https://github.com/JuliaMath/Sleef.jl/issues) for a list of remanining issues that have not been fixed upstream). The library supports `Float32` and `Float64` types. + +Based on SLEEF version 2.80 (with additional bugfixes). + +# Usage + +We recommend running Julia with `-O3` for maximal performance using `Sleef.jl` and to also build a custom system image by running +```julia +# Pkg.add("WinRPM"); WinRPM.install("gcc") # on Windows please first run this line +julia> include(joinpath(dirname(JULIA_HOME), "share", "julia", "build_sysimg.jl")) +julia> build_sysimg(force=true) +``` +and then to restart `julia`; this will ensure you are taking full advantage of hardware [FMA](https://en.wikipedia.org/wiki/FMA_instruction_set) if your CPU supports it. + + +To use `Sleef.jl` +```julia +julia> Pkg.clone("https://github.com/JuliaMath/Sleef.jl.git") + +julia> using Sleef + +julia> Sleef.sin(2.3) +0.7457052121767203 + +julia> Sleef.sin(2.3f0) +0.74570525f0 + +julia> Sleef.exp(3.0) +20.085536923187668 + +julia> Sleef.exp(3f0) +20.085537f0 +``` + +The available functions include (within 1 ulp) +```julia +sin, cos, tan, asin, acos, atan, atan2, sincos, sinh, cosh, tanh, + asinh, acosh, atanh, log, log2, log10, log1p, ilog2, exp, exp2, exp10, expm1, ldexp, cbrt, pow + ``` + Faster variants include (within 3 ulp) + + ```julia +sin_fast, cos_fast, tan_fast, sincos_fast, asin_fast, acos_fast, atan_fast, atan2_fast, log_fast, cbrt_fast +``` + +# Benchmarks + +You can benchmark the performance of the Sleef.jl math library on your machine by running + +```julia +include(joinpath(Pkg.dir("Sleef"), "benchmark", "benchmark.jl")) +``` diff --git a/REQUIRE b/REQUIRE new file mode 100644 index 0000000..137767a --- /dev/null +++ b/REQUIRE @@ -0,0 +1 @@ +julia 0.6 diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 0000000..ca76fcf --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,32 @@ +environment: + matrix: + - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x86/0.6/julia-0.6-latest-win32.exe" + - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x64/0.6/julia-0.6-latest-win64.exe" + - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x86/julia-latest-win32.exe" + - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x64/julia-latest-win64.exe" + +branches: + only: + - master + - /release-.*/ + +notifications: + - provider: Email + on_build_success: false + on_build_failure: false + on_build_status_changed: false + +install: + - ps: "[System.Net.ServicePointManager]::SecurityProtocol = [System.Net.SecurityProtocolType]::Tls12" +# Download most recent Julia Windows binary + - ps: (new-object net.webclient).DownloadFile($env:JULIA_URL, "C:\projects\julia-binary.exe") +# Run installer silently, output to C:\projects\julia + - C:\projects\julia-binary.exe /S /D=C:\projects\julia + +build_script: +# Need to convert from shallow to complete for Pkg.clone to work + - IF EXIST .git\shallow (git fetch --unshallow) + - C:\projects\julia\bin\julia -e "versioninfo(); Pkg.clone(pwd(), \"Sleef\"); Pkg.build(\"Sleef\")" + +test_script: + - C:\projects\julia\bin\julia -e "Pkg.test(\"Sleef\")" \ No newline at end of file diff --git a/benchmark/benchmark.jl b/benchmark/benchmark.jl new file mode 100644 index 0000000..3c53c1a --- /dev/null +++ b/benchmark/benchmark.jl @@ -0,0 +1,132 @@ +using Libm +using BenchmarkTools +using JLD, DataStructures + +RETUNE = false +VERBOSE = true +DETAILS = false + +test_types = (Float64, Float32) # Which types do you want to bench? + +const bench = ("Base","Libm") +const suite = BenchmarkGroup() +for n in bench + suite[n] = BenchmarkGroup([n]) +end + + +bench_reduce(f::Function, X) = mapreduce(x -> reinterpret(Unsigned,x), |, f(x) for x in X) + +typealias IEEEFloat Union{Float32,Float64} +MRANGE(::Type{Float64}) = 10000000 +MRANGE(::Type{Float32}) = 10000 +IntF(::Type{Float64}) = Int64 +IntF(::Type{Float32}) = Int32 +x_trig{T<:IEEEFloat}(::Type{T}) = begin + x_trig = T[] + for i = 1:10000 + s = reinterpret(T, reinterpret(IntF(T), T(pi)/4 * i) - IntF(T)(20)) + e = reinterpret(T, reinterpret(IntF(T), T(pi)/4 * i) + IntF(T)(20)) + d = s + while d <= e + append!(x_trig, d) + d = reinterpret(T, reinterpret(IntF(T), d) + IntF(T)(1)) + end + end + x_trig = append!(x_trig, -10:0.0002:10) + x_trig = append!(x_trig, -MRANGE(T):200.1:MRANGE(T)) +end +x_exp{T<:IEEEFloat}(::Type{T}) = map(T, vcat(-10:0.0002:10, -1000:0.1:1000)) +x_exp2{T<:IEEEFloat}(::Type{T}) = map(T, vcat(-10:0.0002:10, -120:0.023:1000, -1000:0.02:2000)) +x_exp10{T<:IEEEFloat}(::Type{T}) = map(T, vcat(-10:0.0002:10, -35:0.023:1000, -300:0.01:300)) +x_expm1{T<:IEEEFloat}(::Type{T}) = map(T, vcat(-10:0.0002:10, -1000:0.021:1000, -1000:0.023:1000, 10.0.^-(0:0.02:300), -10.0.^-(0:0.02:300), 10.0.^(0:0.021:300), -10.0.^-(0:0.021:300))) +x_log{T<:IEEEFloat}(::Type{T}) = map(T, vcat(0.0001:0.0001:10, 0.001:0.1:10000, 1.1.^(-1000:1000), 2.1.^(-1000:1000))) +x_log10{T<:IEEEFloat}(::Type{T}) = map(T, vcat(0.0001:0.0001:10, 0.0001:0.1:10000)) +x_log1p{T<:IEEEFloat}(::Type{T}) = map(T, vcat(0.0001:0.0001:10, 0.0001:0.1:10000, 10.0.^-(0:0.02:300), -10.0.^-(0:0.02:300))) +x_atrig{T<:IEEEFloat}(::Type{T}) = map(T, vcat(-1:0.00002:1)) +x_atan{T<:IEEEFloat}(::Type{T}) = map(T, vcat(-10:0.0002:10, -10000:0.2:10000, -10000:0.201:10000)) +x_cbrt{T<:IEEEFloat}(::Type{T}) = map(T, vcat(-10000:0.2:10000, 1.1.^(-1000:1000), 2.1.^(-1000:1000))) +x_trigh{T<:IEEEFloat}(::Type{T}) = map(T, vcat(-10:0.0002:10, -1000:0.02:1000)) +x_asinhatanh{T<:IEEEFloat}(::Type{T}) = map(T, vcat(-10:0.0002:10, -1000:0.02:1000)) +x_acosh{T<:IEEEFloat}(::Type{T}) = map(T, vcat(1:0.0002:10, 1:0.02:1000)) +x_pow{T<:IEEEFloat}(::Type{T}) = begin + xx1 = map(Tuple{T,T}, [(x,y) for x = -100:0.20:100, y = 0.1:0.20:100])[:] + xx2 = map(Tuple{T,T}, [(x,y) for x = -100:0.21:100, y = 0.1:0.22:100])[:] + xx3 = map(Tuple{T,T}, [(x,y) for x = 2.1, y = -1000:0.1:1000]) + xx = vcat(xx1, xx2, xx2) +end + +import Base.atanh +for f in (:atanh,) + @eval begin + ($f)(x::Float64) = ccall(($(string(f)),Base.libm_name), Float64, (Float64,), x) + ($f)(x::Float32) = ccall(($(string(f,"f")),Base.libm_name), Float32, (Float32,), x) + end +end + +const micros = OrderedDict( + "sin" => x_trig, + "cos" => x_trig, + "tan" => x_trig, + "asin" => x_atrig, + "acos" => x_atrig, + "atan" => x_atan, + "exp" => x_exp, + "exp2" => x_exp2, + "exp10" => x_exp10, + "expm1" => x_expm1, + "log" => x_log, + "log2" => x_log10, + "log10" => x_log10, + "log1p" => x_log1p, + "sinh" => x_trigh, + "cosh" => x_trigh, + "tanh" => x_trigh, + "asinh" => x_asinhatanh, + "acosh" => x_acosh, + "atanh" => x_asinhatanh, + "cbrt" => x_cbrt + ) + +for n in ("Base","Libm") + for (f,x) in micros + suite[n][f] = BenchmarkGroup([f]) + for T in test_types + fex = Expr(:., Symbol(n), QuoteNode(Symbol(f))) + suite[n][f][string(T)] = @benchmarkable bench_reduce($fex, $(x(T))) + end + end +end + + +tune_params = joinpath(dirname(@__FILE__), "params.jld") +if !isfile(tune_params) || RETUNE + tune!(suite; verbose=VERBOSE, seconds = 2) + save(tune_params, "suite", params(suite)) + println("Saving tuned parameters.") +else + println("Loading pretuned parameters.") + loadparams!(suite, load(tune_params, "suite"), :evals, :samples) +end + +println("Running micro benchmarks...") +results = run(suite; verbose=VERBOSE, seconds = 2) + +print_with_color(:blue, "Benchmarks: median ratio Libm/Base\n") +for f in keys(micros) + print_with_color(:magenta, string(f)) + for T in test_types + println() + print("time: ", ) + tratio = ratio(median(results["Libm"][f][string(T)]), median(results["Base"][f][string(T)])).time + tcolor = tratio > 3 ? :red : tratio < 1.5 ? :green : :blue + print_with_color(tcolor, @sprintf("%.2f",tratio), " ", string(T)) + if DETAILS + print_with_color(:blue, "details Libm/Base\n") + println(results["Libm"][f][string(T)]) + println(results["Base"][f][string(T)]) + println() + end + end + println("\n") +end diff --git a/doc/src/assets/logo.svg b/doc/src/assets/logo.svg new file mode 100644 index 0000000..7eb3f3a --- /dev/null +++ b/doc/src/assets/logo.svg @@ -0,0 +1,34 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/Sleef.jl b/src/Sleef.jl new file mode 100644 index 0000000..3c17936 --- /dev/null +++ b/src/Sleef.jl @@ -0,0 +1,88 @@ +__precompile__() +module Sleef + +# export sin, cos, tan, asin, acos, atan, atan2, sincos, sinh, cosh, tanh, +# asinh, acosh, atanh, log, log2, log10, log1p, ilog2, exp, exp2, exp10, expm1, ldexp, cbrt, pow + +# fast variants (within 3 ulp) +# export sin_fast, cos_fast, tan_fast, sincos_fast, asin_fast, acos_fast, atan_fast, atan2_fast, log_fast, cbrt_fast + +using Base: Math.@horner, Math.exponent_bias, exponent_mask, Math.significand_bits, Math.IEEEFloat + +## constants (refactor later to all use dispatch) + +const MLN2 = 6.931471805599453094172321214581765680755001343602552541206800094933936219696955e-01 # log(2) +const MLN2E = 1.442695040888963407359924681001892137426645954152985934135449406931109219181187 # log2(e) + +const MPI = 3.141592653589793238462643383279502884197169399375105820974944592307816406286198 # pi +const MPI2 = 1.570796326794896619231321691639751442098584699687552910487472296153908203143099 # pi/2 +const MPI4 = 7.853981633974483096156608458198757210492923498437764552437361480769541015715495e-01 # pi/4 +const M1PI = 3.183098861837906715377675267450287240689192914809128974953346881177935952684543e-01 # 1/pi +const M2PI = 6.366197723675813430755350534900574481378385829618257949906693762355871905369086e-01 # 2/pi +const M4PI = 1.273239544735162686151070106980114896275677165923651589981338752471174381073817 # 4/pi + +const M1SQRT2 = 7.07106781186547524400844362104849039284835937688474036588339868995366239231051e-01 # 1/sqrt(2) + +const M2P13 = 1.259921049894873164767210607278228350570251464701507980081975112155299676513956 # 2^1/3 +const M2P23 = 1.587401051968199474751705639272308260391493327899853009808285761825216505624206 # 2^2/3 + +## constants (dispatch) + +MDLN10E(::Type{Float64}) = Double(0.4342944819032518, 1.098319650216765e-17) # log10(e) +MDLN10E(::Type{Float32}) = Double(0.4342945f0, -1.010305f-8) + +MDLN2E(::Type{Float64}) = Double(1.4426950408889634, 2.0355273740931033e-17) # log2(e) +MDLN2E(::Type{Float32}) = Double(1.442695f0, 1.925963f-8) + +MDLN10(::Type{Float64}) = Double(2.302585092994046, -2.1707562233822494e-16) # log(10) +MDLN10(::Type{Float32}) = Double(2.3025851f0, -3.1975436f-8) + +MDLN2(::Type{Float64}) = Double(0.6931471805599453, 2.3190468138462996e-17) # log(2) +MDLN2(::Type{Float32}) = Double(0.6931472f0, -1.9046542f-9) + +MDPI(::Type{Float64}) = Double(3.141592653589793, 1.2246467991473532e-16) # pi +MDPI(::Type{Float32}) = Double(3.1415927f0, -8.742278f-8) +MDPI2(::Type{Float64}) = Double(1.5707963267948966, 6.123233995736766e-17) # pi/2 +MDPI2(::Type{Float32}) = Double(1.5707964f0, -4.371139f-8) + +MD2P13(::Type{Float64}) = Double(1.2599210498948732, -2.589933375300507e-17) # 2^1/3 +MD2P13(::Type{Float32}) = Double(1.2599211f0, -2.4018702f-8) + +MD2P23(::Type{Float64}) = Double(1.5874010519681996, -1.0869008194197823e-16) # 2^2/3 +MD2P23(::Type{Float32}) = Double(1.587401f0, 1.9520385f-8) + +# Split 4/pi into four parts (each is 26 bits) +PI4A(::Type{Float64}) = 0.78539816290140151978 +PI4B(::Type{Float64}) = 4.9604678871439933374e-10 +PI4C(::Type{Float64}) = 1.1258708853173288931e-18 +PI4D(::Type{Float64}) = 1.7607799325916000908e-27 + +PI4A(::Type{Float32}) = 0.78515625f0 +PI4B(::Type{Float32}) = 0.00024187564849853515625f0 +PI4C(::Type{Float32}) = 3.7747668102383613586f-08 +PI4D(::Type{Float32}) = 1.2816720341285448015f-12 + +# Split log(2) into upper and lower parts +LN2U(::Type{Float64}) = 0.69314718055966295651160180568695068359375 +LN2L(::Type{Float64}) = 0.28235290563031577122588448175013436025525412068e-12 + +LN2U(::Type{Float32}) = 0.693145751953125f0 +LN2L(::Type{Float32}) = 1.428606765330187045f-06 + +include("utils.jl") # utility functions +include("double.jl") # Dekker style Double implementation +include("priv.jl") # private math functions +include("exp.jl") # exponential functions +include("log.jl") # logarithmic functions +include("trig.jl") # trigonometric and inverse trigonometric functions +include("hyp.jl") # hyperbolic and inverse hyperbolic functions +include("misc.jl") # miscallenous math functions including pow and cbrt + +# fallback definitions #FIXME add all +for func in (:sin, :cos, :tan, :asin, :acos, :atan, :sinh, :cosh, :tanh, :log, :log2, :log10, :log1p, :exp, :exp2, :exp10, :expm1) + @eval begin + $func(x::Real) = $func(float(x)) + end +end + +end diff --git a/src/double.jl b/src/double.jl new file mode 100644 index 0000000..ccf4dd3 --- /dev/null +++ b/src/double.jl @@ -0,0 +1,271 @@ +import Base: -, <, copysign, flipsign, normalize, convert + +immutable Double{T<:IEEEFloat} <: Number + hi::T + lo::T +end +Double{T<:IEEEFloat}(x::T) = Double(x, zero(T)) + +convert{T<:IEEEFloat}(::Type{Tuple{T,T}}, x::Double{T}) = (x.hi, x.lo) +convert{T<:IEEEFloat}(::Type{T}, x::Double) = x.hi + x.lo + + +@inline trunclo(x::Float64) = reinterpret(Float64, reinterpret(UInt64, x) & 0xffff_ffff_f800_0000) # clear lower 27 bits (leave upper 26 bits) +@inline trunclo(x::Float32) = reinterpret(Float32, reinterpret(UInt32, x) & 0xffff_f000) # clear lowest 12 bits (leave upper 12 bits) + +@inline function splitprec(x::IEEEFloat) + hx = trunclo(x) + hx, x-hx +end + +@inline function normalize{T}(x::Double{T}) + r = x.hi + x.lo + Double(r, (x.hi - r) + x.lo) +end + +@inline flipsign{T<:IEEEFloat}(x::Double{T}, y::T) = Double(flipsign(x.hi, y), flipsign(x.lo, y)) + +@inline scale{T<:IEEEFloat}(x::Double{T}, s::T) = Double(s*x.hi, s*x.lo) + + +@inline -{T<:IEEEFloat}(x::Double{T}) = Double(-x.hi,-x.lo) + +@inline function <{T<:IEEEFloat}(x::Double{T}, y::Double{T}) + x.hi < y.hi +end + +@inline function <{T<:IEEEFloat}(x::Double{T}, y::Number) + x.hi < y +end + +@inline function <{T<:IEEEFloat}(x::Number, y::Double{T}) + x < y.hi +end + +# quick-two-sum x+y +@inline function dadd{T<:IEEEFloat}(x::T, y::T) #WARNING |x| >= |y| + s = x + y + Double(s, (x - s) + y) +end + +@inline function dadd{T<:IEEEFloat}(x::T, y::Double{T}) #WARNING |x| >= |y| + s = x + y.hi + Double(s, (x - s) + y.hi + y.lo) +end + +@inline function dadd{T<:IEEEFloat}(x::Double{T}, y::T) #WARNING |x| >= |y| + s = x.hi + y + Double(s, (x.hi - s) + y + x.lo) +end + +@inline function dadd{T<:IEEEFloat}(x::Double{T}, y::Double{T}) #WARNING |x| >= |y| + s = x.hi + y.hi + Double(s, (x.hi - s) + y.hi + y.lo + x.lo) +end + +@inline function dsub{T<:IEEEFloat}(x::Double{T}, y::Double{T}) #WARNING |x| >= |y| + s = x.hi - y.hi + Double(s, (x.hi - s) - y.hi - y.lo + x.lo) +end + +@inline function dsub{T<:IEEEFloat}(x::Double{T}, y::T) #WARNING |x| >= |y| + s = x.hi - y + Double(s, (x.hi - s) - y + x.lo) +end + +@inline function dsub{T<:IEEEFloat}(x::T, y::Double{T}) #WARNING |x| >= |y| + s = x - y.hi + Double(s, (x - s) - y.hi - y.lo) +end + +@inline function dsub{T<:IEEEFloat}(x::T, y::T) #WARNING |x| >= |y| + s = x - y + Double(s, (x - s) - y) +end + + +# two-sum x+y NO BRANCH +@inline function dadd2{T<:IEEEFloat}(x::T, y::T) + s = x + y + v = s - x + Double(s, (x - (s - v)) + (y - v)) +end + +@inline function dadd2{T<:IEEEFloat}(x::T, y::Double{T}) + s = x + y.hi + v = s - x + Double(s, (x - (s - v)) + (y.hi - v) + y.lo) +end + +@inline dadd2{T<:IEEEFloat}(x::Double{T}, y::T) = dadd2(y,x) + +@inline function dadd2{T<:IEEEFloat}(x::Double{T}, y::Double{T}) + s = x.hi + y.hi + v = s - x.hi + Double(s,(x.hi - (s - v)) + (y.hi - v) + x.lo + y.lo) +end + +@inline function dsub2{T<:IEEEFloat}(x::T, y::T) + s = x - y + v = s - x + Double(s, (x - (s - v)) + (-y - v)) +end + +@inline function dsub2{T<:IEEEFloat}(x::T, y::Double{T}) + s = x - y.hi + v = s - x + Double(s, (x - (s - v)) + (-y.hi - v) - y.lo) +end + +@inline function dsub2{T<:IEEEFloat}(x::Double{T}, y::T) + s = x.hi - y + v = s - x.hi + Double(s,(x.hi - (s - v)) + (-y - v) + x.lo) +end + +@inline function dsub2{T<:IEEEFloat}(x::Double{T}, y::Double{T}) + s = x.hi - y.hi + v = s - x.hi + Double(s,(x.hi - (s - v)) + (-y.hi - v) + x.lo - y.lo) +end + + + +if FMA_FAST + + # two-prod-fma + @inline function dmul{T<:IEEEFloat}(x::T, y::T) + z = x*y + Double(z, fma(x, y, -z)) + end + + @inline function dmul{T<:IEEEFloat}(x::Double{T}, y::T) + z = x.hi*y + Double(z, fma(x.hi, y, -z) + x.lo*y) + end + + @inline dmul{T<:IEEEFloat}(x::T, y::Double{T}) = dmul(y,x) + + @inline function dmul{T<:IEEEFloat}(x::Double{T}, y::Double{T}) + z = x.hi*y.hi + Double(z, fma(x.hi, y.hi, -z) + x.hi*y.lo + x.lo*y.hi) + end + + # x^2 + @inline function dsqu{T<:IEEEFloat}(x::T) + z = x*x + Double(z, fma(x,x,-z)) + end + + @inline function dsqu{T<:IEEEFloat}(x::Double{T}) + z = x.hi*x.hi + Double(z, fma(x.hi, x.hi, -z) + x.hi*(x.lo+x.lo)) + end + + # sqrt(x) + @inline function dsqrt{T<:IEEEFloat}(x::Double{T}) + zhi = _sqrt(x.hi) + Double(zhi, (x.lo + fma(-zhi, zhi, x.hi))/(zhi+zhi)) + end + + # x/y + @inline function ddiv{T<:IEEEFloat}(x::Double{T}, y::Double{T}) + invy = 1/y.hi + zhi = x.hi*invy + Double(zhi, (fma(-zhi, y.hi, x.hi) + fma(-zhi, y.lo, x.lo))*invy) + end + + @inline function ddiv{T<:IEEEFloat}(x::T, y::T) + ry = 1/y + r = x*ry + Double(r, fma(-r,y,x)*ry) + end + + # 1/x + @inline function ddrec{T<:IEEEFloat}(x::T) + zhi = 1/x + Double(zhi, fma(-zhi, x, one(T))*zhi) + end + + @inline function ddrec{T<:IEEEFloat}(x::Double{T}) + zhi = 1/x.hi + Double(zhi, (fma(-zhi, x.hi, one(T)) + -zhi*x.lo)*zhi) + end + +else + + #two-prod x*y + @inline function dmul{T<:IEEEFloat}(x::T, y::T) + hx, lx = splitprec(x) + hy, ly = splitprec(y) + z = x*y + Double(z, ((hx*hy-z) + lx*hy + hx*ly) + lx*ly) + end + + @inline function dmul{T<:IEEEFloat}(x::Double{T}, y::T) + hx, lx = splitprec(x.hi) + hy, ly = splitprec(y) + z = x.hi*y + Double(z, (hx*hy-z) + lx*hy + hx*ly + lx*ly + x.lo*y) + end + + @inline dmul{T<:IEEEFloat}(x::T, y::Double{T}) = dmul(y,x) + + @inline function dmul{T<:IEEEFloat}(x::Double{T}, y::Double{T}) + hx, lx = splitprec(x.hi) + hy, ly = splitprec(y.hi) + z = x.hi*y.hi + Double(z, (((hx*hy-z) + lx*hy + hx*ly) + lx*ly) + x.hi*y.lo + x.lo*y.hi) + end + + # x^2 + @inline function dsqu{T<:IEEEFloat}(x::T) + hx, lx = splitprec(x) + z = x*x + Double(z, (hx*hx-z) + lx*(hx + hx) + lx*lx) + end + + @inline function dsqu{T<:IEEEFloat}(x::Double{T}) + hx, lx = splitprec(x.hi) + z = x.hi*x.hi + Double(z, (hx*hx-z) + lx*(hx+hx) + lx*lx + x.hi*(x.lo+x.lo)) + end + + # sqrt(x) + @inline function dsqrt{T<:IEEEFloat}(x::Double{T}) + c = _sqrt(x.hi) + u = dsqu(c) + Double(c, (x.hi - u.hi - u.lo + x.lo)/(c+c)) + end + + # x/y + @inline function ddiv{T<:IEEEFloat}(x::Double{T}, y::Double{T}) + invy = 1/y.hi + c = x.hi*invy + u = dmul(c, y.hi) + Double(c,((((x.hi - u.hi) - u.lo) + x.lo) - c*y.lo)*invy) + end + + @inline function ddiv{T<:IEEEFloat}(x::T, y::T) + ry = 1/y + r = x*ry + hx, lx = splitprec(r) + hy, ly = splitprec(y) + Double(r, (((-hx*hy+r*y) - lx*hy - hx*ly) - lx*ly)*ry) + end + + + # 1/x + @inline function ddrec{T<:IEEEFloat}(x::T) + c = 1/x + u = dmul(c,x) + Double(c, (one(T) - u.hi - u.lo)*c) + end + + @inline function ddrec{T<:IEEEFloat}(x::Double{T}) + c = 1/x.hi + u = dmul(c,x.hi) + Double(c, (one(T) - u.hi - u.lo - c*x.lo)*c) + end + +end diff --git a/src/exp.jl b/src/exp.jl new file mode 100644 index 0000000..f81a21b --- /dev/null +++ b/src/exp.jl @@ -0,0 +1,106 @@ +# exported exponential functions + +""" + ldexp(a, n::Int) -> IEEEFloat + +Computes `a × 2^n` +""" +ldexp(x::IEEEFloat, q::Int) = ldexpk(x,q) + + + +over_e2(::Type{Float64}) = 1024 +over_e2(::Type{Float32}) = 128f0 + +""" + exp2(x) + +Compute the base-`2` exponential of `x`, that is `2ˣ`. +""" +function exp2{T<:IEEEFloat}(x::T) + u = expk(dmul(MDLN2(T), x)) + x > over_e2(T) && (u = T(Inf)) + isninf(x) && (u = T(0)) + return u +end + + + +over_e10(::Type{Float64}) = 308 +over_e10(::Type{Float32}) = 38f0 + +""" + exp10(x) + +Compute the base-`10` exponential of `x`, that is `10ˣ`. +""" +function exp10{T<:IEEEFloat}(x::T) + u = expk(dmul(MDLN10(T), x)) + x > over_e10(T) && (u = T(Inf)) + isninf(x) && (u = T(0)) + return u +end + + + +over_em1(::Type{Float64}) = 700.0 +over_em1(::Type{Float32}) = 88f0 +under_em1(::Type{Float64}) = -0.36043653389117156089696070315825181539851971360337e2 +under_em1(::Type{Float32}) = -0.15942385152878742116596338793538061065739925620174f2 + +""" + expm1(x) + +Compute `eˣ- 1` accurately for small values of `x`. +""" +function expm1{T<:IEEEFloat}(x::T) + u = T(dadd2(expk2(Double(x)), -T(1))) + x > over_em1(T) && (u = T(Inf)) + x < under_em1(T) && (u = -T(1)) + return u +end + + + +""" + exp(x) + +Compute the base-`e` exponential of `x`, that is `eˣ`. +""" +function exp end + +let +global exp + +const c11d = 2.08860621107283687536341e-09 +const c10d = 2.51112930892876518610661e-08 +const c9d = 2.75573911234900471893338e-07 +const c8d = 2.75572362911928827629423e-06 +const c7d = 2.4801587159235472998791e-05 +const c6d = 0.000198412698960509205564975 +const c5d = 0.00138888888889774492207962 +const c4d = 0.00833333333331652721664984 +const c3d = 0.0416666666666665047591422 +const c2d = 0.166666666666666851703837 +const c1d = 0.50 + +const c5f = 0.00136324646882712841033936f0 +const c4f = 0.00836596917361021041870117f0 +const c3f = 0.0416710823774337768554688f0 +const c2f = 0.166665524244308471679688f0 +const c1f = 0.499999850988388061523438f0 + +global @inline _exp(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d +global @inline _exp(x::Float32) = @horner x c1f c2f c3f c4f c5f + +function exp{T<:IEEEFloat}(x::T) + q = roundi(T(MLN2E)*x) + s = muladd(q, -LN2U(T), x) + s = muladd(q, -LN2L(T), s) + u =_exp(s) + u = s*s*u + s + 1 + u = ldexpk(u,q) + isninf(x) && (u = T(0)) + return u +end +end diff --git a/src/hyp.jl b/src/hyp.jl new file mode 100644 index 0000000..8c30aa7 --- /dev/null +++ b/src/hyp.jl @@ -0,0 +1,113 @@ +# exported hyperbolic functions + +over_sch(::Type{Float64}) = 710.0 +over_sch(::Type{Float32}) = 89f0 + +""" + sinh(x) + +Compute hyperbolic sine of `x`. +""" +function sinh{T<:IEEEFloat}(x::T) + u = abs(x) + d = expk2(Double(u)) + d = dsub(d, ddrec(d)) + u = T(d)*T(0.5) + u = abs(x) > over_sch(T) ? T(Inf) : u + u = isnan(u) ? T(Inf) : u + u = flipsign(u,x) + u = isnan(x) ? T(NaN) : u + return u +end + + + +""" + cosh(x) + +Compute hyperbolic cosine of `x`. +""" +function cosh{T<:IEEEFloat}(x::T) + u = abs(x) + d = expk2(Double(u)) + d = dadd(d, ddrec(d)) + u = T(d)*T(0.5) + u = abs(x) > over_sch(T) ? T(Inf) : u + u = isnan(u) ? T(Inf) : u + u = isnan(x) ? T(NaN) : u + return u +end + + + +over_th(::Type{Float64}) = 18.714973875 +over_th(::Type{Float32}) = 8.664339742f0 + +""" + tanh(x) + +Compute hyperbolic tangent of `x`. +""" +function tanh{T<:IEEEFloat}(x::T) + u = abs(x) + d = expk2(Double(u)) + e = ddrec(d) + d = ddiv(dsub(d,e), dadd(d,e)) + u = T(d) + u = abs(x) > over_th(T) ? T(1) : u + u = isnan(u) ? T(1) : u + u = flipsign(u,x) + u = isnan(x) ? T(NaN) : u + return u +end + + + +""" + asinh(x) + +Compute the inverse hyperbolic sine of `x`. +""" +function asinh{T<:IEEEFloat}(x::T) + u = abs(x) + d = logk2(dadd(dsqrt(dadd2(dsqu(u), T(1))), u)) + u = T(d) + u = isinf(x) || isnan(u) ? T(Inf) : u + u = flipsign(u,x) + u = isnan(x) ? T(NaN) : u + return u +end + + + +""" + acosh(x) + +Compute the inverse hyperbolic cosine of `x`. +""" +function acosh{T<:IEEEFloat}(x::T) + d = logk2(dadd2(dsqrt(dsub2(dsqu(x), T(1))), x)) + u = T(d) + u = isinf(x) || isnan(u) ? T(Inf) : u + u = x == T(1) ? T(0) : u + u = x < T(1) ? T(NaN) : u + u = isnan(x) ? T(NaN) : u + return u +end + + + +""" + atanh(x) + +Compute the inverse hyperbolic tangent of `x`. +""" +function atanh{T<:IEEEFloat}(x::T) + u = abs(x) + d = logk2(ddiv(dadd2(T(1), u), dsub2(T(1), u))) + u = u > T(1) ? T(NaN) : (u == T(1) ? T(Inf) : T(d)*T(0.5)) + u = isinf(x) || isnan(u) ? T(NaN) : u + u = flipsign(u,x) + u = isnan(x) ? T(NaN) : u + return u +end diff --git a/src/log.jl b/src/log.jl new file mode 100644 index 0000000..d2708e8 --- /dev/null +++ b/src/log.jl @@ -0,0 +1,139 @@ +# exported logarithmic functions + +""" + ilog2(x) + +Returns the integral part of the logarithm of `abs(x)`, using base 2 for the +logarithm. In other words, this computes the binary exponent of `x` such that + + x = significand × 2^exponent, + +where `significand ∈ [1, 2)`. + +* Exceptional cases (where `Int` is the machine wordsize) + * `x = 0` returns `typemin(Int)` + * `x = Inf` returns `typemax(Int)` + * `x = NaN` returns `typemax(Int)` +""" +function ilog2{T<:IEEEFloat}(x::T) + e = ilog2k(abs(x)) - 1 + isnan(x) && (e = typemax(Int)) + isinf(x) && (e = typemax(Int)) + x == 0 && (e = typemin(Int)) + return e +end + + +""" + log10(x) + +Returns the base `10` logarithm of `x`. +""" +function log10{T<:IEEEFloat}(x::T) + u = T(dmul(logk(x), MDLN10E(T))) + isinf(x) && (u = T(Inf)) + x < 0 && (u = T(NaN)) + x == 0 && (u = T(-Inf)) + return u +end + + +""" + log2(x) + +Returns the base `2` logarithm of `x`. +""" +function log2{T<:IEEEFloat}(x::T) + u = T(dmul(logk(x), MDLN2E(T))) + isinf(x) && (u = T(Inf)) + x < 0 && (u = T(NaN)) + x == 0 && (u = T(-Inf)) + return u +end + + +""" + log1p(x) + +Accurately compute the natural logarithm of 1+x. +""" +function log1p{T<:IEEEFloat}(x::T) + u = T(logk2(dadd2(x, T(1)))) + isinf(x) && (u = T(Inf)) + x < -1 && (u = T(NaN)) + x == -1 && (u = -T(Inf)) + return copysign(u,x) # return correct sign for -0.0 +end + + +""" + log(x) + +Compute the natural logarithm of `x`. The inverse of the natural logarithm is +the natural expoenential function `exp(x)` +""" +function log{T<:IEEEFloat}(x::T) + u = T(logk(x)) + isinf(x) && (u = T(Inf)) + x < 0 && (u = T(NaN)) + x == 0 && (u = -T(Inf)) + return u +end + +# First we split the argument to its mantissa `m` and integer exponent `e` so +# that `d = m \times 2^e`, where `m \in [0.5, 1)` then we apply the polynomial +# approximant on this reduced argument `m` before putting back the exponent +# in. This first part is done with the help of the private function +# `ilog2k(x)` and we put the exponent back using + +# `\log(m \times 2^e) = \log(m) + \log 2^e = \log(m) + e\times MLN2 + +# The polynomial we evaluate is based on coefficients from + +# `log_2(x) = 2\sum_{n=0}^\infty \frac{1}{2n+1} \bigl(\frac{x-1}{x+1}^{2n+1}\bigr)` + +# That being said, since this converges faster when the argument is close to +# 1, we multiply `m` by `2` and subtract 1 for the exponent `e` when `m` is +# less than `sqrt(2)/2` +""" + log_fast(x) + +Compute the natural logarithm of `x`. The inverse of the natural logarithm is +the natural expoenential function `exp(x)` +""" +function log_fast end + +let +global log_fast + +const c8d = 0.148197055177935105296783 +const c7d = 0.153108178020442575739679 +const c6d = 0.181837339521549679055568 +const c5d = 0.22222194152736701733275 +const c4d = 0.285714288030134544449368 +const c3d = 0.399999999989941956712869 +const c2d = 0.666666666666685503450651 +const c1d = 2.0 + +const c5f = 0.2371599674224853515625f0 +const c4f = 0.285279005765914916992188f0 +const c3f = 0.400005519390106201171875f0 +const c2f = 0.666666567325592041015625f0 +const c1f = 2f0 + +global @inline _log_fast(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d +global @inline _log_fast(x::Float32) = @horner x c1f c2f c3f c4f c5f + +function log_fast{T<:IEEEFloat}(x::T) + e = ilog2k(T(M1SQRT2)*x) + m = ldexpk(x,-e) + u = (m-1)/(m+1) + u2 = u*u + t =_log_fast(u2) + u = muladd(u, t, T(MLN2)*e) + isinf(x) && (u = T(Inf)) + x < 0 && (u = T(NaN)) + x == 0 && (u = -T(Inf)) + return u +end +end diff --git a/src/misc.jl b/src/misc.jl new file mode 100644 index 0000000..0a20ea4 --- /dev/null +++ b/src/misc.jl @@ -0,0 +1,119 @@ + +""" + pow(x, y) + +Exponentiation operator, returns `x` raised to the power `y`. +""" +function pow{T<:IEEEFloat}(x::T, y::T) + yi = unsafe_trunc(Int,y) + yint = yi == y + yodd = isodd(yi) && yint + z = expk(dmul(logk(abs(x)), y)) + z = isnan(z) ? T(Inf) : z + z *= (x >= 0 ? T(1) : (!yint ? T(NaN) : (yodd ? -T(1) : T(1)))) + efx = flipsign(abs(x) - 1, y) + isinf(y) && (z = efx < 0 ? T(0) : (efx == 0 ? T(1) : T(Inf))) + if isinf(x) || x == 0 + z = (yodd ? _sign(x) : T(1)) * ((x == 0 ? -y : y) < 0 ? T(0) : T(Inf)) + end + (isnan(x) || isnan(y)) && (z = T(NaN)) + (y == 0 || x == 1) && (z = T(1)) + return z +end + + + +let +global cbrt_fast +global cbrt + +const c6d = -0.640245898480692909870982 +const c5d = 2.96155103020039511818595 +const c4d = -5.73353060922947843636166 +const c3d = 6.03990368989458747961407 +const c2d = -3.85841935510444988821632 +const c1d = 2.2307275302496609725722 + +const c6f = -0.601564466953277587890625f0 +const c5f = 2.8208892345428466796875f0 +const c4f = -5.532182216644287109375f0 +const c3f = 5.898262500762939453125f0 +const c2f = -3.8095417022705078125f0 +const c1f = 2.2241256237030029296875f0 + +global @inline _cbrt(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d +global @inline _cbrt(x::Float32) = @horner x c1f c2f c3f c4f c5f c6f + +""" + cbrt_fast(x) + +Return `x^{1/3}`. +""" +function cbrt_fast{T<:IEEEFloat}(d::T) + e = ilog2k(d) + d = ldexpk(d,-e) + r = (e + 6144) % 3 + q = r == 1 ? T(M2P13) : T(1) + q = r == 2 ? T(M2P23) : q + q = ldexpk(q, (e + 6144) ÷ 3 - 2048) + q = flipsign(q,d) + d = abs(d) + x =_cbrt(d) + y = x*x + y = y*y + x -= (d*y - x)*T(1/3) + y = d*x*x + y = (y - T(2/3)*y*(y*x - 1))*q +end + + +""" + cbrt(x) + +Return `x^{1/3}`. The prefix operator `∛` is equivalent to `cbrt`. +""" +function cbrt{T<:IEEEFloat}(d::T) + e = ilog2k(d) + d = ldexpk(d,-e) + r = (e + 6144) % 3 + q2 = r == 1 ? MD2P13(T) : Double(T(1)) + q2 = r == 2 ? MD2P23(T) : q2 + q2 = flipsign(q2,d) + d = abs(d) + x =_cbrt(d) + y = x*x + y = y*y + x -= (d*y - x)*T(1/3) + z = x + u = dsqu(x) + u = dsqu(u) + u = dmul(u, d) + u = dsub(u,x) + y = T(u) + y = -T(2/3)*y*z + v = dadd(dsqu(z), y) + v = dmul(v,d) + v = dmul(v,q2) + z = ldexp(T(v), (e + 6144) ÷ 3 - 2048) + isinf(d) && (z = flipsign(T(Inf), q2.hi)) + d == 0 && (z = flipsign(T(0), q2.hi)) + return z +end +end + + + +""" + hypot(x,y) + +Compute the hypotenuse `\sqrt{x^2+y^2}` avoiding overflow and underflow. +""" +function hypot{T<:IEEEFloat}(x::T, y::T) + x = abs(x) + y = abs(y) + if x < y + x, y = y, x + end + r = (x == 0) ? y : y/x + x*sqrt(T(1) + r*r) +end diff --git a/src/priv.jl b/src/priv.jl new file mode 100644 index 0000000..17fae2c --- /dev/null +++ b/src/priv.jl @@ -0,0 +1,231 @@ +# private math functions + +""" +A helper function for `ldexpk` + +First note that `r = (q >> n) << n` clears the lowest n bits of q, i.e. returns 2^n where n is the +largest integer such that q >= 2^n + +For numbers q less than 2^m the following code does the same as the above snippet + `r = ( (q>>v + q) >> n - q>>v ) << n` +For numbers larger than or equal to 2^v this subtracts 2^n from q for q>>n times. + +The function returns q(input) := q(output) + offset*r + +In the code for ldexpk we actually use + `m = ( (m>>n + m) >> n - m>>m ) << (n-2)`. +So that x has to be multplied by u four times `x = x*u*u*u*u` to put the value of the offset +exponent amount back in. +""" +@inline function _split_exponent(q, n, v, offset) + m = q >> v + m = (((m + q) >> n) - m) << (n-offset) + q = q - (m << offset) + m, q +end +@inline split_exponent(::Type{Float64}, q::Int) = _split_exponent(q, UInt(9), UInt(31), UInt(2)) +@inline split_exponent(::Type{Float32}, q::Int) = _split_exponent(q, UInt(6), UInt(31), UInt(2)) + +""" + ldexpk(a::IEEEFloat, n::Int) -> IEEEFloat + +Computes `a × 2^n`. +""" +@inline function ldexpk{T<:IEEEFloat}(x::T, q::Int) + bias = exponent_bias(T) + emax = exponent_max(T) + m, q = split_exponent(T,q) + m += bias + m = ifelse(m < 0, 0, m) + m = ifelse(m > emax, emax, m) + q += bias + u = integer2float(T, m) + x = x*u*u*u*u + u = integer2float(T, q) + x*u +end + +@inline ldexpk2{T<:IEEEFloat}(x::T, e::Int) = x * pow2i(T, e >> UInt(1)) * pow2i(T, e - (e >> UInt(1))) + + + +# The following define threshold values for `ilog2k` +threshold_exponent(::Type{Float64}) = 300 +threshold_exponent(::Type{Float32}) = 64 + +""" + ilog2k(x::IEEEFloat) -> Int + +Returns the integral part of the logarithm of `|x|`, using 2 as base for the logarithm; in other +words this returns the binary exponent of `x` so that + + x = significand × 2^exponent + +where `significand ∈ [0.5, 1)`. +""" +@inline function ilog2k{T<:IEEEFloat}(d::T) + m = d < T(2)^-threshold_exponent(T) + d = ifelse(m, d*T(2)^threshold_exponent(T), d) + q = float2integer(d) & exponent_max(T) + q = ifelse(m, q - (threshold_exponent(T) + exponent_bias(T) - 1), q - (exponent_bias(T) - 1)) # subtract 1 since we want 2^q +end + + +let +const c20d = 1.06298484191448746607415e-05 +const c19d = -0.000125620649967286867384336 +const c18d = 0.00070557664296393412389774 +const c17d = -0.00251865614498713360352999 +const c16d = 0.00646262899036991172313504 +const c15d = -0.0128281333663399031014274 +const c14d = 0.0208024799924145797902497 +const c13d = -0.0289002344784740315686289 +const c12d = 0.0359785005035104590853656 +const c11d = -0.041848579703592507506027 +const c10d = 0.0470843011653283988193763 +const c9d = -0.0524914210588448421068719 +const c8d = 0.0587946590969581003860434 +const c7d = -0.0666620884778795497194182 +const c6d = 0.0769225330296203768654095 +const c5d = -0.0909090442773387574781907 +const c4d = 0.111111108376896236538123 +const c3d = -0.142857142756268568062339 +const c2d = 0.199999999997977351284817 +const c1d = -0.333333333333317605173818 + +const c9f = -0.00176397908944636583328247f0 +const c8f = 0.0107900900766253471374512f0 +const c7f = -0.0309564601629972457885742f0 +const c6f = 0.0577365085482597351074219f0 +const c5f = -0.0838950723409652709960938f0 +const c4f = 0.109463557600975036621094f0 +const c3f = -0.142626821994781494140625f0 +const c2f = 0.199983194470405578613281f0 +const c1f = -0.333332866430282592773438f0 + +global @inline _atan2k_fast(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d c12d c13d c14d c15d c16d c17d c18d c19d c20d +global @inline _atan2k_fast(x::Float32) = @horner x c1f c2f c3f c4f c5f c6f c7f c8f c9f + +global @inline function atan2k_fast{T<:IEEEFloat}(y::T, x::T) + q = 0 + if x < 0 + x = -x + q = -2 + end + if y > x + t = x; x = y + y = -t + q += 1 + end + s = y/x + t = s*s + u =_atan2k_fast(t) + t = u*t*s + s + q*T(MPI2) + t +end + + +global @inline _atan2k(x::Double{Float64}) = @horner x.hi c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d c12d c13d c14d c15d c16d c17d c18d c19d c20d +global @inline _atan2k(x::Double{Float32}) = dadd(c1f, x.hi*(@horner x.hi c2f c3f c4f c5f c6f c7f c8f c9f)) + +global @inline function atan2k{T<:IEEEFloat}(y::Double{T}, x::Double{T}) + q = 0 + if x < 0 + x = -x + q = -2 + end + if y > x + t = x; x = y + y = -t + q += 1 + end + s = ddiv(y,x) + t = dsqu(s) + u =_atan2k(t) + t = dmul(s, dadd(T(1), dmul(t, u))) + dadd(dmul(T(q), MDPI2(T)), t) +end +end + + +let +const c10d = 2.51069683420950419527139e-08 +const c9d = 2.76286166770270649116855e-07 +const c8d = 2.75572496725023574143864e-06 +const c7d = 2.48014973989819794114153e-05 +const c6d = 0.000198412698809069797676111 +const c5d = 0.0013888888939977128960529 +const c4d = 0.00833333333332371417601081 +const c3d = 0.0416666666665409524128449 +const c2d = 0.166666666666666740681535 +const c1d = 0.500000000000000999200722 + +const c5f = 0.00136324646882712841033936f0 +const c4f = 0.00836596917361021041870117f0 +const c3f = 0.0416710823774337768554688f0 +const c2f = 0.166665524244308471679688f0 +const c1f = 0.499999850988388061523438f0 + +global @inline _expk(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d +global @inline _expk(x::Float32) = @horner x c1f c2f c3f c4f c5f + +global @inline function expk{T<:IEEEFloat}(d::Double{T}) + q = round(T(d)*T(MLN2E)) + s = dadd(d, -q*LN2U(T)) + s = dadd(s, -q*LN2L(T)) + u =_expk(T(s)) + t = dadd(s, dmul(dsqu(s), u)) + t = dadd(T(1), t) + ldexpk(T(t), unsafe_trunc(Int,q)) +end + + +global @inline function expk2{T<:IEEEFloat}(d::Double{T}) + q = round(T(d)*T(MLN2E)) + s = dadd(d, -q*LN2U(T)) + s = dadd(s, -q*LN2L(T)) + u =_expk(s.hi) + t = dadd(s, dmul(dsqu(s), u)) + t = dadd(T(1), t) + scale(t, pow2i(T, unsafe_trunc(Int,q))) +end +end + + +let +const c8d = 0.134601987501262130076155 +const c7d = 0.132248509032032670243288 +const c6d = 0.153883458318096079652524 +const c5d = 0.181817427573705403298686 +const c4d = 0.222222231326187414840781 +const c3d = 0.285714285651261412873718 +const c2d = 0.400000000000222439910458 +const c1d = 0.666666666666666371239645 + +const c4f = 0.2371599674224853515625f0 +const c3f = 0.285279005765914916992188f0 +const c2f = 0.400005519390106201171875f0 +const c1f = 0.666666567325592041015625f0 + +global @inline _logk(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d +global @inline _logk(x::Float32) = @horner x c1f c2f c3f c4f + +global @inline function logk{T<:IEEEFloat}(d::T) + e = ilog2k(d*T(M1SQRT2)) + m = ldexpk(d,-e) + x = ddiv(dsub2(m, T(1)), dadd2(T(1), m)) + x2 = dsqu(x) + t =_logk(x2.hi) + dadd(dmul(MDLN2(T), T(e)), dadd(scale(x, T(2)), dmul(dmul(x2, x), t))) +end + + +global @inline function logk2{T<:IEEEFloat}(d::Double{T}) + e = ilog2k(d.hi*T(M1SQRT2)) + m = scale(d, pow2i(T,-e)) + x = ddiv(dsub2(m, T(1)), dadd2(m, T(1))) + x2 = dsqu(x) + t =_logk(x2.hi) + dadd(dmul(MDLN2(T), T(e)), dadd(scale(x, T(2)), dmul(dmul(x2, x), t))) +end +end diff --git a/src/trig.jl b/src/trig.jl new file mode 100644 index 0000000..0621572 --- /dev/null +++ b/src/trig.jl @@ -0,0 +1,493 @@ +# exported trigonometric functions + +""" + sin(x) + +Compute the sine of `x`, where the output is in radians. +""" +function sin end + +""" + cos(x) + +Compute the cosine of `x`, where the output is in radians. +""" +function cos end + +let +global sin +global cos + +const c8d = 2.72052416138529567917983e-15 +const c7d = -7.64292594113954471900203e-13 +const c6d = 1.60589370117277896211623e-10 +const c5d = -2.5052106814843123359368e-08 +const c4d = 2.75573192104428224777379e-06 +const c3d = -0.000198412698412046454654947 +const c2d = 0.00833333333333318056201922 +const c1d = -0.166666666666666657414808 + +const c4f = 2.6083159809786593541503f-06 +const c3f = -0.0001981069071916863322258f0 +const c2f = 0.00833307858556509017944336f0 +const c1f = -0.166666597127914428710938f0 + +global @inline _sincos(x::Double{Float64}) = dadd(c1d, x.hi*(@horner x.hi c2d c3d c4d c5d c6d c7d c8d)) +global @inline _sincos(x::Double{Float32}) = dadd(c1f, x.hi*(@horner x.hi c2f c3f c4f)) + +function sin{T<:IEEEFloat}(x::T) + d = abs(x) + q = round(d*T(M1PI)) + s = dsub2(d, q*PI4A(T)*4) + s = dsub2(s, q*PI4B(T)*4) + s = dsub2(s, q*PI4C(T)*4) + s = dsub2(s, q*PI4D(T)*4) + t = s + s = dsqu(s) + w =_sincos(s) + v = dmul(t, dadd(T(1), dmul(w,s))) + u = T(v) + qi = unsafe_trunc(Int,q) + qi & 1 != 0 && (u = -u) + return flipsign(u,x) +end + +function cos{T<:IEEEFloat}(x::T) + x = abs(x) + q = muladd(T(2), round(x*T(M1PI) - T(0.5)), T(1)) + s = dsub2(x, q*PI4A(T)*2) + s = dsub2(s, q*PI4B(T)*2) + s = dsub2(s, q*PI4C(T)*2) + s = dsub2(s, q*PI4D(T)*2) + t = s + s = dsqu(s) + w =_sincos(s) + v = dmul(t, dadd(T(1), dmul(w,s))) + u = T(v) + qi = unsafe_trunc(Int,q) + qi & 2 == 0 && (u = -u) + return u +end +end + + +""" + sin_fast(x) + +Compute the sine of `x`, where the output is in radians. +""" +function sin_fast end + +""" + cos_fast(x) + +Compute the cosine of `x`, where the output is in radians. +""" +function cos_fast end + +let +global sin_fast +global cos_fast + +const c9d = -7.97255955009037868891952e-18 +const c8d = 2.81009972710863200091251e-15 +const c7d = -7.64712219118158833288484e-13 +const c6d = 1.60590430605664501629054e-10 +const c5d = -2.50521083763502045810755e-08 +const c4d = 2.75573192239198747630416e-06 +const c3d = -0.000198412698412696162806809 +const c2d = 0.00833333333333332974823815 +const c1d = -0.166666666666666657414808 + +# c5f is 0f0 to handle Inf32 case, Float64 doesn't need this since it comes +# out properly (add another neg constant and remove this zero constant) +const c5f = 0f0 +const c4f = 2.6083159809786593541503f-06 +const c3f = -0.0001981069071916863322258f0 +const c2f = 0.00833307858556509017944336f0 +const c1f = -0.166666597127914428710938f0 + +# Argument is first reduced to the domain 0 < s < π/4 + +# We return the correct sign using `q & 1 != 0` i.e. q is odd (this works for +# positive and negative q) and if this condition is true we flip the sign since +# we are now in the negative branch of sin(x). Recall that q is just the integer +# part of d/π and thus we can determine the correct sign using this information. + +global @inline _sincos_fast(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d +global @inline _sincos_fast(x::Float32) = @horner x c1f c2f c3f c4f c5f + +function sin_fast{T<:IEEEFloat}(x::T) + d = abs(x) + q = roundi(d*T(M1PI)) + d = muladd(q, -PI4A(T)*4, d) + d = muladd(q, -PI4B(T)*4, d) + d = muladd(q, -PI4C(T)*4, d) + d = muladd(q, -PI4D(T)*4, d) + s = d*d + q & 1 != 0 && (d = -d) + u =_sincos_fast(s) + u = muladd(s, u*d, d) + flipsign(u,x) +end + +function cos_fast{T<:IEEEFloat}(x::T) + q = muladd(2, roundi(x*T(M1PI)-T(0.5)), 1) + x = muladd(q, -PI4A(T)*2, x) + x = muladd(q, -PI4B(T)*2, x) + x = muladd(q, -PI4C(T)*2, x) + x = muladd(q, -PI4D(T)*2, x) + s = x*x + q & 2 == 0 && (x = -x) + u =_sincos_fast(s) + muladd(s, u*x, x) +end +end + + + +""" + sincos(x) + +Compute the sin and cosine of `x` simultaneously, where the output is in +radians, returning a tuple. +""" +function sincos end + +""" + sincos_fast(x) + +Compute the sin and cosine of `x` simultaneously, where the output is in +radians, returning a tuple. +""" +function sincos_fast end + +let +global sincos +global sincos_fast + +const a6d = 1.58938307283228937328511e-10 +const a5d = -2.50506943502539773349318e-08 +const a4d = 2.75573131776846360512547e-06 +const a3d = -0.000198412698278911770864914 +const a2d = 0.0083333333333191845961746 +const a1d = -0.166666666666666130709393 + +const a3f = -0.000195169282960705459117889f0 +const a2f = 0.00833215750753879547119141f0 +const a1f = -0.166666537523269653320312f0 + +const b7d = -1.13615350239097429531523e-11 +const b6d = 2.08757471207040055479366e-09 +const b5d = -2.75573144028847567498567e-07 +const b4d = 2.48015872890001867311915e-05 +const b3d = -0.00138888888888714019282329 +const b2d = 0.0416666666666665519592062 +const b1d = -0.50 + +const b5f = -2.71811842367242206819355f-07 +const b4f = 2.47990446951007470488548f-05 +const b3f = -0.00138888787478208541870117f0 +const b2f = 0.0416666641831398010253906f0 +const b1f = -0.5f0 + +global @inline _sincos_a(x::Float64) = @horner x a1d a2d a3d a4d a5d a6d +global @inline _sincos_a(x::Float32) = @horner x a1f a2f a3f +global @inline _sincos_b(x::Float64) = @horner x b1d b2d b3d b4d b5d b6d b7d +global @inline _sincos_b(x::Float32) = @horner x b1f b2f b3f b4f b5f + +function sincos_fast{T<:IEEEFloat}(x::T) + d = abs(x) + q = roundi(d*T(M2PI)) + s = d + s = muladd(q, -PI4A(T)*2, s) + s = muladd(q, -PI4B(T)*2, s) + s = muladd(q, -PI4C(T)*2, s) + s = muladd(q, -PI4D(T)*2, s) + t = s + s = s*s + u =_sincos_a(s) + u = u * s * t + rx = t + u + u =_sincos_b(s) + ry = u * s + T(1) + q & 1 != 0 && (s = ry; ry = rx; rx = s) + q & 2 != 0 && (rx = -rx) + (q+1) & 2 != 0 && (ry = -ry) + isinf(d) && (rx = ry = T(NaN)) + convert(Tuple{T,T}, Double(flipsign(rx,x), ry)) +end + +function sincos{T<:IEEEFloat}(x::T) + d = abs(x) + q = roundi(d*2*T(M1PI)) + s = dsub2(d, q*PI4A(T)*2) + s = dsub2(s, q*PI4B(T)*2) + s = dsub2(s, q*PI4C(T)*2) + s = dsub2(s, q*PI4D(T)*2) + t = s + s = dsqu(s) + sx = T(s) + u =_sincos_a(sx) + u *= sx * t.hi + v = dadd(t, u) + rx = T(v) + u =_sincos_b(sx) + v = dadd(T(1), dmul(sx, u)) + ry = T(v) + q & 1 != 0 && (u = ry; ry = rx; rx = u) + q & 2 != 0 && (rx = -rx) + (q+1) & 2 != 0 && (ry = -ry) + isinf(d) && (rx = ry = T(NaN)) + convert(Tuple{T,T}, Double(flipsign(rx, x), ry)) +end +end + + +""" + tan(x) + +Compute the tangent of `x`, where the output is in radians. +""" +function tan end + +""" + tan_fast(x) + +Compute the tangent of `x`, where the output is in radians. +""" +function tan_fast end + +let +global tan +global tan_fast + +const c15d = 1.01419718511083373224408e-05 +const c14d = -2.59519791585924697698614e-05 +const c13d = 5.23388081915899855325186e-05 +const c12d = -3.05033014433946488225616e-05 +const c11d = 7.14707504084242744267497e-05 +const c10d = 8.09674518280159187045078e-05 +const c9d = 0.000244884931879331847054404 +const c8d = 0.000588505168743587154904506 +const c7d = 0.001456127889228124279788480 +const c6d = 0.003592087438369066191429240 +const c5d = 0.008863239443624016181133560 +const c4d = 0.021869488285384638959207800 +const c3d = 0.053968253978129841763600200 +const c2d = 0.133333333333125941821962000 +const c1d = 0.333333333333334980164153000 + +const c7f = 0.00446636462584137916564941f0 +const c6f = -8.3920182078145444393158f-05 +const c5f = 0.0109639242291450500488281f0 +const c4f = 0.0212360303848981857299805f0 +const c3f = 0.0540687143802642822265625f0 +const c2f = 0.133325666189193725585938f0 +const c1f = 0.33333361148834228515625f0 + +global @inline _tan_fast(x::Float64) = @horner_split x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d c12d c13d c14d c15d +global @inline _tan_fast(x::Float32) = @horner x c1f c2f c3f c4f c5f c6f c7f + +function tan_fast{T<:IEEEFloat}(d::T) + w = abs(d) + q = roundi(w*T(M2PI)) + x = muladd(q, -PI4A(T)*2, w) + x = muladd(q, -PI4B(T)*2, x) + x = muladd(q, -PI4C(T)*2, x) + x = muladd(q, -PI4D(T)*2, x) + q & 1 != 0 && (x = -x) + s = x*x + u =_tan_fast(s) + u = muladd(s, u*x, x) + q & 1 != 0 && (u = 1/u) + isinf(w) && (u = T(NaN)) + return flipsign(u,d) +end + +global @inline _tan(x::Double{Float64}) = dadd(c1d, x.hi*(@horner_split x.hi c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d c12d c13d c14d c15d)) +global @inline _tan(x::Double{Float32}) = dadd(c1f, dmul(x, @horner x.hi c2f c3f c4f c5f c6f c7f)) +# global @inline _tan(x::Double{Float32}) = dadd(c1f, dmul(x.hi, dadd(c2f, x.hi*(@horner x.hi c3f c4f c5f c6f c7f)))) + +function tan{T<:IEEEFloat}(d::T) + w = abs(d) + q = round(w*T(M2PI)) + x = dsub2(w, q*PI4A(T)*2) + x = dsub2(x, q*PI4B(T)*2) + x = dsub2(x, q*PI4C(T)*2) + x = dsub2(x, q*PI4D(T)*2) + qi = unsafe_trunc(Int,q) + qi & 1 != 0 && (x = -x) + s = dsqu(x) + u =_tan(s) + u = dmul(x, dadd(T(1), dmul(u, s))) + qi & 1 != 0 && (u = ddrec(u)) + return flipsign(T(u),d) +end +end + + + +""" + atan(x) + +Compute the inverse tangent of `x`, where the output is in radians. +""" +function atan{T<:IEEEFloat}(x::T) + u = T(atan2k(Double(abs(x)), Double(T(1)))) + isinf(x) && (u = T(MPI2)) + flipsign(u,x) +end + + +""" + atan_fast(x) + +Compute the inverse tangent of `x`, where the output is in radians. +""" +function atan_fast end +let +global atan_fast + +const c19d = -1.88796008463073496563746e-05 +const c18d = 0.000209850076645816976906797 +const c17d = -0.00110611831486672482563471 +const c16d = 0.00370026744188713119232403 +const c15d = -0.00889896195887655491740809 +const c14d = 0.016599329773529201970117 +const c13d = -0.0254517624932312641616861 +const c12d = 0.0337852580001353069993897 +const c11d = -0.0407629191276836500001934 +const c10d = 0.0466667150077840625632675 +const c9d = -0.0523674852303482457616113 +const c8d = 0.0587666392926673580854313 +const c7d = -0.0666573579361080525984562 +const c6d = 0.0769219538311769618355029 +const c5d = -0.090908995008245008229153 +const c4d = 0.111111105648261418443745 +const c3d = -0.14285714266771329383765 +const c2d = 0.199999999996591265594148 +const c1d = -0.333333333333311110369124 + +const c8f = 0.00282363896258175373077393f0 +const c7f = -0.0159569028764963150024414f0 +const c6f = 0.0425049886107444763183594f0 +const c5f = -0.0748900920152664184570312f0 +const c4f = 0.106347933411598205566406f0 +const c3f = -0.142027363181114196777344f0 +const c2f = 0.199926957488059997558594f0 +const c1f = -0.333331018686294555664062f0 + +global @inline _atan_fast(x::Float64) = @horner_split x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d c12d c13d c14d c15d c16d c17d c18d c19d +global @inline _atan_fast(x::Float32) = @horner x c1f c2f c3f c4f c5f c6f c7f c8f + +function atan_fast{T<:IEEEFloat}(x::T) + q = 0 + if x < 0 + x = -x + q = 2 + end + if x > 1 + x = 1/x + q |= 1 + end + t = x*x + u =_atan_fast(t) + t = x + x*t*u + q & 1 != 0 && (t = T(MPI2) - t) + q & 2 != 0 && (t = -t) + return t +end +end + + + +""" + atan2(x, y) + +Compute the inverse tangent of `x/y`, using the signs of both `x` and `y` to determine the quadrant of the return value. +""" +function atan2{T<:IEEEFloat}(x::T, y::T) + r = T(atan2k(Double(abs(x)), Double(y))) + r = flipsign(r,y) + if isinf(y) || y == 0 + r = T(MPI2) - (isinf(y) ? _sign(y)*T(MPI2) : T(0)) + end + if isinf(x) + r = T(MPI2) - (isinf(y) ? _sign(y)*T(MPI4) : T(0)) + end + if x == 0 + r = _sign(y) == -1 ? T(MPI) : T(0) + end + return isnan(y) || isnan(x) ? T(NaN) : flipsign(r,x) +end + + +""" + atan2_fast(x, y) + +Compute the inverse tangent of `x/y`, using the signs of both `x` and `y` to determine the quadrant of the return value. +""" +function atan2_fast{T<:IEEEFloat}(x::T, y::T) + r = atan2k_fast(abs(x), y) + r = flipsign(r,y) + if isinf(y) || y == 0 + r = T(MPI2) - (isinf(y) ? _sign(y)*T(MPI2) : T(0)) + end + if isinf(x) + r = T(MPI2) - (isinf(y) ? _sign(y)*T(MPI4) : T(0)) + end + if x == 0 + r = _sign(y) == -1 ? T(MPI) : T(0) + end + return isnan(y) || isnan(x) ? T(NaN) : flipsign(r,x) +end + + + +""" + asin(x) + +Compute the inverse sine of `x`, where the output is in radians. +""" +function asin{T<:IEEEFloat}(x::T) + d = atan2k(Double(abs(x)), dsqrt(dmul(dadd(T(1), x), dsub(T(1), x)))) + u = T(d) + abs(x) == 1 && (u = T(MPI2)) + flipsign(u,x) +end + + +""" + asin_fast(x) + +Compute the inverse sine of `x`, where the output is in radians. +""" +function asin_fast{T<:IEEEFloat}(x::T) + flipsign(atan2k_fast(abs(x), _sqrt((1+x)*(1-x))), x) +end + + + +""" + acos(x) + +Compute the inverse cosine of `x`, where the output is in radians. +""" +function acos{T<:IEEEFloat}(x::T) + d = atan2k(dsqrt(dmul(dadd(T(1), x), dsub(T(1), x))), Double(abs(x))) + d = flipsign(d,x) + abs(x) == 1 && (d = Double(T(0))) + x < 0 && (d = dadd(MDPI(T), d)) + return T(d) +end + + +""" + acos_fast(x) + +Compute the inverse cosine of `x`, where the output is in radians. +""" +function acos_fast{T<:IEEEFloat}(x::T) + flipsign(atan2k_fast(_sqrt((1+x)*(1-x)), abs(x)), x) + (x < 0 ? T(MPI) : T(0)) +end diff --git a/src/utils.jl b/src/utils.jl new file mode 100644 index 0000000..9f76db7 --- /dev/null +++ b/src/utils.jl @@ -0,0 +1,63 @@ +## utility functions mainly used by the private math functions in priv.jl + +function is_fma_fast end +for T in (Float32, Float64) + @eval is_fma_fast(::Type{$T}) = $(muladd(nextfloat(one(T)),nextfloat(one(T)),-nextfloat(one(T),2)) != zero(T)) +end +const FMA_FAST = is_fma_fast(Float64) && is_fma_fast(Float32) + + +@inline exponent_max{T<:IEEEFloat}(::Type{T}) = Int(exponent_mask(T) >> significand_bits(T)) + +# _sign emits better native code than sign but does not properly handle the Inf/NaN cases +@inline _sign{T<:IEEEFloat}(d::T) = flipsign(one(T), d) + +@inline roundi{T<:IEEEFloat}(x::T) = unsafe_trunc(Int, round(x)) + +@inline integer2float(::Type{Float64}, m::Int) = reinterpret(Float64, (m % Int64) << significand_bits(Float64)) +@inline integer2float(::Type{Float32}, m::Int) = reinterpret(Float32, (m % Int32) << significand_bits(Float32)) + +@inline float2integer(d::Float64) = (reinterpret(Int64, d) >> significand_bits(Float64)) % Int +@inline float2integer(d::Float32) = (reinterpret(Int32, d) >> significand_bits(Float32)) % Int + +@inline pow2i{T<:IEEEFloat}(::Type{T}, q::Int) = integer2float(T, q + exponent_bias(T)) + +# sqrt without the domain checks which we don't need since we handle the checks ourselves +_sqrt{T<:IEEEFloat}(x::T) = Base.sqrt_llvm_fast(x) + +@inline ispinf{T<:IEEEFloat}(x::T) = x == T(Inf) +@inline isninf{T<:IEEEFloat}(x::T) = x == -T(Inf) + +# Similar to @horner, but split into even and odd coefficients. This is typically less +# accurate, but faster due to out of order execution. +macro horner_split(x,p...) + t1 = gensym("x1") + t2 = gensym("x2") + blk = quote + $t1 = $(esc(x)) + $t2 = $(esc(x)) * $(esc(x)) + end + n = length(p) + p0 = esc(p[1]) + if isodd(n) + ex_o = esc(p[end-1]) + ex_e = esc(p[end]) + for i = n-3:-2:2 + ex_o = :(muladd($(t2), $ex_o, $(esc(p[i])))) + end + for i = n-2:-2:2 + ex_e = :(muladd($(t2), $ex_e, $(esc(p[i])))) + end + elseif iseven(n) + ex_o = esc(p[end]) + ex_e = esc(p[end-1]) + for i = n-2:-2:2 + ex_o = :(muladd($(t2), $ex_o, $(esc(p[i])))) + end + for i = n-3:-2:2 + ex_e = :(muladd($(t2), $ex_e, $(esc(p[i])))) + end + end + push!(blk.args,:($(p0) + $(t1)*$(ex_o) + $(t2)*$(ex_e))) + blk +end diff --git a/test/accuracy.jl b/test/accuracy.jl new file mode 100644 index 0000000..c7e335b --- /dev/null +++ b/test/accuracy.jl @@ -0,0 +1,177 @@ + +MRANGE(::Type{Float64}) = 10000000 +MRANGE(::Type{Float32}) = 10000 +IntF(::Type{Float64}) = Int64 +IntF(::Type{Float32}) = Int32 + + +@testset "Accuracy (max error in ulp) for $T" for T in (Float64, Float32) + println("Accuracy tests for $T") + + + xx = map(T, vcat(-10:0.0002:10, -1000:0.1:1000)) + fun_table = Dict(Sleef.exp => Base.exp) + tol = 1 + test_acc(T, fun_table, xx, tol) + + + + # xx = map(T, vcat(-10:0.0002:10, -1000:0.02:1000)) + # fun_table = Dict(Sleef.asinh => Base.asinh, Sleef.atanh => Base.atanh) + # tol = 1 + # test_acc(T, fun_table, xx, tol) + + + # xx = map(T, vcat(1:0.0002:10, 1:0.02:1000)) + # fun_table = Dict(Sleef.acosh => Base.acosh) + # tol = 1 + # test_acc(T, fun_table, xx, tol) + + + # xx = T[] + # for i = 1:10000 + # s = reinterpret(T, reinterpret(IntF(T), T(pi)/4 * i) - IntF(T)(20)) + # e = reinterpret(T, reinterpret(IntF(T), T(pi)/4 * i) + IntF(T)(20)) + # d = s + # while d <= e + # append!(xx, d) + # d = reinterpret(T, reinterpret(IntF(T), d) + IntF(T)(1)) + # end + # end + # xx = append!(xx, -10:0.0002:10) + # xx = append!(xx, -MRANGE(T):200.1:MRANGE(T)) + + # fun_table = Dict(Sleef.sin => Base.sin, Sleef.cos => Base.cos, Sleef.tan => Base.tan) + # tol = 1 + # test_acc(T, fun_table, xx, tol) + + # fun_table = Dict(Sleef.sin_fast => Base.sin, Sleef.cos_fast => Base.cos, Sleef.tan_fast => Base.tan) + # tol = 4 + # test_acc(T, fun_table, xx, tol) + + + # sin_sincos_fast(x) = Sleef.sincos_fast(x)[1] + # cos_sincos_fast(x) = Sleef.sincos_fast(x)[2] + # fun_table = Dict(sin_sincos_fast => Base.sin, cos_sincos_fast => Base.cos) + # tol = 4 + # test_acc(T, fun_table, xx, tol) + + # sin_sincos(x) = Sleef.sincos(x)[1] + # cos_sincos(x) = Sleef.sincos(x)[2] + # fun_table = Dict(sin_sincos => Base.sin, cos_sincos => Base.cos) + # tol = 1 + # test_acc(T, fun_table, xx, tol) + + + # xx = map(T, vcat(-1:0.00002:1)) + # fun_table = Dict(Sleef.asin_fast => Base.asin, Sleef.acos_fast => Base.acos) + # tol = 3 + # test_acc(T, fun_table, xx, tol) + + # fun_table = Dict(Sleef.asin => asin, Sleef.acos => Base.acos) + # tol = 1 + # test_acc(T, fun_table, xx, tol) + + + # xx = map(T, vcat(-10:0.0002:10, -10000:0.2:10000, -10000:0.201:10000)) + # fun_table = Dict(Sleef.atan_fast => Base.atan) + # tol = 3 + # test_acc(T, fun_table, xx, tol) + + # fun_table = Dict(Sleef.atan => Base.atan) + # tol = 1 + # test_acc(T, fun_table, xx, tol) + + + # xx1 = map(Tuple{T,T}, [zip(-10:0.050:10, -10:0.050:10)...]) + # xx2 = map(Tuple{T,T}, [zip(-10:0.051:10, -10:0.052:10)...]) + # xx3 = map(Tuple{T,T}, [zip(-100:0.51:100, -100:0.51:100)...]) + # xx4 = map(Tuple{T,T}, [zip(-100:0.51:100, -100:0.52:100)...]) + # xx = vcat(xx1, xx2, xx3, xx4) + + # fun_table = Dict(Sleef.atan2_fast => Base.atan2) + # tol = 2.5 + # test_acc(T, fun_table, xx, tol) + + # fun_table = Dict(Sleef.atan2 => Base.atan2) + # tol = 1 + # test_acc(T, fun_table, xx, tol) + + + # xx = map(T, vcat(0.0001:0.0001:10, 0.001:0.1:10000, 1.1.^(-1000:1000), 2.1.^(-1000:1000))) + # fun_table = Dict(Sleef.log_fast => Base.log) + # tol = 3 + # test_acc(T, fun_table, xx, tol) + + # fun_table = Dict(Sleef.log => Base.log) + # tol = 1 + # test_acc(T, fun_table, xx, tol) + + + # xx = map(T, vcat(0.0001:0.0001:10, 0.0001:0.1:10000)) + # fun_table = Dict(Sleef.log10 => Base.log10, Sleef.log2 => Base.log2) + # tol = 1 + # test_acc(T, fun_table, xx, tol) + + + # xx = map(T, vcat(0.0001:0.0001:10, 0.0001:0.1:10000, 10.0.^-(0:0.02:300), -10.0.^-(0:0.02:300))) + # fun_table = Dict(Sleef.log1p => Base.log1p) + # tol = 1 + # test_acc(T, fun_table, xx, tol) + + + # xx1 = map(Tuple{T,T}, [(x,y) for x = -100:0.20:100, y = 0.1:0.20:100])[:] + # xx2 = map(Tuple{T,T}, [(x,y) for x = -100:0.21:100, y = 0.1:0.22:100])[:] + # xx3 = map(Tuple{T,T}, [(x,y) for x = 2.1, y = -1000:0.1:1000]) + # xx = vcat(xx1, xx2, xx2) + # fun_table = Dict(Sleef.pow => Base.:^) + # tol = 1 + # test_acc(T, fun_table, xx, tol) + + + # xx = map(T, vcat(-10000:0.2:10000, 1.1.^(-1000:1000), 2.1.^(-1000:1000))) + # fun_table = Dict(Sleef.cbrt_fast => Base.cbrt) + # tol = 2 + # test_acc(T, fun_table, xx, tol) + + # fun_table = Dict(Sleef.cbrt => Base.cbrt) + # tol = 1 + # test_acc(T, fun_table, xx, tol) + + + + # xx = map(T, vcat(-10:0.0002:10, -120:0.023:1000, -1000:0.02:2000)) + # fun_table = Dict(Sleef.exp2 => Base.exp2) + # tol = 1 + # test_acc(T, fun_table, xx, tol) + + + # xx = map(T, vcat(-10:0.0002:10, -35:0.023:1000, -300:0.01:300)) + # fun_table = Dict(Sleef.exp10 => Base.exp10) + # tol = 1 + # test_acc(T, fun_table, xx, tol) + + + # xx = map(T, vcat(-10:0.0002:10, -1000:0.021:1000, -1000:0.023:1000, + # 10.0.^-(0:0.02:300), -10.0.^-(0:0.02:300), 10.0.^(0:0.021:300), -10.0.^-(0:0.021:300))) + # fun_table = Dict(Sleef.expm1 => Base.expm1) + # tol = 2 + # test_acc(T, fun_table, xx, tol) + + + # xx = map(T, vcat(-10:0.0002:10, -1000:0.02:1000)) + # fun_table = Dict(Sleef.sinh => Base.sinh, Sleef.cosh => Base.cosh, Sleef.tanh => Base.tanh) + # tol = 1 + # test_acc(T, fun_table, xx, tol) + + + + @testset "xilog2 at arbitrary values" begin + xd = Dict{T,Int}(T(1e-30) => -100, T(2.31e-11) => -36, T(-1.0) => 0, T(1.0) => 0, + T(2.31e11) => 37, T(1e30) => 99) + for (i,j) in xd + @test Sleef.ilog2(i) === j + end + end + +end diff --git a/test/dnml_nan.jl b/test/dnml_nan.jl new file mode 100644 index 0000000..4dd1a24 --- /dev/null +++ b/test/dnml_nan.jl @@ -0,0 +1,437 @@ +@testset "denormal/nonnumber $T" for T in (Float32, Float64) + +@testset "denormal/nonnumber $xatan2" for xatan2 in (Sleef.atan2_fast, Sleef.atan2) + + @test xatan2(T(0.0), T(-0.0)) === T(pi) + @test xatan2(T(-0.0), T(-0.0)) === -T(pi) + @test ispzero(xatan2(T(0.0), T(0.0))) + @test isnzero(xatan2(T(-0.0), T(0.0))) + @test xatan2( T(Inf), -T(Inf)) === T(3*pi/4) + @test xatan2(-T(Inf), -T(Inf)) === T(-3*pi/4) + @test xatan2( T(Inf), T(Inf)) === T(pi/4) + @test xatan2(-T(Inf), T(Inf)) === T(-pi/4) + + + y = T(0.0) + xa = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5] + for x in xa + @test xatan2(y,x) === T(pi) + end + + + y = T(-0.0) + xa = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5] + for x in xa + @test xatan2(y,x) === T(-pi) + end + + + ya = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5] + xa = T[T(0.0), T(-0.0)] + for x in xa, y in ya + @test xatan2(y,x) === T(-pi/2) + end + + + ya = T[100000.5, 100000, 3, 2.5, 2, 1.5, 1.0, 0.5] + xa = T[T(0.0), T(-0.0)] + for x in xa, y in ya + @test xatan2(y,x) === T(pi/2) + end + + + y = T(Inf) + xa = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5] + for x in xa + @test xatan2(y,x) === T(pi/2) + end + + + y = T(-Inf) + xa = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5] + for x in xa + @test xatan2(y,x) === T(-pi/2) + end + + + ya = T[0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5] + x = T(Inf) + for y in ya + @test ispzero(xatan2(y,x)) + end + + + ya = T[-0.5, -1.5, -2.0, -2.5, -3.0, -100000, -100000.5] + x = T(Inf) + for y in ya + @test isnzero(xatan2(y,x)) + end + + + ya = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5, NaN] + x = T(NaN) + for y in ya + @test isnan(xatan2(y,x)) + end + + + y = T(NaN) + xa = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5, NaN] + for x in xa + @test isnan(xatan2(y,x)) + end + +end # denormal/nonumber atan2 + + +@testset "denormal/nonnumber xpow" begin + + @test Sleef.pow(T(1), T(NaN)) === T(1) + @test Sleef.pow( T(NaN), T(0)) === T(1) + @test Sleef.pow(-T(1), T(Inf)) === T(1) + @test Sleef.pow(-T(1), T(-Inf)) === T(1) + + + xa = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5] + ya = T[-100000.5, -2.5, -1.5, -0.5, 0.5, 1.5, 2.5, 100000.5] + for x in xa, y in ya + @test isnan(Sleef.pow(x,y)) + end + + + x = T(NaN) + ya = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5] + for y in ya + @test isnan(Sleef.pow(x,y)) + end + + + xa = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5] + y = T(NaN) + for x in xa + @test isnan(Sleef.pow(x,y)) + end + + + x = T(0.0) + ya = T[1, 3, 5, 7, 100001] + for y in ya + @test ispzero(Sleef.pow(x,y)) + end + + + x = T(-0.0) + ya = T[1, 3, 5, 7, 100001] + for y in ya + @test isnzero(Sleef.pow(x,y)) + end + + + xa = T[0.0, -0.0] + ya = T[0.5, 1.5, 2.0, 2.5, 4.0, 100000, 100000.5] + for x in xa, y in ya + @test ispzero(Sleef.pow(x,y)) + end + + + xa = T[-0.999, -0.5, -0.0, +0.0, +0.5, +0.999] + y = T(-Inf) + for x in xa + @test Sleef.pow(x,y) === T(Inf) + end + + + xa = T[-100000.5, -100000, -3, -2.5, -2, -1.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5] + y = T(-Inf) + for x in xa + @test ispzero(Sleef.pow(x,y)) + end + + + xa = T[-0.999, -0.5, -0.0, +0.0, +0.5, +0.999] + y = T(Inf) + for x in xa + @test ispzero(Sleef.pow(x,y)) + end + + + xa = T[-100000.5, -100000, -3, -2.5, -2, -1.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5] + y = T(Inf) + for x in xa + @test Sleef.pow(x,y) === T(Inf) + end + + + x = T(-Inf) + ya = T[-100001, -5, -3, -1] + for y in ya + @test isnzero(Sleef.pow(x,y)) + end + + + x = T(-Inf) + ya = T[-100000.5, -100000, -4, -2.5, -2, -1.5, -0.5] + for y in ya + @test ispzero(Sleef.pow(x,y)) + end + + + x = T(-Inf) + ya = T[1, 3, 5, 7, 100001] + for y in ya + @test Sleef.pow(x,y) === T(-Inf) + end + + + x = T(-Inf) + ya = T[0.5, 1.5, 2, 2.5, 3.5, 4, 100000, 100000.5] + for y in ya + @test Sleef.pow(x,y) === T(Inf) + end + + + x = T(Inf) + ya = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5] + for y in ya + @test ispzero(Sleef.pow(x,y)) + end + + + x = T(Inf) + ya = T[0.5, 1, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5] + for y in ya + @test Sleef.pow(x,y) === T(Inf) + end + + + x = T(0.0) + ya = T[-100001, -5, -3, -1] + for y in ya + @test Sleef.pow(x,y) === T(Inf) + end + + + x = T(-0.0) + ya = T[-100001, -5, -3, -1] + for y in ya + @test Sleef.pow(x,y) === T(-Inf) + end + + + xa = T[0.0, -0.0] + ya = T[-100000.5, -100000, -4, -2.5, -2, -1.5, -0.5] + for x in xa, y in ya + @test Sleef.pow(x,y) === T(Inf) + end + + + xa = T[1000, -1000] + ya = T[1000, 1000.5, 1001] + for x in xa, y in ya + @test cmpdenorm(Sleef.pow(x,y), Base.:^(BigFloat(x),BigFloat(y))) + end + +end # denormal/nonumber pow + + +fun_table = Dict(Sleef.sin_fast => Base.sin, Sleef.sin => Base.sin) +@testset "denormal/nonnumber $xtrig" for (xtrig, trig) in fun_table + xa = T[NaN, -0.0, 0.0, Inf, -Inf] + for x in xa + @test cmpdenorm(xtrig(x), trig(BigFloat(x))) + end +end + + +fun_table = Dict(Sleef.cos_fast => Base.cos, Sleef.cos => Base.cos) +@testset "denormal/nonnumber $xtrig" for (xtrig, trig) in fun_table + xa = T[NaN, Inf, -Inf] + for x in xa + @test cmpdenorm(xtrig(x), trig(BigFloat(x))) + end +end + + +@testset "denormal/nonnumber sin in $xsincos"for xsincos in (Sleef.sincos_fast, Sleef.sincos) + xa = T[NaN, -0.0, 0.0, Inf, -Inf] + for x in xa + q = xsincos(x)[1] + @test cmpdenorm(q, Base.sin(BigFloat(x))) + end +end + + +@testset "denormal/nonnumber cos in $xsincos"for xsincos in (Sleef.sincos_fast, Sleef.sincos) + xa = T[NaN, Inf, -Inf] + for x in xa + q = xsincos(x)[2] + @test cmpdenorm(q, Base.cos(BigFloat(x))) + end +end + + +@testset "denormal/nonnumber $xtan" for xtan in (Sleef.tan_fast, Sleef.tan) + xa = T[NaN, Inf, -Inf, -0.0, 0.0, pi/2, -pi/2] + for x in xa + @test cmpdenorm(xtan(x), Base.tan(BigFloat(x))) + end +end + + +fun_table = Dict(Sleef.asin => Base.asin, Sleef.asin_fast => Base.asin, Sleef.acos => Base.acos, Sleef.acos_fast => Base.acos) +@testset "denormal/nonnumber $xatrig" for (xatrig, atrig) in fun_table + xa = T[NaN, Inf, -Inf, 2, -2, 1, -1] + for x in xa + @test cmpdenorm(xatrig(x), atrig(BigFloat(x))) + end +end + + +@testset "denormal/nonnumber $xatan" for xatan in (Sleef.atan, Sleef.atan_fast) + xa = T[NaN, Inf, -Inf] + for x in xa + @test cmpdenorm(xatan(x), Base.atan(BigFloat(x))) + end +end + + +@testset "denormal/nonnumber exp" begin + xa = T[NaN, Inf, -Inf, 10000, -10000] + for x in xa + @test cmpdenorm(Sleef.exp(x), Base.exp(BigFloat(x))) + end +end + + +@testset "denormal/nonnumber sinh" begin + xa = T[NaN, 0.0, -0.0, Inf, -Inf, 10000, -10000] + for x in xa + @test cmpdenorm(Sleef.sinh(x), Base.sinh(BigFloat(x))) + end +end + + +@testset "denormal/nonnumber cosh" begin + xa = T[NaN, 0.0, -0.0, Inf, -Inf, 10000, -10000] + for x in xa + @test cmpdenorm(Sleef.cosh(x), Base.cosh(BigFloat(x))) + end +end + + +@testset "denormal/nonnumber tanh" begin + xa = T[NaN, 0.0, -0.0, Inf, -Inf, 10000, -10000] + for x in xa + @test cmpdenorm(Sleef.tanh(x), Base.tanh(BigFloat(x))) + end +end + + +@testset "denormal/nonnumber asinh" begin + xa = T[NaN, 0.0, -0.0, Inf, -Inf, 10000, -10000] + for x in xa + @test cmpdenorm(Sleef.asinh(x), Base.asinh(BigFloat(x))) + end +end + + +@testset "denormal/nonnumber acosh" begin + xa = T[NaN, 0.0, -0.0, 1.0, Inf, -Inf, 10000, -10000] + for x in xa + @test cmpdenorm(Sleef.acosh(x), Base.acosh(BigFloat(x))) + end +end + + +@testset "denormal/nonnumber atanh" begin + xa = T[NaN, 0.0, -0.0, 1.0, -1.0, Inf, -Inf, 10000, -10000] + for x in xa + @test cmpdenorm(Sleef.atanh(x), Base.atanh(BigFloat(x))) + end +end + + +@testset "denormal/nonnumber $xcbrt" for xcbrt = (Sleef.cbrt, Sleef.cbrt_fast) + xa = T[NaN, Inf, -Inf, 0.0, -0.0] + for x in xa + @test cmpdenorm(Sleef.cbrt(x), Base.cbrt(BigFloat(x))) + end +end + + +@testset "denormal/nonnumber exp2" begin + xa = T[NaN, Inf, -Inf] + for x in xa + @test cmpdenorm(Sleef.exp2(x), Base.exp2(BigFloat(x))) + end +end + + +@testset "denormal/nonnumber exp10" begin + xa = T[NaN, Inf, -Inf] + for x in xa + @test cmpdenorm(Sleef.exp10(x), Base.exp10(BigFloat(x))) + end +end + + +@testset "denormal/nonnumber expm1" begin + xa = T[NaN, Inf, -Inf] + for x in xa + @test cmpdenorm(Sleef.expm1(x), Base.expm1(BigFloat(x))) + end +end + + + +@testset "denormal/nonnumber $xlog" for xlog in (Sleef.log, Sleef.log_fast) + xa = T[NaN, Inf, -Inf, 0, -1] + for x in xa + @test cmpdenorm(xlog(x), Base.log(BigFloat(x))) + end +end + + +@testset "denormal/nonnumber log10" begin + xa = T[NaN, Inf, -Inf, 0, -1] + for x in xa + @test cmpdenorm(Sleef.log10(x), Base.log10(BigFloat(x))) + end +end + + +@testset "denormal/nonnumber log2" begin + xa = T[NaN, Inf, -Inf, 0, -1] + for x in xa + @test cmpdenorm(Sleef.log2(x), Base.log2(BigFloat(x))) + end +end + + +@testset "denormal/nonnumber log1p" begin + xa = T[NaN, Inf, -Inf, 0.0, -0.0, -1.0, -2.0] + for x in xa + @test cmpdenorm(Sleef.log1p(x), Base.log1p(BigFloat(x))) + end +end + + +@testset "denormal/nonnumber ldexp" begin + for i = -10000:10000 + a = Sleef.ldexp(T(1.0), i) + b = Base.ldexp(BigFloat(1.0), i) + @test (isfinite(b) && a == b || cmpdenorm(a,b)) + end +end + + +@testset "denormal/nonnumber ilog2" begin + @test Sleef.ilog2(+T(Inf)) == typemax(Int) + @test Sleef.ilog2(-T(Inf)) == typemax(Int) + @test Sleef.ilog2(+T(0.0)) == typemin(Int) + @test Sleef.ilog2(-T(0.0)) == typemin(Int) + @test Sleef.ilog2( T(NaN)) == typemax(Int) +end + + +end #denormal/nonnumber diff --git a/test/runtests.jl b/test/runtests.jl new file mode 100644 index 0000000..166f6c9 --- /dev/null +++ b/test/runtests.jl @@ -0,0 +1,115 @@ +using Sleef +using Base.Test + +isnzero{T<:AbstractFloat}(x::T) = signbit(x) +ispzero{T<:AbstractFloat}(x::T) = !signbit(x) + +function cmpdenorm{Tx<:AbstractFloat, Ty<:AbstractFloat}(x::Tx, y::Ty) + sizeof(Tx) < sizeof(Ty) ? y = Tx(y) : x = Ty(x) # cast larger type to smaller type + (isnan(x) && isnan(y)) && return true + (isnan(x) || isnan(y)) && return false + (isinf(x) != isinf(y)) && return false + (x == Tx(Inf) && y == Ty(Inf)) && return true + (x == Tx(-Inf) && y == Ty(-Inf)) && return true + if y == 0 + (ispzero(x) && ispzero(y)) && return true + (isnzero(x) && isnzero(y)) && return true + return false + end + (!isnan(x) && !isnan(y) && !isinf(x) && !isinf(y)) && return sign(x) == sign(y) + return false +end + +# the following compares the ulp between x and y. +# First it promotes them to the larger of the two types x,y +infh(::Type{Float64}) = 1e+300 +infh(::Type{Float32}) = 1e+37 +function countulp(T, x::AbstractFloat, y::AbstractFloat) + X,Y = promote(x,y) + x, y = T(X), T(Y) # Cast to smaller type + (isnan(x) && isnan(y)) && return 0 + (isnan(x) || isnan(y)) && return 10000 + if isinf(x) + (sign(x) == sign(y) && abs(y) > infh(T)) && return 0 # Relaxed infinity handling + return 10001 + end + (x == Inf && y == Inf) && return 0 + (x == -Inf && y == -Inf) && return 0 + if y == 0 + (x == 0) && return 0 + return 10002 + end + if isfinite(x) && isfinite(y) + return T(abs(X - Y)/eps(y)) + end + return 10003 +end +countulp{T<:AbstractFloat}(x::T, y::T) = countulp(T,x,y) + +# get rid off annoying warnings from overwritten function +macro nowarn(expr) + quote + stderr = STDERR + stream = open("null", "w") + redirect_stderr(stream) + result = $(esc(expr)) + redirect_stderr(stderr) + close(stream) + result + end +end + +# overide domain checking that base adheres to +using Base.MPFR.ROUNDING_MODE +for f in (:sin, :cos, :tan, :asin, :acos, :atan, :asinh, :acosh, :atanh, :log, :log10, :log2, :log1p) + @eval begin + import Base.$f + @nowarn function ($f)(x::BigFloat) + z = BigFloat() + ccall($(string(:mpfr_,f), :libmpfr), Int32, (Ptr{BigFloat}, Ptr{BigFloat}, Int32), &z, &x, ROUNDING_MODE[]) + return z + end + end +end + +strip_module_name(f::Function) = last(split(string(f), '.')) # strip module name from function f + +# test the accuracy of a function where fun_table is a Dict mapping the function you want +# to test to a reference function +# xx is an array of values (which may be tuples for multiple arugment functions) +# tol is the acceptable tolerance to test against +function test_acc(T, fun_table, xx, tol; debug=false, tol_debug=5) + @testset "accuracy $(strip_module_name(xfun))" for (xfun, fun) in fun_table + rmax = 0.0 + rmean = 0.0 + xmax = map(zero, first(xx)) + for x in xx + q = xfun(x...) + c = fun(map(BigFloat,x)...) + u = countulp(T,q,c) + rmax = max(rmax, u) + xmax = rmax == u ? x : xmax + rmean += u + if debug && u > tol_debug + @printf("%s = %.20g\n%s = %.20g\nx = %.20g\nulp = %g\n", strip_module_name(xfun), q, strip_module_name(fun), T(c), x, ulp(T(c))) + end + end + rmean = rmean/length(xx) + + t = @test trunc(rmax,1) <= tol + + fmtxloc = isa(xmax, Tuple) ? string('(', join((@sprintf("%.5f", x) for x in xmax), ", "), ')') : @sprintf("%.5f", xmax) + println(rpad(strip_module_name(xfun), 18, " "), ": max ", @sprintf("%f", rmax), + rpad(" at x = "*fmtxloc, 40, " "), + ": mean ", @sprintf("%f", rmean)) + end +end + +function runtests() + @testset "Sleef" begin + include(joinpath(@__DIR__,"dnml_nan.jl")) + include(joinpath(@__DIR__,"accuracy.jl")) + end +end + +runtests()