/
utils.jl
174 lines (155 loc) · 5.1 KB
/
utils.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
#!/usr/bin/julia
module Utils
# LGPLv3 implementation from libstdc++
function poly_laguerre_large_n{Tp}(n::Integer, α, x::Tp)
a::Tp = -n
b::Tp = α + 1
η::Tp = 2b - 4a
cos²th = x / η
sin²th = 1 - cos²th
costh = @fastmath sqrt(cos²th)
th = @fastmath acos(costh)
pre_h = (Tp(π / 2)^2) * η * η * cos²th * sin²th
lg_b = @fastmath lgamma(Tp(n + b))
lnfact = @fastmath lgamma(Tp(n + 1))
pre_term1 = @fastmath Tp(0.5) * (1 - b) * log(Tp(0.25) * x * η)
pre_term2 = @fastmath Tp(0.25) * log(pre_h)
lnpre = lg_b - lnfact + Tp(0.5) * x + pre_term1 - pre_term2
th2 = 2 * th
sin2th = @fastmath 2 * costh * sqrt(sin²th)
# From libstdc++
ser_term1 = 0 # @fastmath sinpi(a)
# This might be off by a minus sign or sth like that.
# Evaluating at `10000001, 10, 1.2` gives the wrong result
ser_term2 = @fastmath sin(Tp(0.25) * η * (th2 - sin2th) + Tp(π / 4))
ser::Tp = ser_term1 + ser_term2
return @fastmath exp(lnpre) * ser
end
function poly_laguerre_hyperg{Tp}(n::Integer, α, x::Tp)
b::Tp = Tp(α) + 1
mx = -x
tc_sgn::Tp = x < 0 ? 1 : ((n % 2 == 1) ? -1 : 1)
# Get |x|^n/n!
tc::Tp = 1
ax = abs(x)
for k in 1:n
tc *= ax / k
end
term::Tp = tc * tc_sgn
_sum::Tp = term
for k in (n - 1):-1:0
term *= ((b + Tp(k)) / Tp(n - k)) * Tp(k + 1) / mx
_sum += term
end
return _sum
end
function poly_laguerre_recursion{Tp}(n::Integer, α, x::Tp)
# Compute l_0.
l_0::Tp = 1
n == 0 && return l_0
# Compute l_1^alpha.
l_1::Tp = -x + 1 + α
n == 1 && return l_1
# Compute l_n^alpha by recursion on n.
l_n′::Tp = l_0
l_n::Tp = l_1
b::Tp = α - 1
a::Tp = b - x
@fastmath for nn in 2:n
fnn = Tp(nn)
l1 = muladd(a, l_n, -b * l_n′)
l2 = muladd(2, l_n, -l_n′)
l_n, l_n′ = l1 / fnn + l2, l_n
end
return l_n
end
function genlaguerre{Tp<:AbstractFloat}(n::Integer, α, x::Tp)::Tp
if x < 0
throw(DomainError())
elseif isnan(α) || isnan(x)
# Return NaN on NaN input.
return NaN
elseif n == 0
return 1
elseif n == 1
return Tp(1) + Tp(α) - x
elseif x == 0
prod::Tp = α + 1
for k in 2:n
prod *= Tp(α + k) / Tp(k)
end
return prod
elseif n > 10000000 && α > -1 && x < 2 * (α + 1) + 4n
return poly_laguerre_large_n(n, α, x)
elseif α >= 0 || (x > 0 && α < -(n + 1))
return poly_laguerre_recursion(n, α, x)
else
return poly_laguerre_hyperg(n, α, x)
end
end
genlaguerre(n::Integer, α, x) = genlaguerre(n, α, float(x))
function binomial_estimate{T<:AbstractFloat}(x, n, z::T=1.0)
if n <= 0
return T(0.5), T(0.5)
end
p = T(x / n)
z² = z^2
z²n = z² / n
p′::T = (p + z²n / 2) / (1 + z²n)
unc::T = sqrt(p * (1 - p) / n + z² / 4 / n^2) / (1 + z²n)
return p′, unc
end
@inline function sincos(v::Float64)
Base.llvmcall("""
%f = bitcast i8 *%1 to void (double, double *, double *)*
%pres = alloca [2 x double]
%p1 = getelementptr inbounds [2 x double], [2 x double]* %pres, i64 0, i64 0
%p2 = getelementptr inbounds [2 x double], [2 x double]* %pres, i64 0, i64 1
call void %f(double %0, double *nocapture noalias %p1, double *nocapture noalias %p2)
%res = load [2 x double], [2 x double]* %pres
ret [2 x double] %res
""", Tuple{Float64,Float64}, Tuple{Float64,Ptr{Void}}, v, cglobal((:sincos, Base.libm_name)))
end
@inline function sincos(v::Float32)
Base.llvmcall("""
%f = bitcast i8 *%1 to void (float, float *, float *)*
%pres = alloca [2 x float]
%p1 = getelementptr inbounds [2 x float], [2 x float]* %pres, i64 0, i64 0
%p2 = getelementptr inbounds [2 x float], [2 x float]* %pres, i64 0, i64 1
call void %f(float %0, float *nocapture noalias %p1, float *nocapture noalias %p2)
%res = load [2 x float], [2 x float]* %pres
ret [2 x float] %res
""", Tuple{Float32,Float32}, Tuple{Float32,Ptr{Void}}, v, cglobal((:sincosf, Base.libm_name)))
end
@inline function sincos(v)
@fastmath (sin(v), cos(v))
end
const ThreadRNG = MersenneTwister[]
const _interactive = Ref(true)
function __init__()
# Allocate the random number generator on the thread's own heap
# instead of the master thread heap to minimize memory conflict
nth = Threads.nthreads()
resize!(ThreadRNG, nth)
init_rng = function ()
tid = Threads.threadid()
ThreadRNG[tid] = MersenneTwister(0)
end
ccall(:jl_threading_run, Ref{Void}, (Any,), init_rng)
interactive_str = get(ENV, "NACS_INTERACT", "true")
if interactive_str == "false" || interactive_str == "0"
_interactive[] = false
end
end
@inline function thread_rng()
# Bypass bounds check and NULL check
Base.llvmcall(
"""
%p = load i8**, i8*** %0
ret i8** %p
""", MersenneTwister, Tuple{Ptr{Ptr{Ptr{Void}}}},
Ptr{Ptr{Ptr{Void}}}(pointer(ThreadRNG) + (Threads.threadid() - 1) * sizeof(Int)))
end
@inline trand() = rand(thread_rng())
@inline interactive() = _interactive[]
end