/
Stochasticity.jl
92 lines (70 loc) · 3.62 KB
/
Stochasticity.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# Part of submodule Utils of BetaML - the Beta Machine Learning Toolkit
# Various helper/ utility functions concerning stochastiticy management
"""
FIXEDSEED
Fixed seed to allow reproducible results.
This is the seed used to obtain the same results under unit tests.
Use it with:
- `myAlgorithm(;rng=FIXEDRNG)` # always produce the same sequence of results on each run of the script ("pulling" from the same rng object on different calls)
- `myAlgorithm(;rng=copy(FIXEDRNG)` # always produce the same result (new rng object on each call)
"""
const FIXEDSEED = 123
"""
FIXEDRNG
Fixed ring to allow reproducible results
Use it with:
- `myAlgorithm(;rng=FIXEDRNG)` # always produce the same sequence of results on each run of the script ("pulling" from the same rng object on different calls)
- `myAlgorithm(;rng=copy(FIXEDRNG))` # always produce the same result (new rng object on each function call)
"""
const FIXEDRNG = StableRNG(FIXEDSEED) #StableRNG(FIXEDSEED) Random.default_rng() #MersenneTwister(FIXEDSEED)
#const FIXEDRNG = MersenneTwister(FIXEDSEED) #StableRNG(FIXEDSEED) Random.default_rng()
"""
generateParallelRngs(rng::AbstractRNG, n::Integer;reSeed=false)
For multi-threaded models, return n independent random number generators (one per thread) to be used in threaded computations.
Note that each ring is a _copy_ of the original random ring. This means that code that _use_ these RNGs will not change the original RNG state.
Use it with `rngs = generateParallelRngs(rng,Threads.nthreads())` to have a separate rng per thread.
By default the function doesn't re-seed the RNG, as you may want to have a loop index based re-seeding strategy rather than a threadid-based one (to guarantee the same result independently of the number of threads).
If you prefer, you can instead re-seed the RNG here (using the parameter `reSeed=true`), such that each thread has a different seed. Be aware however that the stream of number generated will depend from the number of threads at run time.
"""
function generateParallelRngs(rng::AbstractRNG, n::Integer;reSeed=false)
if reSeed
seeds = [rand(rng,100:18446744073709551615) for i in 1:n] # some RNGs have issues with too small seed
rngs = [deepcopy(rng) for i in 1:n]
return Random.seed!.(rngs,seeds)
else
return [deepcopy(rng) for i in 1:n]
end
end
"""
shuffle(data;dims,rng)
Shuffle a vector of n-dimensional arrays across dimension `dims` keeping the same order between the arrays
# Parameters
- `data`: The vector of arrays to shuffle
- `dims`: The dimension over to apply the shuffle [def: `1`]
- `rng`: An `AbstractRNG` to apply for the shuffle
# Notes
- All the arrays must have the same size for the dimension to shuffle
# Example
```
julia> a = [1 2 30; 10 20 30]; b = [100 200 300];
julia> (aShuffled, bShuffled) = shuffle([a,b],dims=2)
2-element Vector{Matrix{Int64}}:
[1 30 2; 10 30 20]
[100 300 200]
```
"""
function shuffle(data::AbstractArray{T,1};dims=1,rng=Random.GLOBAL_RNG) where T <: AbstractArray
Ns = [size(m,dims) for m in data]
length(Set(Ns)) == 1 || @error "In `shuffle(arrays)` all individual arrays need to have the same size on the dimension specified"
N = Ns[1]
ridx = Random.shuffle(rng, 1:N)
out = similar(data)
for (i,a) in enumerate(data)
aidx = convert(Vector{Union{UnitRange{Int64},Vector{Int64}}},[1:i for i in size(a)])
#aidx = [collect(1:i) for i in size(a)]
aidx[dims] = ridx
out[i] = a[aidx...]
end
return out
end
shuffle(rng::AbstractRNG,data::AbstractArray{T,1};dims=1) where T <: AbstractArray = shuffle(data;dims=dims,rng=rng)