In [67]:
# Load Revise and Pkg, activate the BpAlignGpu.jl project environment, then load the package.
# NOTE(review): the project path is absolute and machine-specific; adjust it before
# running this notebook elsewhere.
using Revise, Pkg
Pkg.activate("/home/louise/MSA/BpAlignGpu.jl")
using BpAlignGpu

[32m[1m  Activating[22m[39m project at `~/MSA/BpAlignGpu.jl`


In [68]:
# Load CUDA.jl and make the device with ordinal 1 the active one for this session
# (the recorded output reports it as a TITAN RTX).
using CUDA
CUDA.device!(1)

CuDevice(1): TITAN RTX

In [69]:
# Constants shared by the cells below.
q = 21            # size of the first axis of H and J; also passed to ParamModel
ctype = :amino    # passed to Seq
typel = :bm       # not referenced by any other cell visible in this notebook
T = Float32       # element type used for ParamAlgo and ParamModel
muext = 0.5;
muint = 2.5;

In [70]:
# Algorithm settings, listed in the positional order expected by ParamAlgo.
damp = zero(T)
tol = T(1e-5)
tolnorm = T(1e-5)
tmax = 10
upscheme = :sequential  # :random or :sequential
lr = :sce               # :sce or :mf
beta = one(T)
verbose = true
pa = ParamAlgo(damp, tol, tolnorm, tmax, upscheme, lr, beta, verbose)



ParamAlgo{Float32}
-------------
damp=0.0
tol=1.0e-5
tolnorm=1.0e-5
tmax=10
upscheme=sequential
lr=sce
beta=1.0
verbose=true
-------------

In [71]:
# Build a small random test problem: a length-N sequence plus random model parameters.
(N, L) = (6, 5)
#(N, L) = (161, 67)

using Random
# Use an explicitly seeded RNG so that the sequence, H and J -- and therefore every
# number printed by the cells below -- are identical on each run of the notebook.
rng = MersenneTwister(1234)

header = "myseq"
# NOTE(review): 'A':'Z' contains letters that are not standard amino-acid codes
# (e.g. B, J, O, U, X, Z); confirm how Seq maps them for the "amino" alphabet.
myseq = randstring(rng, 'A':'Z', N)
seq = Seq(header, myseq, ctype)

lambda_o = ones(L)
lambda_e = ones(L)
H = rand(rng, q, L)
J = rand(rng, q, q, L, L)
# Symmetrize so that J[a, b, i, j] == J[b, a, j, i].
J = J .+ permutedims(J, (2, 1, 4, 3));

In [72]:
# Assemble the model parameters with element type T from the sizes, the two mu values
# (muint first, then muext), the lambda vectors and the H / J arrays.
pm = ParamModel{T}(N, L, q, muint, muext, lambda_o, lambda_e, H, J)

ParamModel{Float32}[L=5 N=6 q=21 size=43.547 KiB]

In [73]:
# Construct the BP message container from the sequence and the model parameters
# (the recorded output reports ongpu=true).
bpm = BPMessages(seq, pm)

BPMessages{Float32}[L=5 N=6 ongpu=true size=26.914 KiB]

In [74]:
# Construct the belief container for the same N and L.
bpb = BPBeliefs(N, L)

BPBeliefs{Float32}[L=5 N=6 ongpu=true size=30.625 KiB]

In [75]:
# Construct the long-range field container for the same N and L.
lrf = LongRangeFields(N, L)

LongRangeFields{Float32}[L=5 N=6 ongpu=true size=5.312 KiB]

In [76]:
# Bundle messages, beliefs and long-range fields into the single object that is
# passed to every update call below.
af = AllFields(bpm, bpb, lrf)

AllFields{Float32}[L=5 N=6 ongpu=true size=62.852 KiB]

In [77]:
# Time each update step once. GPU work is launched asynchronously, so every call is
# wrapped in CUDA.@sync: the reported time then covers the GPU work itself instead of
# only the time needed to enqueue it.
@time CUDA.@sync BpAlignGpu.update_F!(af, pm, pa)
@time CUDA.@sync BpAlignGpu.update_hF!(af, pm, pa)
@time CUDA.@sync BpAlignGpu.update_B!(af, pm, pa)
@time CUDA.@sync BpAlignGpu.update_hB!(af, pm, pa)
@time CUDA.@sync BpAlignGpu.update_beliefs!(af, pm, pa)
@time CUDA.@sync BpAlignGpu.update_jointchain!(af, pm, pa)
@time CUDA.@sync BpAlignGpu.update_conditional_chain!(af, pa)
@time CUDA.@sync BpAlignGpu.update_conditional_all!(af, pm)


  0.001281 seconds (752 allocations: 47.906 KiB)
  0.000916 seconds (777 allocations: 49.203 KiB)
  0.000791 seconds (752 allocations: 48.062 KiB)
  0.001097 seconds (824 allocations: 52.562 KiB)
  0.000523 seconds (378 allocations: 24.812 KiB)
  0.000795 seconds (417 allocations: 30.656 KiB)
  0.000452 seconds (372 allocations: 22.000 KiB)
  0.000708 seconds (528 allocations: 30.375 KiB)


In [78]:
# Time the low-memory g update and keep its return value for comparison.
# CUDA.@sync waits for the GPU so the timing is not just the launch overhead.
@time resg = CUDA.@sync BpAlignGpu.update_g_lowmem!(af);

  0.001281 seconds (932 allocations: 48.281 KiB)


In [79]:
# Time the regular g update; CUDA.@sync waits for the GPU so the timing is not just
# the launch overhead.
@time CUDA.@sync BpAlignGpu.update_g!(af)

  0.001232 seconds (849 allocations: 44.562 KiB)


In [80]:
# Sum of absolute differences between af.lrf.g and resg (the value returned by
# update_g_lowmem!); a value close to zero means the two agree.
sum(abs.(af.lrf.g - resg))

5.38826f-5

In [184]:
# Time the regular f update and keep its return value for comparison.
# CUDA.@sync waits for the GPU so the timing is not just the launch overhead.
@time m1 = CUDA.@sync BpAlignGpu.update_f!(af);

  2.314681 seconds (2.85 M allocations: 148.348 MiB, 3.40% gc time, 85.67% compilation time)


In [185]:
# Time the low-memory f update and keep its return value for comparison.
# CUDA.@sync waits for the GPU so the timing is not just the launch overhead.
@time resf = CUDA.@sync BpAlignGpu.update_f_lowmem!(af);

  0.000642 seconds (405 allocations: 21.766 KiB)


In [186]:
# Sum of absolute differences between the values returned by update_f_lowmem! and
# update_f!.
# NOTE(review): the recorded output is `false` rather than a Float32 -- confirm what
# update_f! actually returns before relying on this check.
sum(abs.(resf - m1))

false

In [124]:
# Sum of absolute differences between af.lrf.f and resf.
# NOTE(review): the recorded value (~119.8) means the two do not agree. Other cells in
# this notebook display af.lrf.f[:, :, i] as 2×8 and resf[:, :, i] as 8×2, so check
# whether the two arrays use the same axis order.
sum(abs.(af.lrf.f - resf))

119.821396f0

In [113]:
# Flatten the 6-D conditional array into a square matrix.
# np1 is the length of its first axis. The permutation (1, 2, 5, 3, 4, 6) moves axis 5
# next to axes 1-2 and leaves axis 6 after axes 3-4, so each side of the reshaped
# matrix has L * 2 * np1 entries, with the first-axis index varying fastest.
np1 = size(af.bpb.conditional, 1)
cond = reshape(permutedims(af.bpb.conditional, (1, 2, 5, 3, 4, 6)), L * 2 * np1, L * 2 * np1);

In [114]:
# Index ranges selecting one np1 × np1 block of the flattened matrix: (xi, i) picks
# the rows and (xj, j) the columns, using the layout
# n + (x - 1) * np1 + (site - 1) * 2 * np1.
i = 2
xi = 1
ri = ((i - 1) * 2 * np1 + (xi - 1) * np1 + 1):((i - 1) * 2 * np1 + (xi - 1) * np1 + np1)
j = 5
xj = 2
rj = ((j - 1) * 2 * np1 + (xj - 1) * np1 + 1):((j - 1) * 2 * np1 + (xj - 1) * np1 + np1)

73:80

In [115]:
# Block of the flattened matrix with rows (xi, i) and columns (xj, j).
cond[ri, rj]

8×8 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
 0.0  1.0  0.998874    0.967665     0.899388    0.857904    0.562443     0.0
 0.0  0.0  3.27287f-5  0.00092012   0.00278995  0.00304471  0.00240695   0.0
 0.0  0.0  0.0         0.000341878  0.00191536  0.00461174  0.00644836   0.0
 0.0  0.0  0.0         0.0          3.64467f-5  0.00039279  0.00357562   0.0
 0.0  0.0  0.0         0.0          0.0         4.3499f-6   0.000248098  0.0
 0.0  0.0  0.0         0.0          0.0         0.0         8.52414f-5   0.0
 0.0  0.0  0.0         0.0          0.0         0.0         0.0          0.0
 0.0  0.0  0.0         0.0          0.0         0.0         0.0          0.0

In [116]:
# The same block read directly from the 6-D array; it should equal cond[ri, rj].
af.bpb.conditional[:, xi, :, xj, i, j]

8×8 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
 0.0  1.0  0.998874    0.967665     0.899388    0.857904    0.562443     0.0
 0.0  0.0  3.27287f-5  0.00092012   0.00278995  0.00304471  0.00240695   0.0
 0.0  0.0  0.0         0.000341878  0.00191536  0.00461174  0.00644836   0.0
 0.0  0.0  0.0         0.0          3.64467f-5  0.00039279  0.00357562   0.0
 0.0  0.0  0.0         0.0          0.0         4.3499f-6   0.000248098  0.0
 0.0  0.0  0.0         0.0          0.0         0.0         8.52414f-5   0.0
 0.0  0.0  0.0         0.0          0.0         0.0         0.0          0.0
 0.0  0.0  0.0         0.0          0.0         0.0         0.0          0.0

In [117]:
# Adjoint of the flattened matrix (a plain transpose here, since the entries are real).
C = cond';

In [118]:
# Show the untransposed block again, for comparison with the transposed one below.
cond[ri, rj]

8×8 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
 0.0  1.0  0.998874    0.967665     0.899388    0.857904    0.562443     0.0
 0.0  0.0  3.27287f-5  0.00092012   0.00278995  0.00304471  0.00240695   0.0
 0.0  0.0  0.0         0.000341878  0.00191536  0.00461174  0.00644836   0.0
 0.0  0.0  0.0         0.0          3.64467f-5  0.00039279  0.00357562   0.0
 0.0  0.0  0.0         0.0          0.0         4.3499f-6   0.000248098  0.0
 0.0  0.0  0.0         0.0          0.0         0.0         8.52414f-5   0.0
 0.0  0.0  0.0         0.0          0.0         0.0         0.0          0.0
 0.0  0.0  0.0         0.0          0.0         0.0         0.0          0.0

In [119]:
# The same index ranges in the transposed matrix, i.e. the transpose of cond[rj, ri].
C[ri, rj]

8×8 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
 0.0  0.00021023  0.00569974   0.0418579    …  0.161311     0.144922   0.0
 0.0  0.0         0.000876587  0.0147588       0.226081     0.155462   0.0
 0.0  0.0         0.0          0.000810509     0.233398     0.212234   0.0
 0.0  0.0         0.0          0.0             0.024813     0.214387   0.0
 0.0  0.0         0.0          0.0             0.000289854  0.060285   0.0
 0.0  0.0         0.0          0.0          …  0.0          0.0164453  0.0
 0.0  0.0         0.0          0.0             0.0          0.0        0.0
 0.0  0.0         0.0          0.0             0.0          0.0        0.0

In [120]:
# Direct 6-D lookup with the roles of (xi, i) and (xj, j) swapped, then transposed;
# it should equal C[ri, rj].
(af.bpb.conditional[:, xj, :, xi, j, i])'

8×8 adjoint(::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}) with eltype Float32:
 0.0  0.00021023  0.00569974   0.0418579    …  0.161311     0.144922   0.0
 0.0  0.0         0.000876587  0.0147588       0.226081     0.155462   0.0
 0.0  0.0         0.0          0.000810509     0.233398     0.212234   0.0
 0.0  0.0         0.0          0.0             0.024813     0.214387   0.0
 0.0  0.0         0.0          0.0             0.000289854  0.060285   0.0
 0.0  0.0         0.0          0.0          …  0.0          0.0164453  0.0
 0.0  0.0         0.0          0.0             0.0          0.0        0.0
 0.0  0.0         0.0          0.0             0.0          0.0        0.0

In [109]:
# Slice of af.lrf.f at third-axis index i (this reassigns the notebook-global i).
# NOTE(review): the recorded output is a 2×8 adjoint, whereas resf[:, :, i] is shown
# as 8×2 elsewhere in this notebook -- confirm the intended axis order.
i=3
af.lrf.f[:,:,i]

2×8 adjoint(::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}) with eltype Float32:
 -3.99467  -3.77999  -4.20178  -3.81247  …  -3.87028  -2.08638  -4.11205
  0.0      -3.84679  -3.91734  -4.20628     -3.89669  -3.64134   0.0

In [66]:
# Slice of the update_f_lowmem! result at third-axis index i, for comparison with
# the corresponding slice of af.lrf.f.
resf[:,:,i]

8×2 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
 -4.40205  -0.0
 -2.64382  -2.65172
 -2.85175  -2.71133
 -2.93834  -2.94286
 -1.60939  -1.61924
 -2.31549  -2.43716
 -1.93683  -3.13591
 -1.13989  -0.0

In [34]:
#resfm = BpAlignGpu.update_f_memory!(af);

In [35]:
#resgm = BpAlignGpu.update_g_memory!(af);

In [17]:
# Call one_bp_sweep! once on the current fields (presumably one full pass of the
# update steps timed earlier in this notebook -- confirm in the package source).
BpAlignGpu.one_bp_sweep!(af, pm, pa)

In [18]:
# Run test_sweep! with first argument 100; the recorded log stops at t=19, once err
# drops below tol.
# NOTE(review): the log runs past tmax=10 from ParamAlgo, so the limit used here is
# presumably the first argument -- confirm.
BpAlignGpu.test_sweep!(100,af,pm,pa)

t=1	 err=0.14632031
t=2	 err=0.2410874
t=3	 err=0.037026465
t=4	 err=0.016859531
t=5	 err=0.01909548
t=6	 err=0.0031093322
t=7	 err=0.0040263534
t=8	 err=0.0015021563
t=9	 err=0.0011902452
t=10	 err=0.0009968281
t=11	 err=0.00076025724
t=12	 err=0.000351429
t=13	 err=0.00020188093
t=14	 err=0.00010347366
t=15	 err=5.26011e-5
t=16	 err=2.4735928e-5
t=17	 err=1.5556812e-5
t=18	 err=1.335144e-5
t=19	 err=8.225441e-6
converged: err=8.225441e-6, tol=1.0e-5


In [19]:
# Evaluate lr_freeen on the current fields (the name suggests a long-range
# free-energy term -- confirm); the recorded output is a Float64 scalar.
BpAlignGpu.lr_freeen(af, pm)

6.364505648612976

In [20]:
# Evaluate logZi; the recorded output is a tuple of a scalar and an array whose
# entries sum to that scalar.
BpAlignGpu.logZi(af, pm, pa)

(-21.221033f0, [-0.30802572;;; -6.5986495;;; -7.6639633;;; -6.644477;;; -0.005917739])

In [21]:
# Evaluate logZa; the recorded output is a tuple of a scalar and an array whose
# entries sum to that scalar.
BpAlignGpu.logZa(af, pm, pa)

(10.904351f0, [1.9342438;;;;; 3.2062142;;;;; 3.659298;;;;; 2.104595;;;;; 0.0])

In [22]:
# Evaluate logZia; the recorded output is a single Float32 scalar.
BpAlignGpu.logZia(af, pm)

-26.486605f0