In [1]:
# Activate the local BpAlignGpu.jl project environment, then load the package.
using Revise, Pkg
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
Pkg.activate("/home/louise/MSA/BpAlignGpu.jl")
using BpAlignGpu

[32m[1m  Activating[22m[39m project at `~/MSA/BpAlignGpu.jl`
┌ Info: Precompiling BpAlignGpu [5a3eb610-29b2-4cbe-8ba2-ea97f65fa95d]
└ @ Base loading.jl:1423


In [2]:
using CUDA
# Select the GPU with ordinal 1 for this session (the recorded output reports
# it as a TITAN RTX).
# NOTE(review): hard-coded ordinal; presumably requires more than one visible
# GPU — confirm before running on another machine.
CUDA.device!(1)

│ For performance reasons, it is recommended to upgrade to a driver that supports CUDA 11.2 or higher.
└ @ CUDA /home/louise/.julia/packages/CUDA/bki2w/src/initialization.jl:70


CuDevice(1): TITAN RTX

In [3]:
# Scalar settings reused by the cells below: q, the two symbolic tags, the
# element type T, and the two mu values passed to ParamModel.
q = 21
ctype = :amino
typel = :bm
T = Float32
muext = 0.5;
muint = 2.5;

In [4]:
# Algorithm settings, converted to element type T where applicable and bundled
# into a ParamAlgo (positional order: damp, tol, tolnorm, tmax, upscheme, lr,
# beta, verbose).
damp = zero(T)
tol = T(1e-5)
tolnorm = T(1e-5)
tmax = 10
upscheme = :sequential  # :random or :sequential
lr = :sce               # :sce or :mf
beta = one(T)
verbose = true
pa = ParamAlgo(damp, tol, tolnorm, tmax, upscheme, lr, beta, verbose)



ParamAlgo{Float32}
-------------
damp=0.0
tol=1.0e-5
tolnorm=1.0e-5
tmax=10
upscheme=sequential
lr=sce
beta=1.0
verbose=true
-------------

In [5]:
using Random

# Toy problem size: N is the length of the random sequence, L the number of
# columns of H (and the size of the last two dimensions of J).
(N, L) = (6, 5)
#(N, L) = (161, 67)

# Explicitly seeded generator so the random sequence, fields and couplings are
# identical on every run (for a given Julia version) instead of changing each
# time the cell is executed.
rng = MersenneTwister(1234)

header = "myseq"
# NOTE(review): 'A':'Z' also produces letters outside the 20 standard amino
# acids (B, J, O, U, X, Z); how Seq handles them is not visible here — confirm.
myseq = randstring(rng, 'A':'Z', N)
seq = Seq(header, myseq, ctype)

lambda_o = ones(L)
lambda_e = ones(L)
H = rand(rng, q, L)
J = rand(rng, q, q, L, L)
# Symmetrize the couplings so that J[a, b, i, j] == J[b, a, j, i].
J = J .+ permutedims(J, (2, 1, 4, 3));

In [6]:
# Assemble the model parameters with element type T from the sizes, the two mu
# values, the lambda vectors and the H / J arrays defined in the cells above.
pm = ParamModel{T}(N, L, q, muint, muext, lambda_o, lambda_e, H, J)

ParamModel{Float32}[L=5 N=6 q=21 size=43.547 KiB]

In [7]:
# Build the BP message container from the sequence and the model parameters
# (the recorded summary reports ongpu=true).
bpm = BPMessages(seq, pm)

BPMessages{Float32}[L=5 N=6 ongpu=true size=26.914 KiB]

In [8]:
# Allocate the belief container for an N-by-L problem
# (the recorded summary reports ongpu=true).
bpb = BPBeliefs(N, L)

BPBeliefs{Float32}[L=5 N=6 ongpu=true size=30.625 KiB]

In [9]:
# Allocate the long-range field container for an N-by-L problem
# (the recorded summary reports ongpu=true).
lrf = LongRangeFields(N, L)

LongRangeFields{Float32}[L=5 N=6 ongpu=true size=5.312 KiB]

In [10]:
# Bundle messages, beliefs and long-range fields into the single object that
# every update routine below receives as its first argument.
af = AllFields(bpm, bpb, lrf)

AllFields{Float32}[L=5 N=6 ongpu=true size=62.852 KiB]

In [11]:
# First call of each update step. The recorded timings are dominated by JIT
# compilation (see the reported compilation-time percentages), so treat this
# cell as a warm-up rather than a benchmark.
# CUDA.@sync makes @time wait for the GPU work to complete instead of possibly
# measuring only the asynchronous launch.
@time CUDA.@sync BpAlignGpu.update_F!(af, pm, pa)
@time CUDA.@sync BpAlignGpu.update_hF!(af, pm, pa)
@time CUDA.@sync BpAlignGpu.update_B!(af, pm, pa)
@time CUDA.@sync BpAlignGpu.update_hB!(af, pm, pa)
@time CUDA.@sync BpAlignGpu.update_beliefs!(af, pm, pa)
@time CUDA.@sync BpAlignGpu.update_jointchain!(af, pm, pa)
@time CUDA.@sync BpAlignGpu.update_conditional_chain!(af, pa)
@time CUDA.@sync BpAlignGpu.update_conditional_all!(af, pm)


 60.367893 seconds (70.79 M allocations: 3.682 GiB, 4.48% gc time, 52.67% compilation time)
  4.822871 seconds (7.21 M allocations: 387.819 MiB, 2.58% gc time, 74.42% compilation time)
  4.089363 seconds (5.66 M allocations: 300.234 MiB, 2.39% gc time, 90.89% compilation time)
  5.655932 seconds (8.95 M allocations: 474.858 MiB, 3.76% gc time, 87.86% compilation time)
  2.143898 seconds (3.30 M allocations: 174.266 MiB, 2.00% gc time, 80.40% compilation time)
 19.020379 seconds (31.35 M allocations: 1.570 GiB, 3.64% gc time, 89.71% compilation time)
  4.423168 seconds (6.73 M allocations: 353.698 MiB, 5.56% gc time, 89.26% compilation time)
  4.859362 seconds (4.74 M allocations: 248.938 MiB, 3.21% gc time, 76.33% compilation time)


In [19]:
# Time update_gold! and keep its result for the comparison further down.
# CUDA.@sync returns the call's value after waiting for the GPU, so @time
# covers the actual computation rather than only the asynchronous launch.
@time resgo = CUDA.@sync BpAlignGpu.update_gold!(af);

  0.000517 seconds (319 allocations: 17.109 KiB)


In [20]:
# Time update_g_lowmem! and keep its result for the comparison further down.
# CUDA.@sync returns the call's value after waiting for the GPU, so @time
# covers the actual computation rather than only the asynchronous launch.
@time resg = CUDA.@sync BpAlignGpu.update_g_lowmem!(af);

  0.001217 seconds (878 allocations: 44.672 KiB)


In [21]:
# Time update_g!. CUDA.@sync waits for the GPU before @time stops the clock,
# so the asynchronous launch alone is not what gets measured.
@time CUDA.@sync BpAlignGpu.update_g!(af)

  0.001108 seconds (813 allocations: 42.156 KiB)


In [27]:
# Consistency check between the g implementations, as summed absolute
# differences: (1) af.lrf.g vs. the result of update_g_lowmem!, and (2) the
# first L-1 slices along dimension 5 of that result vs. the update_gold! result.
# Both values should be at Float32 round-off level.
sum(abs.(af.lrf.g - resg)), sum(abs.(resg[:,:,:,:,1:L-1] - resgo))

(6.747246f-5, 0.00013566017f0)

In [32]:
# Time update_fold! and keep its result for the comparison further down.
# CUDA.@sync returns the call's value after waiting for the GPU, so @time
# covers the actual computation rather than only the asynchronous launch.
@time resfo = CUDA.@sync BpAlignGpu.update_fold!(af);

  0.000561 seconds (285 allocations: 14.219 KiB)


In [33]:
# Time update_f!. CUDA.@sync waits for the GPU before @time stops the clock,
# so the asynchronous launch alone is not what gets measured.
@time CUDA.@sync BpAlignGpu.update_f!(af);

  0.000814 seconds (327 allocations: 17.891 KiB)


In [34]:
# Time update_f_lowmem! and keep its result for the comparison further down.
# CUDA.@sync returns the call's value after waiting for the GPU, so @time
# covers the actual computation rather than only the asynchronous launch.
@time resf = CUDA.@sync BpAlignGpu.update_f_lowmem!(af);

  0.000809 seconds (401 allocations: 20.875 KiB)


In [35]:
# Consistency check between the f implementations: summed absolute difference
# of af.lrf.f against the update_f_lowmem! result and against the update_fold!
# result. Both values should be at Float32 round-off level.
sum(abs.(af.lrf.f - resf)), sum(abs.(af.lrf.f - resfo)) 

(4.053116f-6, 4.7683716f-6)

In [17]:
# Run a single BP sweep on the fields with the current model and algorithm
# parameters.
BpAlignGpu.one_bp_sweep!(af, pm, pa)

In [18]:
# Iterate sweeps and print the error per iteration. The first argument (100) is
# presumably the maximum number of sweeps — confirm; in the recorded run the
# loop stops at t=19, once err drops below tol=1.0e-5.
BpAlignGpu.test_sweep!(100,af,pm,pa)

t=1	 err=0.14632031
t=2	 err=0.2410874
t=3	 err=0.037026465
t=4	 err=0.016859531
t=5	 err=0.01909548
t=6	 err=0.0031093322
t=7	 err=0.0040263534
t=8	 err=0.0015021563
t=9	 err=0.0011902452
t=10	 err=0.0009968281
t=11	 err=0.00076025724
t=12	 err=0.000351429
t=13	 err=0.00020188093
t=14	 err=0.00010347366
t=15	 err=5.26011e-5
t=16	 err=2.4735928e-5
t=17	 err=1.5556812e-5
t=18	 err=1.335144e-5
t=19	 err=8.225441e-6
converged: err=8.225441e-6, tol=1.0e-5


In [19]:
# Evaluate lr_freeen on the current fields and model.
# NOTE(review): the recorded result prints as a Float64, whereas the logZ*
# calls below print Float32 values — check whether that promotion is intended.
BpAlignGpu.lr_freeen(af, pm)

6.364505648612976

In [20]:
# Evaluate logZi; the recorded result is a (scalar, array) tuple in which the
# scalar matches the sum of the array entries.
BpAlignGpu.logZi(af, pm, pa)

(-21.221033f0, [-0.30802572;;; -6.5986495;;; -7.6639633;;; -6.644477;;; -0.005917739])

In [21]:
# Evaluate logZa; the recorded result is a (scalar, array) tuple in which the
# scalar matches the sum of the array entries.
BpAlignGpu.logZa(af, pm, pa)

(10.904351f0, [1.9342438;;;;; 3.2062142;;;;; 3.659298;;;;; 2.104595;;;;; 0.0])

In [22]:
# Evaluate logZia; unlike logZi / logZa, the recorded result is a single
# Float32 scalar.
BpAlignGpu.logZia(af, pm)

-26.486605f0