In [1]:
# Session setup: load Revise and Pkg, activate the package's project, then load the package.
# Revise is loaded before BpAlignGpu — presumably so that edits to the package source are
# picked up without restarting the session (confirm against the Revise documentation).
using Revise, Pkg
# NOTE(review): this is a machine-specific absolute path; it must be adapted when the
# notebook is run on another machine or by another user.
Pkg.activate("/home/louise/MSA/BpAlignGpu.jl")
using BpAlignGpu

[32m[1m  Activating[22m[39m project at `~/MSA/BpAlignGpu.jl`


In [2]:
# Global constants shared by the cells below.
q = 21            # passed to ParamModel and used as the first dimension(s) of H_s / J_s
                  # presumably 20 amino acids + 1 gap symbol — TODO confirm
ctype = :amino    # alphabet tag handed to `Seq`
typel = :bm       # not referenced by any cell visible in this notebook excerpt
T = Float32       # element type used for ParamAlgo and ParamModel
# Both values are forwarded to ParamModel (in the order muint, muext).
# NOTE(review): these look like gap-penalty parameters — confirm against ParamModel.
muext, muint = 0.50, 2.50;

In [3]:
# Algorithm settings; every real-valued entry is expressed in the working type T.
damp = zero(T)
tol = T(1e-5)
tolnorm = T(1e-5)
tmax = 10
upscheme = :sequential    # accepted values: :random or :sequential
lr = :sce                 # accepted values: :sce or :mf
beta = one(T)
verbose = true
# Bundle everything into the parameter object consumed by the update routines.
pa = ParamAlgo(damp, tol, tolnorm, tmax, upscheme, lr, beta, verbose)



ParamAlgo{Float32}
-------------
damp=0.0
tol=1.0e-5
tolnorm=1.0e-5
tmax=10
upscheme=sequential
lr=sce
beta=1.0
verbose=true
-------------


In [11]:
# Small random instance, used (per the original note) to compile on a small example.
using Random

N_s = 20
L_s = 15

# A random upper-case string of length N_s, wrapped into a `Seq` with alphabet tag `ctype`.
header_s = "myseq"
myseq_s = randstring('A':'Z', N_s)
seq_s = Seq(header_s, myseq_s, ctype)

# The two lambda vectors are all ones; H_s and J_s are filled with uniform random numbers.
lambda_o_s = ones(L_s)
lambda_e_s = ones(L_s)
H_s = rand(q, L_s)
J_s = rand(q, q, L_s, L_s)
# Symmetrize the couplings so that J_s[a, b, i, j] == J_s[b, a, j, i].
# `permutedims` returns a fresh array, so the in-place update reads no overwritten entries.
J_s .+= permutedims(J_s, (2, 1, 4, 3));

# Build the model parameters and the containers the update routines operate on.
pm = ParamModel{T}(N_s, L_s, q, muint, muext, lambda_o_s, lambda_e_s, H_s, J_s)
bpm = BPMessages(seq_s, pm)
bpb = BPBeliefs(N_s, L_s)
lrf = LongRangeFields(N_s, L_s)
af = AllFields(bpm, bpb, lrf)


AllFields{Float32}[L=15 N=20 ongpu=true]

In [12]:
using CUDA

In [13]:
# Run each update routine once on the small instance, timing every call with CUDA.@time
# (which reports CPU and GPU allocations alongside wall time).
# NOTE(review): in the recorded output update_jointchain! takes ~1.1 s here but well under
# a millisecond in the later sweeps, which is consistent with first-call compilation —
# these timings are presumably warm-up numbers, not steady-state performance.
# NOTE(review): the recorded output of one_bp_sweep! shows ten timing lines with the same
# allocation pattern, so this sequence presumably mirrors one sweep — confirm in the package.
CUDA.@time BpAlignGpu.update_F!(af, pm, pa)
CUDA.@time BpAlignGpu.update_hF!(af, pm, pa)
CUDA.@time BpAlignGpu.update_B!(af, pm, pa)
CUDA.@time BpAlignGpu.update_hB!(af, pm, pa)
# The remaining routines take a subset of (af, pm, pa).
CUDA.@time BpAlignGpu.update_beliefs!(af, pm)
CUDA.@time BpAlignGpu.update_jointchain!(af, pm)
CUDA.@time BpAlignGpu.update_conditional_chain!(af, pa)
CUDA.@time BpAlignGpu.update_conditional_all!(af, pm)
CUDA.@time BpAlignGpu.update_f!(af)
CUDA.@time BpAlignGpu.update_g!(af)


  0.000541 seconds (342 CPU allocations: 19.328 KiB) (2 GPU allocations: 120 bytes, 13.04% memmgmt time)
  0.000547 seconds (367 CPU allocations: 20.531 KiB) (2 GPU allocations: 120 bytes, 13.20% memmgmt time)
  0.000440 seconds (342 CPU allocations: 19.438 KiB) (2 GPU allocations: 120 bytes, 14.16% memmgmt time)
  0.000607 seconds (413 CPU allocations: 23.594 KiB) (2 GPU allocations: 120 bytes, 11.89% memmgmt time)
  0.000300 seconds (173 CPU allocations: 10.375 KiB) (1 GPU allocation: 60 bytes, 15.70% memmgmt time)
  1.109341 seconds (590.22 k CPU allocations: 29.765 MiB, 4.65% gc time) (2 GPU allocations: 300 bytes, 0.49% memmgmt time)
  0.000605 seconds (342 CPU allocations: 19.688 KiB) (2 GPU allocations: 5.156 KiB, 12.72% memmgmt time)
  0.011041 seconds (8.19 k CPU allocations: 463.531 KiB) (182 GPU allocations: 1.344 MiB, 23.86% memmgmt time)
  0.083040 seconds (66.63 k CPU allocations: 3.554 MiB) (1 GPU allocation: 1.662 MiB, 0.91% memmgmt time)
  0.002929 seconds (322 CPU all

In [14]:
# Run a single sweep on the small instance. The recorded output shows ten timing lines,
# so the routine presumably times each of its internal update steps — confirm in the package.
BpAlignGpu.one_bp_sweep!(af, pm, pa)

  0.000462 seconds (342 CPU allocations: 19.328 KiB) (2 GPU allocations: 120 bytes, 21.37% memmgmt time)
  0.000293 seconds (367 CPU allocations: 20.531 KiB) (2 GPU allocations: 120 bytes, 11.63% memmgmt time)
  0.000254 seconds (342 CPU allocations: 19.438 KiB) (2 GPU allocations: 120 bytes, 14.23% memmgmt time)
  0.000290 seconds (413 CPU allocations: 23.594 KiB) (2 GPU allocations: 120 bytes, 14.26% memmgmt time)
  0.000121 seconds (173 CPU allocations: 10.375 KiB) (1 GPU allocation: 60 bytes, 13.27% memmgmt time)
  0.000260 seconds (249 CPU allocations: 16.641 KiB) (2 GPU allocations: 300 bytes, 14.05% memmgmt time)
  0.000213 seconds (342 CPU allocations: 19.688 KiB) (2 GPU allocations: 5.156 KiB, 16.24% memmgmt time)
  0.006447 seconds (8.19 k CPU allocations: 463.531 KiB) (182 GPU allocations: 1.344 MiB, 28.33% memmgmt time)
  0.002662 seconds (297 CPU allocations: 14.844 KiB) (1 GPU allocation: 1.662 MiB, 18.83% memmgmt time)
  0.002435 seconds (322 CPU allocations: 16.859 KiB)

In [16]:
# Repeated sweeps on the small instance; the first argument is presumably the number of
# sweeps (the recorded output prints t up to at least 97) — confirm against test_sweep!.
# NOTE(review): this count (100) differs from pa.tmax (10); check which one is intended.
# NOTE(review): the recorded output shows err=1.0 at every printed step, i.e. the reported
# error does not decrease on this random instance — verify that this is expected.
BpAlignGpu.test_sweep!(100,af,pm,pa)

  0.000557 seconds (342 CPU allocations: 19.328 KiB) (2 GPU allocations: 120 bytes, 18.99% memmgmt time)
  0.000419 seconds (367 CPU allocations: 20.531 KiB) (2 GPU allocations: 120 bytes, 12.02% memmgmt time)
  0.000375 seconds (342 CPU allocations: 19.438 KiB) (2 GPU allocations: 120 bytes, 12.94% memmgmt time)
  0.000321 seconds (413 CPU allocations: 23.594 KiB) (2 GPU allocations: 120 bytes, 11.10% memmgmt time)
  0.000136 seconds (173 CPU allocations: 10.375 KiB) (1 GPU allocation: 60 bytes, 12.41% memmgmt time)
  0.000233 seconds (249 CPU allocations: 16.641 KiB) (2 GPU allocations: 300 bytes, 14.88% memmgmt time)
  0.000260 seconds (342 CPU allocations: 19.688 KiB) (2 GPU allocations: 5.156 KiB, 14.96% memmgmt time)
  0.010942 seconds (8.19 k CPU allocations: 463.531 KiB) (182 GPU allocations: 1.344 MiB, 29.47% memmgmt time)
  0.002264 seconds (297 CPU allocations: 14.844 KiB) (1 GPU allocation: 1.662 MiB, 0.99% memmgmt time)
  0.002731 seconds (322 CPU allocations: 16.859 KiB) 

  0.000260 seconds (342 CPU allocations: 19.688 KiB) (2 GPU allocations: 5.156 KiB, 14.23% memmgmt time)
  0.009815 seconds (8.19 k CPU allocations: 463.531 KiB) (182 GPU allocations: 1.344 MiB, 32.17% memmgmt time)
  0.002854 seconds (297 CPU allocations: 14.844 KiB) (1 GPU allocation: 1.662 MiB, 21.74% memmgmt time)
  0.002540 seconds (322 CPU allocations: 16.859 KiB) (1 GPU allocation: 1.662 MiB, 22.35% memmgmt time)
t=10	 err=1.0
  0.000368 seconds (342 CPU allocations: 19.328 KiB) (2 GPU allocations: 120 bytes, 15.33% memmgmt time)
  0.000328 seconds (367 CPU allocations: 20.531 KiB) (2 GPU allocations: 120 bytes, 14.93% memmgmt time)
  0.000308 seconds (342 CPU allocations: 19.438 KiB) (2 GPU allocations: 120 bytes, 12.55% memmgmt time)
  0.000339 seconds (413 CPU allocations: 23.594 KiB) (2 GPU allocations: 120 bytes, 11.18% memmgmt time)
  0.000165 seconds (173 CPU allocations: 10.375 KiB) (1 GPU allocation: 60 bytes, 11.75% memmgmt time)
  0.000273 seconds (249 CPU allocations

  0.000208 seconds (342 CPU allocations: 19.438 KiB) (2 GPU allocations: 120 bytes, 14.47% memmgmt time)
  0.001803 seconds (413 CPU allocations: 23.594 KiB) (2 GPU allocations: 120 bytes, 2.53% memmgmt time)
  0.000154 seconds (173 CPU allocations: 10.375 KiB) (1 GPU allocation: 60 bytes, 13.43% memmgmt time)
  0.000239 seconds (249 CPU allocations: 16.641 KiB) (2 GPU allocations: 300 bytes, 14.87% memmgmt time)
  0.000256 seconds (342 CPU allocations: 19.688 KiB) (2 GPU allocations: 5.156 KiB, 14.20% memmgmt time)
  0.009753 seconds (8.19 k CPU allocations: 463.531 KiB) (182 GPU allocations: 1.344 MiB, 31.61% memmgmt time)
  0.002739 seconds (297 CPU allocations: 14.844 KiB) (1 GPU allocation: 1.662 MiB, 20.62% memmgmt time)
  0.002542 seconds (322 CPU allocations: 16.859 KiB) (1 GPU allocation: 1.662 MiB, 22.16% memmgmt time)
t=20	 err=1.0
  0.000432 seconds (342 CPU allocations: 19.328 KiB) (2 GPU allocations: 120 bytes, 13.03% memmgmt time)
  0.000339 seconds (367 CPU allocations:

  0.002777 seconds (297 CPU allocations: 14.844 KiB) (1 GPU allocation: 1.662 MiB, 22.51% memmgmt time)
  0.002544 seconds (323 CPU allocations: 16.969 KiB) (1 GPU allocation: 1.662 MiB, 22.31% memmgmt time)
t=29	 err=1.0
  0.000262 seconds (342 CPU allocations: 19.328 KiB) (2 GPU allocations: 120 bytes, 16.84% memmgmt time)
  0.000272 seconds (367 CPU allocations: 20.531 KiB) (2 GPU allocations: 120 bytes, 13.48% memmgmt time)
  0.000233 seconds (342 CPU allocations: 19.438 KiB) (2 GPU allocations: 120 bytes, 13.50% memmgmt time)
  0.000264 seconds (413 CPU allocations: 23.594 KiB) (2 GPU allocations: 120 bytes, 12.26% memmgmt time)
  0.000116 seconds (173 CPU allocations: 10.375 KiB) (1 GPU allocation: 60 bytes, 14.27% memmgmt time)
  0.000200 seconds (249 CPU allocations: 16.641 KiB) (2 GPU allocations: 300 bytes, 17.91% memmgmt time)
  0.000201 seconds (342 CPU allocations: 19.688 KiB) (2 GPU allocations: 5.156 KiB, 14.93% memmgmt time)
  0.007223 seconds (8.19 k CPU allocations: 4

  0.000152 seconds (173 CPU allocations: 10.375 KiB) (1 GPU allocation: 60 bytes, 13.05% memmgmt time)
  0.000269 seconds (249 CPU allocations: 16.641 KiB) (2 GPU allocations: 300 bytes, 15.27% memmgmt time)
  0.000266 seconds (342 CPU allocations: 19.688 KiB) (2 GPU allocations: 5.156 KiB, 14.00% memmgmt time)
  0.010257 seconds (8.19 k CPU allocations: 463.531 KiB) (182 GPU allocations: 1.344 MiB, 33.80% memmgmt time)
  0.002890 seconds (297 CPU allocations: 14.844 KiB) (1 GPU allocation: 1.662 MiB, 22.56% memmgmt time)
  0.002540 seconds (322 CPU allocations: 16.859 KiB) (1 GPU allocation: 1.662 MiB, 22.69% memmgmt time)
t=39	 err=1.0
  0.000381 seconds (342 CPU allocations: 19.328 KiB) (2 GPU allocations: 120 bytes, 13.82% memmgmt time)
  0.000350 seconds (367 CPU allocations: 20.531 KiB) (2 GPU allocations: 120 bytes, 13.68% memmgmt time)
  0.000322 seconds (342 CPU allocations: 19.438 KiB) (2 GPU allocations: 120 bytes, 14.76% memmgmt time)
  0.000363 seconds (413 CPU allocations

  0.000305 seconds (342 CPU allocations: 19.328 KiB) (2 GPU allocations: 120 bytes, 14.69% memmgmt time)
  0.000313 seconds (367 CPU allocations: 20.531 KiB) (2 GPU allocations: 120 bytes, 13.77% memmgmt time)
  0.000281 seconds (342 CPU allocations: 19.438 KiB) (2 GPU allocations: 120 bytes, 14.01% memmgmt time)
  0.000329 seconds (413 CPU allocations: 23.594 KiB) (2 GPU allocations: 120 bytes, 11.46% memmgmt time)
  0.000146 seconds (173 CPU allocations: 10.375 KiB) (1 GPU allocation: 60 bytes, 15.49% memmgmt time)
  0.000229 seconds (249 CPU allocations: 16.641 KiB) (2 GPU allocations: 300 bytes, 16.30% memmgmt time)
  0.000258 seconds (342 CPU allocations: 19.688 KiB) (2 GPU allocations: 5.156 KiB, 13.34% memmgmt time)
  0.009856 seconds (8.19 k CPU allocations: 463.531 KiB) (182 GPU allocations: 1.344 MiB, 32.04% memmgmt time)
  0.002701 seconds (297 CPU allocations: 14.844 KiB) (1 GPU allocation: 1.662 MiB, 20.88% memmgmt time)
  0.002498 seconds (323 CPU allocations: 17.156 KiB)

  0.009974 seconds (8.19 k CPU allocations: 463.531 KiB) (182 GPU allocations: 1.344 MiB, 29.10% memmgmt time)
  0.002796 seconds (297 CPU allocations: 14.844 KiB) (1 GPU allocation: 1.662 MiB, 22.08% memmgmt time)
  0.002844 seconds (322 CPU allocations: 16.859 KiB) (1 GPU allocation: 1.662 MiB, 30.36% memmgmt time)
t=58	 err=1.0
  0.000331 seconds (342 CPU allocations: 19.328 KiB) (2 GPU allocations: 120 bytes, 16.80% memmgmt time)
  0.000314 seconds (367 CPU allocations: 20.531 KiB) (2 GPU allocations: 120 bytes, 13.99% memmgmt time)
  0.000363 seconds (342 CPU allocations: 19.438 KiB) (2 GPU allocations: 120 bytes, 29.84% memmgmt time)
  0.000344 seconds (413 CPU allocations: 23.594 KiB) (2 GPU allocations: 120 bytes, 13.51% memmgmt time)
  0.000173 seconds (173 CPU allocations: 10.375 KiB) (1 GPU allocation: 60 bytes, 14.98% memmgmt time)
  0.000235 seconds (249 CPU allocations: 16.641 KiB) (2 GPU allocations: 300 bytes, 20.01% memmgmt time)
  0.000257 seconds (342 CPU allocations

  0.000340 seconds (413 CPU allocations: 23.594 KiB) (2 GPU allocations: 120 bytes, 13.61% memmgmt time)
  0.000164 seconds (173 CPU allocations: 10.375 KiB) (1 GPU allocation: 60 bytes, 12.89% memmgmt time)
  0.000242 seconds (249 CPU allocations: 16.641 KiB) (2 GPU allocations: 300 bytes, 15.63% memmgmt time)
  0.000256 seconds (342 CPU allocations: 19.688 KiB) (2 GPU allocations: 5.156 KiB, 13.94% memmgmt time)
  0.009907 seconds (8.19 k CPU allocations: 463.531 KiB) (182 GPU allocations: 1.344 MiB, 32.33% memmgmt time)
  0.002717 seconds (297 CPU allocations: 14.844 KiB) (1 GPU allocation: 1.662 MiB, 21.01% memmgmt time)
  0.002552 seconds (322 CPU allocations: 16.859 KiB) (1 GPU allocation: 1.662 MiB, 22.83% memmgmt time)
t=68	 err=1.0
  0.000329 seconds (342 CPU allocations: 19.328 KiB) (2 GPU allocations: 120 bytes, 15.84% memmgmt time)
  0.000308 seconds (367 CPU allocations: 20.531 KiB) (2 GPU allocations: 120 bytes, 15.60% memmgmt time)
  0.000297 seconds (342 CPU allocations

  0.002527 seconds (322 CPU allocations: 16.859 KiB) (1 GPU allocation: 1.662 MiB, 22.89% memmgmt time)
t=77	 err=1.0
  0.000315 seconds (342 CPU allocations: 19.328 KiB) (2 GPU allocations: 120 bytes, 15.27% memmgmt time)
  0.000322 seconds (367 CPU allocations: 20.531 KiB) (2 GPU allocations: 120 bytes, 13.75% memmgmt time)
  0.000281 seconds (342 CPU allocations: 19.438 KiB) (2 GPU allocations: 120 bytes, 15.47% memmgmt time)
  0.000367 seconds (413 CPU allocations: 23.594 KiB) (2 GPU allocations: 120 bytes, 11.44% memmgmt time)
  0.000147 seconds (173 CPU allocations: 10.375 KiB) (1 GPU allocation: 60 bytes, 14.02% memmgmt time)
  0.000308 seconds (249 CPU allocations: 16.641 KiB) (2 GPU allocations: 300 bytes, 12.40% memmgmt time)
  0.000272 seconds (342 CPU allocations: 19.688 KiB) (2 GPU allocations: 5.156 KiB, 17.79% memmgmt time)
  0.010280 seconds (8.19 k CPU allocations: 463.531 KiB) (182 GPU allocations: 1.344 MiB, 31.71% memmgmt time)
  0.002723 seconds (297 CPU allocation

  0.000207 seconds (249 CPU allocations: 16.641 KiB) (2 GPU allocations: 300 bytes, 17.71% memmgmt time)
  0.000237 seconds (342 CPU allocations: 19.688 KiB) (2 GPU allocations: 5.156 KiB, 15.39% memmgmt time)
  0.008734 seconds (8.19 k CPU allocations: 463.531 KiB) (182 GPU allocations: 1.344 MiB, 26.27% memmgmt time)
  0.002696 seconds (297 CPU allocations: 14.844 KiB) (1 GPU allocation: 1.662 MiB, 20.91% memmgmt time)
  0.002506 seconds (322 CPU allocations: 16.859 KiB) (1 GPU allocation: 1.662 MiB, 22.35% memmgmt time)
t=87	 err=1.0
  0.000271 seconds (342 CPU allocations: 19.328 KiB) (2 GPU allocations: 120 bytes, 14.14% memmgmt time)
  0.000281 seconds (367 CPU allocations: 20.531 KiB) (2 GPU allocations: 120 bytes, 12.80% memmgmt time)
  0.000264 seconds (342 CPU allocations: 19.438 KiB) (2 GPU allocations: 120 bytes, 13.62% memmgmt time)
  0.000290 seconds (413 CPU allocations: 23.594 KiB) (2 GPU allocations: 120 bytes, 11.63% memmgmt time)
  0.000143 seconds (173 CPU allocatio

  0.000363 seconds (342 CPU allocations: 19.438 KiB) (2 GPU allocations: 120 bytes, 12.74% memmgmt time)
  0.000408 seconds (413 CPU allocations: 23.594 KiB) (2 GPU allocations: 120 bytes, 11.50% memmgmt time)
  0.000197 seconds (173 CPU allocations: 10.375 KiB) (1 GPU allocation: 60 bytes, 11.77% memmgmt time)
  0.000284 seconds (249 CPU allocations: 16.641 KiB) (2 GPU allocations: 300 bytes, 14.47% memmgmt time)
  0.000292 seconds (342 CPU allocations: 19.688 KiB) (2 GPU allocations: 5.156 KiB, 14.11% memmgmt time)
  0.011481 seconds (8.19 k CPU allocations: 463.531 KiB) (182 GPU allocations: 1.344 MiB, 29.91% memmgmt time)
  0.002776 seconds (297 CPU allocations: 14.844 KiB) (1 GPU allocation: 1.662 MiB, 19.26% memmgmt time)
  0.002493 seconds (323 CPU allocations: 17.156 KiB) (1 GPU allocation: 1.662 MiB, 20.76% memmgmt time)
t=97	 err=1.0
  0.000376 seconds (342 CPU allocations: 19.328 KiB) (2 GPU allocations: 120 bytes, 14.44% memmgmt time)
  0.000416 seconds (367 CPU allocations