In [None]:
# Load the PDMP package; `ta` records the start time so the elapsed seconds
# (rounded to one decimal) can be reported once loading has finished.
print("loading PDMP... "); ta = time()
using PDMP
println("[done in $(round(time()-ta,1))s]")

# Same timing pattern for the remaining packages (JLD); `ta` is reset first.
print("loading other packages... "); ta = time()
using JLD
println("[done in $(round(time()-ta,1))s]")

# Conditional print: write `s` (without a newline) only when the flag `b` is true.
# Returns `nothing` in both cases.
function cprint(s, b)
    if b
        print(s)
    end
    return nothing
end
# Conditional println: write `s` followed by a newline only when the flag `b` is true.
# Returns `nothing` in both cases.
function cprintln(s, b)
    if b
        println(s)
    end
    return nothing
end

In [None]:
#########################################################
# Identical preparation of the data for all experiments #
#########################################################
verb = true   # progress messages are printed only when this flag is true

cprint("reading and preparing data... ", verb)
ta = time()

# Each CSV file is read with the given element type and flattened to a vector.
rows  = vec(readdlm("data/rows.csv", Int))
cols  = vec(readdlm("data/cols.csv", Int))
rates = vec(readdlm("data/rates.csv", Float64))

# Centre the rates on their mean and scale them by the spread (max - min)
# of the raw values; the spread is taken before centring.
range = maximum(rates) - minimum(rates)
rates = (rates - mean(rates)) / range

# Bundle passed to the samplers.
data = Dict("ROWS" => rows, "COLS" => cols, "RATES" => rates)

cprintln("[done in $(round(time()-ta,1))s]", verb)
;

In [None]:
# Sizes are taken as the largest row index (nU) and largest column index (nV).
nU = maximum(rows)
nV = maximum(cols)

# Accumulators per row index (U) and per column index (V):
# c* = number of rates, s* = sum of rates, s2* = sum of squared rates.
cU, sU, s2U = zeros(nU), zeros(nU), zeros(nU)
cV, sV, s2V = zeros(nV), zeros(nV), zeros(nV)

for (obs, rate) in enumerate(rates)
    row, col = rows[obs], cols[obs]
    sq       = rate^2

    cU[row]  += 1
    sU[row]  += rate
    s2U[row] += sq

    cV[col]  += 1
    sV[col]  += rate
    s2V[col] += sq
end

# Variance as E[x^2] - E[x]^2; an index with no rates gives 0/0 = NaN here.
vU = (s2U ./ cU) - (sU ./ cU).^2
vV = (s2V ./ cV) - (sV ./ cV).^2

# Set variances below 1e-10 (including slightly negative ones) to exactly zero.
# NaN entries are left untouched because the comparison is false for them.
vU[vU .< 1e-10] = 0.0
vV[vV .< 1e-10] = 0.0
;

In [None]:
base_sigma_r = 0.5 # Salakhutdinov & Mnih
# https://pymc-devs.github.io/pymc3/notebooks/pmf-pymc.html

# Average standard deviation over the row (resp. column) indices,
# ignoring the NaN variances.
base_sigma_u = mean(sqrt.(filter(x -> !isnan(x), vU)))
base_sigma_v = mean(sqrt.(filter(x -> !isnan(x), vV)))

foreach(println, (base_sigma_u, base_sigma_v))

## LBPS runs

In [None]:
include("pmf_lbps.jl")

In [None]:
# Settings handed to `pmf_lbps`. The values have mixed types (String, Int,
# Float64), so the value type is spelled out as Any.
lbpsparams = Dict{String,Any}(
    # experiment label
    "EXPNAME"    => "A",
    # dimension of the latent space
    "LATENT_D"   => 10,
    # scale parameters computed from the data
    "SIGMA_U"    => base_sigma_u,
    "SIGMA_V"    => base_sigma_v,
    "SIGMA_R"    => base_sigma_r,
    # refreshment rate
    "LAMBDAREF"  => 0.01,
    # stopping criteria: maximum number of events to generate, maximum time
    "MAXNEVENTS" => 50,
    "MAXT"       => Inf,
)

# Fix the random seed immediately before the run.
srand(123)
results = pmf_lbps(data, lbpsparams)

In [None]:
# Output of `pathmean` on the LBPS event lists, split into its first nU
# entries (pmu) and everything after them (pmv).
pm = pathmean(results["ALL_EVLIST"])
pmu, pmv = pm[1:nU], pm[(nU + 1):end]

# Sanity check shown as the cell output: pmv should hold exactly nV entries.
nV == length(pmv)

### Computation of RMSE

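Note: this RMSE is computed on the SCALED rates (centred and divided by their range), so the number is lower than it would be on the original scale of the rates.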

In [None]:
# RMSE of the LBPS path-mean predictions against the (scaled) `rates`.
# Each prediction is the dot product of the row entry pmu[i] and the column
# entry pmv[j], clamped to the observed range [mir, mar] before scoring.
mir = minimum(rates)
mar = maximum(rates)

s = 0.0   # running sum of squared errors
for (k, rk) in enumerate(rates)
    i, j = rows[k], cols[k]
    ui   = pmu[i]
    vj   = pmv[j]   # column entry must come from pmv; pmu only holds the nU row entries
    cand = clamp(dot(ui, vj), mir, mar)
    s   += (rk - cand)^2
end

sqrt(s/length(rates))

## HMC runs

In [None]:
include("hmc.jl")
include("pmf_ll.jl")

In [None]:
# Latent dimension for the HMC run: every row/column index owns a block of `d`
# consecutive entries in the flat parameter vector. Same value as "LATENT_D"
# in `lbpsparams`.
d = 10

In [None]:
# Build the two objects later passed to `hmc` from the data, the sizes, the
# three sigma parameters and the latent dimension.
# NOTE(review): presumably `ll` is the log-likelihood and `gll` its gradient —
# confirm against pmf_ll.jl.
(ll, gll) = pmf_ll(rows, cols, rates, nU, nV, 
                    base_sigma_r, base_sigma_u, base_sigma_v, d)

In [None]:
# Random starting point for HMC, stored as one flat vector: nU blocks of `d`
# normal draws scaled by base_sigma_u, followed by nV blocks scaled by
# base_sigma_v. The first U block seeds the vector, so nU - 1 U blocks remain.
x0 = base_sigma_u * randn(d)
for (sigma, nblocks) in ((base_sigma_u, nU - 1), (base_sigma_v, nV))
    for blk in 1:nblocks
        append!(x0, sigma * randn(d))
    end
end
;

In [None]:
# Run `hmc` on (ll, gll) starting from x0; the trailing `;` hides the cell output.
# NOTE(review): the exact meaning of steps / burnin / stepsize is presumably
# defined in hmc.jl — confirm there.
samples = hmc(ll, gll, x0; steps=50, burnin=5, stepsize=0.001);

In [None]:
# Average of the HMC output: the sum of `samples` divided by their number.
# NOTE(review): assumes `samples` is a collection of equally sized vectors, so
# that `ss` is a flat vector of length d*(nU+nV) — confirm against hmc.jl.
ss = sum(samples)/length(samples);

In [None]:
# Split the averaged HMC sample: the first d*nU entries belong to the row
# indices (u), the remaining entries to the column indices (v).
u = ss[1:(d*nU)]
v = ss[(d*nU + 1):end]

# RMSE on the (scaled) rates. Index k owns the block (k-1)*d+1 : k*d of its
# vector, and each prediction is clamped to [mir, mar] before scoring.
s = 0.0
for (obs, rate) in enumerate(rates)
    row, col = rows[obs], cols[obs]
    urange   = (d*(row - 1) + 1):(d*row)
    vrange   = (d*(col - 1) + 1):(d*col)

    pred = clamp(dot(u[urange], v[vrange]), mir, mar)

    s += abs2(rate - pred)
end
sqrt(s / length(rates))