In [1]:
import Pkg
Pkg.add("Turing")
Pkg.add("Flux")
Pkg.add("NCDatasets")
Pkg.add("TSVD")
Pkg.add("Statistics")
Pkg.add("Compat")
Pkg.add("LinearAlgebra")
Pkg.add("Glob")
Pkg.add("CSV")
Pkg.add("DataFrames")
Pkg.add("Distributions")
Pkg.add("ProgressMeter")
Pkg.add("PyPlot")
Pkg.add("Random")
Pkg.add("StatsPlots")
Pkg.add("SpecialFunctions")
Pkg.add("ReverseDiff")
Pkg.add("BSON")
Pkg.add("TransformVariables")
Pkg.add("TransformedLogDensities")
Pkg.add("LogDensityProblems")
Pkg.add("LogDensityProblemsAD")
Pkg.add("DynamicHMC")
Pkg.add("Parameters")
Pkg.add("TSVD")
Pkg.add("Glob")
using Flux
using Statistics
using LinearAlgebra
using Compat
using Glob
using TSVD
using NCDatasets
using CSV
using DataFrames
using Distributions: Categorical, Dirichlet, Gamma, Beta
using ProgressMeter
using PyPlot
using Random
using SpecialFunctions: loggamma
using ReverseDiff
using BSON: @load

[32m[1m    Updating[22m[39m registry at `~/.julia/registries/General.toml`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.8/Project.toml`
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.8/Manifest.toml`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.8/Project.toml`
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.8/Manifest.toml`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.8/Project.toml`
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.8/Manifest.toml`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.8/Project.toml`
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.8/Manifest.toml`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `~/.ju

In [2]:
obs_file ="../data/observed_speeds/greenland_vel_mosaic250_v1_g9000m.nc"
d_obs = NCDataset(obs_file)
v_obs = d_obs["velsurf_mag"][:]
v_obs = nomissing(v_obs, 0.0);
idx = findall(v_obs .> 0)
Obs = v_obs[idx];

n_grid_points = size(idx)[1];

In [3]:
training_files = sort(glob("../tests/training_data/*.nc"))

nf = length(training_files)
d = NCDataset(training_files[1], "r")
v = d["velsurf_mag"]
nx, ny, nt = size(v)

Data = zeros(n_grid_points, nf * nt)
ids = zeros(Int64, nf)
@showprogress for (k, training_file) in enumerate(training_files)
    m_id = match(r"id_(.+?)_", training_file)
    ids[k] = parse(Int, m_id[1])
    d = NCDataset(training_file, "r")
    v = d["velsurf_mag"][:]
    v = nomissing(v, 0.0)
    Data[:, k] = v[idx]
end

[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:02[39m


## Read training samples

In [4]:
X_df = DataFrame(CSV.File("../data/samples/velocity_calibration_samples_50.csv"))
X_df = X_df[ [x in ids for x in X_df[!, :id]] ,:]
X = transpose(Matrix(X_df[!, 2:9]))
X_mean = mean(X, dims=2);
X_std = std(X, dims=2);
X_scaled = (X .- X_mean) ./ X_std;
X_train = X_scaled;
n_parameters, n_samples = size(X);

## Load the model

Should be a commmand line argument

That we have to define the struct again is not ok. How can we avoid this?

In [5]:
struct NNModel
    chain::Chain
    V_hat::AbstractArray
    F_mean::AbstractArray
end

function (m::NNModel)(x, add_mean=false)
    if add_mean
        return V_hat * m.chain(x) .+ F_mean
    else
        return V_hat * m.chain(x)
    end

end

In [6]:
@load "emulator_1.bson" model

In [7]:
function get_eigenglaciers(omegas, F, q)
    
    F_mean = sum(F .* omegas, dims=2);
    F_bar = F .- F_mean;

    Z = diagm(sqrt.(omegas[1, :] * n_grid_points))
    U, S, V = tsvd(Z * transpose(F_bar), q);
    lamda = S.^2 / n_grid_points
    V_hat = V * diagm(sqrt.(lamda));
    
    return V_hat, F_bar, F_mean
end;

In [8]:
q = 50
F = log10.(Data)
F = replace!(F, -Inf=>0)

dirichlet_dist = Dirichlet(n_samples, 1)

model_index = 1
omegas = transpose(rand(dirichlet_dist, 1))
omegas_0 = omegas ./ size(omegas)[1];
    
V_hat, F_bar, F_mean = get_eigenglaciers(omegas, F, q);

In [9]:
alpha_b = 3;
beta_b = 3;
beta_dist = Beta(alpha_b, beta_b);
X_prior = rand(beta_dist, n_parameters, 100000);
X_0 = mean(X_prior, dims=2);

In [10]:
Y_target = log10.(Obs);
Y_target = replace!(Y_target, -Inf=>0);

In [11]:
grid_resolution = ones(n_grid_points) .* 9000
sigma = 10
rho = 1.0 / (1e4 .^ 2)
point_area = (grid_resolution) .^ 2
K = point_area .* rho
sigma_hat = sqrt.(sigma .^ 2 ./ K .^ 2)

X_min = minimum(X_scaled, dims=2);
X_max = maximum(X_scaled, dims=2);

In [12]:
using TransformVariables, TransformedLogDensities, LogDensityProblems, LogDensityProblemsAD,
    DynamicHMC, DynamicHMC.Diagnostics, Parameters, Statistics, Random

In [13]:
struct SampleBayesProblem
    nn
    X_min::AbstractArray
    X_max::AbstractArray
    Y_target::AbstractArray
    sigma_hat::AbstractArray
    nu::Int
    alpha::Float16
end

In [14]:
function (problem::SampleBayesProblem)(θ)
    @unpack α = θ               # extract the parameters
    @unpack nn, X_min, X_max, Y_target, sigma_hat, nu, alpha = problem       # extract the data
    
    Y_pred = 10 .^ nn(α, true)
    r = Y_pred .- Y_target
    t = r ./ sigma_hat

    # StudentT distribution
    loglikelihood = sum(
        loggamma((nu + 1) / 2)
        - loggamma(nu / 2)
        .- log.(sqrt.(pi * nu) .* sigma_hat)
        .- (nu + 1) / 2.0 * log.(1 .+ 1.0 / nu .* t .^ 2)
    )
    # Beta prior
    X_bar = relu((α .- X_min) ./ (X_max - X_min))
    logprior = sum(
        (alpha_b - 1) * log.(X_bar) + (beta_b - 1) * log.(1 .- X_bar) 
        .+ loggamma(alpha_b + beta_b) 
        .- loggamma(alpha_b)
        .- loggamma(beta_b)
    )
    return (alpha * loglikelihood + logprior)
end

In [15]:
nu = 1
alpha = 0.01
logp = SampleBayesProblem(model, X_min, X_max, Y_target, sigma_hat, nu, alpha)
logp((α = X_0,)) # make sure that it works

-1360.1149370416597

In [None]:
trans = as((α = as(Array, as𝕀, n_parameters),))
P = TransformedLogDensity(trans, logp)
∇P = ADgradient(:Zygote, P)

In [None]:
Pkg.add("Optim")
Pkg.add("LineSearches")
using LineSearches
using Optim


In [None]:
function logp_g(α, nn, X_min, X_max, Y_target, sigma_hat, nu, alpha)
    
    Y_pred = 10 .^ nn(α, true)
    r = Y_pred .- Y_target
    t = r ./ sigma_hat

    # StudentT distribution
    loglikelihood = sum(
        loggamma((nu + 1) / 2)
        - loggamma(nu / 2)
        .- log.(sqrt.(pi * nu) .* sigma_hat)
        .- (nu + 1) / 2.0 * log.(1 .+ 1.0 / nu .* t .^ 2)
    )
    # Beta prior
    X_bar = (α .- X_min) ./ (X_max - X_min)
    logprior = sum(
        (alpha_b - 1) * real.(log.(Complex.(X_bar))) + (beta_b - 1) * real.(log.(Complex.(1 .- X_bar)))
        .+ loggamma(alpha_b + beta_b) 
        .- loggamma(alpha_b)
        .- loggamma(beta_b)
    )

    return (alpha * loglikelihood + logprior)
end


ll(X_0) = logp_g(X_0, model, X_min, X_max, Y_target, sigma_hat, nu, alpha)
res = optimize(ll, X_0, LBFGS(linesearch = LineSearches.MoreThuente(), ), Optim.Options(show_trace=true, iterations = 51))
X_map = Optim.minimizer(res)

In [None]:
X_map .* X_std .+ X_mean

In [None]:
results = mcmc_with_warmup(Random.default_rng(), ∇P, 2; 
    initialization = (q = vec(X_map), ))

In [None]:
summarize_tree_statistics(results.tree_statistics)

In [None]:
posterior = transform.(trans, eachcol(results.posterior_matrix))
posterior_α = first.(posterior)
mean(posterior_α)

In [None]:
logp_p(x) = LogDensityProblems.logdensity(P, x)
∇logp_p(x) = LogDensityProblems.logdensity_and_gradient(∇P, x)[2]
result_pf = pathfinder(logp_p, ∇logp_p; dim=8)

In [None]:
init_params = result_pf.draws[:, 1]
result_dhmc1 = mcmc_with_warmup(
    Random.GLOBAL_RNG,
    ∇P,
    1;
    initialization=(; q=init_params),
    reporter=NoProgressReport(),
)

In [None]:
Pkg.add("Optim")
using Optim

In [None]:
Pkg.add("StatsBase")
using StatsBase

In [None]:
Pkg.add("Pathfinder")
using Pathfinder

In [None]:
Pkg.add("AdvancedMH")
Pkg.add("MCMCChains")
using AdvancedMH
using MCMCChains

In [None]:
Pkg.add("StructArrays")
using LogDensityProblemsAD
using LogDensityProblems
using AdvancedMH
using Distributions
using MCMCChains
using ForwardDiff
using StructArrays



In [None]:
sample(∇P, logp, 100000; init_params=ones(2), chain_type=StructArray, param_names=["μ", "σ"])

In [None]:
a =log(Complex(-3))

In [None]:
real(a)

In [None]:
?real

In [None]:
using LinearAlgebra
using Distributions
Pkg.add("Arpack")
using Arpack

In [None]:


function mala(logdensity,gradient,h,M,niter,θinit)       
        function gradientStep(θ,t)                                                                                                                                                                                 
                θ-t*M*gradient(θ)                                                                                                                                                                                  
        end        
        print(θinit)
        θtrace = zeros(length(θinit),niter)
        #θtrace=Array{Float64}(length(θinit),niter)    
        θ=θinit
        θtrace[:,1]=θinit                                                                                                                                                                                          
        for i=2:niter                                                                                                                                                                                              
                θold=θ                                                                                                                                                                                             
                θ=rand(MvNormal(gradientStep(θ,0.5*h),h*M))                                                                                                                                                        
                d=logdensity(θ) - logdensity(θold) + logpdf(MvNormal(gradientStep(θ,0.5*h),h*M),θold) - logpdf(MvNormal(gradientStep(θold,0.5*h),h*M),θ)                                                           
                if(!(log(rand(Uniform(0,1)))<d))                                                                                                                                                                   
                        θ=θold                                                                                                                                                                                     
                end                                                                                                                                                                                                
                θtrace[:,i]=θ                                                                                                                                                                                      
        end                                                                                                                                                                                                        
        θtrace                                                                                                                                                                                                     
end  

In [None]:
ρ²=0.8                                                                                                                                                                                                             
Σ=[1 ρ²;ρ² 1]                                                                                                                                                                                                      
                                                                                                                                                                                                                   
function logdensity(θ)                                                                                                                                                                                             
        logpdf(MvNormal(Σ),θ)                                                                                                                                                                                      
end                                                                                                                                                                                                                
                                                                                                                                                                                                                   
function gradient(θ)                                                                                                                                                                                               
        Σ\θ                                                                                                                                                                                                        
end  

function gradient(θ)                                                                                                                                                                                               
        logp_g(θ)                                                                                                                                                                                                        
end  

function Hinv
    
end


gradient((α= X_0),)
niter=1000                                                                                                                                                                                                         
h=1/eigs(inv(Σ),nev=1)[1][1]                                                                                                                                                                                       
#draws=mala(logp,gradient,h,I,niter,[5,50]);   #No preconditioning                                                                                                                                                                                                                                                                                                    
pdraws=mala(logp,gradient,h,Σ,niter, X_0);       #With Preconditioning                                                                                                                                                    

In [None]:
mean(pdraws, dims=2)

In [None]:
logp_gg(X_0) = logp_g(X_0, model, X_min, X_max, Y_target, sigma_hat, nu, alpha)

In [None]:
Zygote.gradient(logp_gg, X_0)

In [None]:
g = Flux.gradient(logp_gg, X_0)
g[1]

In [None]:
?gradient

In [None]:
Pkg.add("Zygote")

In [16]:
using Zygote

In [17]:
function logpi(α, nn, X_min, X_max, Y_target, sigma_hat, nu, alpha)
    
    Y_pred = 10 .^ nn(α, true)
    r = Y_pred .- Y_target
    t = r ./ sigma_hat

    # StudentT distribution
    loglikelihood = sum(
        loggamma((nu + 1) / 2)
        - loggamma(nu / 2)
        .- log.(sqrt.(pi * nu) .* sigma_hat)
        .- (nu + 1) / 2.0 * log.(1 .+ 1.0 / nu .* t .^ 2)
    )
    # Beta prior
    X_bar = (α .- X_min) ./ (X_max - X_min)
    logprior = sum(
        (alpha_b - 1) * log.(X_bar) + (beta_b - 1) * log.(1 .- X_bar)
        .+ loggamma(alpha_b + beta_b) 
        .- loggamma(alpha_b)
        .- loggamma(beta_b)
    )

    return (alpha * loglikelihood + logprior)
end

logpi (generic function with 1 method)

In [18]:
llogpi(X_0) = logpi(X_0, model, X_min, X_max, Y_target, sigma_hat, nu, alpha)

llogpi (generic function with 1 method)

In [None]:
function get_log_like_gradient_and_hessian(X;  eps=1e-2, compute_hessian=false)
   log_pi =  llogpi(X)
    if compute_hessian
        g = Zygote.gradient(llogpi, X)
        H = Zygote.hessian(llogpi, X)
    end
end

In [None]:
get_log_like_gradient_and_hessian(X_0, compute_hessian=true)

In [19]:
nu = 1
alpha = 0.01

0.01

In [19]:
Flux.hessian(llogpi, X_0)

LoadError: ArgumentError: Sampler for this object is not defined

In [21]:
Flux.gradient(llogpi(), X_0)

([-2.551063644299809; -2.7110333047501496; … ; 2.7595886860123953; 8.304913874547792;;],)

In [None]:
?Zygote.hessian

In [22]:
f(x, y) = sum((x .- y) .^ 2)

f (generic function with 1 method)

In [23]:
Flux.gradient(f, [2, 1], [2, 0])

([0.0, 2.0], [-0.0, -2.0])

In [25]:
g(x, y, a, b) = sum((a .* x .- b .* y) .^ 2)

g (generic function with 1 method)

In [38]:
Flux.gradient(g, [2, 1], [2, 0], 1, 1)

([0.0, 2.0], [-0.0, -2.0], 2.0, 0.0)

In [39]:
Flux.gradient(logpi, X_0, model, X_min, X_max, Y_target, sigma_hat, nu, alpha)[1]

8×1 Matrix{Float64}:
   0.7390905411396775
   2.2799365468929462
  -3.3934056109035264
 -10.187146135876366
   2.8339567433738164
  -0.1706748018860189
   6.002782838990898
  -0.09627376450225733

In [37]:
Zygote.hessian(logpi, X_0, model, X_min, X_max, Y_target, sigma_hat, nu, alpha)[1]

LoadError: MethodError: no method matching hessian(::typeof(logpi), ::Matrix{Float64}, ::NNModel, ::Matrix{Float64}, ::Matrix{Float64}, ::Vector{Float32}, ::Vector{Float64}, ::Int64, ::Float64)
[0mClosest candidates are:
[0m  hessian(::Any, ::Any) at ~/.julia/packages/Zygote/g2w9o/src/lib/grad.jl:62

In [44]:
mean(10 .^ model(X_0, true))

37.866820538442255

In [69]:
function logpi(α, nn, X_min, X_max, Y_target, sigma_hat, nu, alpha)
    
    Y_pred = 10 .^ nn(α, true)
    r = Y_pred .- Y_target
    t = r ./ sigma_hat

    # StudentT distribution
    loglikelihood = sum(
        loggamma((nu + 1) / 2)
        - loggamma(nu / 2)
        .- log.(sqrt.(pi * nu) .* sigma_hat)
        .- (nu + 1) / 2.0 * log.(1 .+ 1.0 / nu .* t .^ 2)
    )
    print(loglikelihood)
    # Beta prior
    X_bar = (α .- X_min) ./ (X_max - X_min)
    logprior = sum(
        (alpha_b - 1) * log.(X_bar) + (beta_b - 1) * log.(1 .- X_bar)
        .+ loggamma(alpha_b + beta_b) 
        .- loggamma(alpha_b)
        .- loggamma(beta_b)
    )

    return (alpha * loglikelihood + logprior)

end

Flux.gradient(logpi, X_0, model, X_min, X_max, Y_target, sigma_hat, nu, 0)[1]

-137857.9262553373

8×1 Matrix{Float64}:
 -0.7741794858333761
 -0.760770308475807
 -0.5385046832296535
 -0.7628440276390841
 -0.7703665928714939
 -0.6829851244869003
 -0.763628355067097
 -0.7090196756829038