In [1]:
import Pkg
Pkg.add("Turing")
Pkg.add("Flux")
Pkg.add("NCDatasets")
Pkg.add("TSVD")
Pkg.add("Statistics")
Pkg.add("Compat")
Pkg.add("LinearAlgebra")
Pkg.add("Glob")
Pkg.add("CSV")
Pkg.add("DataFrames")
Pkg.add("Distributions")
Pkg.add("ProgressMeter")
Pkg.add("PyPlot")
Pkg.add("Random")
Pkg.add("StatsPlots")
Pkg.add("SpecialFunctions")
Pkg.add("ReverseDiff")
using Turing
using Flux
using Flux: train!
using Plots
using TSVD
using Statistics
using LinearAlgebra
using Compat
using Glob
using NCDatasets
using CSV
using DataFrames
using Distributions: Categorical, Dirichlet, Gamma, Beta
using ProgressMeter
using PyPlot
using Random
using StatsPlots
using SpecialFunctions: loggamma
using ReverseDiff

[32m[1m    Updating[22m[39m registry at `~/.julia/registries/General.toml`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.8/Project.toml`
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.8/Manifest.toml`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.8/Project.toml`
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.8/Manifest.toml`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.8/Project.toml`
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.8/Manifest.toml`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.8/Project.toml`
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.8/Manifest.toml`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `~/.ju

In [14]:
obs_file ="../data/observed_speeds/greenland_vel_mosaic250_v1_g9000m.nc"
d_obs = NCDataset(obs_file)
v_obs = d_obs["velsurf_mag"][:]
v_obs = nomissing(v_obs, 0.0);
idx = findall(v_obs .> 0)
Obs = v_obs[idx];

n_grid_points = size(idx)[1];

## Load the training data

In [15]:
training_files = sort(glob("../tests/training_data/*.nc"))

nf = length(training_files)
d = NCDataset(training_files[1], "r")
v = d["velsurf_mag"]
nx, ny, nt = size(v)

Data = zeros(n_grid_points, nf * nt)
ids = zeros(Int64, nf)
@showprogress for (k, training_file) in enumerate(training_files)
    m_id = match(r"id_(.+?)_", training_file)
    ids[k] = parse(Int, m_id[1])
    d = NCDataset(training_file, "r")
    v = d["velsurf_mag"][:]
    v = nomissing(v, 0.0)
    Data[:, k] = v[idx]
end

[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:02[39m


## Read training samples

In [16]:
X_df = DataFrame(CSV.File("../data/samples/velocity_calibration_samples_50.csv"))
X_df = X_df[ [x in ids for x in X_df[!, :id]] ,:]
X = transpose(Matrix(X_df[!, 2:9]))
X_mean = mean(X, dims=2);
X_std = std(X, dims=2);
X_scaled = (X .- X_mean) ./ X_std;
X_train = X_scaled;
n_parameters, n_samples = size(X);

## Data preprocessing

Log10-transform the training data and set -Inf to 0

In [17]:
F = log10.(Data)
F = replace!(F, -Inf=>0)
dirichlet_dist = Dirichlet(n_samples, 1)

area = ones(n_grid_points);
area = area ./ sum(area);

# Number of eigenglaciers
q = 50;

## Function to get Eigenglaciers using SVD

In [18]:
function get_eigenglaciers(omegas, F, q)
    
    F_mean = sum(F .* omegas, dims=2);
    F_bar = F .- F_mean;

    Z = diagm(sqrt.(omegas[1, :] * n_grid_points))
    U, S, V = tsvd(Z * transpose(F_bar), q);
    lamda = S.^2 / n_grid_points
    V_hat = V * diagm(sqrt.(lamda));
    
    return V_hat, F_bar, F_mean
end;

## Set up the Neural Network

In [19]:
n_hidden = 128

struct NNModel
    chain::Chain
end

function (m::NNModel)(x, add_mean=false)
    if add_mean
        return V_hat * m.chain(x) .+ F_mean
    else
        return V_hat * m.chain(x)
    end

end


# Call @functor to allow for training. Described below in more detail.
Flux.@functor NNModel

chain = Chain(
    Dense(n_parameters => n_hidden),
    LayerNorm(n_hidden),
    Dropout(0.0),
    Dense(n_hidden => n_hidden),
    LayerNorm(n_hidden),
    Dropout(0.5),
    Dense(n_hidden => n_hidden),
    LayerNorm(n_hidden),
    Dropout(0.5),
    Dense(n_hidden => n_hidden),
    LayerNorm(n_hidden),
    Dropout(0.3),
    Dense(n_hidden => q, bias=false),
    );

In [20]:
no_models = 1
n_epochs = 101
opt = Adam(0.1, (0.9, 0.8));

## Loss function

In [21]:
loss(y_pred, y, o) = sum(sum(abs.((y_pred - y)).^2 .* area, dims=1) .* o);

In [26]:
    model_index = 1
    Random.seed!(model_index)
    omegas = transpose(rand(dirichlet_dist, 1))
    omegas_0 = omegas ./ size(omegas)[1];
    V_hat, F_bar, F_mean = get_eigenglaciers(omegas, F, q);


## We're ready for training

In [27]:
models = []
for model_index in 1:no_models
    println("Training surrogate model ", model_index)
    Random.seed!(model_index)
    omegas = transpose(rand(dirichlet_dist, 1))
    omegas_0 = omegas ./ size(omegas)[1];
    
    V_hat, F_bar, F_mean = get_eigenglaciers(omegas, F, q);
    train_loader = Flux.DataLoader((X_train, F_bar, omegas), batchsize = 128, shuffle = true)
    model = NNModel(chain);
    ps = Flux.params(model);
    opt_state = Flux.setup(opt, model);
    
    println("  epoch, train_loss, test_loss")
    @showprogress for epoch in 1:n_epochs
        for (x, y, o) in train_loader

          # Calculate the gradient of the objective
          # with respect to the parameters within the model:
          grads = Flux.gradient(model) do m
              y_pred = m(x)
              loss(y_pred, y, o)
          end

          # Update the parameters so as to reduce the objective,
          # according the chosen optimisation rule:
          Flux.update!(opt_state, model, grads[1])
        end
        F_pred = model(X_scaled)
        train_loss = loss(F_pred, F_bar, omegas)
        test_loss = loss(F_pred, F_bar, omegas_0)
        if epoch % 5 == 0
            println("  ", epoch, " ", train_loss, " ", test_loss)
        end
    end
    push!(models, model)
end

Training surrogate model 1
  epoch, train_loss, test_loss


[32mProgress:   5%|██                                       |  ETA: 0:03:57[39m

  5 0.06066475650939917 0.06066475650939917


[32mProgress:  10%|████                                     |  ETA: 0:02:11[39m

  10 0.06070377855428051 0.06070377855428051


[32mProgress:  15%|██████▏                                  |  ETA: 0:01:34[39m

  15 0.05482887630593755 0.05482887630593755


[32mProgress:  20%|████████▏                                |  ETA: 0:01:14[39m

  20 0.053407082659209064 0.053407082659209064


[32mProgress:  25%|██████████▏                              |  ETA: 0:01:02[39m

  25 0.05866266457145048 0.05866266457145048


[32mProgress:  30%|████████████▏                            |  ETA: 0:00:53[39m

  30 0.05100992290450186 0.05100992290450186


[32mProgress:  35%|██████████████▎                          |  ETA: 0:00:46[39m

  35 0.05112867697287285 0.05112867697287285


[32mProgress:  40%|████████████████▎                        |  ETA: 0:00:40[39m

  40 0.051033604727100355 0.051033604727100355


[32mProgress:  45%|██████████████████▎                      |  ETA: 0:00:35[39m

  45 0.05256612450378154 0.05256612450378154


[32mProgress:  50%|████████████████████▎                    |  ETA: 0:00:31[39m

  50 0.051055391231564114 0.051055391231564114


[32mProgress:  54%|██████████████████████▍                  |  ETA: 0:00:27[39m

  55 0.050864238361456364 0.050864238361456364


[32mProgress:  59%|████████████████████████▍                |  ETA: 0:00:23[39m

  60 0.051723588927197646 0.051723588927197646


[32mProgress:  64%|██████████████████████████▍              |  ETA: 0:00:20[39m

  65 0.04912256217914614 0.04912256217914614


[32mProgress:  69%|████████████████████████████▍            |  ETA: 0:00:17[39m

  70 0.04655362568251399 0.04655362568251399


[32mProgress:  74%|██████████████████████████████▌          |  ETA: 0:00:14[39m

  75 0.04743961538485777 0.04743961538485777


[32mProgress:  79%|████████████████████████████████▌        |  ETA: 0:00:11[39m

  80 0.0471768787939831 0.0471768787939831


[32mProgress:  84%|██████████████████████████████████▌      |  ETA: 0:00:08[39m

  85 0.046946051594289086 0.046946051594289086


[32mProgress:  89%|████████████████████████████████████▌    |  ETA: 0:00:06[39m

  90 0.04764170892220392 0.04764170892220392


[32mProgress:  94%|██████████████████████████████████████▋  |  ETA: 0:00:03[39m

  95 0.04762742624497922 0.04762742624497922


[32mProgress:  99%|████████████████████████████████████████▋|  ETA: 0:00:00[39m

  100 0.046280718306838345 0.046280718306838345


[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:49[39m


In [28]:
n_glaciers = 10
p = ones(n_samples)
p = p / sum(p)
# This does sampling with replacement, need to figure out how to do
# sampling without replacement
P = Categorical(p)
glaciers = rand(P, n_glaciers);

## Now calcuate some metrics to assess the surrogate committee

In [29]:
F_train = F
maes = []
for m in glaciers
    for (model_index, model) in enumerate(models)
        X_val = X_train[:, m]
        Y_val = F_train[:, m]
        Y_pred = model(X_val, true)
        mae = Flux.mae(10 .^ mean(Y_pred, dims=2), 10 .^ mean(Y_val, dims=2))
        push!(maes, mae)
    end
end
mae = mean(maes)
print("MAE: ", mae)

MAE: 32.25563707162096

In [31]:
alpha_b = 3;
beta_b = 3;

In [32]:
Y_target = log10.(Obs);
Y_target = replace!(Y_target, -Inf=>0);

In [33]:
grid_resolution = ones(n_grid_points) .* 9000
sigma = 10
rho = 1.0 / (1e4 .^ 2)
point_area = (grid_resolution) .^ 2
K = point_area .* rho
sigma_hat = sqrt.(sigma .^ 2 ./ K .^ 2)

X_min = minimum(X_scaled, dims=2);
X_max = maximum(X_scaled, dims=2);

In [46]:
using TransformVariables, TransformedLogDensities, LogDensityProblems, LogDensityProblemsAD,
    DynamicHMC, DynamicHMC.Diagnostics, Parameters, Statistics, Random

In [47]:
struct SampleBayesProblem
    nn
    X_min
    X_max
    Y_target
    sigma_hat
    nu
    alpha
end

In [59]:
function (problem::SampleBayesProblem)(θ)
    @unpack α = θ               # extract the parameters
    @unpack nn, X_min, X_max, Y_target, sigma_hat, nu, alpha = problem       # extract the data
    
    Y_pred = 10 .^ nn(α, true)
    r = Y_pred .- Y_target
    t = r ./ sigma_hat

    # StudentT distribution
    loglikelihood = sum(
        loggamma((nu + 1) / 2)
        - loggamma(nu / 2)
        .- log.(sqrt.(pi * nu) .* sigma_hat)
        .- (nu + 1) / 2.0 * log.(1 .+ 1.0 / nu .* t .^ 2)
    )
    # Beta prior
    X_bar = (α .- X_min) ./ (X_max - X_min)
    logprior = sum(
        (alpha_b - 1) * log.(X_bar) + (beta_b - 1) * log.(1 .- X_bar)
    )
    (alpha * loglikelihood + logprior)
end

In [60]:
nu = 1
alpha = 0.01
logp = SampleBayesProblem(models[1], X_min, X_max, Y_target, sigma_hat, nu, alpha)
logp((α = X_0,)) # make sure that it works

-1358.303183215249

In [74]:
trans = as((α = as𝕀,))
trans = as((α = TransformVariables.UnitVector, ))
P = TransformedLogDensity(trans, logp)
∇P = ADgradient(:Zygote, P)

LoadError: MethodError: no method matching as(::NamedTuple{(:α,), Tuple{DataType}})
[0mClosest candidates are:
[0m  as([91m::Type{Real}[39m, [91m::Real[39m, [91m::TransformVariables.Infinity{true}[39m) at ~/.julia/packages/TransformVariables/fpJuU/src/scalar.jl:167
[0m  as([91m::Type{Real}[39m, [91m::TransformVariables.Infinity{false}[39m, [91m::Real[39m) at ~/.julia/packages/TransformVariables/fpJuU/src/scalar.jl:169
[0m  as([91m::Type{Real}[39m, [91m::Real[39m, [91m::Real[39m) at ~/.julia/packages/TransformVariables/fpJuU/src/scalar.jl:171
[0m  ...

In [63]:
results = mcmc_with_warmup(Random.default_rng(), ∇P, 1000; 
    warmup_stages = default_warmup_stages(; M = Symmetric),
    initialization = (ϵ = 0.1,))

LoadError: MethodError: no method matching (::Dense{typeof(identity), Matrix{Float32}, Vector{Float32}})(::Float64)
[0mClosest candidates are:
[0m  (::Dense)([91m::AbstractVecOrMat[39m) at ~/.julia/packages/Flux/v79Am/src/layers/basic.jl:170
[0m  (::Dense)([91m::AbstractArray[39m) at ~/.julia/packages/Flux/v79Am/src/layers/basic.jl:175

In [None]:
summarize_tree_statistics(results.tree_statistics)

In [35]:
# Bayesian logistic regression (LR)
@model function bayes_nn(x, X_min, X_max, Y_target, sigma_hat, nu=1, alpha=0.01)

    mu ~ Normal(0, 0.1)
    x ~ MvNormal(mu, 0.5)
    Y_pred = 10 .^ models[1](X, true)
    r = Y_pred .- Y_target
    t = r ./ sigma_hat
    
    log_likelihood = sum(
        loggamma((nu + 1) / 2)
        - loggamma(nu / 2)
        .- log.(sqrt.(pi * nu) .* sigma_hat)
        .- (nu + 1) / 2.0 * log.(1 .+ 1.0 / nu .* t .^ 2)
    )

    X_bar = (X .- X_min) ./ (X_max - X_min)
    log_prior = sum(
        (alpha_b - 1) * log.(X_bar) + (beta_b - 1) * log.(1 .- X_bar)
    )

    log_prob = alpha * log_likelihood + log_prior

    
    return x
end;

beta_dist = Beta(alpha_b, beta_b)
X_prior = rand(beta_dist, n_parameters, 100000) .* (X_max - X_min) .+ X_min
X_0 = mean(X_prior, dims=2);

In [None]:
posterior = transform.(trans, eachcol(results.posterior_matrix))
posterior_α = first.(posterior)
mean(posterior_α)

In [None]:
function log_prob(X, X_min, X_max, Y_target, sigma_hat, nu=1, alpha=0.01)
    
    Y_pred = 10 .^ models[1](X, true)
    r = Y_pred .- Y_target
    t = r ./ sigma_hat
    
    log_likelihood = sum(
        loggamma((nu + 1) / 2)
        - loggamma(nu / 2)
        .- log.(sqrt.(pi * nu) .* sigma_hat)
        .- (nu + 1) / 2.0 * log.(1 .+ 1.0 / nu .* t .^ 2)
    )

    X_bar = (X .- X_min) ./ (X_max - X_min)
    log_prior = sum(
        (alpha_b - 1) * log.(X_bar) + (beta_b - 1) * log.(1 .- X_bar)
    )

    log_prob = alpha * log_likelihood + log_prior
    
    return -log_prob
end;

beta_dist = Beta(alpha_b, beta_b)
X_prior = rand(beta_dist, n_parameters, 100000) .* (X_max - X_min) .+ X_min
X_0 = mean(X_prior, dims=2);

In [52]:
?mcmc_with_warmup

search: [0m[1mm[22m[0m[1mc[22m[0m[1mm[22m[0m[1mc[22m[0m[1m_[22m[0m[1mw[22m[0m[1mi[22m[0m[1mt[22m[0m[1mh[22m[0m[1m_[22m[0m[1mw[22m[0m[1ma[22m[0m[1mr[22m[0m[1mm[22m[0m[1mu[22m[0m[1mp[22m



```julia
mcmc_with_warmup(
    rng,
    ℓ,
    N;
    initialization,
    warmup_stages,
    algorithm,
    reporter
)

```

Perform MCMC with NUTS, including warmup which is not returned. Return a `NamedTuple` of

  * `posterior_matrix`, a matrix of position vectors, indexes by `[parameter_index, draw_index]`
  * `tree_statistics`, a vector of tree statistics for each sample
  * `κ` and `ϵ`, the adapted metric and stepsize.

# Arguments

  * `rng`: the random number generator, eg `Random.GLOBAL_RNG`.
  * `ℓ`: the log density, supporting the API of the `LogDensityProblems` package
  * `N`: the number of samples for inference, after the warmup.

# Keyword arguments

  * `initialization`: see below.
  * `warmup_stages`: a sequence of warmup stages. See [`default_warmup_stages`](@ref) and [`fixed_stepsize_warmup_stages`](@ref); the latter requires an `ϵ` in initialization.
  * `algorithm`: see [`NUTS`](@ref). It is very unlikely you need to modify this, except perhaps for the maximum depth.
  * `reporter`: how progress is reported. By default, verbosely for interactive sessions using the log message mechanism (see [`LogProgressReport`](@ref), and no reporting for non-interactive sessions (see [`NoProgressReport`](@ref)).

# Initialization

The `initialization` keyword argument should be a `NamedTuple` which can contain the following fields (all of them optional and provided with reasonable defaults):

  * `q`: initial position. *Default*: random (uniform [-2,2] for each coordinate).
  * `κ`: kinetic energy specification. *Default*: Gaussian with identity matrix.
  * `ϵ`: a scalar for initial stepsize, or `nothing` for heuristic finders.

# Usage examples

Using a fixed stepsize:

```julia
mcmc_with_warmup(rng, ℓ, N;
                 initialization = (ϵ = 0.1, ),
                 warmup_stages = fixed_stepsize_warmup_stages())
```

Starting from a given position `q₀` and kinetic energy scaled down (will still be adapted):

```julia
mcmc_with_warmup(rng, ℓ, N;
                 initialization = (q = q₀, κ = GaussianKineticEnergy(5, 0.1)))
```

Using a dense metric:

```julia
mcmc_with_warmup(rng, ℓ, N;
                 warmup_stages = default_warmup_stages(; M = Symmetric))
```

Disabling the initial stepsize search (provided explicitly, still adapted):

```julia
mcmc_with_warmup(rng, ℓ, N;
                 initialization = (ϵ = 1.0, ),
                 warmup_stages = default_warmup_stages(; stepsize_search = nothing))
```


In [None]:
results = mcmc_with_warmup(Random.default_rng(), ∇P, 1000; initialization = (ϵ = 0.1, ))

In [None]:
logp_g(X) = logp_f(X, X_min, X_max, Y_target, sigma_hat)
pathfinder(logp_g, X_0, 100; ndraws_elbo=100);

In [64]:
methods(as)

In [67]:
TransformVariables.UnitVector

UnitVector

In [69]:
?TransformVariables.VectorTransform

```julia
abstract type VectorTransform <: TransformVariables.AbstractTransform
```

Transformation that transforms `<: AbstractVector`s to other values.

# Implementation

Implements [`transform`](@ref) and [`transform_and_logjac`](@ref) via [`transform_with`](@ref), and [`inverse`](@ref) via [`inverse!`](@ref).
