In [None]:
import Pkg
Pkg.add("Turing")
Pkg.add("Flux")
Pkg.add("NCDatasets")
Pkg.add("TSVD")
Pkg.add("Statistics")
Pkg.add("Compat")
Pkg.add("LinearAlgebra")
Pkg.add("Glob")
Pkg.add("CSV")
Pkg.add("DataFrames")
Pkg.add("Distributions")
Pkg.add("ProgressMeter")
Pkg.add("PyPlot")
Pkg.add("Random")
Pkg.add("StatsPlots")
Pkg.add("SpecialFunctions")
Pkg.add("ReverseDiff")
Pkg.add("BSON")
Pkg.add("TransformVariables")
Pkg.add("TransformedLogDensities")
Pkg.add("LogDensityProblems")
Pkg.add("LogDensityProblemsAD")
Pkg.add("DynamicHMC")
Pkg.add("Parameters")
Pkg.add("TSVD")
Pkg.add("Glob")

In [1]:
using Flux
using Statistics
using LinearAlgebra
using Compat
using Glob
using TSVD
using NCDatasets
using CSV
using DataFrames
using Distributions: Categorical, Dirichlet, Gamma, Beta
using ProgressMeter
using PyPlot
using Random
using SpecialFunctions: loggamma
using ReverseDiff
using BSON: @load

In [2]:
obs_file ="../data/observed_speeds/greenland_vel_mosaic250_v1_g9000m.nc"
d_obs = NCDataset(obs_file)
v_obs = d_obs["velsurf_mag"][:]
v_obs = nomissing(v_obs, 0.0);
idx = findall(v_obs .> 0)
Obs = v_obs[idx];

n_grid_points = size(idx)[1];

In [3]:
training_files = sort(glob("../tests/training_data/*.nc"))

nf = length(training_files)
d = NCDataset(training_files[1], "r")
v = d["velsurf_mag"]
nx, ny, nt = size(v)

Data = zeros(n_grid_points, nf * nt)
ids = zeros(Int64, nf)
@showprogress for (k, training_file) in enumerate(training_files)
    m_id = match(r"id_(.+?)_", training_file)
    ids[k] = parse(Int, m_id[1])
    d = NCDataset(training_file, "r")
    v = d["velsurf_mag"][:]
    v = nomissing(v, 0.0)
    Data[:, k] = v[idx]
end

[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:02[39m


## Read training samples

In [4]:
X_df = DataFrame(CSV.File("../data/samples/velocity_calibration_samples_50.csv"))
X_df = X_df[ [x in ids for x in X_df[!, :id]] ,:]
X = transpose(Matrix(X_df[!, 2:9]))
X_mean = mean(X, dims=2);
X_std = std(X, dims=2);
X_scaled = (X .- X_mean) ./ X_std;
X_train = X_scaled;
n_parameters, n_samples = size(X);

## Load the model

Should be a commmand line argument

That we have to define the struct again is not ok. How can we avoid this?

In [5]:
struct NNModel
    chain::Chain
    V_hat::AbstractArray
    F_mean::AbstractArray
end

function (m::NNModel)(x, add_mean=false)
    if add_mean
        return V_hat * m.chain(x) .+ F_mean
    else
        return V_hat * m.chain(x)
    end

end

In [107]:
@load "emulator_1.bson" model
# model = Flux.loadmodel!(model, @load("mymodel.bson"))

In [7]:
function get_eigenglaciers(omegas, F, q)
    
    F_mean = sum(F .* omegas, dims=2);
    F_bar = F .- F_mean;

    Z = diagm(sqrt.(omegas[1, :] * n_grid_points))
    U, S, V = tsvd(Z * transpose(F_bar), q);
    lamda = S.^2 / n_grid_points
    V_hat = V * diagm(sqrt.(lamda));
    
    return V_hat, F_bar, F_mean
end;

In [46]:
q = 50
F = log10.(Data)
F = replace!(F, -Inf=>0)

dirichlet_dist = Dirichlet(n_samples, 1)

model_index = 1
Random.seed!(model_index)
omegas = transpose(rand(dirichlet_dist, 1))
omegas_0 = omegas ./ size(omegas)[1];
    
V_hat, F_bar, F_mean = get_eigenglaciers(omegas, F, q);

In [47]:
alpha_b = 3;
beta_b = 3;
beta_dist = Beta(alpha_b, beta_b);
X_prior = rand(beta_dist, n_parameters, 100000);
X_0 = mean(X_prior, dims=2);

In [48]:
Y_target = log10.(Obs);
Y_target = replace!(Y_target, -Inf=>0);

In [49]:
grid_resolution = ones(n_grid_points) .* 9000
sigma = 10
rho = 1.0 / (1e4 .^ 2)
point_area = (grid_resolution) .^ 2
K = point_area .* rho
sigma_hat = sqrt.(sigma .^ 2 ./ K .^ 2)

X_min = minimum(X_scaled, dims=2);
X_max = maximum(X_scaled, dims=2);

In [50]:
using TransformVariables, TransformedLogDensities, LogDensityProblems, LogDensityProblemsAD,
    DynamicHMC, DynamicHMC.Diagnostics, Parameters, Statistics, Random

In [100]:
struct SampleBayesProblem
    nn
    X_min::AbstractArray
    X_max::AbstractArray
    Y_target::AbstractArray
    sigma_hat::AbstractArray
    nu::Int
    alpha::Float16
end

In [101]:
function (problem::SampleBayesProblem)(θ)
    @unpack α = θ               # extract the parameters
    @unpack nn, X_min, X_max, Y_target, sigma_hat, nu, alpha = problem       # extract the data
    
    Y_pred = 10 .^ nn(α, true)
    r = Y_pred .- Y_target
    t = r ./ sigma_hat

    # StudentT distribution
    loglikelihood = sum(
        loggamma((nu + 1) / 2)
        - loggamma(nu / 2)
        .- log.(sqrt.(pi * nu) .* sigma_hat)
        .- (nu + 1) / 2.0 * log.(1 .+ 1.0 / nu .* t .^ 2)
    )
    # Beta prior
    X_bar = relu((α .- X_min) ./ (X_max - X_min))
    logprior = sum(
        (alpha_b - 1) * log.(X_bar) + (beta_b - 1) * log.(1 .- X_bar) 
        .+ loggamma(alpha_b + beta_b) 
        .- loggamma(alpha_b)
        .- loggamma(beta_b)
    )
    return (alpha * loglikelihood + logprior)
end

In [102]:
nu = 1
alpha = 0.01


0.01

In [108]:
logp = SampleBayesProblem(model, X_min, X_max, Y_target, sigma_hat, nu, alpha)
logp((α = X_0,)) # make sure that it works

-1452.0127961516039

In [109]:
trans = as((α = as(Array, as𝕀, n_parameters),))
P = TransformedLogDensity(trans, logp)
∇P = ADgradient(:Zygote, P)

Zygote AD wrapper for TransformedLogDensity of dimension 8

In [52]:
using LineSearches
using Optim


In [166]:
function log_prior(X_bar, alpha_b, beta_b)
    sum(
        (alpha_b - 1) * log.(X_bar) + (beta_b - 1) * log.(1 .- X_bar)
        .+ loggamma(alpha_b + beta_b) 
        .- loggamma(alpha_b)
        .- loggamma(beta_b)
    )
end

function logp_g(α, nn, X_min, X_max, Y_target, sigma_hat, nu, alpha)
    
    Y_pred = 10 .^ nn(α, true)
    r = Y_pred .- Y_target
    t = r ./ sigma_hat

    # StudentT distribution
    loglikelihood = sum(
        loggamma((nu + 1) / 2)
        - loggamma(nu / 2)
        .- log.(sqrt.(pi * nu) .* sigma_hat)
        .- (nu + 1) / 2.0 * log.(1 .+ 1.0 / nu .* t .^ 2)
    )
    # Beta prior
    X_bar = (α .- X_min) ./ (X_max - X_min)
    insupport(X) = sum(X_bar .< 0) == 0
    println(insupport(X))
    logprior = insupport(X) ?  log_prior(X_bar, alpha_b, beta_b) : -Inf
    
    (alpha * loglikelihood + logprior)

end


ll(X_0) = logp_g(X_0, model, X_min, X_max, Y_target, sigma_hat, nu, alpha)
res = optimize(ll, X_0, LBFGS(linesearch = LineSearches.MoreThuente(), ), Optim.Options(show_trace=true, iterations = 51))
X_map = Optim.minimizer(res)

true
true
true
true
true
true
true
true
true
true
true
true
true
true
true
true
true
Iter     Function value   Gradient norm 
     0    -1.451702e+03     2.302032e+02
 * time: 1.9073486328125e-5
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
true


LoadError: DomainError with -0.015968561594892794:
log will only return a complex result if called with a complex argument. Try log(Complex(x)).

In [None]:
X_map .* X_std .+ X_mean

In [None]:
results = mcmc_with_warmup(Random.default_rng(), ∇P, 2; 
    initialization = (q = vec(X_map), ))

In [None]:
summarize_tree_statistics(results.tree_statistics)

In [None]:
posterior = transform.(trans, eachcol(results.posterior_matrix))
posterior_α = first.(posterior)
mean(posterior_α)

In [97]:
logp_p(x) = LogDensityProblems.logdensity(P, x)
∇logp_p(x) = LogDensityProblems.logdensity_and_gradient(∇P, x)[2]
result_pf = pathfinder(logp_p, ∇logp_p; dim=8)

LoadError: UndefVarError: pathfinder not defined

In [None]:
init_params = result_pf.draws[:, 1]
result_dhmc1 = mcmc_with_warmup(
    Random.GLOBAL_RNG,
    ∇P,
    1;
    initialization=(; q=init_params),
    reporter=NoProgressReport(),
)

In [None]:
Pkg.add("Optim")
using Optim

In [None]:
Pkg.add("StatsBase")
using StatsBase

In [None]:
Pkg.add("Pathfinder")
using Pathfinder

In [None]:
Pkg.add("AdvancedMH")
Pkg.add("MCMCChains")
using AdvancedMH
using MCMCChains

In [None]:
Pkg.add("StructArrays")
using LogDensityProblemsAD
using LogDensityProblems
using AdvancedMH
using Distributions
using MCMCChains
using ForwardDiff
using StructArrays



In [None]:
sample(∇P, logp, 100000; init_params=ones(2), chain_type=StructArray, param_names=["μ", "σ"])

In [None]:
a =log(Complex(-3))

In [None]:
real(a)

In [None]:
?real

In [None]:
using LinearAlgebra
using Distributions
Pkg.add("Arpack")
using Arpack

In [None]:


function mala(logdensity,gradient,h,M,niter,θinit)       
        function gradientStep(θ,t)                                                                                                                                                                                 
                θ-t*M*gradient(θ)                                                                                                                                                                                  
        end        
        print(θinit)
        θtrace = zeros(length(θinit),niter)
        #θtrace=Array{Float64}(length(θinit),niter)    
        θ=θinit
        θtrace[:,1]=θinit                                                                                                                                                                                          
        for i=2:niter                                                                                                                                                                                              
                θold=θ                                                                                                                                                                                             
                θ=rand(MvNormal(gradientStep(θ,0.5*h),h*M))                                                                                                                                                        
                d=logdensity(θ) - logdensity(θold) + logpdf(MvNormal(gradientStep(θ,0.5*h),h*M),θold) - logpdf(MvNormal(gradientStep(θold,0.5*h),h*M),θ)                                                           
                if(!(log(rand(Uniform(0,1)))<d))                                                                                                                                                                   
                        θ=θold                                                                                                                                                                                     
                end                                                                                                                                                                                                
                θtrace[:,i]=θ                                                                                                                                                                                      
        end                                                                                                                                                                                                        
        θtrace                                                                                                                                                                                                     
end  

In [60]:
ρ²=0.8                                                                                                                                                                                                             
Σ=[1 ρ²;ρ² 1]                                                                                                                                                                                                      
                                                                                                                                                                                                                   
function logdensity(θ)                                                                                                                                                                                             
        logpdf(MvNormal(Σ),θ)                                                                                                                                                                                      
end                                                                                                                                                                                                                
                                                                                                                                                                                                                   
function gradient(θ)                                                                                                                                                                                               
        Σ\θ                                                                                                                                                                                                        
end  
 

function Hinv
    
end


gradient((α= X_0),)
niter=1000                                                                                                                                                                                                         
h=1/eigs(inv(Σ),nev=1)[1][1]                                                                                                                                                                                       
#draws=mala(logp,gradient,h,I,niter,[5,50]);   #No preconditioning                                                                                                                                                                                                                                                                                                    
pdraws=mala(logp,gradient,h,Σ,niter, X_0);       #With Preconditioning                                                                                                                                                    

LoadError: error in method definition: function Zygote.gradient must be explicitly imported to be extended

In [None]:
mean(pdraws, dims=2)

In [None]:
logp_gg(X_0) = logp_g(X_0, model, X_min, X_max, Y_target, sigma_hat, nu, alpha)

In [None]:
Zygote.gradient(logp_gg, X_0)

In [None]:
g = Flux.gradient(logp_gg, X_0)
g[1]

In [None]:
?gradient

In [None]:
Pkg.add("Zygote")

In [None]:
using Zygote

In [160]:
function logpi(α, nn, X_min, X_max, Y_target, sigma_hat, nu, alpha)
    
    Y_pred = 10 .^ nn(α, true)
    r = Y_pred .- Y_target
    t = r ./ sigma_hat

    # StudentT distribution
    loglikelihood = sum(
        loggamma((nu + 1) / 2)
        - loggamma(nu / 2)
        .- log.(sqrt.(pi * nu) .* sigma_hat)
        .- (nu + 1) / 2.0 * log.(1 .+ 1.0 / nu .* t .^ 2)
    )
    # Beta prior
    X_bar = (α .- X_min) ./ (X_max - X_min)
    logprior = sum(
        (alpha_b - 1) * log.(X_bar) + (beta_b - 1) * log.(1 .- X_bar)
        .+ loggamma(alpha_b + beta_b) 
        .- loggamma(alpha_b)
        .- loggamma(beta_b)
    )

    insupport(X) = sum(X_bar .< 0) == 0
    insupport(X) ?  (alpha * loglikelihood + logprior) : -Inf

end

logpi (generic function with 2 methods)

In [64]:
llogpi(X_0) = logpi(X_0, model, X_min, X_max, Y_target, sigma_hat, nu, alpha)

llogpi (generic function with 1 method)

In [65]:
function get_log_like_gradient_and_hessian(X;  eps=1e-2, compute_hessian=false)
   log_pi =  llogpi(X)
    if compute_hessian
        g = Zygote.gradient(llogpi, X)
        H = Zygote.hessian(llogpi, X)
    end
end

get_log_like_gradient_and_hessian (generic function with 1 method)

In [66]:
get_log_like_gradient_and_hessian(X_0, compute_hessian=true)

LoadError: UndefVarError: Zygote not defined

In [67]:
nu = 1
alpha = 0.01

0.01

In [68]:
Flux.hessian(llogpi, X_0)

LoadError: ArgumentError: Sampler for this object is not defined

In [69]:
Flux.gradient(llogpi(), X_0)

LoadError: MethodError: no method matching llogpi()
[0mClosest candidates are:
[0m  llogpi([91m::Any[39m) at In[64]:1

In [None]:
?Zygote.hessian

In [None]:
f(x, y) = sum((x .- y) .^ 2)

In [None]:
Flux.gradient(f, [2, 1], [2, 0])

In [None]:
g(x, y, a, b) = sum((a .* x .- b .* y) .^ 2)

In [None]:
Flux.gradient(g, [2, 1], [2, 0], 1, 1)

In [None]:
Zygote.hessian(logpi, X_0, model, X_min, X_max, Y_target, sigma_hat, nu, alpha)[1]

In [159]:
function logpi(α, nn, X_min, X_max, Y_target, sigma_hat, nu, alpha)
    Y_pred = 10 .^ nn(α, true);
    r = Y_pred .- Y_target
    t = r ./ sigma_hat

    # StudentT distribution
    loglikelihood = sum(
        loggamma((nu + 1) / 2)
        - loggamma(nu / 2)
        .- log.(sqrt.(pi * nu) .* sigma_hat)
        .- (nu + 1) / 2.0 * log.(1 .+ 1.0 / nu .* t .^ 2)
    )
    
    println("Y_pred: ", mean(Y_pred))
    
    # Beta prior
    X_bar = (α .- X_min) ./ (X_max - X_min)
    logprior = sum(
        (alpha_b - 1) * log.(X_bar) + (beta_b - 1) * log.(1 .- X_bar)
        .+ loggamma(alpha_b + beta_b) 
        .- loggamma(alpha_b)
        .- loggamma(beta_b)
    )

    insupport(X) = sum(X_bar .< 0) == 0
    insupport(X) ?  (alpha * loglikelihood + logprior) : -Inf

end

lp = logpi(X_0, model, X_min, X_max, Y_target, sigma_hat, nu, alpha)
println("Log_prob: ", lp)
Flux.gradient(logpi, X_0, model, X_min, X_max, Y_target, sigma_hat, nu, alpha)[1]
Flux.withgradient(logpi, X_0, model, X_min, X_max, Y_target, sigma_hat, nu, alpha)[1]

Y_pred: 58.99657521835326
Log_prob: -1451.7018975352285
Y_pred: 99.79009360000074
Y_pred: 61.590387812293535


-1444.0742151359996

In [35]:
    Y_pred = 10 .^ model(X_0, true)
    r = Y_pred .- Y_target
    t = r ./ sigma_hat
    mean(t)



4.6942285259538075

In [None]:
function logpii(α, nn)
    Y_pred = 10 .^ nn(α, true)
    println(mean(Y_pred))
end


In [None]:
logpii(X_0, model)

In [None]:
model

In [22]:
?Flux.gradient

```
gradient(f, args...)
```

Returns a tuple containing `∂f/∂x` for each argument `x`, the derivative (for scalar `x`) or the gradient.

`f(args...)` must be a real number, see [`jacobian`](@ref) for array output.

See also [`withgradient`](@ref) to keep the value `f(args...)`, and [`pullback`](@ref) for value and back-propagator.

```jldoctest; setup=:(using Zygote)
julia> gradient(*, 2.0, 3.0, 5.0)
(15.0, 10.0, 6.0)

julia> gradient(x -> sum(abs2,x), [7.0, 11.0, 13.0])
([14.0, 22.0, 26.0],)

julia> gradient([7, 11], 0, 1) do x, y, d
         p = size(x, d)
         sum(x.^p .+ y)
       end
([14.0, 22.0], 2.0, nothing)
```

---

```
gradient(() -> loss(), ps::Params) -> Grads
```

Gradient with implicit parameters. Takes a zero-argument function, and returns a dictionary-like container, whose keys are arrays `x in ps`.

See also [`withgradient`](@ref) to keep the value `loss()`.

```jldoctest; setup=:(using Zygote)
julia> x = [1 2 3; 4 5 6]; y = [7, 8]; z = [1, 10, 100];

julia> g = gradient(Params([x, y])) do
         sum(x .* y .* z')
       end
Grads(...)

julia> g[x]
2×3 Matrix{Float64}:
 7.0  70.0  700.0
 8.0  80.0  800.0

julia> haskey(g, z)  # only x and y are parameters
false
```


In [57]:
real(log(Complex(-1)))

0.0

In [11]:
# Import the package.
using AdvancedMH
using Distributions
using MCMCChains
using ForwardDiff
using StructArrays

using LinearAlgebra

μ_true = 2
σ_true = 0.5
# Generate a set of data from the posterior we want to estimate.
data = rand(Normal(μ_true, σ_true), 100)

# Define the components of a basic model.
insupport(θ) = θ[2] >= 0
likelihood(θ) = Normal(θ[1], θ[2])
prior(θ) = Uniform(-10, 10)
density(θ) = insupport(θ) ? sum(logpdf.(likelihood(θ), data)) + sum(logpdf.(prior(θ), data)) : -Inf

# Construct a DensityModel.
dmodel = DensityModel(density)

# Set up the sampler with a multivariate Gaussian proposal.
σ² = 0.01
spl = MALA(x -> MvNormal((σ² / 2) .* x, σ² * I))

# Sample from the posterior.
chain = sample(dmodel, spl, 100000; init_params=ones(2), chain_type=StructArray, param_names=["μ", "σ"])

[32mSampling: 100%|█████████████████████████████████████████| Time: 0:00:00[39m


100000-element StructArray(::Vector{Float64}, ::Vector{Float64}, ::Vector{Float64}) with eltype NamedTuple{(:μ, :σ, :lp), Tuple{Float64, Float64, Float64}}:
 (μ = 1.0, σ = 1.0, lp = -455.3876282711064)
 (μ = 1.4279958663926915, σ = 1.2326476257668044, lp = -432.6222032600074)
 (μ = 1.6140884866437557, σ = 0.9854582772505509, lp = -412.70040952940894)
 (μ = 1.8752723351125504, σ = 0.9301501467333264, lp = -402.32850249450405)
 (μ = 1.8597574663454057, σ = 0.7416082786230347, lp = -390.3932035619456)
 (μ = 1.8597574663454057, σ = 0.7416082786230347, lp = -390.3932035619456)
 (μ = 2.031570902119432, σ = 0.5696234020560965, lp = -381.7442692222307)
 (μ = 2.031570902119432, σ = 0.5696234020560965, lp = -381.7442692222307)
 (μ = 2.031570902119432, σ = 0.5696234020560965, lp = -381.7442692222307)
 (μ = 2.031570902119432, σ = 0.5696234020560965, lp = -381.7442692222307)
 (μ = 2.031570902119432, σ = 0.5696234020560965, lp = -381.7442692222307)
 (μ = 2.031570902119432, σ = 0.5696234020560965, lp

In [118]:
using LogDensityProblemsAD
#model_with_ad = LogDensityProblemsAD.ADgradient(Val(:ForwardDiff), ∇P)
sample(logp, spl, 100000; init_params=X_0, chain_type=StructArray)

LoadError: ArgumentError: the log density function does not support the LogDensityProblems.jl interface. Please implement the interface or provide a model of type `AbstractMCMC.AbstractModel`

In [114]:
trans = as((α = as(Array, as𝕀, n_parameters),))
P = TransformedLogDensity(trans, logp)
∇P = ADgradient(:Zygote, P)

Zygote AD wrapper for TransformedLogDensity of dimension 8

In [5]:
help logpdf

LoadError: syntax: extra token "logpdf" after end of expression

In [93]:
chain = psample(model, RWMH(init_params), 100000, 4; param_names=["μ","σ"], chain_type=Chains)

LoadError: UndefVarError: psample not defined

In [6]:
?logpdf

search: [0m[1ml[22m[0m[1mo[22m[0m[1mg[22m[0m[1mp[22m[0m[1md[22m[0m[1mf[22m [0m[1ml[22m[0m[1mo[22m[0m[1mg[22m[0m[1mp[22m[0m[1md[22m[0m[1mf[22m! grad[0m[1ml[22m[0m[1mo[22m[0m[1mg[22m[0m[1mp[22m[0m[1md[22m[0m[1mf[22m componentwise_[0m[1ml[22m[0m[1mo[22m[0m[1mg[22m[0m[1mp[22m[0m[1md[22m[0m[1mf[22m



```
logpdf(d::Distribution{ArrayLikeVariate{N}}, x::AbstractArray{<:Real,N}) where {N}
```

Evaluate the probability density function of `d` at `x`.

This function checks if the size of `x` is compatible with distribution `d`. This check can be disabled by using `@inbounds`.

# Implementation

Instead of `logpdf` one should implement `_logpdf(d, x)` which does not have to check the size of `x`.

See also: [`pdf`](@ref).

---

```
logpdf(d::Distribution{ArrayLikeVariate{N}}, x) where {N}
```

Evaluate the logarithm of the probability density function of `d` at every element in a collection `x`.

This function checks for every element of `x` if its size is compatible with distribution `d`. This check can be disabled by using `@inbounds`.

Here, `x` can be

  * an array of dimension `> N` with `size(x)[1:N] == size(d)`, or
  * an array of arrays `xi` of dimension `N` with `size(xi) == size(d)`.

---

```
logpdf(d::UnivariateDistribution, x::Real)
```

Evaluate the logarithm of probability density (mass) at `x`.

See also: [`pdf`](@ref).

---

```
logpdf(d::Union{UnivariateMixture, MultivariateMixture}, x)
```

Evaluate the logarithm of the (mixed) probability density function over `x`. Here, `x` can be a single sample or an array of multiple samples.


In [13]:
using Pkg
Pkg.add("AbstractMCMC")
using AbstractMCMC

[33m[1m│ [22m[39m  exception = Could not resolve host: pkg.julialang.org while requesting https://pkg.julialang.org/registries
[33m[1m└ [22m[39m[90m@ Pkg.Registry /opt/local/var/macports/build/_opt_bblocal_var_buildworker_ports_build_ports_lang_julia/julia/work/julia-1.8.4/usr/share/julia/stdlib/v1.8/Pkg/src/Registry/Registry.jl:68[39m
[32m[1m    Updating[22m[39m registry at `~/.julia/registries/General.toml`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.8/Project.toml`
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.8/Manifest.toml`


In [14]:
?LogDensityProblemsAD

search:

Couldn't find [36mLogDensityProblemsAD[39m
Perhaps you meant DensityModel


No documentation found.

Binding `LogDensityProblemsAD` does not exist.


In [12]:
?LogDensityProblems

search:

Couldn't find [36mLogDensityProblems[39m
Perhaps you meant DensityModel


No documentation found.

Binding `LogDensityProblems` does not exist.


In [122]:
X_bar

8×1 Matrix{Float64}:
 0.6483565433127019
 0.647910676617902
 0.6273077544435656
 0.6478926462889346
 0.6505437104220909
 0.6353471420662554
 0.6483217215407353
 0.63814507171178

In [126]:
a = [2.5, 3, -1, 3]

4-element Vector{Float64}:
  2.5
  3.0
 -1.0
  3.0

In [157]:
function logpi(α, nn, X_min, X_max, Y_target, sigma_hat; nu=1, alpha=0.01, alpha_b=3, beta_b=3)
    
    Y_pred = 10 .^ nn(α, true)
    r = Y_pred .- Y_target
    t = r ./ sigma_hat

    # StudentT distribution
    loglikelihood = sum(
        loggamma((nu + 1) / 2)
        - loggamma(nu / 2)
        .- log.(sqrt.(pi * nu) .* sigma_hat)
        .- (nu + 1) / 2.0 * log.(1 .+ 1.0 / nu .* t .^ 2)
    )
    # Beta prior
    X_bar = (α .- X_min) ./ (X_max - X_min)
    logprior = sum(
        (alpha_b - 1) * log.(X_bar) + (beta_b - 1) * log.(1 .- X_bar)
        .+ loggamma(alpha_b + beta_b) 
        .- loggamma(alpha_b)
        .- loggamma(beta_b)
    )

    insupport(X) = sum(X_bar .< 0) == 0
    insupport(X) ?  (alpha * loglikelihood + logprior) : -Inf
    
end

logpi (generic function with 2 methods)

In [151]:
function bar(X, X_min, X_max)
    (X .- X_min) ./ (X_max .- X_min)
end

insupport(X) = sum(bar(X, X_min, X_max) .< 0) == 0
density(X) = insupport(X) ? logpi(X, model, X_min, X_max, Y_target, sigma_hat) : -Inf

density (generic function with 1 method)

In [158]:
logpi(X_0, model, X_min, X_max, Y_target, sigma_hat)

-1451.7018975352285

In [153]:
# Construct a DensityModel.
dmodel = DensityModel(density)

# Set up the sampler with a multivariate Gaussian proposal.
H = Flux.hessian(logpi)
σ² = 1 / H
spl = MALA(x -> MvNormal((σ² / 2) .* x, σ² * I))

# Sample from the posterior.
chain = sample(dmodel, spl, 100000; init_params=X_0, chain_type=StructArray)

LoadError: MethodError: no method matching hessian(::typeof(logpi))
[0mClosest candidates are:
[0m  hessian(::Any, [91m::Any[39m) at ~/.julia/packages/Zygote/g2w9o/src/lib/grad.jl:62

In [154]:
using Flux

In [156]:
?Flux.hessian

```
hessian(f, x)
```

Construct the Hessian `∂²f/∂x²`, where `x` is a real number or an array, and `f(x)` is a real number. When `x` is an array, the result is a matrix `H[i,j] = ∂²f/∂x[i]∂x[j]`, using linear indexing `x[i]` even if the argument is higher-dimensional.

This uses forward over reverse, ForwardDiff over Zygote, calling `hessian_dual(f, x)`. See [`hessian_reverse`](@ref) for an all-Zygote alternative.

See also [`diaghessian`](@ref) to compute only the diagonal part.

# Examples

```jldoctest; setup=:(using Zygote)
julia> hessian(x -> x[1]*x[2], randn(2))
2×2 Matrix{Float64}:
 0.0  1.0
 1.0  0.0

julia> hessian(x -> sum(x.^3), [1 2; 3 4])  # uses linear indexing of x
4×4 Matrix{Int64}:
 6   0   0   0
 0  18   0   0
 0   0  12   0
 0   0   0  24

julia> hessian(sin, pi/2)
-1.0
```


In [16]:
### A Pluto.jl notebook ###
# v0.19.24

import Pkg; 
Pkg.add("DynamicHMCModels")
using Markdown
using InteractiveUtils

# ╔═╡ c0452572-c9ba-4833-b22a-49c0889b16b2
using Pkg

# ╔═╡ a8c916a9-d464-4fe2-9b6b-ab61308bffed
Pkg.activate(expanduser("~/.julia/dev/DynamicHMCModels"))

# ╔═╡ e5a5c94e-402c-48a8-b573-5b5c877dba69
begin
	using DynamicHMCModels
	using BenchmarkTools
	using RegressionAndOtherStories
end

# ╔═╡ c565bfd9-b5d2-4e50-9527-c8df52579858
md" ## Linear regression example"

# ╔═╡ 2cf37bf9-9412-42cf-a524-0041581b48f9
html"""
<style>
	main {
		margin: 0 auto;
		max-width: 3500px;
    	padding-left: max(10px, 5%);
    	padding-right: max(10px, 5%);
	}
</style>
"""

# ╔═╡ c897fe4c-4d26-40d0-9338-48022c7044bd
md" ### Estimate simple linear regression model with a half-T prior."

# ╔═╡ b9e2f55a-f809-4467-b617-b292e12b55c3
begin
	# A structure to hold the data: observables, covariates, and the degrees of freedom for the prior.

	"""
	Linear regression model ``y ∼ Xβ + ϵ``, where ``ϵ ∼ N(0, σ²)`` IID.
	Weakly informative prior for `β`, half-T for `σ`.
	"""
	struct LinearRegressionProblem{TY <: AbstractVector, TX <: AbstractMatrix, Tν <: Real}
	    "Observations."
	    y::TY
	    "Covariates"
	    X::TX
	    "Degrees of freedom for prior."
	    ν::Tν
	end
	
	# Make the type callable with the parameters *as a single argument*.

	function (problem::LinearRegressionProblem)(θ)
	    @unpack y, X, ν = problem                    # extract the data
	    @unpack β, σ = θ                             # works on the named tuple too
	    ϵ_distribution = Normal(0, σ)                # the error term
		                                             # likelihood for error
	    ℓ_error = mapreduce((y, x) -> logpdf(ϵ_distribution, y - dot(x, β)), +, y, eachrow(X)) 
	    ℓ_σ = logpdf(TDist(ν), σ)                    # prior for σ
	    ℓ_β = loglikelihood(Normal(0, 10), β)        # prior for β
	    ℓ_error + ℓ_σ + ℓ_β
	end
end

# ╔═╡ 52bcf291-bae0-484d-83ac-2b72487584c9
# Make up random data and test the function runs.

begin
	N = 100
	X = hcat(ones(N), randn(N, 2));
	β = [1.0, 2.0, -1.0]
	σ = 0.5
	y = X*β .+ randn(N) .* σ;
	p = LinearRegressionProblem(y, X, 1.0);
	p((β = β, σ = σ))
end

# ╔═╡ 404098e3-543c-492e-8bff-0ac0c770dd3e
md" ##### It is usually a good idea to benchmark and optimize your log posterior code at this stage. Above, we have carefully optimized allocations away using `mapreduce`."

# ╔═╡ c3294c81-72a3-4435-8019-b0285ad33f6d
@btime p((β = $β, σ = $σ))

# ╔═╡ 21f09380-9554-47b3-b8b8-d04b6fc7260e
md" ##### For this problem, we write a function to return the transformation (as it varies with the number of covariates)."

# ╔═╡ 14d716cb-6c14-45c0-8ef3-0065b2076b57
function problem_transformation(p::LinearRegressionProblem)
    as((β = as(Array, size(p.X, 2)), σ = asℝ₊))
end

# ╔═╡ f825e1bb-13d1-42f5-9808-de29b85604a2
md" ##### Wrap the problem with a transformation, then use ForwardDiff for the gradient."

# ╔═╡ 8519b5bd-797a-4513-82b3-1cfc072db755
t = problem_transformation(p)

# ╔═╡ 70418813-3ea7-4143-9804-3a98f3f682dc
P = TransformedLogDensity(t, p)

# ╔═╡ 275a0030-11ef-4533-b412-4ac82a8c795f
∇P = ADgradient(:ForwardDiff, P);

# ╔═╡ ce7f9c63-6f71-480b-ad5b-95dae7f08dac
md" ##### Sample from the posterior. `results` holds the chain, positions, diagnostic information, and the tuned sampler (which would allow continuation of sampling)."

# ╔═╡ 3b727805-8d0a-4d72-8b8c-7135095e1ff5
results = map(_ -> mcmc_with_warmup(Random.default_rng(), ∇P, 1000), 1:5)

# ╔═╡ 83bef086-3538-4ae8-ac10-778fb1d0ce30
md" ##### We use the transformation to obtain the posterior from the chain."

# ╔═╡ 0d3a5d33-e49f-4a41-b190-a44ca915a1a4
posterior = TransformVariables.transform.(t, eachcol(pool_posterior_matrices(results)))

# ╔═╡ 7a8ec9c6-2f00-450c-8d5a-2754f8e443d9
md" ##### Extract the parameter posterior means: `β`."

# ╔═╡ 11d815b2-8375-4229-970e-f66187b4d014
posterior_β = mean(first, posterior)

# ╔═╡ 4c790100-a9c9-452b-9e6a-6d9147a8d807
md" ##### then `σ`:"

# ╔═╡ bfd42f7a-6d73-45cc-8112-c44749c2c1e7
posterior_σ = mean(last, posterior)

# ╔═╡ fee2b723-23b7-45d3-add4-6657396a389b
md" ##### Effective sample sizes (of untransformed draws)"

# ╔═╡ 2eeecb48-f659-4a26-89b1-0b16d64450ec
ess, R̂ = ess_rhat(stack_posterior_matrices(results))

# ╔═╡ e3c07642-86f0-4d5c-b9e3-dc4f59731604
md" ##### Summarize NUTS-specific statistics of all chains"

# ╔═╡ 2580659d-1186-481b-a1e7-d0f21cf08d94
summarize_tree_statistics(mapreduce(x -> x.tree_statistics, vcat, results))

# ╔═╡ Cell order:
# ╟─c565bfd9-b5d2-4e50-9527-c8df52579858
# ╠═2cf37bf9-9412-42cf-a524-0041581b48f9
# ╠═c0452572-c9ba-4833-b22a-49c0889b16b2
# ╠═a8c916a9-d464-4fe2-9b6b-ab61308bffed
# ╟─c897fe4c-4d26-40d0-9338-48022c7044bd
# ╠═e5a5c94e-402c-48a8-b573-5b5c877dba69
# ╠═b9e2f55a-f809-4467-b617-b292e12b55c3
# ╠═52bcf291-bae0-484d-83ac-2b72487584c9
# ╟─404098e3-543c-492e-8bff-0ac0c770dd3e
# ╠═c3294c81-72a3-4435-8019-b0285ad33f6d
# ╟─21f09380-9554-47b3-b8b8-d04b6fc7260e
# ╠═14d716cb-6c14-45c0-8ef3-0065b2076b57
# ╟─f825e1bb-13d1-42f5-9808-de29b85604a2
# ╠═8519b5bd-797a-4513-82b3-1cfc072db755
# ╠═70418813-3ea7-4143-9804-3a98f3f682dc
# ╠═275a0030-11ef-4533-b412-4ac82a8c795f
# ╟─ce7f9c63-6f71-480b-ad5b-95dae7f08dac
# ╠═3b727805-8d0a-4d72-8b8c-7135095e1ff5
# ╟─83bef086-3538-4ae8-ac10-778fb1d0ce30
# ╠═0d3a5d33-e49f-4a41-b190-a44ca915a1a4
# ╟─7a8ec9c6-2f00-450c-8d5a-2754f8e443d9
# ╠═11d815b2-8375-4229-970e-f66187b4d014
# ╟─4c790100-a9c9-452b-9e6a-6d9147a8d807
# ╠═bfd42f7a-6d73-45cc-8112-c44749c2c1e7
# ╟─fee2b723-23b7-45d3-add4-6657396a389b
# ╠═2eeecb48-f659-4a26-89b1-0b16d64450ec
# ╟─e3c07642-86f0-4d5c-b9e3-dc4f59731604
# ╠═2580659d-1186-481b-a1e7-d0f21cf08d94

[32m[1m   Resolving[22m[39m package versions...
[32m[1m   Installed[22m[39m BitTwiddlingConvenienceFunctions ─ v0.1.5
[32m[1m   Installed[22m[39m GPUArraysCore ──────────────────── v0.1.4
[32m[1m   Installed[22m[39m ArrayInterfaceStaticArrays ─────── v0.1.5
[32m[1m   Installed[22m[39m MCMCDiagnostics ────────────────── v0.3.0
[32m[1m   Installed[22m[39m Adapt ──────────────────────────── v3.5.0
[32m[1m   Installed[22m[39m SIMDTypes ──────────────────────── v0.1.0
[32m[1m   Installed[22m[39m TransformVariables ─────────────── v0.3.12
[32m[1m   Installed[22m[39m Rmath ──────────────────────────── v0.7.1
[32m[1m   Installed[22m[39m CpuId ──────────────────────────── v0.3.1
[32m[1m   Installed[22m[39m StatsFuns ──────────────────────── v0.9.18
[32m[1m   Installed[22m[39m LayoutPointers ─────────────────── v0.1.13
[32m[1m   Installed[22m[39m OffsetArrays ───────────────────── v1.12.9
[32m[1m   Installed[22m[39m VectorizationBase ──────

LoadError: failed to clone from https://github.com/JuliaCI/BenchmarkTools.jl.git, error: GitError(Code:ERROR, Class:Net, failed to resolve address for github.com: nodename nor servname provided, or not known)