In [1]:
using LatinHypercubeSampling
using Surrogates
using Interact, Plots
using LinearAlgebra
import Random
using MLBase
using LaTeXStrings

include("gridsamp.jl")
include("surrogate.jl")

using .sampling
using .surrogate_models

## The sampling effort of Latin hypercube and full-factorial sampling

In [2]:
# Interactive side-by-side view of a Latin hypercube design and a full-factorial grid on
# the square [-2, 2] x [-2, 2]; the slider sets the sample budget n.
ui = @manipulate throttle=.1 for n=3:1:100
    lb = [-2.0, -2.0]
    ub = [2.0, 2.0]

    # Left panel: Latin hypercube plan built from a fixed seed, rescaled to the bounds.
    Random.seed!(1234)
    design, _ = LHCoptim(n, 2, 10)
    lhs_pts = scaleLHC(design, collect(zip(lb, ub)))
    p1 = plot(lhs_pts[:, 1], lhs_pts[:, 2]; seriestype = :scatter, title = "Latin hypercube sampling")
    xlabel!("x1", xguidefontsize=8)
    ylabel!("x2", yguidefontsize=8)

    # Right panel: grid with round(sqrt(n)) levels per axis.
    # NOTE(review): presumably gridsamp returns levels^2 points, so the count only
    # approximates n — confirm against gridsamp.jl.
    bounds = [lb'; ub']  # row 1 = lower bounds, row 2 = upper bounds
    per_axis = round(Int, sqrt(n))
    grid_pts = gridsamp(bounds, fill(per_axis, 2))
    p2 = plot(grid_pts[:, 1], grid_pts[:, 2]; seriestype = :scatter, title = "Full factorial sampling")
    xlabel!("x1", xguidefontsize=8)

    plot(p1, p2, size = (750, 400), layout = grid(1, 2, widths=[0.5 ,0.5]), legend = false)
end

## The effect of regularization $r$ and spread $\lambda$ on $\hat{y}(\mathbf{x})$

In [37]:
"""
    test_fun(x::Real)

Evaluate the one-dimensional test function `-(1.4 - 3x) * sin(18x) + 2` at `x`.

Any `Real` input is accepted (integers, rationals, `Float32`, ...), not only `Float64`.
The floating-point literals in the expression promote the result to a float.
"""
function test_fun(x::Real)
    return -(1.4 - 3.0 * x) * sin(18.0 * x) + 2
end

# One-dimensional design space and the number of grid samples drawn from it.
lb = [0.0]
ub = [1.2]
n_samples = 10

# Bounds matrix handed to gridsamp: row 1 holds the lower bound, row 2 the upper bound.
bounds_s = [lb'; ub']
ns = [n_samples]
Xs = gridsamp(bounds_s, ns)

# training data: a copy of the sample locations and the test function evaluated on them
X = collect(Xs)
Y = map(test_fun, X)

# Interactive RBF demo: the sliders choose the kernel spread λ and the regularization r.
# The top panel shows the samples, the test function and the surrogate prediction; the
# bottom panel shows the rows of the basis matrix evaluated on the plotting grid.
ui = @manipulate throttle=.1 for λ=slider(0:0.0001:1, value=0.5884), r=slider(0:0.001:1, value=0.0)

    # Both sliders run over [0, 1]; map them onto logarithmic scales.
    r = 10 ^ (log10(1+1e-1)*r) - 1 # log scale: 0 at slider 0, 0.1 at slider 1
    λ = 10 ^ ((1+log10(500))*λ - 1) # log scale: 0.1 at slider 0, 500 at slider 1

    # test function plot
    x_p = collect(reshape(LinRange(0,1.2,100),100,1))
    p1 = plot(Xs, test_fun, st=:scatter, label="samples" )
    plot!(x_p, test_fun, st=:line, label=L"f(x)",legend=:topleft)
    xlabel!("x")
    ylabel!("y")

    # train the model; `train` is module-qualified to match `predict` below and the
    # least-squares cell.
    # NOTE(review): `Guassian` is the identifier this cell resolves; presumably it is
    # spelled that way in surrogate.jl — confirm before renaming.
    m = Surrogate(X=X , Y=Y, type="RBF", r=r, λ=λ, kernel=Guassian, name="myRBF")
    surrogate_models.train(m.model)

    # Predictions
    y_p = surrogate_models.predict(m.model,x_p)
    plot!(x_p,y_p, label=L"\hat{f}(x)")
    annotate!(0.5, 4.0, text("r: $(round(r, sigdigits=3)), λ: $(round(λ, sigdigits=3))", :black, :left, 12))

    # Basis panel: one curve per row of the transposed basis matrix. The axis labels are
    # set once on the empty plot instead of on every loop iteration.
    B = basis(m.model.λ,x_p,m.model.X,m.model.kernel)'
    p2 = plot()
    xlabel!("x")
    ylabel!(L"\phi (\lambda \left||{\mathbf{\zeta} -\mathbf{x} _{i}}\right||)")
    for i in axes(B,1)
        plot!(x_p,B[i,:],label=missing)
    end
    plot(p1, p2, size = (650, 600), layout = grid(2, 1, widths=[1.0 ,1.0], heights=[0.65 ,0.3]))
end

## Effect of regularization on ill-conditioned design matrices

In [27]:
using DelimitedFiles

# Load both sample sets (header row skipped): columns 3-4 are used as the inputs and
# column 2, kept as an n-by-1 matrix, as the response.
data = readdlm("sample_data_1.csv", ',', Float64; skipstart=1)
X_data_1 = data[:, 3:4]
Y_data_1 = data[:, 2:2]

data = readdlm("sample_data_2.csv", ',', Float64; skipstart=1)
X_data_2 = data[:, 3:4]
Y_data_2 = data[:, 2:2]

# Default ridge value and polynomial degree
ridge = 0.0
degree = 1

# plot bounds (row vectors: one entry per input dimension)
lb = [1 20]
ub = [12 30]

# Stack the bounds as Float64: row 1 = lower bounds, row 2 = upper bounds
bounds_reg = Float64[lb; ub]

# Evaluation grid for the surface plots, plus the matching axis ticks per dimension
n_grids = 200
n_reg = fill(n_grids, 2)
X_reg = gridsamp(bounds_reg, n_reg)
x_reg = LinRange(bounds_reg[1, 1], bounds_reg[2, 1], n_grids)
y_reg = LinRange(bounds_reg[1, 2], bounds_reg[2, 2], n_grids)


# Interactive least-squares demo: the sliders choose the degree d and the ridge value r;
# one surrogate is fitted per data set and both response surfaces are drawn side by side.
ui = @manipulate throttle=.5 for d=slider(1:1:16, value=1), r=slider(0:0.001:1, value=0.0)

    # slider position in [0, 1] mapped onto a log scale: 0 at slider 0, 9 at slider 1
    r = 10 ^ (log10(10)*r) - 1

    datasets = ((X_data_1, Y_data_1), (X_data_2, Y_data_2))

    # train models (first data set, then second)
    models = map(datasets) do (Xd, Yd)
        ls = Surrogate(X=Xd, Y=Yd, type="LS", r=r, d=d)
        surrogate_models.train(ls.model)
        ls
    end

    # Predictions on the plotting grid
    preds = map(ls -> surrogate_models.predict(ls.model, X_reg), models)

    # Surface plots: predicted surface with the data set's samples overlaid
    p1, p2 = map(datasets, preds) do (Xd, Yd), y_hat
        panel = plot(y_reg, x_reg, vec(y_hat), st=:surface, camera=(30,40), zlabel=L"\hat{y}(\mathbf{x})", legend = :none)
        scatter3d!(Xd[:,2], Xd[:,1], vec(Yd), label = "samples", markersize=2)
        xlabel!("x2", xguidefontsize=8)
        ylabel!("x1", yguidefontsize=8)
        panel
    end

    # combined plot: an axis-free strip carrying the title above the two panels
    header = plot(title = "ridge = $(round(r, sigdigits=3)), degree = $d", grid = false, showaxis = false, bottom_margin = -50Plots.px, ticks = false)
    plot(header, p1, p2, size = (750, 400), layout = @layout([A{0.01h}; [B C]]))
end

In [18]:
"""
    toTuple(A::AbstractMatrix)

Convert the rows of the 2D matrix `A` to tuples.

Returns a vector with one tuple per row of `A`, in row order. Any `AbstractMatrix` is
accepted (plain arrays, views, adjoints, ...).
"""
function toTuple(A::AbstractMatrix)
    return [Tuple(row) for row in eachrow(A)]
end

"""
    rosenbrock(x; a=1.0, b=100.0)

Evaluate the two-dimensional Rosenbrock function
`(a - x[1])^2 + b * (x[2] - x[1]^2)^2` at `x`.

Only `x[1]` and `x[2]` are read, so `x` may be any indexable collection with at least
two entries.

# Keywords
- `a`: offset in the first term (default `1.0`).
- `b`: weight of the second term (default `100.0`).
"""
function rosenbrock(x; a=1.0, b=100.0)
    return (a - x[1])^2 + b * (x[2] - x[1]^2)^2
end

rosenbrock (generic function with 1 method)

## The effect of the number of data points $n$ and spread $\theta$ on the Kriging predictor

In [28]:
# Plotting domain for the Kriging demo
lb = [-2.0, -1.0]
ub = [2.0, 3.0]

# Bounds matrix handed to gridsamp: row 1 = lower bounds, row 2 = upper bounds
bounds_s = [lb'; ub']

# Evaluation grid with n_grids levels per dimension, plus the matching axis ticks
n_grids = 30
ns = fill(n_grids, 2)
Xs = gridsamp(bounds_s, ns)
xs = LinRange(bounds_s[1, 1], bounds_s[2, 1], n_grids)
ys = LinRange(bounds_s[1, 2], bounds_s[2, 2], n_grids)

# Interactive Kriging demo: the sliders choose the number of Latin hypercube samples n and
# the Kriging parameter θ (used for both dimensions). The predictor is drawn over the
# plotting grid as a contour plot and as a surface.
ui = @manipulate throttle=.05 for n=10:10:100, θ=1e-4:5e-3:5e-2

    # Latin hypercube plan built from a fixed seed, rescaled to the global bounds lb/ub
    Random.seed!(1234)
    design, _ = LHCoptim(n, 2, 5)
    samples = scaleLHC(design, collect(zip(lb, ub)))

    # evaluate LHS samples using Rosenbrock
    z = [rosenbrock(pt) for pt in eachrow(samples)]

    # train the model
    kriging_surrogate = Kriging(toTuple(samples), z, lb, ub, p=fill(2.0, 2), theta=fill(θ, 2))

    # Predictions on the plotting grid
    zs = map(kriging_surrogate, toTuple(Xs))

    # contour plot with the sample locations overlaid
    p1 = contour(ys, xs, zs, fill = false, levels = 100, c = :jet1)
    scatter!(samples[:, 2], samples[:, 1], label = "samples")
    xlabel!("x2", xguidefontsize=8)
    ylabel!("x1", yguidefontsize=8)

    # Surface plot with the sampled function values overlaid
    p2 = plot(ys, xs, zs, st=:surface, camera=(50,40), label = "prediction", legend = :none)
    scatter3d!(samples[:, 2], samples[:, 1], z, label = "samples", markersize=3)
    xlabel!("x2", xguidefontsize=8)
    ylabel!("x1", yguidefontsize=8)

    # combined plot
    plot(p1, p2, size = (750, 400), layout = grid(1, 2, widths=[0.5 ,0.5]))
end