In [4]:
using GaussianProcesses, Plots, SumProductNetworks, StatsFuns, Distributions, StatsBase, DataFrames
import SumProductNetworks.add!

In [6]:
include("gpUtils.jl")

optimize2! (generic function with 1 method)

#### Data set

In [3]:
datapath = "../data/uci/energy.csv"

"../data/uci/energy.csv"

In [7]:
data = readtable("../data/uci/energy.csv");

In [164]:
X = Float64.(Array(data[[:X1, :X2, :X3, :X4, :X5, :X6, :X7, :X8]]));
X .-= minimum(X, 1)
X ./= maximum(X, 1);
X .-= mean(X, 1);


In [165]:
y = Array(data[[:Y1, :Y2]]);

y .-= mean(y, 1);

In [166]:
N = size(X, 1)

In [167]:
Ntrain = Int(ceil(N * 0.8))  # 1_000

srand(1234)

ids = collect(1:size(X, 1))
trainingIds = shuffle(ids)[1:Ntrain]
testingIds = shuffle(setdiff(ids, trainingIds));

In [168]:
Xtest = X[testingIds,:]
ytest = y[testingIds, :]

Xtrain = X[trainingIds, :]
ytrain = y[trainingIds, :];

In [169]:
mZero = MeanZero()
kern = SE(-1.0,0.0) 
logObsNoise = -1.0 

gp = GP(Xtrain', vec(ytrain[:,1]), mZero, kern, logObsNoise)
set_priors!(gp.k,[Normal(-2.0,4.0),Normal(-2.0,4.0)])

samples = mcmc(gp; nIter=100,burnin=0,thin=10);

BasicMCJob:
  Variable [1]: p (BasicContMuvParameter)
  GenericModel: 1 variables, 0 dependencies (directed graph)
  HMC sampler: number of leaps = 10, leap step = 0.1
  VanillaMCTuner: period = 100, verbose = false
  BasicMCRange: number of steps = 91, burnin = 0, thinning = 10

In [150]:
#Set the parameters to the posterior values the sample random function
fsamples = [];
for i in 1:size(samples,2)
    set_params!(gp, samples[:,i])
    update_target!(gp)
    μ, σ² = predict_y(gp, Xtest');
    push!(fsamples, μ)
end

In [151]:
μ = mean(reduce(hcat, fsamples), 2)

153×1 Array{Float64,2}:
 -7.79041
 12.1846 
  6.63563
 -8.71989
 -4.20884
 12.7829 
 -7.6357 
  2.82111
 17.2644 
 -9.74916
 13.2508 
 -8.76915
 10.7798 
  ⋮      
 -9.14533
 -6.8803 
 -4.88192
  1.89881
 18.1786 
 14.4113 
 15.1019 
 -9.9823 
 12.4038 
 -6.29291
 -8.18436
 -7.90336

In [209]:
mZero = MeanZero()
kern = SE(-1.0,0.0) 
logObsNoise = -1.0 

gp_1 = GP(Xtrain', vec(ytrain[:,1]), mZero, kern, logObsNoise)


μ, σ² = predict_y(gp_1, Xtrain');
#println(sqrt(mean((μ .- ytrain[:,1]).^2)))
#optimize2!(gp_1, mean = false, kern = true, noise = false, lik=false, lowerBound = -100)

μ, σ² = predict_y(gp_1, Xtrain');
println(sqrt(mean((μ .- ytrain[:,1]).^2)))

gp_2 = GP(Xtrain', vec(ytrain[:,2]), mZero, kern, logObsNoise)
#optimize2!(gp_2, mean = false, kern = true, noise = false, lik=false, lowerBound = -100)

1.0683756852622335


GP Exact object:
  Dim = 8
  Number of observations = 615
  Mean function:
    Type: GaussianProcesses.MeanZero, Params: Float64[]
  Kernel:
    Type: GaussianProcesses.SEIso, Params: [-1.0, 0.0]
  Input observations = 
[0.377315 -0.206019 … -0.206019 -0.344907; -0.368056 0.215278 … 0.215278 0.381944; … ; 0.0390625 -0.335938 … -0.335938 0.0390625; -0.1625 0.4375 … 0.0375 -0.1625]
  Output observations = [8.05224, -10.1978, -10.0478, 6.55224, -10.4478, 5.31224, -9.02776, -7.59776, -8.81776, -9.48776  …  -10.3178, -9.18776, -5.10776, 23.0022, -5.29776, 18.2722, 8.45224, -2.09776, -9.91776, -4.11776]
  Variance of observation noise = 0.1353352832366127
  Marginal Log-Likelihood = -8682.592

In [210]:
yhat = zeros(size(ytest))

μ, σ² = predict_y(gp_1, Xtest');
yhat[:,1] = μ

μ, σ² = predict_y(gp_2, Xtest');
yhat[:,2] = μ;

In [211]:
sqrt(mean((yhat .- ytest).^2))

## Learn a full GP

In [11]:
mZero = MeanZero()                   #Zero mean function
kern = SE(0.0,0.0)                   #Sqaured exponential kernel (note that hyperparameters are on the log scale)

Type: GaussianProcesses.SEIso, Params: [0.0, 0.0]


In [12]:
logObsNoise = -1.0                        # log standard deviation of observation noise (this is optional)

In [13]:
log(5)

In [170]:
gp = GP(reshape(X, 1, N), y, MeanZero(), SE(log(5.0),log(1.0)), -2.)       #Fit the GP

LoadError: [91mDimensionMismatch("new dimensions (1, 768) must be consistent with array size 6144")[39m

In [171]:
μ, σ² = predict_y(gp,linspace(minimum(X),maximum(X),100));

LoadError: [91mArgumentError: Gaussian Process object and input observations do not have consistent dimensions[39m

In [16]:
scatter(X, y, label = "observations", title = "Full GP with SE Kernel and LLH: $(gp.target)")
plot!(linspace(minimum(X),maximum(X),100), μ, ribbon=2*sqrt.(σ²), label = "full GP with 95% interval")

In [17]:
yhat = reduce(vcat, [rand(gp, linspace(minimum(X),maximum(X),100)) for k in 1:100]);
yhat += rand() * 0.2
size(yhat)

(10000,)

In [18]:
μ, σ = predict_y(gp,linspace(minimum(X),maximum(X),100));
yhat = reduce(hcat, [rand(gp, linspace(minimum(X),maximum(X),100)) for k in 1:100]);

scatter(X, y, label = "observations", title = "Full GP with SE Kernel and LLH: $(gp.target)")
plot!(reduce(hcat, [linspace(minimum(X),maximum(X),100) for k in 1:100]), yhat, markersize = 2, markerstrokewidth = 0, legend =false)
plot!(linspace(minimum(X),maximum(X),100), μ + 2*σ, color = :green, w = 4, label = "Full GP mean + 2*std")
plot!(linspace(minimum(X),maximum(X),100), μ - 2*σ, color = :green, w = 4, label = "Full GP mean - 2*std")
savefig("../plots/full_gp_draws.png")

## optimize a full GP

In [19]:
optimize!(gp)

Results of Optimization Algorithm
 * Algorithm: L-BFGS
 * Starting Point: [-2.0,1.6094379124341003,0.0]
 * Minimizer: [-1.2282746156639477,1.6156825479807913, ...]
 * Minimum: 3.510034e+01
 * Iterations: 12
 * Convergence: true
   * |x - x'| < 1.0e-32: false
   * |f(x) - f(x')| / |f(x)| < 1.0e-32: false
   * |g(x)| < 1.0e-08: true
   * f(x) > f(x'): false
   * Reached Maximum Number of Iterations: false
 * Objective Function Calls: 56
 * Gradient Calls: 56

In [20]:
gp.target

In [21]:
μ, σ² = predict_y(gp,linspace(minimum(X),maximum(X),100));

In [22]:
scatter(X, y, label = "observations", title = "Marginal LLH optimized full GP with SE Kernel and LLH: $(gp.target)")
plot!(linspace(minimum(X),maximum(X),100), μ, ribbon=2*sqrt.(σ²), label = "optimized full GP with 95% interval")
savefig("../plots/optimized_fullGP.png")

# Make a SPN with GP leaves

In [23]:
global gID = 0

Hand-coded 2-layer example

In [24]:
srand(12345678)
K = 10;
numSamples = 0.25;
meanFunction = MeanZero();
kernelFunction = SE(log(5.0),log(1.0));
noise = -1.;

In [29]:
root = GPSumNode(nextID(), Int[]);

for k in 1:K
    
    # make random (1D) split
    split = rand() * maximum(X) + minimum(X)
    n = min(sum(X .>= split), sum(X .< split))
    c = 0
    
    while n < (N * numSamples)
        @assert c < 100 "Could not find a split"
        split = rand() * maximum(X) + minimum(X)
        n = min(sum(X .>= split), sum(X .< split))
        c += 1
    end
    
    child = FiniteSplitNode(nextID(), Float64[split])
    
    # append two GP leaves
    leaf1 = GPLeaf{Any}(nextID(),
            GP(reshape(X[X .<= split], 1, sum(X .<= split)), y[X .<= split], meanFunction, kernelFunction, noise))
    leaf1.parents = SPNNode[]
    
    leaf2 = GPLeaf{Any}(nextID(),
            GP(reshape(X[X .> split], 1, sum(X .> split)), y[X .> split], meanFunction, kernelFunction, noise))
    leaf2.parents = SPNNode[]
    
    add!(child, leaf1)
    add!(child, leaf2)
    
    add!(root, child)
end

## Visualize splits

In [30]:
plt = scatter(X, y, label = "observations", title = "Random splits of SPN-GP")
plotSplits!(plt, getAllSplits(root))
savefig("../plots/randomSplits.png")

## Likelihood of product nodes

In [38]:

for (pi, pnode) in enumerate(root.children)
    
    μ, σ² = spn_predict(pnode, linspace(minimum(X),maximum(X),100));
    
    plt = scatter(X, y, label = "observations", title ="Prediction of product node $(pi), w: $(root.posterior_weights[pi]), LLH: $(spn_likelihood(pnode))")
    plot!(plt, linspace(minimum(X),maximum(X),100), μ, ribbon=2*sqrt.(σ²), label = "MoE GP with 95% interval")
    plotSplits!(plt, getAllSplits(pnode))
    savefig("../plots/productNode$(pi).png")

end

LoadError: [91mUndefVarError: spn_predict not defined[39m

## Compute likelihoods

In [39]:
# test
n = GPLeaf{Any}(0, GP(reshape(X, 1, N), y, meanFunction, kernelFunction, noise))
spn_likelihood(n)

In [40]:
# test
n = root.children[3]
spn_likelihood(n)

In [41]:
# test
spn_likelihood(root)

## Update posterior weights

In [42]:
root.prior_weights = ones(length(root)) / length(root);

In [44]:
spn_update!(root)

In [45]:
println(root)

Gaussian Process Sum Node [ID: 32, 
	 w_prior: [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1], 
	 w_posterior: [0.109, 0.074, 0.023, 0.097, 0.126, 0.104, 0.237, 0.1, 0.097, 0.034]]


## Predictions under the model

In [46]:
# test
n = root.children[3]
μ, σ² = spn_predict(n, linspace(minimum(X),maximum(X),100));

plt = scatter(X, y, label = "observations", title ="Prediction of product node 3 only")
plot!(plt, linspace(minimum(X),maximum(X),100), μ, ribbon=2*sqrt.(σ²), label = "MoE GP with 95% interval")
plotSplits!(plt, getAllSplits(n))
savefig("../plots/productNode3.png")

LoadError: [91mUndefVarError: spn_predict not defined[39m

In [47]:
@assert sum(root.prior_weights) ≈ 1.
@assert sum(root.posterior_weights) ≈ 1.

In [48]:
# test
n = root
μ, σ² = spn_predict(n, linspace(minimum(X),maximum(X),100));

plt = scatter(X, y, label = "observations", title="SPN-GP with $(K) splits and LLH: $(spn_likelihood(root))")
plot!(plt, linspace(minimum(X),maximum(X),100), μ, ribbon=2*sqrt.(σ²), label = "SPN-GP with 95% interval")
plotSplits!(plt, getAllSplits(n))
savefig("../plots/spn_gp.png")

LoadError: [91mUndefVarError: spn_predict not defined[39m

## Print SPN

In [49]:
rDepth = depth(root)
for node in reverse(SumProductNetworks.order(root))
    print(repeat("\t", rDepth - depth(node)), " ", node, "\n")
end

LoadError: [91mUndefVarError: order not defined[39m

## Create SPN-GP with flexible depth

In [50]:
K = 5;
numSamples = 0.25;
minSamples = 40;
meanFunction = MeanZero();
kernelFunction = SE(log(5.0),log(1.0));
noise = -1.;
maximumDepth = 1

param = SPNParameters(K, numSamples, minSamples, kernelFunction, meanFunction, noise)

root = makeSumNode(X, y, param, 1, maximumDepth)

LoadError: [91mUndefVarError: SPNParameters not defined[39m

In [595]:
spn_update(root)

In [596]:
spn_likelihood(root)

In [570]:
root

Gaussian Process Sum Node [ID: 3051258, 
	 w_prior: [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1], 
	 w_posterior: [0.181, 0.028, 0.08, 0.134, 0.176, 0.016, 0.026, 0.071, 0.198, 0.09]]

In [571]:
depth(root)

In [572]:
sum(isa.(SumProductNetworks.order(root), SumNode))

In [525]:
# test
n = root
μ, σ² = spn_predict(n, linspace(minimum(X),maximum(X),100));


plt = scatter(X, y, label = "observations", title="SPN-GP with depth $(depth(root)) and $(K) splits and LLH: $(spn_likelihood(root))")
plot!(plt, linspace(minimum(X),maximum(X),100), μ, ribbon=2*sqrt.(σ²), label = "SPN-GP with 95% interval")
#plotSplits!(plt, getSplits(n, 1))
savefig("../plots/deep_spn_gp.png")

plt = scatter(X, y, label = "observations", title="SPN-GP with depth $(depth(root)) and $(K) splits and LLH: $(spn_likelihood(root))")
plot!(plt, linspace(minimum(X),maximum(X),100), μ, ribbon=2*sqrt.(σ²), label = "SPN-GP with 95% interval")
plotSplits!(plt, getSplits(n, 1))
savefig("../plots/deep_spn_gp_splits.png")

In [500]:
include("../../infinitesumproductnetworks/src/plotSPN.jl")

spn2graphviz (generic function with 1 method)

In [552]:
print(root)

Gaussian Process Sum Node [ID: 3035894, 
	 w_prior: [0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02], 
	 w_posterior: [0.03, 0.03, 0.026, 0.019, 0.029, 0.007, 0.014, 0.012, 0.022, 0.007, 0.007, 0.009, 0.032, 0.028, 0.006, 0.017, 0.016, 0.007, 0.016, 0.023, 0.032, 0.021, 0.018, 0.028, 0.019, 0.085, 0.006, 0.008, 0.029, 0.007, 0.006, 0.018, 0.029, 0.03, 0.03, 0.026, 0.02, 0.007, 0.007, 0.03, 0.027, 0.021, 0.018, 0.006, 0.034, 0.027, 0.008, 0.007, 0.022, 0.017]]

## some plots

#### Flat SPN

In [652]:
K = 10000;
numSamples = 0.25;
minSamples = 40;
meanFunction = MeanZero();
kernelFunction = SE(log(5.0),log(1.0));
noise = -1.;
maximumDepth = 1

param = SPNParameters(K, numSamples, minSamples, kernelFunction, meanFunction, noise)

root = makeSumNode(X, y, param, 1, maximumDepth)
spn_update(root)

In [None]:
for K in [5, 50, 500, 5_000, 10_000, 50_000]

    numSamples = 0.25;
    minSamples = 40;
    meanFunction = MeanZero();
    kernelFunction = SE(log(5.0),log(1.0));
    noise = -1.;
    maximumDepth = 3

    param = SPNParameters(K, numSamples, minSamples, kernelFunction, meanFunction, noise)

    root = makeSumNode(X, y, param, 1, maximumDepth)
    spn_update(root)

    S = 100

    x = linspace(minimum(X),maximum(X),100)
    xx = reduce(hcat, x for k in 1:S)
    yy = reduce(hcat, spn_rand(root, x) for k in 1:S)

    μ, σ = predict_y(gp,linspace(minimum(X),maximum(X),100));

    plt = scatter(X, y, label = "observations", legend = false)
    plot!(xx, yy, markersize = 2, markerstrokewidth = 0, label = "Posterior samples from SPN-GP");
    plot!(linspace(minimum(X),maximum(X),100), w =4, μ + 2*σ, color = :green, label = "Full GP mean + 2*std")
    plot!(linspace(minimum(X),maximum(X),100), w =4, μ - 2*σ, color = :green, label = "Full GP mean - 2*std")
    
    splits = [child.split[1] for child in children(root)]
    
    vline!([minimum(splits), maximum(splits)], color = :yellow, w = 4)

    plot(plt, size = (1024, 1024), title="SPN-GPs with differen $(K) splits")
    savefig("../plots/spn_gp_splits_$(K)_$(maximumDepth).png")
    
end

In [670]:
plot(
histogram([child.split[1] for child in children(root)], weightss = root.posterior_weights, title= "posterior weighted"), 
histogram([child.split[1] for child in children(root)], title = "prior over splits"))
savefig("../plots/split_weights.png")

#### Deep SPN

In [625]:
K = 5;
numSamples = 0.25;
minSamples = 40;
meanFunction = MeanZero();
kernelFunction = SE(log(5.0),log(1.0));
noise = -1.;
maximumDepth = 100

param = SPNParameters(K, numSamples, minSamples, kernelFunction, meanFunction, noise)

root = makeSumNode(X, y, param, 1, maximumDepth)
spn_update(root)

In [631]:
S = 100

x = linspace(minimum(X),maximum(X),100)
xx = reduce(hcat, x for k in 1:S)
yy = reduce(hcat, spn_rand(root, x) for k in 1:S)

μ, σ = predict_y(gp,linspace(minimum(X),maximum(X),100));

plt1 = scatter(X, y, label = "observations", legend = false)
plot!(xx, yy, markersize = 2, markerstrokewidth = 0, label = "Posterior samples from SPN-GP");
plot!(linspace(minimum(X),maximum(X),100), μ + 2*σ, color = :green, label = "Full GP mean + 2*std")
plot!(linspace(minimum(X),maximum(X),100), μ - 2*σ, color = :green, label = "Full GP mean - 2*std")

plt2 = scatter(X, y, label = "observations")
plot!(xx, yy, markersize = 2, markerstrokewidth = 0, label = "Posterior samples from SPN-GP");
plot!(linspace(minimum(X),maximum(X),100), μ + 2*σ, color = :green, label = "Full GP mean + 2*std")
plot!(linspace(minimum(X),maximum(X),100), μ - 2*σ, color = :green, label = "Full GP mean - 2*std")
plotSplits!(plt2, getAllSplits(root))

plot(plt1, plt2, size = (2024, 768), title="SPN-GP with depth $(depth(root) - 1) and $(K) splits per sum node (LLH: $(spn_likelihood(root)))")
savefig("../plots/spn_gp_deep.png")