In [36]:
using GaussianProcesses, Plots, SumProductNetworks, StatsFuns, Distributions, StatsBase
import SumProductNetworks.add!

#### Include own implementations

In [37]:
include("utilFunctions.jl")
include("dataTypes.jl")
include("dataTypeUtilFunctions.jl")
include("computationFunctions.jl")

spn_posterior (generic function with 3 methods)

#### Data set

In [3]:
gr(size = (1024, 768))

Plots.GRBackend()

In [4]:
datapath = "../data/clean/motor.csv"

"../data/clean/motor.csv"

In [5]:
(data, header) = readcsv(datapath, header = true)

([2.4 0.0 1.0 3.7; 2.6 -1.3 1.0 3.7; … ; 55.4 -2.7 3.0 138.0; 57.6 10.7 3.0 138.0], AbstractString["times" "accel" "strata" "v"])

In [6]:
headerDict = Dict(col[2] => col[1] for col in enumerate(header))

Dict{SubString{String},Int64} with 4 entries:
  "v"      => 4
  "times"  => 1
  "accel"  => 2
  "strata" => 3

In [7]:
X = convert(Vector, data[:,headerDict["times"]])
y = convert(Vector, data[:,headerDict["accel"]]);

In [8]:
y /= maximum(y);

In [9]:
N = length(X)

In [10]:
scatter(X, y)

## Learn a full GP

In [11]:
mZero = MeanZero()                   #Zero mean function
kern = SE(0.0,0.0)                   #Sqaured exponential kernel (note that hyperparameters are on the log scale)

Type: GaussianProcesses.SEIso, Params: [0.0, 0.0]


In [12]:
logObsNoise = -1.0                        # log standard deviation of observation noise (this is optional)

In [13]:
log(5)

In [14]:
gp = GP(reshape(X, 1, N), y, MeanZero(), SE(log(5.0),log(1.0)), -2.)       #Fit the GP

GP Exact object:
  Dim = 1
  Number of observations = 94
  Mean function:
    Type: GaussianProcesses.MeanZero, Params: Float64[]
  Kernel:
    Type: GaussianProcesses.SEIso, Params: [1.60944, 0.0]
  Input observations = 
[2.4 2.6 … 55.4 57.6]
  Output observations = [0.0, -0.0173333, -0.036, 0.0, -0.036, -0.036, -0.036, -0.0173333, -0.036, -0.036  …  0.142667, 0.142667, -0.357333, -0.177333, 0.0, 0.142667, -0.196, -0.036, -0.036, 0.142667]
  Variance of observation noise = 0.01831563888873418
  Marginal Log-Likelihood = -125.001

In [15]:
μ, σ² = predict_y(gp,linspace(minimum(X),maximum(X),100));

In [16]:
scatter(X, y, label = "observations", title = "Full GP with SE Kernel and LLH: $(gp.target)")
plot!(linspace(minimum(X),maximum(X),100), μ, ribbon=2*sqrt.(σ²), label = "full GP with 95% interval")

In [17]:
yhat = reduce(vcat, [rand(gp, linspace(minimum(X),maximum(X),100)) for k in 1:100]);
yhat += rand() * 0.2
size(yhat)

(10000,)

In [18]:
μ, σ = predict_y(gp,linspace(minimum(X),maximum(X),100));
yhat = reduce(hcat, [rand(gp, linspace(minimum(X),maximum(X),100)) for k in 1:100]);

scatter(X, y, label = "observations", title = "Full GP with SE Kernel and LLH: $(gp.target)")
plot!(reduce(hcat, [linspace(minimum(X),maximum(X),100) for k in 1:100]), yhat, markersize = 2, markerstrokewidth = 0, legend =false)
plot!(linspace(minimum(X),maximum(X),100), μ + 2*σ, color = :green, w = 4, label = "Full GP mean + 2*std")
plot!(linspace(minimum(X),maximum(X),100), μ - 2*σ, color = :green, w = 4, label = "Full GP mean - 2*std")
savefig("../plots/full_gp_draws.png")

## optimize a full GP

In [19]:
optimize!(gp)

Results of Optimization Algorithm
 * Algorithm: L-BFGS
 * Starting Point: [-2.0,1.6094379124341003,0.0]
 * Minimizer: [-1.2282746156639477,1.6156825479807913, ...]
 * Minimum: 3.510034e+01
 * Iterations: 12
 * Convergence: true
   * |x - x'| < 1.0e-32: false
   * |f(x) - f(x')| / |f(x)| < 1.0e-32: false
   * |g(x)| < 1.0e-08: true
   * f(x) > f(x'): false
   * Reached Maximum Number of Iterations: false
 * Objective Function Calls: 56
 * Gradient Calls: 56

In [20]:
gp.target

In [21]:
μ, σ² = predict_y(gp,linspace(minimum(X),maximum(X),100));

In [22]:
scatter(X, y, label = "observations", title = "Marginal LLH optimized full GP with SE Kernel and LLH: $(gp.target)")
plot!(linspace(minimum(X),maximum(X),100), μ, ribbon=2*sqrt.(σ²), label = "optimized full GP with 95% interval")
savefig("../plots/optimized_fullGP.png")

# Make a SPN with GP leaves

In [23]:
global gID = 0

Hand-coded 2-layer example

In [24]:
srand(12345678)
K = 10;
numSamples = 0.25;
meanFunction = MeanZero();
kernelFunction = SE(log(5.0),log(1.0));
noise = -1.;

In [29]:
root = GPSumNode(nextID(), Int[]);

for k in 1:K
    
    # make random (1D) split
    split = rand() * maximum(X) + minimum(X)
    n = min(sum(X .>= split), sum(X .< split))
    c = 0
    
    while n < (N * numSamples)
        @assert c < 100 "Could not find a split"
        split = rand() * maximum(X) + minimum(X)
        n = min(sum(X .>= split), sum(X .< split))
        c += 1
    end
    
    child = FiniteSplitNode(nextID(), Float64[split])
    
    # append two GP leaves
    leaf1 = GPLeaf{Any}(nextID(),
            GP(reshape(X[X .<= split], 1, sum(X .<= split)), y[X .<= split], meanFunction, kernelFunction, noise))
    leaf1.parents = SPNNode[]
    
    leaf2 = GPLeaf{Any}(nextID(),
            GP(reshape(X[X .> split], 1, sum(X .> split)), y[X .> split], meanFunction, kernelFunction, noise))
    leaf2.parents = SPNNode[]
    
    add!(child, leaf1)
    add!(child, leaf2)
    
    add!(root, child)
end

## Visualize splits

In [30]:
plt = scatter(X, y, label = "observations", title = "Random splits of SPN-GP")
plotSplits!(plt, getAllSplits(root))
savefig("../plots/randomSplits.png")

## Likelihood of product nodes

In [38]:

for (pi, pnode) in enumerate(root.children)
    
    μ, σ² = spn_predict(pnode, linspace(minimum(X),maximum(X),100));
    
    plt = scatter(X, y, label = "observations", title ="Prediction of product node $(pi), w: $(root.posterior_weights[pi]), LLH: $(spn_likelihood(pnode))")
    plot!(plt, linspace(minimum(X),maximum(X),100), μ, ribbon=2*sqrt.(σ²), label = "MoE GP with 95% interval")
    plotSplits!(plt, getAllSplits(pnode))
    savefig("../plots/productNode$(pi).png")

end

LoadError: [91mUndefVarError: spn_predict not defined[39m

## Compute likelihoods

In [39]:
# test
n = GPLeaf{Any}(0, GP(reshape(X, 1, N), y, meanFunction, kernelFunction, noise))
spn_likelihood(n)

In [40]:
# test
n = root.children[3]
spn_likelihood(n)

In [41]:
# test
spn_likelihood(root)

## Update posterior weights

In [42]:
root.prior_weights = ones(length(root)) / length(root);

In [44]:
spn_update!(root)

In [45]:
println(root)

Gaussian Process Sum Node [ID: 32, 
	 w_prior: [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1], 
	 w_posterior: [0.109, 0.074, 0.023, 0.097, 0.126, 0.104, 0.237, 0.1, 0.097, 0.034]]


## Predictions under the model

In [46]:
# test
n = root.children[3]
μ, σ² = spn_predict(n, linspace(minimum(X),maximum(X),100));

plt = scatter(X, y, label = "observations", title ="Prediction of product node 3 only")
plot!(plt, linspace(minimum(X),maximum(X),100), μ, ribbon=2*sqrt.(σ²), label = "MoE GP with 95% interval")
plotSplits!(plt, getAllSplits(n))
savefig("../plots/productNode3.png")

LoadError: [91mUndefVarError: spn_predict not defined[39m

In [47]:
@assert sum(root.prior_weights) ≈ 1.
@assert sum(root.posterior_weights) ≈ 1.

In [48]:
# test
n = root
μ, σ² = spn_predict(n, linspace(minimum(X),maximum(X),100));

plt = scatter(X, y, label = "observations", title="SPN-GP with $(K) splits and LLH: $(spn_likelihood(root))")
plot!(plt, linspace(minimum(X),maximum(X),100), μ, ribbon=2*sqrt.(σ²), label = "SPN-GP with 95% interval")
plotSplits!(plt, getAllSplits(n))
savefig("../plots/spn_gp.png")

LoadError: [91mUndefVarError: spn_predict not defined[39m

## Print SPN

In [49]:
rDepth = depth(root)
for node in reverse(SumProductNetworks.order(root))
    print(repeat("\t", rDepth - depth(node)), " ", node, "\n")
end

LoadError: [91mUndefVarError: order not defined[39m

## Create SPN-GP with flexible depth

In [50]:
K = 5;
numSamples = 0.25;
minSamples = 40;
meanFunction = MeanZero();
kernelFunction = SE(log(5.0),log(1.0));
noise = -1.;
maximumDepth = 1

param = SPNParameters(K, numSamples, minSamples, kernelFunction, meanFunction, noise)

root = makeSumNode(X, y, param, 1, maximumDepth)

LoadError: [91mUndefVarError: SPNParameters not defined[39m

In [595]:
spn_update(root)

In [596]:
spn_likelihood(root)

In [570]:
root

Gaussian Process Sum Node [ID: 3051258, 
	 w_prior: [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1], 
	 w_posterior: [0.181, 0.028, 0.08, 0.134, 0.176, 0.016, 0.026, 0.071, 0.198, 0.09]]

In [571]:
depth(root)

In [572]:
sum(isa.(SumProductNetworks.order(root), SumNode))

In [525]:
# test
n = root
μ, σ² = spn_predict(n, linspace(minimum(X),maximum(X),100));


plt = scatter(X, y, label = "observations", title="SPN-GP with depth $(depth(root)) and $(K) splits and LLH: $(spn_likelihood(root))")
plot!(plt, linspace(minimum(X),maximum(X),100), μ, ribbon=2*sqrt.(σ²), label = "SPN-GP with 95% interval")
#plotSplits!(plt, getSplits(n, 1))
savefig("../plots/deep_spn_gp.png")

plt = scatter(X, y, label = "observations", title="SPN-GP with depth $(depth(root)) and $(K) splits and LLH: $(spn_likelihood(root))")
plot!(plt, linspace(minimum(X),maximum(X),100), μ, ribbon=2*sqrt.(σ²), label = "SPN-GP with 95% interval")
plotSplits!(plt, getSplits(n, 1))
savefig("../plots/deep_spn_gp_splits.png")

In [500]:
include("../../infinitesumproductnetworks/src/plotSPN.jl")

spn2graphviz (generic function with 1 method)

In [552]:
print(root)

Gaussian Process Sum Node [ID: 3035894, 
	 w_prior: [0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02], 
	 w_posterior: [0.03, 0.03, 0.026, 0.019, 0.029, 0.007, 0.014, 0.012, 0.022, 0.007, 0.007, 0.009, 0.032, 0.028, 0.006, 0.017, 0.016, 0.007, 0.016, 0.023, 0.032, 0.021, 0.018, 0.028, 0.019, 0.085, 0.006, 0.008, 0.029, 0.007, 0.006, 0.018, 0.029, 0.03, 0.03, 0.026, 0.02, 0.007, 0.007, 0.03, 0.027, 0.021, 0.018, 0.006, 0.034, 0.027, 0.008, 0.007, 0.022, 0.017]]

## some plots

#### Flat SPN

In [652]:
K = 10000;
numSamples = 0.25;
minSamples = 40;
meanFunction = MeanZero();
kernelFunction = SE(log(5.0),log(1.0));
noise = -1.;
maximumDepth = 1

param = SPNParameters(K, numSamples, minSamples, kernelFunction, meanFunction, noise)

root = makeSumNode(X, y, param, 1, maximumDepth)
spn_update(root)

In [None]:
for K in [5, 50, 500, 5_000, 10_000, 50_000]

    numSamples = 0.25;
    minSamples = 40;
    meanFunction = MeanZero();
    kernelFunction = SE(log(5.0),log(1.0));
    noise = -1.;
    maximumDepth = 3

    param = SPNParameters(K, numSamples, minSamples, kernelFunction, meanFunction, noise)

    root = makeSumNode(X, y, param, 1, maximumDepth)
    spn_update(root)

    S = 100

    x = linspace(minimum(X),maximum(X),100)
    xx = reduce(hcat, x for k in 1:S)
    yy = reduce(hcat, spn_rand(root, x) for k in 1:S)

    μ, σ = predict_y(gp,linspace(minimum(X),maximum(X),100));

    plt = scatter(X, y, label = "observations", legend = false)
    plot!(xx, yy, markersize = 2, markerstrokewidth = 0, label = "Posterior samples from SPN-GP");
    plot!(linspace(minimum(X),maximum(X),100), w =4, μ + 2*σ, color = :green, label = "Full GP mean + 2*std")
    plot!(linspace(minimum(X),maximum(X),100), w =4, μ - 2*σ, color = :green, label = "Full GP mean - 2*std")
    
    splits = [child.split[1] for child in children(root)]
    
    vline!([minimum(splits), maximum(splits)], color = :yellow, w = 4)

    plot(plt, size = (1024, 1024), title="SPN-GPs with differen $(K) splits")
    savefig("../plots/spn_gp_splits_$(K)_$(maximumDepth).png")
    
end

In [670]:
plot(
histogram([child.split[1] for child in children(root)], weightss = root.posterior_weights, title= "posterior weighted"), 
histogram([child.split[1] for child in children(root)], title = "prior over splits"))
savefig("../plots/split_weights.png")

#### Deep SPN

In [625]:
K = 5;
numSamples = 0.25;
minSamples = 40;
meanFunction = MeanZero();
kernelFunction = SE(log(5.0),log(1.0));
noise = -1.;
maximumDepth = 100

param = SPNParameters(K, numSamples, minSamples, kernelFunction, meanFunction, noise)

root = makeSumNode(X, y, param, 1, maximumDepth)
spn_update(root)

In [631]:
S = 100

x = linspace(minimum(X),maximum(X),100)
xx = reduce(hcat, x for k in 1:S)
yy = reduce(hcat, spn_rand(root, x) for k in 1:S)

μ, σ = predict_y(gp,linspace(minimum(X),maximum(X),100));

plt1 = scatter(X, y, label = "observations", legend = false)
plot!(xx, yy, markersize = 2, markerstrokewidth = 0, label = "Posterior samples from SPN-GP");
plot!(linspace(minimum(X),maximum(X),100), μ + 2*σ, color = :green, label = "Full GP mean + 2*std")
plot!(linspace(minimum(X),maximum(X),100), μ - 2*σ, color = :green, label = "Full GP mean - 2*std")

plt2 = scatter(X, y, label = "observations")
plot!(xx, yy, markersize = 2, markerstrokewidth = 0, label = "Posterior samples from SPN-GP");
plot!(linspace(minimum(X),maximum(X),100), μ + 2*σ, color = :green, label = "Full GP mean + 2*std")
plot!(linspace(minimum(X),maximum(X),100), μ - 2*σ, color = :green, label = "Full GP mean - 2*std")
plotSplits!(plt2, getAllSplits(root))

plot(plt1, plt2, size = (2024, 768), title="SPN-GP with depth $(depth(root) - 1) and $(K) splits per sum node (LLH: $(spn_likelihood(root)))")
savefig("../plots/spn_gp_deep.png")