In [7]:
using GaussianProcesses, PGFPlots, SumProductNetworks
using StatsFuns, Distributions, ProgressMeter#, MultivariateStats
import SumProductNetworks.add!

include("utilFunctions.jl")
include("dataTypes.jl")
include("dataTypeUtilFunctions.jl")
include("computationFunctions.jl")
include("regionGraph.jl")
include("regionGraphUtils.jl")
include("gpUtils.jl")

optimize2! (generic function with 1 method)

In [2]:
# TODO: install MultivariateStats once network access is available
# Pkg.add("MultivariateStats")

Load the kin40k training and test sets

In [3]:
# Read the kin40k training inputs and labels from delimited text files.
# The trailing semicolons keep the notebook from echoing the matrices.
Xtrain = readdlm("../data/kin40k/kin40k_train_data.asc");
ytrain = readdlm("../data/kin40k/kin40k_train_labels.asc");

# Same for the held-out test split.
Xtest = readdlm("../data/kin40k/kin40k_test_data.asc");
ytest = readdlm("../data/kin40k/kin40k_test_labels.asc");

In [4]:
# Sanity check: report (rows, columns) of both input matrices.
println("size of training set: ", size(Xtrain))
println("size of test set: ", size(Xtest))

size of training set: (10000, 8)
size of test set: (30000, 8)


In [11]:
# Global counter, reset to 1 for this run.
# NOTE(review): presumably consumed by the included helper files when
# assigning ids — confirm against dataTypes.jl / regionGraph.jl.
global gID = 1

# N = number of training rows, D = number of input columns.
(N, D) = size(Xtrain)

numSums = 1
meanFunction = MeanZero();
# Candidate kernels handed to the SPN construction: a linear ARD kernel, an
# isotropic squared-exponential kernel and an ARD squared-exponential kernel.
# NOTE(review): GaussianProcesses.jl kernels are parameterised on a log scale,
# hence the log(...) arguments — verify against the package docs.
kernelFunctions = [LinArd(ones(D)*log(5.0)), SE(-1., 0.), SEArd(ones(D)*log(5.0), log(1.0))]

# No priors are placed on the kernel hyper-parameters.
kernelPriors = []

# NOTE(review): presumably the log observation noise — confirm in convertToSPN_ND.
noise = -1.;

# data range: per-column minimum / maximum of the training inputs, padded by 0.1
minX = vec(minimum(Xtrain, 1)) - 0.1
maxX = vec(maximum(Xtrain, 1)) + 0.1

# split size: half of the padded range in every dimension
δ = (maxX - minX) ./ 2

# maximum depth
max_depth = 3
min_samples = 200

overlap = 0.0

# Build the region graph; returns the root region, the sum regions, the GP
# regions and all partitions.
(rootRegion, sumRegions, gpRegions, allPartitions) = poonDomingos_ND(δ, minX, maxX, max_depth, min_samples, Xtrain);

# Map every region to its 1-based position in the combined region list.
RegionIDs = Dict(r[2] => r[1] for r in enumerate(union(sumRegions, gpRegions)));
# Partition ids continue after the largest region id so the two id ranges
# never collide.
PartitionIDS = Dict(p[2] => p[1] + maximum(values(RegionIDs)) for p in enumerate(allPartitions));

Samples in expert: 0
Samples in expert: 2507
Samples in expert: 2600
Samples in expert: 5107
Samples in expert: 0
Samples in expert: 1264
Samples in expert: 1291
Samples in expert: 2555
Samples in expert: 0
Samples in expert: 1243
Samples in expert: 1309
Samples in expert: 2552
Samples in expert: 0
Samples in expert: 0
Samples in expert: 2463
Samples in expert: 2430
Samples in expert: 4893
Samples in expert: 0
Samples in expert: 1182
Samples in expert: 1203
Samples in expert: 2385
Samples in expert: 0
Samples in expert: 1281
Samples in expert: 1227
Samples in expert: 2508
Samples in expert: 0


In [9]:
# Sanity-check the id lookup tables and the shape of the region graph:
# every partition has an id and exactly two regions, every region has an id,
# and every sum region owns at least one partition.
for p in allPartitions
    @assert haskey(PartitionIDS, p)
    @assert length(p.regions) == 2
    for r in p.regions
        @assert haskey(RegionIDs, r)
        # Only sum regions carry partitions; nothing more to check otherwise.
        isa(r, NDSumRegion) || continue
        @assert length(r.partitions) >= 1
    end
end

# check for loops
# Report whether partition `p` is one of the partitions of sum region `r`, or
# is found by searching below those partitions.
#
# `depth` is the current recursion level and `maxdepth` the budget; the
# assertion fails as soon as a sum region is reached at `depth >= maxdepth`.
# Returns a Bool.
function findPartition(p, r::NDSumRegion, depth, maxdepth)
    @assert depth < maxdepth

    # Direct hit: `p` is one of this region's own partitions.
    (p in r.partitions) && return true

    # Search every child partition one level deeper. All children are visited
    # (no early exit) so the depth assertion is evaluated on each branch.
    hits = [findPartition(p, child, depth + 1, maxdepth) for child in r.partitions]
    return any(hits)
end

# A GP region ends the search: this method never reports a match.
findPartition(p, r::NDGPRegion, depth, maxdepth) = false

# Report whether partition `p` is found when searching any region of the
# split partition `pp`. The depth is passed through unchanged; every region
# is searched before the combined Bool result is returned.
function findPartition(p, pp::NDSplitPartition, depth, maxdepth)
    hits = [findPartition(p, region, depth, maxdepth) for region in pp.regions]
    return any(hits)
end

# Starting from each region of each partition, the search must never lead
# back to that same partition. The search starts at depth 0 with a depth
# budget of 4.
for p in allPartitions, r in p.regions
    @assert !findPartition(p, r, 0, 4)
end

In [10]:
# Build the SPN over GP leaves for the first label column, with MCMC disabled.
# (The recorded run of this cell was interrupted while constructing GP nodes.)
root_ = convertToSPN_ND(rootRegion, gpRegions, RegionIDs, PartitionIDS, Xtrain, ytrain[:,1], meanFunction, 
                    kernelFunctions, kernelPriors, noise; overlap = overlap, do_mcmc = false)

[32mConstructing GP nodes... 21%|██████                     |  ETA: 0:01:48[39m

LoadError: [91mInterruptException:[39m

In [74]:
# Fit one SPN over GP leaves per label column, using the fixed
# hyper-parameters configured above, and store its test-set predictions.
yhat_fixed = zeros(size(ytest))
# Allocated for a run with optimised hyper-parameters; not filled in this cell.
yhat_opt = zeros(size(ytest))

# The number of outputs is taken from the label matrix itself instead of a
# `Dy` global that no visible cell defines.
for yi in 1:size(ytrain, 2)

    root_ = convertToSPN_ND(rootRegion, gpRegions, RegionIDs, PartitionIDS, Xtrain, ytrain[:,yi],
                    meanFunction, kernelFunctions, kernelPriors, noise;
                    overlap = overlap, do_mcmc = false)

    # The distinct GP leaves are only needed when the (currently disabled)
    # hyper-parameter optimisation below is switched on.
    gpnodes = unique(filter(n -> isa(n, GPLeaf), SumProductNetworks.getOrderedNodes(root_)));
    #map(gnode -> optimize2!(gnode.gp, mean = false, kern = true, noise = true, lik=false), gpnodes);

    # Reset prior and posterior weights of the root to a uniform value.
    fill!(root_.prior_weights, 1. / length(root_))
    fill!(root_.posterior_weights, 1. / length(root_))

    spn_update!(root_)
    spn_posterior(root_)

    # Predictions for this output go into the matching column.
    μ = predict_spn!(root_, Xtest);
    yhat_fixed[:,yi] = μ
end

In [75]:
# Root-mean-square error of the fixed-hyper-parameter predictions, pooled over
# every entry of the test label matrix.
rmse_spn_fixed_noise = sqrt(mean((yhat_fixed .- ytest).^2))