In [1]:
using LinearAlgebra
using Printf
include("QuantBnB-2D.jl")
include("QuantBnB-3D.jl")
include("gen_data.jl")
include("lowerbound_middle.jl")
include("Algorithms.jl")

myfindinterval (generic function with 1 method)

In [2]:
regress_data = ["carbon","casp","concrete","energy","fish","gas","grid","news","qsar","query1","query2"]

class_data = ["avila", "bank", "bean", "bidding", "eeg", "fault", "HTRU",
"magic", "occupancy", "page","raisin", "rice", "room", "segment","skin","wilt"]


16-element Vector{String}:
 "avila"
 "bank"
 "bean"
 "bidding"
 "eeg"
 "fault"
 "HTRU"
 "magic"
 "occupancy"
 "page"
 "raisin"
 "rice"
 "room"
 "segment"
 "skin"
 "wilt"

In [3]:
# test depth-2 trees on classification problems
println("Test Quant-BnB on depth-2 trees for classification problems")
for i=1:length(class_data)
    X_train, X_test, Y_train, Y_test = generate_realdata(string("./dataset/class/",class_data[i],".json"))
    n_train, m = size(Y_train)
    n_test, _ = size(Y_test)
    gre_train, gre_tree = greedy_tree(X_train, Y_train, 2, "C")
    opt_train, opt_tree = QuantBnB_2D(X_train, Y_train, 3, gre_train*(1+1e-6), 2, 0.2, nothing, "C", false)

    gre_test = sum((Y_test - tree_eval(gre_tree, X_test, 2, m)).>0)
    opt_test = sum((Y_test - tree_eval(opt_tree, X_test, 2, m)).>0)

    @printf("Dataset: %s,  CART train/test acc: %.3f / %.3f,  Quant-BnB train/test acc: %.3f / %.3f", class_data[i], 
            1-gre_train/n_train,1-gre_test/n_test, 1-opt_train/n_train,1-opt_test/n_test)

end

Test Quant-BnB on depth-2 trees for classification problems


Dataset: avila,  CART train/test acc: 0.507 / 0.505,  Quant-BnB train/test acc: 0.542 / 0.537

Dataset: bank,  CART train/test acc: 0.909 / 0.895,  Quant-BnB train/test acc: 0.925 / 0.924Dataset: bean,  CART train/test acc: 0.657 / 0.650,  Quant-BnB train/test acc: 0.663 / 0.654

Dataset: bidding,  CART train/test acc: 0.981 / 0.986,  Quant-BnB train/test acc: 0.981 / 0.985

Dataset: eeg,  CART train/test acc: 0.625 / 0.631,  Quant-BnB train/test acc: 0.665 / 0.683

Dataset: fault,  CART train/test acc: 0.540 / 0.519,  Quant-BnB train/test acc: 0.583 / 0.599

Dataset: HTRU,  CART train/test acc: 0.977 / 0.979,  Quant-BnB train/test acc: 0.979 / 0.979

Dataset: magic,  CART train/test acc: 0.794 / 0.792,  Quant-BnB train/test acc: 0.805 / 0.796

Dataset: occupancy,  CART train/test acc: 0.989 / 0.977,  Quant-BnB train/test acc: 0.989 / 0.961Dataset: page,  CART train/test acc: 0.954 / 0.951,  Quant-BnB train/test acc: 0.954 / 0.952

Dataset: raisin,  CART train/test acc: 0.868 / 0.889,  Quant-BnB train/test acc: 0.874 / 0.883Dataset: rice,  CART train/test acc: 0.930 / 0.921,  Quant-BnB train/test acc: 0.933 / 0.920

Dataset: room,  CART train/test acc: 0.932 / 0.935,  Quant-BnB train/test acc: 0.946 / 0.941

Dataset: segment,  CART train/test acc: 0.430 / 0.418,  Quant-BnB train/test acc: 0.575 / 0.554

Dataset: skin,  CART train/test acc: 0.907 / 0.908,  Quant-BnB train/test acc: 0.927 / 0.925

Dataset: wilt,  CART train/test acc: 0.991 / 0.752,  Quant-BnB train/test acc: 0.991 / 0.742

In [4]:
# test depth-2 trees on regression problems
println("Test Quant-BnB on depth-2 trees for regression problems")
for i=1:length(regress_data)
    X_train, X_test, Y_train, Y_test = generate_realdata(string("./dataset/regress/",regress_data[i],".json"))
    n_train, m = size(Y_train)
    n_test, m = size(Y_test)
    gre_train, gre_tree = greedy_tree(X_train, Y_train, 2, "R")
    opt_train, opt_tree = QuantBnB_2D(X_train, Y_train, 3, gre_train*(1+1e-6), 2, 0.2, nothing, "R", false)

    gre_test = sum((Y_test - tree_eval(gre_tree, X_test, 2, m)).^2)
    opt_test = sum((Y_test - tree_eval(opt_tree, X_test, 2, m)).^2)

    @printf("Dataset: %s,  CART train/test err: %.3f / %.3f,  Quant-BnB train/test err: %.3f / %.3f", regress_data[i], 
            gre_train/n_train,gre_test/n_test, opt_train/n_train,opt_test/n_test)

end

Test Quant-BnB on depth-2 trees for regression problems


Dataset: carbon,  CART train/test err: 0.116 / 0.119,  Quant-BnB train/test err: 0.114 / 0.116

Dataset: casp,  CART train/test err: 0.071 / 0.071,  Quant-BnB train/test err: 0.071 / 0.070Dataset: concrete,  CART train/test err: 0.022 / 0.025,  Quant-BnB train/test err: 0.021 / 0.025

Dataset: energy,  CART train/test err: 0.008 / 0.008,  Quant-BnB train/test err: 0.008 / 0.007Dataset: fish,  CART train/test err: 0.013 / 0.013,  Quant-BnB train/test err: 0.012 / 0.014

Dataset: gas,  CART train/test err: 0.003 / 0.003,  Quant-BnB train/test err: 0.003 / 0.003

Dataset: grid,  CART train/test err: 0.030 / 0.032,  Quant-BnB train/test err: 0.029 / 0.029

Dataset: news,  CART train/test err: 0.000 / 0.000,  Quant-BnB train/test err: 0.000 / 0.000Dataset: qsar,  CART train/test err: 0.019 / 0.018,  Quant-BnB train/test err: 0.018 / 0.017

Dataset: query1,  CART train/test err: 0.008 / 0.009,  Quant-BnB train/test err: 0.006 / 0.006

Dataset: query2,  CART train/test err: 0.033 / 0.033,  Quant-BnB train/test err: 0.030 / 0.030

In [5]:
# test depth-3 trees on a classification problem
X_train, X_test, Y_train, Y_test = generate_realdata(string("./dataset/class/","bidding",".json"))
n_train, m = size(Y_train)
n_test, _ = size(Y_test)
gre_train, gre_tree = greedy_tree(X_train, Y_train, 3, "C")
opt_train, opt_tree = QuantBnB_3D(X_train, Y_train, 3, 3, gre_train*(1+1e-6), 0, 0, nothing, "C", 300)
gre_test = sum((Y_test - tree_eval(gre_tree, X_test, 3, m)).>0)
opt_test = sum((Y_test - tree_eval(opt_tree, X_test, 3, m)).>0)
@printf("Dataset: %s,  CART train/test acc: %.3f / %.3f,  Quant-BnB train/test acc: %.3f / %.3f", "occupancy", 
            1-gre_train/n_train,1-gre_test/n_test, 1-opt_train/n_train,1-opt_test/n_test)

Total number of trees = 3781512036
Total number of intervals = 2916
--------------------------------------


Loop 1
Number of remaining trees = 2038097934
Total number of intervals = 14984
Current objective = 64.0
time = 4.023999929428101
--------------------------------------


Loop 2
Number of remaining trees = 175114545
Total number of intervals = 15858
Current objective = 37.0
time = 7.443000078201294
--------------------------------------


Loop 3
Number of remaining trees = 7571
Total number of intervals = 171
Current objective = 37.0
time = 8.652000188827515
--------------------------------------


Loop 4
Number of remaining trees = 135
Total number of intervals = 49
Current objective = 37.0
time = 0.125
--------------------------------------


Obj = 37.0
Tree is 

Any[9, 0.5555444444444444, Any[9, 0.11118888888888888, Any[3, 0.25005, 

[1.0 0.0], [0.0 1.0]], Any[3, 0.74995, [1.0 0.0], [0.0 1.0]]], Any[3, 0.25005, Any[1, 0.0016102718851999998, [1.0 0.0], [1.0 0.0]], Any[2, 0.08903313378412894, [1.0 0.0], [0.0 1.0]]]]
total time = 20.55400037765503
Dataset: occupancy,  CART train/test acc: 0.981 / 0.986,  Quant-BnB train/test acc: 0.993 / 0.990