# Comparison of OSDT with CART
This notebook compares OSDT (Optimal Sparse Decision Trees) with CART across a range of complexity (regularization) settings.

###### Dependencies

In [1]:
# All dependencies of this notebook

# third-party imports
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier


# local imports
from lib.model import OSDT
from lib.model_selection import train_cross_validate
from lib.data_processing import read_dataset

# Experiment
Run an experiment comparing how the test accuracy of OSDT and of CART varies with the regularization parameter $\lambda$.

In [2]:
dataset = read_dataset('data/preprocessed/compas-binary.csv')
# Feature matrix and label vector (last column taken as the label).
# They are not used further in this cell; train_cross_validate receives `dataset` directly.
X = dataset.values[:,:-1]
y = dataset.values[:,-1]

# Regularization values to sweep, and the accuracies collected for each model.
x_lamb = np.arange(0.001, 1.0, 0.005)
y_cart = []
y_osdt = []

# Loop-invariant, so computed once instead of on every iteration.
(n, _m) = dataset.shape
fold_size = round(n / 2) # 50/50 split for training vs testing set

for lamb in x_lamb:
    # CART has no direct per-leaf penalty, so lambda is translated into limits on tree size.
    # May want to go into more detail about how we compute equivalent hyperparameters
    cart_hyperparameters = [{
        'max_depth': 5,
        # scikit-learn rejects integer min_samples_split < 2 and max_leaf_nodes < 2,
        # so both derived values are clamped to 2.
        'min_samples_split': max(2, math.ceil(lamb * 2 * fold_size)),
        'min_samples_leaf': math.ceil(lamb * fold_size),
        'max_leaf_nodes': max(2, math.floor(1 / (2 * lamb))),
        'min_impurity_decrease': lamb
    }]
    _model, cart_accuracy, _hyperparameters = train_cross_validate(
        dataset,
        DecisionTreeClassifier,
        hyperparameters=cart_hyperparameters)
    y_cart.append(cart_accuracy)

    # NOTE(review): `lamb` is never passed to OSDT here; the captured output reports
    # `lambda:  0.1` for every run, so the OSDT curve cannot vary with lambda.
    # TODO: pass lamb through the appropriate OSDT constructor argument
    # (its name is not visible from this notebook).
    osdt_hyperparameters = [{}]
    _model, osdt_accuracy, _hyperparameters = train_cross_validate(
        dataset,
        OSDT,
        hyperparameters=osdt_hyperparameters)
    y_osdt.append(osdt_accuracy)

plt.scatter(x_lamb, y_cart, marker='^', label="CART")
plt.scatter(x_lamb, y_osdt, marker='o', label="OSDT")

# Raw string: '\l' is an invalid escape sequence in a normal string literal.
plt.title(r'Test Accuracy vs $\lambda$')
plt.legend()
plt.xlabel(r'$\lambda$')
plt.ylabel('Test Accuracy')

plt.savefig('figures/test_accuracy_vs_lambda.png')

nrule: 12
ndata: 3453
gr: [0.00346032 0.00509632 0.00622846 0.00631345 0.00878234 0.00509816
 0.00593233 0.01578993 0.02611275 0.00274281 0.00053302 0.03359428]
order: [11, 8, 7, 4, 3, 2, 6, 5, 1, 0, 9, 10]
odr: [11, 8, 7, 4, 3, 2, 6, 5, 1, 0, 9, 10]
the order of x's columns:  [11, 8, 7, 4, 3, 2, 6, 5, 1, 0, 9, 10]
>>> log: False
>>> support bound: True
>>> accu_support: True
>>> accurate support bound: True
>>> equiv points bound: True
>>> lookahead bound: True
prior_metric= curiosity
COUNT_UNIQLEAVES: 21
COUNT_LEAFLOOKUPS: 0
total time:  0.2013711929321289
lambda:  0.1
leaves:  [()]
num_captured:  [3453]
num_captured_incorrect:  [1604]
prediction:  [0]
Objective:  0.5645236026643499
Accuracy:  0.5354763973356501
COUNT of the best tree:  0
time when the best tree is achieved:  1560879042.856838
TOTAL COUNT:  11
best_is_cart False
[1 1 1 ... 1 1 1]
Testing Accuracy: 0.5390851187029531
nrule: 12
ndata: 3454
gr: [0.00543411 0.00267721 0.00356736 0.00678393 0.00761259 0.00919989
 0.011654

>>> log: False
>>> support bound: True
>>> accu_support: True
>>> accurate support bound: True
>>> equiv points bound: True
>>> lookahead bound: True
prior_metric= curiosity
COUNT_UNIQLEAVES: 21
COUNT_LEAFLOOKUPS: 0
total time:  0.21373367309570312
lambda:  0.1
leaves:  [(-1,), (1,)]
num_captured:  [2381, 1073]
num_captured_incorrect:  [874, 355]
prediction:  [0, 1]
Objective:  0.555819339895773
Accuracy:  0.644180660104227
COUNT of the best tree:  1
time when the best tree is achieved:  0.20130681991577148
TOTAL COUNT:  11
best_is_cart False
[1 1 1 ... 1 1 1]
Testing Accuracy: 0.6336518969012453
nrule: 12
ndata: 3453
gr: [0.00346032 0.00509632 0.00622846 0.00631345 0.00878234 0.00509816
 0.00593233 0.01578993 0.02611275 0.00274281 0.00053302 0.03359428]
order: [11, 8, 7, 4, 3, 2, 6, 5, 1, 0, 9, 10]
odr: [11, 8, 7, 4, 3, 2, 6, 5, 1, 0, 9, 10]
the order of x's columns:  [11, 8, 7, 4, 3, 2, 6, 5, 1, 0, 9, 10]
>>> log: False
>>> support bound: True
>>> accu_support: True
>>> accurate supp

>>> log: False
>>> support bound: True
>>> accu_support: True
>>> accurate support bound: True
>>> equiv points bound: True
>>> lookahead bound: True
prior_metric= curiosity
COUNT_UNIQLEAVES: 21
COUNT_LEAFLOOKUPS: 0
total time:  0.17873096466064453
lambda:  0.1
leaves:  [()]
num_captured:  [3453]
num_captured_incorrect:  [1604]
prediction:  [0]
Objective:  0.5645236026643499
Accuracy:  0.5354763973356501
COUNT of the best tree:  0
time when the best tree is achieved:  1560879047.8377001
TOTAL COUNT:  11
best_is_cart False
[1 1 1 ... 1 1 1]
Testing Accuracy: 0.5390851187029531
nrule: 12
ndata: 3454
gr: [0.00543411 0.00267721 0.00356736 0.00678393 0.00761259 0.00919989
 0.01165459 0.02382521 0.02613283 0.00385476 0.00036193 0.03908285]
order: [11, 8, 7, 6, 5, 4, 3, 0, 9, 2, 1, 10]
odr: [11, 8, 7, 6, 5, 4, 3, 0, 9, 2, 1, 10]
the order of x's columns:  [11, 8, 7, 6, 5, 4, 3, 0, 9, 2, 1, 10]
>>> log: False
>>> support bound: True
>>> accu_support: True
>>> accurate support bound: True
>>> e

nrule: 12
ndata: 3453
gr: [0.00346032 0.00509632 0.00622846 0.00631345 0.00878234 0.00509816
 0.00593233 0.01578993 0.02611275 0.00274281 0.00053302 0.03359428]
order: [11, 8, 7, 4, 3, 2, 6, 5, 1, 0, 9, 10]
odr: [11, 8, 7, 4, 3, 2, 6, 5, 1, 0, 9, 10]
the order of x's columns:  [11, 8, 7, 4, 3, 2, 6, 5, 1, 0, 9, 10]
>>> log: False
>>> support bound: True
>>> accu_support: True
>>> accurate support bound: True
>>> equiv points bound: True
>>> lookahead bound: True
prior_metric= curiosity
COUNT_UNIQLEAVES: 21
COUNT_LEAFLOOKUPS: 0
total time:  0.1282958984375
lambda:  0.1
leaves:  [()]
num_captured:  [3453]
num_captured_incorrect:  [1604]
prediction:  [0]
Objective:  0.5645236026643499
Accuracy:  0.5354763973356501
COUNT of the best tree:  0
time when the best tree is achieved:  1560879049.860648
TOTAL COUNT:  11
best_is_cart False
[1 1 1 ... 1 1 1]
Testing Accuracy: 0.5390851187029531
nrule: 12
ndata: 3454
gr: [0.00543411 0.00267721 0.00356736 0.00678393 0.00761259 0.00919989
 0.01165459 

>>> log: False
>>> support bound: True
>>> accu_support: True
>>> accurate support bound: True
>>> equiv points bound: True
>>> lookahead bound: True
prior_metric= curiosity
COUNT_UNIQLEAVES: 21
COUNT_LEAFLOOKUPS: 0
total time:  0.182966947555542
lambda:  0.1
leaves:  [(-1,), (1,)]
num_captured:  [2381, 1073]
num_captured_incorrect:  [874, 355]
prediction:  [0, 1]
Objective:  0.555819339895773
Accuracy:  0.644180660104227
COUNT of the best tree:  1
time when the best tree is achieved:  0.180739164352417
TOTAL COUNT:  11
best_is_cart False
[1 1 1 ... 1 1 1]
Testing Accuracy: 0.6336518969012453
nrule: 12
ndata: 3453
gr: [0.00346032 0.00509632 0.00622846 0.00631345 0.00878234 0.00509816
 0.00593233 0.01578993 0.02611275 0.00274281 0.00053302 0.03359428]
order: [11, 8, 7, 4, 3, 2, 6, 5, 1, 0, 9, 10]
odr: [11, 8, 7, 4, 3, 2, 6, 5, 1, 0, 9, 10]
the order of x's columns:  [11, 8, 7, 4, 3, 2, 6, 5, 1, 0, 9, 10]
>>> log: False
>>> support bound: True
>>> accu_support: True
>>> accurate support 

gr: [0.00543411 0.00267721 0.00356736 0.00678393 0.00761259 0.00919989
 0.01165459 0.02382521 0.02613283 0.00385476 0.00036193 0.03908285]
order: [11, 8, 7, 6, 5, 4, 3, 0, 9, 2, 1, 10]
odr: [11, 8, 7, 6, 5, 4, 3, 0, 9, 2, 1, 10]
the order of x's columns:  [11, 8, 7, 6, 5, 4, 3, 0, 9, 2, 1, 10]
>>> log: False
>>> support bound: True
>>> accu_support: True
>>> accurate support bound: True
>>> equiv points bound: True
>>> lookahead bound: True
prior_metric= curiosity
COUNT_UNIQLEAVES: 21
COUNT_LEAFLOOKUPS: 0
total time:  0.2365100383758545
lambda:  0.1
leaves:  [(-1,), (1,)]
num_captured:  [2381, 1073]
num_captured_incorrect:  [874, 355]
prediction:  [0, 1]
Objective:  0.555819339895773
Accuracy:  0.644180660104227
COUNT of the best tree:  1
time when the best tree is achieved:  0.23514819145202637
TOTAL COUNT:  11
best_is_cart False
[1 1 1 ... 1 1 1]
Testing Accuracy: 0.6336518969012453
nrule: 12
ndata: 3453
gr: [0.00346032 0.00509632 0.00622846 0.00631345 0.00878234 0.00509816
 0.005932

gr: [0.00346032 0.00509632 0.00622846 0.00631345 0.00878234 0.00509816
 0.00593233 0.01578993 0.02611275 0.00274281 0.00053302 0.03359428]
order: [11, 8, 7, 4, 3, 2, 6, 5, 1, 0, 9, 10]
odr: [11, 8, 7, 4, 3, 2, 6, 5, 1, 0, 9, 10]
the order of x's columns:  [11, 8, 7, 4, 3, 2, 6, 5, 1, 0, 9, 10]
>>> log: False
>>> support bound: True
>>> accu_support: True
>>> accurate support bound: True
>>> equiv points bound: True
>>> lookahead bound: True
prior_metric= curiosity
COUNT_UNIQLEAVES: 21
COUNT_LEAFLOOKUPS: 0
total time:  0.15988492965698242
lambda:  0.1
leaves:  [()]
num_captured:  [3453]
num_captured_incorrect:  [1604]
prediction:  [0]
Objective:  0.5645236026643499
Accuracy:  0.5354763973356501
COUNT of the best tree:  0
time when the best tree is achieved:  1560879055.402645
TOTAL COUNT:  11
best_is_cart False
[1 1 1 ... 1 1 1]
Testing Accuracy: 0.5390851187029531
nrule: 12
ndata: 3454
gr: [0.00543411 0.00267721 0.00356736 0.00678393 0.00761259 0.00919989
 0.01165459 0.02382521 0.02613

>>> log: False
>>> support bound: True
>>> accu_support: True
>>> accurate support bound: True
>>> equiv points bound: True
>>> lookahead bound: True
prior_metric= curiosity
COUNT_UNIQLEAVES: 21
COUNT_LEAFLOOKUPS: 0
total time:  0.08065104484558105
lambda:  0.1
leaves:  [(-1,), (1,)]
num_captured:  [2381, 1073]
num_captured_incorrect:  [874, 355]
prediction:  [0, 1]
Objective:  0.555819339895773
Accuracy:  0.644180660104227
COUNT of the best tree:  1
time when the best tree is achieved:  0.07988691329956055
TOTAL COUNT:  11
best_is_cart False
[1 1 1 ... 1 1 1]
Testing Accuracy: 0.6336518969012453
nrule: 12
ndata: 3453
gr: [0.00346032 0.00509632 0.00622846 0.00631345 0.00878234 0.00509816
 0.00593233 0.01578993 0.02611275 0.00274281 0.00053302 0.03359428]
order: [11, 8, 7, 4, 3, 2, 6, 5, 1, 0, 9, 10]
odr: [11, 8, 7, 4, 3, 2, 6, 5, 1, 0, 9, 10]
the order of x's columns:  [11, 8, 7, 4, 3, 2, 6, 5, 1, 0, 9, 10]
>>> log: False
>>> support bound: True
>>> accu_support: True
>>> accurate supp

gr: [0.00346032 0.00509632 0.00622846 0.00631345 0.00878234 0.00509816
 0.00593233 0.01578993 0.02611275 0.00274281 0.00053302 0.03359428]
order: [11, 8, 7, 4, 3, 2, 6, 5, 1, 0, 9, 10]
odr: [11, 8, 7, 4, 3, 2, 6, 5, 1, 0, 9, 10]
the order of x's columns:  [11, 8, 7, 4, 3, 2, 6, 5, 1, 0, 9, 10]
>>> log: False
>>> support bound: True
>>> accu_support: True
>>> accurate support bound: True
>>> equiv points bound: True
>>> lookahead bound: True
prior_metric= curiosity
COUNT_UNIQLEAVES: 21
COUNT_LEAFLOOKUPS: 0
total time:  0.08341503143310547
lambda:  0.1
leaves:  [()]
num_captured:  [3453]
num_captured_incorrect:  [1604]
prediction:  [0]
Objective:  0.5645236026643499
Accuracy:  0.5354763973356501
COUNT of the best tree:  0
time when the best tree is achieved:  1560879058.467228
TOTAL COUNT:  11
best_is_cart False
[1 1 1 ... 1 1 1]
Testing Accuracy: 0.5390851187029531
nrule: 12
ndata: 3454
gr: [0.00543411 0.00267721 0.00356736 0.00678393 0.00761259 0.00919989
 0.01165459 0.02382521 0.02613

gr: [0.00543411 0.00267721 0.00356736 0.00678393 0.00761259 0.00919989
 0.01165459 0.02382521 0.02613283 0.00385476 0.00036193 0.03908285]
order: [11, 8, 7, 6, 5, 4, 3, 0, 9, 2, 1, 10]
odr: [11, 8, 7, 6, 5, 4, 3, 0, 9, 2, 1, 10]
the order of x's columns:  [11, 8, 7, 6, 5, 4, 3, 0, 9, 2, 1, 10]
>>> log: False
>>> support bound: True
>>> accu_support: True
>>> accurate support bound: True
>>> equiv points bound: True
>>> lookahead bound: True
prior_metric= curiosity
COUNT_UNIQLEAVES: 21
COUNT_LEAFLOOKUPS: 0
total time:  0.1690969467163086
lambda:  0.1
leaves:  [(-1,), (1,)]
num_captured:  [2381, 1073]
num_captured_incorrect:  [874, 355]
prediction:  [0, 1]
Objective:  0.555819339895773
Accuracy:  0.644180660104227
COUNT of the best tree:  1
time when the best tree is achieved:  0.15732216835021973
TOTAL COUNT:  11
best_is_cart False
[1 1 1 ... 1 1 1]
Testing Accuracy: 0.6336518969012453
nrule: 12
ndata: 3453
gr: [0.00346032 0.00509632 0.00622846 0.00631345 0.00878234 0.00509816
 0.005932

gr: [0.00543411 0.00267721 0.00356736 0.00678393 0.00761259 0.00919989
 0.01165459 0.02382521 0.02613283 0.00385476 0.00036193 0.03908285]
order: [11, 8, 7, 6, 5, 4, 3, 0, 9, 2, 1, 10]
odr: [11, 8, 7, 6, 5, 4, 3, 0, 9, 2, 1, 10]
the order of x's columns:  [11, 8, 7, 6, 5, 4, 3, 0, 9, 2, 1, 10]
>>> log: False
>>> support bound: True
>>> accu_support: True
>>> accurate support bound: True
>>> equiv points bound: True
>>> lookahead bound: True
prior_metric= curiosity
COUNT_UNIQLEAVES: 21
COUNT_LEAFLOOKUPS: 0
total time:  0.10791206359863281
lambda:  0.1
leaves:  [(-1,), (1,)]
num_captured:  [2381, 1073]
num_captured_incorrect:  [874, 355]
prediction:  [0, 1]
Objective:  0.555819339895773
Accuracy:  0.644180660104227
COUNT of the best tree:  1
time when the best tree is achieved:  0.1065669059753418
TOTAL COUNT:  11
best_is_cart False
[1 1 1 ... 1 1 1]
Testing Accuracy: 0.6336518969012453


ValueError: max_leaf_nodes 1 must be either None or larger than 1

In [None]:
# Saved Previous Results
# (Note: these are training accuracies)
y_osdt = [
    0.669031417402635, 0.669031417402635, 0.6622267265093384, 0.6551324743014333,
    0.6551324743014333, 0.6551324743014334, 0.6551324743014334, 0.6551324743014334,
    0.6551324743014334, 0.6551324743014334, 0.6551324743014333, 0.6551324743014333,
] + [0.6389170406833647] * 34
y_cart = [0.6370967741935484] * 2 + [0.5] * 44

# One lambda per saved accuracy, evenly spaced over [0.005, 0.05].
# np.arange with a float step only reaches the required number of points through
# floating-point rounding; np.linspace states the point count explicitly.
x_lamb = np.linspace(0.005, 0.05, num=len(y_osdt))
assert len(x_lamb) == len(y_osdt) == len(y_cart), "saved results must align with the lambda grid"

plt.scatter(x_lamb, y_cart, marker='^', label="CART")
plt.scatter(x_lamb, y_osdt, marker='o', label="OSDT")

# Raw string: '\l' is an invalid escape sequence in a normal string literal.
plt.title(r'Training Accuracy vs $\lambda$')
plt.legend()
plt.xlabel(r'$\lambda$')
plt.ylabel('Training Accuracy')

plt.savefig('figures/training_accuracy_vs_lambda.png')

In [None]:
# TODO: there appears to be some kind of experiment happening below
# need to refactor this to match the new interface

In [None]:
x = x_monk1
y = y_monk1
clf = tree.DecisionTreeClassifier(max_depth = 4, min_samples_split = math.ceil(lamb*2*len(y)), 
                                      min_samples_leaf = math.ceil(lamb*len(y)), 
                                      max_leaf_nodes = math.floor(1/(2*lamb)), 
                                      min_impurity_decrease = lamb
                                     )
clf = clf.fit(x,y)
clf.score(x,y)

In [None]:
x = x_monk1
y = y_monk1
clf = tree.DecisionTreeClassifier(max_depth = 3, min_samples_split = math.ceil(lamb*2*len(y)), 
                                      min_samples_leaf = math.ceil(lamb*len(y)), 
                                      max_leaf_nodes = math.floor(1/(2*lamb)), 
                                      min_impurity_decrease = lamb
                                     )
clf = clf.fit(x,y)
clf.score(x,y)

In [None]:
lamb = 0.025
x = x_monk2
y = y_monk2
clf = tree.DecisionTreeClassifier(max_depth = 4, min_samples_split = math.ceil(lamb*2*len(y)), 
                                      min_samples_leaf = math.ceil(lamb*len(y)), 
                                      max_leaf_nodes = math.floor(1/(2*lamb)), 
                                      min_impurity_decrease = lamb
                                     )
clf = clf.fit(x,y)
clf.score(x,y)

In [None]:
lamb = 0.025
x = x_monk2
y = y_monk2
clf = tree.DecisionTreeClassifier(max_depth = 3, min_samples_split = math.ceil(lamb*2*len(y)), 
                                      min_samples_leaf = math.ceil(lamb*len(y)), 
                                      max_leaf_nodes = math.floor(1/(2*lamb)), 
                                      min_impurity_decrease = lamb
                                     )
clf = clf.fit(x,y)
clf.score(x,y)

In [None]:
x = x_monk3
y = y_monk3
clf = tree.DecisionTreeClassifier(max_depth = 4, min_samples_split = math.ceil(lamb*2*len(y)), 
                                      min_samples_leaf = math.ceil(lamb*len(y)), 
                                      max_leaf_nodes = math.floor(1/(2*lamb)), 
                                      min_impurity_decrease = lamb
                                     )
clf = clf.fit(x,y)
clf.score(x,y)

In [None]:
x = x_monk3
y = y_monk3
clf = tree.DecisionTreeClassifier(max_depth = 3, min_samples_split = math.ceil(lamb*2*len(y)), 
                                      min_samples_leaf = math.ceil(lamb*len(y)), 
                                      max_leaf_nodes = math.floor(1/(2*lamb)), 
                                      min_impurity_decrease = lamb
                                     )
clf = clf.fit(x,y)
clf.score(x,y)

In [None]:
lamb = 0.05
x = x_tictactoe
y = y_tictactoe
clf = tree.DecisionTreeClassifier(max_depth = 4, min_samples_split = math.ceil(lamb*2*len(y)), 
                                      min_samples_leaf = math.ceil(lamb*len(y)), 
                                      max_leaf_nodes = math.floor(1/(2*lamb)), 
                                      min_impurity_decrease = lamb
                                     )
clf = clf.fit(x,y)
clf.score(x,y)

In [None]:
lamb = 0.05
x = x_tictactoe
y = y_tictactoe
clf = tree.DecisionTreeClassifier(max_depth = 3, min_samples_split = math.ceil(lamb*2*len(y)), 
                                      min_samples_leaf = math.ceil(lamb*len(y)), 
                                      max_leaf_nodes = math.floor(1/(2*lamb)), 
                                      min_impurity_decrease = lamb
                                     )
clf = clf.fit(x,y)
clf.score(x,y)

In [None]:
x = x_balance
y = y_balance
clf = tree.DecisionTreeClassifier(max_depth = 4, min_samples_split = math.ceil(lamb*2*len(y)), 
                                      min_samples_leaf = math.ceil(lamb*len(y)), 
                                      max_leaf_nodes = math.floor(1/(2*lamb)), 
                                      min_impurity_decrease = lamb
                                     )
clf = clf.fit(x,y)
clf.score(x,y)

In [None]:
x = x_balance
y = y_balance
clf = tree.DecisionTreeClassifier(max_depth = 3, min_samples_split = math.ceil(lamb*2*len(y)), 
                                      min_samples_leaf = math.ceil(lamb*len(y)), 
                                      max_leaf_nodes = math.floor(1/(2*lamb)), 
                                      min_impurity_decrease = lamb
                                     )
clf = clf.fit(x,y)
clf.score(x,y)

In [None]:
x = x_compas1
y = y_compas1
clf = tree.DecisionTreeClassifier(max_depth = 4, min_samples_split = math.ceil(lamb*2*len(y)), 
                                      min_samples_leaf = math.ceil(lamb*len(y)), 
                                      max_leaf_nodes = math.floor(1/(2*lamb)), 
                                      min_impurity_decrease = lamb
                                     )
clf = clf.fit(x,y)
clf.score(x,y)

In [None]:
x = x_compas1
y = y_compas1
clf = tree.DecisionTreeClassifier(max_depth = 3, min_samples_split = math.ceil(lamb*2*len(y)), 
                                      min_samples_leaf = math.ceil(lamb*len(y)), 
                                      max_leaf_nodes = math.floor(1/(2*lamb)), 
                                      min_impurity_decrease = lamb
                                     )
clf = clf.fit(x,y)
clf.score(x,y)

In [None]:
lamb = 0.01
x = x_car
y = y_car
clf = tree.DecisionTreeClassifier(max_depth = 4, min_samples_split = math.ceil(lamb*2*len(y)), 
                                      min_samples_leaf = math.ceil(lamb*len(y)), 
                                      max_leaf_nodes = math.floor(1/(2*lamb)), 
                                      min_impurity_decrease = lamb
                                     )
clf = clf.fit(x,y)
clf.score(x,y)

In [None]:
lamb = 0.01
x = x_car
y = y_car
clf = tree.DecisionTreeClassifier(max_depth = 3, min_samples_split = math.ceil(lamb*2*len(y)), 
                                      min_samples_leaf = math.ceil(lamb*len(y)), 
                                      max_leaf_nodes = math.floor(1/(2*lamb)), 
                                      min_impurity_decrease = lamb
                                     )
clf = clf.fit(x,y)
clf.score(x,y)

In [None]:
# Legacy-interface run on the compas data, keeping all ten values returned by bbound.
# NOTE(review): `bbound`, `predict`, `x_compas1` and `y_compas1` are not imported or
# defined in the Dependencies cell — these cells predate the refactor mentioned in the TODO cell.
leaves_c, prediction_c, dic, nrule, ndata, totaltime, time_c, COUNT, C_c, accu = bbound(x_compas1, y_compas1, lamb=0.005, prior_metric="curiosity")

In [None]:
# Apply the tree returned above to the same data it was fitted on.
predict(leaves_c, prediction_c, dic, x_compas1, y_compas1)

In [None]:
# Sweep 1: decreasing lamb from 0.005 down to 0.0008, no explicit size limits.
bbound(x_compas1, y_compas1, lamb=0.005, prior_metric="curiosity")

In [None]:
bbound(x_compas1, y_compas1, lamb=0.0025, prior_metric="curiosity")

In [None]:
bbound(x_compas1, y_compas1, lamb=0.00225, prior_metric="curiosity")

In [None]:
bbound(x_compas1, y_compas1, lamb=0.002, prior_metric="curiosity")

In [None]:
bbound(x_compas1, y_compas1, lamb=0.00175, prior_metric="curiosity")

In [None]:
bbound(x_compas1, y_compas1, lamb=0.0015, prior_metric="curiosity")

In [None]:
bbound(x_compas1, y_compas1, lamb=0.00125, prior_metric="curiosity")

In [None]:
bbound(x_compas1, y_compas1, lamb=0.001, prior_metric="curiosity")

In [None]:
bbound(x_compas1, y_compas1, lamb=0.0009, prior_metric="curiosity")

In [None]:
bbound(x_compas1, y_compas1, lamb=0.0008, prior_metric="curiosity")

In [None]:
# Sweep 2: lamb fixed at 1e-9, MAX_NLEAVES from 4 to 8 with MAXDEPTH=3.
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=4, MAXDEPTH=3)

In [None]:
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=5, MAXDEPTH=3)

In [None]:
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=6, MAXDEPTH=3)

In [None]:
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=7, MAXDEPTH=3)

In [None]:
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=8, MAXDEPTH=3)

In [None]:
# Sweep 3: lamb fixed at 1e-9, MAX_NLEAVES from 5 to 16 with MAXDEPTH=4.
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=5, MAXDEPTH=4)

In [None]:
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=6, MAXDEPTH=4)

In [None]:
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=7, MAXDEPTH=4)

In [None]:
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=8, MAXDEPTH=4)

In [None]:
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=9, MAXDEPTH=4)

In [None]:
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=10, MAXDEPTH=4)

In [None]:
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=11, MAXDEPTH=4)

In [None]:
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=12, MAXDEPTH=4)

In [None]:
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=13, MAXDEPTH=4)

In [None]:
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=14, MAXDEPTH=4)

In [None]:
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=15, MAXDEPTH=4)

In [None]:
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=16, MAXDEPTH=4)

In [None]:
# Sweep 4: lamb fixed at 1e-9, MAX_NLEAVES from 17 to 32 with no MAXDEPTH argument.
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=17)

In [None]:
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=18)

In [None]:
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=19)

In [None]:
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=20)

In [None]:
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=21)

In [None]:
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=22)

In [None]:
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=23)

In [None]:
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=24)

In [None]:
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=25)

In [None]:
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=26)

In [None]:
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=27)

In [None]:
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=28)

In [None]:
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=29)

In [None]:
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=30)

In [None]:
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=31)

In [None]:
bbound(x_compas1, y_compas1, lamb=0.000000001, prior_metric="curiosity", MAX_NLEAVES=32)

In [None]:
_,_,_,_,accu_OSDT = bbound(x_balance, y_balance, lamb=0.01, prior_metric="curiosity", MAXDEPTH=4)
accu_OSDT

In [None]:
_,_,_,_,accu_OSDT = bbound(x_monk1, y_monk1, lamb=0.025, prior_metric="curiosity", MAXDEPTH=4)
accu_OSDT

In [None]:
_,_,_,_,accu_OSDT = bbound(x_monk1, y_monk1, lamb=0.025, prior_metric="curiosity", MAXDEPTH=3)
accu_OSDT

In [None]:
_,_,_,_,accu_OSDT = bbound(x_monk2, y_monk2, lamb=0.025, prior_metric="curiosity", MAXDEPTH=4)
accu_OSDT

In [None]:
_,_,_,_,accu_OSDT = bbound(x_monk2, y_monk2, lamb=0.025, prior_metric="curiosity", MAXDEPTH=3)
accu_OSDT

In [None]:
_,_,_,_,accu_OSDT = bbound(x_monk3, y_monk3, lamb=0.025, prior_metric="curiosity", MAXDEPTH=4)
accu_OSDT

In [None]:
_,_,_,_,accu_OSDT = bbound(x_monk3, y_monk3, lamb=0.025, prior_metric="curiosity", MAXDEPTH=3)
accu_OSDT

In [None]:
_,_,_,_,accu_OSDT = bbound(x_car, y_car, lamb=0.01, prior_metric="curiosity", MAXDEPTH=4)
accu_OSDT

In [None]:
_,_,_,_,accu_OSDT = bbound(x_car, y_car, lamb=0.01, prior_metric="curiosity", MAXDEPTH=3)
accu_OSDT

In [None]:
_,_,_,_,accu_OSDT = bbound(x_tictactoe, y_tictactoe, lamb=0.005, prior_metric="curiosity", MAXDEPTH=4)
accu_OSDT

In [None]:
_,_,_,_,accu_OSDT = bbound(x_tictactoe, y_tictactoe, lamb=0.005, prior_metric="curiosity", MAXDEPTH=4)
accu_OSDT