In [1]:
# A branch-and-bound search for decision trees over binary features (bounds follow the CORELS paper)

In [2]:
import pandas as pd
import numpy as np
import heapq
import math
import time

import gmpy2
from gmpy2 import mpz
import re

In [3]:
# Read in the dataset (pd.read_csv already returns a DataFrame, no extra wrapper needed)
df = pd.read_csv('../data/compas-binary.csv')

In [4]:
# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in pandas 1.0;
# .values yields the same ndarray and exists on every pandas version.
# Columns 0-12 are used as the candidate features.
x_all = df.values[:,:13]

# Column 13 is used as the label vector.
y = df.values[:,13]

In [5]:
# Association Rule Mining (Only one feature)

#support
#supp = [(x[:,i]*y).mean() for i in range(13)]
#supp

In [9]:
#confidence
#conf1 = [sum(x_all[:,i]*y)/sum(x_all[:,i]) for i in range(13)]
#conf1

In [10]:
#confidence
#conf0 = [sum((x_all[:,i]==0)*y)/sum((x_all[:,i]==0)) for i in range(13)]
#conf0

In [11]:
#x_idx = [conf1[i]>0.5 or conf0[i]>0.5 for i in range(len(conf1))]
"""
Because Using both conf1 and conf0 would select out too many features, 
which is hard for the algorithm to run out,
just use conf1 to select out a small fraction of feature.
"""
#x_idx = [conf1[i]>0.5 for i in range(len(conf1))]
#x_idx[0] = True # in the CORELS paper, gender is an important feature, so I add it manually
#x_idx

'\nBecause Using both conf1 and conf0 would select out too many features, \nwhich is hard for the algorithm to run out,\njust use conf1 to select out a small fraction of feature.\n'

In [12]:
#select out these features
#x = x_all[:,x_idx]

In [13]:
# manually select 5 features, according to the CORELS paper when lambda=0.01:
# sex:Female, age:18-20, age:21-22, juvenile-crimes:=0, priors:>3
x_idx = [0,1,2,8,12]
x = x_all[:,x_idx]
x

array([[0, 0, 0, 1, 0],
       [0, 0, 0, 1, 0],
       [0, 0, 1, 0, 1],
       ...,
       [0, 0, 0, 1, 0],
       [1, 0, 0, 1, 0],
       [1, 0, 1, 1, 0]])

In [14]:
# manually select 6 features, according to the CORELS paper when lambda=0.01:
# sex:Female, age:18-20, age:21-22, juvenile-crimes:=0, priors:2-3, priors:>3
x_idx6 = [0,1,2,8,9,12]
x6 = x_all[:,x_idx6]
x6

array([[0, 0, 0, 1, 0, 0],
       [0, 0, 0, 1, 0, 0],
       [0, 0, 1, 0, 0, 1],
       ...,
       [0, 0, 0, 1, 0, 0],
       [1, 0, 0, 1, 1, 0],
       [1, 0, 1, 1, 1, 0]])

In [15]:
# Notebook-level sizes for the 5-feature matrix `x`; the z computation and
# CacheLeaf below read these globals.
nrule = x.shape[1]  # number of feature columns in x
ndata = len(y)      # number of samples

In [16]:
### The following functions are copied from rule.py in bbcache ###

def make_all_ones(length):
    """
    Python implementation of make_default.

    Returns an mpz whose value is 2**length - 1, i.e. `length` one-bits.

    Note: in order to keep a leading one in front of the data bits, pass in
    a length that is 1 greater than your number of samples.
    """
    return mpz(2 ** length - 1)

def rule_vand(tt1, tt2):
    """
    Python implementation of rule_vand.

    Takes two truthtables and returns a pair:
    the bitwise AND of the two, and the number of one-bits in that AND
    with 1 subtracted to discount the leading one.
    """
    both = tt1 & tt2
    return both, gmpy2.popcount(both) - 1


In [17]:
def rule_vectompz(vec):
    """
    Convert a binary vector to an mpz object.

    Each element of `vec` contributes one bit (truthy -> 1, falsy -> 0), the
    first element being the most significant data bit. A '1' is prepended so
    the result always has a leading one in front of the data bits.

    The bit string is built element by element instead of scraping
    str(list(vec)): that text form depends on how the elements print
    (NumPy >= 2 prints scalars as e.g. 'np.int64(1)', booleans print as
    'True'/'False'), which made the old regex-based conversion fail.
    """
    bits = ''.join('1' if v else '0' for v in vec)
    return mpz('1' + bits, 2)

In [18]:
# Calculate z, which is used for the equivalent points bound.
# z is the vector defined in algorithm 5 of the CORELS paper: a binary column
# vector marking the samples that carry the minority label inside their
# equivalent set (the set of samples with identical feature rows in x).
#
# DataFrame.as_matrix() no longer exists in pandas >= 1.0, so the (ndata, 1)
# array of -1 placeholders is built directly with numpy.
z = np.full((ndata, 1), -1)
# enumerate through these samples
for i in range(ndata):
    # z[i,0] == -1 means sample i has not been put into its equivalent set yet
    if z[i,0] == -1:
        # tag1 indicates which samples have exactly the same features as sample i
        tag1 = (x[:,:nrule] == x[i,:nrule]).all(axis=1)

        y_l = y[tag1]
        # majority label of the equivalent set (ties resolve to 0)
        pred = int(y_l.sum()/len(y_l) > 0.5)
        # tag2 indicates the samples in the equivalent set which have the minority label
        tag2 = (y_l != pred)
        z[tag1,0] = tag2

z

array([[0],
       [1],
       [0],
       ...,
       [0],
       [0],
       [1]])

In [19]:
class CacheTree:
    """
    A tree data structure, stored as parallel per-leaf lists
    (entry i of every list describes leaf prefix[i]).

    prefix: a 2-d tuple to encode the leaves
    num_captured: a list to record number of data captured by the leaves
    p: a list with the proportion of misclassified data in each leaf
    deadleaf: a list indicating which leaves will never be split (support bound)
    splitleaf: a queue of lists indicating which leaves will be split in the
               next rounds (1 for split, 0 for not split)
    lbound: a list of lower bounds
    B0: a list of b0
    points_cap: a list of mpz, indicating which data are captured by each leaf

    x and lamb are accepted but not read by the constructor; y is only used
    for its length.

    After construction:
    risk   = lbound[0] + p[0]*num_captured[0]/len(y)
    metric = priority used to order trees in the queue (smaller pops first);
             chosen by prior_metric ("curiosity", "bound", "entropy", "gini"),
             and 0 for the null tree or any other prior_metric value.
    """
    def __init__(self, x, y, prefix,
                 lamb, prior_metric = None,
                 num_captured=None,
                 deadleaf = None,
                 splitleaf = None,
                 lbound=None,
                 p = None,
                 B0 = None, points_cap = None):
        self.prefix = prefix
        self.num_captured = num_captured
        self.p = p # the proportion of misclassified data in each leaf
        self.deadleaf = deadleaf #a list indicate which leaves will never be split (support bound)
        self.splitleaf = splitleaf #a queue of lists indicating which leaves will be split in next rounds (1 for split, 0 for not split)
        self.lbound = lbound #a list of lower bound
        self.B0 = B0 # a list of b0
        self.points_cap = points_cap #a list of mpz, indicating which data are captured by each leaf

        ndata = len(y)
        l = len(prefix)

        self.risk = self.lbound[0]+(self.p[0]*self.num_captured[0])/ndata

        # which metrics to use for the priority queue
        if (self.num_captured[0]==ndata):
            # this case is when constructing the null tree ((),)
            self.metric = 0
        elif (prior_metric=="curiosity"):
            # only leaves scheduled for splitting in the next round compete
            self.metric = min([self._curiosity(i, ndata)
                               if self.splitleaf[0][i]==1 else float('Inf') for i in range(l)])
        elif (prior_metric=="bound"):
            self.metric = min([self.lbound[i] if self.splitleaf[0][i]==1 else float('Inf') for i in range(l)])
        elif (prior_metric=="entropy"):
            # entropy weighted by number of points captured
            self.entropy = [(-self.p[i]*math.log2(self.p[i])-(1-self.p[i])*math.log2(1-self.p[i]))*self.num_captured[i]
                            if self.p[i]!=0 and self.p[i]!=1 else 0 for i in range(l)]
            self.metric = min([sum(self.entropy[:i]+self.entropy[i+1:])/(ndata-self.num_captured[i])
                               if (ndata-self.num_captured[i])!=0 else 0 for i in range(l)])
        elif (prior_metric=="gini"):
            # gini index weighted by number of points captured
            self.giniindex = [(2*self.p[i]*(1-self.p[i]))*self.num_captured[i] for i in range(l)]
            self.metric = min([sum(self.giniindex[:i]+self.giniindex[i+1:])/(ndata-self.num_captured[i])
                               if (ndata-self.num_captured[i])!=0 else 0 for i in range(l)])
        else:
            self.metric = 0

    def _curiosity(self, i, ndata):
        """
        Lower bound of leaf i divided by the fraction of data it does NOT capture.

        A leaf other than the first one can capture every sample (its sibling
        is then empty); the plain division would raise ZeroDivisionError, so
        that case yields 0, mirroring the guard in the entropy/gini metrics.
        """
        uncaptured = ndata-self.num_captured[i]
        if uncaptured == 0:
            return 0
        return self.lbound[i]/(uncaptured/ndata)

    def __lt__(self, other):
        # define <, which will be used in the priority queue
        return self.metric<other.metric

In [20]:
# cache every leaf

class CacheLeaf:
    """
    A data structure to cache every single leaf (symmetry aware)

    antecedent: a 1-tuple holding the leaf; the leaf is a tuple of signed,
                1-based feature indices. Only its last entry is applied here
                (+j selects rows with feature j == 1, -j rows with feature j == 0);
                the earlier entries are represented by parent_points_cap.
    x, y: feature matrix and label vector
    parent_points_cap: mpz bit vector of the points captured by the leaf's parent

    Attributes set: points_cap, num_captured, B0, prediction,
    num_captured_incorrect, p.

    NOTE(review): the equivalent-points vector is still read from the
    notebook-level global `z`, so `z` must have been computed for the same
    samples as `y`.
    """
    def __init__(self, antecedent, x, y, parent_points_cap):
        # normalise by the data actually passed in rather than the notebook
        # global `ndata`, which can disagree with `y`
        n_samples = len(y)

        last_rule = antecedent[0][-1]
        rule_index = abs(last_rule)-1 #the leaf's last feature
        rule_label = int(last_rule>0) #this binary feature is 0 or 1
        tag_rule = rule_vectompz(np.array(x[:,rule_index] == rule_label)*1)
        # points captured by the leaf = parent's points AND rows matching the last rule
        tag, self.num_captured = rule_vand(parent_points_cap, tag_rule)

        self.points_cap = tag

        # number of captured points whose label is 1
        _, num_ones = rule_vand(tag,rule_vectompz(y))

        #b0 is defined in (28)
        tag_z = rule_vectompz(z.reshape(1,-1)[0])
        _, num_errors = rule_vand(tag, tag_z)
        self.B0 = num_errors/n_samples

        if self.num_captured:
            # majority vote among captured points (ties resolve to 0)
            self.prediction = int(num_ones/self.num_captured > 0.5)
            if self.prediction == 1:
                self.num_captured_incorrect = self.num_captured-num_ones
            else:
                self.num_captured_incorrect = num_ones
            self.p = self.num_captured_incorrect/self.num_captured
        else:
            # empty leaf: nothing captured, nothing misclassified
            self.prediction = 0
            self.num_captured_incorrect = 0
            self.p = 0

In [21]:
class Eliminate:
    """
    A data structure to record and identify
    whether a tree has been visited/pruned.

    Trees are keyed by tuple(sorted(prefix)), so two prefixes listing the
    same leaves in a different order count as the same tree.
    """
    def __init__(self, elim_dict = None):
        """
        elim_dict: optional dict (or other iterable) of prefixes that should
                   be treated as already visited. It used to be accepted and
                   silently discarded; it now seeds the record. The argument
                   itself is not modified or aliased.
        """
        self.elim_dict = {} # record these trees we have visited
        if elim_dict is not None:
            for prefix in elim_dict:
                self.eliminate(prefix)

    def eliminate(self, prefix):
        """Mark the tree `prefix` as visited."""
        self.elim_dict[tuple(sorted(prefix))] = 1

    def is_duplicated(self, prefix):
        """Return True if the tree `prefix` has already been marked as visited."""
        # membership test on the dict itself; no need to build a keys() view
        return tuple(sorted(prefix)) in self.elim_dict

In [22]:
def log(lines, lamb, tic, queue_size, prefix_old, tree_new, R, d_c, R_c):
    """
    Append one ';'-separated log record to `lines`.

    Fields, in order: elapsed time since `tic`, queue size, the tree that was
    split, the new tree, its number of leaves, its objective, the best tree so
    far, its number of leaves, its objective, its lower bounds, its accuracy
    (1 - (R_c - lamb * number of leaves)) and its per-leaf capture counts.
    """
    new_prefix = tree_new.prefix
    best_prefix = d_c.prefix

    fields = [
        time.time()-tic,
        queue_size,
        prefix_old,
        new_prefix,
        len(new_prefix),
        R,
        best_prefix,
        len(best_prefix),
        R_c,
        d_c.lbound,
        1-(R_c - lamb*len(best_prefix)),
        d_c.num_captured,
    ]
    lines.append(";".join(map(str, fields)))

In [23]:
def generate_new_splitleaf(splitleaf_list, cap_l, incorr_l, ndata, t, lb, b0, lamb, R_c):
    """
    generate the new splitleaf for the new tree

    The new tree `t` consists of the untouched leaves (dp) followed by the two
    freshly created leaves (l1, l2) in the last two positions. The result is a
    shallow copy of `splitleaf_list` with one or two extra 0/1 lists appended,
    chosen from:
      * every leaf marked,
      * only l1/l2 marked,
      * only the dp leaves marked.
    """
    pending = np.array(splitleaf_list)
    sl = list(splitleaf_list)
    n_leaves = len(t)

    #(Lower bound on accurate antecedent support)
    a_l = (sum(cap_l)-sum(incorr_l))/ndata - sum(cap_l)/ndata/2

    # the three candidate marks; each call builds a fresh list
    def mark_all():
        return [1]*n_leaves

    def mark_new_pair():
        return [0]*(n_leaves-2)+[1,1]

    def mark_rest():
        return [1]*(n_leaves-2)+[0,0]

    # equivalent points bound combined with the lookahead bound, or the
    # hierarchical objective lower bound, fails -> a leaf in dp must be split
    dp_required = lb+b0+lamb>=R_c or lb>=R_c
    # accurate-support bound fails -> l1/l2 must be split further
    pair_required = a_l < lamb

    if not dp_required and not pair_required:
        sl.append(mark_all())
        return sl

    # what the already queued rounds will split
    if len(splitleaf_list)>0:
        pair_hits = pending[:,-1].sum()+pending[:,-2].sum()
        pair_queued = pair_hits>0
        dp_queued = pending.sum()-pair_hits>0
    else:
        pair_queued = False
        dp_queued = False

    if dp_required and pair_required:
        if dp_queued and pair_queued:
            sl.append(mark_all())
        elif pair_queued:
            # l1/l2 already queued, dp is not
            sl.append(mark_rest())
        elif dp_queued:
            # dp already queued, l1/l2 is not
            sl.append(mark_new_pair())
        else:
            sl.append(mark_new_pair())
            sl.append(mark_rest())
    elif dp_required:
        sl.append(mark_all() if dp_queued else mark_rest())
    else:
        sl.append(mark_all() if pair_queued else mark_new_pair())

    return sl

In [24]:
def bbound(x, y, lamb, prior_metric = None, MAXDEPTH = 4, niter=float('Inf')):
    """
    Priority-queue branch-and-bound search over trees built from binary features.

    A single copy of each tree is kept (CacheTree); for every tree a queue of
    0/1 lists marks which leaves are to be split in the coming rounds.

    x: feature matrix; x.shape[1] is the number of features
    y: label vector; len(y) is the number of samples
    lamb: per-leaf regularisation added to the bounds
    prior_metric: ordering of the queue, passed to CacheTree
                  ("curiosity", "bound", "entropy", "gini"; anything else
                  gives every tree metric 0)
    MAXDEPTH: leaves whose antecedent already has MAXDEPTH entries are not split
    niter: stop popping once COUNT (number of trees pushed) reaches niter;
           it is only checked between pops, so COUNT can overshoot

    Leaves are tuples of signed 1-based feature indices: splitting on feature j
    appends -j to one child and +j to the other.

    Prints nrule, ndata, the best prefix found, its objective and COUNT.
    Returns None.

    NOTE(review): the root tree's B0 reads the notebook-level global `z`
    rather than an argument.
    """
    
    #Initialize best rule list and objective
    #d_c = None
    #R_c = 1

    nrule = x.shape[1]
    ndata = len(y)
    print("nrule:", nrule)
    print("ndata:", ndata)
    
    E = Eliminate()   # record of trees already expanded
    tic = time.time()
    
    lines = [] # a list for log
    leaves = {} # cache leaves, keyed by the sorted antecedent

    # initialize the queue to include just empty root
    queue = []
    t = ((),)    
    tree0 = CacheTree(prefix = t, x = x, y = y, lamb=lamb, prior_metric=prior_metric, 
                      num_captured=[ndata], deadleaf = [0], splitleaf = [[1]], lbound=[lamb],
                      p = [min(np.mean(y),1-np.mean(y))], B0 = [np.sum(z)/ndata], points_cap = [make_all_ones(ndata+1)])
    heapq.heappush(queue, (tree0.metric,tree0))
    #queue.append(tree0)
    # best tree so far and its objective start at the root
    d_c = tree0
    R_c = tree0.risk
    #log(lines, lamb, tic, len(queue), tuple(), tree0, R, d_c, R_c) 
    
    COUNT = 0 #count the total number of trees in the queue
    while (queue) and COUNT<niter:
        #tree = queue.pop(0)
        # pop the tree with the smallest metric
        (curio, tree)=heapq.heappop(queue)
        d = tree.prefix
        
        
        #print("=======COUNT=======",COUNT)
        #print("d",d)
        #print("R",tree.lbound[0]+(tree.num_captured_incorrect[0])/len(y))
        
        # if we have visited this tree
        if E.is_duplicated(d):
            continue
        else:
            E.eliminate(d)
        
        # the leaves we are going to split:
        # spl is the mark list for this round, split_next the remaining rounds
        split_next = tree.splitleaf.copy()
        spl = split_next.pop(0)
        
        # enumerate through all the leaves
        for i in range(len(d)):
            #print("d!!!",d)
            # if the leaf is dead, then continue
            if tree.deadleaf[i]==1:
                continue
            
            #(Lower bound on antecedent support)
            # if this bound doesnot hold, set the leaf to be dead, and continue
            if tree.num_captured[i]/ndata/2 < lamb:
                tree.deadleaf[i] = 1
                continue
            
            # 0 for not split; 1 for split
            #if spl[0][i]==0:
            if spl[i]==0:
                continue

            d0 = d[i] #d0 is the leaf we are going to split
            dp = d[:i]+d[i+1:] #dp is the rest
            
            
            # Restrict the depth of the tree
            if len(d0)>=MAXDEPTH:
                continue
            
            # we are going to split leaf i, and get 2 new leaves
            # we will add the two new leaves to the end of the list
            # (each remaining mark list drops entry i and repeats it twice at the end)
            splitleaf_list = [split_next[k][:i]+split_next[k][i+1:]+split_next[k][i:i+1]*2
                              for k in range(len(split_next))]
            
            
            lb = tree.lbound[i] # the lower bound 
            #print("tree.B0",tree.B0)
            b0 = tree.B0[i] # the b0 defined in (28) of the paper
            
            
            
            # split the leaf d0 with feature j
            for j in range(1, nrule+1):
                # skip features already used (with either sign) on this leaf
                if (j not in d0)and(-j not in d0):
                    # split leaf d0 with feature j, and get 2 leaves l1 and l2
                    l1 = d0+(-j,)
                    l2 = d0+(j,)
                    t = dp+(l1, l2) # t is the new tree
                    #print("t",t)

                    # if tree t is duplicated, continue
                    if E.is_duplicated(t):
                        continue
                    
                    # per-leaf statistics of the two new leaves (index 0 = l1, 1 = l2)
                    pred_l = [0]*2
                    cap_l = [0]*2
                    incorr_l = [0]*2
                    p_l = [0]*2
                    B0_l = [0]*2
                    points_l = [mpz(0)]*2
                    
                    # for the two new leaves, if they have not been visited, calculate their predictions, 
                    l1_sorted = tuple(sorted(l1))
                    l2_sorted = tuple(sorted(l2))
                    
                    # points captured by the leaf being split
                    i_points = tree.points_cap[i]
                    
                    if l1_sorted not in leaves:
                        leaves[l1_sorted] = CacheLeaf((l1,),x,y,i_points)
                    
                    Cache_l1 = leaves[l1_sorted]    
                    pred_l[0], cap_l[0], incorr_l[0], p_l[0], B0_l[0], points_l[0] = Cache_l1.prediction, Cache_l1.num_captured, Cache_l1.num_captured_incorrect, Cache_l1.p, Cache_l1.B0, Cache_l1.points_cap
                    
                    if l2_sorted not in leaves:
                        leaves[l2_sorted] = CacheLeaf((l2,),x,y,i_points)
                    
                    Cache_l2 = leaves[l2_sorted]
                    pred_l[1], cap_l[1], incorr_l[1], p_l[1], B0_l[1], points_l[1] = Cache_l2.prediction, Cache_l2.num_captured, Cache_l2.num_captured_incorrect, Cache_l2.p, Cache_l2.B0, Cache_l2.points_cap
                    
                    # calculate the bounds for each leaves in the new tree
                    loss_l1 = (incorr_l[0])/ndata
                    loss_l2 = (incorr_l[1])/ndata
                    loss_d0 = tree.p[i]*tree.num_captured[i]/ndata
                    # change in the bound of every untouched leaf caused by replacing d0 with l1 and l2
                    delta = loss_l1+loss_l2-loss_d0+lamb
                    old_lbound = tree.lbound[:i]+tree.lbound[i+1:]
                    # note the cross-over: l1's entry adds l2's loss and l2's entry adds l1's loss
                    new_lbound = [b+delta for b in old_lbound]+[tree.lbound[i]+loss_l2+lamb,tree.lbound[i]+loss_l1+lamb]
                    
                    #generate the new splitleaf for the new tree
                    sl = generate_new_splitleaf(splitleaf_list, cap_l, incorr_l, ndata, t, lb, b0, lamb, R_c)
                    #print("splitleaf_list, cap_l, incorr_l, ndata, t, lb, b0, lamb, R_c",splitleaf_list, cap_l, incorr_l, ndata, t, lb, b0, lamb, R_c)
                    #print('sl',sl)
                    #construct the new tree
                    # every per-leaf list drops entry i and gets the two new entries appended
                    tree_new = CacheTree(x = x, y = y, prefix = t,
                                         #prediction = tree.prediction[:i]+tree.prediction[i+1:]+pred_l,
                                         num_captured = tree.num_captured[:i]+tree.num_captured[i+1:]+cap_l,
                                         #num_captured_incorrect = tree.num_captured_incorrect[:i]+tree.num_captured_incorrect[i+1:]+incorr_l,
                                         deadleaf = tree.deadleaf[:i]+tree.deadleaf[i+1:]+[0,0],
                                         splitleaf = sl,
                                         lbound = new_lbound,
                                         p = tree.p[:i]+tree.p[i+1:]+p_l,
                                         B0 = tree.B0[:i]+tree.B0[i+1:]+B0_l,
                                         lamb = lamb,
                                         prior_metric=prior_metric,
                                         points_cap = tree.points_cap[:i]+tree.points_cap[i+1:]+points_l
                                        )

                    #queue.append(tree_new)
                    """print("t:",t)
                    print("tree_new.num_captured:",tree_new.num_captured)
                    print("tree_new.deadleaf:",tree_new.deadleaf)
                    print("tree_new.splitleaf:",tree_new.splitleaf)
                    print("tree_new.p:",tree_new.p)
                    print("tree_new.B0:", tree_new.B0)"""
                    
                    heapq.heappush(queue, (tree_new.metric,tree_new))
                    # keep the new tree as the incumbent if its objective is strictly better
                    R = tree_new.risk
                    if R<R_c:
                        d_c = tree_new
                        R_c = R
                    
                    COUNT = COUNT+1
                    
                    #log(lines, lamb, tic, len(queue), d, tree_new, R, d_c, R_c)
                    
                   
                
    """            
    header = ['time', 'queue_size', 'split_tree', 'new_tree', 'new_tree_length', 'new_tree_objective',
              'best_tree', 'best_tree_length', 'objective', 'lower_bound', 'accuracy', 'num_captured']
    
    fname = "_".join([str(nrule),str(ndata),prior_metric,str(lamb),".txt"])
    with open(fname, 'w') as f:
        f.write('%s\n' % ";".join(header))
        f.write('\n'.join(lines))"""


    # report the best tree found, its objective and the number of trees pushed
    print("d_c", d_c.prefix)
    print("R_c", R_c)
    print("COUNT", COUNT)
    return

#### compas-binary

In [25]:
import cProfile

In [26]:
# Profile a full search on the 5-feature data with the entropy priority metric
cProfile.run("bbound(x[:,:], y, lamb=0.01, prior_metric=\"entropy\")")

nrule: 5
ndata: 6907
d_c ((-4,), (4, -5), (4, 5))
R_c 0.3748675256985667
COUNT 961365
         78633100 function calls in 465.201 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.000    0.000 <ipython-input-16-85107050ef81>:11(make_all_ones)
      594    0.004    0.000    0.005    0.000 <ipython-input-16-85107050ef81>:23(rule_vand)
      594    2.267    0.004    3.747    0.006 <ipython-input-17-94c5f54bcb5c>:9(rule_vectompz)
   961365   58.817    0.000   95.102    0.000 <ipython-input-19-bf33e6033a7b>:45(<listcomp>)
   961365   42.506    0.000   67.497    0.000 <ipython-input-19-bf33e6033a7b>:47(<listcomp>)
   158231    0.392    0.000    0.392    0.000 <ipython-input-19-bf33e6033a7b>:58(__lt__)
   961366   20.913    0.000  189.521    0.000 <ipython-input-19-bf33e6033a7b>:7(__init__)
      198    0.019    0.000    3.773    0.019 <ipython-input-20-b16472b1b858>:7(__init__)
  3204936   25.898   

In [27]:
# Profile a full search on the 5-feature data with the gini priority metric
cProfile.run("bbound(x[:,:], y, lamb=0.01, prior_metric=\"gini\")")

nrule: 5
ndata: 6907
d_c ((-4,), (4, -5), (4, 5))
R_c 0.3748675256985667
COUNT 961709
         59319226 function calls in 376.797 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.000    0.000 <ipython-input-16-85107050ef81>:11(make_all_ones)
      594    0.003    0.000    0.005    0.000 <ipython-input-16-85107050ef81>:23(rule_vand)
      594    2.244    0.004    3.676    0.006 <ipython-input-17-94c5f54bcb5c>:9(rule_vectompz)
   961709    7.256    0.000    7.256    0.000 <ipython-input-19-bf33e6033a7b>:51(<listcomp>)
   961709   42.722    0.000   67.385    0.000 <ipython-input-19-bf33e6033a7b>:52(<listcomp>)
   163704    0.405    0.000    0.405    0.000 <ipython-input-19-bf33e6033a7b>:58(__lt__)
   961710   20.524    0.000  101.129    0.000 <ipython-input-19-bf33e6033a7b>:7(__init__)
      198    0.019    0.000    3.701    0.019 <ipython-input-20-b16472b1b858>:7(__init__)
  3206586   25.922   

In [28]:
%%time
# all data, 5 features, queue ordered by "curiosity"

bbound(x, y, lamb=0.01, prior_metric="curiosity")

nrule: 5
ndata: 6907
d_c ((5,), (-5, -4), (-5, 4))
R_c 0.3748675256985667
COUNT 1987208
CPU times: user 3min 35s, sys: 680 ms, total: 3min 36s
Wall time: 3min 36s


In [29]:
%%time
# all data, 5 features, queue ordered by "bound"

bbound(x, y, lamb=0.01, prior_metric="bound")

nrule: 5
ndata: 6907
d_c ((-4,), (4, -5), (4, 5))
R_c 0.3748675256985667
COUNT 1878542
CPU times: user 2min 41s, sys: 444 ms, total: 2min 41s
Wall time: 2min 41s


In [30]:
%%time
# all data, 5 features, queue ordered by "entropy"

bbound(x, y, lamb=0.01, prior_metric="entropy")

nrule: 5
ndata: 6907
d_c ((-4,), (4, -5), (4, 5))
R_c 0.3748675256985667
COUNT 961365
CPU times: user 1min 39s, sys: 64 ms, total: 1min 40s
Wall time: 1min 40s


In [31]:
%%time
# all data, 5 features, queue ordered by "gini"

bbound(x, y, lamb=0.01, prior_metric="gini")

nrule: 5
ndata: 6907
d_c ((-4,), (4, -5), (4, 5))
R_c 0.3748675256985667
COUNT 961709
CPU times: user 1min 31s, sys: 52 ms, total: 1min 31s
Wall time: 1min 31s


In [None]:
%%time
# all data, 6 features (x6), queue ordered by "entropy"

bbound(x6, y, lamb=0.01, prior_metric="entropy")

nrule: 6
ndata: 6907


In [None]:
%%time
# all data, 6 features (x6), queue ordered by "gini"

bbound(x6, y, lamb=0.01, prior_metric="gini")