In [13]:
import numpy as np

## v01 - Select Points at Random


In [15]:
class RandomTree:
    """Binary tree that partitions its observations purely at random.

    At every internal node each observation is sent to the left or the right
    child by an independent draw from ``np.random.choice([True, False], n)``;
    the feature values play no role in the split.

    Attributes:
        X: Observations held by this node, stored as a NumPy array.
        y: Labels held by this node, stored as a NumPy array.
        n: Number of observations in this node (``len(y)``).
        depth: Depth of this node; the root is at depth 0.
        left: Left child ``RandomTree``, or ``None`` when the node is a leaf.
        right: Right child ``RandomTree``, or ``None`` when the node is a leaf.
    """

    def __init__(self, X, y, max_depth=2, depth=0, min_leaf_size=1):
        """Store the node's data and recursively grow its children.

        Args:
            X: Array-like of observations, one row per observation.
            y: Array-like of labels, one per row of ``X``.
            max_depth: Depth at which nodes are no longer split.
            depth: Depth of the node being built (0 for the root).
            min_leaf_size: A split is rejected, and the node kept as a leaf,
                when either side would receive fewer observations than this.
        """
        self.X = np.array(X)
        self.y = np.array(y)
        self.n = len(y)
        self.depth = depth

        # Placeholders for attributes that are not set yet:
        # self.classes
        # self.class_counts
        # self.prediction
        # self.gini
        # self.axis
        # self.t

        # Every node starts as a leaf; children are attached only if the
        # random split below is accepted.
        self.left = None
        self.right = None

        if depth == max_depth:
            return

        # Randomly assign each observation to the left (True) or right (False).
        sel = np.random.choice([True, False], self.n)
        n_left = int(np.sum(sel))
        n_right = sel.size - n_left

        if n_left < min_leaf_size or n_right < min_leaf_size:
            return

        # Index the converted arrays (not the raw arguments) so that plain
        # Python lists are accepted as input as well.
        self.left = RandomTree(self.X[sel], self.y[sel],
                               max_depth, depth + 1, min_leaf_size)
        self.right = RandomTree(self.X[~sel], self.y[~sel],
                                max_depth, depth + 1, min_leaf_size)

    def print_tree(self):
        """Print this node's size, then its subtrees, indented by depth."""
        print('  ' * self.depth + '* Size = ' + str(self.n))

        # Children are always created in pairs, so checking one is enough.
        if self.left is not None:
            self.left.print_tree()
            self.right.print_tree()
        

In [16]:
# Fix the global NumPy seed so the synthetic data set is reproducible.
np.random.seed(1)

# n observations with 5 features drawn uniformly from [0, 10),
# plus one label per observation drawn from {'a', 'b', 'c'}.
n = 1000
X = np.random.uniform(low=0, high=10, size=(n, 5))
y = np.random.choice(['a', 'b', 'c'], size=n)

In [21]:
# Grow a tree of depth at most 4 whose accepted splits leave >= 120 observations per side.
tree_mod = RandomTree(
    X,
    y,
    max_depth=4,
    min_leaf_size=120,
)

In [22]:
# Print one line per node (its size), indented by the node's depth.
tree_mod.print_tree()

* Size = 1000
  * Size = 483
    * Size = 262
      * Size = 122
      * Size = 140
    * Size = 221
  * Size = 517
    * Size = 261
      * Size = 132
      * Size = 129
    * Size = 256


In [6]:
# Number of observations held by the root node.
print(tree_mod.n)

1000


In [7]:
# Sizes of the root's left and right children, one per line.
print(tree_mod.left.n, tree_mod.right.n, sep='\n')

494
506


In [8]:
# Number of observations in the left child of the root's right child.
print(tree_mod.right.left.n)

262


In [9]:
# Small 5 x 3 array of uniform draws from [0, 10) for trying out boolean-mask indexing.
X_test = np.random.uniform(low=0, high=10, size=(5, 3))
print(X_test)

[[ 6.87178523  8.47307416  4.83688633]
 [ 1.01351733  1.0295804   1.15627705]
 [ 4.94911146  6.22024983  0.33261817]
 [ 0.62583476  9.86549177  2.91103749]
 [ 4.7673164   6.3063432   7.1489902 ]]


In [10]:
# Boolean mask that keeps every row except the second one.
sel = [True, False, True, True, True]

# A single row selected by integer index, then the rows selected by the mask.
print(X_test[3, :])
print(X_test[sel])

[ 0.62583476  9.86549177  2.91103749]
[[ 6.87178523  8.47307416  4.83688633]
 [ 4.94911146  6.22024983  0.33261817]
 [ 0.62583476  9.86549177  2.91103749]
 [ 4.7673164   6.3063432   7.1489902 ]]


## version2 - Select a Random Axis and Threshold

In [30]:
class RandomTree:
    """Binary tree that splits its observations with random axis-aligned cuts.

    Each internal node picks a feature column at random and a threshold drawn
    uniformly between that column's minimum and maximum within the node.
    Observations with a value ``<=`` the threshold go left, the rest go right.

    Attributes:
        X: Observations held by this node, stored as a NumPy array.
        y: Labels held by this node, stored as a NumPy array.
        n: Number of observations in this node (``len(y)``).
        depth: Depth of this node; the root is at depth 0.
        axis: Index of the column used for the cut, or ``None`` for a leaf.
        t: Threshold of the cut, or ``None`` for a leaf.
        left: Left child ``RandomTree``, or ``None`` when the node is a leaf.
        right: Right child ``RandomTree``, or ``None`` when the node is a leaf.
    """

    def __init__(self, X, y, max_depth=2, depth=0, min_leaf_size=1):
        """Store the node's data and recursively grow its children.

        Args:
            X: 2-D array-like of observations, one row per observation.
            y: Array-like of labels, one per row of ``X``.
            max_depth: Depth at which nodes are no longer split.
            depth: Depth of the node being built (0 for the root).
            min_leaf_size: A cut is rejected, and the node kept as a leaf,
                when either side would receive fewer observations than this.
        """
        self.X = np.array(X)
        self.y = np.array(y)
        self.n = len(y)
        self.depth = depth

        # Placeholders for attributes that are not set yet:
        # self.classes
        # self.class_counts
        # self.prediction
        # self.gini

        # Every node starts as a leaf; the cut and the children are recorded
        # only if the random cut below is accepted.
        self.axis = None
        self.t = None
        self.left = None
        self.right = None

        # An empty node cannot be cut (min/max of an empty column is undefined),
        # so it is treated as a leaf as well.
        if depth == max_depth or self.n == 0:
            return

        # Create a random cut: a random column and a threshold within its range.
        axis = np.random.choice(range(self.X.shape[1]))
        column = self.X[:, axis]
        t = np.random.uniform(np.min(column), np.max(column))

        sel = column <= t  # <= or < does not matter for a continuous threshold
        n_left = int(np.sum(sel))
        n_right = sel.size - n_left

        if n_left < min_leaf_size or n_right < min_leaf_size:
            return

        self.axis = axis
        self.t = t

        # Index the converted arrays (not the raw arguments) so that plain
        # Python lists are accepted as input as well.
        self.left = RandomTree(self.X[sel, :], self.y[sel],
                               max_depth, depth + 1, min_leaf_size)
        self.right = RandomTree(self.X[~sel, :], self.y[~sel],
                                max_depth, depth + 1, min_leaf_size)

    def print_tree(self):
        """Print this node's size (and cut, if any), then its subtrees."""
        msg = '  ' * self.depth + '* Size = ' + str(self.n)

        if self.axis is not None:
            msg += ', Axis = ' + str(self.axis) + ', t = ' + str(round(self.t, 2))

        print(msg)

        # Children are always created in pairs, so checking one is enough.
        if self.left is not None:
            self.left.print_tree()
            self.right.print_tree()
        

In [31]:
# Grow a tree of depth at most 4 whose accepted cuts leave >= 10 observations
# per side, then print its structure.
tree_mod = RandomTree(
    X,
    y,
    max_depth=4,
    min_leaf_size=10,
)
tree_mod.print_tree()

* Size = 1000, Axis = 1, t = 8.41
  * Size = 833, Axis = 2, t = 5.64
    * Size = 482, Axis = 4, t = 5.65
      * Size = 275, Axis = 3, t = 5.25
        * Size = 150
        * Size = 125
      * Size = 207, Axis = 1, t = 1.18
        * Size = 42
        * Size = 165
    * Size = 351, Axis = 0, t = 3.76
      * Size = 129
      * Size = 222, Axis = 1, t = 3.67
        * Size = 90
        * Size = 132
  * Size = 167, Axis = 2, t = 3.62
    * Size = 58
    * Size = 109, Axis = 0, t = 7.87
      * Size = 84
      * Size = 25, Axis = 1, t = 8.84
        * Size = 11
        * Size = 14
