In [None]:
import numpy as np

class TreeNode:
    def __init__(self, feature=None, value=None, left=None, right=None, is_leaf=False, prediction=None):
        self.feature = feature
        self.value = value
        self.left = left
        self.right = right
        self.is_leaf = is_leaf
        self.prediction = prediction

class RTLearner: # left is ngt, right is gt
    def __init__(self, leaf_size = 1, verbose = False):
        self.leaf_size = leaf_size
        self.verbose = verbose
        self.tree = None
    def add_evidence(self, data_x, data_y):
        self.build_tree(data_x, data_y)
        
    def build_tree(self, data_x: np.ndarray, data_y: np.ndarray)-> TreeNode:
        if data_x.shape[0] <= self.leaf_size: # smaller than leaf size
            return TreeNode(None, None, None, None, True, data_y.mean())
        if np.all(data_y == data_y[0]):
            return TreeNode(None, None, None, None, True, data_y[0])
        
        #best_feature = self.determine_best_feature(data_x,data_y)
        best_feature = np.random.randint(0,data_x.shape[1])
        split_val = np.median(data_x[:,best_feature])
        left_indices = data_x[:,best_feature] <= split_val
        right_indices = data_x[:,best_feature] > split_val
        
        left_tree = self.build_tree(data_x[left_indices],data_y[left_indices])
        right_tree = self.build_tree(data_x[right_indices],data_y[right_indices])
        
        return TreeNode(best_feature,split_val,left_tree,right_tree,False,None)
    
    def determine_best_feature(self, data_x: np.ndarray, data_y: np.ndarray):
        best_score = float('inf')
        best_feature = -1
        
        for feature in range(data_x.shape[1]):
            split_val = np.median(data_x[:,feature])
            left_indices = np.where(data_x[:,feature] <= split_val)[0]
            right_indices = np.where(data_x[:,feature] > split_val)[0]
            if left_indices.size == 0 or right_indices.size == 0 :
                continue
            left_y = data_y[left_indices]
            right_y = data_y[right_indices]
            score = np.var(left_y) * left_y.size + np.var(right_y) * right_y.size
            if score < best_score:
                best_score = score
                best_feature = feature
        return best_feature
    def query_point(self, point):
        if self.tree is None:
            raise ValueError("The model has not been trained yet.")
        self.query_point_for_subtree(point, self.tree)
         
    def query_point_for_subtree(self, point, node:TreeNode): # return prediction.
        if node.is_leaf:
            return node.prediction
        if point[node.feature] <= node.value:
            return self.query_point_for_subtree(point,node.left)
        else:
            return self.query_point_for_subtree(point,node.right)
        


# node represenation:
# ['leaf'/'internal', feature, value, left_tree, right_tree]
# top-down approach.
a = np.array([-1,0,0.0,np.nan, np.nan ])
b = np.array([1,1,0.0,np.nan, np.nan ])
c = np.vstack((a,b))
d = np.array([-1,0,0.0,1,np.nan])

e = np.vstack((d,c))
print(c)
print(e)

IndentationError: expected an indented block after 'while' statement on line 55 (213322510.py, line 61)

In [27]:
import numpy as np

a = np.array([[1.0,2.0,4.0],
              [2.0,5.0,4.0]
              ])
# b = np.array([2.0,2.0,4.0])
indices = a[:,1]<=3.0
c = np.random.choice(a.shape[0],a.shape[0],replace=True)
print(c)
selected_a = a[c]
print(selected_a)
print(indices)
print(indices.size)
b = a[indices]
b

[1 1]
[[2. 5. 4.]
 [2. 5. 4.]]
[ True False]
2


array([[1., 2., 4.]])

In [28]:
data_x = np.array([
    [1.0,2.0,3.0,4.0],
    [2.0,3.0,4.0,1.0]
])

data_y = np.array([
    1.0,2.0
])

np.corrcoef(data_x[:,1],data_y)

array([[1., 1.],
       [1., 1.]])

In [22]:
kwargs = {
    "leaf_size": 1,
    "verbose": False,
}
def func(**args):
    print(args)
func(**kwargs)

{'leaf_size': 1, 'verbose': False}
