In [51]:
from typing import List
import numpy as np
from sklearn.datasets import make_blobs

In [2]:
X, Y = make_blobs(n_samples=200, centers=2, n_features=2, cluster_std=0.4, random_state=0)

In [53]:
class ExNode():
    def __init__(self, size):
        self.size = size


class InNode():
    def __init__(self, left, right, split_attr, split_value):
        self.left = left
        self.right = right
        self.split_attr = split_attr
        self.split_value = split_value


def itree(X_sample: np.ndarray, e: int, height: int) -> InNode or ExNode:
    '''
    X_sample: sample data
    e: current tree height
    height: height limit
    '''
    if e >= height or X_sample.shape[0] <= 1:
        return ExNode(X_sample.shape[0])
    
    # Randomly select a feature
    q = np.random.choice(X_sample.shape[1])
    # Randomly select a split point
    p = np.random.choice(X_sample[:, q])

    X_left = X_sample[X_sample[:, q] < p]
    X_right = X_sample[X_sample[:, q] >= p]

    return InNode(left=itree(X_left, e+1, height), right=itree(X_right, e+1, height), split_attr=q, split_value=p)


def iforest(n_trees: int = 100, phai: int = 128) -> List[InNode or ExNode]:
    height = np.log2(phai)
    forest = []

    for i in range(n_trees):
        X_sample = X[np.random.choice(X.shape[0], phai, replace=False)]
        tree = itree(X_sample, 0, height)
        forest.append(tree)
    
    return forest

def path_length():
    pass


In [31]:
isolation_forest = iforest()

In [39]:
tree = isolation_forest[0]

In [50]:
print(tree.split_value)
print(tree.left.split_value)
print(tree.left.left.split_value)
print(tree.left.left.left.split_value)

2.0943575085188204
4.31651955075813
0.9915986262341816
0.678368149727119
