In [1]:
# Let's take decision trees for granted.

In [2]:
import numpy as np

def random_patches(X, y, size=0.4, features=2):
    """Draw one random patch: a bootstrap sample of rows over a random column subset.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Feature matrix. Converted with ``np.asarray`` so indexing is positional.
    y : array-like of shape (m,)
        Targets aligned with the rows of ``X``.
    size : float, default 0.4
        Fraction of the ``m`` rows to draw (with replacement).
    features : int, default 2
        Number of distinct columns to draw (without replacement).

    Returns
    -------
    X_patch : ndarray of shape (sample_size, features)
    y_patch : ndarray of shape (sample_size,)
    feature_indices : ndarray of shape (features,)
        Column positions of ``X`` that ``X_patch`` was built from.

    Raises
    ------
    ValueError
        Propagated from ``np.random.choice``, e.g. when ``features`` exceeds
        the number of columns.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    m, n = X.shape

    sample_size = int(m * size)
    # int() truncates, so a small dataset (e.g. 2 rows * 0.4) would give an
    # empty patch that no estimator can be fit on; keep at least one row.
    if sample_size == 0 and m > 0:
        sample_size = 1

    # Rows are drawn first, then columns, from the global NumPy random state.
    sample_indices = np.random.choice(m, size=sample_size, replace=True)
    feature_indices = np.random.choice(n, size=features, replace=False)

    X_patch = X[sample_indices][:, feature_indices]
    y_patch = y[sample_indices]

    return X_patch, y_patch, feature_indices

In [3]:
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

def random_forest_classifier(X, y, n_estimators=10, size=0.4):
    """Train an ensemble of decision-tree classifiers on random patches.

    Every tree is fit on its own patch produced by ``random_patches``: a
    bootstrap sample of the rows restricted to ``int(sqrt(n_features))``
    randomly chosen columns.

    Parameters
    ----------
    X : ndarray of shape (m, n_features)
        Training features.
    y : ndarray of shape (m,)
        Class labels.
    n_estimators : int, default 10
        Number of trees to train.
    size : float, default 0.4
        Fraction of rows drawn for each tree's patch.

    Returns
    -------
    models : list
        The fitted ``DecisionTreeClassifier`` instances.
    feature_sets : list
        ``feature_sets[i]`` holds the column indices ``models[i]`` was
        trained on; the same columns must be selected at prediction time.
    """
    n_features = X.shape[1]
    # Depends only on X, so compute it once instead of once per tree.
    features_per_tree = int(np.sqrt(n_features))

    models = []
    feature_sets = []
    for _ in range(n_estimators):
        X_patch, y_patch, feat_idx = random_patches(
            X, y, size=size, features=features_per_tree
        )
        tree = DecisionTreeClassifier()
        tree.fit(X_patch, y_patch)
        models.append(tree)
        feature_sets.append(feat_idx)

    return models, feature_sets

In [4]:
from scipy.stats import mode

def predict_random_forest_one(x, models, feature_sets):
    """Classify a single sample by majority vote over the ensemble.

    Parameters
    ----------
    x : ndarray of shape (n_features,)
        One sample, in the full feature space.
    models : sequence
        Fitted estimators exposing ``predict``.
    feature_sets : sequence
        ``feature_sets[i]`` is the column index array ``models[i]`` expects.

    Returns
    -------
    The most frequently predicted label (a NumPy scalar). Ties go to the
    smallest label, because ``np.unique`` returns labels sorted and
    ``np.argmax`` picks the first maximum.

    Raises
    ------
    ValueError
        If the ensemble is empty, since there is nothing to vote with.
    """
    votes = [
        # Each tree only knows its own columns; predict() wants a 2-D array.
        tree.predict(x[feat_idx].reshape(1, -1))[0]
        for tree, feat_idx in zip(models, feature_sets)
    ]
    if not votes:
        raise ValueError("cannot predict with an empty ensemble")

    # Counting with np.unique works for any sortable label type (ints,
    # strings, ...), unlike scipy.stats.mode which is numeric-only.
    labels, counts = np.unique(votes, return_counts=True)
    return labels[np.argmax(counts)]

def predict_random_forest(X, models, feature_sets):
    """Classify every row of ``X`` with the ensemble.

    Rows are handed one at a time to ``predict_random_forest_one``; the
    per-row labels are collected into a 1-D NumPy array in row order.
    """
    labels = []
    for row in X:
        labels.append(predict_random_forest_one(row, models, feature_sets))
    return np.array(labels)

In [5]:
# Toy classification problem: 6 samples, 3 features, binary labels.
X = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
    [1, 0, 1],
    [2, 1, 2],
    [3, 3, 3]
])
y = np.array([0, 1, 1, 0, 0, 1])

# With 6 rows and 3 columns, each of the 5 trees is fit on int(6 * 0.4) = 2
# bootstrap rows and int(sqrt(3)) = 1 column.
models, feature_sets = random_forest_classifier(X, y, n_estimators=5)

# NOTE: this predicts on the training data itself, and no random seed is set,
# so the printed predictions can differ from run to run.
preds = predict_random_forest(X, models, feature_sets)
print("Predictions:", preds)

Predictions: [0 1 1 0 0 1]


In [12]:
## Similarly, a random forest regressor

In [22]:
def random_forest_regressor(X, y, n_estimators=10, size=0.4):
    """Train an ensemble of decision-tree regressors on random patches.

    Every tree is fit on its own patch produced by ``random_patches``: a
    bootstrap sample of the rows restricted to ``int(sqrt(n_features))``
    randomly chosen columns.

    Parameters
    ----------
    X : ndarray of shape (m, n_features)
        Training features.
    y : ndarray of shape (m,)
        Regression targets.
    n_estimators : int, default 10
        Number of trees to train.
    size : float, default 0.4
        Fraction of rows drawn for each tree's patch.

    Returns
    -------
    models : list
        The fitted ``DecisionTreeRegressor`` instances.
    feature_sets : list
        ``feature_sets[i]`` holds the column indices ``models[i]`` was
        trained on; the same columns must be selected at prediction time.
    """
    n_features = X.shape[1]
    # Depends only on X, so compute it once instead of once per tree.
    features_per_tree = int(np.sqrt(n_features))

    models = []
    feature_sets = []
    for _ in range(n_estimators):
        X_patch, y_patch, feat_idx = random_patches(
            X, y, size=size, features=features_per_tree
        )
        tree = DecisionTreeRegressor()
        tree.fit(X_patch, y_patch)
        models.append(tree)
        feature_sets.append(feat_idx)

    return models, feature_sets

In [24]:
def predict_random_forest_regressor_one(x, models, feature_sets):
    """Predict one sample as the mean of the individual tree predictions.

    ``x`` is a 1-D feature vector in the full feature space. Each tree is
    given only the columns listed in its matching entry of ``feature_sets``,
    reshaped to a single-row 2-D array as ``predict`` requires.
    """
    per_tree = [
        estimator.predict(x[columns].reshape(1, -1))[0]
        for estimator, columns in zip(models, feature_sets)
    ]
    return np.mean(per_tree)

def predict_random_forest_regressor(X, models, feature_sets):
    """Predict every row of ``X`` with the regression ensemble.

    Rows are handed one at a time to ``predict_random_forest_regressor_one``;
    the per-row averages are collected into a 1-D NumPy array in row order.
    """
    averages = []
    for row in X:
        averages.append(predict_random_forest_regressor_one(row, models, feature_sets))
    return np.array(averages)

In [26]:
# Sample data: 4 samples, 3 features, numeric targets.
# NOTE: this rebinds the notebook-global X and y used by the classifier demo.
X = np.array([[1, 2, 3],
              [4, 5, 6],
              [7, 8, 9],
              [2, 3, 1]])
y = np.array([10, 20, 30, 15])

# With 4 rows, each tree's patch holds int(4 * 0.4) = 1 row, so each tree
# presumably predicts a single constant; averaging two constants then gives
# the same value for every input row, as the recorded output shows.
# No random seed is set, so that value can change between runs.
models, feature_sets = random_forest_regressor(X, y, n_estimators=2)
preds = predict_random_forest_regressor(X, models, feature_sets)
print("Predictions:", preds)

Predictions: [15. 15. 15. 15.]
