In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.datasets import fetch_california_housing
from sklearn.tree import DecisionTreeRegressor

In [2]:
# Define the Random Forest class
class RandomForest:
    """Bagged ensemble of ``DecisionTreeRegressor`` models for regression.

    Every tree is trained on a bootstrap resample of the rows and on a random
    subset of the columns. The column subset is drawn once per tree (not once
    per split), and the ensemble prediction is the unweighted mean of the
    individual tree predictions.

    Parameters
    ----------
    n_estimators : int, default=100
        Number of trees to fit.
    max_depth : int or None, default=None
        Forwarded unchanged to every ``DecisionTreeRegressor``.
    max_features : int, "sqrt", "log2" or anything else, default="sqrt"
        Number of columns handed to each tree. An integer (built-in or NumPy)
        is used as-is and must lie in ``[1, n_features]``. ``"sqrt"`` and
        ``"log2"`` are floored and clamped to at least 1. Any other value
        (for example ``None``) selects every column.
    random_state : int, numpy.random.RandomState or None, default=None
        Seed (or generator) for the bootstrap draws, the feature subsets and
        the per-tree seeds. When ``None``, draws come from NumPy's global
        random state, so ``np.random.seed`` still controls them.

    Attributes
    ----------
    trees : list of (tree, feature_indices) tuples
        Fitted trees paired with the column indices each one was trained on.
        Empty until ``fit`` has been called.
    """

    def __init__(self, n_estimators=100, max_depth=None, max_features="sqrt",
                 random_state=None):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.max_features = max_features
        self.random_state = random_state
        self.trees = []
        self._rng = self._make_rng()

    def _make_rng(self):
        """Build the private generator for ``random_state`` (``None`` if unseeded)."""
        seed = getattr(self, "random_state", None)
        if seed is None:
            return None
        if isinstance(seed, np.random.RandomState):
            return seed
        return np.random.RandomState(seed)

    def _random(self):
        """Return the source of randomness: the private generator or global ``np.random``."""
        rng = getattr(self, "_rng", None)
        return np.random if rng is None else rng

    def _bootstrap_sample(self, X, y):
        """Return ``(X, y)`` resampled row-wise with replacement, same length as the input."""
        n_samples = X.shape[0]
        indices = self._random().choice(n_samples, n_samples, replace=True)
        return X[indices], y[indices]

    def _get_max_features(self, n_features):
        """Translate ``self.max_features`` into a concrete column count.

        Raises
        ------
        ValueError
            If ``max_features`` is an integer outside ``[1, n_features]``.
        """
        # np.integer is checked explicitly: NumPy integers are not instances
        # of the built-in int and would otherwise fall through to "all features".
        if isinstance(self.max_features, (int, np.integer)):
            count = int(self.max_features)
            if not 1 <= count <= n_features:
                raise ValueError(
                    f"max_features={count} must be between 1 and "
                    f"n_features={n_features}"
                )
            return count
        if isinstance(self.max_features, str):
            if self.max_features == "sqrt":
                return max(1, int(np.sqrt(n_features)))
            if self.max_features == "log2":
                return max(1, int(np.log2(n_features)))
        # Anything unrecognised (including None) means "use every column".
        return n_features

    def fit(self, X, y):
        """Fit ``n_estimators`` trees, replacing any previously fitted ones.

        Parameters
        ----------
        X : array-like, indexed as (rows, columns)
            Training features; converted with ``np.asarray``.
        y : array-like
            Training targets, one per row of ``X``; converted with ``np.asarray``.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` have different numbers of rows, or if an
            integer ``max_features`` is out of range for ``X``.
        """
        # Positional fancy indexing below needs real arrays, not DataFrames/lists.
        X = np.asarray(X)
        y = np.asarray(y)
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y have inconsistent lengths: {X.shape[0]} != {y.shape[0]}"
            )

        self.trees = []
        # Re-create the generator so repeated fits with an int seed are identical.
        self._rng = self._make_rng()
        rng = self._random()
        n_features = X.shape[1]
        max_features = self._get_max_features(n_features)

        for _ in range(self.n_estimators):
            X_sample, y_sample = self._bootstrap_sample(X, y)
            feature_indices = rng.choice(n_features, max_features, replace=False)
            # Only seed the tree when the forest itself is seeded; otherwise the
            # tree keeps its default (unseeded) behaviour.
            tree_seed = None
            if self._rng is not None:
                tree_seed = int(rng.randint(np.iinfo(np.int32).max))
            tree = DecisionTreeRegressor(max_depth=self.max_depth, random_state=tree_seed)
            tree.fit(X_sample[:, feature_indices], y_sample)
            self.trees.append((tree, feature_indices))

    def predict(self, X):
        """Return the mean of the per-tree predictions for each row of ``X``.

        Raises
        ------
        ValueError
            If the model holds no fitted trees (``fit`` has not been called,
            or it was called with ``n_estimators`` < 1).
        """
        if not self.trees:
            raise ValueError(
                "This RandomForest has no fitted trees; call fit() before predict()."
            )
        X = np.asarray(X)
        # Each tree only ever saw its own column subset, so slice X the same way.
        tree_preds = [
            tree.predict(X[:, feature_indices])
            for tree, feature_indices in self.trees
        ]
        return np.mean(tree_preds, axis=0)

In [3]:
# Fetch the California housing data and expose features and target as NumPy arrays
housing = fetch_california_housing()
y = pd.Series(housing.target).to_numpy()
X = pd.DataFrame(data=housing.data, columns=housing.feature_names).to_numpy()

In [4]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [5]:
# Initialize and train the Random Forest model.
# The forest draws its bootstrap rows and per-tree feature subsets from NumPy's
# global random state, so seed it first: otherwise every Restart & Run All
# produces a different model and a different error below.
np.random.seed(42)
model = RandomForest(n_estimators=100, max_depth=10, max_features="sqrt")
model.fit(X_train, y_train)

In [6]:
# Predict targets for the held-out test rows with the trained forest
y_pred = model.predict(X=X_test)

In [7]:
# Score the predictions against the true test targets (mean squared error)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f"Mean Squared Error: {mse}")

Mean Squared Error: 0.6134396393666356
