In [7]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [8]:
# Creating class Decision Tree
class DecisionTree:
    def __init__(self):
        self.left = None
        self.right = None
        self.feature = None
        self.threshold = None
        self.leaf = False
        self.prediction = None

    def fit(self, X, y):
        if len(np.unique(y)) == 1:
            self.leaf = True
            self.prediction = y[0]
        else:
            best_feature, best_threshold = self.find_best_split(X, y)
            if best_feature is None or best_threshold is None:
                self.leaf = True
                self.prediction = stats.mode(y)[0][0]
            else:
                left_indices = X[:, best_feature] < best_threshold
                right_indices = ~left_indices
                self.left = DecisionTree()
                self.right = DecisionTree()
                self.left.fit(X[left_indices], y[left_indices])
                self.right.fit(X[right_indices], y[right_indices])
                self.feature = best_feature
                self.threshold = best_threshold

    def find_best_split(self, X, y):
        best_feature = None
        best_threshold = None
        best_gini = 1

        for feature in range(X.shape[1]):
            thresholds = np.unique(X[:, feature])
            for threshold in thresholds:
                left_indices = X[:, feature] < threshold
                y_left = y[left_indices]
                y_right = y[~left_indices]
                if len(y_left) == 0 or len(y_right) == 0:
                    continue
                gini = (len(y_left) / len(y)) * self.gini_impurity(y_left) + \
                       (len(y_right) / len(y)) * self.gini_impurity(y_right)
                if gini < best_gini:
                    best_feature = feature
                    best_threshold = threshold
                    best_gini = gini

        return best_feature, best_threshold

    def predict_one(self, x):
        if self.leaf:
            return self.prediction
        if x[self.feature] < self.threshold:
            return self.left.predict_one(x)
        else:
            return self.right.predict_one(x)

    def predict(self, X):
        return np.array([self.predict_one(x) for x in X])

    @staticmethod
    def gini_impurity(y):
        _, counts = np.unique(y, return_counts=True)
        probabilities = counts / len(y)
        return 1 - np.sum(probabilities**2)



In [9]:
# Creating class Random Forest
class RandomForest:
    def __init__(self, n_trees, max_depth, n_features):
        self.n_trees = n_trees
        self.max_depth = max_depth
        self.n_features = n_features
        self.trees = []

    def fit(self, X, y):
        n_samples, n_features = X.shape
        for i in range(self.n_trees):
            tree = DecisionTree()
            indices = np.random.choice(n_samples, n_samples, replace=True)
            if self.n_features == "sqrt":
                n_subset = int(np.sqrt(n_features))
            else:
                n_subset = self.n_features
            subset = np.random.choice(n_features, n_subset, replace=False)
            tree.fit(X[indices][:, subset], y[indices])
            self.trees.append(tree)

    def predict(self, X):
        predictions = np.array([tree.predict(X) for tree in self.trees])
        return stats.mode(predictions)[0][0]

In [10]:
# Load Iris dataset
data = load_iris()
X = data.data
y = data.target

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize random forest classifier with 5 trees, maximum depth of 3, and "sqrt" number of features
rf = RandomForest(n_trees=5, max_depth=3, n_features="sqrt")

# Train the random forest classifier on the training data
rf.fit(X_train, y_train)

# Make predictions on the test data
y_pred = rf.predict(X_test)

# Calculate accuracy of the predictions
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.36666666666666664
