In [33]:
import numpy as np
from collections import Counter
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
class RandomForest:
    def __init__(self, n_estimators=10, max_depth=None, max_features='sqrt'):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.max_features = max_features
        self.trees = []

    def fit(self, X, y):
        for _ in range(self.n_estimators):
            # Bootstrap sampling for creating new dataset
            indices = np.random.choice(len(X), size=len(X), replace=True)
            X_bootstrap = X[indices]
            y_bootstrap = y[indices]

            # Train decision tree on the bootstrapped dataset
            tree = DecisionTreeClassifier(max_depth=self.max_depth, max_features=self.max_features)

            # 随机反转10%的La
            Ty_bootstrap = y_bootstrap.copy()
            mask = np.random.rand(len(Ty_bootstrap)) < 0.4
            Ty_bootstrap[mask] = -Ty_bootstrap[mask]

            tree.fit(X_bootstrap, Ty_bootstrap)
            self.trees.append(tree)

    def predict(self, X):
        predictions = np.zeros((X.shape[0], len(self.trees)))
        for i, tree in enumerate(self.trees):
            predictions[:, i] = tree.predict(X)

        # Use majority voting for final prediction
        final_predictions = []
        for preds in predictions:
            # 创建一个字典，用于统计每个元素出现的次数
            counts = {}
            for pred in preds:
                if pred in counts:
                    counts[pred] += 1
                else:
                    counts[pred] = 1
            
            # 寻找出现次数最多的元素
            max_count = 0
            most_common_pred = None
            for pred, count in counts.items():
                if count > max_count:
                    max_count = count
                    most_common_pred = pred
            final_predictions.append(most_common_pred)

        return np.array(final_predictions)
# Load iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)

# Create and train the random forest model
rf = RandomForest(n_estimators=10, max_depth=None, max_features='sqrt')
rf.fit(X_train, y_train)

# Make predictions on the test set
predictions = rf.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, predictions)
print("MAIN_Accuracy:", accuracy)

for i in rf.trees:
    # 测试每个决策树的准确率
    predictions = i.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    print("SON_Accuracy:", accuracy)

MAIN_Accuracy: 0.6266666666666667
SON_Accuracy: 0.52
SON_Accuracy: 0.6666666666666666
SON_Accuracy: 0.5866666666666667
SON_Accuracy: 0.7066666666666667
SON_Accuracy: 0.7466666666666667
SON_Accuracy: 0.6133333333333333
SON_Accuracy: 0.7466666666666667
SON_Accuracy: 0.72
SON_Accuracy: 0.5733333333333334
SON_Accuracy: 0.6933333333333334
