In [434]:
import numpy as np

def stump(X, y, weights):
    m, n = X.shape
    best_stump = {}
    min_error = float('inf')
    
    for j in range(n):  # 对每个特征
        feature_values = np.unique(X[:, j])
        for threshold in feature_values:
            for inequality in ["lt", "gt"]:  # "lt" - less than, "gt" - greater than
                predicted_labels = np.where(X[:, j] < threshold, -1, 1)
                if inequality == "gt":
                    predicted_labels = -predicted_labels
                errors = weights[y != predicted_labels]
                weighted_error = np.sum(errors)
                
                if weighted_error < min_error:
                    min_error = weighted_error
                    best_stump['feature'] = j
                    best_stump['threshold'] = threshold
                    best_stump['ineq'] = inequality
                    best_stump['label'] = predicted_labels
    
    return best_stump, min_error

def adaboost_train(X_t, y_t, M=10):
    weights = np.full(len(X_t), 1/len(X_t))
    alpha = []
    classifiers = []

    for _ in range(M):
        # 每次随机删除一部分数据集合 以弱化基础分类器性能
        # idx = np.random.choice(len(X_t), len(X_t), replace=True)
        X = X_t
        # y = y_t[idx]

        y = y_t.copy()
        mask = np.random.rand(len(y)) < 0.45
        y[mask] = -y[mask]
    

        stump_info, error = stump(X, y, weights)
        alpha_m = 0.5 * np.log((1 - error) / max(error, 1e-10))
        alpha.append(alpha_m)
        classifiers.append(stump_info)
        
        weights *= np.exp(-alpha_m * y * stump_info['label'])
        weights /= np.sum(weights)
    
    return classifiers, alpha

def adaboost_predict(X, classifiers, alpha):
    predictions = np.zeros(len(X))
    for alpha_m, classifier in zip(alpha, classifiers):
        predictions += alpha_m * np.where(X[:, classifier['feature']] < classifier['threshold'], -1, 1) * (1 if classifier['ineq'] == 'lt' else -1)
    return np.sign(predictions)

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import numpy as np

# 加载数据集
data = load_iris()
X = data.data
y = data.target

y = np.where(y == 2, 1, -1)

# 随机扰乱
np.random.seed()
# 分割数据集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=3)

# 训练AdaBoost模型
classifiers, alpha = adaboost_train(X_train, y_train, M=30)


def accuracy(y_true, y_pred):
    return np.mean(y_true == y_pred)

# 使用训练好的AdaBoost模型进行预测
y_pred_train = adaboost_predict(X_train, classifiers, alpha)
y_pred_test = adaboost_predict(X_test, classifiers, alpha)

# 计算训练和测试精度
train_accuracy = accuracy(y_train, y_pred_train)
test_accuracy = accuracy(y_test, y_pred_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")

# 分别测试每个classifiers 的 accuracy
for i in range(len(classifiers)):
    y_pred_train = adaboost_predict(X_train, classifiers[:i+1], alpha[:i+1])
    y_pred_test = adaboost_predict(X_test, classifiers[:i+1], alpha[:i+1])
    train_accuracy = accuracy(y_train, y_pred_train)
    test_accuracy = accuracy(y_test, y_pred_test)
    print(f"Training Accuracy with {i+1} classifiers: {train_accuracy * 100:.2f}%")
    print(f"Testing Accuracy with {i+1} classifiers: {test_accuracy * 100:.2f}%")
    print()

Training Accuracy: 93.33%
Testing Accuracy: 90.00%
Training Accuracy with 1 classifiers: 47.78%
Testing Accuracy with 1 classifiers: 45.00%

Training Accuracy with 2 classifiers: 32.22%
Testing Accuracy with 2 classifiers: 25.00%

Training Accuracy with 3 classifiers: 3.33%
Testing Accuracy with 3 classifiers: 6.67%

Training Accuracy with 4 classifiers: 3.33%
Testing Accuracy with 4 classifiers: 6.67%

Training Accuracy with 5 classifiers: 11.11%
Testing Accuracy with 5 classifiers: 11.67%

Training Accuracy with 6 classifiers: 67.78%
Testing Accuracy with 6 classifiers: 65.00%

Training Accuracy with 7 classifiers: 75.56%
Testing Accuracy with 7 classifiers: 71.67%

Training Accuracy with 8 classifiers: 67.78%
Testing Accuracy with 8 classifiers: 65.00%

Training Accuracy with 9 classifiers: 51.11%
Testing Accuracy with 9 classifiers: 51.67%

Training Accuracy with 10 classifiers: 67.78%
Testing Accuracy with 10 classifiers: 66.67%

Training Accuracy with 11 classifiers: 53.33%
Testi

In [426]:
alpha

[0.32177511971028633,
 0.2234377884778482,
 0.3181898635895324,
 0.20001228068855928,
 0.3287237173032285,
 0.2871642630687942,
 0.3254009238446642,
 0.3010268115978227]