# Stacking算法
Stacking 的基本思想是：将多个不同基学习器的预测结果作为新的特征，再训练一个元学习器来综合这些基学习器的信息，从而得到最终的预测结果。这一思路与深度学习中的特征融合（Feature Fusion）和混合专家模型（Mixture of Experts, MoE）最为接近。

## 分类任务（make_classification 合成数据集）

In [5]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Build a synthetic binary-classification dataset: 1000 samples with 10
# features, 5 of which are informative. The fixed seed keeps runs repeatable.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=5,
    n_classes=2,
    random_state=42,
)

# Hold out 20% of the samples as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Base (level-0) learners as (name, estimator) pairs: a decision tree,
# a support-vector classifier and a logistic regression.
estimators = [
    ('dt', DecisionTreeClassifier(random_state=42)),
    ('svm', SVC(random_state=42)),
    ('lr', LogisticRegression(random_state=42)),
]

# Stack the base learners under a logistic-regression meta-learner and train
# the whole ensemble on the training split. `fit` returns the fitted
# estimator itself, so construction and training are chained.
stacking_clf = StackingClassifier(
    estimators=estimators,
    final_estimator=LogisticRegression(random_state=42),
).fit(X_train, y_train)

# Predict the held-out samples and report the fraction classified correctly.
y_pred = stacking_clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Stacking 分类器的准确率: {accuracy:.2f}")

Stacking 分类器的准确率: 0.94


## 回归任务

In [6]:
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error

# Build a synthetic regression dataset: 1000 samples with 10 features,
# 5 of which are informative. The fixed seed keeps runs repeatable.
# NOTE(review): no `noise` argument is passed. Per the scikit-learn docs the
# default is noise=0.0, so the target is presumably an exact linear function
# of the features -- which would explain a reported MSE of 0.00, since the
# linear base learner can fit it exactly. Confirm, and consider passing
# `noise=` if a less trivial demonstration is wanted.
X, y = make_regression(
    n_samples=1000,
    n_features=10,
    n_informative=5,
    random_state=42,
)

# Hold out 20% of the samples as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Base (level-0) learners as (name, estimator) pairs: a decision tree,
# a support-vector regressor and an ordinary linear regression.
estimators = [
    ('dt', DecisionTreeRegressor(random_state=42)),
    ('svr', SVR()),
    ('lr', LinearRegression()),
]

# Stack the base learners under a linear-regression meta-learner and train
# the whole ensemble on the training split. `fit` returns the fitted
# estimator itself, so construction and training are chained.
stacking_reg = StackingRegressor(
    estimators=estimators,
    final_estimator=LinearRegression(),
).fit(X_train, y_train)

# Predict the held-out samples and report the mean squared error.
y_pred = stacking_reg.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Stacking 回归器的均方误差: {mse:.2f}")

Stacking 回归器的均方误差: 0.00
