In [1]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.metrics import accuracy_score

# Load the dataset; the iris dataset is used here as the example.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Split into training and test sets: 20% of the samples are held out for
# testing, and the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Create the base classifiers that the voting ensembles combine:
# a logistic regression, a 50-tree random forest, and Gaussian naive Bayes.
clf1 = LogisticRegression(
    random_state=1,
)
clf2 = RandomForestClassifier(
    n_estimators=50,
    random_state=1,
)
clf3 = GaussianNB()

# Hard voting: the ensemble is configured with voting='hard', i.e. the
# prediction is decided by a vote over the base classifiers' class labels.
voting_clf_hard = VotingClassifier(
    estimators=[
        ('lr', clf1),
        ('rf', clf2),
        ('gnb', clf3),
    ],
    voting='hard',
).fit(X_train, y_train)  # fit() returns the fitted ensemble itself

# Evaluate on the held-out test set and report accuracy to two decimals.
y_pred_hard = voting_clf_hard.predict(X_test)
accuracy_hard = accuracy_score(y_test, y_pred_hard)
print(f"硬投票准确率: {accuracy_hard:.2f}")

# Soft voting: the ensemble is configured with voting='soft', which combines
# the class probabilities predicted by the base classifiers.
# Note: soft voting requires every base estimator to implement predict_proba.
# LogisticRegression, RandomForestClassifier and GaussianNB all provide it
# without extra configuration (a probability=True flag is only needed for
# estimators such as SVC, which is not used here).
voting_clf_soft = VotingClassifier(
    estimators=[
        ('lr', clf1),
        ('rf', clf2),
        ('gnb', clf3),
    ],
    voting='soft',
).fit(X_train, y_train)  # fit() returns the fitted ensemble itself

# Evaluate on the held-out test set and report accuracy to two decimals.
y_pred_soft = voting_clf_soft.predict(X_test)
accuracy_soft = accuracy_score(y_test, y_pred_soft)
print(f"软投票准确率: {accuracy_soft:.2f}")

硬投票准确率: 1.00
软投票准确率: 1.00
