In [16]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder


# Load iris and keep only the rows from index 50 onward, restricted to feature
# columns 1 and 2.
# NOTE(review): in scikit-learn's iris these rows are presumably the two
# non-setosa classes (a binary problem) — confirm against the dataset docs.
iris = datasets.load_iris()
X, y = iris.data[50:, [1, 2]], iris.target[50:]

# Re-encode the remaining class labels as integers starting at 0.
le = LabelEncoder()
y = le.fit_transform(y)

# One reproducible 50/50 split. random_state pins the shuffle and stratify=y
# keeps the class proportions the same in the train and test halves.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=1, stratify=y
)

# Fit the scaler on the training half of this very split and reuse its
# statistics for the test half, so X_train_std / X_test_std stay row-aligned
# with X_train / X_test and y_train / y_test.
sc = StandardScaler()
X_train_std = sc.fit_transform(X_train)
X_test_std = sc.transform(X_test)

In [17]:
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline


# Three base learners with deliberately different inductive biases.
clf1 = LogisticRegression(
    penalty='l2',
    C=0.001,
    solver='lbfgs',
    random_state=1,
)
clf2 = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=1,
    random_state=0,
)
clf3 = KNeighborsClassifier(
    n_neighbors=1,
    p=2,
    metric='minkowski',
)

# Logistic regression and KNN get a StandardScaler in front of them; the
# decision tree is used on the raw features.
pipe1 = Pipeline([['sc', StandardScaler()], ['clf', clf1]])
pipe3 = Pipeline([['sc', StandardScaler()], ['clf', clf3]])

clf_labels = ['Logistic Regression', 'Decision Tree', 'KNN']

# voting='hard' means majority voting. In this mode VotingClassifier offers no
# predict_proba, so ROC AUC cannot be computed for it; accuracy is used instead.
# The (name, estimator) pairs are materialised here, before clf_labels grows.
named_estimators = list(zip(clf_labels, [pipe1, clf2, pipe3]))
voting_clf = VotingClassifier(estimators=named_estimators, voting='hard')

clf_labels.append('Majority Vote')
all_clf = [pipe1, clf2, pipe3, voting_clf]

# 10-fold cross-validated accuracy for every base learner and for the ensemble.
# NOTE(review): the "+/-" figure printed below is half of the standard
# deviation of the fold scores, not the standard deviation itself.
for label, clf in zip(clf_labels, all_clf):
    scores = cross_val_score(clf, X_train, y_train, cv=10, scoring='accuracy')
    mean_acc = scores.mean()
    spread = scores.std() / 2.0
    print("Accuracy: %0.2f (+/- %0.2f) [%s]" % (mean_acc, spread, label))

Accuracy: 0.44 (+/- 0.04) [Logistic Regression]
Accuracy: 0.86 (+/- 0.10) [Decision Tree]
Accuracy: 0.84 (+/- 0.07) [KNN]
Accuracy: 0.84 (+/- 0.10) [Majority Vote]


In [18]:
# Hard-voting (majority) ensemble built from only the decision tree (clf2) and
# the scaled KNN pipeline (pipe3), both defined in an earlier cell.
new_voting_clf = VotingClassifier(
    estimators=[('Decision Tree', clf2), ('KNN', pipe3)],
    voting='hard',
)

# 10-fold cross-validated accuracy on the training split.
scores = cross_val_score(estimator=new_voting_clf, X=X_train, y=y_train, cv=10, scoring='accuracy')

# The label is spelled out here instead of reading `label`, which would only
# exist as a leftover loop variable from another cell (hidden kernel state).
# NOTE(review): the "+/-" figure is half of the standard deviation of the
# fold scores, not the standard deviation itself.
print("Accuracy: %0.2f (+/- %0.2f) [%s]" % (scores.mean(), scores.std() / 2.0, 'Majority Vote'))

Accuracy: 0.86 (+/- 0.10) [Majority Vote]
