In [229]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import make_classification
from sklearn.base import clone,BaseEstimator, TransformerMixin, ClassifierMixin
from mlxtend.classifier import StackingClassifier
# 重点是利用base库建立StackingClassifier，base库里含三个参数BaseEstimator, TransformerMixin, ClassifierMixin

from sklearn.ensemble import AdaBoostClassifier,GradientBoostingClassifier,RandomForestClassifier
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
from sklearn.naive_bayes import GaussianNB
import pandas as pd
from sklearn import model_selection

In [230]:
#自己定义类的方法，利用base库进行建立StackingClassifier
class StackingClassifier(BaseEstimator,ClassifierMixin, TransformerMixin):

    def __init__(self,classifiers,meta_classifier):
        self.classifiers=classifiers
        self.meta_classifier=meta_classifier
        #self.meta_classifier=DecisionTreeClassifier()
        #默认的合并分类器利用的是决策树，没有参数
    
    def fit(self,X,y):
        for clf in self.classifiers:
            clf.fit(X,y)
        
        self.meta_classifier.fit(self._get_meta_features(X),y)
        return self
    
    def _get_meta_features(self,X):
        probas=np.asarray([clf.predict_proba(X) for clf in self.classifiers])
        return np.concatenate(probas,axis=1)
    
    def predict(self,X):
        return self.meta_classifier.predict(self._get_meta_features(X))
    
    def predict_proba(self,X):
        return self.meta_classifier.predict_proba(self._get_meta_features(X))
        

In [231]:
X,y=make_classification(
    n_samples=1000,n_features=50,n_informative=30,n_clusters_per_class=3,random_state=11)
X_train,X_test,y_train,y_test=train_test_split(X,y,random_state=11)

In [232]:
lr=LogisticRegression()
lr.fit(X_train,y_train)
predictions=clf.predict(X_test)
print(lr.score(X_test,y_test))

0.816


In [233]:
knn_clf=KNeighborsClassifier()
knn_clf.fit(X_train,y_train)
predictions=lr.predict(X_test)
print(knn_clf.score(X_test,y_test))

0.836


In [234]:
lrr=GaussianNB()
lrr.fit(X_train,y_train)
predictions=lrr.predict(X_test)
print(classification_report(y_test,predictions))

             precision    recall  f1-score   support

          0       0.84      0.82      0.83       131
          1       0.80      0.82      0.81       119

avg / total       0.82      0.82      0.82       250



In [235]:
Dct_clf=DecisionTreeClassifier(random_state=11)
Dct_clf.fit(X_train,y_train)
predictions=Dct_clf.predict(X_test)
print(classification_report(y_test,predictions))

             precision    recall  f1-score   support

          0       0.70      0.72      0.71       131
          1       0.68      0.66      0.67       119

avg / total       0.69      0.69      0.69       250



In [236]:
base_classifiers=[lr,knn_clf]
stacking_clf=StackingClassifier(base_classifiers,Dct_clf)
stacking_clf.fit(X_train,y_train)
predictions=stacking_clf.predict(X_test)
print(stacking_clf.score(X_test,y_test))

0.852


In [237]:
#利用mlxtend 进行stacking
clf1 = KNeighborsClassifier(n_neighbors=1)
clf2 = RandomForestClassifier(random_state=1)
clf3 = GaussianNB()
lr = LogisticRegression()
sclf = StackingClassifier(classifiers=[clf1, clf2, clf3], meta_classifier=lr)
print('3-fold cross validation:\n')
for clf, label in zip([clf1, clf2, clf3, sclf], 
                      ['KNN', 
                       'Random Forest', 
                       'Naive Bayes',
                       'StackingClassifier']):
    scores = model_selection.cross_val_score(clf, X, y,cv=3, scoring='accuracy')
 
    print("Accuracy: %0.2f (+/- %0.2f) [%s]" % (scores.mean(), scores.std(), label))
sclf.fit(X_train,y_train)
predictions=sclf.predict(X_test)
print(classification_report(y_test,predictions))

3-fold cross validation:

Accuracy: 0.82 (+/- 0.01) [KNN]
Accuracy: 0.74 (+/- 0.01) [Random Forest]
Accuracy: 0.79 (+/- 0.02) [Naive Bayes]
Accuracy: 0.82 (+/- 0.01) [StackingClassifier]
             precision    recall  f1-score   support

          0       0.84      0.81      0.82       131
          1       0.80      0.83      0.81       119

avg / total       0.82      0.82      0.82       250

