# mlxtend库
## 使用第一层基分类器的预测输出（类别标签）作为最终meta-classifier的输入数据

In [1]:
from sklearn import datasets
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.naive_bayes import GaussianNB 
from sklearn.ensemble import RandomForestClassifier 
from mlxtend.classifier import StackingClassifier 
import warnings
warnings.filterwarnings("ignore")
import numpy as np 

In [2]:
# Demo: stack three base classifiers and let a logistic-regression
# meta-classifier combine their outputs (StackingClassifier defaults).
iris = datasets.load_iris()
# Keep only feature columns 1 and 2 of the iris matrix (a two-column input).
X, y = iris.data[:, 1:3], iris.target

clf1 = KNeighborsClassifier(n_neighbors=1)
clf2 = RandomForestClassifier(random_state=1)  # seeded so the forest is repeatable
clf3 = GaussianNB()
lr = LogisticRegression()
sclf = StackingClassifier(classifiers=[clf1, clf2, clf3], meta_classifier=lr)

print('3-fold cross validation:\n')
# Score every base model and the stacked ensemble with the same cv=3 setting
# and report mean accuracy with its standard deviation across folds.
# NOTE: the report text previously contained the typos "+?-" and
# "SatckingClassifier "; any saved output showing them predates this fix,
# so re-run the cell to refresh it.
for clf, label in zip([clf1, clf2, clf3, sclf],
                      ['KNN', 'Random Forest', 'Naive Bayes', 'StackingClassifier']):
    scores = model_selection.cross_val_score(clf, X, y, cv=3, scoring='accuracy')
    print("Accuracy :%0.2f (+/- %0.2f ) [%s]" % (scores.mean(), scores.std(), label))

3-fold cross validation:

Accuracy :0.91 (+?- 0.01 ) [KNN]
Accuracy :0.93 (+?- 0.05 ) [Random Forest]
Accuracy :0.92 (+?- 0.03 ) [Naive Bayes]
Accuracy :0.95 (+?- 0.03 ) [SatckingClassifier ]


## 使用第一层基本分类器产生的类别概率值作为meta-classifier的输入。这种情况下需要将StackingClassifier的参数设置为 use_probas=True。如果将参数设置为 average_probas=True，那么这些基分类器对每一个类别产生的概率值会被平均，否则会被拼接。


In [3]:
# Demo: same three base classifiers, but the meta-classifier is fed their
# class probabilities instead of their default outputs.
iris = datasets.load_iris()
# Keep only feature columns 1 and 2 of the iris matrix (a two-column input).
X, y = iris.data[:, 1:3], iris.target

clf1 = KNeighborsClassifier(n_neighbors=1)
clf2 = RandomForestClassifier(random_state=1)  # seeded so the forest is repeatable
clf3 = GaussianNB()
lr = LogisticRegression()
# use_probas=True passes per-class probabilities to the meta-classifier;
# average_probas=False concatenates them across base models rather than
# averaging them.
sclf = StackingClassifier(
    classifiers=[clf1, clf2, clf3],
    use_probas=True,
    average_probas=False,
    meta_classifier=lr,
)

print('3-fold cross validation:\n')
# Score every base model and the stacked ensemble with the same cv=3 setting
# and report mean accuracy with its standard deviation across folds.
# NOTE: the report text previously contained the typos "+?-" and
# "SatckingClassifier "; any saved output showing them predates this fix,
# so re-run the cell to refresh it.
for clf, label in zip([clf1, clf2, clf3, sclf],
                      ['KNN', 'Random Forest', 'Naive Bayes', 'StackingClassifier']):
    scores = model_selection.cross_val_score(clf, X, y, cv=3, scoring='accuracy')
    print("Accuracy :%0.2f (+/- %0.2f ) [%s]" % (scores.mean(), scores.std(), label))

3-fold cross validation:

Accuracy :0.91 (+?- 0.01 ) [KNN]
Accuracy :0.93 (+?- 0.05 ) [Random Forest]
Accuracy :0.92 (+?- 0.03 ) [Naive Bayes]
Accuracy :0.94 (+?- 0.03 ) [SatckingClassifier ]


## 对训练集中的特征维度进行操作：这次不是给每一个基分类器全部的特征，而是给不同的基分类器分配不同的特征子集，例如基分类器1训练前半部分特征，基分类器2训练后半部分特征（可以通过sklearn的pipeline实现）。最终通过StackingClassifier组合起来。

In [4]:
from sklearn.datasets import load_iris 
from mlxtend.classifier import StackingClassifier 
from mlxtend.feature_selection import ColumnSelector
from sklearn.pipeline import make_pipeline 
from sklearn.linear_model import LogisticRegression  
# Load the iris dataset and expose the full feature matrix and the targets;
# the per-model column subsets are chosen later by the pipelines.
iris = load_iris()
X, y = iris.data, iris.target



In [5]:
# Build one pipeline per feature subset: each selects its own columns of X
# and then fits a separate logistic regression on just those columns.
pipe1, pipe2 = (
    make_pipeline(ColumnSelector(cols=column_subset), LogisticRegression())
    for column_subset in ((0, 2), (1, 2, 3))
)

# Stack the two column-specific pipelines under a logistic-regression
# meta-classifier.
sclf = StackingClassifier(
    classifiers=[pipe1, pipe2],
    meta_classifier=LogisticRegression(),
)

# Last expression of the cell: fit on the full data and display the result.
sclf.fit(X, y)

StackingClassifier(average_probas=False,
          classifiers=[Pipeline(memory=None,
     steps=[('columnselector', ColumnSelector(cols=(0, 2), drop_axis=False)), ('logisticregression', LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None,...nalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False))])],
          drop_last_proba=False,
          meta_classifier=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False),
          store_train_meta_features=False, use_clones=True,
          use_features_in_secondary=False, use_probas=False, verbose=0)