In [1]:
# Helper classes and data
from sklearn import datasets
from sklearn.model_selection import train_test_split as tts
from sklearn.preprocessing import StandardScaler
#from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import VotingClassifier

In [2]:
Iris_data = datasets.load_iris()

In [3]:
X, y = Iris_data.data, Iris_data.target
print(X.shape)

(150, 4)


In [4]:
X_train,X_test,y_train,y_test = tts(X,y,stratify = y,test_size=0.5,random_state=1)

In [5]:
X_train.shape

(75, 4)

In [6]:
# Classifiers
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
# Pipeline and validation
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score
# numpy
import numpy as np
# Since it is a multiclass classification let's imports binerizerabs
from sklearn.preprocessing import label_binarize
from sklearn.metrics import make_scorer
from sklearn.metrics import roc_auc_score
y_train_bin = label_binarize(y_train,classes = [0,1,2]) 
multi_roc = make_scorer(score_func=roc_auc_score)

In [45]:
#We have 4 classifiers here
clf1 = LogisticRegression(penalty='l2',random_state=1,C = 0.001,solver='lbfgs',multi_class='auto')
clf2 = DecisionTreeClassifier(random_state=1,max_depth=None,criterion='gini')
clf3 = KNeighborsClassifier(n_neighbors=3,p=2,metric='minkowski')
clf4 = SVC(kernel='rbf',probability=True)

In [46]:
#Pipelines
Pipe_Log = Pipeline([
    ['sc',StandardScaler()],
    ['Log_Reg',clf1]
])
Pipe_svc = Pipeline([
    ['sc',StandardScaler()],
    ['svc',clf4]
])
Pipe_Knn = Pipeline([
    ['sc',StandardScaler()],
    ['knn',clf3]
])

In [47]:
clf_labels = ['Logistic Regression','Decision Tree','Knn','SVM']
print(X_train.shape,y_train_bin.shape)

(75, 4) (75, 3)


In [10]:
y_preds_list = {}
from sklearn.model_selection import cross_val_predict
for clf, label in zip([Pipe_Log,clf2,Pipe_Knn,Pipe_svc],clf_labels):
    predictions = cross_val_predict(estimator=clf,X=X_train,y=y_train,cv=10)        
    y_preds_list['{}'.format(label)] = predictions

In [48]:
Bin_lab_dict ={}
for key,values in y_preds_list.items():
    Bin_lab_dict[key] = label_binarize(values,classes=[0,1,2])

In [49]:
# Binarized labels to get the roc_auc values
ROC_AUC ={}
for key in Bin_lab_dict.keys():
    label_scores = []
    y_cal = Bin_lab_dict[key]
    for i in range(3):
        auc_score = roc_auc_score(y_train_bin[:,i],y_cal[:,i])
        label_scores.append(auc_score)
    ROC_AUC[key] = label_scores

In [50]:
for label , values in ROC_AUC.items():
    print('AUC_ROC of {} : {} +/- {}'.format(label,np.mean(values),np.std(values)))

AUC_ROC of Logistic Regression : 0.88 +/- 0.09416297927883689
AUC_ROC of Decision Tree : 0.93 +/- 0.04966554808583782
AUC_ROC of Knn : 0.94 +/- 0.04242640687119289
AUC_ROC of SVM : 0.93 +/- 0.04082482904638629
AUC_ROC of VotingClassifier(estimators=[('Pipe_Log',
                              Pipeline(memory=None,
                                       steps=[['sc',
                                               StandardScaler(copy=True,
                                                              with_mean=True,
                                                              with_std=True)],
                                              ['Log_Reg',
                                               LogisticRegression(C=0.001,
                                                                  class_weight=None,
                                                                  dual=False,
                                                                  fit_intercept=True,
                    

In [51]:
Voting_clf = VotingClassifier(estimators=[('Pipe_Log',Pipe_Log),('clf2',clf2),
                                          ('Pipe_Knn',Pipe_Knn),('Pipe_svc',Pipe_svc)],
                                          voting='soft'
                                         )

In [52]:
predictions = cross_val_predict(estimator=Voting_clf,X=X_train,y=y_train,cv=10)        
y_preds_list['voting_clf'] = predictions

In [53]:
Bin_lab_dict['voting_clf'] = label_binarize(y_preds_list['voting_clf'],classes=[0,1,2])

In [54]:
label_scores = []
y_cal = Bin_lab_dict['voting_clf']
for i in range(3):
    auc_score = roc_auc_score(y_train_bin[:,i],y_cal[:,i])
    label_scores.append(auc_score)
ROC_AUC['voting_clf'] = label_scores

In [55]:
for label , values in ROC_AUC.items():
    print('AUC_ROC of {} : {} +/- {}'.format(label,np.mean(values),np.std(values)))

AUC_ROC of Logistic Regression : 0.88 +/- 0.09416297927883689
AUC_ROC of Decision Tree : 0.93 +/- 0.04966554808583782
AUC_ROC of Knn : 0.94 +/- 0.04242640687119289
AUC_ROC of SVM : 0.93 +/- 0.04082482904638629
AUC_ROC of VotingClassifier(estimators=[('Pipe_Log',
                              Pipeline(memory=None,
                                       steps=[['sc',
                                               StandardScaler(copy=True,
                                                              with_mean=True,
                                                              with_std=True)],
                                              ['Log_Reg',
                                               LogisticRegression(C=0.001,
                                                                  class_weight=None,
                                                                  dual=False,
                                                                  fit_intercept=True,
                    

In [34]:
Voting_clf.get_params

<bound method _BaseVoting.get_params of VotingClassifier(estimators=[('Pipe_Log',
                              Pipeline(memory=None,
                                       steps=[['sc',
                                               StandardScaler(copy=True,
                                                              with_mean=True,
                                                              with_std=True)],
                                              ['Log_Reg',
                                               LogisticRegression(C=0.001,
                                                                  class_weight=None,
                                                                  dual=False,
                                                                  fit_intercept=True,
                                                                  intercept_scaling=1,
                                                                  l1_ratio=None,
                                 

In [57]:
Voting_clf.fit(X_train,y_train)

VotingClassifier(estimators=[('Pipe_Log',
                              Pipeline(memory=None,
                                       steps=[['sc',
                                               StandardScaler(copy=True,
                                                              with_mean=True,
                                                              with_std=True)],
                                              ['Log_Reg',
                                               LogisticRegression(C=0.001,
                                                                  class_weight=None,
                                                                  dual=False,
                                                                  fit_intercept=True,
                                                                  intercept_scaling=1,
                                                                  l1_ratio=None,
                                                                  max_ite

In [58]:
y_pred_proba = Voting_clf.predict_proba(X_train)

In [72]:
roc_auc_score?

In [None]:
! conda update scikit-learn