In [2]:
# Helper classes and data
from sklearn import datasets
from sklearn.model_selection import train_test_split as tts
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import VotingClassifier

In [3]:
# Load the Iris dataset through scikit-learn's `datasets` module; the next cell
# reads its `.data` and `.target` attributes.
Iris_data = datasets.load_iris()

In [11]:
# Pull the feature matrix and the target vector out of the loaded dataset,
# then show the feature matrix dimensions as a sanity check.
X = Iris_data.data
y = Iris_data.target
print(X.shape)

(150, 4)


In [24]:
# Learn the set of distinct labels in y, then replace y with its encoded
# (0..n_classes-1) form. `lenc` is kept so the mapping stays available.
lenc = LabelEncoder().fit(y)
y = lenc.transform(y)

In [25]:
# Hold out 30% of the samples for testing. Stratifying on y keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = tts(
    X,
    y,
    test_size=0.3,
    random_state=1,
    stratify=y,
)

In [26]:
# Display the dimensions of the training split (the bare expression is rendered
# as the cell's output).
X_train.shape

(105, 4)

In [61]:
# Classifiers
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
# Pipeline and validation
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score
# numpy
import numpy as np
# This is a multiclass classification problem, so import the label binarizer and the scoring helpers
from sklearn.preprocessing import label_binarize
from sklearn.metrics import make_scorer
from sklearn.metrics import roc_auc_score
# One-vs-rest indicator matrix of the training labels: one 0/1 column for each
# of the classes 0, 1 and 2.
y_train_bin = label_binarize(y_train, classes=[0, 1, 2])

# Scorer object wrapping roc_auc_score (not referenced by the other cells shown
# in this notebook).
multi_roc = make_scorer(roc_auc_score)

In [51]:
# The four base classifiers compared in this notebook.
# clf1: L2-penalised logistic regression with a small C (strong regularisation).
clf1 = LogisticRegression(
    C=0.001,
    penalty='l2',
    solver='lbfgs',
    random_state=1,
)
# clf2: decision tree with no depth limit, split on Gini impurity.
clf2 = DecisionTreeClassifier(
    criterion='gini',
    max_depth=None,
    random_state=1,
)
# clf3: 3-nearest-neighbours using the Minkowski metric with p=2.
clf3 = KNeighborsClassifier(
    n_neighbors=3,
    metric='minkowski',
    p=2,
)
# clf4: support vector classifier with an RBF kernel.
clf4 = SVC(kernel='rbf')

In [52]:
# Pipelines: each of these three models gets its own StandardScaler step in
# front of it, so scaling is fitted together with the model.
Pipe_Log = Pipeline(steps=[
    ('sc', StandardScaler()),
    ('Log_Reg', clf1),
])

Pipe_svc = Pipeline(steps=[
    ('sc', StandardScaler()),
    ('svc', clf4),
])

Pipe_Knn = Pipeline(steps=[
    ('sc', StandardScaler()),
    ('knn', clf3),
])

In [58]:
# Display names for the models, in the same order in which the estimators are
# iterated in the cross-validation cell.
clf_labels = [
    'Logistic Regression',
    'Decision Tree',
    'Knn',
    'SVM',
]
# Sanity check: training features and the binarized training labels side by side.
print(X_train.shape, y_train_bin.shape)

(105, 4) (105, 3)


In [103]:
# For every model, run 10-fold cross-validation once per class column of
# y_train_bin (one-vs-rest) with ROC AUC as the metric. Each model contributes
# a (3, 2) array to scores_list: one row per class, columns = (mean, std) of
# the fold scores.
scores_list = []
for clf, label in zip([Pipe_Log, clf2, Pipe_Knn, Pipe_svc], clf_labels):
    per_class_stats = []
    for class_idx in range(3):
        fold_auc = cross_val_score(
            estimator=clf,
            X=X_train,
            y=y_train_bin[:, class_idx],
            scoring='roc_auc',
            cv=10,
        )
        per_class_stats.append((fold_auc.mean(), fold_auc.std()))
    scores_list.append(np.array(per_class_stats))

In [107]:
# Collapse each model's per-class (mean, std) rows into a single pair by
# averaging down the rows: element 0 is the averaged mean AUC, element 1 the
# averaged standard deviation.
mean_std = [np.mean(per_model, axis=0) for per_model in scores_list]

In [109]:
# Report the averaged cross-validated ROC AUC and its averaged spread per model.
for label, (mean_auc, std_auc) in zip(clf_labels, mean_std):
    print('AUC_ROC of {} : {} +/- {}'.format(label, mean_auc, std_auc))

AUC_ROC of Logistic Regression : 0.8964285714285714 +/- 0.07472594728488739
AUC_ROC of Decision Tree : 0.9462301587301588 +/- 0.04926907133014047
AUC_ROC of Knn : 0.9880952380952381 +/- 0.02722959229377937
AUC_ROC of SVM : 0.9928571428571429 +/- 0.013804600760498102


In [128]:
# Majority-vote ensemble over the four models: with voting='hard' the predicted
# class is the one chosen by most of the member estimators.
Voting_clf = VotingClassifier(
    estimators=[
        ('Pipe_Log', Pipe_Log),
        ('clf2', clf2),
        ('Pipe_Knn', Pipe_Knn),
        ('Pipe_svc', Pipe_svc),
    ],
    voting='hard',
)

In [135]:
# The ensemble must be fitted before it can predict. No cell shown in this
# notebook fits it, so a fresh "Restart & Run All" would fail at predict()
# without this line.
Voting_clf.fit(X_train, y_train)

# Predict on the training data itself, so any score derived from y_pred is a
# training-set (optimistic) figure, not a held-out estimate.
y_pred = Voting_clf.predict(X_train)

# Binarize the predicted labels into one 0/1 column per class so they line up
# with y_train_bin column by column.
y_pred_bin = label_binarize(y_pred, classes=[0, 1, 2])

In [137]:
# One-vs-rest ROC AUC of the voting ensemble, one value per class column.
# Note: y_pred_bin holds binarized class predictions for X_train, so each AUC
# is computed from hard 0/1 labels on the training data rather than from
# continuous scores on held-out folds.
voting_auc = [
    roc_auc_score(y_train_bin[:, class_idx], y_pred_bin[:, class_idx])
    for class_idx in range(3)
]

In [140]:
# Average the per-class AUC values of the voting ensemble into a single number.
print(np.mean(voting_auc))

0.9785714285714286
