In [15]:
from sklearn.ensemble import StackingClassifier, RandomForestClassifier
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
 
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedKFold, cross_val_score, GridSearchCV
from sklearn.tree import plot_tree
from sklearn.metrics import roc_auc_score,accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC


In [2]:
# Read the breast-cancer table, then separate the 'Class' target column from
# the remaining columns, which are used as features.
cancer = pd.read_csv('BreastCancer.csv')
y = cancer['Class']
X = cancer.drop(columns='Class')

In [3]:
# Hold out 30% of the rows for evaluation. stratify=y keeps the class
# proportions of y in both parts; random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=24,
)


In [16]:
# Level-0 (base) learners. Seeds are fixed on the estimators that are given one.
knn = KNeighborsClassifier()
nb = GaussianNB()
dt = DecisionTreeClassifier(random_state=24)
svm = SVC(probability=True, random_state=24)

# Level-1 (meta) learner that combines the base learners' predictions.
rf = RandomForestClassifier(random_state=24)

# The string labels become the parameter prefixes for the base learners
# (e.g. 'SVM__C', 'DT__max_depth') when tuning the stack.
base_learners = [
    ('KNN', knn),
    ('NB', nb),
    ('DT', dt),
    ('SVM', svm),
]
stack = StackingClassifier(estimators=base_learners, final_estimator=rf)

In [17]:
# Train the stack on the training split (fit returns the fitted estimator, so
# predict can be chained), then report accuracy on the held-out rows.
y_pred = stack.fit(X_train, y_train).predict(X_test)
holdout_accuracy = accuracy_score(y_test, y_pred)
print(holdout_accuracy)

0.9571428571428572


In [18]:
# Take column 1 of the predicted probability matrix (the second entry of
# stack.classes_) and score it with ROC AUC on the held-out rows.
holdout_proba = stack.predict_proba(X_test)
y_pred_proba = holdout_proba[:, 1]
print(roc_auc_score(y_test, y_pred_proba))

0.9683977455716587


In [21]:
# List every tunable parameter of the stack, including the nested
# 'final_estimator__*' ones, to find the names usable in a parameter grid.
stack.get_params(deep=True)

{'cv': None,
 'estimators': [('KNN', KNeighborsClassifier()),
  ('NB', GaussianNB()),
  ('DT', DecisionTreeClassifier(random_state=24)),
  ('SVM', SVC(probability=True, random_state=24))],
 'final_estimator__bootstrap': True,
 'final_estimator__ccp_alpha': 0.0,
 'final_estimator__class_weight': None,
 'final_estimator__criterion': 'gini',
 'final_estimator__max_depth': None,
 'final_estimator__max_features': 'sqrt',
 'final_estimator__max_leaf_nodes': None,
 'final_estimator__max_samples': None,
 'final_estimator__min_impurity_decrease': 0.0,
 'final_estimator__min_samples_leaf': 1,
 'final_estimator__min_samples_split': 2,
 'final_estimator__min_weight_fraction_leaf': 0.0,
 'final_estimator__monotonic_cst': None,
 'final_estimator__n_estimators': 100,
 'final_estimator__n_jobs': None,
 'final_estimator__oob_score': False,
 'final_estimator__random_state': 24,
 'final_estimator__verbose': 0,
 'final_estimator__warm_start': False,
 'final_estimator': RandomForestClassifier(random_state=

In [25]:
# Five evenly spaced values of the SVM regularisation strength C.
svm_c_grid = np.linspace(0.0001, 3, 5)

# Search space for the stack: '<label>__<param>' addresses a base learner by
# the label it was given in `estimators`, 'final_estimator__<param>' addresses
# the meta-learner, and 'passthrough' is a parameter of the stack itself.
params = {
    'final_estimator__max_depth': [3, 5],
    'SVM__C': svm_c_grid,
    'final_estimator__n_estimators': [10],
    'DT__max_depth': [None, 2],
    'passthrough': [True, False],
}

# 2 * 5 * 1 * 2 * 2 = 40 candidate settings, each scored by ROC AUC.
# verbose=3 prints one line per individual fit.
gcv = GridSearchCV(
    estimator=stack,
    param_grid=params,
    scoring='roc_auc',
    verbose=3,
)

In [26]:
# Run the grid search on the full data set (X, y), not on the train split:
# every candidate is evaluated by cross-validation inside GridSearchCV.
# NOTE(review): X_test is part of this data, so the earlier hold-out rows are
# no longer unseen by the tuned model — confirm that is intended.
gcv.fit(X,y)

Fitting 5 folds for each of 40 candidates, totalling 200 fits
[CV 1/5] END DT__max_depth=None, SVM__C=0.0001, final_estimator__max_depth=3, final_estimator__n_estimators=10, passthrough=True;, score=0.985 total time=   0.3s
[CV 2/5] END DT__max_depth=None, SVM__C=0.0001, final_estimator__max_depth=3, final_estimator__n_estimators=10, passthrough=True;, score=0.980 total time=   0.3s
[CV 3/5] END DT__max_depth=None, SVM__C=0.0001, final_estimator__max_depth=3, final_estimator__n_estimators=10, passthrough=True;, score=0.999 total time=   0.3s
[CV 4/5] END DT__max_depth=None, SVM__C=0.0001, final_estimator__max_depth=3, final_estimator__n_estimators=10, passthrough=True;, score=0.976 total time=   0.3s
[CV 5/5] END DT__max_depth=None, SVM__C=0.0001, final_estimator__max_depth=3, final_estimator__n_estimators=10, passthrough=True;, score=0.996 total time=   0.3s
[CV 1/5] END DT__max_depth=None, SVM__C=0.0001, final_estimator__max_depth=3, final_estimator__n_estimators=10, passthrough=Fals

In [28]:
# Show the winning parameter combination, then its cross-validated ROC AUC.
for best_value in (gcv.best_params_, gcv.best_score_):
    print(best_value)

{'DT__max_depth': None, 'SVM__C': 0.0001, 'final_estimator__max_depth': 5, 'final_estimator__n_estimators': 10, 'passthrough': True}
0.9906635659519422


Serializing

In [29]:
import pickle

In [31]:
# Keep the stack built with the best parameter combination found by the search.
# best_estimator_ exists because `refit` was left at its default when gcv was created.
best_stack = gcv.best_estimator_

In [32]:
# Persist the tuned stack to disk. Opening the file in a `with` block closes
# the handle as soon as the dump finishes (or fails), which flushes buffered
# bytes so the pickle on disk is complete; the original left the file open.
# Only load this file back with pickle in an environment you trust.
with open('stacking_breast_cancer.pkl','wb') as pkfile:
    pickle.dump(best_stack,pkfile)