In [8]:
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.tree import DecisionTreeClassifier, plot_tree
import matplotlib.pyplot as plt
from sklearn.calibration import LabelEncoder

In [9]:
bcancer = pd.read_csv('BreastCancer.csv', index_col=0)
lbl = LabelEncoder()
bcancer['Class'] = lbl.fit_transform(bcancer['Class'])

X = bcancer.drop('Class', axis=1)
y = bcancer['Class']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=23, stratify=y)

In [14]:

from sklearn.metrics import accuracy_score
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC


svm = SVC(probability=True, random_state=23)
lr = LogisticRegression()
nb = GaussianNB()
voting = VotingClassifier(estimators=[('SVM',svm),('LR',lr),('NB',nb)], voting='soft')
voting.fit(X_train, y_train)
y_pred_prob = voting.predict_proba(X_test)[:,1]
print(log_loss(y_test, y_pred_prob))
y_pred = voting.predict(X_test)
print(accuracy_score(y_test, y_pred))


0.1377657178736647
0.9571428571428572


Grid Search CV

In [None]:
print(voting.get_params)

In [17]:
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=23)
params = {'SVM__gamma':['scale','auto'],'SVM__C':np.linspace(0.001,5,5), 'LR__penalty':['l1','l2','elastic',None], 'NB__var_smoothing':np.linspace(0.0001,0.999,5)}
gcv = GridSearchCV(voting, param_grid=params, cv=kfold,verbose=3, scoring='neg_log_loss', n_jobs=8)
gcv.fit(X,y)
print(gcv.best_params_)
print(gcv.best_score_)

Fitting 5 folds for each of 200 candidates, totalling 1000 fits
[CV 1/5] END LR__penalty=l1, NB__var_smoothing=0.0001, SVM__C=0.001, SVM__gamma=scale;, score=nan total time=   0.1s
[CV 4/5] END LR__penalty=l1, NB__var_smoothing=0.0001, SVM__C=0.001, SVM__gamma=scale;, score=nan total time=   0.1s
[CV 2/5] END LR__penalty=l1, NB__var_smoothing=0.0001, SVM__C=0.001, SVM__gamma=scale;, score=nan total time=   0.2s
[CV 2/5] END LR__penalty=l1, NB__var_smoothing=0.0001, SVM__C=0.001, SVM__gamma=auto;, score=nan total time=   0.1s
[CV 5/5] END LR__penalty=l1, NB__var_smoothing=0.0001, SVM__C=0.001, SVM__gamma=scale;, score=nan total time=   0.2s
[CV 3/5] END LR__penalty=l1, NB__var_smoothing=0.0001, SVM__C=0.001, SVM__gamma=auto;, score=nan total time=   0.2s
[CV 1/5] END LR__penalty=l1, NB__var_smoothing=0.0001, SVM__C=1.2507499999999998, SVM__gamma=scale;, score=nan total time=   0.0s
[CV 2/5] END LR__penalty=l1, NB__var_smoothing=0.0001, SVM__C=1.2507499999999998, SVM__gamma=scale;, score

500 fits failed out of a total of 1000.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
250 fits failed with the following error:
Traceback (most recent call last):
  File "/home/dai/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/dai/anaconda3/lib/python3.11/site-packages/sklearn/base.py", line 1152, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/dai/anaconda3/lib/python3.11/site-packages/sklearn/ensemble/_voting.py", line 349, in fit
    return super().fit(X, transformed_y, sample_weight)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

{'LR__penalty': 'l2', 'NB__var_smoothing': 0.749275, 'SVM__C': 0.001, 'SVM__gamma': 'auto'}
0.9713463514902365
