In [13]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from skopt import BayesSearchCV
from skopt.space import Categorical, Real
from tpot import TPOTClassifier
from sklearn.svm import SVC
import pandas as pd

In [2]:
# Load the data set
cancer = load_breast_cancer()

In [3]:
# Split the data into training and testing sets
X = cancer.data
y = cancer.target
X_train, X_test, y_train, y_test = train_test_split(X, y)

# The logistic regression model
lr = LogisticRegression(solver = 'liblinear', max_iter = 1000)

In [4]:
# C is the inverse of regularization strength. Smaller C will result in stronger regularization.
search_spaces = {'penalty': Categorical(['l1', 'l2']), 'C': Real(0.01, 100, prior='uniform')}

In [5]:
# Create a BayesSearchCV model
clf = BayesSearchCV(lr, search_spaces=search_spaces, n_iter=10)

# Fit the BayesSearchCV model
clf.fit(X_train, y_train)

# Show which hyperparameters performed the best
print(clf.best_estimator_)

LogisticRegression(C=17.474316117954075, max_iter=1000, penalty='l1',
                   solver='liblinear')


In [6]:
# Print the the parameters and mean test score
print(clf.cv_results_['params'])
print(clf.cv_results_['mean_test_score'])


[OrderedDict([('C', 32.37502499624655), ('penalty', 'l2')]), OrderedDict([('C', 17.474316117954075), ('penalty', 'l1')]), OrderedDict([('C', 1.87711566720154), ('penalty', 'l1')]), OrderedDict([('C', 49.99715956440307), ('penalty', 'l2')]), OrderedDict([('C', 11.542539977726387), ('penalty', 'l1')]), OrderedDict([('C', 90.40228761439442), ('penalty', 'l2')]), OrderedDict([('C', 86.48038599998013), ('penalty', 'l1')]), OrderedDict([('C', 99.17712875297852), ('penalty', 'l1')]), OrderedDict([('C', 64.05624598587704), ('penalty', 'l1')]), OrderedDict([('C', 99.10884148406537), ('penalty', 'l2')])]
[0.96949384 0.97181943 0.95543092 0.96949384 0.97181943 0.97181943
 0.96711354 0.96711354 0.9647606  0.97181943]


In [7]:
# Create and print Pandas DataFrame
cv_table = pd.concat([pd.DataFrame(clf.cv_results_['params']), pd.DataFrame(clf.cv_results_['mean_test_score'], columns=['Accuracy'])], axis=1)
 
print(cv_table.sort_values('Accuracy', ascending = False))

           C penalty  Accuracy
1  17.474316      l1  0.971819
4  11.542540      l1  0.971819
5  90.402288      l2  0.971819
9  99.108841      l2  0.971819
0  32.375025      l2  0.969494
3  49.997160      l2  0.969494
6  86.480386      l1  0.967114
7  99.177129      l1  0.967114
8  64.056246      l1  0.964761
2   1.877116      l1  0.955431


In [8]:
# Assess the model's accuracy on the testing data
acc = clf.score(X_test, y_test)
print(acc)

0.9370629370629371


In [14]:
# Create a SVC model
svm = SVC()

In [16]:
from sklearn.model_selection import GridSearchCV
# Dictionary of parameters for GridSearchCV
parameters = {'kernel': ['linear', 'rbf', 'sigmoid'], 'C': [1, 10, 100]}

# Create a GridSearchCV model
grid = GridSearchCV(svm, parameters)

# Fit the GridSearchCV model to the training data
grid.fit(X_train, y_train)

# Print the model and hyperparameters obtained by GridSearchCV
print(grid.best_estimator_)

SVC(C=1, kernel='linear')


In [17]:
# Print a table summarizing the results of GridSearchCV
df = pd.concat([pd.DataFrame(grid.cv_results_['params']), pd.DataFrame(grid.cv_results_['mean_test_score'], columns=['Score'])], axis=1)
cv_table = df.pivot(index='kernel', columns='C')
print(cv_table)

            Score                    
C             1         10        100
kernel                               
linear   0.969494  0.967168  0.962462
rbf      0.927360  0.929658  0.946129
sigmoid  0.513844  0.422134  0.419808


In [18]:
# Print the accuracy of the final model on the test data
print(grid.score(X_test, y_test))

0.9230769230769231


In [19]:
# Dictionary of parameters for BayesSearchCV
search_spaces = {'kernel': Categorical(['linear', 'rbf', 'sigmoid']), 'C': Real(1, 100, prior='uniform')}

# Create a BayesSearchCV model
bayes = BayesSearchCV(svm, search_spaces, n_iter=10)

# Fit the BayesSearchCV model to the training data
bayes.fit(X_train, y_train)

# Print the model and hyperparameters obtained by BayesSearchCV
print(bayes.best_estimator_)


SVC(C=12.441136032413821, kernel='linear')


In [20]:
# Print the accuracy of the final model on the test data
print(bayes.score(X_test, y_test))

0.916083916083916


In [21]:
# Create a TPOTClassifier model
tpot = TPOTClassifier(generations=2, population_size=5, verbosity=2)

In [22]:
#fit and print accuracy
tpot.fit(X_train,y_train)
print(tpot.score(X_test,y_test))

                                                                           
Generation 1 - Current best internal CV score: 0.97890560875513
                                                                            
Generation 2 - Current best internal CV score: 0.97890560875513
                                                                            
Best pipeline: LinearSVC(input_matrix, C=15.0, dual=False, loss=squared_hinge, penalty=l1, tol=0.001)
0.9440559440559441
