In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

In [2]:
# load the data amnd split it into train , test
data = load_breast_cancer()
x = data.data
y = data.target

x_train, x_test, y_train, y_test = train_test_split(x,y, test_size = 0.2, random_state = 42)

In [3]:
# Set parameters by cross-validation
tuned_parameters = [{'kernel': ['rbf', 'linear'], 'gamma':[1e-3, 1e-4], 'C':[1,10,100,1000]}]
# Kernel transform the training set of the data
# gamma defines how far the influence of a single training example reaches, low values ==> Far , high values ==> close
# C is a regularisation parameter and must be strictlu positive

In [7]:
scores =['precision','recall']

In [8]:
def grid_search (X_train, y_train, x_test, y_test, tunes_params, metric="precision"):
    print("# Tuning hyper-parameters for %s" % metric)
    clf = GridSearchCV(SVC(), tuned_parameters, scoring='%s_macro'% metric)
    clf.fit(x_train, y_train)
    print("Best params found")
    print(clf.best_params_)
    print("Grid scores on development set : ")
    print(clf.best_estimator_)
    print()
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
        print("%0.3f (+/-%0.03f) for %r"
             %(mean, std * 2, params))
    print("Classification report")
    y_true, y_pred = y_test, clf.predict(x_test)
    print(classification_report(y_true, y_pred))

In [9]:
# Grid search using only the precision
import warnings
warnings.filterwarnings("ignore")

grid_search(x_train, y_train, x_test, y_test, tuned_parameters, scores[0])

# Tuning hyper-parameters for precision
Best params found
{'C': 100, 'gamma': 0.001, 'kernel': 'linear'}
Grid scores on development set : 
SVC(C=100, gamma=0.001, kernel='linear')

0.902 (+/-0.045) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.957 (+/-0.052) for {'C': 1, 'gamma': 0.001, 'kernel': 'linear'}
0.933 (+/-0.046) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.957 (+/-0.052) for {'C': 1, 'gamma': 0.0001, 'kernel': 'linear'}
0.897 (+/-0.037) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.955 (+/-0.038) for {'C': 10, 'gamma': 0.001, 'kernel': 'linear'}
0.954 (+/-0.041) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.955 (+/-0.038) for {'C': 10, 'gamma': 0.0001, 'kernel': 'linear'}
0.897 (+/-0.037) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.972 (+/-0.053) for {'C': 100, 'gamma': 0.001, 'kernel': 'linear'}
0.942 (+/-0.031) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.972 (+/-0.053) for {'C': 100, 'gamma': 0.0001, 'kernel': 'linear'}
0.897 (+/-0.037) for {'C': 

### Grid scores on development set : 
###         SVC(C=100, gamma=0.001, kernel='linear')

In [10]:
# Grid search using only the recall
import warnings
warnings.filterwarnings("ignore")

grid_search(x_train, y_train, x_test, y_test, tuned_parameters, scores[1])

# Tuning hyper-parameters for recall
Best params found
{'C': 100, 'gamma': 0.001, 'kernel': 'linear'}
Grid scores on development set : 
SVC(C=100, gamma=0.001, kernel='linear')

0.910 (+/-0.060) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.949 (+/-0.055) for {'C': 1, 'gamma': 0.001, 'kernel': 'linear'}
0.922 (+/-0.070) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.949 (+/-0.055) for {'C': 1, 'gamma': 0.0001, 'kernel': 'linear'}
0.904 (+/-0.052) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.948 (+/-0.051) for {'C': 10, 'gamma': 0.001, 'kernel': 'linear'}
0.938 (+/-0.071) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.948 (+/-0.051) for {'C': 10, 'gamma': 0.0001, 'kernel': 'linear'}
0.904 (+/-0.052) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.968 (+/-0.062) for {'C': 100, 'gamma': 0.001, 'kernel': 'linear'}
0.931 (+/-0.041) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.968 (+/-0.062) for {'C': 100, 'gamma': 0.0001, 'kernel': 'linear'}
0.904 (+/-0.052) for {'C': 100

### Grid scores on development set : 
### SVC(C=100, gamma=0.001, kernel='linear')
