In [2]:
from sklearn.datasets import load_digits

In [3]:
X, y = load_digits(return_X_y=True)

In [4]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test= train_test_split(X, y, test_size=0.4, random_state=42)

In [5]:
y_train = ((y_train == 5) | (y_train == 8))
y_test = ((y_test == 5)| (y_test==8))

In [6]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

In [7]:
pipeline = Pipeline([
        ('imputer', SimpleImputer(strategy="median")),
        ('std_scaler', StandardScaler()),
    ])

X_train_tr = pipeline.fit_transform(X_train)
X_test_tr = pipeline.transform(X_test)

In [8]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import cross_val_predict

In [9]:
param_grid = [
      {'C': [0.01,0.1,1,10,100], 'kernel': ['linear']},
      {'C': [0.01,0.1,1,10,100], 'gamma': ['auto'], 'kernel': ['rbf']},
      {'C': [0.01,0.1,1,10,100], 'gamma':['auto'], 'kernel':['poly'], 'degree':[2,4,6]},
     ]

In [54]:
my_svc = SVC(gamma='auto')
clf = GridSearchCV(my_svc, param_grid, cv=3, scoring='roc_auc')
clf.fit(X_train_tr, y_train)

GridSearchCV(cv=3, error_score='raise-deprecating',
             estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='warn', n_jobs=None,
             param_grid=[{'C': [0.01, 0.1, 1, 10, 100], 'kernel': ['linear']},
                         {'C': [0.01, 0.1, 1, 10, 100], 'gamma': ['auto'],
                          'kernel': ['rbf']},
                         {'C': [0.01, 0.1, 1, 10, 100], 'degree': [2, 4, 6],
                          'gamma': ['auto'], 'kernel': ['poly']}],
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='roc_auc', verbose=0)

In [55]:
y_scores = cross_val_predict(my_svc, X_train_tr, y_train, cv=3)

In [45]:
from sklearn.metrics import auc
from sklearn.metrics import roc_curve

In [46]:
fpr, tpr, thresholds = roc_curve(y_train,y_scores)
print('AUC Score for the train set:', auc(fpr,tpr))

AUC Score for the train set: 0.9262654730327146


In [47]:
search_results = clf.cv_results_
params = search_results['params']
mean_scores = search_results['mean_test_score']
for i in range(len(params)):
    mean_score = mean_scores[i]
    param = params[i]
    print('mean auc_score:', (mean_score), 'hyperparameters: ', param)


mean auc_score: 0.9385359400872052 hyperparameters:  {'C': 0.01, 'kernel': 'linear'}
mean auc_score: 0.9468005395658915 hyperparameters:  {'C': 0.1, 'kernel': 'linear'}
mean auc_score: 0.9433358128780825 hyperparameters:  {'C': 1, 'kernel': 'linear'}
mean auc_score: 0.9415047114661593 hyperparameters:  {'C': 10, 'kernel': 'linear'}
mean auc_score: 0.9423016493343574 hyperparameters:  {'C': 100, 'kernel': 'linear'}
mean auc_score: 0.9755884912036429 hyperparameters:  {'C': 0.01, 'gamma': 'auto', 'kernel': 'rbf'}
mean auc_score: 0.9756881564518561 hyperparameters:  {'C': 0.1, 'gamma': 'auto', 'kernel': 'rbf'}
mean auc_score: 0.9902982314832514 hyperparameters:  {'C': 1, 'gamma': 'auto', 'kernel': 'rbf'}
mean auc_score: 0.990955562504992 hyperparameters:  {'C': 10, 'gamma': 'auto', 'kernel': 'rbf'}
mean auc_score: 0.990955562504992 hyperparameters:  {'C': 100, 'gamma': 'auto', 'kernel': 'rbf'}
mean auc_score: 0.9680310851730244 hyperparameters:  {'C': 0.01, 'degree': 2, 'gamma': 'auto', '

In [48]:
best_params = clf.best_params_
print('best hyperparams in search : ', best_params)

best hyperparams in search :  {'C': 100, 'degree': 4, 'gamma': 'auto', 'kernel': 'poly'}


In [49]:
print('best estimator: ', clf.best_estimator_)

best estimator:  SVC(C=100, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=4, gamma='auto', kernel='poly',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)


In [50]:
final_model = clf.best_estimator_

In [51]:
final_model.fit(X_train_tr, y_train)

SVC(C=100, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=4, gamma='auto', kernel='poly',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [52]:
y_predicted = final_model.predict(X_test_tr)

In [53]:
fpr, tpr, thresholds = roc_curve(y_test,y_predicted)
print('AUC score for the test test:',auc(fpr,tpr))

AUC score for the test test: 0.9796054811378804
