In [1]:
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import GridSearchCV,cross_val_score
from sklearn import svm
from sklearn.linear_model import LogisticRegression

In [2]:
data=load_digits()

In [3]:
print(dir(data))

['DESCR', 'data', 'images', 'target', 'target_names']


In [4]:
X=data.data
Y=data.target

In [5]:
print(X.shape,Y.shape)

(1797, 64) (1797,)


In [6]:
def custom_kernel(x1,x2):
    return np.square(np.dot(x1,x2))

In [7]:
params=[{
    'kernel':['poly','linear','sigmoid','rbf',custom_kernel],
    'C':[0.1,0.2,0.3,0.5,1.0,2.0,5.0],
    'coef0':[0.5,1.0,2.0,4.0,8.0]
}]

In [8]:
svm=svm.SVC(probability=True)

In [9]:
svm.fit(X,Y)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [10]:
svm.score(X,Y)

0.996661101836394

## Using Logistic Regression

In [11]:
lr=LogisticRegression()

In [12]:
cross_val_score(lr,X,Y,scoring='accuracy',cv=5,n_jobs=-1).mean()

0.9137650882079852

In [13]:
cross_val_score(svm,X,Y,scoring='accuracy',cv=5,n_jobs=-1).mean()

0.9632838130609718

## Using Grid Search

In [14]:
gs=GridSearchCV(svm,param_grid=params,scoring='accuracy',n_jobs=-1)

In [15]:
gs.fit(X,Y)

GridSearchCV(cv=None, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=True, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=-1,
             param_grid=[{'C': [0.1, 0.2, 0.3, 0.5, 1.0, 2.0, 5.0],
                          'coef0': [0.5, 1.0, 2.0, 4.0, 8.0],
                          'kernel': ['poly', 'linear', 'sigmoid', 'rbf',
                                     <function custom_kernel at 0x0000020A10B9B5E8>]}],
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='accuracy', verbose=0)

In [16]:
gs.best_estimator_

SVC(C=5.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.5,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [17]:
gs.best_score_

0.9738502011761063

In [18]:
gs.classes_

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [19]:
gs.best_params_

{'C': 5.0, 'coef0': 0.5, 'kernel': 'rbf'}

In [20]:
np.around(gs.predict_proba(X),2)

array([[0.99, 0.  , 0.  , ..., 0.  , 0.  , 0.  ],
       [0.  , 1.  , 0.  , ..., 0.  , 0.  , 0.  ],
       [0.  , 0.01, 0.96, ..., 0.  , 0.02, 0.  ],
       ...,
       [0.  , 0.01, 0.  , ..., 0.  , 0.97, 0.  ],
       [0.  , 0.  , 0.  , ..., 0.  , 0.01, 0.98],
       [0.  , 0.  , 0.  , ..., 0.  , 0.97, 0.01]])