In [15]:
from sklearn import datasets
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np
from scipy.io import loadmat
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import  StratifiedShuffleSplit,GridSearchCV
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

#### ex page 334

In [None]:
# 334
iris = datasets.load_iris()

print(iris.data)
X = iris.data[:,2:4]# we only take the first two features.
print(X)

y = iris.target
print(list(iris.target_names))

print(y)
y[y==1] = 0 #setosa', 'versicolor - > 0
y[y==2] = 1 #virginica
print(y)


In [None]:
def plot_svc_decision_function(model,ax=None,plot_support=True):
    ax = ax or plt.gca()
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()
    
    x = np.linspace(xlim[0],xlim[1],30)
    y = np.linspace(ylim[0],ylim[1],30)
    X,Y = np.meshgrid(x,y)
    xy = np.column_stack([X.ravel(),Y.ravel()])
    v = model.decision_function(xy).reshape(X.shape)
    print(v)
    ax.contour(X,Y,v,colors='k',
              levels=[-1,0,1],alpha=0.5,
              linestyles=['--','-','--'])
    
    if plot_support:
        ax.scatter(model.support_vectors_[:,0],
                  model.support_vectors_[:,1],
                  s=300,lw=1,edgecolor='black',color='none')
        

In [None]:
fig,ax = plt.subplots(1,2,figsize=(16,6))
                      
for axi,C in zip(ax,[1,200.0]):
    model = SVC(kernel='linear',C=C).fit(X,y)
    axi.scatter(X[:,0],X[:,1],c=y,s=50,cmap='autumn')
    plot_svc_decision_function(model,axi)
    axi.set_title(f'C={C:.1f}',size=14)
    
    axi.set_xlabel('Petal length')
    axi.set_ylabel('Petal width')


        

                 



#### ex page 347

In [20]:
mnist_path = 'mnist-original.mat'

mnist = loadmat(mnist_path)
X = mnist['data'].T
y = mnist['label'][0]
display(X.shape , y.shape)


sss = StratifiedShuffleSplit(n_splits=10, test_size=0.02,train_size=0.05, random_state=0)
train_index, test_index = next(sss.split(X=X, y=y))   

X_train, X_test = X[train_index], X[test_index]
y_train, y_test = y[train_index], y[test_index]
display(X_train.shape,X_test.shape)




(70000, 784)

(70000,)

(3500, 784)

(1400, 784)

In [None]:

clf = Pipeline([('scale',StandardScaler()),
                ('cls',SVC())])

param_grid = {
    'cls__C': [0.001,1,10,100,1000],
    'cls__kernel': ['poly','rbf'],
    'cls__gamma': [0.1,5,10,20],
    'cls__degree': np.arange(1,20,2),
    'cls__coef0': np.arange(1,20,2)
    
}

grid_search = GridSearchCV(clf,param_grid,cv=3)
grid_search.fit(X_train,y_train)

print(grid_search.best_params_)

results = grid_search.cv_results_
for mean_score,params in zip(results['mean_test_score'],results['params']):
    print(mean_score,params)

In [None]:

clf = Pipeline([('scale',StandardScaler()),
                ('cls',SVC(C=1,degree=3,kernel='poly'))])

clf.fit(X_train,y_train)


y_test_pred = cross_val_predict(clf,X_test,y_test,cv=10)
conf_mat = confusion_matrix(y_test,y_test_pred)
conf_mat



In [27]:
accuracy_score(y_test,y_test_pred)

0.8807142857142857