# Homework 6
You need to classify digits with SVM models in this homework. Use PCA to reduce data dimensionality (80% information remain). Compare performance of linear, radial, and polynomial kernals. Tune kernal parameters using Randomsearch method.

In [10]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, svm, metrics
from sklearn.decomposition import PCA
from sklearn.model_selection import RandomizedSearchCV, cross_val_score, train_test_split
from scipy import stats

# use seaborn plotting defaults
import seaborn as sns; sns.set()
from sklearn import datasets, svm, metrics

# The digits dataset
digits = datasets.load_digits()
X, y = digits.data, digits.target

pca = PCA(.80)
X_reduced = pca.fit_transform(X)
print(f"Original dimensions: {X.shape}")
print(f"Reduced dimensions: {X_reduced.shape}")

X_train, X_test, y_train, y_test = train_test_split(X_reduced, y, test_size=0.2, random_state=42)

param_distributions = {
    'C': stats.uniform(0.1, 10),
    'gamma': stats.uniform(.001, 1),
    'degree': [2, 3, 4],
}

kernels = ['linear', 'rbf', 'poly']
best_params = {}
scores = {}

for kernel in kernels:
    print (f"Tuning hyperparameters for {kernel} kernel...")
    if kernel == 'linear':
        param_distributions_local = {'C': param_distributions['C']}
    elif kernel == 'rbf':
        param_distributions_local = {'C': param_distributions['C'], 'gamma': param_distributions['gamma']}
    elif kernel == 'poly':
        param_distributions_local = {'C': param_distributions['C'], 'gamma': param_distributions['gamma'], 'degree': param_distributions['degree']}

    svc = svm.SVC(kernel=kernel)
    search = RandomizedSearchCV(svc, param_distributions=param_distributions_local, n_iter=50, cv=5, random_state=42, n_jobs=-1)
    search.fit(X_train, y_train)

    best_params[kernel] = search.best_params_
    scores[kernel] = cross_val_score(search.best_estimator_, X_reduced, y, cv=5).mean()
    print(f"Best parameters for {kernel} kernel: {search.best_params_}")
    print(f"Mean Cross-val accuracy for {kernel} kernel: {scores[kernel]:.4f}")


print("\nComparison of kernel performances:")
for kernel, score in scores.items():
    print(f"{kernel.capitalize()} kernel: {score:.4f}")

Original dimensions: (1797, 64)
Reduced dimensions: (1797, 13)
Tuning hyperparameters for linear kernel...
Best parameters for linear kernek: {'C': 3.845401188473625}
Mean Cross-val accuracy for linear kernel: 0.9193
Tuning hyperparameters for rbf kernel...
Best parameters for rbf kernek: {'C': 6.0241456886204245, 'gamma': 0.047450412719997725}
Mean Cross-val accuracy for rbf kernel: 0.3557
Tuning hyperparameters for poly kernel...
Best parameters for poly kernek: {'C': 9.799098521619943, 'degree': 3, 'gamma': 0.21333911067827616}
Mean Cross-val accuracy for poly kernel: 0.9516

Comparison of kernel performances:
Linear kernel: 0.9193
Rbf kernel: 0.3557
Poly kernel: 0.9516
