In [7]:
from sklearn import svm
from sklearn.metrics import accuracy_score
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, ParameterGrid
import pandas as pd

**Get access to the data from Google Drive:**

In [8]:
# Colab helper module used to attach Google Drive to this runtime.
from google.colab import drive
# Mount Drive at /content/gdrive; the .npy files loaded later in this notebook
# are read from paths under this mount point.
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


**Define a function that converts an image to grayscale, resizes it to 32×32 and flattens it:**

In [9]:
def convert_sample(image):
    """Turn one RGB image into a single flattened grayscale feature row.

    The image is converted to grayscale, resized to 32x32 and reshaped to a
    NumPy array with exactly one row, so that the rows of many samples can be
    stacked into a feature matrix.

    Args:
        image: A single RGB image accepted by ``tf.image.rgb_to_grayscale``.

    Returns:
        A NumPy array with one row holding all pixel values of the resized
        grayscale image.
    """
    gray = tf.image.rgb_to_grayscale(image)
    # tf.image.resize returns a tensor; convert to NumPy before reshaping.
    resized = tf.image.resize(gray, [32, 32]).numpy()
    return resized.reshape(1, -1)

**Create X, y and Xtest; every image is preprocessed with `convert_sample`:**

In [10]:
# --- Training features ---
X = np.load('/content/gdrive/My Drive/AML_data/Xtrain.npy')
# Preprocess every image into a one-row array and stack the rows into a matrix.
X = np.vstack(list(map(convert_sample, X)))
# with_mean / with_std are boolean switches, not target values. The original
# passed 0 and 1, which meant "do not centre, do scale to unit variance"; the
# same configuration is spelled with real booleans here because newer
# scikit-learn releases reject ints for boolean parameters.
# NOTE(review): if full standardisation (zero mean, unit variance) was the
# intent, set with_mean=True.
feature_scaler = StandardScaler(with_mean=False, with_std=True)
X = feature_scaler.fit_transform(X)
print(f'Shape of training data features (observations,features): {X.shape}')

# --- Training labels ---
y = np.load('/content/gdrive/My Drive/AML_data//ytrain.npy')
y = y.reshape(-1,)  # flatten to a 1-D label vector
print(f'Shape of training data labels (observations,): {y.shape}')

# --- Test features ---
Xtest = np.load('/content/gdrive/My Drive/AML_data//Xtest.npy')
Xtest = np.vstack(list(map(convert_sample, Xtest)))
# Reuse the scaler fitted on the training features so that train and test data
# are scaled with the same statistics; never re-fit the scaler on the test set.
Xtest = feature_scaler.transform(Xtest)
print(f'Shape of test data features (observations,features): {Xtest.shape}')



Shape of training data features (observations,features): (26214, 1024)
Shape of training data labels (observations,): (26214,)
Shape of training data features (observations,features): (1638, 1024)




In [12]:
# Hold out 20% of the samples for validation; the fixed random_state makes the
# split reproducible across runs.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# The original cell also bound the held-out split to the names X_test / y_test;
# keep those aliases (same objects) so any code relying on them still works.
X_test, y_test = X_val, y_val

**SVM:**

Radial Basis Function (RBF) Kernel:

Often just referred to as the ’radial’ or
’Gaussian’ kernel, this is like projecting the data into an infinite-dimensional space.

We loop over several values of the regularization hyperparameter C and keep the one that gives the best accuracy on the validation set.

C large: More strict model (allows few misclassifications)

C small: Less strict model (allows many misclassifications)

In [13]:
# Candidate values for the SVM regularization hyperparameter C.
param_grid = ParameterGrid({
    'C': [0.1, 1, 10, 100], })

# Track the best configuration seen so far. Starting from -inf guarantees that
# the first candidate is always recorded, even if its accuracy is 0.
score_ = float('-inf')   # best validation accuracy
params_ = None           # hyperparameters that achieved score_
best_svm_rbf = None      # fitted model that achieved score_
for params in param_grid:
    # Fit an RBF-kernel SVM on the training split for this value of C.
    svm_rbf = svm.SVC(kernel='rbf', C=params['C'])
    svm_rbf.fit(X_train, y_train)
    # Predictions are made on the validation split (the variable name is kept
    # for compatibility with the rest of the notebook).
    y_test_hat_rbf = svm_rbf.predict(X_val)
    # accuracy_score expects (y_true, y_pred).
    accuracy_rbf = accuracy_score(y_val, y_test_hat_rbf)
    if accuracy_rbf > score_:
        score_ = accuracy_rbf
        params_ = params
        # Keep the winning estimator: after the loop svm_rbf is only the model
        # for the last C tried, which is not necessarily the best one.
        best_svm_rbf = svm_rbf

In [14]:
# Report the best configuration tracked by the search (params_ / score_), not
# the values left over from the last loop iteration (params / accuracy_rbf).
print(f"For SVM - using radial and the best hyperparameter C: {params_}, the accuracy of the model is: {score_}")

For SVM - using radial and the best hyperparameter C: {'C': 100}, the accuracy of the model is: 0.7469006294106427
