In [1]:
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import math_ops
from tensorflow.python.keras import backend as K
from tensorboard.plugins.hparams import api as hp

from sklearn import svm
from sklearn.model_selection import GridSearchCV

import matplotlib.pyplot as plt
import numpy as np

In [2]:
tf.__version__

'2.2.0'

In [3]:
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()

In [4]:
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((50000, 32, 32, 3), (50000, 1), (10000, 32, 32, 3), (10000, 1))

In [5]:
X_train_normalized, X_test_normalized = X_train / 255.0, X_test / 255.0

## Model (sklearn)

In [6]:
X_train_normalized_reshaped, X_test_normalized_reshaped = X_train_normalized.reshape(X_train.shape[0], -1), X_test_normalized.reshape(X_test.shape[0], -1)

In [7]:
X_train_normalized_reshaped.shape, X_test_normalized_reshaped.shape

((50000, 3072), (10000, 3072))

In [16]:
train_mask = np.random.choice(2, X_train_normalized_reshaped.shape[0], p=[0.9, 0.1])
test_mask = np.random.choice(2, X_test_normalized_reshaped.shape[0], p=[0.9, 0.1])

X_train_normalized_reshaped = X_train_normalized_reshaped[train_mask == 1]
y_train = y_train[train_mask == 1]

X_test_normalized_reshaped = X_test_normalized_reshaped[test_mask == 1]
y_test = y_test[test_mask == 1]

In [17]:
X_train_normalized_reshaped.shape, X_test_normalized_reshaped.shape

((4902, 3072), (944, 3072))

In [8]:
params = [
    {
        "C": [1, 10, 100, 1000, 10000],
        "kernel": ["linear"],
    },
    {
        "C": [1, 10, 100, 1000, 10000],
        "kernel": ["poly"],
        "degree": [3, 5, 7],
    },
    {
        "C": [1, 10, 100, 1000, 10000],
        "kernel": ["rbf"],
        "gamma": ["auto", "scale"]
    },
]

In [18]:
model = svm.SVC()
model.fit(X_train_normalized_reshaped, y_train)

  y = column_or_1d(y, warn=True)


SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='rbf', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [19]:
model.evaluate(X_test_normalized_reshaped, y_test)

AttributeError: 'SVC' object has no attribute 'evaluate'

In [None]:
clf = GridSearchCV(svm.SVC(), params, verbose=4)
clf.fit(X_train_normalized_reshaped, y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fitting 3 folds for each of 30 candidates, totalling 90 fits
[CV] C=1, kernel=linear ..............................................


  y = column_or_1d(y, warn=True)


In [None]:
clf.cv_results_

## Model (tensorflow)

In [21]:
class SVMLoss(tf.keras.losses.Loss):
    def call(self, y_true, y_pred):
        """
        SVM loss is \sum_{l \neq y} max(0, 1 + s_l - s_y) if y is the true label and s_i is score for class i.

        Args:
            y_true: (batch_size, 1)-tensor, indicating the true label.
            y_pred: (batch_size, number_of_classes)-tensor, scores of each class.
            
        Returns:
            (batch_size, 1)-tensor indicating loss for each data point.
            
        """
        y_pred = ops.convert_to_tensor_v2(y_pred)
        y_true = tf.reshape(math_ops.cast(y_true, tf.dtypes.int32), [-1])

        correct_class_score = tf.gather(y_pred, y_true, axis=1, batch_dims=1)

        hinge_losses = tf.math.maximum(0., 1 + y_pred - tf.expand_dims(correct_class_score, 1))
        return K.sum(hinge_losses, axis=1) - 1 # subtract 1 since we ignore the score for correct class

In [None]:
y_true = [[1], [2]]
y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]

SVMLoss().call(y_true, y_pred).numpy()

In [20]:
HP_EPOCH = hp.HParam('epoch', hp.IntInterval(1, 5))
HP_L2_REGULARIZATION = hp.HParam('l2_regularization', hp.Discrete([0.0, 0.0001, 0.001, 0.01, 0.1]))
HP_KERNEL = hp.HParam('kernel', hp.Discrete(["none", "gaussian", "laplacian"]))
HP_KERNEL_DIM = hp.HParam('kernel_dim', hp.Discrete([64, 128, 256, 512, 1024]))

HPARAMS_LIST = [HP_EPOCH, HP_L2_REGULARIZATION, HP_KERNEL, HP_KERNEL_DIM]

METRIC_ACCURACY = 'accuracy'

TF_LOGDIR = "logs/cs231n_svm_hparam_tuning"

with tf.summary.create_file_writer(TF_LOGDIR).as_default():
    hp.hparams_config(
      hparams=HPARAMS_LIST,
      metrics=[hp.Metric(METRIC_ACCURACY, display_name='Accuracy')],
    )

In [None]:
def train_and_evaluate_model(hparams):
    model = tf.keras.models.Sequential([
       tf.keras.layers.Flatten(input_shape=(32, 32, 3)),
    ])
    if hparams[HP_KERNEL] != "none":
        model.add(
            tf.keras.layers.experimental.RandomFourierFeatures(
                output_dim=hparams[HP_KERNEL_DIM],
                kernel_initializer=hparams[HP_KERNEL]
            )
        )
    model.add(
       tf.keras.layers.Dense(
           10, 
           kernel_regularizer=tf.keras.regularizers.l2(hparams[HP_L2_REGULARIZATION])
        )
    )
    
    model.compile(
        optimizer="adam",
        loss="sparse_categorical_crossentropy",
        metrics=[METRIC_ACCURACY]
    )
    
    model.fit(X_train_normalized, y_train, epochs=hparams[HP_EPOCH])
    
    _, accuracy = model.evaluate(X_test_normalized, y_test, verbose=True)
    
    return accuracy

In [None]:
def run(run_dir, hparams):    
    with tf.summary.create_file_writer(run_dir).as_default():
        hp.hparams(hparams)  # record the values used in this trial
        accuracy = train_and_evaluate_model(hparams)
        tf.summary.scalar(METRIC_ACCURACY, accuracy, step=1)

In [None]:
session_num = 0

for epoch in range(HP_EPOCH.domain.min_value, HP_EPOCH.domain.max_value):
    for l2_regularization in HP_L2_REGULARIZATION.domain.values:
        for kernel in HP_KERNEL.domain.values:
            for kernel_dim in HP_KERNEL_DIM.domain.values:
                hparams = {
                    HP_EPOCH: epoch,
                    HP_L2_REGULARIZATION: l2_regularization,
                    HP_KERNEL: kernel,
                    HP_KERNEL_DIM: kernel_dim,
                }

                run_name = "run-%d" % session_num
                print('--- Starting trial: %s' % run_name)
                print({h.name: hparams[h] for h in hparams})
                run(TF_LOGDIR + "/" + run_name, hparams)
                session_num += 1

## Analysis (tensorflow)

In [None]:
%load_ext tensorboard

In [None]:
%tensorboard --logdir $TF_LOGDIR