In [None]:
import numpy as np
import matplotlib.pylab as plt

import itertools
import warnings

from utils import load_cifar10, batch_plot, seed_everything, accuracy
from nn import Linear, ReLU, SGD, Sequential, CrossEntropy


# Seed via the project helper, called with its defaults.
# NOTE(review): which RNGs it covers is defined in utils — confirm it seeds NumPy.
seed_everything()
# Print NumPy floats with 3 digits of precision to keep array dumps short.
np.set_printoptions(precision=3)
# Silence every warning for the rest of the session (this also hides
# deprecation and numerical warnings raised during training).
warnings.filterwarnings("ignore")

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
# Read the CIFAR-10 train/test splits from the local dataset directory.
(X_train, y_train), (X_test, y_test) = load_cifar10("../code/cs231n/datasets/cifar-10-batches-py/")

# Sanity-check the loaded arrays: 32x32 RGB images with one label per image.
assert (X_train.shape, y_train.shape) == ((50000, 32, 32, 3), (50000,))
assert (X_test.shape, y_test.shape) == ((10000, 32, 32, 3), (10000,))

In [None]:
# Hold out the last 20% of the training set as a validation split.
# NOTE: this cell reassigns X_train / y_train, so re-running it without first
# re-running the loading cell shrinks the training split a second time.
num_valid = int(len(X_train) * 0.2)
X_valid, y_valid = X_train[-num_valid:], y_train[-num_valid:]
X_train, y_train = X_train[:-num_valid], y_train[:-num_valid]

print(f"X_train: {X_train.shape}, y_train: {y_train.shape}")
print(f"X_valid: {X_valid.shape}, y_valid: {y_valid.shape}")
print(f"X_test: {X_test.shape}, y_test: {y_test.shape}")

# Per-class sample counts of both splits on a shared axis. The two series are
# drawn on top of each other, so a legend and axis labels are needed to tell
# them apart.
fig, ax = plt.subplots()
ax.bar(*np.unique(y_train, return_counts=True), label="train")
ax.bar(*np.unique(y_valid, return_counts=True), label="valid")
ax.set_xlabel("class label")
ax.set_ylabel("number of samples")
ax.set_title("Class distribution: train vs. valid")
ax.legend()
plt.show()

In [None]:
# Draw 'batch_size' random training indices (sampled with replacement, so the
# same image may show up more than once)
batch_size = 16
random_indexes = np.random.randint(low=0, high=len(X_train), size=batch_size)
# Plot the sampled images with their labels
batch_plot(X_train[random_indexes], y_train[random_indexes], with_border=False)

In [None]:
# Mean image computed over the (already split) training set only.
mean_image = X_train.mean(axis=0)

# Center every split with the training mean, then flatten each image into one row.
X_train = (X_train.astype(float) - mean_image).reshape(len(X_train), -1)
X_valid = (X_valid.astype(float) - mean_image).reshape(len(X_valid), -1)
X_test = (X_test.astype(float) - mean_image).reshape(len(X_test), -1)


# Small slice of the training data used by the experiments below.
num_train = 50
X_train_samples, y_train_samples = X_train[:num_train], y_train[:num_train]

In [None]:
def search_train_accuracies(
    layers=3,
    learning_rates=None,
    weight_scales=None,
    hidden_units=100,
    epochs=10,
    batch_size=25,
    plot=True,
    save_fig=False,
):
    """Grid-search learning rate x weight scale on the small training subset.

    For every (learning rate, weight scale) pair a new fully connected network
    is built, trained with SGD on ``X_train_samples`` / ``y_train_samples`` and
    scored on those same samples. Input width and class count are taken from
    the notebook-level ``X_train`` / ``y_train``.

    Args:
        layers: Number of ``Linear -> ReLU`` blocks placed before the output
            ``Linear`` layer (the first block maps the input to
            ``hidden_units``, the remaining ``layers - 1`` blocks are
            ``hidden_units -> hidden_units``).
        learning_rates: 1-D sequence of learning rates (rows of the result).
            Defaults to 10 log-spaced values from 1e-1 down to 1e-6.
        weight_scales: 1-D sequence of ``weight_scale`` values passed to every
            ``Linear`` layer (columns of the result). Defaults to 10
            log-spaced values from 1e-5 to 1e2.
        hidden_units: Width of every hidden layer.
        epochs: Training epochs per configuration.
        batch_size: Mini-batch size per configuration.
        plot: When ``True``, draw the accuracy grid as a heat map.
        save_fig: When ``True`` (and ``plot`` is ``True``), also save the
            figure under ``../code/cs231n/notebook_images/``.

    Returns:
        Tuple ``(accuracies, learning_rates, weight_scales)`` where
        ``accuracies[i, j]`` is the training-subset accuracy obtained with
        ``learning_rates[i]`` and ``weight_scales[j]``; the two grids are
        returned as NumPy arrays.
    """
    _, n = X_train.shape
    num_classes = y_train.max() + 1

    if learning_rates is None:
        learning_rates = np.logspace(-6, -1, 10)[::-1]
    if weight_scales is None:
        weight_scales = np.logspace(-5, 2, 10)
    # Accept plain lists/tuples as well: the tick labels below use array
    # (fancy) indexing, which only works on ndarrays.
    learning_rates = np.asarray(learning_rates)
    weight_scales = np.asarray(weight_scales)

    loss = CrossEntropy()
    accuracies = np.zeros((len(learning_rates), len(weight_scales)))
    for (i, lr), (j, scale) in itertools.product(enumerate(learning_rates), enumerate(weight_scales)):
        # Build every layer as its own object. Repeating a list with `*` would
        # put the *same* Linear/ReLU instance into the network several times,
        # so all hidden blocks would share one set of parameters and caches.
        net_layers = [Linear(n, hidden_units, weight_scale=scale), ReLU()]
        for _ in range(layers - 1):
            net_layers += [Linear(hidden_units, hidden_units, weight_scale=scale), ReLU()]
        net_layers.append(Linear(hidden_units, num_classes, weight_scale=scale))
        model = Sequential(net_layers)

        optimizer = SGD(params=model.parameters, learning_rate=lr)
        model.compile(loss=loss, optimizer=optimizer)
        model.fit(
            X_train_samples, y_train_samples, epochs=epochs, store_weights=False, batch_size=batch_size, verbose=False
        )

        train_samples_preds = np.argmax(model.predict(X_train_samples), axis=1)
        accuracies[i, j] = accuracy(y_train_samples, train_samples_preds)

    if plot is True:
        # Label every third row/column to keep the axes readable on dense grids.
        yticks = list(range(0, len(learning_rates), 3))
        xticks = list(range(0, len(weight_scales), 3))
        fig, ax = plt.subplots(figsize=(12, 12))
        # NOTE(review): the 0..100 color range assumes `accuracy` returns
        # percentages — confirm against utils.accuracy.
        pos = ax.imshow(accuracies, cmap=plt.cm.gray_r, vmin=0, vmax=100, interpolation="none")
        ax.set_xticks(xticks)
        ax.set_xticklabels([f"{s:.1e}" for s in weight_scales[xticks]])
        ax.set_yticks(yticks)
        ax.set_yticklabels([f"{s:.1e}" for s in learning_rates[yticks]])
        ax.set_xlabel("weight scales")
        ax.set_ylabel("learning rates")
        ax.set_title(f"{layers} layers nn accuracies")
        fig.colorbar(pos, ax=ax, fraction=0.019, pad=0.02, aspect=50)
        if save_fig is True:
            fig.savefig(f"../code/cs231n/notebook_images/{layers}_layers_nn_accuracies.png", dpi=400, transparent=True)
        plt.show()
    return accuracies, learning_rates, weight_scales

In [None]:
# 32 x 32 grid: learning rates from 1e-1 down to 1e-6, weight scales from 1e-5 up to 1e2.
learning_rates = np.logspace(-6, -1, 32)[::-1]
weight_scales = np.logspace(-5, 2, 32)

# Repeat the search for networks of increasing depth and save each heat map.
for layers in (2, 3, 4, 5):
    search_train_accuracies(layers=layers, learning_rates=learning_rates, weight_scales=weight_scales, save_fig=True)

In [None]:
def visualize_weights(
    layers=3,
    learning_rate=5e-3,
    weight_scale=5e-2,
    hidden_units=100,
    epochs=10,
    batch_size=25,
    weight_layer=1,
    subsample_steps=1,
    verbose=False,
):
    """Train one network on the small training subset and plot the distribution
    of one parameter's values and gradients across the stored snapshots.

    The network is trained with SGD on ``X_train_samples`` /
    ``y_train_samples`` with ``store_weights=True``; input width and class
    count come from the notebook-level ``X_train`` / ``y_train``. The training
    accuracy on the subset is printed, then two violin plots are drawn (one
    violin per kept snapshot): values on the left, gradients on the right.

    Args:
        layers: Number of ``Linear -> ReLU`` blocks placed before the output
            ``Linear`` layer.
        learning_rate: SGD learning rate.
        weight_scale: ``weight_scale`` passed to every ``Linear`` layer.
        hidden_units: Width of every hidden layer.
        epochs: Training epochs; the stored snapshots are reshaped into
            ``epochs`` rows, so the history is expected to hold a number of
            snapshots compatible with that.
        batch_size: Mini-batch size.
        weight_layer: Index (within ``range(layers)``, negative values count
            from the end) of the entry in each snapshot to visualize.
        subsample_steps: Keep every ``subsample_steps``-th row when plotting.
        verbose: Forwarded to ``model.fit``.

    Returns:
        The history object returned by ``model.fit``.

    Raises:
        IndexError: If ``weight_layer`` is outside ``range(layers)``.
    """
    _, n = X_train.shape
    num_classes = y_train.max() + 1

    # Build every layer as its own object. Repeating a list with `*` would put
    # the *same* Linear/ReLU instance into the network several times, so all
    # hidden blocks would share one set of parameters and caches.
    net_layers = [Linear(n, hidden_units, weight_scale=weight_scale), ReLU()]
    for _ in range(layers - 1):
        net_layers += [Linear(hidden_units, hidden_units, weight_scale=weight_scale), ReLU()]
    net_layers.append(Linear(hidden_units, num_classes, weight_scale=weight_scale))
    model = Sequential(net_layers)

    optimizer = SGD(params=model.parameters, learning_rate=learning_rate)
    loss = CrossEntropy()
    model.compile(loss=loss, optimizer=optimizer)
    history = model.fit(
        X_train_samples, y_train_samples, epochs=epochs, store_weights=True, batch_size=batch_size, verbose=verbose
    )

    train_samples_preds = np.argmax(model.predict(X_train_samples), axis=1)
    print(f"train acc: {accuracy(y_train_samples, train_samples_preds)}")

    # Resolve the requested index against range(layers) so out-of-range values
    # raise IndexError and negative values count from the end; only the
    # selected entry is stacked, the others are never plotted.
    layer_index = range(layers)[weight_layer]
    snapshots = history["weights"]
    stacked_values = np.asarray([snapshot[layer_index].value for snapshot in snapshots])
    stacked_grads = np.asarray([snapshot[layer_index].grad for snapshot in snapshots])

    # One flat row per epoch, then keep every `subsample_steps`-th row.
    layers_weights = list(stacked_values.reshape((epochs, -1))[::subsample_steps])
    layers_grads = list(stacked_grads.reshape((epochs, -1))[::subsample_steps])

    fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(9, 4))
    axs[0].violinplot(layers_weights, showmeans=True, showmedians=True)
    axs[0].set_title("weights")
    axs[1].violinplot(layers_grads, showmeans=True, showmedians=True)
    axs[1].set_title("gradients")

    # Violins sit at positions 1..k; label them with the original row index.
    tick_positions = [pos + 1 for pos in range(len(layers_weights))]
    tick_labels = [i * subsample_steps for i in range(len(layers_weights))]
    for ax in axs:
        ax.yaxis.grid(True)
        ax.set_xticks(tick_positions, labels=tick_labels)
    plt.show()
    return history

In [None]:
# Bind the returned history to a name: as a bare last expression the whole
# history (including every stored weight snapshot) would be echoed as cell output.
history = visualize_weights(layers=2, weight_scale=5e-2, learning_rate=1e-3, epochs=20, subsample_steps=2)