# CNN on CIFAR-10



The CIFAR-10 dataset contains 32x32 color images from 10 classes: __airplane, automobile, bird, cat, deer, dog, frog, horse, ship, truck__:
<img src="https://github.com/hse-aml/intro-to-dl/blob/master/week3/images/cifar10.jpg?raw=1" style="width:80%">

In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import keras

In [None]:
from keras.datasets import cifar10
# Download (on first use) and load the CIFAR-10 train/test split.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print("Train samples:", x_train.shape, y_train.shape)
print("Test samples:", x_test.shape, y_test.shape)
NUM_CLASSES = 10
cifar10_classes = ["airplane", "automobile", "bird", "cat", "deer",
                   "dog", "frog", "horse", "ship", "truck"]

# Per-channel mean/std (reduced over samples, height and width) are computed
# on the training set only and reused for the test set, so no test statistics
# leak into the preprocessing.
x_mean = np.mean(x_train, axis=(0, 1, 2), keepdims=True)
x_std = np.std(x_train, axis=(0, 1, 2), keepdims=True)
x_train_norm = (x_train - x_mean) / x_std
x_test_norm = (x_test - x_mean) / x_std

# One-hot encode by indexing an identity matrix. Unlike pd.get_dummies, the
# result always has exactly NUM_CLASSES columns (even if a class were absent
# from a split) and a fixed integer dtype regardless of the pandas version.
y_train_onehot = np.eye(NUM_CLASSES, dtype=np.uint8)[y_train.squeeze()]
y_test_onehot = np.eye(NUM_CLASSES, dtype=np.uint8)[y_test.squeeze()]

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
Train samples: (50000, 32, 32, 3) (50000, 1)
Test samples: (10000, 32, 32, 3) (10000, 1)


In [None]:
# Show a grid of randomly chosen training images, each titled with its class.
cols = 8
rows = 2
fig = plt.figure(figsize=(2 * cols - 1, 2.5 * rows - 1))
# Subplot positions are 1-based; every cell gets an independent random sample,
# so a single pass over the positions is enough.
for position in range(1, rows * cols + 1):
    random_index = np.random.randint(0, len(y_train))
    ax = fig.add_subplot(rows, cols, position)
    # grid() expects a boolean; the string 'off' is truthy and is not a
    # supported way to disable the grid.
    ax.grid(False)
    ax.axis('off')
    ax.imshow(x_train[random_index, :])
    ax.set_title(cifar10_classes[y_train[random_index, 0]])
plt.show()

In [None]:

from functools import partial
# Switch to TF1-style graph execution; the model code in this cell is built
# from placeholders and evaluated through a Session.
tf.compat.v1.disable_eager_execution()
# Shorthand for creating graph placeholders.
tfph = tf.compat.v1.placeholder
class ResidualConv:
  """Stack of 3x3 'SAME' convolutions whose result is added back onto the input.

  `layer_sizes` gives the number of filters of each convolution in order.
  Because the block output is `signal + x`, the last size has to be
  compatible with the channel count of the incoming signal.
  """
  def __init__(self, layer_sizes):
    # One deferred conv2d call per requested filter count; the convolution is
    # only applied when forward() invokes it.
    self.layers = []
    for size in layer_sizes:
      conv = partial(tf.compat.v1.layers.conv2d, filters = size, kernel_size = 3, padding = 'SAME')
      self.layers.append(conv)

  def forward(self, signal):
    """Apply the stack to `signal` and return `signal` plus the stack output.

    Every intermediate result is recorded in `self.features`; the final sum is
    also kept in `self.output`.
    """
    last_index = len(self.layers) - 1
    self.features = []
    current = signal
    for index, conv in enumerate(self.layers):
      current = conv(current)
      # Non-linearity between convolutions only; the last one stays linear
      # before the skip connection is added.
      if index != last_index:
        current = tf.nn.leaky_relu(current)
      self.features.append(current)
    self.output = signal + current
    return self.output

class Conv:
  """Thin wrapper that defers a `tf.compat.v1.layers.conv2d` call.

  All positional and keyword arguments are stored and forwarded to conv2d
  when `forward` is called with the input tensor.
  """
  def __init__(self, *args, **kwargs):
    conv2d = tf.compat.v1.layers.conv2d
    self.layer = partial(conv2d, *args, **kwargs)

  def forward(self, signal):
    """Apply the stored convolution to `signal` and return the result."""
    apply_conv = self.layer
    return apply_conv(signal)
class BaseConv(Conv):
  """3x3 'SAME' convolution with `size` filters and default stride."""
  def __init__(self, size):
    conv_options = {'filters': size, 'kernel_size': 3, 'padding': 'SAME'}
    super().__init__(**conv_options)
class StridedConv(Conv):
  """5x5 'SAME' convolution with `size` filters and stride 2 (downsampling)."""
  def __init__(self, size):
    conv_options = {'filters': size, 'kernel_size': 5, 'strides': 2, 'padding': 'SAME'}
    super().__init__(**conv_options)
# Short aliases keep the architecture listings below compact.
BC = BaseConv
SC = StridedConv
RC = ResidualConv

# Earlier architectures, kept for reference. The trailing numbers look like
# recorded scores for each variant -- TODO confirm which metric they are.
# NOTE: `[BC(32)] * 2` repeats the *same* layer object rather than creating two.
# layers = [BC(32)] * 2 + [SC(64)] + [BC(64)] * 2 + [SC(128)] + [BC(128)] * 2 # What's the bug here?
# layers = [BC(32), BC(32), SC(64), BC(64), BC(64), SC(128), BC(128), BC(128)] # .68

# Active architecture: one stem convolution, then three stages of two residual
# blocks each, separated by strided convolutions.
layers = [
  BC(32), RC([16, 32]), RC([16, 32]),
  SC(64), RC([32, 64]), RC([32, 64]),
  SC(128), RC([64, 128]), RC([64, 128]),
] # .70
# layers = [BC(32),BC(32), RC([16, 32]), RC([16, 32]), RC([16, 32]), RC([16, 32]), SC(64),
#           RC([32, 64]), RC([32, 64]), RC([32, 64]), RC([32, 64]),
#           SC(128), RC([64, 128]), RC([64, 128]), RC([64, 128]), RC([64, 128]), RC([64, 128]), RC([64, 128]),
#           RC([64, 128]), RC([64, 128]), RC([64, 128]), RC([64, 128])]
# batch_size = 128 # .69
batch_size = 64 # .71
class Model:
  """CIFAR-10 classifier built as a TF1-style graph with an auxiliary loss.

  The graph feeds `xph` (images, shape (None, 32, 32, 3)) through `layers`,
  applying a leaky ReLU after each one, then flattens into a 64-unit dense
  layer and a NUM_CLASSES-way output. A second classification head is attached
  to the middle of the network; its cross-entropy is added to the main loss
  with weight 0.2, together with a small L2 penalty on all trainable variables.

  Attributes set by the constructor:
    xph, yph:           input image / one-hot label placeholders.
    features:           list of tensors, starting with `xph`, one per layer,
                        followed by the dense hidden layer and the raw logits.
    additional_inputs:  one placeholder per layer that defaults to zeros and
                        is added to that layer's activations; feed it to
                        inject an extra signal at that depth.
    yhat:               softmax class probabilities.
    celoss, extra_loss, reg, loss: the individual and combined loss tensors.
    opt:                Adam (learning rate 1e-4) minimisation op for `loss`.
    grad_x:             gradient of `loss` with respect to the input images.
    sess:               session with all variables initialised.
    losses, val_losses, val_acc: histories filled in by `train`.
  """
  def __init__(self, layers = layers, extra_loss_layers = [SC(64), SC(128)]):
    """Build the graph, the optimiser and an initialised session.

    Args:
      layers: sequence of objects exposing `forward(tensor)`; must not be empty.
      extra_loss_layers: layers applied, in order, to the mid-network
        activations before the auxiliary classification head.

    Raises:
      ValueError: if `layers` is empty.
    """
    if not layers:
      raise ValueError("Model needs at least one layer")
    self.xph = tfph(tf.float32, shape = (None, 32, 32, 3))
    self.yph = tfph(tf.int32, shape = (None, NUM_CLASSES))
    self.layers = layers
    self.features = [self.xph]
    self.additional_inputs = []
    pivot_index = len(layers) // 2
    for i, layer in enumerate(layers):
      features = layer.forward(self.features[-1])
      features = tf.nn.leaky_relu(features)
      # Optional additive input: the default value must be a tensor (zeros,
      # so it is a no-op unless fed) and the shape must be static.
      additional_layer = tf.compat.v1.placeholder_with_default(
          tf.zeros_like(features), shape = features.shape)
      self.additional_inputs.append(additional_layer)
      features = features + additional_layer
      self.features.append(features)
      if i == pivot_index:
        # Auxiliary head: run the mid-network activations through the extra
        # layers (chained, each followed by a leaky ReLU) and classify.
        pivot_features = features
        for extra_layer in extra_loss_layers:
          pivot_features = extra_layer.forward(pivot_features)
          pivot_features = tf.nn.leaky_relu(pivot_features)
        pivot_features = tf.compat.v1.layers.flatten(pivot_features)
        extra_pred = tf.compat.v1.layers.dense(pivot_features, NUM_CLASSES)
        self.extra_loss = tf.compat.v1.losses.softmax_cross_entropy(self.yph, extra_pred)
    features = tf.compat.v1.layers.flatten(features)
    features = tf.compat.v1.layers.dense(features, 64)
    features = tf.nn.leaky_relu(features)
    self.features.append(features)
    output_raw = tf.compat.v1.layers.dense(features, NUM_CLASSES)
    self.features.append(output_raw)
    self.yhat = tf.compat.v1.math.softmax(output_raw, axis=1)
    self.celoss = tf.compat.v1.losses.softmax_cross_entropy(self.yph, output_raw)
    # L2 penalty over every trainable variable created so far.
    tfvars = tf.compat.v1.trainable_variables()
    self.reg = tf.reduce_sum([tf.reduce_sum(tf.square(var)) for var in tfvars])
    self.loss = self.celoss + self.extra_loss * .2 + self.reg * 1e-8
    self.opt = tf.compat.v1.train.AdamOptimizer(1e-4).minimize(self.loss)
    self.sess = tf.compat.v1.Session()
    self.sess.run(tf.compat.v1.global_variables_initializer())
    self.losses = []
    self.val_losses = []
    self.val_acc = []
    # Default validation feed from the module-level test arrays; train()
    # replaces it with the data it is given.
    self.val_fd = {self.xph: x_test_norm, self.yph: y_test_onehot}
    self.grad_x = tf.gradients(self.loss, self.xph)[0]

  def train(self, x_test_norm, y_test_onehot, steps = 10000,batch_size = 64):
    """Train on randomly sampled, perturbed batches of the training set.

    Batches are drawn (with replacement) from the module-level `x_train_norm`
    / `y_train_onehot`. The perturbation applied to the images cycles with the
    step number:
      step % 3 == 0: Gaussian noise with standard deviation 1e-2;
      step % 3 == 1: a +/-0.005 shift following the sign of the loss gradient;
      step % 3 == 2: the signed square root of the loss gradient, rescaled to
                     unit L2 norm per image.

    Every 100 steps the loss and accuracy on the validation data are recorded
    in `val_losses` / `val_acc` and the loss is printed. The training loss of
    every step is appended to `losses`.

    Args:
      x_test_norm: normalised validation images.
      y_test_onehot: one-hot validation labels, one row per image.
      steps: number of optimisation steps.
      batch_size: number of samples per step.
    """
    # Validate on the data passed in, not on whatever globals exist.
    self.val_fd = {self.xph: x_test_norm, self.yph: y_test_onehot}
    val_labels = np.argmax(y_test_onehot, axis=1)
    for step in range(steps):
      samples = np.random.choice(x_train_norm.shape[0], batch_size)
      x_sample = x_train_norm[samples]
      y_sample = y_train_onehot[samples]
      mode = step % 3
      if mode == 0:
        noise = np.random.randn(*x_sample.shape) * 1e-2
        x_trn = x_sample + noise
      else:
        # Both adversarial modes start from the loss gradient w.r.t. the
        # clean batch.
        self.fd = {self.xph: x_sample, self.yph: y_sample}
        grad_x = self.sess.run(self.grad_x, self.fd)
        if mode == 1:
          x_trn = x_sample + (grad_x > 0) * 1e-2 - 1e-2/2
        else:
          grad_x = np.sqrt(np.abs(grad_x)) * ((grad_x > 0) * 2 - 1)
          norm = np.sqrt(np.sum(np.square(grad_x).reshape(len(grad_x), -1), axis=1))
          # A vanishing gradient would otherwise give 0/0 = NaN inputs.
          norm = np.maximum(norm, 1e-12)
          distortion = grad_x / norm[:,None,None,None]
          x_trn = x_sample + distortion

      self.fd = {self.xph: x_trn, self.yph: y_sample}
      ls, _ = self.sess.run([self.loss, self.opt], self.fd)
      self.losses.append(ls)
      if step % 100 == 0:
        val_loss, forecast = self.sess.run([self.loss, self.yhat], self.val_fd)
        actual_pred = np.argmax(forecast, axis=1)
        acc = (actual_pred == val_labels).mean()
        self.val_acc.append(acc)
        self.val_losses.append(val_loss)
        print(val_loss)

In [None]:

# grad = np.random.randn(64, 32, 32, 3)
# grad.sum(0).shape

In [None]:
# batch norm:
# layer has shape (bs, h, w, filters)
# for each h, w, filters, there are bs of them.
# take those bs values, and normalize them to zero mean and unit variance.

In [None]:
# grad_x = [-1e10, + .00000001, 10, -12]
# every image, the l2 norm of the change, lets set that to .1
# every image, sqrt the l2 norm of the change, lets set that to .1
# used = [-.005, .005, .005, -.005]

# other image transformations

In [None]:
# Build the graph with the default architecture, then train it while
# validating against the normalised test split.
mdl = Model()
mdl.train(
    x_test_norm,
    y_test_onehot,
    batch_size=batch_size,
)

In [None]:
# Mean of the last 10 recorded training losses and the lowest recorded validation loss.
np.mean(mdl.losses[-10:]), np.min(mdl.val_losses)

In [None]:
# (0.068675384, 0.9072602)

In [None]:
# Training loss, one point per optimisation step.
plt.plot(mdl.losses)

In [None]:
# Validation loss, one point per validation pass (every 100 training steps).
plt.plot(mdl.val_losses)

In [None]:

# Display the validation accuracies recorded during training.
mdl.val_acc

In [None]:
# Validation accuracy, one point per validation pass (every 100 training steps).
plt.plot(mdl.val_acc)

In [None]:
# validate model