In [None]:
# This lecture shows how useful convolutions are, with examples of how they can be derived by
# applying some logic to linear regression.
# It will be followed by a NumPy session that actually implements these tools.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import tensorflow as tf

In [None]:
from tensorflow.keras.datasets import cifar10

# Load the CIFAR-10 train/test splits and report their array shapes.
(trainX, trainy), (testX, testy) = cifar10.load_data()
print('Train: X=%s, y=%s' % (trainX.shape, trainy.shape))
print('Test: X=%s, y=%s' % (testX.shape, testy.shape))

# Preview the first nine training images in a 3x3 grid, mirrored left-right
# and with Gaussian noise (std 0.1) added on top of the [0, 1]-scaled pixels.
for i in range(9):
    plt.subplot(330 + 1 + i)
    flipped = trainX[i][:, ::-1, :] / 255
    noisy = flipped + np.random.normal(size=flipped.shape) * 1e-1
    # The noise pushes some values outside [0, 1]; clip explicitly so imshow
    # gets valid float RGB data instead of clipping implicitly with a warning.
    plt.imshow(np.clip(noisy, 0.0, 1.0))

plt.show()

In [None]:
# Number of examples taken from the start of each split.
n_data = 100

# Derive the class count from the full label arrays rather than from the small
# slices: encoding each slice separately (as pd.get_dummies does) drops the
# column of any class that happens to be absent from that slice, which would
# give train and test label matrices different widths/column meanings.
# Assumes labels are integer class ids starting at 0 -- TODO confirm.
n_cls = int(max(trainy.max(), testy.max())) + 1
_one_hot_table = np.eye(n_cls, dtype=np.float32)

train_images = trainX[:n_data]
train_labels = _one_hot_table[trainy[:n_data, 0].astype(int)]
test_images = testX[:n_data]
test_labels = _one_hot_table[testy[:n_data, 0].astype(int)]

In [None]:
# Image augmentation via flip LR and transpose.
# The augmented set is rebuilt from the raw slice every time, so re-running
# this cell does not quadruple the data again or divide by 255 a second time.
base_images = trainX[:n_data]
base_labels = train_labels[:n_data]  # the first n_data rows are always the un-augmented labels

# Order of the result: originals, LR-flipped, transposed originals, transposed flips.
with_flips = np.concatenate((base_images, base_images[:, :, ::-1, :]), axis=0)
train_images = np.concatenate((with_flips, np.swapaxes(with_flips, 1, 2)), axis=0) / 255
# Every augmented copy keeps the label of its source image.
train_labels = np.concatenate((base_labels,) * 4, axis=0)


In [None]:
# The model is built with the TF1 graph/session API, so eager execution is switched off.
tf.compat.v1.disable_eager_execution()

# Short aliases for the TF1 placeholder and get_variable constructors.
tfph, tfvar = tf.compat.v1.placeholder, tf.compat.v1.get_variable

In [None]:
# Architecture switch read by the model-building cell:
# 'conv' (convolutional), 'lc' (locally connected) or 'fc' (fully connected).
style = 'conv'

In [None]:
# # FC (fully connected): every input value is connected to every output value
# w = np.random.randn(n_in, n_hidden)
# W * W * 3 * W * W * 3
# 9e12 parameters (for an image width of W = 1000)

# # making the model locally connected
# W * W * 3 * (3 * 3 * 3)
# ~1e8 parameters (for W = 1000)

# # using parameter sharing
# input: 3 x 3 x 3
# output: 3 output filters
# 81 parameters
# Realistically, since we have so few parameters, we wouldn't learn only 3 filters; we might learn 1000 filters.

In [None]:
# Build the classification graph for the architecture selected by `style`,
# define the loss/optimizer, and open a session with freshly initialised variables.
if style not in ('conv', 'lc', 'fc'):
    # Without this check a misspelled style silently builds a network whose
    # feature stage contains no layers at all (only repeated leaky ReLUs).
    raise ValueError("style must be 'conv', 'lc' or 'fc', got %r" % (style,))

tf.compat.v1.reset_default_graph()

# The fully connected variant takes flattened images; the others take 32x32 RGB images.
if style == 'fc':
    img_ph = tfph(shape = (None, 32*32*3), dtype = tf.float32)
else:
    img_ph = tfph(shape = (None, 32, 32, 3), dtype = tf.float32)

features = img_ph
for i, size in enumerate([64, 64, 64, 64, 128, 128, 128]):
    if style == 'conv':
        if i in [0, 4]:
            # Stride-2 convolution: downsamples and sets the channel count to `size`.
            features = tf.compat.v1.layers.conv2d(features, size, (3, 3), padding = 'SAME', strides = (2, 2))
        else:
            # Residual block: conv -> leaky ReLU -> conv, added back onto its input.
            residual = tf.compat.v1.layers.conv2d(features, size, (3, 3), padding = 'SAME')
            residual = tf.nn.leaky_relu(residual)
            residual = tf.compat.v1.layers.conv2d(residual, size, (3, 3), padding = 'SAME')
            features = residual + features
    elif style == 'lc':
        # Locally connected layer; always 3 filters, `size` is not used here.
        features = tf.keras.layers.LocallyConnected2D(3, (3, 3), padding = 'valid')(features)
    elif style == 'fc':
        # Dense layer with a fixed 64*64 units; `size` is not used here.
        features = tf.compat.v1.layers.dense(features, 64 * 64 * 1)
    features = tf.nn.leaky_relu(features)

# Collapse spatial feature maps into one vector per example (fc features are already flat).
if style != 'fc':
    linear_features = tf.keras.layers.Flatten()(features)
else:
    linear_features = features
linear_features = tf.compat.v1.layers.Dense(128)(linear_features)
linear_features = tf.nn.leaky_relu(linear_features)

# One dense residual block on the 128-d representation.
residual = tf.compat.v1.layers.Dense(128)(linear_features)
residual = tf.nn.leaky_relu(residual)
residual = tf.compat.v1.layers.Dense(128)(residual)
linear_features = residual + linear_features

linear_features = tf.nn.leaky_relu(linear_features)
yhat_raw = tf.compat.v1.layers.Dense(n_cls)(linear_features)  # class logits
yhat = tf.nn.softmax(yhat_raw, axis=1)                        # class probabilities
y_true = tfph(shape = (None, n_cls), dtype = tf.float32)      # one-hot targets

ce_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = y_true, logits = yhat_raw, axis=1))
# Sum of squares of every trainable variable (kernels and biases alike).
reg_loss = tf.reduce_sum([tf.reduce_sum(tf.square(v)) for v in tf.compat.v1.trainable_variables()])
# L2 regularization is active with a very small weight; set this to 0 to disable it.
reg_weight = 1e-6
loss = ce_loss + reg_loss * reg_weight
opt = tf.compat.v1.train.AdamOptimizer().minimize(loss)
# opt = tf.compat.v1.train.MomentumOptimizer(1e-3, .9, use_nesterov=True).minimize(loss)
sess = tf.compat.v1.Session()
sess.run(tf.compat.v1.global_variables_initializer())

In [None]:
# Feed dict holding the whole (augmented) training set, used for the train-accuracy check.
batch_fd = {img_ph: train_images, y_true: train_labels}

# The training images were scaled to [0, 1]; the test images are still raw
# 0-255 pixel values, so scale them the same way before feeding the network.
valid_images = test_images / 255

if style == 'fc':
    # The fully connected model takes flattened images, for both splits.
    batch_fd[img_ph] = train_images.reshape(-1, 32*32*3)
    valid_images = valid_images.reshape(-1, 32*32*3)

valid_fd = {img_ph: valid_images}
# Integer class ids of the test examples, for comparison with predicted argmaxes.
tst_argmax = test_labels.argmax(1)

In [None]:
# Accuracy of the current network on the full training feed; shown as the cell output.
train_argmax = np.argmax(train_labels, axis=1)
pred_argmax = np.argmax(sess.run(yhat, feed_dict=batch_fd), axis=1)
acc = np.mean(pred_argmax == train_argmax)
acc

In [None]:

# History of test accuracies; the training loop appends one value every 50 steps.
accs = []

In [None]:
# Symbolic gradient of the total loss with respect to the input batch;
# the training loop evaluates it to perturb each batch before the optimizer step.
(grads_to_input,) = tf.gradients(loss, [img_ph])


In [None]:

# Training on perturbed inputs: each step moves every image in the batch a fixed
# distance (0.1 in L2 norm) along the gradient of the loss with respect to the
# input, i.e. in the direction that increases the loss, and then takes one
# optimizer step on the perturbed batch.
# (An earlier variant added Gaussian noise with std 0.1 to the batch instead.)
for i in range(50000):
    samples = np.random.choice(train_images.shape[0], size = 32, replace = False)
    img = train_images[samples]
    labels = train_labels[samples]
    if style == 'fc':
        # The fully connected model takes flattened images, as in batch_fd.
        img = img.reshape(len(samples), -1)

    grad = sess.run(grads_to_input, {img_ph: img, y_true: labels})
    # Per-example L2 norm over all non-batch axes, reshaped so it broadcasts
    # against `grad` whatever the input rank is (4-D images or flat vectors).
    grad_norm = np.sqrt(np.square(grad).reshape(len(grad), -1).sum(1))
    grad_norm = grad_norm.reshape((-1,) + (1,) * (grad.ndim - 1))
    grad_norm = np.clip(grad_norm, 1e-8, np.inf)  # guard against division by zero
    img = img + 0.1 * grad/grad_norm

    _, cur_loss = sess.run([opt, loss], {img_ph: img, y_true: labels})
    if i % 50 == 0:
        # Periodic report: current training loss and accuracy on the held-out feed.
        print(cur_loss)
        predictions = sess.run(yhat, valid_fd)
        pred_argmax = predictions.argmax(1)
        acc = (pred_argmax == tst_argmax).mean()
        accs.append(acc)
        print('test acc', acc)
plt.plot(accs)

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# scikit-learn's signature is confusion_matrix(y_true, y_pred):
# rows are the true classes, columns are the predicted classes.
confusion_matrix(tst_argmax, pred_argmax)

In [None]:
import seaborn as sns

# confusion_matrix takes (y_true, y_pred): rows are true classes, columns are
# predictions. Label the axes so the figure can be read on its own.
ax = sns.heatmap(confusion_matrix(tst_argmax, pred_argmax), annot=True)
ax.set_xlabel('predicted class')
ax.set_ylabel('true class');