# More CNN
Sean Wade

In [2]:
from __future__ import absolute_import, division, print_function
import numpy as np
from keras.datasets import mnist
from keras.utils import np_utils

Using TensorFlow backend.


In [3]:
# Load Data
(x_train, y_train_num), (x_test, y_test_num) = mnist.load_data()
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
y_train = np_utils.to_categorical(y_train_num, 100)
y_test = np_utils.to_categorical(y_test_num, 100)

In [4]:
print("Training: ", x_train.shape)
print("Test: ", x_test.shape)

Training:  (60000, 28, 28)
Test:  (10000, 28, 28)


## Random Math

### Cross-Correlation
Given image $I$ and filter $F$ of dimension $KxK$

$$ C(x,y) = \sum_{a=0}^{k-1} \sum_{b=0}^{k-1} I(x+a, y+b)F(a,b)$$

### Convolution
$$ C(x,y) = \sum_{a=0}^{k-1} \sum_{b=0}^{k-1} I(x-a, y-b)F(a,b)$$

Convolution is the same as cross-correlation, except the kernal is flipped (rotated 180).

### Error
$$C = \frac{1}{2} \sum_{p=1}^P (Y - \hat{Y})^2 $$

### Backprop

The Magic:

$$\partial_j^l = f'(u_j^l) \circ \text{conv2}(\partial_j^{l+1}, \text{rot180}(k_j^{l+1}),'\text{full}')$$

In standard neural networks we can define the error of a neuron as 

$\partial^l_j = \frac{\partial C}{\partial^l_j}$

But for convolutions it is 2 dimensional so we instead have $z_{x,y}$

$$z_{x,y}^{l+1} = \sigma ( w_{x,y}^{l+1} \ast z_{x,y}^l + b_{x,y}^{l+1} )= sigma (\sum_a \sum_b w_{x,y}^{l+1} z_{x-a,y-b}^l)$$

$$\partial_{x,y}^l = \frac{\partial C}{\partial z_{x,y}^l} = \sum_{x'} \sum_{y'} \frac{\partial C}{\partial_{x',y'}^{l+1}} \frac{\partial_{x',y'}^{l+1}}{\partial_{x,y}^{l}}$$


### Checking gradients with Finite-Differences
$$\frac{\partial E}{\partial w_i} \approx \frac{E(w_i + \epsilon) - E(w_i - \epsilon)}{2\epsilon}$$

Pick $\epsilon$ around $10^{-8}$ so its small but not too small.

## Model

- conv2d (BS, 28, 28, 1)
- activ (BS, 26, 26, 32)
- conv2d (BS, 26, 26, 32)
- activ (BS, 24, 24, 32)
- flatten (BS, 24, 24, 32)
- dense (BS, 18432)
- activ (BS, 10)

In [5]:
BATCH_SIZE = 28
EPOCHS = 2

In [6]:
def relu(X):
    """ The non-linear relu activation.
    Args:
      x : (np.array) Tensor
    returns:
      relu(x) : (np.array) Tensor of same dimension.
    """
    return X[X < 0] = 0

SyntaxError: invalid syntax (<ipython-input-6-c7ea9af45265>, line 8)

## Convolutions

In [None]:
def conv2d(kern, x):
    h, w = x.shape
    k = len(kern)
    new_h = h - k + 1
    new_w = w - k + 1
    result = np.zeros((new_h, new_w))
    for i in range(new_w):
        for j in range(new_h):
            result[j, i] = np.sum(x[j:j+k,i:i+k].flatten() * kern.flatten())
    return result

In [None]:
# Implementation using regular convolution
def conv(K, X):
    batch, depth, height, width = X.shape
    new_depth, k = K.shape[1], K.shape[3]
    new_height = height-k+1
    new_width = width-k+1
    result = np.zeros((batch, new_depth, new_height, new_width))
    for b in range(batch):
        for d in range(new_depth):
             result[b,d] = conv2d(K[b,d], X[b,d])
    return result

In [None]:
# Complete convolution in one funtion
def full_conv(K, X):
    batch, depth, height, width = X.shape
    new_depth, k = K.shape[1], K.shape[3]
    new_height = height-k+1
    new_width = width-k+1
    result = np.zeros((batch, new_depth, new_height, new_width))
    for b in range(batch):
        for d in range(new_depth):
            for w in range(new_width):
                for h in range(new_height):
                    result[b,d,h:h+k,w:w+k] = np.sum(X[b,d,h:h+k,w:w+k].flatten() * K[b,d].flatten())
    return result

In [None]:
import numpy as np
from numpy.lib.stride_tricks import as_strided
 
# Generate an image
H = 4
W = 5
x = np.arange(1, H*W+1).reshape(H,W)
print(x)
# [[ 1  2  3  4  5]
#  [ 6  7  8  9 10]
#  [11 12 13 14 15]
#  [16 17 18 19 20]]
 
# Generate a filter
stride = 1
HH = 3
WW = 3
 
# Perform an im2col operation on x
OH = (H - HH) / stride + 1  # output height
OW = (W - WW) / stride + 1  # output width
shape = (OH, OW, HH, WW)  # define the shape of output matrix
strides = (stride*W, stride, 1*W, 1)  # define the strides(offset) according to shape
strides = x.itemsize * np.array(strides)  # turn unit of the strides into byte
x_stride = as_strided(x, shape=shape, strides=strides)
x_cols = np.ascontiguousarray(x_stride)  # put our convenience matrix together im memory
print(x_cols[0, 0])
# [[ 1  2  3]
#  [ 6  7  8]
#  [11 12 13]]
print(x_cols[1, 0])
# [[ 6  7  8]
#  [11 12 13]
#  [16 17 18]]
print(x_cols[1, 2])
# [[ 8  9 10]
#  [13 14 15]
#  [18 19 20]]

In [None]:
# Input Image(28,28,1)
# Output (10,)
# (BATCH, DEPTH, HEIGHT, WIDTH)

# Initialize the weights
W1 = np.random.randn(BATCH_SIZE, 1, 3, 3)
# B1 = np.random.randn(32)
W2 = np.random.randn(BATCH_SIZE, 32, 3, 3)
# B1 = np.random.randn(32)
W3 = np.random.randn(18432, 10)
B3 = np.random.randn(10)

In [7]:
for i in range(EPOCHS):
    x = None # batch x
    y = None
    
    # Forward Prop
    h1 = conv(W1, batch)
    a1 = relu(h1) # np.max(0,h1)
    h2 = conv(W2, a1)
    a2 = relu(h2)
    dh3_shape = ap.shape
    h3 = a2.flatten()
    scores = np.dot(h3, W3) + B3
    exp_scores = np.exp(scores)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    
    # compute the loss: average cross-entropy loss and regularization
    correct_logprobs = -np.log(probs[range(BATCH_SIZE), y])
    data_loss = np.sum(corect_logprobs)/BATCH_SIZE
    #reg_loss = 0.5*reg*np.sum(W*W) + 0.5*reg*np.sum(W2*W2) #FIXME
    reg_loss = 0
    loss = data_loss + reg_loss

    # Back Prop
    # ----------------------------------------
    # Loss
    dscores = probs
    dscores[range(BATCH_SIZE), y] -= 1
    dscores /= BATCH_SIZE
    
    # Fully connected layer
    dW3 = np.dot(h3, dscores)
    dB3 = np.sum(dscores, axis=0, keepdims=True)
    dh3 = np.dot(dscores, W3.T)
    
    # Undo flatten
    dh3 = dh3.reshape(dh3_shape)
    
    # Relu backprop
    da2 = dh3
    da2[h3 <= 0] = 0
    
    # 2nd Conv Layer
    dW2
    
    
    
    
    
    
    # ...
    
    if i % 10 == 0:
        print("[%d] Loss: %f" % (i, loss))
    

SyntaxError: invalid syntax (<ipython-input-7-40469a51c827>, line 16)

In [None]:
# --- CALC LOSS ---
# Forward propagation to calculate our predictions
z1 = X.dot(W1) + b1
a1 = np.tanh(z1)
z2 = a1.dot(W2) + b2
exp_scores = np.exp(z2)
probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

# Calculating the loss
corect_logprobs = -np.log(probs[range(num_examples), y])
data_loss = np.sum(corect_logprobs)

# Add regulatization term to loss (optional)
data_loss += reg_lambda/2 * (np.sum(np.square(W1)) + np.sum(np.square(W2)))
return 1./num_examples * data_loss


In [None]:
import scipy as sp
sp.optimize.check_grad(fun, grad_fun, param_init)

In [41]:
a = np.ones((3,4,5,6))

In [78]:
conv(np.ones((2,10,3,3)), np.ones((2,10,10,10)))
conv2d(np.ones((3,3)), np.ones((10,10)))

array([[ 9.,  9.,  9.,  9.,  9.,  9.,  9.,  9.],
       [ 9.,  9.,  9.,  9.,  9.,  9.,  9.,  9.],
       [ 9.,  9.,  9.,  9.,  9.,  9.,  9.,  9.],
       [ 9.,  9.,  9.,  9.,  9.,  9.,  9.,  9.],
       [ 9.,  9.,  9.,  9.,  9.,  9.,  9.,  9.],
       [ 9.,  9.,  9.,  9.,  9.,  9.,  9.,  9.],
       [ 9.,  9.,  9.,  9.,  9.,  9.,  9.,  9.],
       [ 9.,  9.,  9.,  9.,  9.,  9.,  9.,  9.]])

In [None]:
class ConvLayer():
    
    def forwardPass(self, l_input):
        self.last_input = l_input
        self.last_input_shape = l_input.shape
        return conv2d(l_input, self.W)
    
    def backwardPass(self, out_grad):
        grad = np.empty(self.last_input_shape)
        self.dW = np.empty(self.W.shape)
        bprop_conv(self.last_input, out_grad)
        batch_size = out_grad.shape[0]
        self.db = np.sum(out, axis(0, 2, 3)) / batch_size
        self.dW -= self.weight_decay * self.W
        return grad
        

In [93]:
import tensorflow as tf

I = tf.Variable(np.ones((1,5,5,5)))
K = tf.Variable(np.ones((3,3,5,7)))
op = tf.nn.conv2d(I, K, [1,1,1,1], padding='VALID')

sess = tf.Session()
sess.run(tf.initialize_all_variables())
sess.run(op)


InvalidArgumentError: No OpKernel was registered to support Op 'Conv2D' with these attrs
	 [[Node: Conv2D_6 = Conv2D[T=DT_DOUBLE, data_format="NHWC", padding="VALID", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true](Variable_16/read, Variable_17/read)]]
Caused by op u'Conv2D_6', defined at:
  File "//anaconda/lib/python2.7/runpy.py", line 162, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "//anaconda/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/Users/seanwade/.local/lib/python2.7/site-packages/traitlets/config/application.py", line 596, in launch_instance
    app.start()
  File "//anaconda/lib/python2.7/site-packages/ipykernel/kernelapp.py", line 389, in start
    ioloop.IOLoop.instance().start()
  File "//anaconda/lib/python2.7/site-packages/zmq/eventloop/ioloop.py", line 151, in start
    super(ZMQIOLoop, self).start()
  File "//anaconda/lib/python2.7/site-packages/tornado/ioloop.py", line 831, in start
    self._run_callback(callback)
  File "//anaconda/lib/python2.7/site-packages/tornado/ioloop.py", line 604, in _run_callback
    ret = callback()
  File "//anaconda/lib/python2.7/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "//anaconda/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 234, in enter_eventloop
    self.eventloop(self)
  File "//anaconda/lib/python2.7/site-packages/ipykernel/eventloops.py", line 241, in loop_cocoa
    show.mainloop()
  File "//anaconda/lib/python2.7/site-packages/matplotlib/backends/backend_macosx.py", line 29, in mainloop
    _macosx.show()
  File "//anaconda/lib/python2.7/site-packages/matplotlib/backend_bases.py", line 1305, in _on_timer
    ret = func(*args, **kwargs)
  File "//anaconda/lib/python2.7/site-packages/ipykernel/eventloops.py", line 218, in doi
    kernel.do_one_iteration()
  File "//anaconda/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 267, in do_one_iteration
    stream.flush(zmq.POLLIN, 1)
  File "//anaconda/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 345, in flush
    self._handle_recv()
  File "//anaconda/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 465, in _handle_recv
    self._run_callback(callback, msg)
  File "//anaconda/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 407, in _run_callback
    callback(*args, **kwargs)
  File "//anaconda/lib/python2.7/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "//anaconda/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 252, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "//anaconda/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 213, in dispatch_shell
    handler(stream, idents, msg)
  File "//anaconda/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 362, in execute_request
    user_expressions, allow_stdin)
  File "//anaconda/lib/python2.7/site-packages/ipykernel/ipkernel.py", line 175, in do_execute
    shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/seanwade/.local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2723, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/seanwade/.local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2825, in run_ast_nodes
    if self.run_code(code, result):
  File "/Users/seanwade/.local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2885, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-93-428db4a8cf9b>", line 5, in <module>
    op = tf.nn.conv2d(I, K, [1,1,1,1], padding='VALID')
  File "//anaconda/lib/python2.7/site-packages/tensorflow/python/ops/gen_nn_ops.py", line 394, in conv2d
    data_format=data_format, name=name)
  File "//anaconda/lib/python2.7/site-packages/tensorflow/python/ops/op_def_library.py", line 704, in apply_op
    op_def=op_def)
  File "//anaconda/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2260, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "//anaconda/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1230, in __init__
    self._traceback = _extract_stack()
