<a href="https://colab.research.google.com/github/prasanth5reddy/D2L/blob/master/Convolutional%20Neural%20Networks/convolutions_for_images.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Installing Libraries

In [1]:
!pip install mxnet



Importing Libraries

In [0]:
from mxnet import autograd, nd
from mxnet.gluon import nn

The Cross-Correlation Operator

In [0]:
def corr2d(X, K):
  """Compute the two-dimensional cross-correlation of X with kernel K.

  The kernel is slid over every position where it fits entirely inside X;
  at each position the overlapping window is multiplied element-wise with K
  and summed to give one output element.

  Args:
    X: two-dimensional input array.
    K: two-dimensional kernel array.

  Returns:
    A new array of shape
    (X.shape[0] - K.shape[0] + 1, X.shape[1] - K.shape[1] + 1).
  """
  k_rows, k_cols = K.shape
  out_rows = X.shape[0] - k_rows + 1
  out_cols = X.shape[1] - k_cols + 1
  Y = nd.zeros((out_rows, out_cols))
  for r in range(out_rows):
    for c in range(out_cols):
      # Window of X currently covered by the kernel.
      window = X[r: r + k_rows, c: c + k_cols]
      Y[r, c] = (window * K).sum()
  return Y

In [4]:
# Small worked example: a 3x3 input and a 2x2 kernel give a 2x2 output.
# The top-left element is 0*0 + 1*1 + 3*2 + 4*3 = 19.
X = nd.array([
  [0, 1, 2],
  [3, 4, 5],
  [6, 7, 8],
])
K = nd.array([
  [0, 1],
  [2, 3],
])
corr2d(X, K)


[[19. 25.]
 [37. 43.]]
<NDArray 2x2 @cpu(0)>

Convolutional Layers

In [0]:
class Conv2D(nn.Block):
  """Two-dimensional cross-correlation layer with a learnable kernel and bias.

  The layer owns two parameters: `weight`, the kernel of shape `kernel_size`,
  and `bias`, a single scalar stored with shape (1,).

  Args:
    kernel_size: shape of the kernel, passed as the `shape` of `weight`.
    **kwargs: forwarded unchanged to `nn.Block.__init__`.
  """

  def __init__(self, kernel_size, **kwargs):
    super(Conv2D, self).__init__(**kwargs)
    self.weight = self.params.get('weight', shape=kernel_size)
    self.bias = self.params.get('bias', shape=(1,))

  def forward(self, x):
    """Cross-correlate `x` with the kernel and add the bias.

    `x` is handed directly to `corr2d`, which indexes it along two axes.
    """
    # `self.bias` is a Parameter object, not a callable: read its value with
    # `.data()`, exactly as is done for `self.weight`.
    return corr2d(x, self.weight.data()) + self.bias.data()

Object Edge Detection in Images

In [6]:
# Build a 6x8 "image" of ones and zero out columns 2 through 5, which leaves
# two vertical edges: 1 -> 0 between columns 1 and 2, and 0 -> 1 between
# columns 5 and 6.
X = nd.ones((6, 8))
X[:, 2:6] = 0
X


[[1. 1. 0. 0. 0. 0. 1. 1.]
 [1. 1. 0. 0. 0. 0. 1. 1.]
 [1. 1. 0. 0. 0. 0. 1. 1.]
 [1. 1. 0. 0. 0. 0. 1. 1.]
 [1. 1. 0. 0. 0. 0. 1. 1.]
 [1. 1. 0. 0. 0. 0. 1. 1.]]
<NDArray 6x8 @cpu(0)>

In [7]:
# A 1x2 kernel: under corr2d each output element is X[i, j] - X[i, j + 1],
# i.e. the difference between two horizontally adjacent elements.
K = nd.array([[1, -1]])
K


[[ 1. -1.]]
<NDArray 1x2 @cpu(0)>

In [8]:
# Apply the kernel to the image: 1 marks a 1 -> 0 transition, -1 marks a
# 0 -> 1 transition, and 0 means the two neighbouring elements are equal.
Y = corr2d(X, K)
Y


[[ 0.  1.  0.  0.  0. -1.  0.]
 [ 0.  1.  0.  0.  0. -1.  0.]
 [ 0.  1.  0.  0.  0. -1.  0.]
 [ 0.  1.  0.  0.  0. -1.  0.]
 [ 0.  1.  0.  0.  0. -1.  0.]
 [ 0.  1.  0.  0.  0. -1.  0.]]
<NDArray 6x7 @cpu(0)>

In [9]:
# Transposing the image turns its vertical edges into horizontal ones. The
# kernel only differences horizontally adjacent elements, so it detects
# vertical edges only and the result here is all zeros.
corr2d(X.T, K)


[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
<NDArray 8x5 @cpu(0)>

Learning a Kernel

In [10]:
# Use Gluon's built-in nn.Conv2D here (not the custom Conv2D class defined
# earlier in this notebook): 1 output channel and a kernel of shape (1, 2).
conv2d = nn.Conv2D(1, kernel_size=(1, 2))
# Initialize the layer's parameters with the default initializer.
conv2d.initialize()
conv2d

Conv2D(None -> 1, kernel_size=(1, 2), stride=(1, 1))

In [11]:
# Gluon's two-dimensional convolutional layer works on four-dimensional input
# and output laid out as (no_of_examples, no_of_channels, height, width), so
# give the image and the target a leading example axis and channel axis of
# size 1 each.
X, Y = X.reshape((1, 1, 6, 8)), Y.reshape((1, 1, 6, 7))

for step in range(1, 11):
  with autograd.record():
    Y_hat = conv2d(X)
    # Element-wise squared error between prediction and target.
    l = (Y_hat - Y) ** 2
  l.backward()
  # Manual gradient-descent step with learning rate 3e-2; only the kernel
  # weight is updated in this loop.
  kernel = conv2d.weight
  kernel.data()[:] -= 3e-2 * kernel.grad()
  # `l` was computed before the update, so this is the pre-update loss.
  loss = l.sum().asscalar()
  print('batch %d, loss %.3f' % (step, loss))

batch 1, loss 12.080
batch 2, loss 4.949
batch 3, loss 2.028
batch 4, loss 0.831
batch 5, loss 0.341
batch 6, loss 0.140
batch 7, loss 0.057
batch 8, loss 0.024
batch 9, loss 0.010
batch 10, loss 0.004


In [12]:
# Show the learned kernel reshaped to (1, 2) so it can be compared directly
# with the hand-written edge-detection kernel [[1, -1]].
conv2d.weight.data().reshape((1, 2))


[[ 0.9895    -0.9873705]]
<NDArray 1x2 @cpu(0)>