## 【Problem 1】Creating a 2-D convolutional layer

In [18]:
import numpy as np

class Conv2d:
    def __init__(self, in_channels, out_channels, kernel_size):
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.W = np.random.randn(out_channels, in_channels, kernel_size, kernel_size)
        self.b = np.zeros((out_channels, 1))

    def forward(self, x):
        # Retrieve dimensions
        N, C, H, W = x.shape
        F, _, HH, WW = self.W.shape
        stride = 1  # Assuming stride=1 for simplicity
        pad = 0  # Assuming no padding for simplicity

        # Calculate output dimensions
        out_h = 1 + (H + 2 * pad - HH) // stride
        out_w = 1 + (W + 2 * pad - WW) // stride

        # Initialize output array
        out = np.zeros((N, F, out_h, out_w))

        # Pad input array
        x_padded = np.pad(x, ((0, 0), (0, 0), (pad, pad), (pad, pad)), mode='constant')

        # Perform convolution
        for n in range(N):
            for f in range(F):
                for i in range(out_h):
                    for j in range(out_w):
                        # Extract the region of interest from the padded input
                        x_slice = x_padded[n, :, i * stride:i * stride + HH, j * stride:j * stride + WW]
                        # Perform the convolution operation and add bias
                        out[n, f, i, j] = np.sum(x_slice * self.W[f]) + self.b[f]

        self.output = out  # Store output for later use in backward pass
        return out

    def backward(self, delta):
        # Retrieve dimensions
        N, F, out_h, out_w = delta.shape
        _, C, HH, WW = self.W.shape

        # Initialize gradients
        dx = np.zeros_like(delta)
        dW = np.zeros_like(self.W)
        db = np.zeros_like(self.b)

        # Pad input array
        x_padded = np.pad(self.input, ((0, 0), (0, 0), (pad, pad), (pad, pad)), mode='constant')

        # Compute gradients
        for n in range(N):
            for f in range(F):
                for i in range(out_h):
                    for j in range(out_w):
                        # Extract the region of interest from the padded input
                        x_slice = x_padded[n, :, i * stride:i * stride + HH, j * stride:j * stride + WW]
                        # Update gradients for weights and biases
                        dW[f] += x_slice * delta[n, f, i, j]
                        db[f] += delta[n, f, i, j]
                        dx[n, :, i * stride:i * stride + HH, j * stride:j * stride + WW] += self.W[f] * delta[n, f, i, j]

        # Remove padding from dx
        dx = dx[:, :, pad:-pad, pad:-pad]

        return dx, dW, db


## [Problem 2] Experiments with 2D convolutional layers on small arrays

In [19]:
# Test with the provided input and expected output
x = np.array([[[[ 1,  2,  3,  4],
                [ 5,  6,  7,  8],
                [ 9, 10, 11, 12],
                [13, 14, 15, 16]]]])

W = np.array([[[ 0.,  0.,  0.],
               [ 0.,  1.,  0.],
               [ 0., -1.,  0.]],

              [[ 0.,  0.,  0.],
               [ 0., -1.,  1.],
               [ 0.,  0.,  0.]]])

# Dummy delta for backward propagation test
delta = np.array([[[ -4,  -4],
                   [ 10,  11]],

                  [[  1,  -7],
                   [  1, -11]]])



In [23]:
import numpy as np

class Conv2d:
    def __init__(self, in_channels, out_channels, kernel_size):
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.W = np.random.randn(out_channels, in_channels, kernel_size, kernel_size)
        self.b = np.zeros((out_channels, 1))
        self.output = None  # Initialize output attribute

    def forward(self, x):
        # Retrieve dimensions
        N, C, H, W = x.shape
        F, _, HH, WW = self.W.shape
        stride = 1  # Assuming stride=1 for simplicity
        pad = 0  # Assuming no padding for simplicity

        # Calculate output dimensions
        out_h = H - HH + 1
        out_w = W - WW + 1

        # Initialize output array
        out = np.zeros((N, F, out_h, out_w))

        # Perform convolution
        for n in range(N):
            for f in range(F):
                for i in range(out_h):
                    for j in range(out_w):
                        # Extract the region of interest from the input
                        x_slice = x[n, :, i:i+HH, j:j+WW]
                        # Perform the convolution operation and add bias
                        out[n, f, i, j] = np.sum(x_slice * self.W[f]) + self.b[f][0]

        self.output = out  # Store output for later use in backward pass
        return out

    def backward(self, delta):
        # Retrieve dimensions
        N, F, out_h, out_w = delta.shape
        _, C, HH, WW = self.W.shape

        # Initialize gradients
        dx = np.zeros_like(delta)
        dW = np.zeros_like(self.W)
        db = np.zeros_like(self.b)

        # Pad input array
        x_padded = np.pad(self.input, ((0, 0), (0, 0), (pad, pad), (pad, pad)), mode='constant')

        # Compute gradients
        for n in range(N):
            for f in range(F):
                for i in range(out_h):
                    for j in range(out_w):
                        # Extract the region of interest from the padded input
                        x_slice = x_padded[n, :, i * stride:i * stride + HH, j * stride:j * stride + WW]
                        # Update gradients for weights and biases
                        dW[f] += x_slice * delta[n, f, i, j]
                        db[f] += delta[n, f, i, j]
                        dx[n, :, i * stride:i * stride + HH, j * stride:j * stride + WW] += self.W[f] * delta[n, f, i, j]

        # Remove padding from dx
        dx = dx[:, :, pad:-pad, pad:-pad]

        return dx, dW, db


# Test with the provided input and expected output
x = np.array([[[[ 1,  2,  3,  4],
                [ 5,  6,  7,  8],
                [ 9, 10, 11, 12],
                [13, 14, 15, 16]]]])

W = np.array([[[ 0.,  0.,  0.],
               [ 0.,  1.,  0.],
               [ 0., -1.,  0.]],

              [[ 0.,  0.,  0.],
               [ 0., -1.,  1.],
               [ 0.,  0.,  0.]]])

# Dummy delta for backward propagation test
delta = np.array([[[ -4,  -4],
                   [ 10,  11]],

                  [[  1,  -7],
                   [  1, -11]]])

# Test forward propagation
conv_layer = Conv2d(in_channels=1, out_channels=2, kernel_size=3)
output = conv_layer.forward(x)
print("Forward Propagation Output:")
print(output)
print()

# Expected output from the problem statement
expected_output = np.array([[[-4, -4],
                             [-4, -4]],
                            
                            [[ 1,  1],
                             [ 1,  1]]])

# Verify forward propagation
assert np.allclose(output, expected_output), "Forward Propagation Test Failed!"
print("Forward Propagation Test Passed!")
print()

# Test backward propagation
dx, dW, db = conv_layer.backward(delta)
print("Backward Propagation Gradients:")
print("dx:")
print(dx)
print("dW:")
print(dW)
print("db:")
print(db)
print()

# Expected gradients from the problem statement
expected_dx = np.array([[[[  0.,   0.,   0.,   0.],
                         [  0.,  -4.,   4.,   0.],
                         [  0.,  -5.,  10.,  -5.],
                         [  0.,   4.,  13.,   9.]]]])

expected_dW = np.array([[[[ 21., 29., 19.],
                          [ 27., 37., 24.],
                          [ 18., 24., 15.]]],
                        
                        [[[-15., -7.,  0.],
                          [ -9., -3.,  6.],
                          [-18., -6.,  0.]]]])

expected_db = np.array([[[-10.],
                         [ 12.]]])

# Verify backward propagation
assert np.allclose(dx, expected_dx), "Backward Propagation dx Test Failed!"
assert np.allclose(dW, expected_dW), "Backward Propagation dW Test Failed!"
assert np.allclose(db, expected_db), "Backward Propagation db Test Failed!"
print("Backward Propagation Test Passed!")


Forward Propagation Output:
[[[[ 14.63997078  13.54600775]
   [ 10.26411867   9.17015565]]

  [[-14.21891663 -15.55630623]
   [-19.568475   -20.9058646 ]]]]



AssertionError: Forward Propagation Test Failed!

In [20]:
# Test forward propagation
conv_layer = Conv2d(in_channels=1, out_channels=2, kernel_size=3)
output = conv_layer.forward(x)
print("Forward Propagation Output:")
print(output)
print()

Forward Propagation Output:
[[[[ 2.74008596  4.83831822]
   [11.133015   13.23124726]]

  [[ 5.42397905  7.05288599]
   [11.93960679 13.56851372]]]]



  out[n, f, i, j] = np.sum(x_slice * self.W[f]) + self.b[f]


In [21]:
# Expected output from the problem statement
expected_output = np.array([[[-4, -4],
                             [-4, -4]],
                            
                            [[ 1,  1],
                             [ 1,  1]]])

# Verify forward propagation
assert np.allclose(output, expected_output), "Forward Propagation Test Failed!"
print("Forward Propagation Test Passed!")
print()

AssertionError: Forward Propagation Test Failed!