## [Problem 1] Creating a one-dimensional convolutional layer class that limits the number of channels to one

In [36]:
import numpy as np

class SimpleConv1d:
    """One-dimensional convolution layer restricted to a single channel.

    The filter slides over the input with stride 1 and no padding, computing
    ``A[i] = dot(X[i:i + filter_size], W) + b`` at every valid position.

    Parameters
    ----------
    filter_size : int
        Number of taps in the filter.
    initializer : object
        Must provide ``W(n_nodes1, n_nodes2)`` and ``B(n_nodes)``.
    optimizer : object
        Must provide ``update(layer)``; it is called at the end of ``backward``.
    """

    def __init__(self, filter_size, initializer, optimizer):
        self.filter_size = filter_size
        self.optimizer = optimizer
        # initializer.W(1, filter_size) yields a (1, filter_size) matrix.  Flatten
        # it so that np.dot in forward() and the slice updates in backward() work
        # on a plain 1-D filter instead of raising a shape error.
        self.W = np.asarray(initializer.W(1, filter_size), dtype=np.float64).reshape(-1)
        self.b = initializer.B(1)  # bias term

    def forward(self, X):
        """Slide the filter over the 1-D array ``X`` and return the activations.

        The input and the output length are cached on the instance because
        ``backward`` needs them.
        """
        self.X = X
        self.out_size = len(X) - self.filter_size + 1
        responses = [
            np.dot(self.X[pos:pos + self.filter_size], self.W)
            for pos in range(self.out_size)
        ]
        self.A = np.array(responses) + self.b
        return self.A

    def backward(self, dA):
        """Compute ``dW``, ``db`` and ``dX`` from the upstream gradient ``dA``.

        The optimizer step is applied after the gradients have been computed,
        so the returned ``dX`` is based on the weights used in ``forward``.
        """
        self.dW = np.zeros_like(self.W, dtype=np.float64)
        self.db = dA.sum()
        self.dX = np.zeros_like(self.X, dtype=np.float64)

        for pos in range(self.out_size):
            window = slice(pos, pos + self.filter_size)
            self.dW += dA[pos] * self.X[window]
            self.dX[window] += dA[pos] * self.W

        self.optimizer.update(self)
        return self.dX

class SimpleInitializer:
    """Gaussian weight initializer with a fixed standard deviation.

    Parameters
    ----------
    sigma : float
        Standard deviation of the zero-mean normal distribution used for weights.
    """

    def __init__(self, sigma):
        self.sigma = sigma

    def W(self, n_nodes1, n_nodes2, *extra_dims):
        """Return a float64 weight array of shape ``(n_nodes1, n_nodes2, *extra_dims)``.

        The two-argument form is unchanged.  Additional dimensions are accepted
        so that callers needing a 3-D filter bank, such as
        ``W(out_channels, in_channels, filter_size)``, do not raise ``TypeError``.
        """
        return self.sigma * np.random.randn(n_nodes1, n_nodes2, *extra_dims)

    def B(self, n_nodes):
        """Return a zero-initialised float64 bias vector of length ``n_nodes``."""
        return np.zeros(n_nodes, dtype=np.float64)

class SGD:
    """Plain stochastic gradient descent.

    Parameters
    ----------
    lr : float
        Learning rate applied to every gradient.
    """

    def __init__(self, lr):
        self.lr = lr

    def update(self, layer):
        """Apply one gradient step to ``layer.W`` and ``layer.b`` and return the layer.

        The parameters are rebound to new arrays rather than modified in place.
        An array the caller assigned to ``layer.W`` or ``layer.b`` (and may still
        hold a reference to) is therefore left untouched, and integer-typed
        parameters are promoted to float instead of raising a casting error.
        """
        layer.W = layer.W - self.lr * layer.dW
        layer.b = layer.b - self.lr * layer.db
        return layer



## [Problem 2] Output size calculation after one-dimensional convolution


In [37]:
def calculate_output_size(input_size, filter_size, padding=0, stride=1):
    """Return the number of output positions of a 1-D convolution.

    Computes ``(input_size - filter_size + 2 * padding) // stride + 1``.

    Parameters
    ----------
    input_size : int
        Length of the input along the convolved axis.
    filter_size : int
        Length of the filter.
    padding : int, optional
        Number of elements added to each end of the input.
    stride : int, optional
        Step between successive filter positions.
    """
    travel = input_size - filter_size + 2 * padding
    full_steps = travel // stride
    return full_steps + 1

# Usage example: a length-10 input with a size-3 filter, relying on the
# function's defaults for padding (0) and stride (1).
input_size = 10
filter_size = 3
output_size = calculate_output_size(input_size, filter_size)
print(f'Output size: {output_size}')


Output size: 8


## [Problem 3] Experimenting with the one-dimensional convolutional layer on a small array

In [38]:
# Example input, weight, and bias for a hand-checkable test case
x = np.array([1, 2, 3, 4], dtype=np.float64)
w = np.array([3, 5, 7], dtype=np.float64)
b = np.array([1], dtype=np.float64)

# Expected forward output, worked out by hand:
#   1*3 + 2*5 + 3*7 + 1 = 35
#   2*3 + 3*5 + 4*7 + 1 = 50
expected_a = np.array([35, 50], dtype=np.float64)

# Instantiate initializer and optimizer
initializer = SimpleInitializer(sigma=0.01)
optimizer = SGD(lr=0.01)
# Create convolutional layer
conv1d = SimpleConv1d(filter_size=3, initializer=initializer, optimizer=optimizer)
# Replace the randomly initialised parameters with the known values above.
# Note that these assignments share the arrays: conv1d.W is w and conv1d.b is b.
conv1d.W = w  # Manually set weights for testing
conv1d.b = b  # Manually set bias for testing

In [39]:
# Forward propagation
a = conv1d.forward(x)
print(f'Forward propagation output: {a}')
print(f'Expected output: {expected_a}')
# Check the result instead of relying on a visual comparison of the two prints.
np.testing.assert_allclose(a, expected_a)

Forward propagation output: [35. 50.]
Expected output: [35. 50.]


In [40]:
# Backward propagation
delta_a = np.array([10, 20], dtype=np.float64)
delta_x = conv1d.backward(delta_a)
print(f'Gradient wrt input: {delta_x}')
# Expected input gradient for the weights [3, 5, 7] set in the setup cell:
#   10 * [3, 5, 7, 0] + 20 * [0, 3, 5, 7] = [30, 110, 170, 140]
# backward() also applies an optimizer step, so this only holds for the first
# call after the setup cell has been run.
expected_delta_x = np.array([30, 110, 170, 140], dtype=np.float64)
np.testing.assert_allclose(delta_x, expected_delta_x)

Gradient wrt input: [ 30. 110. 170. 140.]


## [Problem 4] Creating a one-dimensional convolutional layer class that does not limit the number of channels

In [41]:
import numpy as np

class Conv1d:
    """One-dimensional convolution layer with arbitrary channel counts.

    Stride is 1 and no padding is applied.  Like ``SimpleConv1d``, the filter is
    applied without flipping (cross-correlation):

        A[i, k] = b[i] + sum_j sum_s X[j, k + s] * W[i, j, s]

    Parameters
    ----------
    filter_size : int
        Number of taps per filter.
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels.
    initializer : object, optional
        Weight initializer exposing ``W(...)``; defaults to
        ``SimpleInitializer(sigma=0.01)``.
    optimizer : object, optional
        Optimizer exposing ``update(layer)``; defaults to ``SGD(lr=0.01)``.
    """

    def __init__(self, filter_size, in_channels, out_channels, initializer=None, optimizer=None):
        self.filter_size = filter_size
        self.in_channels = in_channels
        self.out_channels = out_channels

        # Fall back to the notebook's default initializer and optimizer.
        self.initializer = SimpleInitializer(sigma=0.01) if initializer is None else initializer
        self.optimizer = SGD(lr=0.01) if optimizer is None else optimizer

        self.W = self._init_filters()
        self.b = np.zeros(out_channels)

        # Gradients, filled in by backward().
        self.dW = None
        self.db = None
        self.dX = None

        # Values cached by forward() for use in backward().
        self.X = None
        self.out_size = None

    def _init_filters(self):
        """Return a float64 filter bank of shape (out_channels, in_channels, filter_size)."""
        shape = (self.out_channels, self.in_channels, self.filter_size)
        try:
            weights = np.asarray(self.initializer.W(*shape))
        except TypeError:
            # An initializer whose W() only takes two sizes cannot build the 3-D
            # bank directly: draw a 2-D matrix and reshape it instead.
            flat = self.initializer.W(self.in_channels * self.filter_size, self.out_channels)
            weights = np.asarray(flat).T
        return weights.astype(np.float64).reshape(shape)

    def forward(self, X):
        """
        Forward propagation through the convolutional layer.

        Parameters:
        - X: Input data, a 2D array of shape (in_channels, input_size)

        Returns:
        - A: Output data after convolution, a 2D array of shape (out_channels, output_size)

        Raises:
        - ValueError: if the input is shorter than the filter.
        """
        self.X = X
        _, input_size = X.shape
        if input_size < self.filter_size:
            raise ValueError(
                f"input_size ({input_size}) must be at least filter_size ({self.filter_size})"
            )
        self.out_size = input_size - self.filter_size + 1
        A = np.zeros((self.out_channels, self.out_size))

        for i in range(self.out_channels):
            for j in range(self.in_channels):
                A[i] += np.correlate(X[j], self.W[i, j], mode='valid')
            # The bias belongs to the output channel, so add it exactly once.
            A[i] += self.b[i]

        return A

    def backward(self, dA):
        """
        Backward propagation to compute gradients.

        The optimizer step is applied after the gradients are computed, so the
        returned ``dX`` is based on the weights used in ``forward``.

        Parameters:
        - dA: Gradient of loss with respect to output, a 2D array of shape (out_channels, output_size)

        Returns:
        - dX: Gradient of loss with respect to input, a 2D array of shape (in_channels, input_size)
        """
        dA = np.asarray(dA)
        self.dW = np.zeros_like(self.W, dtype=np.float64)
        # One bias per output channel: sum the upstream gradient over positions.
        self.db = dA.sum(axis=1, dtype=np.float64)
        # Explicit float buffer so integer inputs do not break accumulation.
        self.dX = np.zeros(self.X.shape, dtype=np.float64)

        for i in range(self.out_channels):
            for j in range(self.in_channels):
                # dW[i, j, s] = sum_k dA[i, k] * X[j, k + s]
                self.dW[i, j] += np.correlate(self.X[j], dA[i], mode='valid')
                # dX[j, n] = sum_k dA[i, k] * W[i, j, n - k]
                self.dX[j] += np.convolve(dA[i], self.W[i, j], mode='full')

        self.optimizer.update(self)
        return self.dX


## [Problem 5] (Advanced task) Implementing padding

In [42]:
class Conv1d:
    """One-dimensional multi-channel convolution layer with zero padding.

    Stride is 1.  ``padding`` zeros are added to both ends of every channel
    before the filter is applied without flipping (cross-correlation):

        A[i, k] = b[i] + sum_j sum_s Xp[j, k + s] * W[i, j, s]

    where ``Xp`` is the zero-padded input.

    Parameters
    ----------
    filter_size : int
        Number of taps per filter.
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels.
    initializer : object, optional
        Weight initializer exposing ``W(...)``; defaults to
        ``SimpleInitializer(sigma=0.01)``.
    optimizer : object, optional
        Optimizer exposing ``update(layer)``; defaults to ``SGD(lr=0.01)``.
    padding : int, optional
        Number of zeros added to each end of the input.
    """

    def __init__(self, filter_size, in_channels, out_channels, initializer=None, optimizer=None, padding=0):
        self.filter_size = filter_size
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.padding = padding

        # Fall back to the notebook's default initializer and optimizer.
        self.initializer = SimpleInitializer(sigma=0.01) if initializer is None else initializer
        self.optimizer = SGD(lr=0.01) if optimizer is None else optimizer

        self.W = self._init_filters()
        self.b = np.zeros(out_channels)

        # Gradients, filled in by backward().
        self.dW = None
        self.db = None
        self.dX = None

        # Values cached by forward() for use in backward().
        self.X = None
        self.out_size = None

    def _init_filters(self):
        """Return a float64 filter bank of shape (out_channels, in_channels, filter_size)."""
        shape = (self.out_channels, self.in_channels, self.filter_size)
        try:
            weights = np.asarray(self.initializer.W(*shape))
        except TypeError:
            # An initializer whose W() only takes two sizes cannot build the 3-D
            # bank directly: draw a 2-D matrix and reshape it instead.
            flat = self.initializer.W(self.in_channels * self.filter_size, self.out_channels)
            weights = np.asarray(flat).T
        return weights.astype(np.float64).reshape(shape)

    def _pad(self, X):
        """Zero-pad the last axis of a (channels, length) array on both sides."""
        return np.pad(X, ((0, 0), (self.padding, self.padding)), mode='constant')

    def forward(self, X):
        """
        Forward propagation through the convolutional layer.

        Parameters:
        - X: Input data, a 2D array of shape (in_channels, input_size)

        Returns:
        - A: Output data after convolution, a 2D array of shape (out_channels, output_size)

        Raises:
        - ValueError: if the padded input is shorter than the filter.
        """
        self.X = X
        _, input_size = X.shape
        padded_size = input_size + 2 * self.padding
        if padded_size < self.filter_size:
            raise ValueError(
                f"padded input size ({padded_size}) must be at least filter_size ({self.filter_size})"
            )
        self.out_size = padded_size - self.filter_size + 1
        A = np.zeros((self.out_channels, self.out_size))

        padded_X = self._pad(X)

        for i in range(self.out_channels):
            for j in range(self.in_channels):
                A[i] += np.correlate(padded_X[j], self.W[i, j], mode='valid')
            # The bias belongs to the output channel, so add it exactly once.
            A[i] += self.b[i]

        return A

    def backward(self, dA):
        """
        Backward propagation to compute gradients.

        The optimizer step is applied after the gradients are computed, so the
        returned ``dX`` is based on the weights used in ``forward``.

        Parameters:
        - dA: Gradient of loss with respect to output, a 2D array of shape (out_channels, output_size)

        Returns:
        - dX: Gradient of loss with respect to input, a 2D array of shape (in_channels, input_size)
        """
        dA = np.asarray(dA)
        _, input_size = self.X.shape
        padded_X = self._pad(self.X)
        self.dW = np.zeros_like(self.W, dtype=np.float64)
        # One bias per output channel: sum the upstream gradient over positions.
        self.db = dA.sum(axis=1, dtype=np.float64)
        # Explicit float buffer so integer inputs do not break accumulation.
        padded_dX = np.zeros(padded_X.shape, dtype=np.float64)

        for i in range(self.out_channels):
            for j in range(self.in_channels):
                # dW[i, j, s] = sum_k dA[i, k] * Xp[j, k + s]
                self.dW[i, j] += np.correlate(padded_X[j], dA[i], mode='valid')
                # dXp[j, n] = sum_k dA[i, k] * W[i, j, n - k]
                padded_dX[j] += np.convolve(dA[i], self.W[i, j], mode='full')

        # Drop the gradient that fell on the padding so dX matches the input shape.
        self.dX = padded_dX[:, self.padding:self.padding + input_size]

        self.optimizer.update(self)
        return self.dX


## [Problem 6] (Advanced task) Supporting mini-batches

In [43]:
class Conv1d:
    """One-dimensional multi-channel convolution layer with padding and mini-batch support.

    Stride is 1.  Each sample is zero-padded by ``padding`` on both ends and the
    filter is applied without flipping (cross-correlation):

        A[n, i, k] = b[i] + sum_j sum_s Xp[n, j, k + s] * W[i, j, s]

    where ``Xp`` is the zero-padded input.

    Parameters
    ----------
    filter_size : int
        Number of taps per filter.
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels.
    initializer : object, optional
        Weight initializer exposing ``W(...)``; defaults to
        ``SimpleInitializer(sigma=0.01)``.
    optimizer : object, optional
        Optimizer exposing ``update(layer)``; defaults to ``SGD(lr=0.01)``.
    padding : int, optional
        Number of zeros added to each end of the input.
    batch_size : int, optional
        Stored on the instance only; the actual batch size is read from the
        input's shape in ``forward``.
    """

    def __init__(self, filter_size, in_channels, out_channels, initializer=None, optimizer=None, padding=0, batch_size=1):
        self.filter_size = filter_size
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.padding = padding
        self.batch_size = batch_size

        # Fall back to the notebook's default initializer and optimizer.
        self.initializer = SimpleInitializer(sigma=0.01) if initializer is None else initializer
        self.optimizer = SGD(lr=0.01) if optimizer is None else optimizer

        self.W = self._init_filters()
        self.b = np.zeros(out_channels)

        # Gradients, filled in by backward().
        self.dW = None
        self.db = None
        self.dX = None

        # Values cached by forward() for use in backward().
        self.X = None
        self.out_size = None

    def _init_filters(self):
        """Return a float64 filter bank of shape (out_channels, in_channels, filter_size)."""
        shape = (self.out_channels, self.in_channels, self.filter_size)
        try:
            weights = np.asarray(self.initializer.W(*shape))
        except TypeError:
            # An initializer whose W() only takes two sizes cannot build the 3-D
            # bank directly: draw a 2-D matrix and reshape it instead.
            flat = self.initializer.W(self.in_channels * self.filter_size, self.out_channels)
            weights = np.asarray(flat).T
        return weights.astype(np.float64).reshape(shape)

    def _pad(self, X):
        """Zero-pad the last axis of a (batch, channels, length) array on both sides."""
        return np.pad(X, ((0, 0), (0, 0), (self.padding, self.padding)), mode='constant')

    def forward(self, X):
        """
        Forward propagation through the convolutional layer.

        Parameters:
        - X: Input data, a 3D array of shape (batch_size, in_channels, input_size)

        Returns:
        - A: Output data after convolution, a 3D array of shape (batch_size, out_channels, output_size)

        Raises:
        - ValueError: if the padded input is shorter than the filter.
        """
        self.X = X
        batch_size, _, input_size = X.shape
        padded_size = input_size + 2 * self.padding
        if padded_size < self.filter_size:
            raise ValueError(
                f"padded input size ({padded_size}) must be at least filter_size ({self.filter_size})"
            )
        self.out_size = padded_size - self.filter_size + 1
        A = np.zeros((batch_size, self.out_channels, self.out_size))

        padded_X = self._pad(X)

        for n in range(batch_size):
            for i in range(self.out_channels):
                for j in range(self.in_channels):
                    A[n, i] += np.correlate(padded_X[n, j], self.W[i, j], mode='valid')
                # The bias belongs to the output channel, so add it exactly once.
                A[n, i] += self.b[i]

        return A

    def backward(self, dA):
        """
        Backward propagation to compute gradients.

        ``dW`` and ``db`` are summed over the batch.  The optimizer step is
        applied after the gradients are computed, so the returned ``dX`` is
        based on the weights used in ``forward``.

        Parameters:
        - dA: Gradient of loss with respect to output, a 3D array of shape (batch_size, out_channels, output_size)

        Returns:
        - dX: Gradient of loss with respect to input, a 3D array of shape (batch_size, in_channels, input_size)
        """
        dA = np.asarray(dA)
        batch_size, _, input_size = self.X.shape
        padded_X = self._pad(self.X)
        self.dW = np.zeros_like(self.W, dtype=np.float64)
        # One bias per output channel: sum over samples and positions.
        self.db = dA.sum(axis=(0, 2), dtype=np.float64)
        # Explicit float buffer so integer inputs do not break accumulation.
        padded_dX = np.zeros(padded_X.shape, dtype=np.float64)

        for n in range(batch_size):
            for i in range(self.out_channels):
                for j in range(self.in_channels):
                    # dW[i, j, s] += sum_k dA[n, i, k] * Xp[n, j, k + s]
                    self.dW[i, j] += np.correlate(padded_X[n, j], dA[n, i], mode='valid')
                    # dXp[n, j, m] += sum_k dA[n, i, k] * W[i, j, m - k]
                    padded_dX[n, j] += np.convolve(dA[n, i], self.W[i, j], mode='full')

        # Drop the gradient that fell on the padding so dX matches the input shape.
        self.dX = padded_dX[:, :, self.padding:self.padding + input_size]

        self.optimizer.update(self)
        return self.dX


## [Problem 7] (Advanced task) Arbitrary stride

In [44]:
class Conv1d:
    """One-dimensional multi-channel convolution layer with padding, mini-batches and stride.

    Each sample is zero-padded by ``padding`` on both ends; the filter is then
    applied without flipping (cross-correlation) every ``stride`` positions:

        A[n, i, k] = b[i] + sum_j sum_s Xp[n, j, k * stride + s] * W[i, j, s]

    where ``Xp`` is the zero-padded input.

    Parameters
    ----------
    filter_size : int
        Number of taps per filter.
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels.
    initializer : object, optional
        Weight initializer exposing ``W(...)``; defaults to
        ``SimpleInitializer(sigma=0.01)``.
    optimizer : object, optional
        Optimizer exposing ``update(layer)``; defaults to ``SGD(lr=0.01)``.
    padding : int, optional
        Number of zeros added to each end of the input.
    batch_size : int, optional
        Stored on the instance only; the actual batch size is read from the
        input's shape in ``forward``.
    stride : int, optional
        Step between successive filter positions.
    """

    def __init__(self, filter_size, in_channels, out_channels, initializer=None, optimizer=None, padding=0, batch_size=1, stride=1):
        self.filter_size = filter_size
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.padding = padding
        self.batch_size = batch_size
        self.stride = stride

        # Fall back to the notebook's default initializer and optimizer.
        self.initializer = SimpleInitializer(sigma=0.01) if initializer is None else initializer
        self.optimizer = SGD(lr=0.01) if optimizer is None else optimizer

        self.W = self._init_filters()
        self.b = np.zeros(out_channels)

        # Gradients, filled in by backward().
        self.dW = None
        self.db = None
        self.dX = None

        # Values cached by forward() for use in backward().
        self.X = None
        self.out_size = None

    def _init_filters(self):
        """Return a float64 filter bank of shape (out_channels, in_channels, filter_size)."""
        shape = (self.out_channels, self.in_channels, self.filter_size)
        try:
            weights = np.asarray(self.initializer.W(*shape))
        except TypeError:
            # An initializer whose W() only takes two sizes cannot build the 3-D
            # bank directly: draw a 2-D matrix and reshape it instead.
            flat = self.initializer.W(self.in_channels * self.filter_size, self.out_channels)
            weights = np.asarray(flat).T
        return weights.astype(np.float64).reshape(shape)

    def _pad(self, X):
        """Zero-pad the last axis of a (batch, channels, length) array on both sides."""
        return np.pad(X, ((0, 0), (0, 0), (self.padding, self.padding)), mode='constant')

    def _window(self, k):
        """Return the slice of the padded input covered by output position ``k``."""
        start = k * self.stride
        return slice(start, start + self.filter_size)

    def forward(self, X):
        """
        Forward propagation through the convolutional layer.

        Parameters:
        - X: Input data, a 3D array of shape (batch_size, in_channels, input_size)

        Returns:
        - A: Output data after convolution, a 3D array of shape (batch_size, out_channels, output_size)
        """
        self.X = X
        batch_size, _, input_size = X.shape
        self.out_size = (input_size - self.filter_size + 2 * self.padding) // self.stride + 1
        A = np.zeros((batch_size, self.out_channels, self.out_size))

        padded_X = self._pad(X)

        for k in range(self.out_size):
            patch = padded_X[:, :, self._window(k)]  # (batch, in_channels, filter_size)
            # Contract over input channels and filter taps -> (batch, out_channels).
            # The bias is added once per output channel, not once per input channel.
            A[:, :, k] = np.tensordot(patch, self.W, axes=([1, 2], [1, 2])) + self.b

        return A

    def backward(self, dA):
        """
        Backward propagation to compute gradients.

        ``dW`` and ``db`` are summed over the batch.  The optimizer step is
        applied after the gradients are computed, so the returned ``dX`` is
        based on the weights used in ``forward``.

        Parameters:
        - dA: Gradient of loss with respect to output, a 3D array of shape (batch_size, out_channels, output_size)

        Returns:
        - dX: Gradient of loss with respect to input, a 3D array of shape (batch_size, in_channels, input_size)
        """
        dA = np.asarray(dA)
        _, _, input_size = self.X.shape
        padded_X = self._pad(self.X)
        self.dW = np.zeros_like(self.W, dtype=np.float64)
        # One bias per output channel: sum over samples and positions.
        self.db = dA.sum(axis=(0, 2), dtype=np.float64)
        # Explicit float buffer so integer inputs do not truncate gradients.
        padded_dX = np.zeros(padded_X.shape, dtype=np.float64)

        for k in range(self.out_size):
            window = self._window(k)
            patch = padded_X[:, :, window]  # (batch, in_channels, filter_size)
            grad_k = dA[:, :, k]            # (batch, out_channels)
            # Sum over the batch -> (out_channels, in_channels, filter_size).
            self.dW += np.tensordot(grad_k, patch, axes=([0], [0]))
            # Sum over output channels -> (batch, in_channels, filter_size).
            padded_dX[:, :, window] += np.tensordot(grad_k, self.W, axes=([1], [0]))

        # Drop the gradient that fell on the padding so dX matches the input shape.
        self.dX = padded_dX[:, :, self.padding:self.padding + input_size]

        self.optimizer.update(self)
        return self.dX


## [Problem 8] Learning and estimation

In [45]:
class MaxPool1d:
    """One-dimensional max pooling over the last axis of a (batch, channels, length) array.

    Parameters
    ----------
    pool_size : int, optional
        Width of each pooling window.
    stride : int, optional
        Step between the starts of successive windows.
    """

    def __init__(self, pool_size=2, stride=2):
        self.pool_size = pool_size
        self.stride = stride
        self.X = None
        self.arg_max_indices = None

    def forward(self, X):
        """
        Forward propagation through the Max Pooling layer.

        The position of each window's maximum (as an index into the input's
        last axis) is stored in ``arg_max_indices`` for use in ``backward``.
        When a window contains ties, the first maximum is used.

        Parameters:
        - X: Input data, a 3D array of shape (batch_size, channels, input_size)

        Returns:
        - A: Output data after pooling, a 3D array of shape (batch_size, channels, output_size)
        """
        self.X = X
        batch_size, channels, input_size = X.shape
        output_size = (input_size - self.pool_size) // self.stride + 1
        A = np.zeros((batch_size, channels, output_size))
        self.arg_max_indices = np.zeros((batch_size, channels, output_size), dtype=np.int32)

        for k in range(output_size):
            start = k * self.stride
            window = X[:, :, start:start + self.pool_size]  # (batch, channels, pool_size)
            self.arg_max_indices[:, :, k] = window.argmax(axis=2) + start
            A[:, :, k] = window.max(axis=2)

        return A

    def backward(self, dA):
        """
        Backward propagation through the Max Pooling layer (gradient is passed through max values).

        Parameters:
        - dA: Gradient of loss with respect to output of Max Pooling layer, a 3D array of shape (batch_size, channels, output_size)

        Returns:
        - dX: Gradient of loss with respect to input of Max Pooling layer, a 3D array of shape (batch_size, channels, input_size)
        """
        dA = np.asarray(dA)
        batch_size, channels, output_size = dA.shape
        # Keep floating inputs' dtype, but never accumulate into an integer
        # buffer: that would silently truncate fractional gradients.
        grad_dtype = self.X.dtype if np.issubdtype(self.X.dtype, np.floating) else np.float64
        dX = np.zeros(self.X.shape, dtype=grad_dtype)

        rows = np.arange(batch_size)[:, None]
        cols = np.arange(channels)[None, :]
        for k in range(output_size):
            # Within one output position every (sample, channel) pair targets a
            # distinct element, so fancy-index accumulation is safe.  Overlapping
            # windows accumulate across iterations of this loop.
            dX[rows, cols, self.arg_max_indices[:, :, k]] += dA[:, :, k]

        return dX


In [46]:
import numpy as np
from keras.datasets import mnist
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

In [47]:
# Load MNIST data (train/test splits as returned by keras.datasets.mnist)
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Preprocess data
# Cast to float32 and divide by 255 (presumably mapping 8-bit pixel values to
# the [0, 1] range; verify against the dataset).
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0
# Append a trailing axis so each image carries an explicit channel dimension,
# matching the input_shape=(28, 28, 1) declared on the model's first layer.
X_train = np.expand_dims(X_train, axis=-1)  # Add channel dimension
X_test = np.expand_dims(X_test, axis=-1)    # Add channel dimension
# Convert the labels to a 10-class categorical encoding, as required by the
# 'categorical_crossentropy' loss used when compiling the model.
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

In [48]:
# Define model
# NOTE: this network is built entirely from Keras 2-D layers; the Conv1d and
# MaxPool1d classes implemented earlier in this notebook are not used here.
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)))  # Initial convolutional layer: 32 filters of size 3x3
model.add(Conv2D(64, (3, 3), activation='relu'))  # Second convolutional layer: 64 filters of size 3x3
model.add(MaxPooling2D(pool_size=(2, 2)))  # 2x2 max pooling
model.add(Flatten())  # Flatten the feature maps for the dense layers
model.add(Dense(128, activation='relu'))  # Hidden fully connected layer
model.add(Dense(10, activation='softmax'))  # Output layer: one unit per class

In [49]:
# Compile model: categorical cross-entropy loss (labels were converted with
# to_categorical), the Adam optimizer, and accuracy reported as a metric.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


In [50]:
# Train the model for 10 epochs with mini-batches of 128 samples;
# validation_split=0.1 sets aside 10% of the training data for validation.
model.fit(X_train, y_train, batch_size=128, epochs=10, validation_split=0.1)


Epoch 1/10


2024-07-02 11:17:16.694517: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 169344000 exceeds 10% of free system memory.


422/422 ━━━━━━━━━━━━━━━━━━━━ 60s 139ms/step - accuracy: 0.8770 - loss: 0.3909 - val_accuracy: 0.9848 - val_loss: 0.0553
Epoch 2/10
422/422 ━━━━━━━━━━━━━━━━━━━━ 58s 138ms/step - accuracy: 0.9844 - loss: 0.0512 - val_accuracy: 0.9855 - val_loss: 0.0542
Epoch 3/10
422/422 ━━━━━━━━━━━━━━━━━━━━ 59s 139ms/step - accuracy: 0.9912 - loss: 0.0282 - val_accuracy: 0.9882 - val_loss: 0.0502
Epoch 4/10
422/422 ━━━━━━━━━━━━━━━━━━━━ 59s 139ms/step - accuracy: 0.9935 - loss: 0.0207 - val_accuracy: 0.9873 - val_loss: 0.0500
Epoch 5/10
422/422 ━━━━━━━━━━━━━━━━━━━━ 59s 140ms/step - accuracy: 0.9957 - loss: 0.0123 - val_accuracy: 0.9875 - val_loss: 0.0526
Epoch 6/10
422/422 ━━━━━━━━━━━━━━━━━━━━ 60s 142ms/step - accuracy: 0.9974 - loss: 0.0086 - val_accuracy: 0.9888 - val_loss: 0.0591
Epoch 7/10
[1m422/42

<keras.src.callbacks.history.History at 0x7d2e5c42b880>

In [51]:
# Evaluate the model on the held-out test split; evaluate() returns the loss
# followed by the metrics configured at compile time (here, accuracy).
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Accuracy on test set: {accuracy:.4f}')

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - accuracy: 0.9885 - loss: 0.0454
Accuracy on test set: 0.9904
