# In [42]:
import tensorflow.experimental.numpy as tnp
import tensorflow as tf
import numpy as np
import struct
from tqdm import tqdm
from tensorflow.python.ops.numpy_ops import np_config
np_config.enable_numpy_behavior()


def load_mnist(images_path, labels_path):
    """Load an IDX image file and its matching IDX label file.

    Both files must be raw (not gzipped) IDX files with unsigned-byte
    payloads, as distributed for MNIST.

    Args:
        images_path: Path to the IDX3 image file.
        labels_path: Path to the IDX1 label file.

    Returns:
        A tuple ``(images, labels)``: ``images`` is a uint8 array of shape
        ``(count, rows, cols)`` and ``labels`` is a uint8 array of shape
        ``(count,)``.

    Raises:
        ValueError: If a magic number is wrong, the two files disagree on
            the number of items, or a payload does not match its header.
        struct.error: If a file is too short to contain its header.
    """
    # Magic numbers from the IDX format: 0x0801 = ubyte data with 1 dimension,
    # 0x0803 = ubyte data with 3 dimensions.
    labels_magic = 2049
    images_magic = 2051

    with open(labels_path, 'rb') as lbpath:
        magic, num_labels = struct.unpack('>II', lbpath.read(8))
        if magic != labels_magic:
            raise ValueError(
                f'{labels_path}: bad label-file magic number {magic} '
                f'(expected {labels_magic})'
            )
        labels = np.fromfile(lbpath, dtype=np.uint8)

    if labels.size != num_labels:
        raise ValueError(
            f'{labels_path}: header declares {num_labels} labels '
            f'but the file contains {labels.size}'
        )

    with open(images_path, 'rb') as imgpath:
        magic, num_images, rows, cols = struct.unpack('>IIII', imgpath.read(16))
        if magic != images_magic:
            raise ValueError(
                f'{images_path}: bad image-file magic number {magic} '
                f'(expected {images_magic})'
            )
        pixels = np.fromfile(imgpath, dtype=np.uint8)

    if num_images != num_labels:
        raise ValueError(
            f'image count ({num_images}) does not match label count ({num_labels})'
        )

    expected_pixels = num_images * rows * cols
    if pixels.size != expected_pixels:
        raise ValueError(
            f'{images_path}: header declares {expected_pixels} pixel bytes '
            f'but the file contains {pixels.size}'
        )

    # Shape comes from the image header itself rather than from the label count.
    images = pixels.reshape(num_images, rows, cols)
    return images, labels

# File names of the four MNIST IDX files, resolved relative to the current
# working directory. load_mnist reads them as raw bytes, so they are expected
# to be the extracted (not gzipped) files.
train_images_path = 'train-images-idx3-ubyte'
train_labels_path = 'train-labels-idx1-ubyte'
test_images_path = 't10k-images-idx3-ubyte'
test_labels_path = 't10k-labels-idx1-ubyte'

# Read both splits fully into memory as (images, labels) uint8 arrays.
train_images, train_labels = load_mnist(train_images_path, train_labels_path)
test_images, test_labels = load_mnist(test_images_path, test_labels_path)

def preprocess_data(images):
    """Convert images to float32, divide by 255, and append a trailing axis.

    Args:
        images: Array-like of pixel values, e.g. shape ``(count, rows, cols)``.

    Returns:
        A float32 array with one extra trailing axis of length 1.
    """
    as_float = tnp.array(images, dtype=tnp.float32)
    scaled = as_float / 255.0
    return tnp.expand_dims(scaled, axis=-1)

# Replace the raw image arrays with their preprocessed versions
# (float32, divided by 255, with a trailing axis of length 1 added).
train_images = preprocess_data(train_images)
test_images = preprocess_data(test_images)

def one_hot_encoding(labels, num_classes=10):
    """Turn integer class labels into one-hot rows.

    Args:
        labels: Integer array (or sequence) of class indices, each in
            ``[0, num_classes)``.
        num_classes: Number of classes, i.e. the width of each one-hot row.
            Defaults to 10 (the MNIST digit classes).

    Returns:
        An array of shape ``labels.shape + (num_classes,)`` holding 1.0 at the
        label's index and 0.0 elsewhere.
    """
    # Indexing the identity matrix by label picks out the matching unit row.
    identity = np.eye(num_classes)
    return tnp.array(identity[labels])

# Replace the integer label arrays with one-hot rows (one column per class).
train_labels = one_hot_encoding(train_labels)
test_labels = one_hot_encoding(test_labels)

def conv2d(input_data, filters):
    """Apply a stride-1, unpadded 2-D cross-correlation.

    Args:
        input_data: Channels-last input of shape
            ``(batch, height, width, channels)``.
        filters: Square kernels of shape
            ``(num_filters, channels, filter_size, filter_size)``.

    Returns:
        Channels-first feature maps of shape
        ``(batch, num_filters, height - filter_size + 1,
        width - filter_size + 1)``.

    Raises:
        ValueError: If the input's channel count differs from the filters',
            or the input is spatially smaller than the filter.
    """
    num_filters, num_channels, filter_size, _ = filters.shape
    batch_size, input_height, input_width, input_channels = input_data.shape
    if input_channels != num_channels:
        raise ValueError(
            f'input has {input_channels} channels (last axis) but the filters '
            f'expect {num_channels}'
        )

    output_height = input_height - filter_size + 1
    output_width = input_width - filter_size + 1
    if output_height < 1 or output_width < 1:
        raise ValueError(
            f'input of size {input_height}x{input_width} is smaller than the '
            f'{filter_size}x{filter_size} filter'
        )

    # Accumulate one kernel tap at a time: for tap (row, col) every output
    # pixel reads the input shifted by (row, col), so a whole shifted window
    # can be contracted with that tap's (channels, num_filters) weights at
    # once. This needs filter_size**2 iterations instead of one per pixel.
    accumulated = None
    for row in range(filter_size):
        for col in range(filter_size):
            window = input_data[:, row:row + output_height, col:col + output_width, :]
            tap_weights = tnp.transpose(filters[:, :, row, col])
            contribution = tnp.dot(window, tap_weights)
            accumulated = contribution if accumulated is None else accumulated + contribution

    # (batch, out_h, out_w, num_filters) -> (batch, num_filters, out_h, out_w)
    return tnp.transpose(accumulated, (0, 3, 1, 2))

def max_pooling2d(input_data):
    """Apply 2x2 max pooling with stride 2.

    Args:
        input_data: Channels-first feature maps of shape
            ``(batch, num_filters, height, width)``.

    Returns:
        An array of shape ``(batch, num_filters, height // 2, width // 2)``.
        A trailing odd row or column is dropped.
    """
    batch_size, num_filters, input_height, input_width = input_data.shape
    output_height, output_width = input_height // 2, input_width // 2

    # Drop any odd trailing row/column, then expose each 2x2 window as its own
    # pair of axes so a single reduction replaces the per-element loops.
    cropped = input_data[:, :, :2 * output_height, :2 * output_width]
    windows = cropped.reshape(
        batch_size, num_filters, output_height, 2, output_width, 2
    )
    return tnp.max(windows, axis=(3, 5))

def fully_connected(input_data, weights, biases):
    """Compute the affine map ``input_data . weights + biases``.

    Args:
        input_data: Array whose last axis matches the first axis of ``weights``.
        weights: Weight matrix.
        biases: Bias added to the product (broadcast over leading axes).

    Returns:
        The dot product of ``input_data`` and ``weights`` with ``biases`` added.
    """
    projected = tnp.dot(input_data, weights)
    return projected + biases

def relu(input_data):
    """Return the element-wise maximum of ``input_data`` and 0."""
    floor = 0
    return tnp.maximum(input_data, floor)

def softmax(input_data):
    """Compute softmax along the last axis.

    The per-row maximum is subtracted before exponentiating so large inputs
    do not overflow; the result is unchanged mathematically.
    """
    row_max = tnp.max(input_data, axis=-1, keepdims=True)
    shifted_exp = tnp.exp(input_data - row_max)
    normalizer = tnp.sum(shifted_exp, axis=-1, keepdims=True)
    return shifted_exp / normalizer

def forward_propagation(input_data):
    """Run the network forward and return class probabilities.

    The pipeline is conv -> ReLU -> pool, twice, followed by three dense
    layers (ReLU on the first two) and a softmax. Parameters are read from
    the module-level ``conv*_filters``, ``dense*_weights`` and
    ``dense*_biases`` variables.

    Args:
        input_data: Channels-last images of shape
            ``(batch, height, width, channels)``.

    Returns:
        Softmax output of the last dense layer, one row per input image.
    """
    conv1 = relu(conv2d(input_data, conv1_filters))
    pool1 = max_pooling2d(conv1)

    # conv2d reads its input as (batch, height, width, channels), but the
    # conv/pool stages emit (batch, channels, height, width). Move the channel
    # axis last before the second convolution.
    pool1_channels_last = tnp.transpose(pool1, (0, 2, 3, 1))
    conv2 = relu(conv2d(pool1_channels_last, conv2_filters))
    pool2 = max_pooling2d(conv2)

    # Collapse everything except the batch axis into one feature vector.
    flattened = pool2.reshape(pool2.shape[0], -1)
    dense1 = relu(fully_connected(flattened, dense1_weights, dense1_biases))
    dense2 = relu(fully_connected(dense1, dense2_weights, dense2_biases))
    return softmax(fully_connected(dense2, dense3_weights, dense3_biases))

def cross_entropy_loss(predictions, labels, epsilon=1e-12):
    """Compute the mean categorical cross-entropy over a batch.

    Args:
        predictions: Predicted class probabilities, one row per sample.
        labels: One-hot targets with the same shape as ``predictions``.
        epsilon: Lower bound applied to ``predictions`` before taking the
            logarithm. Without it a probability of exactly 0 gives
            ``log(0) = -inf``, and ``0 * -inf`` turns the whole loss into NaN.

    Returns:
        The summed cross-entropy divided by the number of rows in ``labels``.
    """
    safe_predictions = tnp.maximum(predictions, epsilon)
    return -tnp.sum(labels * tnp.log(safe_predictions)) / labels.shape[0]

def accuracy(predictions, labels):
    """Return the fraction of rows whose arg-max class matches the label's.

    Args:
        predictions: Per-class scores, one row per sample.
        labels: One-hot (or score) targets with the same shape.
    """
    predicted_classes = tnp.argmax(predictions, axis=-1)
    true_classes = tnp.argmax(labels, axis=-1)
    matches = predicted_classes == true_classes
    return tnp.mean(matches)

def calculate_gradients(input_data, labels):
    """Placeholder for backpropagation; not implemented yet.

    Both arguments are currently ignored and the function returns ``None``.
    """
    # TODO: implement backpropagation here to calculate gradients
    return None

# Initialize the weights and biases.
# Weights are drawn from a normal distribution (mean 0, stddev 0.1); biases
# start at zero. tf.random.normal and tf.zeros already return float32 tensors,
# so no further conversion is needed.
conv1_filters = tf.random.normal((6, 1, 5, 5), 0, 0.1)  # 6 filters of size 5x5 with 1 input channel
conv2_filters = tf.random.normal((16, 6, 5, 5), 0, 0.1)  # 16 filters of size 5x5 with 6 input channels

# Flattened feature count after the two conv/pool stages on 28x28 input:
# 28 -> conv 5x5 -> 24 -> pool -> 12 -> conv 5x5 -> 8 -> pool -> 4,
# giving 16 channels * 4 * 4 = 256 inputs to the first dense layer.
dense1_weights = tf.random.normal((256, 120), 0, 0.1)  # 120 nodes with 256 input nodes
dense2_weights = tf.random.normal((120, 84), 0, 0.1)  # 84 nodes with 120 input nodes
dense3_weights = tf.random.normal((84, 10), 0, 0.1)  # 10 nodes with 84 input nodes

dense1_biases = tf.zeros((120,), dtype=tf.float32)
dense2_biases = tf.zeros((84,), dtype=tf.float32)
dense3_biases = tf.zeros((10,), dtype=tf.float32)

# Training loop
# NOTE: each epoch currently only evaluates the network on the full training
# set; no parameters are updated until the gradient TODO below is implemented.
epochs = 10
learning_rate = 0.01

# The context manager closes the progress bar even if an epoch raises or the
# run is interrupted.
with tqdm(total=epochs, desc='Training', unit='epoch', ncols=80) as pbar:
    for epoch in range(epochs):
        outputs = forward_propagation(train_images)
        loss = cross_entropy_loss(outputs, train_labels)
        acc = accuracy(outputs, train_labels)

        # tqdm.write prints the message above the bar instead of breaking its line.
        tqdm.write(f'Epoch {epoch+1}, Loss: {loss}, Accuracy: {acc}')
        # Update progress bar
        pbar.update(1)
        # TODO: Use the gradients to update the weights and biases here



# Captured output: Training:   0%|                                       | 0/10 [01:00<?, ?epoch/s]
