In [29]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [30]:
#data load
train_data=np.loadtxt("../dataset/mnist/mnist_train.csv",delimiter=",",skiprows=1)
test_data=np.loadtxt("../dataset/mnist/mnist_test.csv",delimiter=",",skiprows=1)


In [31]:
train_labels = train_data[:, 0]
train_images = train_data[:, 1:]
test_images = test_data

train_images = train_images.reshape(-1, 28, 28, 1) / 255.0
# test_images = test_images.reshape(-1, 28, 28, 1) / 255.0

In [32]:
# CNN architecture
class CNN:
    def __init__(self):
        self.conv1 = ConvolutionalLayer(num_filters=32, filter_size=5)
        self.pool1 = MaxPoolingLayer(pool_size=2)
        self.conv2 = ConvolutionalLayer(num_filters=64, filter_size=5)
        self.pool2 = MaxPoolingLayer(pool_size=2)
        self.fc1 = FullyConnectedLayer(input_size=64 * 7 * 7, output_size=1024)
        self.fc2 = FullyConnectedLayer(input_size=1024, output_size=10)

    def forward(self, input_data):
        conv1_output = self.conv1.apply(input_data)
        pool1_output = self.pool1.apply(conv1_output)
        conv2_output = self.conv2.apply(pool1_output)
        pool2_output = self.pool2.apply(conv2_output)
        fc_input = pool2_output.reshape(-1, 64 * 7 * 7)
        fc1_output = self.fc1.apply(fc_input)
        self.output = self.fc2.apply(fc1_output)

    def backward(self, labels):
        d_output = self.output
        d_output[range(len(labels)), labels] -= 1
        d_fc2 = self.fc2.backward(d_output)
        d_fc1 = self.fc1.backward(d_fc2)
        d_fc1 = d_fc1.reshape(-1, 7, 7, 64)
        d_pool2 = self.pool2.backward(d_fc1)
        d_conv2 = self.conv2.backward(d_pool2)
        d_pool1 = self.pool1.backward(d_conv2)
        d_conv1 = self.conv1.backward(d_pool1)

    def update_params(self, learning_rate):
        self.fc2.update_params(learning_rate)
        self.fc1.update_params(learning_rate)
        self.conv2.update_params(learning_rate)
        self.conv1.update_params(learning_rate)


In [43]:
class ConvolutionalLayer:
    def __init__(self, num_filters, filter_size):
        self.num_filters = num_filters
        self.filter_size = filter_size
        self.filters = np.random.randn(num_filters, filter_size, filter_size) / np.sqrt(filter_size * filter_size)

    def apply(self, image):
        self.last_input = image
        image_height, image_width, num_channels = image.shape  # Use three values from image.shape
        output = np.zeros((image_height - self.filter_size + 1, image_width - self.filter_size + 1, self.num_filters))
        for f in range(self.num_filters):
            for row in range(image_height - self.filter_size + 1):
                for col in range(image_width - self.filter_size + 1):
                    output[row, col, f] = np.sum(image[row:row + self.filter_size, col:col + self.filter_size] * self.filters[f])
        return output
        
    def backward(self, d_output):
        d_filters = np.zeros_like(self.filters)
        image_height, image_width = self.last_input.shape
        for f in range(self.num_filters):
            for row in range(image_height - self.filter_size + 1):
                for col in range(image_width - self.filter_size + 1):
                    d_filters[f] += d_output[row, col, f] * self.last_input[row:row + self.filter_size, col:col + self.filter_size]
        self.filters -= learning_rate * d_filters
        return np.zeros_like(self.last_input)

    def update_params(self, learning_rate):
        pass  # No parameters to update

In [44]:
class MaxPoolingLayer:
    def __init__(self, pool_size):
        self.pool_size = pool_size

    def apply(self, image):
        self.last_input = image
        image_height, image_width, num_channels = image.shape
        new_height = image_height // self.pool_size
        new_width = image_width // self.pool_size
        output = np.zeros((new_height, new_width, num_channels))
        for ch in range(num_channels):
            for row in range(new_height):
                for col in range(new_width):
                    output[row, col, ch] = np.max(image[row * self.pool_size:(row + 1) * self.pool_size,
                                                       col * self.pool_size:(col + 1) * self.pool_size, ch])
        return output

    def backward(self, d_output):
        return np.repeat(np.repeat(d_output, self.pool_size, axis=0), self.pool_size, axis=1)

In [45]:
class FullyConnectedLayer:
    def __init__(self, input_size, output_size):
        self.weights = np.random.randn(output_size, input_size) * np.sqrt(2 / (input_size + output_size))
        self.biases = np.zeros(output_size)

    def apply(self, input_data):
        self.last_input = input_data
        return np.dot(self.weights, input_data) + self.biases

    def backward(self, d_output):
        d_weights = np.outer(d_output, self.last_input)
        d_input = np.dot(self.weights.T, d_output)
        self.weights -= learning_rate * d_weights
        self.biases -= learning_rate * d_output
        return d_input

    def update_params(self, learning_rate):
        pass  # Biases are updated during backward pass

In [46]:
# Training loop
def train_cnn(cnn, train_images, train_labels, epochs, learning_rate):
    for epoch in range(epochs):
        for i in range(0, len(train_images), batch_size):
            batch_images = train_images[i:i+batch_size]
            batch_labels = train_labels[i:i+batch_size]
            cnn.forward(batch_images)
            cnn.backward(batch_labels)
            cnn.update_params(learning_rate)
            loss = calculate_loss(cnn.output, batch_labels)
            print(f"Epoch {epoch+1}, Batch {i//batch_size+1}: Loss = {loss:.4f}")


In [47]:
# Evaluation
def evaluate_cnn(cnn, test_images, test_labels):
    correct = 0
    for i in range(len(test_images)):
        cnn.forward(test_images[i:i+1])
        prediction = np.argmax(cnn.output)
        if prediction == test_labels[i]:
            correct += 1
    accuracy = correct / len(test_images)
    print(f"Test Accuracy: {accuracy:.4f}")

In [48]:
# Calculate cross-entropy loss
def calculate_loss(predictions, labels):
    probs = np.exp(predictions - np.max(predictions, axis=1, keepdims=True))
    probs /= np.sum(probs, axis=1, keepdims=True)
    correct_logprobs = -np.log(probs[range(len(labels)), labels])
    loss = np.sum(correct_logprobs) / len(labels)
    return loss


In [49]:
# Hyperparameters
batch_size = 64
learning_rate = 0.001
epochs = 10


In [50]:
# Create and train the CNN
cnn = CNN()
train_cnn(cnn, train_images, train_labels, epochs, learning_rate)

ValueError: too many values to unpack (expected 3)

In [None]:
# Evaluate the CNN
evaluate_cnn(cnn, test_images, test_labels)