### 1. LeNet5 Model

In [98]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
import cv2
import numpy as np

class ScaledTanh(nn.Module):
    def forward(self, x):
        return 1.7159 * torch.tanh(x * 2 / 3)
    
class LeNet5(nn.Module):
    def __init__(self, num_classes=10):
        super(LeNet5, self).__init__()
        self.tanh = ScaledTanh()

        # C1
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1)
        
        # S2
        self.weight2 = nn.Parameter(torch.ones(1, 6, 1, 1))
        self.bias2 = nn.Parameter(torch.zeros(1, 6, 1, 1))
        fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight2)
        nn.init.uniform_(self.weight2, -2.4 / fan_in, 2.4 / fan_in)
        self.bias2.data.fill_(2.4 / fan_in)

        # C3
        self.weight3 = nn.Parameter(torch.Tensor(16, 6, 5, 5))
        self.bias3 = nn.Parameter(torch.Tensor(1, 16, 1, 1))  # Shape [16] instead of [1, 16, 1, 1]
        fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight3)
        nn.init.uniform_(self.weight3, -2.4 / fan_in, 2.4 / fan_in)
        self.bias3.data.fill_(2.4 / fan_in)

        # S4
        self.weight4 = nn.Parameter(torch.ones(1, 16, 1, 1))
        self.bias4 = nn.Parameter(torch.zeros(1, 16, 1, 1))
        fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight4)
        nn.init.uniform_(self.weight4, -2.4 / fan_in, 2.4 / fan_in)
        self.bias4.data.fill_(2.4 / fan_in)

        # C5
        self.conv5 = nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5, stride=1)

        # F6
        self.fc6 = nn.Linear(120, 84)

        # Output Layer
        self.prototypes = self.compute_rbf_prototypes()

    def connection_table(self):
        return [
            [0, 1, 2],
            [1, 2, 3],
            [2, 3, 4],
            [3, 4, 5],
            [0, 4, 5],
            [0, 1, 5],
            [0, 1, 2, 3],
            [1, 2, 3, 4],
            [2, 3, 4, 5],
            [0, 3, 4, 5],
            [0, 1, 4, 5],
            [0, 1, 2, 5],
            [0, 1, 3, 4],
            [1, 2, 4, 5],
            [1, 2, 3, 5],
            [0, 1, 2, 3, 4, 5]
        ]

    def compute_rbf_prototypes(self):
        prototypes = []

        image_folder = './digits updated/'
        images = []
        labels = []
        bitmap_size = (7,12)
        num_classes = 10

        for label in range(num_classes):
            class_folder = os.path.join(image_folder, str(label))
            for img_name in os.listdir(class_folder):
                img_path = os.path.join(class_folder, img_name)
                labels.append(label)
                image = cv2.imread(img_path, 0)
                if image is not None:
                    image = cv2.resize(image, bitmap_size)
                    images.append(image)
            if images:
                mean_image = np.mean(images, axis=0)
                # mean_image = cv2.threshold(mean_image, 127, 1, cv2.THRESH_BINARY)[1].astype(np.int16) * -1 + 1 # black = 0, white = 1 for loss later
                prototypes.append(mean_image.flatten())

        prototypes_arr = np.array(prototypes)
        return torch.tensor(prototypes_arr, dtype=torch.float32)

    def compute_rbf_distance(self, x):
        x_expanded = x.unsqueeze(1).expand((x.size(0), self.prototypes.size(0), self.prototypes.size(1)))  
        params_expanded = self.prototypes.unsqueeze(0).expand((x.size(0), self.prototypes.size(0), self.prototypes.size(1)))         
        output = (x_expanded - params_expanded).pow(2).sum(-1)
        return output    

    def forward(self, x):
        # C1
        x = self.conv1(x)
        x = self.tanh(x)

        # S2
        x = F.avg_pool2d(x, kernel_size=2, stride=2) * self.weight2.view(1, -1, 1, 1) + self.bias2.view(1, -1, 1, 1)
        x = self.tanh(x)

        # C3
        batch_size = x.size(0)
        output = torch.zeros(batch_size, 16, x.size(2) - 5 + 1, x.size(3) - 5 + 1).to(x.device)
        for i in range(16):  # For each output channel
            connected_channels = self.connection_table()[i]
            for j, input_channel in enumerate(connected_channels): # input channels 0-5
                input_slice = x[:, input_channel, :, :].unsqueeze(1)  # Select the input channel and add batch dimension
                
                # Create the weight tensor for the convolution (shape: [1, 1, 5, 5])
                weight = self.weight3[i, j, :, :].unsqueeze(0).unsqueeze(0)
                
                # Perform convolution (output will have shape [batch_size, 1, height, width])
                conv_output = F.conv2d(input_slice, weight)
                
                # print(conv_output.shape)
                
                # Accumulate results in the correct output channel
                output[:, i:i+1, :, :] += conv_output
                # print(output.shape)

        bias = self.bias3.view(16)  # shape: [16]
        for i in range(16):
            output[:, i:i+1, :, :] += bias[i]
        # print(output.shape)
        x = output
        x = self.tanh(x)

        # S4
        x = F.avg_pool2d(x, kernel_size=2, stride=2) * self.weight4.view(1, -1, 1, 1) + self.bias4.view(1, -1, 1, 1)
        x = self.tanh(x)

        # C5
        x = self.conv5(x)
        x = self.tanh(x)

        # F6
        x = x.view(x.size(0), -1)
        x = self.fc6(x)

        # Output Layer
        x = self.compute_rbf_distance(x)
        return x

### 2. Load Train and Test Data

In [97]:
from torch.utils.data import TensorDataset, DataLoader
from torchvision import transforms
from PIL import Image
train_image_folder = './data/train/'
test_image_folder = './data/test/'
train_label_file = './data/train_label.txt'
test_label_file = './data/test_label.txt'

train_images = []
train_labels = []
test_images = []
test_labels = []

transform = transforms.Compose([
    transforms.Resize((32, 32)),  # Resize images to 32x32
    transforms.PILToTensor(),
    transforms.ConvertImageDtype(torch.float)
])

with open(train_label_file, 'r') as f:
    label_lines = f.readlines()

    for idx, img_name in enumerate(os.listdir(train_image_folder)):
        img_path = os.path.join(train_image_folder, img_name)
        img = cv2.imread(img_path, 0)
        if img is not None:
            image = Image.fromarray(img)
            image = transform(image)
            train_images.append(image)
            label = int(label_lines[idx].strip())
            train_labels.append(label)

with open(test_label_file, 'r') as f:
    label_lines = f.readlines()

    for idx, img_name in enumerate(os.listdir(test_image_folder)):
        img_path = os.path.join(test_image_folder, img_name)
        img = cv2.imread(img_path, 0)
        if img is not None:
            image = Image.fromarray(img)
            image = transform(image)
            test_images.append(image)
            label = int(label_lines[idx].strip())
            test_labels.append(label)

train_images = torch.stack(train_images)
test_images = torch.stack(test_images)
train_labels = torch.tensor(train_labels, dtype=torch.long)
test_labels = torch.tensor(test_labels, dtype=torch.long)

train_loader = DataLoader(TensorDataset(train_images, train_labels), batch_size=1, shuffle=True)
test_loader = DataLoader(TensorDataset(test_images, test_labels), batch_size=1, shuffle=True)

### 3. Train the Model

In [None]:
def customLoss(outputs, labels):
    # predicted_class = torch.argmax(outputs, dim=1)  # Get the predicted class (32,)
    # # correct_predictions = (predicted_class == labels)  # Tensor of booleans (32,)
    # # print("predicted class:", predicted_class, "labels:", labels)
    # loss = outputs[labels==predicted_class] # .pow(2).sum() # correct classes
    # # print("loss1:", loss)
    # loss += torch.log(np.exp(-0.1) + torch.exp(-outputs[labels!=predicted_class].sum())) # incorrect classes
    # # print("loss2:", loss)
    # loss /= 10 # normalize by number of classes
    # # print("loss3:", loss)
    # return loss

In [108]:
# Define relevant variables
batch_size = 64
num_classes = 10
learning_rate = 0.001
num_epochs = 10

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = LeNet5(num_classes=num_classes).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

total_step = len(train_loader)
print(f'Total steps: {total_step}')
for epoch in range(num_epochs):
    model.train()
    correct = 0
    total = 0
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = customLoss(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Tracking accuracy
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        if (i+1) % 200 == 0:
            accuracy = 100 * correct / total
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Training Accuracy: {:.2f}%'.format(epoch+1, num_epochs, i+1, total_step, loss.item(), accuracy))

    accuracy = 100 * correct / total
    print(f'Epoch [{epoch+1}/{num_epochs}], Training Accuracy: {accuracy:.2f}%')

Total steps: 60000
predicted class: tensor([1]) labels: tensor([7])
loss1: tensor(0., grad_fn=<SumBackward0>)
loss2: tensor(-0.1000, grad_fn=<AddBackward0>)
loss3: tensor(-0.0100, grad_fn=<DivBackward0>)
predicted class: tensor([1]) labels: tensor([7])
loss1: tensor(0., grad_fn=<SumBackward0>)
loss2: tensor(-0.1000, grad_fn=<AddBackward0>)
loss3: tensor(-0.0100, grad_fn=<DivBackward0>)
predicted class: tensor([1]) labels: tensor([2])
loss1: tensor(0., grad_fn=<SumBackward0>)
loss2: tensor(-0.1000, grad_fn=<AddBackward0>)
loss3: tensor(-0.0100, grad_fn=<DivBackward0>)
predicted class: tensor([1]) labels: tensor([0])
loss1: tensor(0., grad_fn=<SumBackward0>)
loss2: tensor(-0.1000, grad_fn=<AddBackward0>)
loss3: tensor(-0.0100, grad_fn=<DivBackward0>)
predicted class: tensor([1]) labels: tensor([2])
loss1: tensor(0., grad_fn=<SumBackward0>)
loss2: tensor(-0.1000, grad_fn=<AddBackward0>)
loss3: tensor(-0.0100, grad_fn=<DivBackward0>)
predicted class: tensor([1]) labels: tensor([6])
loss1: 

KeyboardInterrupt: 