### LeNet5 1998 Implementation

#### 1. Calculating the mean and variance of the MNIST training data after the input rescaling `x * 1.275 - 0.1`.

In [5]:
!pip install pyarrow




In [4]:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import pandas as pd
from PIL import Image
from data import splits, df_train, df_test

# Dataset wrapper that serves (image, label) pairs out of a DataFrame
class CustomMNISTDataset(Dataset):
    """Map-style dataset backed by a DataFrame with 'image' and 'label' columns."""

    def __init__(self, dataframe, transform=None):
        # Only references are stored; images are decoded lazily in __getitem__.
        self.transform = transform
        self.dataframe = dataframe

    def __len__(self):
        """Return the number of rows in the backing DataFrame."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return the (optionally transformed) image and the label at position ``idx``."""
        row = self.dataframe.iloc[idx]
        pixels = row['image']
        label = row['label']

        # The stored pixels are expected to be a 2-D numpy array; cast to
        # 8-bit and wrap them as a single-channel ('L') PIL image.
        image = Image.fromarray(pixels.astype(np.uint8), mode='L')

        # Without a transform the raw PIL image is handed back as-is.
        if not self.transform:
            return image, label

        return self.transform(image), label

# Input pipeline: PIL image -> float tensor in [0, 1] -> affine rescale x * 1.275 - 0.1
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Lambda(lambda pixels: pixels * 1.275 - 0.1),
    ]
)

# Wrap both DataFrame splits with the same pipeline
train_dataset = CustomMNISTDataset(df_train, transform)
test_dataset = CustomMNISTDataset(df_test, transform)

# Batches of 100; only the training split is shuffled
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=100)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=100)

# Gather every transformed training batch, flattened to (batch, pixels)
all_data = []

for images, _ in train_loader:
    all_data.append(images.reshape(len(images), -1))

# One big (num_samples, pixels) tensor; statistics are taken over all elements
all_data = torch.cat(all_data)
mean = torch.mean(all_data)
variance = torch.var(all_data)

# Mean and Variance to use for MNIST data:
print(f'Mean: {mean.item()}')
print(f'Variance: {variance.item()}')

ImportError: Unable to find a usable engine; tried using: 'pyarrow', 'fastparquet'.
A suitable version of pyarrow or fastparquet is required for parquet support.
Trying to import the above resulted in these errors:
 - Missing optional dependency 'pyarrow'. pyarrow is required for parquet support. Use pip or conda to install pyarrow.
 - Missing optional dependency 'fastparquet'. fastparquet is required for parquet support. Use pip or conda to install fastparquet.

#### 2. Building the LeNet5 1998 model.

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os
import random
import numpy as np
from collections import defaultdict

# Seed every RNG the notebook can draw from. The layers below initialise their
# parameters with torch's generator (torch.randn / nn.init.uniform_), so seeding
# Python's `random` alone does not make model construction reproducible.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

class LeNet5_S2Layer(nn.Module):
    """2x2 average pooling, then a learnable per-channel scale and shift, then a sigmoid."""

    def __init__(self, num_channels):
        super().__init__()
        # One multiplicative coefficient (initialised to 1) and one additive
        # bias (initialised to 0) per channel.
        self.coefficient = nn.Parameter(torch.ones(num_channels))
        self.bias = nn.Parameter(torch.zeros(num_channels))

    def forward(self, x):
        # Reshape the per-channel parameters so they broadcast over
        # (batch, channel, height, width).
        scale = self.coefficient.reshape(1, -1, 1, 1)
        shift = self.bias.reshape(1, -1, 1, 1)

        averaged = F.avg_pool2d(x, kernel_size=2, stride=2)
        return torch.sigmoid(averaged * scale + shift)

class ScaledTanh(nn.Module):
    """Element-wise activation 1.7159 * tanh(2x / 3)."""

    def forward(self, x):
        squashed = torch.tanh(x * 2 / 3)
        return 1.7159 * squashed

class SquashingFunction(nn.Module):
    """Scaled hyperbolic tangent ``f(x) = A * tanh(S * x)``.

    Args:
        A: output amplitude; the result lies in the open interval (-A, A).
        S: slope applied to the input before the tanh.

    The previous implementation called ``torch.atanh`` (the *inverse*
    hyperbolic tangent) on a clamped input. That is not a squashing function:
    its output is unbounded near the clamp limits and its gradient is zero
    for every input outside +/-0.999 / S.
    """

    def __init__(self, A=1.7159, S=2/3):
        super(SquashingFunction, self).__init__()
        self.A = A
        self.S = S

    def forward(self, x):
        # tanh is defined on all reals, so no clamping is needed to avoid NaNs.
        return self.A * torch.tanh(self.S * x)

class C3PartialConv(nn.Module):
    """Stride-1, unpadded 2-D convolution in which each output channel sees only some input channels.

    Args:
        in_channels: number of input feature maps.
        out_channels: number of output feature maps.
        kernel_size: side length (int) of the square kernel.
        connection_table: sequence with one entry per output channel; entry
            ``i`` lists the input-channel indices that output channel ``i``
            is connected to.

    Raises:
        ValueError: if ``connection_table`` does not have ``out_channels``
            entries or names an input channel outside ``[0, in_channels)``.

    A full ``(out, in, k, k)`` weight tensor is stored; connectivity is
    enforced by multiplying it with a fixed 0/1 mask on every forward pass.
    """

    def __init__(self, in_channels, out_channels, kernel_size, connection_table):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.connection_table = connection_table

        if len(connection_table) != out_channels:
            raise ValueError(
                f"connection_table has {len(connection_table)} entries, expected {out_channels}"
            )

        self.weight = nn.Parameter(torch.zeros(out_channels, in_channels, kernel_size, kernel_size))
        self.bias = nn.Parameter(torch.zeros(out_channels))

        # Build the binary mask: 1 where an (output, input) channel pair is connected.
        mask = torch.zeros(out_channels, in_channels, kernel_size, kernel_size)
        for out_c, in_list in enumerate(connection_table):
            for in_c in in_list:
                if not 0 <= in_c < in_channels:
                    raise ValueError(
                        f"connection_table[{out_c}] references input channel {in_c}, "
                        f"valid range is 0..{in_channels - 1}"
                    )
                mask[out_c, in_c] = 1.0

        # Registered as a buffer so that .to()/.cuda()/.double() move it along
        # with the weights. It is non-persistent so the state_dict keys stay
        # exactly 'weight' and 'bias', as before.
        self.register_buffer('mask', mask, persistent=False)

        self.reset_parameters()

    def reset_parameters(self):
        """Draw weights from U(-2.4/fan_in, 2.4/fan_in) and set every bias to 2.4/fan_in."""
        # NOTE(review): fan_in counts *all* input channels, not only the
        # connected ones, which matches the previous behaviour. Confirm that a
        # per-unit (masked) fan-in is not what was intended.
        fan_in = self.in_channels * self.kernel_size * self.kernel_size
        bound = 2.4 / fan_in
        nn.init.uniform_(self.weight, -bound, bound)
        with torch.no_grad():
            # Zero the disconnected weights so the stored tensor reflects the
            # connectivity; forward() masks them anyway.
            self.weight.mul_(self.mask)
            self.bias.fill_(bound)

    def forward(self, x):
        # Masking at call time keeps disconnected weights at zero effect and
        # gives them zero gradient.
        return F.conv2d(x, self.weight * self.mask, self.bias, stride=1)

class DigitDataset(Dataset):
    """Images stored as ``<image_dir>/<label>/<file>`` with one folder per digit 0-9.

    Args:
        image_dir: root directory containing the sub-folders '0' ... '9'.
        transform: optional callable applied to each grayscale PIL image.

    Raises:
        FileNotFoundError: (from ``os.listdir``) if a class folder is missing.

    Files are indexed in sorted order per class so the sample order does not
    depend on the filesystem. Hidden entries (names starting with '.') and
    sub-directories are skipped, since they cannot be opened as images.
    """

    def __init__(self, image_dir, transform=None):
        self.image_dir = image_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []

        # Loop through all class labels (0-9); the folder name is the label.
        for label in range(10):
            class_dir = os.path.join(image_dir, str(label))
            for image_name in sorted(os.listdir(class_dir)):
                image_path = os.path.join(class_dir, image_name)
                if image_name.startswith('.') or not os.path.isfile(image_path):
                    continue
                self.image_paths.append(image_path)
                self.labels.append(label)

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        label = self.labels[idx]

        # convert() returns a fully loaded copy, so the file handle can be
        # closed as soon as the conversion to grayscale ('L' mode) is done.
        with Image.open(img_path) as img:
            image = img.convert('L')

        if self.transform:
            image = self.transform(image)

        return image, label
        
class EuclideanRBFOutput(nn.Module):
    """Scores each class by the negated squared Euclidean distance to a trainable prototype."""

    def __init__(self, num_classes=10, input_dim=84):
        super().__init__()
        # One prototype vector per class, drawn from a standard normal.
        prototypes = torch.randn(num_classes, input_dim)
        self.centers = nn.Parameter(prototypes)

    def forward(self, x):
        # Broadcast (batch, 1, dim) against (1, classes, dim).
        diff = x[:, None, :] - self.centers[None, :, :]
        sq_dist = diff.pow(2).sum(dim=2)  # (batch, classes)
        # Negated so the nearest prototype gets the largest score, which is
        # the form CrossEntropyLoss expects.
        return -sq_dist

class LeNet5(nn.Module):
    """LeNet-5 style network: C1-S2-C3-S4-C5-F6 followed by an RBF output layer.

    Args:
        num_classes: number of prototypes in the RBF output layer.

    Expects single-channel 32x32 inputs: two (5x5 conv, 2x2 pool) stages
    reduce 32 -> 28 -> 14 -> 10 -> 5, and the 5x5 C5 convolution reduces that
    to 1x1, which is what ``nn.Linear(120, 84)`` requires after flattening.

    Compared with the previous revision, the partial connection scheme is now
    applied to C3 using the fixed table from LeCun et al. (1998), Table I, and
    C5 is a full convolution. Previously C3 was fully connected and C5 used a
    random 5-of-16 table drawn from the global ``random`` state at
    construction time. Parameter names and shapes are unchanged.
    """

    # S2 -> C3 connectivity (Table I of the paper): entry i lists the S2 maps
    # feeding C3 map i. Six maps see 3 contiguous inputs, six see 4
    # contiguous, three see 4 non-contiguous and the last sees all six.
    C3_CONNECTION_TABLE = [
        [0, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, 5], [0, 4, 5], [0, 1, 5],
        [0, 1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5], [0, 3, 4, 5], [0, 1, 4, 5], [0, 1, 2, 5],
        [0, 1, 3, 4], [1, 2, 4, 5], [0, 2, 3, 5],
        [0, 1, 2, 3, 4, 5],
    ]

    def __init__(self, num_classes):
        super(LeNet5, self).__init__()

        # C1 and S2
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=0),
            ScaledTanh(),
            LeNet5_S2Layer(6)
        )

        # C3 (partially connected) and S4
        self.layer2 = nn.Sequential(
            C3PartialConv(in_channels=6, out_channels=16, kernel_size=5,
                          connection_table=self.C3_CONNECTION_TABLE),
            ScaledTanh(),
            LeNet5_S2Layer(16)
        )

        # C5: full 5x5 convolution over all 16 S4 maps
        self.layer3 = nn.Conv2d(16, 120, kernel_size=5, stride=1, padding=0)

        # F6
        self.fc = nn.Linear(120, 84)
        self.squashing = SquashingFunction(A=1.0, S=1.0)

        # RBF Output layer
        self.rbf_output = EuclideanRBFOutput(num_classes=num_classes, input_dim=84)

    def forward(self, x):
        """Return the RBF-layer class scores for a batch of images."""
        # Reuse the feature path so the two methods cannot drift apart.
        return self.rbf_output(self.extract_features(x))

    def extract_features(self, x):
        """Return the 84-dimensional F6 activations that feed the RBF layer."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        x = self.squashing(x)
        return x

def extract_features_from_digit_dataset(model, digit_loader, device):
    """Collect the model's penultimate-layer features, grouped by class label.

    Args:
        model: object exposing ``eval()`` and ``extract_features(images)``.
        digit_loader: iterable of ``(images, labels)`` tensor batches.
        device: device the images are moved to before the forward pass.

    Returns:
        ``defaultdict(list)`` mapping each integer label to a list of 1-D
        numpy feature vectors, in loader order.

    Note: the model is switched to eval mode and left there.
    """
    model.eval()
    features_by_class = defaultdict(list)

    with torch.no_grad():
        for images, labels in digit_loader:
            # One device-to-host transfer per batch instead of one per sample.
            # The labels are only used as dict keys, so they stay off the device.
            feats = model.extract_features(images.to(device)).cpu().numpy()

            for feat, label in zip(feats, labels.tolist()):
                features_by_class[label].append(feat)

    return features_by_class

def compute_rbf_centers(features_by_class, num_classes, input_dim):
    """Average each class's feature vectors into one centre per class.

    Args:
        features_by_class: mapping ``label -> list of 1-D feature arrays``.
        num_classes: centres are computed for labels ``0 .. num_classes - 1``.
        input_dim: length of every feature vector.

    Returns:
        ``torch.float32`` tensor of shape ``(num_classes, input_dim)``.

    Raises:
        ValueError: if a class has no feature vectors. The lookup uses
            ``.get`` so a ``defaultdict`` argument is not mutated.
    """
    centers = np.zeros((num_classes, input_dim), dtype=np.float32)

    for cls in range(num_classes):
        class_feats = features_by_class.get(cls)
        if not class_feats:
            raise ValueError(f"no feature vectors available for class {cls}")
        centers[cls] = np.mean(np.stack(class_feats), axis=0)

    return torch.tensor(centers, dtype=torch.float32)
    
# Preprocessing for the DIGIT images: resize to 32x32, then convert to a
# (1, 32, 32) float tensor. No normalisation is applied here.
transform = transforms.Compose(
    [transforms.Resize(size=(32, 32)), transforms.ToTensor()]
)

#### 3. Loading in the MNIST data and splitting into training and testing data sets.

In [None]:
# Training and testing the model on MNIST data:

# Define relevant variables
batch_size = 64
num_classes = 10
learning_rate = 0.001
num_epochs = 10

# Device will determine whether to run the training on GPU or CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Statistics of the training pixels *after* the rescale x * 1.275 - 0.1.
# NOTE(review): these look like the "Mean" and "Variance" printed by section 1
# (that cell's output is not saved in the notebook - re-run it to confirm).
# Two things were wrong in how they were used before:
#   * the second number is a variance, but Normalize expects a standard
#     deviation, so its square root must be passed;
#   * they describe rescaled data, so the same rescale has to be applied
#     before normalising with them.
# Section 1 measured 28x28 images while this pipeline resizes to 32x32, so the
# result is only approximately zero-mean / unit-variance.
MNIST_RESCALED_MEAN = 0.06659211218357086
MNIST_RESCALED_VARIANCE = 0.15432125329971313
MNIST_RESCALED_STD = MNIST_RESCALED_VARIANCE ** 0.5


def lenet_input_rescale(x):
    """Map ToTensor output from [0, 1] onto [-0.1, 1.175] (same formula as section 1)."""
    return x * 1.275 - 0.1


# One preprocessing pipeline shared by both splits
mnist_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Lambda(lenet_input_rescale),
    transforms.Normalize(mean=(MNIST_RESCALED_MEAN,), std=(MNIST_RESCALED_STD,)),
])

# Loading the dataset and preprocessing
train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    transform=mnist_transform,
    download=True
)

test_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    transform=mnist_transform,
    download=False
)

# Shuffle the training set each epoch so mini-batches are not presented in the
# same fixed order every time; evaluation order is irrelevant.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True
)

test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False
)

print("data loaded")

In [None]:
model = LeNet5(num_classes).to(device)

# DIGIT reference images: one sub-folder per class (0-9) under this directory;
# DigitDataset takes each label from the folder name.
image_dir = './digits updated/'

digit_dataset = DigitDataset(image_dir=image_dir, transform=transform)
# Built once (the loader used to be created twice with the same settings).
digit_loader = DataLoader(digit_dataset, batch_size=32, shuffle=False)

# Run the untrained network over DIGIT and group its F6 features by class.
# Note: this call leaves the model in eval mode.
features_by_class = extract_features_from_digit_dataset(model, digit_loader, device)

# Compute RBF centers as the per-class mean of those features
rbf_centers = compute_rbf_centers(features_by_class, num_classes=num_classes, input_dim=84)

# Copy the computed centers into the existing parameter (copy_ handles the
# device transfer) instead of rebinding `.data`.
with torch.no_grad():
    model.rbf_output.centers.copy_(rbf_centers)

#### 4. Training the model on MNIST data subset.

In [None]:
# Training the model on MNIST data:

# The RBF centers are initialised from the DIGIT class means in the previous
# cell. That step used to be repeated here verbatim, recomputing identical
# values on the still-untrained model, so it has been dropped.

cost = nn.CrossEntropyLoss()

# Setting the optimizer with the model parameters and learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

total_step = len(train_loader)

for epoch in range(num_epochs):
    # Earlier feature extraction leaves the model in eval mode; switch back.
    model.train()
    correct = 0
    total = 0
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass: the outputs are negated squared distances, used as logits
        outputs = model(images)
        loss = cost(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Tracking accuracy (detached so no graph is kept for the bookkeeping)
        predicted = outputs.detach().argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        if (i+1) % 400 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, total_step, loss.item()))

    accuracy = 100 * correct / total
    print(f'Epoch [{epoch+1}/{num_epochs}], Training Accuracy: {accuracy:.2f}%')

#### 5. Testing the model on MNIST data subset produces ~97% accuracy.

In [None]:
# Evaluate the trained model on the MNIST test split:

model.eval()  # inference behaviour for every layer

correct = 0
total = 0

with torch.no_grad():  # no gradients are needed for scoring
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)

        outputs = model(images)
        # Index of the largest score per row is the predicted class
        _, predicted = outputs.data.max(1)

        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    accuracy = 100 * correct / total
    print(f'Accuracy of the network on the 10000 test images: {accuracy:.2f} %')