In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Define the LeNet-5 Architecture
class LeNet5(nn.Module):
    """LeNet-5-style CNN for single-channel digit images.

    Two 5x5 convolutions, each followed by ReLU and 2x2 average pooling, feed
    three fully connected layers that emit 10 raw class scores (logits).

    ``fc1`` takes ``16 * 4 * 4`` features, which is what a 28x28 input produces
    (28 -> conv 24 -> pool 12 -> conv 8 -> pool 4). Inputs whose spatial size
    does not reduce to 4x4 are rejected with a ``RuntimeError`` from ``fc1``
    instead of being silently reshaped across the batch dimension.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)  # C1 layer
        self.pool = nn.AvgPool2d(2, 2)  # S2 and S4 layers (average pooling)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)  # C3 layer
        self.fc1 = nn.Linear(16 * 4 * 4, 120)  # C5 layer (flattened to 120 units)
        self.fc2 = nn.Linear(120, 84)  # F6 layer
        self.fc3 = nn.Linear(84, 10)  # Output layer (10 digit classes)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Tensor of shape ``(batch, 1, H, W)``; ``H`` and ``W`` must
                reduce to 4x4 after the two conv/pool stages (e.g. 28x28).

        Returns:
            Tensor of shape ``(batch, 10)`` with unnormalised class scores.

        Raises:
            RuntimeError: If the flattened feature size does not match
                ``fc1`` (i.e. the input has an unsupported spatial size).
        """
        x = self.pool(torch.relu(self.conv1(x)))  # C1 + ReLU, then S2
        x = self.pool(torch.relu(self.conv2(x)))  # C3 + ReLU, then S4
        # Flatten per sample. ``view(-1, 256)`` would instead merge several
        # samples into one row whenever the feature map is not 16x4x4, changing
        # the batch size without any error.
        x = torch.flatten(x, start_dim=1)
        x = torch.relu(self.fc1(x))  # Fully connected layer 1
        x = torch.relu(self.fc2(x))  # Fully connected layer 2
        return self.fc3(x)  # Output layer (logits)

In [None]:
# Step 1: Train on MNIST Dataset

# MNIST preprocessing: convert to a tensor, then shift/scale pixels to [-1, 1]
transform_mnist = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)

# Build the train split first, then the test split; both are stored under
# ./data and fetched with download=True.
train_dataset_mnist, test_dataset_mnist = (
    datasets.MNIST(root='./data', train=is_train, download=True, transform=transform_mnist)
    for is_train in (True, False)
)

# Only the training batches are shuffled; evaluation order stays fixed.
train_loader_mnist = DataLoader(train_dataset_mnist, shuffle=True, batch_size=64)
test_loader_mnist = DataLoader(test_dataset_mnist, shuffle=False, batch_size=64)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:00<00:00, 10.5MB/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 352kB/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:00<00:00, 3.21MB/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 3.12MB/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






In [None]:
# Initialize the LeNet-5 model, loss function, and optimizer

# Seed PyTorch's global RNG before building the model so that weight
# initialisation (and later draws from the default generator, such as
# DataLoader shuffling) repeat after Restart Kernel -> Run All.
SEED = 42
torch.manual_seed(SEED)

model = LeNet5()
criterion = nn.CrossEntropyLoss()  # takes raw logits and integer class targets
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Training loop for MNIST
num_epochs = 5
for epoch in range(num_epochs):
    model.train()  # make sure the network is in training mode each epoch
    batch_losses = []
    for inputs, labels in train_loader_mnist:
        optimizer.zero_grad()  # drop gradients left over from the previous step
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        batch_losses.append(loss.item())

    # Report the mean per-batch loss for this epoch
    running_loss = sum(batch_losses)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader_mnist)}")


Epoch 1/5, Loss: 0.3523835573198476
Epoch 2/5, Loss: 0.09310219040575392
Epoch 3/5, Loss: 0.06368390035464057
Epoch 4/5, Loss: 0.05181862984343108
Epoch 5/5, Loss: 0.04157344125737962


In [None]:
# Test the model on MNIST (just to check the initial performance)
model.eval()
correct, total = 0, 0
with torch.no_grad():  # inference only, no gradient tracking needed
    for inputs, labels in test_loader_mnist:
        outputs = model(inputs)
        predicted = outputs.argmax(dim=1)  # index of the highest logit per sample
        total += len(labels)
        correct += int((predicted == labels).sum())

print(f'Model accuracy on MNIST test set: {100 * correct / total:.2f}%')


Model accuracy on MNIST test set: 98.80%


In [None]:
# Fetch the USPS dataset
usps = fetch_openml('usps', version=1)

# USPS dataset contains 9298 samples, each with 256 features (16x16 pixels)
usps_pixels = np.asarray(usps.data, dtype=np.float32)

# Rescale pixel values to [0, 1]. torchvision's ToTensor only rescales uint8
# input, so float arrays pass through unchanged; the downstream
# Normalize((0.5,), (0.5,)) therefore reaches [-1, 1] only if its input is
# already in [0, 1]. Min-max scaling makes that hold whatever the raw range is.
pixel_min, pixel_max = usps_pixels.min(), usps_pixels.max()
assert pixel_max > pixel_min, "USPS pixel data is constant; cannot rescale"
usps_pixels = (usps_pixels - pixel_min) / (pixel_max - pixel_min)

# Reshape the data to have 1 channel (grayscale) and shape (16x16 pixels)
usps_data = usps_pixels.reshape(-1, 1, 16, 16)  # (samples, 1 channel, 16x16 pixels)

# CrossEntropyLoss with 10 outputs needs class indices in 0..9, so shift the
# labels to start at zero (a no-op if they already do).
# NOTE(review): presumably a 1-based label k encodes digit k-1 — verify against
# the OpenML dataset description before interpreting per-digit results.
usps_labels = np.array(usps.target, dtype=int)
usps_labels = usps_labels - usps_labels.min()


  warn(


In [None]:
# USPS preprocessing: same tensor conversion and normalisation as MNIST, plus
# an upsampling step so the images match the size the network was trained on.
usps_transform = transforms.Compose([
    transforms.ToTensor(),
    # LeNet5.fc1 is sized for 28x28 inputs (16 * 4 * 4 features); USPS digits
    # are 16x16, so resize them to the MNIST resolution.
    transforms.Resize((28, 28), antialias=True),
    transforms.Normalize((0.5,), (0.5,))  # maps [0, 1] inputs to [-1, 1]
])

In [None]:
# Custom Dataset for USPS
class USPSDataset(torch.utils.data.Dataset):
    """Map-style dataset over in-memory, channel-first image arrays.

    Args:
        data: Indexable collection of images; each item is expected to be a
            NumPy array laid out as ``(channels, height, width)``.
        labels: Indexable collection of labels, aligned with ``data``.
        transform: Optional callable applied to each image. It receives the
            image in ``(height, width, channels)`` layout, which is the
            layout torchvision's ``ToTensor`` assumes for NumPy input.

    Raises:
        ValueError: If ``data`` and ``labels`` differ in length.
    """

    def __init__(self, data, labels, transform=None):
        if len(data) != len(labels):
            raise ValueError(
                f"data and labels must have the same length, "
                f"got {len(data)} and {len(labels)}"
            )
        self.data = data
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` with ``image`` as a float32 tensor."""
        image = self.data[idx]
        label = self.labels[idx]
        if self.transform is not None:
            # ToTensor treats a 3-D ndarray as HxWxC and permutes it to CxHxW.
            # Handing it a channel-first (1, 16, 16) array therefore produced
            # (16, 1, 16) tensors; move the channel axis last beforehand.
            if isinstance(image, np.ndarray) and image.ndim == 3:
                image = np.moveaxis(image, 0, -1)
            image = self.transform(image)
        # as_tensor also covers the transform=None case, where the image is
        # still a NumPy array and has no .float() method.
        image = torch.as_tensor(image).float()  # Ensure float32
        return image, label


In [None]:
# Create the USPS dataset
usps_dataset = USPSDataset(usps_data, usps_labels, transform=usps_transform)

# Split the USPS dataset into 40% for training and 60% for testing
train_size = int(0.4 * len(usps_dataset))
test_size = len(usps_dataset) - train_size

# Use a dedicated, seeded generator so the same samples land in the train and
# test subsets on every run; otherwise the reported results change each time.
split_rng = torch.Generator().manual_seed(42)
train_dataset_usps, test_dataset_usps = random_split(
    usps_dataset, [train_size, test_size], generator=split_rng
)


In [None]:
# Create DataLoaders for USPS
# Shuffle only the fine-tuning batches; keep evaluation order fixed.
train_loader_usps = DataLoader(train_dataset_usps, batch_size=64, shuffle=True)
test_loader_usps = DataLoader(test_dataset_usps, batch_size=64, shuffle=False)

# Report the split sizes (more useful than the Subset object's repr).
print(f"USPS split: {len(train_dataset_usps)} train / {len(test_dataset_usps)} test samples")


<torch.utils.data.dataset.Subset object at 0x7b4718723340>


In [None]:
# Fine-tuning loop on USPS (40%)
# Continues training the MNIST-trained model with the same criterion and
# optimizer. LeNet5 takes 1-channel inputs and its fc1 layer is sized for
# 28x28 images, so train_loader_usps must yield batches shaped (B, 1, 28, 28)
# with integer class labels in 0..9.
num_epochs_finetune = 5
for epoch in range(num_epochs_finetune):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader_usps:
        optimizer.zero_grad()
        # The forward/backward/step sequence was commented out, so the loop
        # never updated any weights and always reported a loss of 0.0.
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Mean per-batch loss for this epoch
    print(f"Epoch {epoch+1}/{num_epochs_finetune}, Loss: {running_loss/len(train_loader_usps)}")



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Image shape: torch.Size([16, 1, 16])
Image shape: torch.Size([16, 1, 16])
Image shape: torch.Size([16, 1, 16])
Image shape: torch.Size([16, 1, 16])
Image shape: torch.Size([16, 1, 16])
Image shape: torch.Size([16, 1, 16])
Image shape: torch.Size([16, 1, 16])
Image shape: torch.Size([16, 1, 16])
Image shape: torch.Size([16, 1, 16])
Image shape: torch.Size([16, 1, 16])
Image shape: torch.Size([16, 1, 16])
Image shape: torch.Size([16, 1, 16])
Image shape: torch.Size([16, 1, 16])
Image shape: torch.Size([16, 1, 16])
Image shape: torch.Size([16, 1, 16])
Image shape: torch.Size([16, 1, 16])
Image shape: torch.Size([16, 1, 16])
Image shape: torch.Size([16, 1, 16])
Image shape: torch.Size([16, 1, 16])
Image shape: torch.Size([16, 1, 16])
Image shape: torch.Size([16, 1, 16])
Image shape: torch.Size([16, 1, 16])
Image shape: torch.Size([16, 1, 16])
Image shape: torch.Size([16, 1, 16])
Image shape: torch.Size([16, 1, 16])
Image shap

In [None]:

# Test the model on the remaining 60% of USPS dataset
model.eval()
predictions, true_labels = [], []
with torch.no_grad():  # inference only
    for images, labels in test_loader_usps:
        outputs = model(images)
        predicted = outputs.argmax(dim=1)  # most likely class per sample
        # Accumulate per-sample results across batches as flat lists
        predictions += list(predicted.numpy())
        true_labels += list(labels.numpy())


In [None]:
# Generate the confusion matrix
# Pin the label set so the matrix is always 10x10 and its rows/columns line up
# with the tick labels below, even if a class never occurs in true_labels or
# predictions.
class_ids = list(range(10))
cm = confusion_matrix(true_labels, predictions, labels=class_ids)

# Step 4: Plot the confusion matrix using seaborn
tick_names = [str(i) for i in class_ids]
fig, ax = plt.subplots(figsize=(10, 7))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=tick_names, yticklabels=tick_names, ax=ax)
ax.set_title('Confusion Matrix for USPS Dataset (Fine-tuned LeNet-5 Model)')
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
plt.show()