In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [2]:
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.layer1 = nn.Linear(784, 33)
        self.layer2 = nn.Linear(33, 33)
        self.layer3 = nn.Linear(33, 33)
        self.output = nn.Linear(33, 10)

    def forward(self, x):
        x = x.view(-1, 28 * 28)
        x = x.view(-1, 784)
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        x = F.relu(self.layer3(x))
        x = self.output(x)
        return x

    def get_intermediate_layers(self, x):
        outputs = []
        x = x.view(-1, 28 * 28)
        x = F.relu(self.layer1(x))
        outputs.append(x.detach())
        x = F.relu(self.layer2(x))
        outputs.append(x.detach())
        x = F.relu(self.layer3(x))
        outputs.append(x.detach())
        return outputs

# Function to load MNIST dataset
def load_mnist(batch_size=200):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (1.0,))
    ])

    train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
    test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, test_loader

# Set parameters
batch_size = 200
learning_rate = 0.1
epochs = 10

# Load MNIST dataset
train_loader, test_loader = load_mnist(batch_size)

# Initialize and train the MLP
model = MLP()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(epochs):
    model.train()

    for i, (images, labels) in enumerate(train_loader):
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 63098666.33it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 15482578.45it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz



100%|██████████| 1648877/1648877 [00:00<00:00, 18022242.54it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 14756412.68it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [3]:
def extract_features(loader, model):
    features = [[] for _ in range(3)]  # Three sets for three layers
    labels = []
    with torch.no_grad():
        for data, target in loader:
            intermediate_outputs = model.get_intermediate_layers(data)
            for i, output in enumerate(intermediate_outputs):
                features[i].extend(output.numpy())
            labels.extend(target.numpy())
    return features, labels

train_features, train_labels = extract_features(train_loader, model)
test_features, test_labels = extract_features(test_loader, model)

In [25]:
# Logistic regression on intermediate features
for i in range(3):
    # Fit the model
    lr = LogisticRegression(max_iter=1000)
    lr.fit(train_features[i], train_labels)

    # Accuracies
    train_acc = accuracy_score(train_labels, lr.predict(train_features[i]))
    test_acc = accuracy_score(test_labels, lr.predict(test_features[i]))

    print(f"Logistic Regression on Features from Intermediate Layer {i+1} - Train Accuracy: {train_acc:.4f} - Test Accuracy: {test_acc:.4f}")

Logistic Regression on Features from Intermediate Layer 1 - Train Accuracy: 0.9606 - Test Accuracy: 0.9555
Logistic Regression on Features from Intermediate Layer 2 - Train Accuracy: 0.9666 - Test Accuracy: 0.9583
Logistic Regression on Features from Intermediate Layer 3 - Train Accuracy: 0.9668 - Test Accuracy: 0.9596


In [26]:
# Logistic regression on original features
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (1.0,))
])

train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

original_train_features = train_dataset.data.view(len(train_dataset), -1).numpy()
original_test_features = test_dataset.data.view(len(test_dataset), -1).numpy()
original_train_labels = train_dataset.targets.numpy()
original_test_labels = test_dataset.targets.numpy()

original_lr = LogisticRegression(max_iter=1000)
original_lr.fit(original_train_features, original_train_labels)

original_train_acc = accuracy_score(original_train_labels, original_lr.predict(original_train_features))
original_test_acc = accuracy_score(original_test_labels, original_lr.predict(original_test_features))

print(f"Logistic Regression on Original Features - Train Accuracy: {original_train_acc:.4f} - Test Accuracy: {original_test_acc:.4f}")

Logistic Regression on Original Features - Train Accuracy: 0.9416 - Test Accuracy: 0.9203
