In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

# Load and preprocess the MNIST dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])
mnist_train = datasets.MNIST('data/', train=True, download=True, transform=transform)
mnist_test = datasets.MNIST('data/', train=False, download=True, transform=transform)

# Randomly select 40k samples from the training set
indices = torch.randperm(len(mnist_train))[:40000]
train_data = torch.utils.data.Subset(mnist_train, indices)

# Define the RFF model
class RFFModel(nn.Module):
    def __init__(self, D, K):
        super(RFFModel, self).__init__()
        self.W = nn.Parameter(torch.randn(D, K) * torch.sqrt(torch.tensor(2.0 / D)))
        self.b = nn.Parameter(torch.zeros(K))
        self.W2 = nn.Parameter(torch.randn(K, 10) * torch.sqrt(torch.tensor(2.0 / K)))
        self.b2 = nn.Parameter(torch.zeros(10))

    def forward(self, x):
        x = x.view(-1, 28*28)
        z = torch.cos(2 * torch.pi * x @ self.W.t() + self.b)
        z = z.mean(dim=0, keepdim=True)
        y = z @ self.W2 + self.b2
        return y

# Set the learning rate
lr = 0.01

# Train the model with increasing number of parameters
n_params_list = [1000, 2000, 5000, 10000, 20000, 50000]
train_errors = []
test_errors = []

for n_params in n_params_list:
    model = RFFModel(28*28, n_params)
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    criterion = nn.CrossEntropyLoss()

    train_error = 0.0
    test_error = 0.0

    for epoch in range(100):
        model.train()
        for i, (inputs, labels) in enumerate(torch.utils.data.DataLoader(train_data, batch_size=100, shuffle=True)):
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_error += loss.item()

        model.eval()
        with torch.no_grad():
            for inputs, labels in torch.utils.data.DataLoader(mnist_test, batch_size=100, shuffle=False):
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                test_error += loss.item()

    train_errors.append(train_error / len(train_data))
    test_errors.append(test_error / len(mnist_test))

# Plot the training and test error as a function of number of parameters
plt.semilogx(n_params_list, train_errors, label='Train error')
plt.semilogx(n_params_list, test_errors, label='Test error')
plt.xlabel('Number of parameters')
plt.ylabel('Error')
plt.legend()
plt.show()


RuntimeError: mat1 and mat2 shapes cannot be multiplied (100x784 and 1000x784)

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim

In [None]:
# Load MNIST dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

train_data = torchvision.datasets.MNIST('data/', train=True, download=True, transform=transform)
test_data = torchvision.datasets.MNIST('data/', train=False, transform=transform)

# Randomly select 40k samples from the training data
indices = torch.randperm(len(train_data))[:40000]
train_data = torch.utils.data.Subset(train_data, indices)

In [None]:
# Define the RFF model
class RFFModel(nn.Module):
    def __init__(self, n_params):
        super(RFFModel, self).__init__()
        self.W = nn.Parameter(torch.randn(784, n_params) * torch.sqrt(torch.tensor(2 / 784)))
        self.b = nn.Parameter(torch.zeros(n_params))
        self.W2 = nn.Parameter(torch.randn(n_params, 10) * torch.sqrt(torch.tensor(2 / n_params)))
        self.b2 = nn.Parameter(torch.zeros(10))

    def forward(self, x):
        x = x.view(-1, 28*28)
        z = torch.cos(2 * torch.pi * x @ self.W + self.b)
        z = z.mean(dim=0, keepdim=True)
        y = z @ self.W2 + self.b2
        return y

In [18]:
# Hyperparameters
lr = 0.1
momentum = 0.9
n_params_list = [200, 500, 1000, 2000, 5000, 10000]
train_errors = []
test_errors = []

for n_params in n_params_list:
    model = RFFModel(n_params)
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    criterion = nn.CrossEntropyLoss()

    # Train the model
    model.train()
    for epoch in range(100):
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(torch.utils.data.DataLoader(train_data, shuffle=True)):
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        train_errors.append(running_loss / (i+1))

    # Test the model
    model.eval()
    correct = 0
    with torch.no_grad():
        for inputs, labels in torch.utils.data.DataLoader(test_data, shuffle=False):
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            correct += (predicted == labels).sum().item()
    test_errors.append(1 - correct / len(test_data))

# Plot the results
import matplotlib.pyplot as plt
plt.plot(n_params_list, train_errors, label='Train')
plt.plot(n_params_list, test_errors, label='Test')
plt.xscale('log')
plt.xlabel('Number of parameters')
plt.ylabel('Error')
plt.legend()
plt.show()

In [15]:
train_data.dataset.data.shape

torch.Size([60000, 28, 28])