This notebook provides minimal code for running MNIST classification with a CCP model (i.e. a polynomial expansion without activation functions) in *PyTorch*.


*Details*: The model implements a fourth-degree polynomial expansion (in particular the [CCP model](https://github.com/grigorisg9gr/polynomial_nets) from the $\Pi$-Nets), using a hidden dimension of 16. The network is not tuned for performance; it is meant simply to introduce you to the concept of polynomial nets in PyTorch (verified with PyTorch v.1.13).

For implementations that obtain state-of-the-art results with polynomial nets, please visit other repositories, such as https://github.com/grigorisg9gr/polynomial_nets

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision import transforms
print(torch.__version__)

In [None]:
class Net(nn.Module):
    r"""Polynomial classifier based on the CCP model of $\Pi$-nets.

    The input is flattened and passed through ``n_degree`` linear maps
    ``U1 .. Un``; their outputs are combined multiplicatively (no activation
    functions are used) and a final linear layer ``C`` produces the logits.
    """

    def __init__(self, hidden_size=16, image_size=28, channels_in=1, n_degree=4, bias=True, n_classes=10):
        r"""
        Build the layers of the polynomial network (CCP model of $\Pi$-nets).

        Args:
            hidden_size: Output width of every ``U{i}`` layer.
            image_size: Side length of the (square) input image.
            channels_in: Number of channels of the input image.
            n_degree: Degree of the polynomial expansion; one ``U{i}`` layer
                is created per degree. Must be at least 1.
            bias: Whether the ``U{i}`` layers carry a bias term.
            n_classes: Number of output logits produced by ``C``.

        Raises:
            ValueError: If ``n_degree`` is smaller than 1.
        """
        super().__init__()
        if n_degree < 1:
            # Without U1 the forward pass cannot even start; fail early with
            # a clear message instead of an AttributeError inside forward().
            raise ValueError('n_degree must be >= 1, got {}'.format(n_degree))
        self.image_size = image_size
        self.channels_in = channels_in
        self.total_image_size = self.image_size * self.image_size * channels_in
        self.hidden_size = hidden_size
        self.n_classes = n_classes
        self.n_degree = n_degree
        # Layers are registered as attributes U1..Un; these names are kept so
        # that the parameter names in the state dict remain unchanged.
        for i in range(1, self.n_degree + 1):
            setattr(self, 'U{}'.format(i), nn.Linear(self.total_image_size, self.hidden_size, bias=bias))
        # The output layer always uses a bias, independently of `bias`.
        self.C = nn.Linear(self.hidden_size, self.n_classes, bias=True)

    def forward(self, z):
        """
        Compute the class logits for a batch of inputs.

        Args:
            z: Tensor whose elements can be regrouped into rows of
                ``total_image_size`` values.

        Returns:
            Tensor with ``n_classes`` values per flattened input row.
        """
        # reshape (rather than view) also accepts non-contiguous inputs,
        # e.g. a transposed batch, copying only when it has to.
        h = z.reshape(-1, self.total_image_size)
        out = self.U1(h)
        # Recursion: out_n = (U_n h) * out_{n-1} + out_{n-1}.
        for i in range(2, self.n_degree + 1):
            out = getattr(self, 'U{}'.format(i))(h) * out + out
        return self.C(out)

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Build the polynomial network with its default settings and place it on
# the selected device.
model = Net()
model = model.to(device)

# Cross-entropy objective, optimized with SGD plus momentum.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=0.001, momentum=0.9)

# Preprocessing: convert each image to a tensor, then normalize it with
# mean 0.5 and standard deviation 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

# MNIST train/test splits, downloaded into ./data when not already present.
train_dataset = MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = MNIST(root='./data', train=False, download=True, transform=transform)

# Mini-batch iterators (64 samples per batch, reshuffled on every pass).
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=True)

# Train model
log_every = 400  # number of mini-batches between two progress reports
model.train()
for epoch in range(10):
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # Get inputs and labels
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # Print statistics. This must happen inside the batch loop: the loss
        # is accumulated for every mini-batch and the mean over the last
        # `log_every` mini-batches is reported, after which the accumulator
        # is reset.
        running_loss += loss.item()
        if i % log_every == log_every - 1:
            print(f'[{epoch + 1}, {i + 1}] loss: {running_loss / log_every:.3f}')
            running_loss = 0.0

print('Finished Training')

# Test model
# Switch to evaluation mode before inference so that any mode-dependent
# layers behave deterministically.
model.eval()
correct = 0
total = 0
# Gradients are not needed for evaluation; disabling them saves memory.
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # The predicted class is the index of the largest logit in each row.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {100 * correct / total:.2f}%')