In [1]:
import torch
from torch import nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

import matplotlib.pyplot as plt
import numpy as np

In [2]:
training_data = datasets.MNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor() # convertit les images [H,W,C] dont les valeurs sont dans [0,255] en tenseurs [C,H,W] dont les valeurs sont dans [0,1]
)

test_data = datasets.MNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)

In [12]:
# Set the hyperparameters
input_dim = 784
num_classes = 10
num_centers = 100
num_epochs = 10
batch_size = 100
learning_rate = 1e-3

epsilon = 0.3
device = torch.device('cpu')

In [8]:

# There we create the class for our Shallow RBF network

class ShallowRBF(nn.Module):
    def __init__(self, input_dim, num_classes, num_centers):
        super(ShallowRBF, self).__init__()
        self.centers = nn.Parameter(torch.randn(num_centers, input_dim))
        self.beta = nn.Parameter(torch.ones(num_centers))
        self.fc = nn.Linear(num_centers, num_classes)

    def forward(self, x):
        x = x.view(x.size(0), -1)  # Flatten the input
        # Calculate the RBF activations
        rbf_activations = torch.exp(-self.beta * torch.norm(x.unsqueeze(1) - self.centers, dim=2))

        # Normalize the RBF activations
        rbf_activations = rbf_activations / torch.sum(rbf_activations, dim=1, keepdim=True)

        # Pass the normalized RBF activations through the linear layer
        output = self.fc(rbf_activations)

        return output


# Initialize the model
model = ShallowRBF(input_dim, num_classes, num_centers)


In [13]:
criterion = nn.CrossEntropyLoss()
sgd = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, nesterov=True)


train_loader = torch.utils.data.DataLoader(training_data, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=64, shuffle=False)

In [14]:

# Boucle d'entraînement
for epoch in range(num_epochs):
    for batch_idx, (data, target) in enumerate(train_loader):
        sgd.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        sgd.step()
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))



In [15]:
b_size = 5
data = test_loader.dataset.data[0:b_size].data.unsqueeze(1).float()
target = test_loader.dataset.targets[0:b_size]
pred = model(data)
print(pred.argmax(dim=1, keepdim=False))
print(target)

tensor([0, 0, 0, 0, 0])
tensor([7, 2, 1, 0, 4])


In [16]:
test_loss = 0
correct = 0
# pour ne pas calculer les gradients (gain de temps et de mémoire)
with torch.no_grad():
    for data, target in test_loader:
        output = model(data)
        test_loss += criterion(output, target).item()
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()

test_loss /= len(test_loader.dataset)

print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.1f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset),
    100. * correct / len(test_loader.dataset)))


Test set: Average loss: 0.0357, Accuracy: 1305/10000 (13.1%)

