In [1]:
import torch
from torch import nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

import matplotlib.pyplot as plt
import numpy as np

In [2]:
training_data = datasets.MNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor() # convertit les images [H,W,C] dont les valeurs sont dans [0,255] en tenseurs [C,H,W] dont les valeurs sont dans [0,1]
)

test_data = datasets.MNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:05<00:00, 1965315.42it/s]


Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 2418215.99it/s]


Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:02<00:00, 822281.20it/s]


Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 646087.25it/s]

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw






In [10]:

# There we create the class for our Shallow RBF network

class ShallowRBF(nn.Module):
    def __init__(self, input_dim, num_classes, num_centers):
        super(ShallowRBF, self).__init__()
        self.centers = nn.Parameter(torch.randn(num_centers, input_dim))
        self.beta = nn.Parameter(torch.ones(num_centers))
        self.fc = nn.Linear(num_centers, num_classes)

    def forward(self, x):
        # Calculate the RBF activations
        rbf_activations = torch.exp(-self.beta * torch.norm(x.unsqueeze(1) - self.centers, dim=2))

        # Normalize the RBF activations
        rbf_activations = rbf_activations / torch.sum(rbf_activations, dim=1, keepdim=True)

        # Pass the normalized RBF activations through the linear layer
        output = self.fc(rbf_activations)

        return output


# Initialize the model
model = ShallowRBF(input_dim, num_classes, num_centers)


In [9]:
# Set the hyperparameters
input_dim = 784
num_classes = 10
num_centers = 100
num_epochs = 10
batch_size = 100
learning_rate = 0.1

epsilon = 0.3
device = torch.device('cpu')

In [11]:
criterion = nn.CrossEntropyLoss()
sgd = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, nesterov=True)


train_loader = torch.utils.data.DataLoader(training_data, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=64, shuffle=False)

In [13]:

num_epochs = 10
# Boucle d'entraînement
for epoch in range(num_epochs):
    for batch_idx, (data, target) in enumerate(train_loader):
        sgd.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        sgd.step()
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

RuntimeError: The size of tensor a (28) must match the size of tensor b (784) at non-singleton dimension 4