## Quaternion PyTorch - Training a QNN

In [2]:
import torch
from htorch import quaternion, layers, utils
from htorch.layers import QConv2d

In [4]:
# Stand-alone quaternion convolution layer, created so its parameters can be inspected
Net = QConv2d(1, 20, kernel_size=10, bias=True)

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0.])

In [11]:
# Display the layer's bias. The [:] slice is itself an autograd operation, which is
# why the printed tensor carries grad_fn=<SliceBackward0>.
Net.bias[:]

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0.], grad_fn=<SliceBackward0>)

### Step 1 - Loading the data

We provide a `collate_fn` to convert any standard image dataset to a quaternion format (by using the RGB values as the imaginary components and the greyscale version as the real component).

In [3]:
from torchvision.datasets import CIFAR10
from torchvision import transforms

In [3]:
# Standard CIFAR-10 loading: convert images to tensors, then normalise every
# RGB channel with mean 0.5 and standard deviation 0.5
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
data = CIFAR10(
    root='data',
    train=True,
    download=True,
    transform=transform,
)

Files already downloaded and verified


In [43]:
# Batch the dataset; the custom collate_fn converts every batch to quaternion-valued images
loader = torch.utils.data.DataLoader(
    data,
    batch_size=8,
    shuffle=True,
    collate_fn=utils.convert_data_for_quaternion,
)

In [44]:
# Pull a single batch (inputs, labels) from the loader to inspect it
xb, yb = next(iter(loader))
print(xb.shape) # We now have 4 input channels as needed

torch.Size([8, 4, 32, 32])


In [45]:
# `torch.autograd.Variable` is deprecated: tensors work with autograd directly and
# `Variable(t)` simply hands back a tensor, so the batch is displayed as-is.
# The import is retained only because a later cell still references `Variable`.
from torch.autograd import Variable
xb

tensor([[[[-0.7961, -0.8039, -0.8118,  ..., -0.6863, -0.7882, -0.8353],
          [-0.7804, -0.7961, -0.8118,  ..., -0.5608, -0.8118, -0.8745],
          [-0.7804, -0.7882, -0.7961,  ..., -0.4745, -0.5294, -0.8196],
          ...,
          [ 0.3176,  0.3255,  0.3020,  ..., -0.8275, -0.8118, -0.7961],
          [ 0.2706,  0.2314,  0.2314,  ..., -0.8902, -0.8667, -0.8353],
          [ 0.0980,  0.2000,  0.1529,  ..., -0.8667, -0.8745, -0.8824]],

         [[-0.7255, -0.7412, -0.7412,  ..., -0.7020, -0.7647, -0.8196],
          [-0.7176, -0.7333, -0.7412,  ..., -0.6157, -0.8039, -0.8431],
          [-0.7255, -0.7255, -0.7255,  ..., -0.5216, -0.5686, -0.8196],
          ...,
          [-0.1843, -0.1922, -0.1765,  ..., -0.8824, -0.8667, -0.8588],
          [-0.2471, -0.2784, -0.2549,  ..., -0.9137, -0.8980, -0.8824],
          [-0.3333, -0.2941, -0.3255,  ..., -0.9059, -0.9059, -0.9059]],

         [[-0.5608, -0.5765, -0.5843,  ..., -0.7490, -0.8118, -0.8667],
          [-0.5451, -0.5765, -

In [46]:
# Build a QuaternionTensor from the batch (passed through the legacy Variable
# wrapper) and display it; the printed output starts with the "real part" component.
# NOTE(review): Variable is deprecated in PyTorch, and the second
# QuaternionTensor(...) call wraps a value that was already built from a
# QuaternionTensor - both look redundant; confirm against htorch before removing.
variable1=Variable(quaternion.QuaternionTensor(xb))
quaternion.QuaternionTensor(variable1)

real part: tensor([[[[-0.7961, -0.8039, -0.8118,  ..., -0.6863, -0.7882, -0.8353],
          [-0.7804, -0.7961, -0.8118,  ..., -0.5608, -0.8118, -0.8745],
          [-0.7804, -0.7882, -0.7961,  ..., -0.4745, -0.5294, -0.8196],
          ...,
          [ 0.3176,  0.3255,  0.3020,  ..., -0.8275, -0.8118, -0.7961],
          [ 0.2706,  0.2314,  0.2314,  ..., -0.8902, -0.8667, -0.8353],
          [ 0.0980,  0.2000,  0.1529,  ..., -0.8667, -0.8745, -0.8824]]],


        [[[-0.4353, -0.4902, -0.4980,  ..., -0.5922, -0.5922, -0.6000],
          [-0.6157, -0.6314, -0.6392,  ..., -0.5843, -0.5843, -0.5843],
          [-0.6235, -0.6235, -0.6157,  ..., -0.5686, -0.5686, -0.5765],
          ...,
          [-0.6784, -0.6941, -0.6863,  ..., -0.5765, -0.5686, -0.5843],
          [-0.7255, -0.7020, -0.6941,  ..., -0.5922, -0.6235, -0.6471],
          [-0.7412, -0.7255, -0.7098,  ..., -0.6392, -0.6706, -0.6706]]],


        [[[-0.4039, -0.4118, -0.4118,  ..., -0.4196, -0.4196, -0.4196],
          [-0.4

### Step 2 - Building the QNN

We use a simple QNN with two convolutional blocks with split-ReLU activations.

In [6]:
# Quaternion classifier: two QConv2d -> ReLU -> MaxPool2d stages, then a quaternion
# linear layer and a conversion back to 10 real-valued outputs.
# Channel/feature counts given to the Q-layers are counted in quaternions.
model = torch.nn.Sequential(
    layers.QConv2d(1, 20, kernel_size=10, bias=True), # We only have 1 channel in terms of quaternions (the 4 real channels of xb)
    torch.nn.ReLU(), # Ordinary ReLU on the underlying real tensor, i.e. applied to each component separately
    torch.nn.MaxPool2d(kernel_size=2, stride=2), # Max-pool is okay because it pools every channel independently
    layers.QConv2d(20, 20, kernel_size=10, bias=True),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2, stride=2),
    torch.nn.Flatten(), # presumably leaves 20 quaternion features per sample (spatial size 1x1) - TODO confirm QConv2d stride/padding defaults
    layers.QLinear(20, 10),
    layers.QuaternionToReal(10), # Take the absolute value before the softmax
)

In [7]:
# Test the model is working correctly: the batch of 8 images should yield
# one row of 10 real-valued outputs per image
model(xb).shape

torch.Size([8, 10])

### Step 3 - Training loop

At this point, everything is classical PyTorch:
https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html

In [8]:
# Cross-entropy loss on the model outputs, minimised with SGD plus momentum
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.001,
    momentum=0.9,
)

In [9]:
# Train for two passes over the loader, printing the mean loss of every
# 2000 mini-batches.
for epoch in range(2):

    running_loss = 0.0
    # Unpack each batch in the loop header rather than through a variable named
    # `data`: that name is bound to the CIFAR-10 dataset, and overwriting it with
    # a batch would break any re-run of the DataLoader cell after training.
    for i, (inputs, labels) in enumerate(loader):
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

[1,  2000] loss: 1.798
[1,  4000] loss: 1.562
[1,  6000] loss: 1.467
[2,  2000] loss: 1.352
[2,  4000] loss: 1.321
[2,  6000] loss: 1.295
Finished Training


### Converting an existing nn.Module

Using the new [torch.fx](https://pytorch.org/docs/stable/fx.html) functionality, we can also convert an existing PyTorch `nn.Module` into a quaternion-valued one, provided all shapes are divisible by 4:

In [14]:
# This model is similar to the previous one, but all channel/feature dimensions are
# multiplied by 4 (one quaternion = 4 real values).
# NOTE: this rebinds `model`, so the quaternion network trained above is no longer
# reachable under that name.
model = torch.nn.Sequential(
    torch.nn.Conv2d(4, 80, kernel_size=10, bias=True),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2, stride=2),
    torch.nn.Conv2d(80, 80, kernel_size=10, bias=True),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2, stride=2),
    torch.nn.Flatten(),
    torch.nn.Linear(80, 40),
)

In [15]:
# Convert to a QNN and run on the previous set of images.
# The converted module is built and called in one expression; it is not stored.
utils.convert_to_quaternion(model)(xb).shape

torch.Size([8, 40])

In [1]:
import torch

In [2]:
# Random 3-D tensor of shape (2, 3, 4), used to compare `flatten` start dimensions
a = torch.rand(2, 3, 4)

In [3]:
# Display the original (2, 3, 4) tensor for reference
a

tensor([[[0.7327, 0.2658, 0.0568, 0.7222],
         [0.1554, 0.1372, 0.5214, 0.0433],
         [0.8462, 0.7988, 0.9529, 0.5796]],

        [[0.4696, 0.8617, 0.3132, 0.2324],
         [0.4662, 0.8223, 0.7987, 0.0827],
         [0.9092, 0.2384, 0.1312, 0.1774]]])

In [4]:
# Flatten from dim 1 onwards: (2, 3, 4) -> (2, 12)
b = a.flatten(1)

In [6]:
# Flatten from dim 2 onwards: dim 2 is already the last one, so the shape stays (2, 3, 4)
c = a.flatten(2)

In [8]:
# Flatten from dim 0 onwards: every dimension collapses into a 1-D tensor of 24 values
d = a.flatten(0)

In [9]:
# Result of flatten(0): a single row of all 24 values
d

tensor([0.7327, 0.2658, 0.0568, 0.7222, 0.1554, 0.1372, 0.5214, 0.0433, 0.8462,
        0.7988, 0.9529, 0.5796, 0.4696, 0.8617, 0.3132, 0.2324, 0.4662, 0.8223,
        0.7987, 0.0827, 0.9092, 0.2384, 0.1312, 0.1774])

In [10]:
# Result of flatten(2): prints the same (2, 3, 4) layout as `a`
c

tensor([[[0.7327, 0.2658, 0.0568, 0.7222],
         [0.1554, 0.1372, 0.5214, 0.0433],
         [0.8462, 0.7988, 0.9529, 0.5796]],

        [[0.4696, 0.8617, 0.3132, 0.2324],
         [0.4662, 0.8223, 0.7987, 0.0827],
         [0.9092, 0.2384, 0.1312, 0.1774]]])

In [12]:
# Result of flatten(1): two rows of 12 values each
b

tensor([[0.7327, 0.2658, 0.0568, 0.7222, 0.1554, 0.1372, 0.5214, 0.0433, 0.8462,
         0.7988, 0.9529, 0.5796],
        [0.4696, 0.8617, 0.3132, 0.2324, 0.4662, 0.8223, 0.7987, 0.0827, 0.9092,
         0.2384, 0.1312, 0.1774]])