## Quaternion PyTorch - Basic mechanisms

In [6]:
import torch
from htorch import quaternion,functions, utils
from htorch.layers import QConv2d

Parameter containing:
tensor([[[[ 0.1569,  0.2573,  0.1983,  ...,  0.0991,  0.1114,  0.0115],
          [-0.2004, -0.2154,  0.0436,  ..., -0.1316,  0.1901, -0.1138],
          [ 0.1054,  0.0429,  0.2505,  ..., -0.1653,  0.0764,  0.1345],
          ...,
          [ 0.1480, -0.0251, -0.1950,  ..., -0.0720,  0.1106, -0.1472],
          [ 0.0089,  0.1881,  0.1774,  ..., -0.2189,  0.0059, -0.1799],
          [ 0.1776, -0.2336, -0.3129,  ..., -0.1003,  0.1436,  0.1164]],

         [[ 0.0718,  0.1528,  0.1962,  ..., -0.1262, -0.1284, -0.2470],
          [ 0.1670, -0.1370,  0.1065,  ...,  0.1241,  0.2173, -0.2143],
          [ 0.0535,  0.1223,  0.2508,  ..., -0.0523, -0.1899, -0.0082],
          ...,
          [-0.1213, -0.0746,  0.1694,  ...,  0.1325, -0.1661,  0.1995],
          [ 0.2320, -0.1515, -0.3052,  ..., -0.0093,  0.0595,  0.2220],
          [-0.3129,  0.1389, -0.1485,  ...,  0.0864,  0.1677,  0.2139]],

         [[ 0.1295, -0.1751,  0.1637,  ...,  0.1191, -0.0428, -0.1686],
        

### 1 - Quaternion tensors

A quaternion number is represented by:

$$
x = a + bi + cj + dk
$$

where $a$, $b$, $c$, and $d$ are real values, and $i$, $j$, $k$ are the imaginary units. A `QuaternionTensor` extends the standard PyTorch `torch.Tensor` to handle quaternion values, by specifying the real and imaginary components during initialization.

The simplest way to initialize a `QuaternionTensor` is to pass all $(a, b, c, d)$ values as a single (..., 4)-dimensional tensor of real values:

In [2]:
# Simple scalar quaternion: the four values are the coefficients (a, b, c, d),
# i.e. 0 + 0.3i + 0.4j + 0.5k (compare with the printed parts).
x = quaternion.QuaternionTensor([0.0, 0.3, 0.4, 0.5])
print(x)

real part: tensor([0.])
imaginary part (i): tensor([0.3000])
imaginary part (j): tensor([0.4000])
imaginary part (k): tensor([0.5000])


In [3]:
# Random real-valued input; the size-4 axis (dim 1) presumably holds the
# quaternion components when this tensor is wrapped later — TODO confirm.
A = torch.randn((1, 4, 256, 256))

In [4]:
# Gaussian noise with the same shape as A: zero mean and a standard deviation
# of 7.5/255 (presumably 7.5 grey levels for inputs scaled to [0, 1] — confirm).
# torch.empty_like replaces the legacy typed constructor torch.FloatTensor(size).
noise = torch.empty_like(A).normal_(mean=0, std=7.5 / 255.)

In [5]:
# Sanity check: the noise tensor should have the same shape as A.
noise.shape

torch.Size([1, 4, 256, 256])

In [15]:
# Wrap the real-valued tensor A as a quaternion tensor and display it.
# NOTE(review): this prints every component of a 1x4x256x256 tensor; showing
# `B.shape` instead may be a more readable check.
B = quaternion.QuaternionTensor(A)
B

real part: tensor([[[-1.7284e+00,  3.9724e-01, -2.2195e-01,  9.2133e-01],
         [ 7.4087e-01,  1.0755e-01,  1.7271e-01, -6.8133e-01],
         [ 7.5175e-01,  3.1655e-01,  1.2834e+00,  2.2567e-01],
         ...,
         [-6.0341e-01, -7.0706e-02,  1.4704e+00, -1.0301e+00],
         [ 9.6017e-01,  6.7951e-01, -1.2789e-01,  1.8014e+00],
         [ 1.7220e+00,  2.1882e-01,  1.2320e+00, -7.5129e-01]],

        [[ 6.8106e-01,  1.5393e+00, -2.2932e-01,  3.0646e-01],
         [ 1.1933e+00, -1.6130e-01,  1.3013e+00,  2.4748e-03],
         [ 1.6995e+00, -8.6855e-01,  8.3231e-01,  2.2836e-01],
         ...,
         [ 2.5696e-01, -3.9673e-01,  2.6591e+00,  4.7196e-01],
         [ 9.9206e-02, -2.9309e-01,  6.0197e-01,  3.7295e-01],
         [-1.1359e+00, -6.5224e-01,  2.1169e-02,  1.5565e+00]],

        [[-1.5388e+00, -1.5855e+00, -3.4964e-01,  1.4440e+00],
         [-1.3964e-01,  9.5997e-01, -1.8225e-01, -1.8310e+00],
         [-5.7565e-01,  4.8137e-01, -4.6784e-01, -1.6459e+00],
         ...

For a general quaternion tensor of shape $(a, b, \ldots)$, a `QuaternionTensor` takes $(a, b, \ldots, 4)$ real values for its initialization:

In [3]:
# Mini-batch of two scalar quaternions: a (2, 4) real tensor gives one
# quaternion per row, so each printed component has shape (2, 1).
x = quaternion.QuaternionTensor(torch.rand(2, 4))
print(x)

real part: tensor([[0.4471],
        [0.7484]])
imaginary part (i): tensor([[0.4473],
        [0.6908]])
imaginary part (j): tensor([[0.3906],
        [0.9504]])
imaginary part (k): tensor([[0.3567],
        [0.0238]])


In [4]:
# A mini-batch of 4 vectors, each composed of 2 quaternions
# (built from a (4, 4, 2) real tensor; `.shape` reports that real-valued shape).
y = quaternion.QuaternionTensor(torch.rand(4, 4, 2))
y.shape

torch.Size([4, 4, 2])

All standard quaternion operations can be applied on the tensor (see the documentation of `QuaternionTensor` for a full list):

In [5]:
# Get the a/b/c/d components
# `.a` is the real component: the values match the "real part" printed for x.
# The b/c/d accessors presumably work the same way.
print(x.a)

tensor([[0.4471],
        [0.7484]])


In [6]:
# Shape (always ends with a 4)
# For this x it is (2, 4): two quaternions with four real components each.
x.qshape

torch.Size([2, 4])

In [7]:
# Conjugation: the real part is unchanged and the three imaginary parts
# (i, j, k) change sign.
print(x.conj())

real part: tensor([[0.4471],
        [0.7484]])
imaginary part (i): tensor([[-0.4473],
        [-0.6908]])
imaginary part (j): tensor([[-0.3906],
        [-0.9504]])
imaginary part (k): tensor([[-0.3567],
        [-0.0238]])


In [8]:
# Element-wise norm: one real value per quaternion, sqrt(a^2 + b^2 + c^2 + d^2).
print(x.norm())

tensor([[0.8245],
        [1.3933]])


In [9]:
# Element-wise angle: one real value (radians) per quaternion.
# NOTE(review): the printed values are consistent with arccos(a / ||x||);
# confirm against the QuaternionTensor documentation.
print(x.theta())

tensor([[0.9977],
        [1.0037]])


In [10]:
# Quaternion multiplication (element-wise Hamilton product)
# For a square x * x, the real part is a^2 - (b^2 + c^2 + d^2) and each
# imaginary part is 2a times the corresponding component.
print(x * x)

real part: tensor([[-0.2800],
        [-0.8210]])
imaginary part (i): tensor([[0.4000],
        [1.0340]])
imaginary part (j): tensor([[0.3492],
        [1.4227]])
imaginary part (k): tensor([[0.3189],
        [0.0356]])


In [12]:
# Quaternion matrix multiplication
# NOTE(review): the recorded output is a real 2x2 matrix whose diagonal equals
# the squared norms of the two quaternions, which looks like the plain real
# product x @ x.t() rather than x.t() @ x — re-run the cell to confirm.
print(x.t() @ x)

tensor([[0.6798, 1.0233],
        [1.0233, 1.9412]])


Importantly, quaternion tensors and real-valued tensors are interoperable (real-valued tensors are cast to quaternion tensors with zero imaginary parts):

In [13]:
# Quaternion scalar multiplication
# Each of the two quaternions in x is scaled by its own random real factor:
# all four components of a row are multiplied by the same scalar.
print(x * torch.rand(2))

real part: tensor([[0.2381],
        [0.2625]])
imaginary part (i): tensor([[0.2382],
        [0.2423]])
imaginary part (j): tensor([[0.2080],
        [0.3333]])
imaginary part (k): tensor([[0.1899],
        [0.0083]])


### 2 - Quaternion gradients

Gradients can be computed with the PyTorch autograd mechanisms:

In [14]:
# Fresh mini-batch of two quaternions, tracked by autograd.
x = quaternion.QuaternionTensor(torch.rand(2, 4))
x.requires_grad = True
# Scalar objective: the sum of the two quaternion norms.
y = x.norm().sum()
# Back-propagate; this fills x.grad with dy/dx.
y.backward()

In [15]:
# Since d||x||/dx = x / ||x||, each printed row should have unit Euclidean norm.
print(x.grad) # The gradient is also a (..., 4)-dimensional tensor

tensor([[0.2434, 0.4977, 0.2937, 0.7790],
        [0.7245, 0.5663, 0.3776, 0.1090]])


In [17]:
from htorch.functions import initialize_linear
import torch.nn.functional as F
from torch import nn

# Initial quaternion weight for a 4 -> 20 layer; the recorded shape is (4, 80).
q_weight = initialize_linear(4, 20)
print(q_weight.shape)

# Split the weight into its four component blocks (each (4, 20) per the
# recorded output) and make every block a trainable parameter.
r, i, j, k = q_weight.chunk()
r_weight, i_weight, j_weight, k_weight = (
    nn.Parameter(component) for component in (r, i, j, k)
)

# Assemble the real-valued (16, 80) matrix: the four blocks are stacked along
# dim 0, and that same stack is repeated four times along dim 1.
# NOTE(review): a Hamilton-product weight matrix would normally use permuted /
# sign-flipped blocks in each column; here all four columns are identical —
# confirm this is intended.
weight = torch.cat(
    [
        torch.cat([r_weight, i_weight, j_weight, k_weight], dim=0)
        for _ in range(4)
    ],
    dim=1,
)

print(weight.shape)
print(i_weight.shape)

# Apply the assembled weight to a random mini-batch as an ordinary linear map.
x = quaternion.QuaternionTensor(torch.rand(2, 16))
y = quaternion.QuaternionTensor(F.linear(x, weight.t()))
print(x.shape)
print(y.shape)

torch.Size([4, 80])
torch.Size([16, 80])
torch.Size([4, 20])
torch.Size([2, 16])
torch.Size([2, 80])


### 3 - Quaternion-valued layers

We also provide a number of quaternion-valued layers to implement quaternion neural networks:

In [57]:
from torch import nn
from htorch.layers import QLinear

In [58]:
# Simple model with two quaternion-valued dense layers, and a split ReLU (ReLU applied on each component separately)
model = nn.Sequential(
    QLinear(4, 20, bias=True),  # first quaternion dense layer: 4 -> 20 (presumably counted in quaternions)
    nn.ReLU(),  # ordinary real-valued ReLU, hence "split"
    QLinear(20, 1)  # second quaternion dense layer: 20 -> 1
)

In [78]:
# Mini-batch of two samples with 16 real features each (displayed as four
# quaternions per sample), passed through the two-layer quaternion model.
x = quaternion.QuaternionTensor(torch.rand(2, 16))
y = model(x)
# Display the input: each component is shown as a (2, 4) tensor.
x

real part: tensor([[0.6880, 0.0847, 0.9757, 0.5077],
        [0.9420, 0.6194, 0.4373, 0.2948]])
imaginary part (i): tensor([[0.0664, 0.8679, 0.6496, 0.0596],
        [0.6619, 0.8849, 0.8469, 0.7168]])
imaginary part (j): tensor([[0.1720, 0.2184, 0.5141, 0.4599],
        [0.5464, 0.5286, 0.7514, 0.4183]])
imaginary part (k): tensor([[0.4162, 0.6309, 0.1841, 0.1809],
        [0.7883, 0.7348, 0.5910, 0.0243]])

In [21]:
# Apply a standalone quaternion dense layer to x. x carries 16 real features
# (4 quaternions) per sample, so the layer is given 4 input quaternions — the
# same input width as the other QLinear(4, ...) layers applied to this x in the
# notebook. QLinear(1, 20) presumably expects only 4 real features and would
# fail on this x when the notebook is run top to bottom.
y = QLinear(4, 20, bias=True)(x)

We also provide layers to easily integrate quaternion-valued and real-valued blocks:

In [69]:
from htorch.layers import QuaternionToReal
from torch.nn import Softmax

In [70]:
# Quaternion dense layer, followed by a conversion to real values and a
# softmax over dim 1.
model = nn.Sequential(
    QLinear(4, 10),
    QuaternionToReal(10), # Take the absolute value of each output
    Softmax(1)
)

In [72]:
# One real value per output quaternion: 2 samples x 10 outputs.
model(x).shape

torch.Size([2, 10])

See `notebooks/training.ipynb` for a complete training example.