In [1]:
import torch
import numpy as np

In [2]:
torch.__version__

'1.10.0'

# Single Tensor Operation

Including: `sum()`, `mean()`, `prod()`, `sqrt()`, etc

In-place operation: append `_` to the method name, e.g. `sqrt_()`

In [3]:
sample_t = torch.randn(3, 4)

In [4]:
sample_t

tensor([[ 0.8361, -0.4165, -0.0693,  0.0925],
        [ 0.2939, -1.1079, -0.3846, -0.5302],
        [-1.7553,  0.8901,  2.4044, -1.0195]])

In [5]:
sample_t.sum(), sample_t.mean()

(tensor(-0.7663), tensor(-0.0639))

Reduction operations such as `sum()` drop the dimension they reduce over.

To keep dimension, set `keepdim=True`

In [6]:
# Summing along dim 0 collapses the rows, so the (3, 4) tensor becomes shape (4,).
# A descriptive name is used instead of `_`, which IPython reserves for the
# output of the previously executed cell.
col_sums = sample_t.sum(0)

print(col_sums)
print(col_sums.shape)

tensor([-0.6253, -0.6343,  1.9505, -1.4572])
torch.Size([4])


In [7]:
# keepdim=True retains the reduced axis with size 1, giving shape (1, 4).
# A descriptive name is used instead of `_`, which IPython reserves for the
# output of the previously executed cell.
col_sums_keepdim = sample_t.sum(0, keepdim=True)

print(col_sums_keepdim)
print(col_sums_keepdim.shape)

tensor([[-0.6253, -0.6343,  1.9505, -1.4572]])
torch.Size([1, 4])


In [8]:
sample_t

tensor([[ 0.8361, -0.4165, -0.0693,  0.0925],
        [ 0.2939, -1.1079, -0.3846, -0.5302],
        [-1.7553,  0.8901,  2.4044, -1.0195]])

In-place variants exist only for operations that do not change the tensor's shape, e.g. `sqrt_()`; there is no `sum_()`.

In [9]:
# sum() has no in-place counterpart; catch the resulting AttributeError so the
# cell shows the message instead of aborting the notebook.
try:
    sample_t.sum_()
except AttributeError as e:
    print(e)

'Tensor' object has no attribute 'sum_'


In [10]:
# In-place square root: this overwrites sample_t itself (see the next cell).
# Entries that were negative come out as nan.
sample_t.sqrt_()

tensor([[0.9144,    nan,    nan, 0.3041],
        [0.5421,    nan,    nan,    nan],
        [   nan, 0.9434, 1.5506,    nan]])

In [11]:
sample_t

tensor([[0.9144,    nan,    nan, 0.3041],
        [0.5421,    nan,    nan,    nan],
        [   nan, 0.9434, 1.5506,    nan]])

# Multiple-tensor Operations

In [12]:
# Two independent random 2x3 operands for the element-wise examples below.
sample_t1, sample_t2 = torch.randn((2, 3)), torch.randn((2, 3))

In [13]:
print(sample_t1)
print(sample_t2)

tensor([[ 0.1205,  0.9371,  1.8299],
        [-0.2666, -0.8590, -0.4210]])
tensor([[ 0.5028,  0.5248, -1.1740],
        [-0.0944, -0.0601, -0.4540]])


The following three variations of `add()` are equivalent. The same holds for `sub()`, `mul()`, `div()`, etc.

**Note**: these are *element-wise* operations.

In [14]:
sample_t1 + sample_t2

tensor([[ 0.6233,  1.4619,  0.6559],
        [-0.3610, -0.9190, -0.8750]])

In [15]:
sample_t1.add(sample_t2)

tensor([[ 0.6233,  1.4619,  0.6559],
        [-0.3610, -0.9190, -0.8750]])

In [16]:
torch.add(sample_t1, sample_t2)

tensor([[ 0.6233,  1.4619,  0.6559],
        [-0.3610, -0.9190, -0.8750]])

In [17]:
sample_t1 * sample_t2

tensor([[ 0.0606,  0.4918, -2.1482],
        [ 0.0252,  0.0516,  0.1911]])

# Extreme values

In [18]:
sample_t3 = torch.randn(4, 3)

In [19]:
sample_t3

tensor([[-3.5662,  0.7001,  1.0126],
        [ 0.2637, -0.3328,  1.5878],
        [ 0.9340,  0.8332, -1.2957],
        [-0.0796, -1.6718, -0.7158]])

In [20]:
# No argument: position of the overall maximum in the flattened tensor.
print(sample_t3.argmax())
# dim=0: for each column, the row index of its maximum.
print(sample_t3.argmax(0))
# dim=1: for each row, the column index of its maximum.
print(sample_t3.argmax(1))

tensor(5)
tensor([2, 2, 1])
tensor([2, 2, 0, 0])


In [21]:
print(sample_t3.max())

tensor(1.5878)


In [22]:
sample_t3.max(0)

torch.return_types.max(
values=tensor([0.9340, 0.8332, 1.5878]),
indices=tensor([2, 2, 1]))

In [23]:
# max(dim) returns a (values, indices) named tuple; run the reduction once and
# read both fields from the same result instead of reducing twice.
col_max = sample_t3.max(0)
print(col_max.values)
print(col_max.indices)

tensor([0.9340, 0.8332, 1.5878])
tensor([2, 2, 1])


In [24]:
# max(dim) returns a (values, indices) named tuple; run the reduction once and
# read both fields from the same result instead of reducing twice.
row_max = sample_t3.max(1)
print(row_max.values)
print(row_max.indices)

tensor([ 1.0126,  1.5878,  0.9340, -0.0796])
tensor([2, 2, 0, 0])


In [25]:
print(sample_t3.max(-1))

torch.return_types.max(
values=tensor([ 1.0126,  1.5878,  0.9340, -0.0796]),
indices=tensor([2, 2, 0, 0]))


In [26]:
sample_t3.sort()

torch.return_types.sort(
values=tensor([[-3.5662,  0.7001,  1.0126],
        [-0.3328,  0.2637,  1.5878],
        [-1.2957,  0.8332,  0.9340],
        [-1.6718, -0.7158, -0.0796]]),
indices=tensor([[0, 1, 2],
        [1, 0, 2],
        [2, 1, 0],
        [1, 2, 0]]))

In [27]:
sample_t3.sort(-1)

torch.return_types.sort(
values=tensor([[-3.5662,  0.7001,  1.0126],
        [-0.3328,  0.2637,  1.5878],
        [-1.2957,  0.8332,  0.9340],
        [-1.6718, -0.7158, -0.0796]]),
indices=tensor([[0, 1, 2],
        [1, 0, 2],
        [2, 1, 0],
        [1, 2, 0]]))

In [28]:
sample_t3.sort(0)

torch.return_types.sort(
values=tensor([[-3.5662, -1.6718, -1.2957],
        [-0.0796, -0.3328, -0.7158],
        [ 0.2637,  0.7001,  1.0126],
        [ 0.9340,  0.8332,  1.5878]]),
indices=tensor([[0, 3, 2],
        [3, 1, 3],
        [1, 0, 0],
        [2, 2, 1]]))

# Matrix Multiplication

In [29]:
# A (3, 4) and a (4, 3) matrix: the inner dimensions agree, so their product is (3, 3).
sample_t4, sample_t5 = torch.randn((3, 4)), torch.randn((4, 3))

There are three equivalent ways to multiply two 2-D matrices:
- `a @ b`
- `torch.mm(a, b)`
- `a.mm(b)`

In [30]:
sample_t4 @ sample_t5

tensor([[-1.4109,  1.0740, -1.3934],
        [ 1.2257, -0.0869,  0.3587],
        [ 0.1282, -0.5011,  0.6057]])

In [31]:
sample_t4.mm(sample_t5)

tensor([[-1.4109,  1.0740, -1.3934],
        [ 1.2257, -0.0869,  0.3587],
        [ 0.1282, -0.5011,  0.6057]])

In [32]:
torch.mm(sample_t4, sample_t5)

tensor([[-1.4109,  1.0740, -1.3934],
        [ 1.2257, -0.0869,  0.3587],
        [ 0.1282, -0.5011,  0.6057]])

For batched (3-D) inputs, `mm` no longer applies and we have to use `bmm` instead. Note that the `@` operator still works on batched tensors.

In [33]:
# A batch of two (3, 4) matrices and a batch of two (4, 3) matrices.
sample_t6, sample_t7 = torch.randn((2, 3, 4)), torch.randn((2, 4, 3))

In [34]:
# Batched matrix product: each (3, 4) matrix is multiplied by the matching
# (4, 3) matrix, giving shape (2, 3, 3).
# A descriptive name is used instead of `_`, which IPython reserves for the
# output of the previously executed cell.
batch_prod = sample_t6.bmm(sample_t7)

print(batch_prod)
print(batch_prod.shape)

tensor([[[-3.6460,  0.0383,  1.5289],
         [-0.2309,  0.2930,  3.8853],
         [-1.9953,  0.0478, -0.7433]],

        [[ 0.3728, -0.1142, -0.9654],
         [ 0.8009, -0.7652, -5.2539],
         [-0.8692,  0.8701,  3.2773]]])
torch.Size([2, 3, 3])


In [35]:
torch.bmm(sample_t6, sample_t7)

tensor([[[-3.6460,  0.0383,  1.5289],
         [-0.2309,  0.2930,  3.8853],
         [-1.9953,  0.0478, -0.7433]],

        [[ 0.3728, -0.1142, -0.9654],
         [ 0.8009, -0.7652, -5.2539],
         [-0.8692,  0.8701,  3.2773]]])

In [36]:
sample_t6 @ sample_t7

tensor([[[-3.6460,  0.0383,  1.5289],
         [-0.2309,  0.2930,  3.8853],
         [-1.9953,  0.0478, -0.7433]],

        [[ 0.3728, -0.1142, -0.9654],
         [ 0.8009, -0.7652, -5.2539],
         [-0.8692,  0.8701,  3.2773]]])

Batch matrix multiplication performs matrix multiplication on the last two dimensions. `torch.bmm` itself only accepts 3-D tensors, but the `@` operator (`torch.matmul`) works with any number of leading batch dimensions as long as those batch dimensions are consistent.

In [37]:
(torch.randn(4, 2, 2, 3, 4) @ torch.randn(4, 2, 2, 4, 5)).shape

torch.Size([4, 2, 2, 3, 5])

Contraction: Einstein Summation Convention

https://pytorch.org/docs/stable/generated/torch.einsum.html

In [38]:
torch.einsum("abc, acd -> abd", torch.randn(2, 3, 4), torch.randn(2, 4, 5)).shape

torch.Size([2, 3, 5])

# Combination and Split

`cat()`

In [39]:
torch.cat??

Docstring:
cat(tensors, dim=0, *, out=None) -> Tensor

Concatenates the given sequence of :attr:`seq` tensors in the given dimension.
All tensors must either have the same shape (except in the concatenating
dimension) or be empty.

:func:`torch.cat` can be seen as an inverse operation for :func:`torch.split`
and :func:`torch.chunk`.

:func:`torch.cat` can be best understood via examples.

Args:
    tensors (sequence of Tensors): any python sequence of tensors of the same type.
        Non-empty tensors provided must have the same shape, except in the
        cat dimension.
    dim (int, optional): the dimension over which the tensors are concatenated

Keyword args:
    out (Tensor, optional): the output tensor.

Example::

    >>> x = torch.randn(2, 3)
    >>> x
    tensor([[ 0.6580, -1.0969, -0.4614],
            [-0.1034, -0.5790,  0.1497]])
    >>> torch.cat((x, x, x), 0)
    tensor([[ 0.6580, -1.0969, -0.4614],
            [-0.1034, -0.5790,  0.1497],
            [ 0.658

In [40]:
torch.cat(
    tensors=[
        torch.randn(3, 2), torch.randn(3, 3)
    ], 
    dim=1
).shape

torch.Size([3, 5])

In [41]:
torch.cat(
    tensors=[
        torch.randn(4, 2), torch.randn(5, 2)
    ]
).shape

torch.Size([9, 2])

`stack()`:
- All tensors must have exactly the same shape
- The `dim` parameter sets the position of the new axis along which the tensors are stacked

In [42]:
torch.stack??

Docstring:
stack(tensors, dim=0, *, out=None) -> Tensor

Concatenates a sequence of tensors along a new dimension.

All tensors need to be of the same size.

Arguments:
    tensors (sequence of Tensors): sequence of tensors to concatenate
    dim (int): dimension to insert. Has to be between 0 and the number
        of dimensions of concatenated tensors (inclusive)

Keyword args:
    out (Tensor, optional): the output tensor.
Type:      builtin_function_or_method


In [43]:
# stack() needs identically shaped inputs; feed it a (3, 2) and a (5, 2) tensor
# and show the resulting error message instead of letting the cell fail.
mismatched = [torch.randn(3, 2), torch.randn(5, 2)]
try:
    torch.stack(mismatched).shape
except RuntimeError as err:
    print(err)

stack expects each tensor to be equal size, but got [3, 2] at entry 0 and [5, 2] at entry 1


In [44]:
torch.stack(
    tensors=[
        torch.randn(3, 4), torch.randn(3, 4)
    ]
).shape

torch.Size([2, 3, 4])

In [45]:
torch.stack(
    tensors=[
        torch.randn(3, 4), torch.randn(3, 4)
    ],
    dim=1
).shape

torch.Size([3, 2, 4])

In [46]:
torch.stack(
    tensors=[
        torch.randn(3, 4), torch.randn(3, 4)
    ],
    dim=2
).shape

torch.Size([3, 4, 2])

`split()`

In [47]:
torch.split??

Signature: torch.split(tensor, split_size_or_sections, dim=0)
Source:   
def split(tensor, split_size_or_sections, dim=0):
    r"""Splits the tensor into chunks. Each chunk is a view of the original tensor.

    If :attr:`split_size_or_sections` is an integer type, then :attr:`tensor` will
    be split into equally sized chunks (if possible). Last chunk will be smaller if
    the tensor size along the given dimension :attr:`dim` is not divisible by
    :attr:`split_size`.

    If :attr:`split_size_or_sections` is a list, then :attr:`tensor` will be split
    into ``len(split_size_or_sections

In [48]:
# Split a (5, 13) tensor along its last dimension into pieces of width 3.
# 13 is not a multiple of 3, so the final piece is narrower than the rest.
total_shape, split_len, dim = (5, 13), 3, -1

output_ = torch.split(
    torch.randn(total_shape),
    split_size_or_sections=split_len,
    dim=dim,
)

for piece in output_:
    print(piece.shape)

torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 1])


`chunk()`

In [49]:
torch.chunk??

Docstring:
chunk(input, chunks, dim=0) -> List of Tensors

Attempts to split a tensor into the specified number of chunks. Each chunk is a view of
the input tensor.


.. note::

    This function may return less then the specified number of chunks!

.. seealso::

    :func:`torch.tensor_split` a function that always returns exactly the specified number of chunks

If the tensor size along the given dimesion :attr:`dim` is divisible by :attr:`chunks`,
all returned chunks will be the same size.
If the tensor size along the given dimension :attr:`dim` is not divisible by :attr:`chunks`,
all returned chunks will be the same size, except the last one.
If such division is not possible, this function may return less
than the specified number of chunks.

Arguments:
    input (Tensor): the tensor to split
    chunks (int): number of chunks to return
    dim (int): dimension along which to split the tensor

Example::
    >>> torch.arange(11).chunk(6)
    (tensor([0, 1]),
     tensor([2

In [50]:
# Ask for 2 chunks of a (5, 13) tensor along its last dimension.
# 13 does not divide evenly, so the chunks differ in width.
total_shape, num_splits, dim = (5, 13), 2, -1

output_ = torch.chunk(
    torch.randn(total_shape),
    chunks=num_splits,
    dim=dim,
)

for piece in output_:
    print(piece.shape)

torch.Size([5, 7])
torch.Size([5, 6])


# Expand, Squeeze and Broadcast

In [51]:
torch.randn(3, 4).unsqueeze(0).shape

torch.Size([1, 3, 4])

In [52]:
torch.randn(3, 4).unsqueeze(1).shape

torch.Size([3, 1, 4])

In [53]:
torch.randn(3, 4).unsqueeze(-1).shape

torch.Size([3, 4, 1])

In [54]:
torch.randn(3, 4, 1).squeeze().shape

torch.Size([3, 4])

In [55]:
torch.randn(1, 3, 4, 1).squeeze().shape

torch.Size([3, 4])

In [56]:
torch.randn(1, 3, 4, 1).squeeze(0).shape

torch.Size([3, 4, 1])

For *element-wise* operations, a tensor with fewer dimensions can be made broadcastable by inserting a size-1 axis with `unsqueeze()`.

In [57]:
# A 3-D tensor and a 2-D tensor whose shapes do not line up for broadcasting as-is.
sample_t8, sample_t9 = torch.randn((2, 3, 4)), torch.randn((2, 4))

In [58]:
# Shapes (2, 3, 4) and (2, 4) cannot be broadcast against each other: the
# last dims match (4 vs 4) but the next ones (3 vs 2) do not. Catch the error
# so the cell shows the message instead of aborting the notebook.
try:
    sample_t8 + sample_t9
except RuntimeError as e:
    print(e)

The size of tensor a (3) must match the size of tensor b (2) at non-singleton dimension 1


In [59]:
(sample_t8 + sample_t9.unsqueeze(1)).shape

torch.Size([2, 3, 4])

Here, by default, the expanded tensor is **broadcast**, i.e. repeated along its size-1 dimension as many times as needed to match the other tensor.