In [1]:
# This notebook collects a variety of PyTorch methods that are used when building the LLaMA model, gathered here for reference.

import torch
from torch import nn
from torch.nn import functional as F

In [3]:
# nn.Parameter, used in the RMSNorm class
dim = 10

# Wrapping a tensor in nn.Parameter marks it as trainable (requires_grad=True, see the output below).
# When such an object is assigned as an attribute of an nn.Module it is registered
# among that module's parameters.
param = nn.Parameter(torch.ones(dim))  # size comes from `dim` instead of a repeated literal

param

Parameter containing:
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], requires_grad=True)

In [7]:
# torch.rsqrt computes 1 / sqrt(x) element-wise and returns the result as a new tensor.
x = torch.rand(1, 10)  # random input of shape (1, 10)
rsqrt = torch.rsqrt(input=x)
# Print the input first and its reciprocal square root second, one tensor per line.
print(x, rsqrt, sep="\n")

tensor([[0.5778, 0.2681, 0.1957, 0.2452, 0.0567, 0.5619, 0.1121, 0.6420, 0.7265,
         0.2651]])
tensor([[1.3156, 1.9312, 2.2606, 2.0193, 4.1983, 1.3341, 2.9868, 1.2480, 1.1733,
         1.9420]])


In [10]:
# torch.outer(input, vec2): outer product of two 1-D tensors.
# For inputs of length n and m the result has shape (n, m) with
# out[i, j] = input[i] * vec2[j]; here (4,) x (3,) -> (4, 3).
v1 = torch.arange(start=1., end=5.)  # tensor([1., 2., 3., 4.])
v2 = torch.arange(start=1., end=4.)  # tensor([1., 2., 3.])
torch.outer(v1, v2)

tensor([[ 1.,  2.,  3.],
        [ 2.,  4.,  6.],
        [ 3.,  6.,  9.],
        [ 4.,  8., 12.]])

In [14]:
# torch.inner sums the element-wise products over the LAST dimension of both inputs.
# v1 and v2 are turned into column vectors of shape (4, 1) and (3, 1), so that last
# dimension has size 1 and out[i, j] = v1[i] * v2[j]: the (4, 3) result equals
# torch.outer(v1, v2).
torch.inner(v1[:, None], v2[:, None])

tensor([[ 1.,  2.,  3.],
        [ 2.,  4.,  6.],
        [ 3.,  6.,  9.],
        [ 4.,  8., 12.]])

In [15]:
# v2 is rebound to a length-4 tensor so that it matches v1.
# For two 1-D tensors torch.inner is the dot product:
# 1*1 + 2*2 + 3*3 + 4*4 = 30.
v2 = torch.arange(start=1., end=5.)  # tensor([1., 2., 3., 4.])
torch.inner(v1, v2)

tensor(30.)

In [16]:
# torch.polar(abs, angle) builds complex numbers from polar coordinates:
#   out = abs * (cos(angle) + j * sin(angle)) = abs * exp(j * angle)
# v1 supplies the magnitudes and v2 the angles in radians
# (first element below: 1 * (cos(1) + j * sin(1)) = 0.5403 + 0.8415j).
v3 = torch.polar(abs=v1, angle=v2)
v3

tensor([ 0.5403+0.8415j, -0.8323+1.8186j, -2.9700+0.4234j, -2.6146-3.0272j])

In [20]:
# torch.view_as_complex reinterprets a real tensor whose last dimension has size 2
# as a complex tensor: each [real, imag] pair becomes one complex number,
# so the shape goes from (4, 2) to (4,).
# Note: this rebinds x, which was a (1, 10) tensor in the rsqrt example above.
x = torch.randn(size=(4, 2))
x_comp = torch.view_as_complex(x)
x_comp

tensor([-1.3389-0.4209j,  0.0241-0.2106j,  0.9575+0.3471j,  1.4991+0.4845j])

In [22]:
# torch.view_as_real goes the other way from torch.view_as_complex: the complex tensor
# x_comp of shape (4,) is shown as a real tensor of shape (4, 2) whose last
# dimension holds [real, imag]. The values match the original x.
x_real = torch.view_as_real(x_comp)
x_real

tensor([[-1.3389, -0.4209],
        [ 0.0241, -0.2106],
        [ 0.9575,  0.3471],
        [ 1.4991,  0.4845]])

In [34]:
# Tensor.expand returns a new view of the tensor. Dimensions of size 1 (singleton
# dimensions) can be expanded to a larger size, and extra dimensions can be added
# at the front. x has shape (4, 2), so this call prepends a new leading dimension
# of size 4 and leaves the trailing (4, 2) dimensions as they are.
exp_x = x.expand((4, 4, 2))
exp_x.size()

torch.Size([4, 4, 2])

A singleton dimension is a tensor dimension of size 1; in other words, it holds only one element along that axis. Singleton dimensions are often introduced for broadcasting, which allows operations between tensors with different shapes.

In [35]:
# Show the expanded tensor: the (4, 2) block of x appears four times along the new leading dimension.
exp_x

tensor([[[-1.3389, -0.4209],
         [ 0.0241, -0.2106],
         [ 0.9575,  0.3471],
         [ 1.4991,  0.4845]],

        [[-1.3389, -0.4209],
         [ 0.0241, -0.2106],
         [ 0.9575,  0.3471],
         [ 1.4991,  0.4845]],

        [[-1.3389, -0.4209],
         [ 0.0241, -0.2106],
         [ 0.9575,  0.3471],
         [ 1.4991,  0.4845]],

        [[-1.3389, -0.4209],
         [ 0.0241, -0.2106],
         [ 0.9575,  0.3471],
         [ 1.4991,  0.4845]]])

In [37]:
# Tensor.contiguous() returns a tensor that is contiguous in memory and contains the same data as x.
# If x is already contiguous, x itself is returned rather than a copy.
y = x.contiguous()

In [39]:
# torch.full creates a tensor of the requested shape with every element set to fill_value.
torch.full(size=(2, 3), fill_value=3.141592)

tensor([[3.1416, 3.1416, 3.1416],
        [3.1416, 3.1416, 3.1416]])