# Imports

In [1]:
import torch
import numpy as np

# Scalars and Vectors

In [2]:
# Create two scalars (0-dimensional tensors) in one unpacking assignment.
x, y = map(torch.tensor, (3, 2))

# Bare last expression so the notebook displays both values.
x, y

(tensor(3), tensor(2))

In [3]:
# Basic element-wise arithmetic, written with the functional torch.* forms
# of the +, *, / and ** operators. The printed labels are unchanged.
print("addition:", torch.add(x, y))
print("multiplication:", torch.mul(x, y))
print("devision:", torch.div(x, y))  # true division: the integer scalars give a float result
print("exponantiation:", torch.pow(x, y))

addition: tensor(5)
multiplication: tensor(6)
devision: tensor(1.5000)
exponantiation: tensor(9)


In [4]:
# Create random vectors.
# Seed PyTorch's global RNG first so that Restart & Run All regenerates the
# same "random" values here and in every later cell that samples.
SEED = 0
torch.manual_seed(SEED)

x = torch.randn(4)
y = torch.randn(4)

x, y

(tensor([ 0.9616,  2.0451,  0.6744, -0.2232]),
 tensor([-0.5103, -0.7429, -2.7413,  0.7365]))

Scalars are 0th-order tensors.

Vectors are 1st-order tensors (a vector has one axis).

Matrices are 2nd-order tensors (a 2-D table with two axes).

In [5]:
# Duplicate the tensor; torch.clone(x) is the functional spelling of x.clone().
x_ = torch.clone(x)

# Print the Python object identity of the source tensor and of its copy.
print("Original:", id(x))
print("Cloned:", id(x_))

Original: 4360066576
Cloned: 4670809936


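`clone()` allocates new memory and copies the data into it. Note that the differing `id()` values only show that `x` and `x_` are distinct Python objects; comparing `x.data_ptr()` with `x_.data_ptr()` is the direct way to confirm that the underlying data lives at a different address.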

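The elementwise product of two matrices is called their Hadamard product (denoted $\odot$). For two matrices $\mathbf{A}, \mathbf{B} \in \mathbb{R}^{m \times n}$, the entry in row $i$ and column $j$ of $\mathbf{A} \odot \mathbf{B}$ is $a_{ij} b_{ij}$. The same `*` operator applies elementwise to vectors as well: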

In [6]:
# Element-wise (Hadamard) product of the two vectors.
torch.mul(x, y)

tensor([-0.4907, -1.5192, -1.8488, -0.1644])

In [7]:
# Sample a random matrix with 3 rows and 4 columns.
A = torch.randn(3, 4)
A

tensor([[ 1.2673, -1.7436, -0.8464,  0.9848],
        [-0.2607, -0.3795, -0.8633, -0.9791],
        [-0.7231, -0.3067, -0.3546, -0.3126]])

In [8]:
# With no dimension argument, `sum` adds up every element of the tensor
# across all dimensions and returns a scalar tensor.
torch.sum(A)

tensor(-4.5174)

In [9]:
# Naming both dimensions explicitly gives the same total as the argument-free sum.
A.sum(dim=(0, 1))

tensor(-4.5174)

In [10]:
# Sum along dimension 1 (across each row); keepdim=True keeps the result 2-D,
# with one value per row.
A.sum(dim=1, keepdim=True)

tensor([[-0.3378],
        [-2.4826],
        [-1.6969]])

In [11]:
# Sum along dimension 0 (down each column); keepdim=True keeps the result 2-D,
# with one value per column.
A.sum(dim=0, keepdim=True)

tensor([[ 0.2836, -2.4298, -2.0643, -0.3069]])

## Dot Product

In [12]:
# The dot product multiplies matching elements and adds up the results.
x.dot(y)

tensor(-4.0230)

In [13]:
# The same value spelled out: element-wise product, then a sum over all elements.
(x * y).sum()

tensor(-4.0230)

In [14]:
# Shapes of the matrix and the vector that are multiplied next.
A.size(), x.size()

(torch.Size([3, 4]), torch.Size([4]))

In [18]:
# Matrix-vector product: A is (3, 4) and x is (4,), so the result has 3 elements.
A.matmul(x)

tensor([-3.1377, -1.3906, -1.4918])

In [19]:
# `torch.mv` is specific to matrix-vector products, while the `@` operator
# handles both matrix-vector and matrix-matrix products.
(
    A.shape,
    x.shape,
    A.mv(x),  # dedicated matrix-vector routine
    A @ x,    # general matmul operator
)

(torch.Size([3, 4]),
 torch.Size([4]),
 tensor([-3.1377, -1.3906, -1.4918]),
 tensor([-3.1377, -1.3906, -1.4918]))

In [22]:
# Reshape the vector into a single column so that the product is a column vector.
# (-1, 1) lets PyTorch infer the number of rows from the vector itself instead of
# hard-coding the length 4, so the cell keeps working if x changes size.
torch.matmul(A, x.reshape(-1, 1))

tensor([[-3.1377],
        [-1.3906],
        [-1.4918]])

In [23]:
# Draw a second random matrix whose shape is that of A transposed,
# so that the product A @ B is defined.
B = torch.randn(A.t().size())

# `torch.mm` and the `@` operator compute the same matrix-matrix product.
A.shape, B.shape, A.mm(B), A @ B

(torch.Size([3, 4]),
 torch.Size([4, 3]),
 tensor([[-0.2967, -0.8246,  0.0439],
         [-0.5106,  1.0495, -1.4003],
         [-1.6630,  0.8761, -0.3560]]),
 tensor([[-0.2967, -0.8246,  0.0439],
         [-0.5106,  1.0495, -1.4003],
         [-1.6630,  0.8761, -0.3560]]))

In [29]:
# The norm of a vector measures its magnitude.
# The l2 norm is the Euclidean length of the vector: sqrt(sum of squared elements).
# `torch.norm` is deprecated; `torch.linalg.vector_norm` is its replacement for
# vectors and computes the l2 norm by default.

x.shape, torch.sqrt(torch.sum(x ** 2)), torch.linalg.vector_norm(x)

(torch.Size([4]), tensor(2.3689), tensor(2.3689))

In [30]:
# The l1 norm adds up the absolute values of a vector's elements.
# It is also common; the distance measure associated with it is
# called the Manhattan distance.

x.shape, torch.sum(torch.abs(x))

(torch.Size([4]), tensor(3.9042))

In [31]:
# Show both vectors again for reference.
x, y

(tensor([ 0.9616,  2.0451,  0.6744, -0.2232]),
 tensor([-0.5103, -0.7429, -2.7413,  0.7365]))

In [33]:
# l2 norms of both vectors. `torch.linalg.vector_norm` replaces the deprecated
# `torch.norm` and computes the l2 norm by default.
torch.linalg.vector_norm(x), torch.linalg.vector_norm(y)

(tensor(2.3689), tensor(2.9782))

In [36]:
# The dot product can be written in terms of the angle t between two vectors:
#     u . v = |u| |v| cos(t)
# so the angle between them is
#     t = arccos(u . v / (|u| |v|))

dot = torch.dot(x, y)
# `torch.linalg.vector_norm` replaces the deprecated `torch.norm` (l2 norm by default).
length = torch.linalg.vector_norm(x) * torch.linalg.vector_norm(y)

# Rounding error can push the ratio slightly outside [-1, 1] for (nearly)
# parallel vectors, which would make arccos return nan, so clamp it first.
cos_t = torch.clamp(dot / length, -1.0, 1.0)

# NumPy and PyTorch report the same angle (in radians).
print(np.arccos(cos_t))
print(torch.arccos(cos_t))

tensor(2.1776)
tensor(2.1776)


In [40]:
# Cosine similarity
# It is the cosine of the angle between two vectors:
#     cos(t) = p . q / (|p| |q|)
# The idea: if the angle between two vectors is small (value close to 1),
# the vectors are similar; if the angle is large (value close to -1),
# they are dissimilar.

# randomly sample two vectors
p = torch.randn(3)
q = torch.randn(3)

# `torch.linalg.vector_norm` replaces the deprecated `torch.norm` (l2 norm by default).
cos_sim = torch.dot(p, q) / (torch.linalg.vector_norm(p) * torch.linalg.vector_norm(q))
cos_sim

tensor(-0.4869)

In [41]:
# Show x again; the recorded output matches the values it had when it was created.
x

tensor([ 0.9616,  2.0451,  0.6744, -0.2232])

In [48]:
# Sample a fresh 3-element vector with gradient tracking switched on,
# so that autograd records the operations applied to it.
p = torch.randn(size=(3,), requires_grad=True)
p

tensor([ 0.7844,  0.4785, -0.8139], requires_grad=True)

In [50]:
# Inspect the gradient before any backward pass has run.
# No output was recorded for this cell, which is consistent with the gradient
# still being unset (None) at this point.
p.grad

In [51]:
# Scalar function of p: q = 2 * (p . p).
q = 2 * p.dot(p)
q

tensor(3.0133, grad_fn=<MulBackward0>)

In [52]:
# Backpropagate from the scalar q, then read the gradient stored on p.
# With q = 2 * (p . p), the analytic gradient is dq/dp = 4 * p, which is what
# the recorded output shows (e.g. 4 * 0.7844 = 3.1376).
q.backward()
p.grad

tensor([ 3.1376,  1.9140, -3.2556])