<a href="https://colab.research.google.com/github/kimgeonhee317/d2l-notes/blob/main/notebook/2_3_Linear_Algebra.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch

## 2.3.1 Scalars

In [3]:
# A scalar is a 0th-order tensor (a tensor holding a single element).
# Scalars are conventionally written as lowercase letters (e.g., x, y, and z).
x, y = torch.tensor(3.0), torch.tensor(2.0)

# Sum, product, quotient, and power of the two scalars
x + y, x * y, x / y, x**y

(tensor(5.), tensor(6.), tensor(1.5000), tensor(9.))

## 2.3.2 Vectors

In [6]:
# A vector is a 1st-order tensor (a single axis of scalar elements).
# Vectors are conventionally written as bold lowercase letters (e.g., **x**, **y**, and **z**).
x = torch.arange(3)
x

tensor([0, 1, 2])

By default, we visualize vectors by stacking their elements vertically:

$$
\mathbf{x} =
\begin{bmatrix}
x_1 \\
\vdots \\
x_n
\end{bmatrix}
$$

In [7]:
# Access one element of the vector by its (zero-based) index
x[2]

tensor(2)

In [8]:
# Number of elements in the vector
len(x)

3

In [9]:
# .shape gives the length along each axis; a tensor with just one axis has a shape with just one element
x.shape

torch.Size([3])

## 2.3.3 Matrices

In [11]:
# A matrix is a 2nd-order tensor (two axes: rows and columns)
# Matrices are conventionally written as bold capital letters (e.g., X, Y, and Z)
A = torch.arange(6).reshape(3, 2)
A

tensor([[0, 1],
        [2, 3],
        [4, 5]])

In [12]:
# Transpose: flip the axes so that rows become columns, (3, 2) -> (2, 3)
A.T

tensor([[0, 2, 4],
        [1, 3, 5]])

## 2.3.4 Tensors

In [14]:
# A tensor generalizes scalars, vectors, and matrices to an arbitrary number of axes (nth order)
# Higher-order tensors are written as capital letters in a special font face (e.g., X, Y, and Z)
# Here: 3 axes with lengths 2, 3, and 4
torch.arange(24).reshape(2, 3, 4)

tensor([[[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]],

        [[12, 13, 14, 15],
         [16, 17, 18, 19],
         [20, 21, 22, 23]]])

## 2.3.5 Basic Properties of Tensor Arithmetic
The elementwise product of two matrices is called their Hadamard product (denoted $\odot$).

In [17]:
# Elementwise operations produce outputs with the same shape as their operands
A = torch.arange(6, dtype=torch.float32).reshape(2, 3)
B = A.clone()  # B is a separate copy of A, not another name for the same tensor
A, A + B

(tensor([[0., 1., 2.],
         [3., 4., 5.]]),
 tensor([[ 0.,  2.,  4.],
         [ 6.,  8., 10.]]))

In [18]:
# Adding or multiplying a scalar and a tensor yields a result with the same shape as the original tensor
X = torch.arange(24).reshape(2, 3, 4)
a = 2
a + X, (a * X).shape

(tensor([[[ 2,  3,  4,  5],
          [ 6,  7,  8,  9],
          [10, 11, 12, 13]],
 
         [[14, 15, 16, 17],
          [18, 19, 20, 21],
          [22, 23, 24, 25]]]),
 torch.Size([2, 3, 4]))

## 2.3.6 Reduction

In [19]:
# Sum of all elements of a vector
x = torch.arange(3, dtype=torch.float32)
x, torch.sum(x)

(tensor([0., 1., 2.]), tensor(3.))

In [20]:
# Summing a matrix without specifying an axis reduces over all of its elements to a scalar
A.shape, A.sum()

(torch.Size([2, 3]), tensor(15.))

In [24]:
# Sum along axis 0 (the row axis): the rows are collapsed, leaving one sum per column
# The reduced axis is missing from the shape of the output: (2, 3) -> (3,)
print(A)
print(A.sum(axis=0))
print(A.shape, A.sum(axis=0).shape)

tensor([[0., 1., 2.],
        [3., 4., 5.]])
tensor([3., 5., 7.])
torch.Size([2, 3]) torch.Size([3])


In [25]:
# Sum along axis 1 (the column axis): the columns are collapsed, leaving one sum per row
# The reduced axis is missing from the shape of the output: (2, 3) -> (2,)
print(A)
print(A.sum(axis=1))
print(A.shape, A.sum(axis=1).shape)

tensor([[0., 1., 2.],
        [3., 4., 5.]])
tensor([ 3., 12.])
torch.Size([2, 3]) torch.Size([2])


In [26]:
# Reducing over both rows and columns is the same as summing all elements
A.sum(axis=[0, 1]) == A.sum()

tensor(True)

In [27]:
# Mean (average): the sum divided by the total number of elements
A.mean(), A.sum()/A.numel()

(tensor(2.5000), tensor(2.5000))

In [28]:
# The mean can also be reduced along a specific axis;
# dividing the axis-0 sum by the length of axis 0 gives the same result
A.mean(axis=0), A.sum(axis=0)/A.shape[0]

(tensor([1.5000, 2.5000, 3.5000]), tensor([1.5000, 2.5000, 3.5000]))

## 2.3.7 Non-Reduction Sum

In [31]:
# Sum along axis 1 but keep the number of axes unchanged:
# keepdims=True retains the reduced axis with length 1, so the shape is (2, 1) rather than (2,)
print(A)
sum_A = A.sum(axis=1, keepdims=True)
sum_A, sum_A.shape

tensor([[0., 1., 2.],
        [3., 4., 5.]])


(tensor([[ 3.],
         [12.]]),
 torch.Size([2, 1]))

In [32]:
# Since sum_A keeps its two axes after summing each row,
# we can divide A by sum_A with broadcasting to create a matrix where each row sums to 1
A / sum_A

tensor([[0.0000, 0.3333, 0.6667],
        [0.2500, 0.3333, 0.4167]])

In [33]:
# Cumulative sum of elements along axis 0 (row by row); the shape of A is unchanged
A.cumsum(axis=0)

tensor([[0., 1., 2.],
        [3., 5., 7.]])

## 2.3.8 Dot Products

In [35]:
# The dot product of two vectors sums the products of the elements at matching positions
y = torch.ones(3, dtype=torch.float32)
x, y, torch.dot(x, y)

(tensor([0., 1., 2.]), tensor([1., 1., 1.]), tensor(3.))

In [36]:
# Equivalently, the dot product is an elementwise multiplication followed by a sum
torch.sum(x * y)

tensor(3.)

After normalizing two vectors to have unit length, their dot product expresses the cosine of the angle between them (cosine similarity).

## 2.3.9 Matrix-Vector Products

In [40]:
# Matrix-vector product
# A in R^(m x n) acts as a transformation that projects vectors from R^n to R^m
# The column count of A (3) matches the length of x (3); the result has length m (2)
# torch.mv(A, x) and A @ x compute the same product
A.shape, x.shape, torch.mv(A, x), A@x, (A@x).shape

(torch.Size([2, 3]),
 torch.Size([3]),
 tensor([ 5., 14.]),
 tensor([ 5., 14.]),
 torch.Size([2]))

## 2.3.10 Matrix-Matrix Multiplication

In [42]:
# Matrix-matrix multiplication: A (n x k) times B (k x m) gives an (n x m) matrix
# AB can be viewed as performing m matrix-vector products (one per column of B),
# or entry (i, j) as the dot product of the ith row of A and the jth column of B
# torch.mm(A, B) and A @ B compute the same product
B = torch.ones(3, 4)
torch.mm(A, B), A@B

(tensor([[ 3.,  3.,  3.,  3.],
         [12., 12., 12., 12.]]),
 tensor([[ 3.,  3.,  3.,  3.],
         [12., 12., 12., 12.]]))

## 2.3.11 Norms

In [43]:
# l2 norm (Euclidean length): square root of the sum of squared elements
# torch.linalg.vector_norm defaults to the 2-norm; it is used here instead of
# torch.norm, which the PyTorch documentation marks as deprecated
u = torch.tensor([3.0, -4.0])
torch.linalg.vector_norm(u)

tensor(5.)

In [44]:
# l1 norm (Manhattan distance): sum of the absolute values of the elements
# Compared with the l2 norm, it is less sensitive to outliers
torch.abs(u).sum()


tensor(7.)

In [45]:
# Frobenius norm: the l2 norm of a matrix treated as one long vector
# (square root of the sum of squared entries; here sqrt(36 * 1) = 6)
# torch.linalg.matrix_norm defaults to the Frobenius norm; it is used here instead of
# torch.norm, which the PyTorch documentation marks as deprecated
torch.linalg.matrix_norm(torch.ones((4, 9)))

tensor(6.)