In [1]:
import torch
# Print the installed PyTorch version so readers can see which release produced the outputs in this notebook.
print(torch.__version__) 

2.7.0+cpu


<p style="font-family:ComicSansMS; font-size: 30px; color: magenta"> 2.3. Linear Algebra</p>

<p style="font-family:ComicSansMS; font-size: 24px; color: orange"> 2.3.1. Scalars</p>

In [2]:
# The symbol ∈ (pronounced "in") denotes membership in a set.
# For example, x, y ∈ {0, 1} indicates that x and y are variables that can only take the values 0 or 1.
# In code, a scalar is a tensor that holds a single element.
x, y = torch.tensor(3.0), torch.tensor(2.0)

# Addition, multiplication, division and exponentiation of the two scalars.
x + y, x * y, x / y, x**y

(tensor(5.), tensor(6.), tensor(1.5000), tensor(9.))

<p style="font-family:ComicSansMS; font-size: 24px; color: orange"> 2.3.2. Vectors</p>

In [3]:
# A vector is stored as a 1-D tensor; arange(3) fills it with the integers 0, 1, 2.
x = torch.arange(3)
x

tensor([0, 1, 2])

In [4]:
# Elements are accessed by index; indexing is zero-based, so x[2] is the third element.
x[2]

tensor(2)

In [5]:
# To indicate that a vector x contains n elements, we write x ∈ ℝⁿ.
# Formally, we call n the dimensionality of the vector; len(x) returns it.
len(x)

3

In [6]:
# shape reports the length along every axis; for a vector it has exactly one entry.
x.shape

torch.Size([3])

<p style="font-family:ComicSansMS; font-size: 24px; color: orange"> 2.3.3. Matrices</p>

In [7]:
# A matrix is a 2-D tensor: the six values 0..5 are reshaped into 3 rows and 2 columns.
A = torch.arange(6).reshape(3, 2)
A

tensor([[0, 1],
        [2, 3],
        [4, 5]])

In [8]:
# When we exchange a matrix’s rows and columns, the result is called its transpose.
# The .T attribute returns that transpose.
A.T

tensor([[0, 2, 4],
        [1, 3, 5]])

In [9]:
# A symmetric matrix is a square matrix that equals its own transpose,
# so comparing it elementwise with A.T yields True in every position.
A = torch.tensor([
    [1, 2, 3],
    [2, 0, 4],
    [3, 4, 5],
])
A == A.T

tensor([[True, True, True],
        [True, True, True],
        [True, True, True]])

<p style="font-family:ComicSansMS; font-size: 24px; color: orange"> 2.3.4. Tensors</p>

In [10]:
# Tensors generalize matrices to any number of axes; here 24 values are laid out along three axes of sizes 2, 3 and 4.
torch.arange(24).reshape(2, 3, 4)

tensor([[[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]],

        [[12, 13, 14, 15],
         [16, 17, 18, 19],
         [20, 21, 22, 23]]])

<p style="font-family:ComicSansMS; font-size: 24px; color: orange"> 2.3.5. Basic Properties of Tensor Arithmetic</p>

In [11]:
# Elementwise addition of two tensors with the same shape returns a tensor of that shape.
A = torch.arange(6, dtype=torch.float32).reshape(2, 3)
B = A.clone()  # Assign a copy of A to B by allocating new memory
A, A + B

(tensor([[0., 1., 2.],
         [3., 4., 5.]]),
 tensor([[ 0.,  2.,  4.],
         [ 6.,  8., 10.]]))

In [13]:
# The elementwise product of two matrices is called their Hadamard product (denoted ⊙).
# Display A, the first operand of the product A * B.
A

tensor([[0., 1., 2.],
        [3., 4., 5.]])

In [14]:
# Display B, the second operand; it was created as A.clone(), so it holds the same values as A.
B

tensor([[0., 1., 2.],
        [3., 4., 5.]])

In [12]:
# Hadamard (elementwise) product: entry (i, j) of the result is A[i, j] * B[i, j].
A * B

tensor([[ 0.,  1.,  4.],
        [ 9., 16., 25.]])

In [15]:
# Adding or multiplying a scalar and a tensor produces a result with the same shape as the original tensor.
# The scalar is applied to every element of the tensor.
a = 2
X = torch.arange(24).reshape(2, 3, 4)
a + X, (a * X).shape

(tensor([[[ 2,  3,  4,  5],
          [ 6,  7,  8,  9],
          [10, 11, 12, 13]],
 
         [[14, 15, 16, 17],
          [18, 19, 20, 21],
          [22, 23, 24, 25]]]),
 torch.Size([2, 3, 4]))

<p style="font-family:ComicSansMS; font-size: 24px; color: orange"> 2.3.6. Reduction</p>

In [18]:
# sum() with no arguments adds up every element of the tensor and returns a scalar tensor.
x = torch.arange(3, dtype=torch.float32)
x, x.sum()

(tensor([0., 1., 2.]), tensor(3.))

In [19]:
# By default sum reduces over all axes, so a matrix also collapses to a single scalar.
A.shape, A.sum()
# For reference, A holds:
# tensor([[0., 1., 2.],
#         [3., 4., 5.]])

(torch.Size([2, 3]), tensor(15.))

In [20]:
# To sum over all elements along the rows (axis 0), we specify axis=0 in sum.
# The reduced axis is dropped from the shape of the result.
A.shape, A.sum(axis=0).shape

(torch.Size([2, 3]), torch.Size([3]))

In [21]:
# Specifying axis=1 will reduce the column dimension (axis 1) by summing up elements of all the columns.
# Only the row axis remains in the shape of the result.
A.shape, A.sum(axis=1).shape

(torch.Size([2, 3]), torch.Size([2]))

In [22]:
# Reducing a matrix along both of its axes is equivalent to summing every element.
A.sum(axis=[0, 1]) == A.sum()  # Same as A.sum()

tensor(True)

In [23]:
# A related quantity is the mean, also called the average.
# It equals the sum of all elements divided by the number of elements (numel).
A.mean(), A.sum() / A.numel()

(tensor(2.5000), tensor(2.5000))

In [24]:
# Likewise, the function for calculating the mean can also reduce a tensor along specific axes.
# Along axis 0 the divisor is the length of that axis, A.shape[0].
A.mean(axis=0), A.sum(axis=0) / A.shape[0]

(tensor([1.5000, 2.5000, 3.5000]), tensor([1.5000, 2.5000, 3.5000]))

<p style="font-family:ComicSansMS; font-size: 24px; color: orange"> 2.3.7. Non-Reduction Sum</p>

In [25]:
# keepdims=True keeps the reduced axis with length 1 instead of dropping it,
# so the number of axes is unchanged.
sum_A = A.sum(axis=1, keepdims=True)
sum_A, sum_A.shape

(tensor([[ 3.],
         [12.]]),
 torch.Size([2, 1]))

In [26]:
# Because sum_A kept its axis of length 1, it broadcasts against A:
# every row of A is divided by that row's sum.
A / sum_A

tensor([[0.0000, 0.3333, 0.6667],
        [0.2500, 0.3333, 0.4167]])

In [27]:
# If we want to calculate the cumulative sum of elements of A along some axis, say axis=0 (row by row),
# we can call the cumsum function. The result has the same shape as A.
A.cumsum(axis=0)

tensor([[0., 1., 2.],
        [3., 5., 7.]])

<p style="font-family:ComicSansMS; font-size: 24px; color: orange"> 2.3.8. Dot Products</p>

In [28]:
# The dot product of two vectors is the sum of the products of their
# corresponding elements.
y = torch.ones(3, dtype=torch.float32)
x, y, torch.dot(x, y)

(tensor([0., 1., 2.]), tensor([1., 1., 1.]), tensor(3.))

In [None]:
# Equivalently, we can calculate the dot product of two vectors
# by performing an elementwise multiplication followed by a sum:
# x * y is the elementwise product, and torch.sum adds up its entries.
torch.sum(x * y)

tensor(3.)

<p style="font-family:ComicSansMS; font-size: 24px; color: orange"> 2.3.9. Matrix–Vector Products</p>

In [32]:
# To express a matrix–vector product in code, we use the mv function.
# Python has a convenience operator @ that can execute both matrix–vector
# and matrix–matrix products (depending on its arguments).
# The column count of A must match the length of x.
A.shape, x.shape, torch.mv(A, x), A @ x


(torch.Size([2, 3]), torch.Size([3]), tensor([ 5., 14.]), tensor([ 5., 14.]))

<p style="font-family:ComicSansMS; font-size: 24px; color: orange"> 2.3.10. Matrix–Matrix Multiplication</p>

In [33]:
# Matrix–matrix multiplication: torch.mm and the @ operator compute the same product.
# B is a 3x4 matrix of ones.
B = torch.ones(3, 4)
torch.mm(A, B), A @ B

(tensor([[ 3.,  3.,  3.,  3.],
         [12., 12., 12., 12.]]),
 tensor([[ 3.,  3.,  3.,  3.],
         [12., 12., 12., 12.]]))

<p style="font-family:ComicSansMS; font-size: 24px; color: orange"> 2.3.11. Norms</p>

In [34]:
# The Euclidean (L2) norm is the square root of the sum of the squared elements.
# torch.norm is marked deprecated in the PyTorch documentation; torch.linalg.vector_norm
# is the documented replacement for vectors and computes the L2 norm by default.
u = torch.tensor([3.0, -4.0])
torch.linalg.vector_norm(u)

tensor(5.)

In [35]:
# The L1 (Manhattan) norm sums the absolute values of the vector's elements.
torch.abs(u).sum()

tensor(7.)

<p style="font-family:ComicSansMS; font-size: 24px; color: orange"> 2.3.12. Discussion</p>

> Scalars, vectors, matrices, and tensors are the basic mathematical objects used in linear algebra and have zero, one, two, and an arbitrary number of axes, respectively.

> Tensors can be sliced or reduced along specified axes via indexing, or operations such as sum and mean, respectively.

> Elementwise products are called Hadamard products. By contrast, dot products, matrix–vector products, and matrix–matrix products are not elementwise operations and in general return objects having shapes that are different from those of the operands.

> Norms capture various notions of the magnitude of a vector (or matrix), and are commonly applied to the difference of two vectors to measure their distance apart.