# Dive into Deep Learning (DDL): Ch. 2.3-2.5

## Linear Algebra (2.3)

In [1]:
import torch

In [2]:
# Scalars: 0-dimensional tensors that hold a single value each.
x, y = torch.tensor(3.0), torch.tensor(2.0)

# The usual arithmetic operators work on scalar tensors.
(
    x + y,  # sum
    x * y,  # product
    x / y,  # quotient
    x**y,   # power
)

(tensor(5.), tensor(6.), tensor(1.5000), tensor(9.))

In [5]:
x = torch.arange(3)  # Vector: a 1st-order tensor holding the integers 0, 1, 2
print(x)
print(x[2])  # Index a single element (the result is itself a 0-d tensor)
print(len(x))  # Dimensionality of the vector = number of elements

# Order: the number of axes of a tensor.
# Dimensionality: the number of components along a particular axis.
x.shape  # A vector has exactly one axis, so its shape has a single entry

tensor([0, 1, 2])
tensor(2)
3


torch.Size([3])

In [8]:
# Matrix: the values 0..5 laid out as 3 rows by 2 columns.
A = torch.arange(6).reshape(3, 2)
A  # 2nd-order tensor with shape (m rows, n columns)

tensor([[0, 1],
        [2, 3],
        [4, 5]])

In [9]:
A.T  # Transpose: rows and columns are exchanged

tensor([[0, 2, 4],
        [1, 3, 5]])

In [10]:
# Symmetric matrix: a square matrix that equals its own transpose.
A = torch.tensor(
    [
        [1, 2, 3],
        [2, 0, 4],
        [3, 4, 5],
    ]
)
A == A.T  # Elementwise comparison; every entry is True for a symmetric matrix

tensor([[True, True, True],
        [True, True, True],
        [True, True, True]])

In [11]:
# Tensors describe nth-order arrays with an arbitrary number of axes;
# here 24 values are arranged along three axes of sizes 2, 3 and 4.
torch.arange(24).reshape(2, 3, 4)

tensor([[[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]],

        [[12, 13, 14, 15],
         [16, 17, 18, 19],
         [20, 21, 22, 23]]])

In [13]:
# Elementwise addition: tensors of identical shape are added entry by entry.
A = torch.arange(6, dtype=torch.float32).reshape(2, 3)
B = A.clone()  # clone() allocates new memory, so B is an independent copy of A
(
    A,
    A + B,
)

(tensor([[0., 1., 2.],
         [3., 4., 5.]]),
 tensor([[ 0.,  2.,  4.],
         [ 6.,  8., 10.]]))

In [14]:
# Elementwise product of two matrices (Hadamard product); this is not matrix multiplication.
A * B

tensor([[ 0.,  1.,  4.],
        [ 9., 16., 25.]])

In [15]:
# Adding or multiplying a scalar and a tensor: each element of the tensor is
# added to (or multiplied by) the scalar, so the result keeps the tensor's shape.

a = 2
X = torch.arange(24).reshape(2, 3, 4)
a + X, (a * X).shape

(tensor([[[ 2,  3,  4,  5],
          [ 6,  7,  8,  9],
          [10, 11, 12, 13]],
 
         [[14, 15, 16, 17],
          [18, 19, 20, 21],
          [22, 23, 24, 25]]]),
 torch.Size([2, 3, 4]))

In [17]:
x = torch.arange(3, dtype=torch.float32)
x, x.sum()  # Sum of all of the tensor's elements (reduces the vector to a scalar)

(tensor([0., 1., 2.]), tensor(3.))

In [19]:
A, A.shape, A.sum()  # sum() without an axis reduces over every element of the matrix

(tensor([[0., 1., 2.],
         [3., 4., 5.]]),
 torch.Size([2, 3]),
 tensor(15.))

In [24]:
print(A.sum(axis=0), A.sum(axis=0).shape)  # axis=0 sums over the rows and collapses that axis: one total per column
print(A.shape, A.sum(axis=1).shape)  # axis=1 sums over the columns and collapses that axis: one total per row
print(A.sum(axis=[0, 1]) == A.sum())  # Reducing over both axes is the same as A.sum()

tensor([3., 5., 7.]) torch.Size([3])
torch.Size([2, 3]) torch.Size([2])
tensor(True)


In [26]:
# Mean = sum of the elements divided by the number of elements.
# Only the last expression of a cell is displayed, so the overall mean is
# printed explicitly; as a bare expression it was computed and then discarded.
print(A.mean(), A.sum() / A.numel())

# Mean along axis 0: reduce over the rows, giving one mean per column.
A.mean(axis=0), A.sum(axis=0) / A.shape[0]

(tensor([1.5000, 2.5000, 3.5000]), tensor([1.5000, 2.5000, 3.5000]))

In [29]:
# Non-reduction sum: keep the summed axis (with length 1) instead of dropping it
sum_A = A.sum(axis=1)
print(sum_A, sum_A.shape)  # Plain reduction: the summed axis disappears

sum_A = A.sum(axis=1, keepdims=True)
print(sum_A, sum_A.shape)  # keepdims=True retains the summed axis with length 1

print(A / sum_A)  # Useful for broadcasting: dividing A by its row sums makes each row sum to 1

tensor([ 3., 12.]) torch.Size([2])
tensor([[ 3.],
        [12.]]) torch.Size([2, 1])
tensor([[0.0000, 0.3333, 0.6667],
        [0.2500, 0.3333, 0.4167]])


In [30]:
# Cumulative sum along an axis: with axis=0 the running total accumulates
# down the rows, and the result has the same shape as the input.
A.cumsum(axis=0)

tensor([[0., 1., 2.],
        [3., 5., 7.]])

In [34]:
y = torch.ones(3, dtype = torch.float32)
print(x, y, torch.dot(x, y))  # Dot (inner) product: sum over the products of the elements at the same position
print(torch.sum(x * y))  # Same result as elementwise multiplication followed by a sum

tensor([0., 1., 2.]) tensor([1., 1., 1.]) tensor(3.)
tensor(3.)


In [35]:
A.shape, x.shape, torch.mv(A, x), A@x  # Matrix-vector multiplication: torch.mv and the @ operator give the same result

(torch.Size([2, 3]), torch.Size([3]), tensor([ 5., 14.]), tensor([ 5., 14.]))

In [38]:
B = torch.ones(3, 4)
A, B, torch.mm(A, B), A@B  # Matrix-matrix multiplication: torch.mm and the @ operator give the same result

(tensor([[0., 1., 2.],
         [3., 4., 5.]]),
 tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]),
 tensor([[ 3.,  3.,  3.,  3.],
         [12., 12., 12., 12.]]),
 tensor([[ 3.,  3.,  3.,  3.],
         [12., 12., 12., 12.]]))

In [43]:
# Norm of a vector = how big it is (the magnitude of the vector's components).
u = torch.tensor([3.0, -4.0])
# L2 norm is the Euclidean length: sqrt(3^2 + (-4)^2) = 5.
# torch.norm is deprecated; torch.linalg.vector_norm (default ord=2) is its
# replacement for vector norms.
torch.linalg.vector_norm(u)

tensor(5.)

In [44]:
# Frobenius norm: the square root of the sum of squares of a matrix's elements
# (like the L2 norm of the matrix flattened into a vector). For a 4x9 matrix of
# ones this is sqrt(36) = 6. torch.norm is deprecated; torch.linalg.matrix_norm
# (default ord='fro') is its replacement for matrix norms.
torch.linalg.matrix_norm(torch.ones((4, 9)))

tensor(6.)

## Automatic Differentiation (2.5)

In [50]:
# 2.4 Calculus content:

# The derivative is the instantaneous rate of change of f(x) with respect to x,
# i.e. the slope of the function at a particular location.

# Concatenating the partial derivatives of a multivariate function with respect to all its variables gives a vector called the gradient of the function.
# Gradients are difficult to calculate by hand because we are working with deeply nested functions, but we can use the chain rule.

# Evaluating the gradient of y with respect to x requires computing a "vector–matrix product": the matrix A, which contains the derivatives of vector u with respect to vector x, is multiplied with the gradient of y with respect to vector u.

In [51]:
# automatic differentiation (autograd): As we pass data through each successive function, the framework builds a computational graph that tracks how each value depends on others. 
# To calculate derivatives, automatic differentiation works backwards through this graph applying the chain rule. 
# The computational algorithm for applying the chain rule in this fashion is called backpropagation.

tensor([0., 1., 2., 3.])

In [60]:
x = torch.arange(4.0, requires_grad=True)  # Same effect as creating x first and then calling x.requires_grad_(True)
x, x.grad  # The gradient is None by default

(tensor([0., 1., 2., 3.], requires_grad=True), None)

In [61]:
# Calculate f(x) = 2 * x^T x (twice the dot product of x with itself) and store it in y
y = 2 * torch.dot(x, x)
y

tensor(28., grad_fn=<MulBackward0>)

In [62]:
y.backward()  # Backpropagate: compute the gradient of y with respect to x
print(x.grad)  # The gradient is stored in the .grad attribute of x
print(x.grad == 4 * x)  # The gradient of 2 * x^T x is 4x, so autograd matches the expected result

tensor([ 0.,  4.,  8., 12.])
tensor([True, True, True, True])


In [64]:
x.grad.zero_()  # Reset the gradient: backward() adds into x.grad rather than overwriting it
y = x.sum()
print(y)
y.backward()
print(x.grad)  # The gradient of a sum is 1 for every element of x

tensor(6., grad_fn=<SumBackward0>)
tensor([1., 1., 1., 1.])


In [65]:
x.grad.zero_()  # Clear the gradient left over from the previous backward pass
y = x * x  # Elementwise square, so y is a vector rather than a scalar
print(y)

tensor([0., 1., 4., 9.], grad_fn=<MulBackward0>)
