In [1]:
# Importing necessary libraries
import numpy as np
import torch

In [2]:
# Displaying matrices
def display_matrix(matrix, name):
    """Print a labelled matrix followed by an empty line.

    Args:
        matrix: Object to show; it is rendered through its ``str()`` form.
        name: Label written on its own line, immediately followed by a colon.
    """
    # One print call with a newline separator emits: label line, matrix, blank line.
    print(f"{name}:", matrix, "", sep="\n")

# Find rank of square matrix

The rank of a matrix is the maximum number of linearly independent columns (or, equivalently, rows). It is the same as the dimension of the vector space spanned by its columns (or rows). The rank of a matrix cannot exceed the number of its rows or the number of its columns.

In [3]:
# Three 3x3 examples, one for each possible non-zero rank.
square_matrix_1 = np.array([[1, 2, 3], [2, 4, 6], [3, 6, 9]])  # every row is a multiple of [1, 2, 3]
square_matrix_2 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])  # row 3 = 2 * row 2 - row 1
square_matrix_3 = np.array([[1, 0, 0], [0, 2, 0], [0, 0, 3]])  # diagonal, all entries non-zero

# Compute every rank up front, then report each matrix together with its rank.
rank_square_matrix_1, rank_square_matrix_2, rank_square_matrix_3 = (
    np.linalg.matrix_rank(candidate)
    for candidate in (square_matrix_1, square_matrix_2, square_matrix_3)
)

for label, matrix, rank in (
    ("square_matrix_1", square_matrix_1, rank_square_matrix_1),
    ("square_matrix_2", square_matrix_2, rank_square_matrix_2),
    ("square_matrix_3", square_matrix_3, rank_square_matrix_3),
):
    display_matrix(matrix, label)
    print(f"Rank of {label}: {rank}\n")

square_matrix_1:
[[1 2 3]
 [2 4 6]
 [3 6 9]]

Rank of square_matrix_1: 1

square_matrix_2:
[[1 2 3]
 [4 5 6]
 [7 8 9]]

Rank of square_matrix_2: 2

square_matrix_3:
[[1 0 0]
 [0 2 0]
 [0 0 3]]

Rank of square_matrix_3: 3



# Rank of non-square matrices

In [4]:
# A 2x3 matrix: its rank can be at most min(2, 3) = 2.
non_square_matrix = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
rank_non_square_matrix = np.linalg.matrix_rank(non_square_matrix)

display_matrix(matrix=non_square_matrix, name="Non-Square (2x3)")
# end="\n\n" leaves one empty line after the rank, like a trailing "\n" in the text would.
print(f"Rank of Non-Square Matrix: {rank_non_square_matrix}", end="\n\n")

Non-Square (2x3):
[[1 2 3]
 [4 5 6]]

Rank of Non-Square Matrix: 2



# Create rank-1, rank-2, and rank-3 matrices as matrix products (n×k @ k×m)

rank(AB) <= min(rank(A), rank(B))

In [5]:
# Rank-1 example: a column vector times a row vector (an outer product).
A = np.array([[1], [2], [3]])   # shape (3, 1), rank 1
B = np.array([[4, 5, 6]])       # shape (1, 3), rank 1
C = np.matmul(A, B)             # shape (3, 3), rank 1: every row is a multiple of B

operands = {"A": A, "B": B, "C": C}

# Show all three matrices first, then their ranks.
for label, matrix in operands.items():
    display_matrix(matrix, label)
for label, matrix in operands.items():
    print(f"Rank of {label}: {np.linalg.matrix_rank(matrix)}")

A:
[[1]
 [2]
 [3]]

B:
[[4 5 6]]

C:
[[ 4  5  6]
 [ 8 10 12]
 [12 15 18]]

Rank of A: 1
Rank of B: 1
Rank of C: 1


In [6]:
# Rank-2 example: the inner dimension of the product is 2, which caps the rank of C at 2.
A = np.array([[1, 2], [3, 4], [5, 6]])           # shape (3, 2), rank 2
B = np.array([[7, 8, 9, 10], [11, 12, 13, 14]])  # shape (2, 4), rank 2
C = np.matmul(A, B)                              # shape (3, 4), rank 2

operands = {"A": A, "B": B, "C": C}

# Show all three matrices first, then their ranks.
for label, matrix in operands.items():
    display_matrix(matrix, label)
for label, matrix in operands.items():
    print(f"Rank of {label}: {np.linalg.matrix_rank(matrix)}")

A:
[[1 2]
 [3 4]
 [5 6]]

B:
[[ 7  8  9 10]
 [11 12 13 14]]

C:
[[ 29  32  35  38]
 [ 65  72  79  86]
 [101 112 123 134]]

Rank of A: 2
Rank of B: 2
Rank of C: 2


In [7]:
# A has full rank but B does not, so the product inherits the smaller rank of B.
A = np.array([[1, 2, 3], [5, 5, 6], [7, 6, 5]])                # shape (3, 3), rank 3
B = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])    # shape (3, 4), rank 2: row 3 = 2 * row 2 - row 1
C = np.matmul(A, B)                                            # shape (3, 4), rank 2

operands = {"A": A, "B": B, "C": C}

# Show all three matrices first, then their ranks.
for label, matrix in operands.items():
    display_matrix(matrix, label)
for label, matrix in operands.items():
    print(f"Rank of {label}: {np.linalg.matrix_rank(matrix)}")

A:
[[1 2 3]
 [5 5 6]
 [7 6 5]]

B:
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]

C:
[[ 38  44  50  56]
 [ 84 100 116 132]
 [ 82 100 118 136]]

Rank of A: 3
Rank of B: 2
Rank of C: 2


In [19]:
# Rank-3 example: both factors have rank 3, and so does their product.
# (The rows of B are linearly independent, so B has rank 3, not 2.)
A = np.array([[1, 2, 3], [5, 5, 6], [7, 6, 5]])             # 3x3 matrix, rank=3
B = np.array([[2, 2, 0, 0], [7, 9, 8, 14], [3, 4, 2, 8]])   # 3x4 matrix, rank=3
C = A @ B                                                   # 3x4 matrix, rank=3

operands = {"A": A, "B": B, "C": C}

# Show all three matrices first, then their ranks.
for label, matrix in operands.items():
    display_matrix(matrix, label)
for label, matrix in operands.items():
    print(f"Rank of {label}: {np.linalg.matrix_rank(matrix)}")

A:
[[1 2 3]
 [5 5 6]
 [7 6 5]]

B:
[[ 2  2  0  0]
 [ 7  9  8 14]
 [ 3  4  2  8]]

C:
[[ 25  32  22  52]
 [ 63  79  52 118]
 [ 71  88  58 124]]

Rank of A: 3
Rank of B: 3
Rank of C: 3


# Finding inverse

In [9]:
# Invert two 3x3 matrices with np.linalg.inv.
#
# A has full rank, so its inverse is well defined.
# B is singular (the rank printed below is 2, not 3), so it has no true inverse:
# whatever np.linalg.inv returns for it is a floating-point artefact, not a usable result.
# NOTE(review): per the NumPy docs, inv raises LinAlgError when inversion fails, so this
# call may raise instead of returning values on another platform — confirm.

A = np.array([[3, 5, 2], [4, 7, 6], [4, 6, 9]])
B = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

A_inverse, B_inverse = np.linalg.inv(A), np.linalg.inv(B)

for label, matrix in (("A", A), ("B", B)):
    display_matrix(matrix, label)

print(f"Rank of A: {np.linalg.matrix_rank(A)}")
print(f"Rank of B: {np.linalg.matrix_rank(B)}\n")

for label, inverse in (("A", A_inverse), ("B", B_inverse)):
    display_matrix(inverse, f"{label} inverse using linalg.inv")

A:
[[3 5 2]
 [4 7 6]
 [4 6 9]]

B:
[[1 2 3]
 [4 5 6]
 [7 8 9]]

Rank of A: 3
Rank of B: 2

A inverse using linalg.inv:
[[ 2.07692308 -2.53846154  1.23076923]
 [-0.92307692  1.46153846 -0.76923077]
 [-0.30769231  0.15384615  0.07692308]]

B inverse using linalg.inv:
[[ 3.15251974e+15 -6.30503948e+15  3.15251974e+15]
 [-6.30503948e+15  1.26100790e+16 -6.30503948e+15]
 [ 3.15251974e+15 -6.30503948e+15  3.15251974e+15]]



In [10]:
# Find the inverse using linalg.solve.
# Solving M @ X = I for the unknown matrix X (M is the matrix to invert, I the identity)
# gives X = inverse of M. This is applied below to the matrices A and B defined earlier.
# B is the singular matrix from the previous cell, so its "solution" is not meaningful.
I = np.identity(3)
A_inverse_solve, B_inverse_solve = (np.linalg.solve(m, I) for m in (A, B))

for label, solved in (("A", A_inverse_solve), ("B", B_inverse_solve)):
    display_matrix(solved, f"{label}_inverse using linalg.solve")

A_inverse using linalg.solve:
[[ 2.07692308 -2.53846154  1.23076923]
 [-0.92307692  1.46153846 -0.76923077]
 [-0.30769231  0.15384615  0.07692308]]

B_inverse using linalg.solve:
[[ 3.15251974e+15 -6.30503948e+15  3.15251974e+15]
 [-6.30503948e+15  1.26100790e+16 -6.30503948e+15]
 [ 3.15251974e+15 -6.30503948e+15  3.15251974e+15]]



# Pseudoinverse (Moore-Penrose Inverse)

The pseudoinverse, often denoted as $A^+$, is a generalization of the matrix inverse to non-square matrices (which arise from overdetermined or underdetermined systems of linear equations) and to square matrices that have no inverse because they are singular.

For a matrix $A$, the pseudoinverse $A^+$ is defined as follows:

1. **If $A$ is a square, invertible matrix:**
   $$A^{+} =  A^{-1}$$

2. **If $A$ is a rectangular matrix with more rows than columns (tall matrix, overdetermined system) and its columns are linearly independent (full column rank):**
   $$ A^+ = (A^T A)^{-1} A^T $$

3. **If $A$ is a rectangular matrix with more columns than rows (wide matrix, underdetermined system) and its rows are linearly independent (full row rank):**
   $$ A^+ = A^T (A A^T)^{-1} $$

The pseudoinverse satisfies the following conditions:

- If $A$ has an exact inverse $A^{-1}$, then $A^+ = A^{-1}$.
- If $A$ is a singular matrix (non-invertible), $A^+$ is a generalization that allows for the solution of linear systems in an approximate sense.
- For a linear system $Ax = b$, the estimate $x = A^+ b$ minimizes the squared residual $\|Ax - b\|_2^2$ (the least-squares solution); when several minimizers exist, it is the one with the smallest norm.

Numerical methods, such as Singular Value Decomposition (SVD), are often used to compute the pseudoinverse of a matrix. The pseudoinverse is widely used in applications such as solving linear systems, least squares problems, and in machine learning algorithms.


In [11]:
# Moore-Penrose pseudo-inverse of a wide (2x3) matrix, computed with np.linalg.pinv.
# The result has the transposed shape (3x2).
non_square_matrix = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
pinv_matrix = np.linalg.pinv(non_square_matrix)
display_matrix(matrix=pinv_matrix, name="Pseudo-Inverse of Non-Square Matrix")

Pseudo-Inverse of Non-Square Matrix:
[[-0.94444444  0.44444444]
 [-0.11111111  0.11111111]
 [ 0.72222222 -0.22222222]]



In [12]:
# Moore-Penrose pseudo-inverse of a tall (3x2) matrix; the result has shape 2x3.
non_square_matrix = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
])
pinv_matrix = np.linalg.pinv(non_square_matrix)
display_matrix(matrix=pinv_matrix, name="Pseudo-Inverse of Non-Square Matrix")

Pseudo-Inverse of Non-Square Matrix:
[[-1.33333333 -0.33333333  0.66666667]
 [ 1.08333333  0.33333333 -0.41666667]]



# Differentiation in Torch

In [18]:
# Here, "in" and "out" refer to the input variable (x) and the output variable (y).

# Scalar in -> Scalar out: y = x^2, so dy/dx = 2x.
x_scalar = torch.tensor(2.0, requires_grad=True)
y_scalar = x_scalar**2
y_scalar.backward()
print("Scalar Input -> Scalar Output:")
print("x: ", x_scalar)
print("y: ", y_scalar)
print(f"Derivative dy/dx at x = {x_scalar.item()}: {x_scalar.grad.item()}\n")

# Vector in -> Scalar out: y = sum(x_i^2), so the gradient is [2*x_1, 2*x_2, 2*x_3].
x_vector = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y_scalar = torch.sum(x_vector**2)
y_scalar.backward()
print("Vector Input -> Scalar Output:")
print("x: ", x_vector)
print("y: ", y_scalar)
# .detach() is used instead of the legacy .data accessor to get a tensor without
# gradient tracking; .grad does not require grad, so it converts to NumPy directly.
print(f"Derivative dy/dx at x = {x_vector.detach().numpy()}: {x_vector.grad.numpy()}\n")

# Vector in -> Vector out: y_i = x_i^2.
# The output is summed to a scalar before calling backward(). The gradient of that sum
# is the sum of the Jacobian's rows. Because each y_i depends only on x_i, the Jacobian
# is diagonal, so the printed values are the element-wise derivatives dy_i/dx_i = 2*x_i
# (the Jacobian's diagonal), not the full 3x3 Jacobian.
x_vector_out = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y_vector_out = x_vector_out**2
y_vector_out.sum().backward()
print("Vector Input -> Vector Output:")
print("x: ", x_vector_out)
print("y: ", y_vector_out)
print(f"Derivative dy/dx at x = {x_vector_out.detach().numpy()}: {x_vector_out.grad.numpy()}")

Scalar Input -> Scalar Output:
x:  tensor(2., requires_grad=True)
y:  tensor(4., grad_fn=<PowBackward0>)
Derivative dy/dx at x = 2.0: 4.0

Vector Input -> Scalar Output:
x:  tensor([1., 2., 3.], requires_grad=True)
y:  tensor(14., grad_fn=<SumBackward0>)
Derivative dy/dx at x = [1. 2. 3.]: [2. 4. 6.]

Vector Input -> Vector Output:
x:  tensor([1., 2., 3.], requires_grad=True)
y:  tensor([1., 4., 9.], grad_fn=<PowBackward0>)
Derivative dy/dx at x = [1. 2. 3.]: [2. 4. 6.]
