In [7]:
import numpy as np
import pandas as pd
from typing import Tuple

pd.set_option('display.precision', 12)  # Show 12 digits after the decimal point when displaying floats
pd.set_option('display.width', 300)     # Allow output lines up to 300 characters wide before wrapping
pd.set_option('display.max_columns', None)  # Show all columns (no column truncation)


In [8]:
def read_matrix(path: str) -> pd.DataFrame:
    """
    Read a whitespace-delimited numeric matrix from a text file.

    Every non-blank line is one matrix row. A token is either anything that
    ``float()`` accepts (``3``, ``-1.5``, ``2e-3``) or an integer fraction of
    the form ``p/q`` (``1/2``, ``-3/4``).

    Args:
        path: Path of the text file to read.

    Returns:
        A DataFrame of floats with one row per non-blank line, or an empty
        DataFrame when the file contains no non-blank lines.

    Raises:
        ValueError: If a token cannot be parsed (including a zero
            denominator), or if the rows do not all have the same length.
            For parse failures the underlying exception is attached as
            ``__cause__``.
        OSError: If the file cannot be opened.
    """
    data = []
    with open(path, 'r') as f:
        for lineno, line in enumerate(f, start=1):
            tokens = line.split()
            if not tokens:
                # blank (or whitespace-only) lines are skipped
                continue
            row = []
            for tok in tokens:
                try:
                    if tok.count('/') == 1:
                        num, den = tok.split('/')
                        val = int(num) / int(den)
                    else:
                        val = float(tok)
                except (ValueError, ZeroDivisionError, OverflowError) as e:
                    # Only the failures a malformed token can produce are
                    # translated; chaining keeps the root cause visible.
                    raise ValueError(
                        f"Error parsing token '{tok}' on line {lineno} in {path}: {e}"
                    ) from e
                row.append(val)
            data.append(row)

    if not data:
        # empty file → empty DataFrame
        return pd.DataFrame()

    # ensure all rows have same length
    ncol = len(data[0])
    lengths = [len(r) for r in data]
    if any(length != ncol for length in lengths):
        raise ValueError(
            f"Row-length mismatch in {path}: expected {ncol} columns, got {lengths}"
        )

    return pd.DataFrame(data)


def print_matrix(df: pd.DataFrame, float_format: str = '{:,.9f}') -> None:
    """
    Print a DataFrame as a bare grid of numbers.

    Row labels and column headers are omitted; every float is rendered with
    the given ``str.format`` template (default: thousands separators and
    nine decimal places).
    """
    formatter = float_format.format
    rendered = df.to_string(index=False, header=False, float_format=formatter)
    print(rendered)

In [9]:
# Load the input matrix A from the text file and display it.
A_df = read_matrix("SVD_input_A.txt")
print_matrix(A_df)

 1.000000000 4.000000000 -4.000000000
 5.000000000 1.000000000  5.000000000
-2.000000000 2.000000000  2.000000000
 3.000000000 7.000000000 -1.000000000


# Khai triển kỳ dị SVD

## Thuật toán

### 1. ComputeSVD Algorithm

**Input**  
- A: real matrix of size m×n  
- tol: tolerance threshold for singular values  

**Output**  
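- U: m×m orthogonal matrix (its columns are the orthonormal left singular vectors)  
- Σ: m×n rectangular diagonal matrix of singular values (non-negative, in descending order)  
- Vᵀ: n×n orthogonal matrix (its rows are the orthonormal right singular vectors)  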

---

**Steps**

1. **Determine dimensions**  
   - m, n ← shape(A)

2. **Choose decomposition route**  
   - **If m ≤ n**:  
     1. M ← A · Aᵀ   (size m×m)  
     2. Eigendecompose M: M = U · Λ · Uᵀ  
     3. For i = 1 … m:  
        - σᵢ ← √max(λᵢ, 0)   (clamp tiny negative eigenvalues caused by round-off)  
     4. Build Σ (m×n) with Σᵢᵢ = σᵢ  
     5. For each i where σᵢ > tol:  
        - vᵢ ← (Aᵀ · uᵢ) / σᵢ  
     6. Assemble V = [v₁ … vₖ] (k = rank candidate).  
        - If k < n, keep the first k vectors and extend V to n×n via orthonormal completion  
   - **Else (m > n)**:  
     1. N ← Aᵀ · A   (size n×n)  
     2. Eigendecompose N: N = V · Λ · Vᵀ  
     3. For j = 1 … n:  
        - σⱼ ← √max(λⱼ, 0)   (clamp tiny negative eigenvalues caused by round-off)  
     4. Build Σ (m×n) with Σⱼⱼ = σⱼ  
     5. For each j where σⱼ > tol:  
        - uⱼ ← (A · vⱼ) / σⱼ  
     6. Assemble U = [u₁ … uₖ] (k = rank candidate).  
        - If k < m, keep the first k vectors and extend U to m×m via orthonormal completion  

3. **Compute numerical rank**  
   - r ← number of singular values σᵢ > tol  

4. **Zero-pad and complete**  
   - If r < min(m,n):  
     - Zero-pad Σ so it remains m×n  
     - Extend the kept columns of U and V to full orthonormal bases  

5. **Return**  
   - U (m×m), Σ (m×n), Vᵀ (n×n)


In [10]:
def complete_orthonormal_basis(B: np.ndarray, tol: float = 1e-12) -> np.ndarray:
    """
    Extend B (n×k, orthonormal columns) to a full n×n orthonormal basis.

    The first k columns of the result are B itself. Each further column is
    built deterministically: among the standard basis vectors e_1 … e_n the
    one with the largest component outside the current span is chosen,
    orthogonalized against the existing columns and normalized. The result
    is therefore reproducible from run to run (no random numbers involved).

    Args:
        B: Array of shape (n, k) with k <= n whose columns are orthonormal.
            An (n, 0) array is allowed and yields the identity matrix.
        tol: Minimum residual norm for a candidate vector to be accepted.

    Returns:
        An (n, n) float array Q with Q[:, :k] == B and QᵀQ ≈ I.

    Raises:
        ValueError: If B is not 2-D, has more columns than rows, or no
            standard basis vector has a residual above ``tol`` (which can
            only happen when the columns of B are not orthonormal).
    """
    B = np.asarray(B, dtype=float)
    if B.ndim != 2:
        raise ValueError(f"B must be a 2-D array, got {B.ndim} dimension(s)")
    n, k = B.shape
    if k > n:
        raise ValueError(f"B has more columns ({k}) than rows ({n})")

    Q = np.zeros((n, n))
    Q[:, :k] = B
    for j in range(k, n):
        basis = Q[:, :j]
        # For orthonormal columns, ||e_i - proj(e_i)||² = 1 - Σ_c basis[i, c]².
        # Trying candidates from the largest residual down avoids dividing by
        # a nearly-zero norm.
        residual_sq = 1.0 - np.sum(basis * basis, axis=1)
        for i in np.argsort(-residual_sq, kind="stable"):
            v = np.zeros(n)
            v[i] = 1.0
            # Two projection passes: the second removes the round-off left
            # behind by the first (Gram–Schmidt with re-orthogonalization).
            for _ in range(2):
                v -= basis @ (basis.T @ v)
            norm = np.linalg.norm(v)
            if norm > tol:
                Q[:, j] = v / norm
                break
        else:
            raise ValueError(
                "Cannot extend B to a full basis: its columns do not appear to be orthonormal"
            )
    return Q

In [11]:
def compute_svd(A_df: pd.DataFrame, tol: float = 1e-10) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """
    Compute a full SVD of A (m×n) from the eigendecomposition of M = A·Aᵀ.

    This function always takes the A·Aᵀ route, whatever the shape of A; the
    printed route line states whether m <= n or m > n actually holds.
    Intermediate steps are printed using pandas formatting.

    The eigenvectors of M are the left singular vectors. Right singular
    vectors are recovered as vᵢ = Aᵀ·uᵢ / σᵢ for every σᵢ > tol; the
    remaining columns of V (rank-deficient A, or m < n) are supplied by an
    orthonormal completion so that V is always a full orthonormal basis.

    Args:
        A_df: The m×n input matrix.
        tol: Singular values <= tol are treated as zero when building V;
            also passed to the orthonormal completion as its norm tolerance.

    Returns:
        U_df: m×m DataFrame of left singular vectors
        Sigma_df: m×n DataFrame of singular values on the diagonal
        Vt_df: n×n DataFrame of right singular vectors transposed
    """
    # Convert to NumPy
    A = A_df.values.astype(float)
    m, n = A.shape
    p = min(m, n)

    print("--- Input matrix A ---")
    print_matrix(A_df)

    # Report the shape relation that really holds instead of a fixed label.
    relation = "m <= n" if m <= n else "m > n"
    print("Route: {}, use M = A · Aᵀ ({}×{})".format(relation, m, m))
    M = A.dot(A.T)
    M_df = pd.DataFrame(M)
    print("Matrix M:")
    print_matrix(M_df)

    # Eigen-decomposition of the symmetric matrix M, sorted descending
    eigvals, eigvecs = np.linalg.eigh(M)
    idx = np.argsort(eigvals)[::-1]
    eigvals = eigvals[idx]
    eigvecs = eigvecs[:, idx]

    # Print eigenpairs
    for i, lam in enumerate(eigvals, start=1):
        print(f"Eigenvalue λ{i} = {lam}")
        print(f"Eigenvector u{i}:")
        print_matrix(pd.DataFrame(eigvecs[:, i-1]))

    # Singular values; clip guards against tiny negative eigenvalues
    sigmas = np.sqrt(np.clip(eigvals, 0, None))
    print("Singular values:")
    for i, s in enumerate(sigmas, start=1):
        print(f"σ{i} = {s}")

    # Build Σ
    Sigma = np.zeros((m, n))
    for i in range(p):
        Sigma[i, i] = sigmas[i]
    Sigma_df = pd.DataFrame(Sigma)
    print("Sigma matrix Σ:")
    print_matrix(Sigma_df)

    # Numerical rank. The singular values are sorted descending, so the ones
    # above tol are exactly the leading r entries.
    r = int(np.count_nonzero(sigmas[:p] > tol))

    # Right singular vectors that can be recovered from U
    V_kept = np.zeros((n, r))
    for i in range(r):
        V_kept[:, i] = A.T.dot(eigvecs[:, i]) / sigmas[i]

    # Complete V to a full orthonormal basis. Only genuinely orthonormal
    # columns are handed over; zero placeholders would break orthogonality.
    V_complete = complete_orthonormal_basis(V_kept, tol)

    for i in range(p):
        note = "" if i < r else " (σ <= tol, from orthonormal completion)"
        print(f"Right singular vector v{i+1}{note}:")
        print_matrix(pd.DataFrame(V_complete[:, i]))

    Vt_df = pd.DataFrame(V_complete.T)

    # U is full eigenvector matrix of M
    U_df = pd.DataFrame(eigvecs)

    return U_df, Sigma_df, Vt_df


In [12]:
def compute_svd_2(A_df: pd.DataFrame, tol: float = 1e-10) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """
    Compute a full SVD of A (m×n) from the eigendecomposition of N = Aᵀ·A.

    This function always takes the Aᵀ·A route, whatever the shape of A; the
    printed route line states whether m > n or m <= n actually holds.
    Intermediate steps are printed using pandas formatting.

    The eigenvectors of N are the right singular vectors. Left singular
    vectors are recovered as uⱼ = A·vⱼ / σⱼ for every σⱼ > tol; the
    remaining columns of U (rank-deficient A, or m > n) are supplied by an
    orthonormal completion so that U is always a full orthonormal basis.

    Args:
        A_df: The m×n input matrix.
        tol: Singular values <= tol are treated as zero when building U;
            also passed to the orthonormal completion as its norm tolerance.

    Returns:
        U_df: m×m DataFrame of left singular vectors
        Sigma_df: m×n DataFrame of singular values on the diagonal
        Vt_df: n×n DataFrame of right singular vectors transposed
    """
    # Convert to NumPy
    A = A_df.values.astype(float)
    m, n = A.shape
    p = min(m, n)

    print("--- Input matrix A ---")
    print_matrix(A_df)

    # Report the shape relation that really holds instead of a fixed label.
    relation = "m > n" if m > n else "m <= n"
    print("Route: {}, use N = Aᵀ · A ({}×{})".format(relation, n, n))
    N = A.T.dot(A)
    N_df = pd.DataFrame(N)
    print("Matrix N:")
    print_matrix(N_df)

    # Eigen-decomposition of the symmetric matrix N, sorted descending
    eigvals, eigvecs = np.linalg.eigh(N)
    idx = np.argsort(eigvals)[::-1]
    eigvals = eigvals[idx]
    eigvecs = eigvecs[:, idx]

    for j, lam in enumerate(eigvals, start=1):
        print(f"Eigenvalue λ{j} = {lam}")
        print(f"Eigenvector v{j}:")
        print_matrix(pd.DataFrame(eigvecs[:, j-1]))

    # Singular values; clip guards against tiny negative eigenvalues
    sigmas = np.sqrt(np.clip(eigvals, 0, None))
    print("Singular values:")
    for j, s in enumerate(sigmas, start=1):
        print(f"σ{j} = {s}")

    Sigma = np.zeros((m, n))
    for j in range(p):
        Sigma[j, j] = sigmas[j]
    Sigma_df = pd.DataFrame(Sigma)
    print("Sigma matrix Σ:")
    print_matrix(Sigma_df)

    # Numerical rank. The singular values are sorted descending, so the ones
    # above tol are exactly the leading r entries.
    r = int(np.count_nonzero(sigmas[:p] > tol))

    # Left singular vectors that can be recovered from V
    U_kept = np.zeros((m, r))
    for j in range(r):
        U_kept[:, j] = A.dot(eigvecs[:, j]) / sigmas[j]

    # Complete U to a full orthonormal basis. Only genuinely orthonormal
    # columns are handed over; zero placeholders would break orthogonality.
    U_complete = complete_orthonormal_basis(U_kept, tol)

    for j in range(p):
        note = "" if j < r else " (σ <= tol, from orthonormal completion)"
        print(f"Left singular vector u{j+1}{note}:")
        print_matrix(pd.DataFrame(U_complete[:, j]))

    U_df = pd.DataFrame(U_complete)

    # V is full eigenvector matrix of N
    Vt_df = pd.DataFrame(eigvecs.T)

    return U_df, Sigma_df, Vt_df


## Kết quả

In [13]:
# Decompose A with compute_svd (the A·Aᵀ route) and print the three factors.
# NOTE(review): U, S and Vt are reassigned by the later compute_svd_2 cell.
U, S, Vt = compute_svd(A_df, tol=1e-9)
print("--- Final U ---")
print_matrix(U)
print("--- Final Σ ---")
print_matrix(S)
print("--- Final V^T ---")
print_matrix(Vt)

--- Input matrix A ---
 1.000000000 4.000000000 -4.000000000
 5.000000000 1.000000000  5.000000000
-2.000000000 2.000000000  2.000000000
 3.000000000 7.000000000 -1.000000000
Route: m <= n, use M = A · Aᵀ (4×4)
Matrix M:
 33.000000000 -11.000000000 -2.000000000 35.000000000
-11.000000000  51.000000000  2.000000000 17.000000000
 -2.000000000   2.000000000 12.000000000  6.000000000
 35.000000000  17.000000000  6.000000000 59.000000000
Eigenvalue λ1 = 85.59112945336764
Eigenvector u1:
-0.496092992
-0.252881824
-0.060930140
-0.828390032
Eigenvalue λ2 = 56.36503202891461
Eigenvector u2:
 0.443538952
-0.894252566
-0.058727717
 0.011687662
Eigenvalue λ3 = 13.043838517717784
Eigenvector u3:
 0.254793713
 0.186170881
-0.938461734
-0.140392807
Eigenvalue λ4 = 1.932101354361016e-14
Eigenvector u4:
-0.701601814
-0.318909915
-0.334855411
 0.542146856
Singular values:
σ1 = 9.25154740858888
σ2 = 7.507664885229935
σ3 = 3.611625467530899
σ4 = 1.390000487180136e-07
Sigma matrix Σ:
9.251547409 0.00000000

In [14]:
# Decompose the same A with compute_svd_2 (the Aᵀ·A route) and print the three factors.
# NOTE(review): this overwrites the U, S and Vt produced by the compute_svd cell above.
U, S, Vt = compute_svd_2(A_df, tol=1e-9)
print("--- Final U ---")
print_matrix(U)
print("--- Final Σ ---")
print_matrix(S)
print("--- Final V^T ---")
print_matrix(Vt)

--- Input matrix A ---
 1.000000000 4.000000000 -4.000000000
 5.000000000 1.000000000  5.000000000
-2.000000000 2.000000000  2.000000000
 3.000000000 7.000000000 -1.000000000
Route: m > n, use N = Aᵀ · A (3×3)
Matrix N:
39.000000000  26.000000000  14.000000000
26.000000000  70.000000000 -14.000000000
14.000000000 -14.000000000  46.000000000
Eigenvalue λ1 = 85.59112945336761
Eigenvector v1:
-0.445742938
-0.881781602
 0.154189622
Eigenvalue λ2 = 56.365032028914584
Eigenvector v2:
 0.516166547
-0.112453266
 0.849073824
Eigenvalue λ3 = 13.043838517717795
Eigenvector v3:
 0.731358550
-0.458056186
-0.505271414
Singular values:
σ1 = 9.25154740858888
σ2 = 7.507664885229933
σ3 = 3.6116254675309003
Sigma matrix Σ:
9.251547409 0.000000000 0.000000000
0.000000000 7.507664885 0.000000000
0.000000000 0.000000000 3.611625468
0.000000000 0.000000000 0.000000000
Left singular vector u1:
-0.496092992
-0.252881824
-0.060930140
-0.828390032
Left singular vector u2:
-0.443538952
 0.894252566
 0.058727717
-