# Tutorial 02: Matrices and Linear Transformations

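This notebook visualizes how 2×2 matrices act as linear transformations of the plane: basic transformations, composition, column space, rank, inverses, orthogonal matrices, and two machine-learning applications.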

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection

plt.style.use('seaborn-v0_8-whitegrid')

## 1. Matrix as a Transformation

In [None]:
def visualize_transformation(A, title="Transformation"):
    """Plot the unit square and standard basis before and after applying A.

    The left panel shows the unit square with e1 (red) and e2 (green). The
    right panel shows the image of the square under A together with the
    columns of A, which are the images of e1 and e2.

    Args:
        A: 2x2 NumPy array describing the linear map.
        title: Text used as the figure's super-title.
    """
    arrow_style = dict(angles='xy', scale_units='xy', scale=1, width=0.02)
    # Closed outline: the first corner is repeated so the border joins up.
    corners = np.array([[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]])

    def draw_shape(ax, outline, **plot_kwargs):
        # Border uses every vertex; the fill drops the repeated closing vertex.
        ax.plot(outline[:, 0], outline[:, 1], 'b-', linewidth=2, **plot_kwargs)
        ax.fill(outline[:-1, 0], outline[:-1, 1], alpha=0.3, color='blue')

    def finish_axes(ax, x_range, y_range, heading):
        # Limits, equal aspect, thin coordinate axes, title and legend.
        ax.set_xlim(*x_range)
        ax.set_ylim(*y_range)
        ax.set_aspect('equal')
        ax.axhline(y=0, color='k', linewidth=0.5)
        ax.axvline(x=0, color='k', linewidth=0.5)
        ax.set_title(heading)
        ax.legend()

    fig, (before_ax, after_ax) = plt.subplots(1, 2, figsize=(14, 6))

    # Left panel: untouched space.
    draw_shape(before_ax, corners, label='Unit square')
    before_ax.quiver(0, 0, 1, 0, color='red', label='e1', **arrow_style)
    before_ax.quiver(0, 0, 0, 1, color='green', label='e2', **arrow_style)
    finish_axes(before_ax, (-0.5, 2), (-0.5, 2), 'Before Transformation')

    # Right panel: every corner mapped through A (points are rows, hence the transposes).
    image = (A @ corners.T).T
    draw_shape(after_ax, image)
    # Column j of A is where basis vector e_j lands.
    after_ax.quiver(0, 0, A[0, 0], A[1, 0], color='red', label='Ae1', **arrow_style)
    after_ax.quiver(0, 0, A[0, 1], A[1, 1], color='green', label='Ae2', **arrow_style)
    # Symmetric limits with 30% padding around the farthest coordinate.
    extent = np.abs(np.vstack([image, [[0, 0]]])).max() * 1.3
    finish_axes(after_ax, (-extent, extent), (-extent, extent),
                f'After Transformation\nA = {A.tolist()}')

    plt.suptitle(title, fontsize=14)
    plt.tight_layout()
    plt.show()

In [None]:
# Scaling: a diagonal matrix stretches each axis independently
# (x by a factor of 2, y by a factor of 1.5).
A_scale = np.diag([2, 1.5])
visualize_transformation(A_scale, title="Scaling (2x horizontal, 1.5x vertical)")

In [None]:
# Rotation: counter-clockwise by 45 degrees (theta is in radians).
theta = np.pi / 4
cos_t, sin_t = np.cos(theta), np.sin(theta)
A_rotate = np.array([[cos_t, -sin_t],
                     [sin_t, cos_t]])
visualize_transformation(A_rotate, title=f"Rotation by {np.degrees(theta):.0f}°")

In [None]:
# Shear: each point slides horizontally by half of its y-coordinate.
A_shear = np.array([
    [1, 0.5],
    [0, 1],
])
visualize_transformation(A_shear, title="Shear (horizontal)")

In [None]:
# Reflection: keep x, negate y (mirror image across the x-axis).
A_reflect = np.diag([1, -1])
visualize_transformation(A_reflect, title="Reflection (across x-axis)")

In [None]:
# Projection: the y-coordinate is discarded, so the matrix is rank-deficient
# and the whole square collapses onto a segment of the x-axis.
A_project = np.diag([1, 0])
visualize_transformation(A_project, title="Projection (onto x-axis) - Rank 1")

## 2. Matrix Multiplication = Composition

In [None]:
def visualize_composition(A, B, title="Composition"):
    """Draw the unit square, its image under B, and its image under AB.

    The product AB is the map "apply B first, then A"; the three panels show
    those stages side by side.

    Args:
        A: 2x2 NumPy array, applied second.
        B: 2x2 NumPy array, applied first.
        title: Text used as the figure's super-title.
    """
    fig, panels = plt.subplots(1, 3, figsize=(15, 5))

    # Closed outline of the unit square (first corner repeated at the end).
    unit_square = np.array([[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]])

    for panel in panels:
        panel.set_aspect('equal')
        panel.axhline(y=0, color='k', linewidth=0.5)
        panel.axvline(x=0, color='k', linewidth=0.5)

    # Points are stored as rows, hence the transposes around each product.
    after_b = (B @ unit_square.T).T
    product = A @ B
    after_ab = (product @ unit_square.T).T

    # One entry per panel: outline, fill colour, line format, half-width of the view, title.
    stages = (
        (unit_square, 'blue', 'b-', 2, 'Original'),
        (after_b, 'green', 'g-', np.abs(after_b).max() * 1.5,
         f'After B\nB = {B.tolist()}'),
        (after_ab, 'red', 'r-', np.abs(after_ab).max() * 1.5,
         f'After A(B) = AB\nAB = {product.tolist()}'),
    )
    for panel, (outline, shade, line_fmt, half_width, heading) in zip(panels, stages):
        panel.fill(outline[:-1, 0], outline[:-1, 1], alpha=0.3, color=shade)
        panel.plot(outline[:, 0], outline[:, 1], line_fmt, linewidth=2)
        panel.set_xlim(-half_width, half_width)
        panel.set_ylim(-half_width, half_width)
        panel.set_title(heading)

    plt.suptitle(title, fontsize=14)
    plt.tight_layout()
    plt.show()

# Demo: scale first (2x in x, 0.5x in y), then rotate by 45 degrees.
scale = np.diag([2, 0.5])
quarter_pi = np.pi / 4
rotate = np.array([[np.cos(quarter_pi), -np.sin(quarter_pi)],
                   [np.sin(quarter_pi), np.cos(quarter_pi)]])

# The first argument is applied second: rotate @ scale means "scale, then rotate".
visualize_composition(rotate, scale, title="Scale then Rotate")

In [None]:
# Swapping the factors gives a different map: matrix multiplication is not commutative.
visualize_composition(scale, rotate, title="Rotate then Scale")

# Print both products, then a numerical comparison of the two.
print("Scale @ Rotate =", scale @ rotate, sep="\n")
print("\nRotate @ Scale =", rotate @ scale, sep="\n")
print("\nAre they equal?", np.allclose(scale @ rotate, rotate @ scale))

## 3. The Column Space View

In [None]:
def visualize_column_space(A):
    """Illustrate that Ax is a linear combination of the columns of A.

    Draws the two columns a1 and a2 of A, the scaled columns x1*a1 and x2*a2
    placed tip to tail, and the resulting vector Ax, for the fixed choice
    x = [1.5, 0.5].

    Args:
        A: 2x2 matrix (NumPy array or nested sequence).
    """
    A = np.asarray(A)
    arrow_style = dict(angles='xy', scale_units='xy', scale=1)
    fig, ax = plt.subplots(figsize=(10, 8))

    # Columns of A
    col1 = A[:, 0]
    col2 = A[:, 1]

    # Plot columns
    ax.quiver(0, 0, col1[0], col1[1], color='blue', width=0.015,
              label=f'a1 (column 1) = {col1}', **arrow_style)
    ax.quiver(0, 0, col2[0], col2[1], color='red', width=0.015,
              label=f'a2 (column 2) = {col2}', **arrow_style)

    # Choose a vector x
    x = np.array([1.5, 0.5])
    result = A @ x

    # Show the result as a tip-to-tail combination: x1*a1 from the origin,
    # then x2*a2 starting where the first arrow ends.
    first_leg = x[0] * col1
    second_leg = x[1] * col2
    ax.quiver(0, 0, first_leg[0], first_leg[1],
              color='blue', width=0.008, alpha=0.5, **arrow_style)
    ax.quiver(first_leg[0], first_leg[1], second_leg[0], second_leg[1],
              color='red', width=0.008, alpha=0.5, **arrow_style)

    # Result vector
    ax.quiver(0, 0, result[0], result[1], color='purple', width=0.02,
              label=f'Ax = {result}', **arrow_style)

    # ndarray.max() already returns a scalar; wrapping it in the builtin max()
    # raises TypeError because a NumPy scalar is not iterable. The tip of the
    # first leg is included so the intermediate arrow is never clipped.
    max_val = np.abs([col1, col2, first_leg, result]).max() * 1.5
    if max_val == 0:
        # Zero matrix: fall back to a unit view instead of a degenerate range.
        max_val = 1.0
    ax.set_xlim(-max_val, max_val)
    ax.set_ylim(-max_val, max_val)
    ax.set_aspect('equal')
    ax.axhline(y=0, color='k', linewidth=0.5)
    ax.axvline(x=0, color='k', linewidth=0.5)
    ax.set_title(f'Ax = x1·a1 + x2·a2\nx = {x}\nAx = {x[0]}·{col1} + {x[1]}·{col2} = {result}')
    ax.legend()
    plt.show()

# Example matrix whose columns [2, 1] and [1, 2] are linearly independent (det = 3).
A = np.array([[2, 1], [1, 2]])
visualize_column_space(A)

## 4. Rank and Dimension Reduction

In [None]:
def visualize_rank_reduction():
    """Contrast a full-rank and a rank-1 map applied to a random 2D point cloud.

    Draws three scatter panels (original points, full-rank image, rank-1
    image) and prints the numerical rank of both matrices. NumPy's global
    random generator is reseeded with 42 so the point cloud is reproducible.
    """
    fig, panels = plt.subplots(1, 3, figsize=(15, 5))

    # 50 standard-normal points in the plane, one point per row.
    np.random.seed(42)
    cloud = np.random.randn(50, 2)

    full_rank = np.array([[2, 0.5], [0.5, 1]])
    # Second column is half the first, so every output lies on a single line.
    rank_one = np.array([[1, 0.5], [2, 1]])

    image_full = (full_rank @ cloud.T).T
    image_rank1 = (rank_one @ cloud.T).T

    # One entry per panel: points, title, half-width of the square view.
    scenes = (
        (cloud, 'Original Points\nDimension: 2', 4),
        (image_full, 'Full Rank (rank=2)\nStill 2D spread',
         np.abs(image_full).max() * 1.2),
        (image_rank1, 'Rank 1\nCollapsed to 1D line!',
         np.abs(image_rank1).max() * 1.2),
    )
    for panel, (pts, heading, half_width) in zip(panels, scenes):
        panel.scatter(pts[:, 0], pts[:, 1], alpha=0.6)
        panel.set_title(heading)
        panel.set_aspect('equal')
        panel.set_xlim(-half_width, half_width)
        panel.set_ylim(-half_width, half_width)

    plt.tight_layout()
    plt.show()

    print(f"Rank of A_full: {np.linalg.matrix_rank(full_rank)}")
    print(f"Rank of A_rank1: {np.linalg.matrix_rank(rank_one)}")

# Run the demo: draws the three scatter panels and prints both matrix ranks.
visualize_rank_reduction()

## 5. Inverse Matrix: Undoing a Transformation

In [None]:
def visualize_inverse(A):
    """Show that applying A and then A^(-1) returns the unit square unchanged.

    Three panels are drawn: the unit square, its image under A, and the image
    mapped back through A^(-1).

    Args:
        A: Square 2x2 matrix (NumPy array or nested sequence).

    Returns:
        None. When A is singular, prints "Matrix is not invertible!" and
        returns without creating a figure.
    """
    A = np.asarray(A)

    # Check invertibility BEFORE creating the figure, so a singular matrix does
    # not leave an empty figure behind. The rank test uses an SVD with a
    # tolerance relative to the largest singular value, unlike an absolute
    # determinant threshold, which wrongly rejects well-conditioned matrices
    # whose entries are merely small.
    if np.linalg.matrix_rank(A) < A.shape[0]:
        print("Matrix is not invertible!")
        return

    A_inv = np.linalg.inv(A)

    # Closed outline of the unit square; points are rows, hence the transposes.
    square = np.array([[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]])
    after_A = (A @ square.T).T
    after_inv = (A_inv @ after_A.T).T

    # Shared view for all panels: at least the range (-2, 3), widened with a
    # 0.5 margin when a shape reaches further so nothing is clipped.
    every_point = np.vstack([square, after_A, after_inv])
    lower = min(-2.0, every_point.min() - 0.5)
    upper = max(3.0, every_point.max() + 0.5)

    fig, axes = plt.subplots(1, 3, figsize=(15, 5))

    # One entry per panel: outline, fill colour, line format, title.
    stages = (
        (square, 'blue', 'b-', 'Original'),
        (after_A, 'green', 'g-', f'After A\nA = {A.tolist()}'),
        (after_inv, 'red', 'r-',
         f'After A⁻¹(Ax) = x\nA⁻¹ = {np.round(A_inv, 2).tolist()}'),
    )
    for ax, (outline, shade, line_fmt, heading) in zip(axes, stages):
        ax.fill(outline[:-1, 0], outline[:-1, 1], alpha=0.3, color=shade)
        ax.plot(outline[:, 0], outline[:, 1], line_fmt, linewidth=2)
        ax.set_title(heading)

    for ax in axes:
        ax.set_aspect('equal')
        ax.set_xlim(lower, upper)
        ax.set_ylim(lower, upper)
        ax.axhline(y=0, color='k', linewidth=0.5)
        ax.axvline(x=0, color='k', linewidth=0.5)

    plt.tight_layout()
    plt.show()

# Invertible example: det = 2*1.5 - 1*0.5 = 2.5, which is non-zero.
A = np.array([[2, 1], [0.5, 1.5]])
visualize_inverse(A)

## 6. Orthogonal Matrices: Length-Preserving Transformations

In [None]:
def visualize_orthogonal(Q):
    """Plot the unit circle and basis vectors before and after applying Q.

    After plotting, prints Q^T Q rounded to 6 decimals and whether it is
    numerically equal to the 2x2 identity.

    Args:
        Q: 2x2 NumPy array to apply and test for orthogonality.
    """
    arrow_style = dict(angles='xy', scale_units='xy', scale=1, width=0.02)
    fig, (left, right) = plt.subplots(1, 2, figsize=(12, 5))

    # 100 samples of the unit circle, stored as a 2 x 100 array (points are columns).
    phi = np.linspace(0, 2*np.pi, 100)
    circle = np.vstack([np.cos(phi), np.sin(phi)])
    image = Q @ circle

    # One entry per panel: axes, curve, image of e1, image of e2, title.
    panels = (
        (left, circle, (1, 0), (0, 1), 'Original Unit Circle'),
        (right, image, (Q[0, 0], Q[1, 0]), (Q[0, 1], Q[1, 1]),
         'After Orthogonal Q\nStill a circle!'),
    )
    for ax, curve, first, second, heading in panels:
        ax.plot(curve[0], curve[1], 'b-', linewidth=2)
        ax.quiver(0, 0, first[0], first[1], color='red', **arrow_style)
        ax.quiver(0, 0, second[0], second[1], color='green', **arrow_style)
        ax.set_title(heading)

    for ax in (left, right):
        ax.set_aspect('equal')
        ax.set_xlim(-1.5, 1.5)
        ax.set_ylim(-1.5, 1.5)
        ax.axhline(y=0, color='k', linewidth=0.5)
        ax.axvline(x=0, color='k', linewidth=0.5)

    plt.tight_layout()
    plt.show()

    # Q is orthogonal exactly when Q^T Q is the identity.
    gram = Q.T @ Q
    print(f"Q^T Q = \n{np.round(gram, 6)}")
    print(f"\nIs orthogonal: {np.allclose(gram, np.eye(2))}")

# Demo: a rotation by 60 degrees (pi/3 radians) is an orthogonal matrix.
theta = np.pi / 3
cos_t, sin_t = np.cos(theta), np.sin(theta)
Q = np.array([[cos_t, -sin_t],
              [sin_t, cos_t]])
visualize_orthogonal(Q)

## 7. Application: Neural Network Layer

In [None]:
def visualize_neural_layer():
    """Plot two point clusters before a layer, after Wx + b, and after ReLU.

    The data, weights and bias are fixed inside the function. NumPy's global
    random generator is reseeded with 42 so the clusters are reproducible.
    """
    np.random.seed(42)

    # Two clusters of 50 points each, centred at (1, 1) and (-1, -1).
    class1 = np.random.randn(50, 2) * 0.5 + np.array([1, 1])
    class2 = np.random.randn(50, 2) * 0.5 + np.array([-1, -1])
    inputs = np.vstack([class1, class2])
    labels = np.repeat([0, 1], 50)

    # Fixed layer parameters.
    W = np.array([[1.5, -0.5], [-0.5, 1.5]])
    b = np.array([0.1, -0.1])

    # Samples are rows, so the layer Wx + b becomes X @ W.T + b.
    pre_activation = inputs @ W.T + b
    # ReLU zeroes every negative coordinate.
    activated = np.maximum(0, pre_activation)

    fig, axes = plt.subplots(1, 3, figsize=(15, 5))

    # One entry per panel: points, title, legend names (only the first panel has a legend).
    stages = (
        (inputs, 'Input Space', ('Class 0', 'Class 1')),
        (pre_activation, f'After Linear (Wx + b)\nW = {W.tolist()}', None),
        (activated, 'After ReLU\nNon-linearity folds space', None),
    )
    for ax, (pts, heading, names) in zip(axes, stages):
        for cls, colour in enumerate(('blue', 'red')):
            extra = {'label': names[cls]} if names else {}
            chosen = pts[labels == cls]
            ax.scatter(chosen[:, 0], chosen[:, 1], c=colour, alpha=0.6, **extra)
        ax.set_title(heading)
        if names:
            ax.legend()

    for ax in axes:
        ax.set_aspect('equal')
        ax.axhline(y=0, color='k', linewidth=0.5)
        ax.axvline(x=0, color='k', linewidth=0.5)

    plt.tight_layout()
    plt.show()

# Run the demo: draws the input, post-linear and post-ReLU panels.
visualize_neural_layer()

## 8. Covariance Matrix

In [None]:
def visualize_covariance():
    """Plot correlated 2D data with the principal axes of its sample covariance.

    Draws 500 samples from a zero-mean Gaussian with covariance
    [[1, 0.8], [0.8, 1]], computes the sample covariance, and overlays its
    eigenvectors scaled to two standard deviations. PC1 is always the
    direction of largest variance. Prints the sample covariance matrix and its
    eigenvalues in descending order. NumPy's global random generator is
    reseeded with 42 so the data is reproducible.
    """
    np.random.seed(42)

    # Generate correlated data
    mean = [0, 0]
    cov = [[1, 0.8], [0.8, 1]]  # Positive correlation
    data = np.random.multivariate_normal(mean, cov, 500)

    # Unbiased sample covariance: centre the data, then divide by n - 1.
    centre = data.mean(axis=0)
    data_centered = data - centre
    sample_cov = (data_centered.T @ data_centered) / (len(data) - 1)

    # A covariance matrix is symmetric, so use eigh: it returns real
    # eigenvalues in ascending order with orthonormal eigenvectors. The generic
    # eig gives no ordering guarantee, so "PC1" could end up on the smaller
    # eigenvalue. Reorder to descending so PC1 is the largest-variance axis.
    eigenvalues, eigenvectors = np.linalg.eigh(sample_cov)
    order = np.argsort(eigenvalues)[::-1]
    eigenvalues = eigenvalues[order]
    eigenvectors = eigenvectors[:, order]

    # Round-off can push a near-zero eigenvalue slightly below zero; clip so
    # the square root never yields NaN.
    std_devs = np.sqrt(np.clip(eigenvalues, 0, None))

    fig, ax = plt.subplots(figsize=(10, 8))

    # Plot data
    ax.scatter(data[:, 0], data[:, 1], alpha=0.3, s=10)

    # Principal axes, anchored at the sample mean and scaled to two standard
    # deviations for visibility. The sign of each eigenvector is arbitrary.
    for i, colour in enumerate(('red', 'blue')):
        vec = eigenvectors[:, i] * std_devs[i] * 2
        ax.quiver(centre[0], centre[1], vec[0], vec[1],
                  angles='xy', scale_units='xy', scale=1,
                  color=colour, width=0.02,
                  label=f'PC{i+1}: λ={eigenvalues[i]:.2f}')

    ax.set_aspect('equal')
    ax.set_xlim(-4, 4)
    ax.set_ylim(-4, 4)
    ax.axhline(y=0, color='k', linewidth=0.5)
    ax.axvline(x=0, color='k', linewidth=0.5)
    ax.set_title('Data with Principal Components\nEigenvectors of Covariance Matrix')
    ax.legend()

    plt.show()

    print(f"Sample Covariance Matrix:\n{np.round(sample_cov, 3)}")
    print(f"\nEigenvalues: {np.round(eigenvalues, 3)}")

# Run the demo: draws the scatter plot with principal axes and prints the statistics.
visualize_covariance()

## 9. Summary

In [None]:
# Print a plain-text recap of the concepts demonstrated in the sections above.
print("""
KEY INSIGHTS ABOUT MATRICES
===========================

1. MATRICES = TRANSFORMATIONS
   - Every matrix transforms vectors
   - Columns show where basis vectors go

2. MATRIX MULTIPLICATION = COMPOSITION
   - AB means "first B, then A"
   - Order matters! AB ≠ BA

3. RANK = EFFECTIVE DIMENSION
   - Low rank = transformation loses dimensions
   - Used for compression in ML

4. INVERSE UNDOES TRANSFORMATION
   - Only exists if matrix is full rank
   - A⁻¹A = AA⁻¹ = I

5. ORTHOGONAL = RIGID MOTION
   - Preserves lengths and angles
   - Q^T = Q⁻¹

6. ML APPLICATIONS
   - Neural layers: y = Wx + b
   - Attention: softmax(QK^T/√d)V
   - PCA: eigenvectors of covariance
""")