In [4]:
import numpy as np
from minitorch.tensor.tensor import Tensor



In [13]:

def test_unit_tensor_creation():
    """ðŸ§ª Test Tensor creation with various data types."""
    print("ðŸ§ª Unit Test: Tensor Creation...")

    # Test scalar creation
    scalar = Tensor(np.array((5.0), dtype=np.float32))
    assert scalar.data == 5.0
    assert scalar.shape == ()
    assert scalar.size == 1
    assert not scalar.requires_grad
    assert scalar.grad is None
    assert scalar.dtype == np.float32

    # # Test vector creation
    vector = Tensor(np.array([1, 2, 3]))
    assert np.array_equal(vector.data, np.array([1, 2, 3], dtype=np.float32))
    assert vector.shape == (3,)
    assert vector.size == 3

    # Test matrix creation
    matrix = Tensor(np.array([[1, 2], [3, 4]]))
    assert np.array_equal(matrix.data, np.array([[1, 2], [3, 4]], dtype=np.float32))
    assert matrix.shape == (2, 2)
    assert matrix.size == 4

    # Test gradient flag (dormant feature)
    grad_tensor = Tensor(np.array([[1, 2]]), requires_grad=True)
    assert grad_tensor.requires_grad
    assert grad_tensor.grad is None  # Still None until Module 05

    print("âœ… Tensor creation works correctly!")

test_unit_tensor_creation()

ðŸ§ª Unit Test: Tensor Creation...
âœ… Tensor creation works correctly!


In [15]:
def test_unit_arithmetic_operations():
    """ðŸ§ª Test arithmetic operations with broadcasting."""
    print("ðŸ§ª Unit Test: Arithmetic Operations...")

    # Test tensor + tensor
    a = Tensor(np.array([1, 2, 3]))
    b = Tensor(np.array([4, 5, 6]))
    result = a + b
    assert np.array_equal(result.data, np.array([5, 7, 9], dtype=np.float32))

    # Test tensor + scalar (very common in ML)
    result = a + 10
    assert np.array_equal(result.data, np.array([11, 12, 13], dtype=np.float32))
    # Test broadcasting with different shapes (matrix + vector)
    matrix = Tensor(np.array([[1, 2], [3, 4]]))
    vector = Tensor(np.array([10, 20]))
    result = matrix + vector
    expected = np.array([[11, 22], [13, 24]], dtype=np.float32)
    assert np.array_equal(result.data, expected)

    # Test subtraction (data centering)
    result = b - a
    print(result.data)
    assert np.array_equal(result.data, np.array([3, 3, 3], dtype=np.float32))

    # Test multiplication (scaling)
    result = a * 2
    assert np.array_equal(result.data, np.array([2, 4, 6], dtype=np.float32))

    # Test division (normalization)
    result = b / 2
    assert np.array_equal(result.data, np.array([2.0, 2.5, 3.0], dtype=np.float32))

    # Test chaining operations (common in ML pipelines)
    normalized = (a - 2) / 2  # Center and scale
    expected = np.array([-0.5, 0.0, 0.5], dtype=np.float32)
    assert np.allclose(normalized.data, expected)

    print("âœ… Arithmetic operations work correctly!")

test_unit_arithmetic_operations()

ðŸ§ª Unit Test: Arithmetic Operations...
[3 3 3]
âœ… Arithmetic operations work correctly!


In [16]:

def test_unit_matrix_multiplication():
    """ðŸ§ª Test matrix multiplication operations."""
    print("ðŸ§ª Unit Test: Matrix Multiplication...")

    # Test 2Ã—2 matrix multiplication (basic case)
    a = Tensor(np.array([[1, 2], [3, 4]]))  # 2Ã—2
    b = Tensor(np.array([[5, 6], [7, 8]]))  # 2Ã—2
    result = a.matmul(b)
    # Expected: [[1Ã—5+2Ã—7, 1Ã—6+2Ã—8], [3Ã—5+4Ã—7, 3Ã—6+4Ã—8]] = [[19, 22], [43, 50]]
    expected = np.array([[19, 22], [43, 50]], dtype=np.float32)
    assert np.array_equal(result.data, expected)

    # Test rectangular matrices (common in neural networks)
    c = Tensor(np.array([[1, 2, 3], [4, 5, 6]]))  # 2Ã—3 (like batch_size=2, features=3)
    d = Tensor(np.array([[7, 8], [9, 10], [11, 12]]))  # 3Ã—2 (like features=3, outputs=2)
    result = c.matmul(d)
    # Expected: [[1Ã—7+2Ã—9+3Ã—11, 1Ã—8+2Ã—10+3Ã—12], [4Ã—7+5Ã—9+6Ã—11, 4Ã—8+5Ã—10+6Ã—12]]
    expected = np.array([[58, 64], [139, 154]], dtype=np.float32)
    assert np.array_equal(result.data, expected)

    # Test matrix-vector multiplication (common in forward pass)
    matrix = Tensor(np.array([[1, 2, 3], [4, 5, 6]]))  # 2Ã—3
    vector = Tensor(np.array([[1], [2], [3]]))  # 3Ã—1 (conceptually)
    result = matrix.matmul(vector)
    # Expected: [1Ã—1+2Ã—2+3Ã—3, 4Ã—1+5Ã—2+6Ã—3] = [14, 32]
    expected = np.array([[14], [32]], dtype=np.float32)
    assert np.array_equal(result.data, expected)

    # Test shape validation - should raise clear error
    try:
        incompatible_a = Tensor(np.array([[1, 2]]))     # 1Ã—2
        incompatible_b = Tensor(np.array([[1], [2], [3]]))  # 3Ã—1
        incompatible_a.matmul(incompatible_b)  # 1Ã—2 @ 3Ã—1 should fail (2 â‰  3)
        assert False, "Should have raised ValueError for incompatible shapes"
    except ValueError as e:
        assert "Inner dimensions must match" in str(e)
        assert "2 â‰  3" in str(e)  # Should show specific dimensions

    print("âœ… Matrix multiplication works correctly!")
    
test_unit_matrix_multiplication()


ðŸ§ª Unit Test: Matrix Multiplication...
âœ… Matrix multiplication works correctly!


In [17]:
def test_unit_shape_manipulation():
    """ðŸ§ª Test reshape and transpose operations."""
    print("ðŸ§ª Unit Test: Shape Manipulation...")

    # Test basic reshape (flatten â†’ matrix)
    tensor = Tensor(np.array([[1, 2, 3, 4, 5, 6]]))  # Shape: (6,)
    reshaped = tensor.reshape(2, 3)      # Shape: (2, 3)
    assert reshaped.shape == (2, 3)
    expected = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
    assert np.array_equal(reshaped.data, expected)

    # Test reshape with tuple (alternative calling style)
    reshaped2 = tensor.reshape((3, 2))   # Shape: (3, 2)
    assert reshaped2.shape == (3, 2)
    expected2 = np.array([[1, 2], [3, 4], [5, 6]], dtype=np.float32)
    assert np.array_equal(reshaped2.data, expected2)

    # Test reshape with -1 (automatic dimension inference)
    auto_reshaped = tensor.reshape(2, -1)  # Should infer -1 as 3
    assert auto_reshaped.shape == (2, 3)

    # Test reshape validation - should raise error for incompatible sizes
    try:
        tensor.reshape(2, 2)  # 6 elements can't fit in 2Ã—2=4
        assert False, "Should have raised ValueError"
    except ValueError as e:
        assert "Total elements must match" in str(e)
        assert "6 â‰  4" in str(e)

    # Test matrix transpose (most common case)
    matrix = Tensor(np.array([[1, 2, 3], [4, 5, 6]]))  # (2, 3)
    transposed = matrix.transpose()          # (3, 2)
    assert transposed.shape == (3, 2)
    expected = np.array([[1, 4], [2, 5], [3, 6]], dtype=np.float32)
    assert np.array_equal(transposed.data, expected)

    # Test 1D transpose (should be identity)
    vector = Tensor(np.array([1, 2, 3]))  # Shape: (3,)
    vector_t = vector.transpose()
    assert np.array_equal(vector.data, vector_t.data)

    # Test specific dimension transpose
    tensor_3d = Tensor(np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]))  # (2, 2, 2)
    swapped = tensor_3d.transpose(0, 2)  # Swap first and last dimensions
    assert swapped.shape == (2, 2, 2)  # Same shape but data rearranged

    # Test neural network reshape pattern (flatten for MLP)
    batch_images = Tensor(np.random.rand(2, 3, 4))  # (batch=2, height=3, width=4)
    flattened = batch_images.reshape(2, -1)  # (batch=2, features=12)
    assert flattened.shape == (2, 12)

    print("âœ… Shape manipulation works correctly!")

test_unit_shape_manipulation()

ðŸ§ª Unit Test: Shape Manipulation...
âœ… Shape manipulation works correctly!


In [18]:
def test_unit_reduction_operations():
    """ðŸ§ª Test reduction operations."""
    print("ðŸ§ª Unit Test: Reduction Operations...")

    matrix = Tensor(np.array([[1, 2, 3], [4, 5, 6]]))  # Shape: (2, 3)

    # Test sum all elements (common for loss computation)
    total = matrix.sum()
    assert total.data == 21.0  # 1+2+3+4+5+6
    assert total.shape == ()   # Scalar result

    # Test sum along axis 0 (columns) - batch dimension reduction
    col_sum = matrix.sum(axis=0)
    expected_col = np.array([5, 7, 9], dtype=np.float32)  # [1+4, 2+5, 3+6]
    assert np.array_equal(col_sum.data, expected_col)
    assert col_sum.shape == (3,)

    # Test sum along axis 1 (rows) - feature dimension reduction
    row_sum = matrix.sum(axis=1)
    expected_row = np.array([6, 15], dtype=np.float32)  # [1+2+3, 4+5+6]
    assert np.array_equal(row_sum.data, expected_row)
    assert row_sum.shape == (2,)

    # Test mean (average loss computation)
    avg = matrix.mean()
    assert np.isclose(avg.data, 3.5)  # 21/6
    assert avg.shape == ()

    # Test mean along axis (batch normalization pattern)
    col_mean = matrix.mean(axis=0)
    expected_mean = np.array([2.5, 3.5, 4.5], dtype=np.float32)  # [5/2, 7/2, 9/2]
    assert np.allclose(col_mean.data, expected_mean)

    # Test max (finding best predictions)
    maximum = matrix.max()
    assert maximum.data == 6.0
    assert maximum.shape == ()

    # Test max along axis (argmax-like operation)
    row_max = matrix.max(axis=1)
    expected_max = np.array([3, 6], dtype=np.float32)  # [max(1,2,3), max(4,5,6)]
    assert np.array_equal(row_max.data, expected_max)

    # Test keepdims (important for broadcasting)
    sum_keepdims = matrix.sum(axis=1, keepdims=True)
    assert sum_keepdims.shape == (2, 1)  # Maintains 2D shape
    expected_keepdims = np.array([[6], [15]], dtype=np.float32)
    assert np.array_equal(sum_keepdims.data, expected_keepdims)

    # Test 3D reduction (simulating global average pooling)
    tensor_3d = Tensor(np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]))  # (2, 2, 2)
    spatial_mean = tensor_3d.mean(axis=(1, 2))  # Average across spatial dimensions
    assert spatial_mean.shape == (2,)  # One value per batch item

    print("âœ… Reduction operations work correctly!")

test_unit_reduction_operations()


ðŸ§ª Unit Test: Reduction Operations...
âœ… Reduction operations work correctly!


In [19]:
BYTES_PER_FLOAT32 = 4
MB_TO_BYTES = 1024

def analyze_memory_layout():
    """ðŸ“Š Demonstrate cache effects with row vs column access patterns."""
    print("ðŸ“Š Analyzing Memory Access Patterns...")
    print("=" * 60)

    # Create a moderately-sized matrix (large enough to show cache effects)
    size = 2000
    matrix = Tensor(np.random.rand(size, size))

    import time

    print(f"\nTesting with {size}Ã—{size} matrix ({matrix.size * BYTES_PER_FLOAT32 / MB_TO_BYTES:.1f} MB)")
    print("-" * 60)

    # Test 1: Row-wise access (cache-friendly)
    # Memory layout: [row0][row1][row2]... stored contiguously
    print("\nðŸ”¬ Test 1: Row-wise Access (Cache-Friendly)")
    start = time.time()
    row_sums = []
    for i in range(size):
        row_sum = matrix.data[i, :].sum()  # Access entire row sequentially
        row_sums.append(row_sum)
    row_time = time.time() - start
    print(f"   Time: {row_time*1000:.1f}ms")
    print("   Access pattern: Sequential (follows memory layout)")

    # Test 2: Column-wise access (cache-unfriendly)
    # Must jump between rows, poor spatial locality
    print("\nðŸ”¬ Test 2: Column-wise Access (Cache-Unfriendly)")
    start = time.time()
    col_sums = []
    for j in range(size):
        col_sum = matrix.data[:, j].sum()  # Access entire column with large strides
        col_sums.append(col_sum)
    col_time = time.time() - start
    print(f"   Time: {col_time*1000:.1f}ms")
    print(f"   Access pattern: Strided (jumps {size * BYTES_PER_FLOAT32} bytes per element)")

    # Calculate slowdown
    slowdown = col_time / row_time
    print("\n" + "=" * 60)
    print("ðŸ“Š PERFORMANCE IMPACT:")
    print(f"   Slowdown factor: {slowdown:.2f}Ã— ({col_time/row_time:.1f}Ã— slower)")
    print(f"   Cache misses cause {(slowdown-1)*100:.0f}% performance loss")

    # Educational insights
    print("\nðŸ’¡ KEY INSIGHTS:")
    print("   1. Memory layout matters: Row-major (C-style) storage is sequential")
    print("   2. Cache lines are ~64 bytes: Row access loads nearby elements \"for free\"")
    print("   3. Column access misses cache: Must reload from DRAM every time")
    print(f"   4. This is O(n) algorithm but {slowdown:.1f}Ã— different wall-clock time!")

    print("\nðŸš€ REAL-WORLD IMPLICATIONS:")
    print("   â€¢ CNNs use NCHW format (channels sequential) for cache efficiency")
    print("   â€¢ Matrix multiplication optimized with blocking (tile into cache-sized chunks)")
    print(f"   â€¢ Transpose is expensive ({slowdown:.1f}Ã—) because it changes memory layout")
    print("   â€¢ This is why GPU frameworks obsess over memory coalescing")

    print("\n" + "=" * 60)
    
analyze_memory_layout()

ðŸ“Š Analyzing Memory Access Patterns...

Testing with 2000Ã—2000 matrix (15625.0 MB)
------------------------------------------------------------

ðŸ”¬ Test 1: Row-wise Access (Cache-Friendly)
   Time: 30.9ms
   Access pattern: Sequential (follows memory layout)

ðŸ”¬ Test 2: Column-wise Access (Cache-Unfriendly)
   Time: 65.8ms
   Access pattern: Strided (jumps 8000 bytes per element)

ðŸ“Š PERFORMANCE IMPACT:
   Slowdown factor: 2.13Ã— (2.1Ã— slower)
   Cache misses cause 113% performance loss

ðŸ’¡ KEY INSIGHTS:
   1. Memory layout matters: Row-major (C-style) storage is sequential
   2. Cache lines are ~64 bytes: Row access loads nearby elements "for free"
   3. Column access misses cache: Must reload from DRAM every time
   4. This is O(n) algorithm but 2.1Ã— different wall-clock time!

ðŸš€ REAL-WORLD IMPLICATIONS:
   â€¢ CNNs use NCHW format (channels sequential) for cache efficiency
   â€¢ Matrix multiplication optimized with blocking (tile into cache-sized chunks)
   â€¢ Tran

In [22]:
def test_module():
    """ðŸ§ª Module Test: Complete Integration

    Comprehensive test of entire module functionality.

    This final test runs before module summary to ensure:
    - All unit tests pass
    - Functions work together correctly
    - Module is ready for integration with TinyTorch
    """
    print("ðŸ§ª RUNNING MODULE INTEGRATION TEST")
    print("=" * 50)

    # Run all unit tests
    print("Running unit tests...")
    test_unit_tensor_creation()
    test_unit_arithmetic_operations()
    test_unit_matrix_multiplication()
    test_unit_shape_manipulation()
    test_unit_reduction_operations()

    print("\nRunning integration scenarios...")

    # Test realistic neural network computation
    print("ðŸ§ª Integration Test: Two-Layer Neural Network...")

    # Create input data (2 samples, 3 features)
    x = Tensor(np.array([[1, 2, 3], [4, 5, 6]]))

    # First layer: 3 inputs â†’ 4 hidden units
    W1 = Tensor(np.array([[0.1, 0.2, 0.3, 0.4],
                [0.5, 0.6, 0.7, 0.8],
                [0.9, 1.0, 1.1, 1.2]]))
    b1 = Tensor(np.array([0.1, 0.2, 0.3, 0.4]))

    # Forward pass: hidden = xW1 + b1
    hidden = x.matmul(W1) + b1
    assert hidden.shape == (2, 4), f"Expected (2, 4), got {hidden.shape}"

    # Second layer: 4 hidden â†’ 2 outputs
    W2 = Tensor(np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6], [0.7, 0.8]]))
    b2 = Tensor(np.array([0.1, 0.2]))

    # Output layer: output = hiddenW2 + b2
    output = hidden.matmul(W2) + b2
    assert output.shape == (2, 2), f"Expected (2, 2), got {output.shape}"

    # Verify data flows correctly (no NaN, reasonable values)
    assert not np.isnan(output.data).any(), "Output contains NaN values"
    assert np.isfinite(output.data).all(), "Output contains infinite values"

    print("âœ… Two-layer neural network computation works!")

    # Test gradient attributes are preserved and functional
    print("ðŸ§ª Integration Test: Gradient System Readiness...")
    grad_tensor = Tensor(np.array([1, 2, 3]), requires_grad=True)
    result = grad_tensor + 5
    assert grad_tensor.requires_grad, "requires_grad not preserved"
    assert grad_tensor.grad is None, "grad should still be None"

    # Test backward() doesn't crash (even though it does nothing)
    grad_tensor.backward()  # Should not raise any exception

    print("âœ… Gradient system ready for Module 05!")

    # Test complex shape manipulations
    print("ðŸ§ª Integration Test: Complex Shape Operations...")
    data = Tensor(np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]))

    # Reshape to 3D tensor (simulating batch processing)
    tensor_3d = data.reshape(2, 2, 3)  # (batch=2, height=2, width=3)
    print(tensor_3d)
    assert tensor_3d.shape == (2, 2, 3)

    # Global average pooling simulation
    pooled = tensor_3d.mean(axis=(1, 2))  # Average across spatial dimensions
    print(pooled)
    assert pooled.shape == (2,), f"Expected (2,), got {pooled.shape}"

    # Flatten for MLP
    flattened = tensor_3d.reshape(2, -1)  # (batch, features)
    print(flattened)
    assert flattened.shape == (2, 6)

    # Transpose for different operations
    transposed = tensor_3d.transpose()  # Should transpose last two dims
    print(transposed)
    assert transposed.shape == (2, 3, 2)

    print("âœ… Complex shape operations work!")

    # Test broadcasting edge cases
    print("ðŸ§ª Integration Test: Broadcasting Edge Cases...")

    # Scalar broadcasting
    scalar = Tensor(np.array([5.0]))
    vector = Tensor(np.array([1, 2, 3]))
    result = scalar + vector  # Should broadcast scalar to vector shape
    expected = np.array([6, 7, 8], dtype=np.float32)
    assert np.array_equal(result.data, expected)

    # Matrix + vector broadcasting
    matrix = Tensor(np.array([[1, 2], [3, 4]]))
    vec = Tensor(np.array([10, 20]))
    result = matrix + vec
    expected = np.array([[11, 22], [13, 24]], dtype=np.float32)
    assert np.array_equal(result.data, expected)

    print("âœ… Broadcasting edge cases work!")

    print("\n" + "=" * 50)
    print("ðŸŽ‰ ALL TESTS PASSED! Module ready for export.")
    print("Run: tito module complete 01_tensor")
    
test_module()

ðŸ§ª RUNNING MODULE INTEGRATION TEST
Running unit tests...
ðŸ§ª Unit Test: Tensor Creation...
âœ… Tensor creation works correctly!
ðŸ§ª Unit Test: Arithmetic Operations...
[3 3 3]
âœ… Arithmetic operations work correctly!
ðŸ§ª Unit Test: Matrix Multiplication...
âœ… Matrix multiplication works correctly!
ðŸ§ª Unit Test: Shape Manipulation...
âœ… Shape manipulation works correctly!
ðŸ§ª Unit Test: Reduction Operations...
âœ… Reduction operations work correctly!

Running integration scenarios...
ðŸ§ª Integration Test: Two-Layer Neural Network...
âœ… Two-layer neural network computation works!
ðŸ§ª Integration Test: Gradient System Readiness...
âœ… Gradient system ready for Module 05!
ðŸ§ª Integration Test: Complex Shape Operations...
Tensor(data=[[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [10 11 12]]])
Tensor(data=[3.5 9.5])
Tensor(data=[[ 1  2  3  4  5  6]
 [ 7  8  9 10 11 12]])
Tensor(data=[[[ 1  4]
  [ 2  5]
  [ 3  6]]

 [[ 7 10]
  [ 8 11]
  [ 9 12]]])
âœ… Complex shape operations work