# 2. Matrix Multiplication in PyTorch

Matrix multiplication is essential for neural networks!
PyTorch provides efficient operations for multiplying matrices and tensors.


In [1]:
import torch


## 1. Matrix Multiplication (@ operator)

Use the `@` operator or `torch.matmul()` for matrix multiplication!
`A @ B` multiplies matrices A and B; the number of columns of A must equal the number of rows of B.


In [2]:
# Multiply two small 2×2 matrices with the `@` operator and with
# torch.matmul(), then confirm that both spellings give the same tensor.

# Create two matrices
A = torch.tensor([[1, 2],
                  [3, 4]], dtype=torch.float32)

B = torch.tensor([[5, 6],
                  [7, 8]], dtype=torch.float32)

print("Matrix A:")
print(A)
print(f"Shape: {A.shape}")
print()

print("Matrix B:")
print(B)
print(f"Shape: {B.shape}")
print()

# Matrix multiplication using @
result = A @ B
print("A @ B (matrix multiplication):")
print(result)
print(f"Shape: {result.shape}")
print()

# Using torch.matmul()
result2 = torch.matmul(A, B)
print("torch.matmul(A, B):")
print(result2)

# Back the claim printed below with an actual check, so the cell fails loudly
# instead of printing a false statement if the two results ever diverge.
assert torch.equal(result, result2), "A @ B and torch.matmul(A, B) disagree"
print("(Same result!)")


Matrix A:
tensor([[1., 2.],
        [3., 4.]])
Shape: torch.Size([2, 2])

Matrix B:
tensor([[5., 6.],
        [7., 8.]])
Shape: torch.Size([2, 2])

A @ B (matrix multiplication):
tensor([[19., 22.],
        [43., 50.]])
Shape: torch.Size([2, 2])

torch.matmul(A, B):
tensor([[19., 22.],
        [43., 50.]])
(Same result!)


## 2. Matrix-Vector Multiplication

Multiply a matrix by a vector to transform the vector! With a 1-D vector `v`, `M @ v` returns a 1-D tensor with one entry per row of `M`.


In [3]:
# Matrix-vector multiplication: apply a 2×2 matrix to a length-2 vector
M = torch.tensor([[1, 2], [3, 4]], dtype=torch.float32)
v = torch.tensor([5, 6], dtype=torch.float32)

# Show each operand with its shape; the empty last field leaves a blank line
print("Matrix M:", M, f"Shape: {M.shape}", "", sep="\n")
print("Vector v:", v, f"Shape: {v.shape}", "", sep="\n")

# Matrix × Vector
result = M @ v
print("M @ v (matrix-vector multiplication):", result, f"Shape: {result.shape}", "", sep="\n")

# How it works: each output entry is the dot product of one row of M with v
print("How it works:")
for i in range(M.shape[0]):
    print(
        f"Result[{i}] = M[{i}, 0]*v[0] + M[{i}, 1]*v[1] = "
        f"{M[i, 0]}*{v[0]} + {M[i, 1]}*{v[1]} = {result[i]}"
    )


Matrix M:
tensor([[1., 2.],
        [3., 4.]])
Shape: torch.Size([2, 2])

Vector v:
tensor([5., 6.])
Shape: torch.Size([2])

M @ v (matrix-vector multiplication):
tensor([17., 39.])
Shape: torch.Size([2])

How it works:
Result[0] = M[0, 0]*v[0] + M[0, 1]*v[1] = 1.0*5.0 + 2.0*6.0 = 17.0
Result[1] = M[1, 0]*v[0] + M[1, 1]*v[1] = 3.0*5.0 + 4.0*6.0 = 39.0


## 3. Batch Matrix Multiplication

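Process multiple matrices at once using batch dimensions!
`torch.bmm` multiplies a batch of shape (b, n, m) by a batch of shape (b, m, p), producing a batch of shape (b, n, p).
Very useful for neural networks!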


In [4]:
# Batch matrix multiplication
# Seed the global RNG so the random batches (and every number printed below)
# are the same on each Restart & Run All.
torch.manual_seed(42)

# Create batch of matrices: (batch_size, rows, cols)
batch_A = torch.randn(3, 2, 4)  # 3 matrices, each 2×4
batch_B = torch.randn(3, 4, 3)  # 3 matrices, each 4×3

print("Batch A (3 matrices, each 2×4):")
print(batch_A)
print(f"Shape: {batch_A.shape}")
print()

print("Batch B (3 matrices, each 4×3):")
print(batch_B)
print(f"Shape: {batch_B.shape}")
print()

# Batch matrix multiplication: batch_A[i] is multiplied by batch_B[i] for each i
batch_result = torch.bmm(batch_A, batch_B)
print("Batch matrix multiplication result:")
print(batch_result)
print(f"Shape: {batch_result.shape}")
print("(3 matrices, each 2×3)")
print()

# Verify: first matrix multiplication, done by hand on the first pair
A0 = batch_A[0]
B0 = batch_B[0]
result0 = A0 @ B0

print("First matrix multiplication (manual):")
print(result0)
print()

print("First matrix multiplication (from batch):")
print(batch_result[0])
print("(Should match!)")
# allclose rather than exact equality: the batched and single-matrix code
# paths may differ in the last floating-point bits.
print(f"Are they equal? {torch.allclose(result0, batch_result[0])}")


Batch A (3 matrices, each 2×4):
tensor([[[-1.5379,  0.0291,  1.1533, -0.1324],
         [ 1.6259,  1.4004, -0.7527, -0.7088]],

        [[ 0.0799,  0.4315, -0.1863,  0.3809],
         [-0.2108, -0.5510,  1.4508,  0.9944]],

        [[ 0.1208, -0.3484,  0.1828,  0.2160],
         [-0.1547,  1.0977, -1.2316, -0.5545]]])
Shape: torch.Size([3, 2, 4])

Batch B (3 matrices, each 4×3):
tensor([[[-0.0060, -0.5900, -0.5415],
         [ 2.4403, -0.3269,  1.8787],
         [ 0.9109,  0.0259,  0.3109],
         [-0.7873, -0.4925, -1.2929]],

        [[ 0.8654,  0.2479, -1.6910],
         [ 0.5028,  0.7096, -0.2944],
         [-0.4037, -0.9022,  2.5316],
         [ 1.9622,  0.3996,  0.1633]],

        [[ 0.6043, -0.8676,  0.4673],
         [-0.7879,  0.1842,  0.0099],
         [-1.4569,  0.0050, -1.5731],
         [ 0.8508,  0.3823, -1.6350]]])
Shape: torch.Size([3, 4, 3])

Batch matrix multiplication result:
tensor([[[ 1.2352,  0.9929,  1.4173],
         [ 3.2800, -1.0876,  2.4328]],

        [[ 1

## 4. Element-wise Multiplication

Multiply elements at the same position (not matrix multiplication)!
Use `*` for element-wise multiplication.


In [5]:
# Element-wise multiplication (different from matrix multiplication!)
A = torch.tensor([[1, 2], [3, 4]], dtype=torch.float32)
B = torch.tensor([[5, 6], [7, 8]], dtype=torch.float32)

# Show both operands; the empty last field leaves a blank line after each
print("Matrix A:", A, "", sep="\n")
print("Matrix B:", B, "", sep="\n")

# Element-wise multiplication: entries at the same position are multiplied
element_wise = A * B
print("A * B (element-wise multiplication):", element_wise, "", sep="\n")

# Matrix multiplication (for comparison): rows of A dotted with columns of B
matrix_mult = A @ B
print("A @ B (matrix multiplication):", matrix_mult, "", sep="\n")

# Contrast the top-left entry under both operations
print("Notice: Element-wise (*) is different from matrix multiplication (@)!")
print(f"Element-wise: {A[0, 0]} * {B[0, 0]} = {element_wise[0, 0]}")
print(f"Matrix mult: A[0, 0]*B[0, 0] + A[0, 1]*B[1, 0] = {matrix_mult[0, 0]}")


Matrix A:
tensor([[1., 2.],
        [3., 4.]])

Matrix B:
tensor([[5., 6.],
        [7., 8.]])

A * B (element-wise multiplication):
tensor([[ 5., 12.],
        [21., 32.]])

A @ B (matrix multiplication):
tensor([[19., 22.],
        [43., 50.]])

Notice: Element-wise (*) is different from matrix multiplication (@)!
Element-wise: 1.0 * 5.0 = 5.0
Matrix mult: A[0, 0]*B[0, 0] + A[0, 1]*B[1, 0] = 19.0
