<a href="https://colab.research.google.com/github/rastringer/code_first_ml/blob/main/matmul.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from pathlib import Path
import pickle, gzip, math, os, time, shutil, matplotlib as mpl, matplotlib.pyplot as plt

In [None]:
import torch
from torch import tensor

### Let's download the MNIST dataset using PyTorch

In [None]:
import torch
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt


# Build the train split first, then the test split. Both are cached under
# ./data, downloaded when missing, and use the ToTensor transform.
training_data, test_data = (
    datasets.MNIST(root="data", train=is_train, download=True, transform=ToTensor())
    for is_train in (True, False)
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 93514093.21it/s]


Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 58946809.65it/s]


Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 25080568.04it/s]


Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 22679200.91it/s]


Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw



In [None]:
len(training_data)

60000

We have 60,000 images of the digits 0-9.
It would be helpful to look at individual image tensors; however, PyTorch's `DataLoader` serves the data in batches for efficient training.

We can access and view different image tensors by extracting images and labels from individual batches:

In [None]:
from torch.utils.data import DataLoader


# Create a DataLoader to handle batching and shuffling.
# The shuffle order comes from a dedicated, seeded generator so that
# "Restart & Run All" always yields the same first batch, and therefore the
# same `image_tensor` and the same results in the cells that follow.
batch_size = 32
loader_rng = torch.Generator().manual_seed(0)
train_loader = DataLoader(
    training_data, batch_size=batch_size, shuffle=True, generator=loader_rng
)

# Only the first batch is needed, so fetch it directly instead of looping
batch = next(iter(train_loader))
images, labels = batch
print("Batch of images shape:", images.shape)
print("Batch of labels shape:", labels.shape)

# Keep the first five images of the batch for the examples that follow
image_tensor = images[:5]
print("Image tensor shape:", image_tensor.shape)

Batch of images shape: torch.Size([32, 1, 28, 28])
Batch of labels shape: torch.Size([32])
Image tensor shape: torch.Size([5, 1, 28, 28])


### What is shape?

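The shape of a tensor is the size of each of its dimensions. For a 2-D tensor (a matrix), that is the number of rows and the number of columns.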

We also need to 'flatten' the image tensor, since its current shape of [5, 1, 28, 28] (denoting 5 entries of 1 color channel and an image size of 28 x 28 pixels) won't fit with our weights tensor.

In [None]:
# Flatten every image into a single row, keeping the leading (image) dimension
reshaped_images = image_tensor.view(len(image_tensor), -1)
reshaped = reshaped_images[:5]
reshaped.shape

torch.Size([5, 784])

In [None]:
# Seed the global RNG so that `weights` is identical on every run
torch.manual_seed(1)
# 784 rows match the length of a flattened 28 x 28 image;
# 10 columns, presumably one per digit class (confirm against later cells)
weights = torch.randn(784, 10)
# One zero-initialised bias value per weight column
bias = torch.zeros(10)

In [None]:
a = reshaped
b = weights
a.shape, b.shape

(torch.Size([5, 784]), torch.Size([784, 10]))

In [None]:
# a rows, a columns
ar, ac = a.shape
# b rows, b columns
br, bc = b.shape

(ar, ac), (br, bc)


((5, 784), (784, 10))

In [None]:
t1 = torch.zeros(ar, bc)
t1.shape

torch.Size([5, 10])

In [None]:
t1

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [None]:
def matmul_simple(a, b):
  """Multiply two 2-D tensors using three nested Python loops.

  Every entry of the result is accumulated in place, one scalar product at a
  time. The inner dimensions are not checked against each other.

  Args:
    a: 2-D tensor of shape (rows_a, inner).
    b: 2-D tensor; indexed as (inner, cols_b).

  Returns:
    A new tensor of shape (rows_a, cols_b), created with `torch.zeros`.
  """
  n_rows, n_inner = a.shape
  _, n_cols = b.shape
  out = torch.zeros(n_rows, n_cols)
  for row in range(n_rows):
    for col in range(n_cols):
      for idx in range(n_inner):
        out[row, col] += a[row, idx] * b[idx, col]

  return out

In [None]:
%timeit matmul_simple(a, b)

512 ms ± 7.93 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
t1 = matmul_simple(a, b)
t1.shape

torch.Size([5, 10])

In [None]:
torch.set_printoptions(precision=2, linewidth=140, sci_mode=False)
t1

tensor([[  0.62, -14.26,   5.51, -22.86,  10.22, -15.22, -20.53,  -5.93, -22.84,   0.54],
        [  5.72,   5.56,   4.61, -12.84,   0.03,   1.42,   0.48,  -0.96, -21.63,   3.16],
        [ -5.18,   2.38, -13.61,  10.52,   6.80,  -2.36, -12.82, -11.39,  -4.25,   3.30],
        [ 12.50,  -8.58,  -8.19, -14.78,  14.92,  -8.83, -11.98,  -4.13, -27.63,   1.97],
        [  1.25, -11.63,  -3.09, -27.15,  19.21,  -0.34, -15.67, -15.94, -27.39,   5.93]])

### Python to machine code via **Numba**.
Numba means we can write Python that compiles, then runs at speed similar to C.

In [None]:
import numba as nb
from numba import njit
import numpy as np

In [None]:
a.shape

torch.Size([5, 784])

Numba doesn't work with PyTorch tensors, so we have to convert to numpy arrays

In [None]:
a_np = a.numpy()
b_np = b.numpy()


In [None]:
@nb.jit(nopython=True)
def matmul_numba(a, b):
  """Multiply two 2-D NumPy arrays with explicit loops, compiled by Numba.

  Args:
    a: 2-D array of shape (rows_a, inner).
    b: 2-D array of shape (inner, cols_b).

  Returns:
    A new array of shape (rows_a, cols_b) allocated with `np.zeros`.

  Raises:
    ValueError: if the inner dimensions of `a` and `b` differ.
  """
  # Take the shapes from the arguments, not from the notebook globals
  # `a_np` / `b_np`; otherwise the loop bounds are wrong for any other input.
  ar, ac = a.shape
  br, bc = b.shape
  if ac != br:
    raise ValueError("inner dimensions of a and b must match")
  t1 = np.zeros((ar, bc))
  for i in range(ar):
    for j in range(bc):
      # Accumulate in a local scalar and write the result once per cell
      dot_product = 0.0
      for k in range(ac):
        dot_product += a[i, k] * b[k, j]
      t1[i, j] = dot_product
  return t1

In [None]:
%timeit matmul_numba(a_np, b_np)

45.7 µs ± 16.8 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Broadcasting

How can we perform efficient operations on objects of different shapes?

In [None]:
a_tensor = tensor([10., 6, -4])
b_tensor = tensor([2., 8, 7])


In [None]:
a_tensor + 1

tensor([11.,  7., -3.])

In [None]:
c_tensor = tensor([10,20,30])
m_matrix = tensor([[1.,2.,3.],[4.,5.,6.],[7.,8.,9.]])

In [None]:
m_matrix

tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])

In [None]:
m_matrix + c_tensor

tensor([[11., 22., 33.],
        [14., 25., 36.],
        [17., 28., 39.]])

In [None]:
c_tensor[None,:].shape

torch.Size([1, 3])

How is this happening? Let's check using a little-known method, `expand_as`.

In [None]:
c_tensor

tensor([10, 20, 30])

In [None]:
expanded = c_tensor.expand_as(m_matrix)
expanded

tensor([[10, 20, 30],
        [10, 20, 30],
        [10, 20, 30]])

After the expansion, `expanded` acts as if it were a 3 x 3 matrix (and so has the correct shape to be combined element-wise with `m_matrix`). Though it looks like it has copied itself three times, in memory it is still just three values in one row.

In [None]:
expanded.storage()

  expanded.storage()


 10
 20
 30
[torch.storage.TypedStorage(dtype=torch.int64, device=cpu) of size 3]

### Strides

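PyTorch (like NumPy) achieves this sleight of hand with strides. A stride is the number of elements to skip in memory to move one step along a dimension. `expanded` has a stride of 0 along its first dimension, so moving to the next row does not advance in memory at all: reading the three rows really means going across the same [10,20,30] values three times.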

This is also the technique used by deep learning frameworks such as JAX and PyTorch to avoid prohibitively expensive copies when operating on tensors and matrices of different shapes.

In [None]:
expanded.stride(), expanded.shape

((0, 1), torch.Size([3, 3]))

### Unsqueeze and None

We can also tweak shapes using `unsqueeze` and `None`. In this case, we go from a 1-D tensor of 3 elements to a matrix of 1 row x 3 columns.

In [None]:
c_tensor

tensor([10, 20, 30])

In [None]:
c_tensor.unsqueeze(0)

tensor([[10, 20, 30]])

`None` inserts a new axis, achieving the same result.

In [None]:
# Create a matrix with one row
c_tensor[None,:]

tensor([[10, 20, 30]])

Unsqueezing at dimension 1 (the second axis) instead gives us three rows of one column.

In [None]:
c_tensor.unsqueeze(1)

tensor([[10],
        [20],
        [30]])

In [None]:
# Create a matrix with one column
c_tensor[:, None]


tensor([[10],
        [20],
        [30]])

In [None]:
t1

tensor([[  0.62, -14.26,   5.51, -22.86,  10.22, -15.22, -20.53,  -5.93, -22.84,   0.54],
        [  5.72,   5.56,   4.61, -12.84,   0.03,   1.42,   0.48,  -0.96, -21.63,   3.16],
        [ -5.18,   2.38, -13.61,  10.52,   6.80,  -2.36, -12.82, -11.39,  -4.25,   3.30],
        [ 12.50,  -8.58,  -8.19, -14.78,  14.92,  -8.83, -11.98,  -4.13, -27.63,   1.97],
        [  1.25, -11.63,  -3.09, -27.15,  19.21,  -0.34, -15.67, -15.94, -27.39,   5.93]])

Multiplying a matrix by a scalar such as 2 broadcasts the scalar across all rows and columns:

In [None]:
2 * t1

tensor([[  1.24, -28.51,  11.02, -45.73,  20.44, -30.43, -41.06, -11.86, -45.69,   1.08],
        [ 11.45,  11.12,   9.22, -25.68,   0.06,   2.84,   0.97,  -1.93, -43.27,   6.31],
        [-10.36,   4.77, -27.21,  21.05,  13.61,  -4.72, -25.64, -22.79,  -8.49,   6.60],
        [ 25.00, -17.15, -16.39, -29.55,  29.84, -17.65, -23.96,  -8.26, -55.25,   3.94],
        [  2.49, -23.26,  -6.18, -54.30,  38.42,  -0.67, -31.34, -31.88, -54.77,  11.86]])

In [None]:
def matmul_broadcast(a,b):
    """Matrix-multiply two 2-D tensors one row of `a` at a time via broadcasting.

    Each row of `a` is reshaped into a column of shape (inner, 1) and multiplied
    element-wise with `b`. Broadcasting stretches that column across every
    column of `b`, and summing over dim 0 leaves one dot product per column
    of `b`. Only the loop over the rows of `a` remains in Python.

    Returns a new (rows of `a`, columns of `b`) tensor created with `torch.zeros`.
    """
    n_rows, _ = a.shape
    _, n_cols = b.shape
    result = torch.zeros(n_rows, n_cols)
    for i, row in enumerate(a):
        # (inner, 1) * (inner, n_cols) -> (inner, n_cols), then reduce to (n_cols,)
        result[i] = (row.unsqueeze(-1) * b).sum(0)
    return result

In [None]:
%timeit matmul_broadcast(a,b)

128 µs ± 23.5 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


### Einstein summation

_Einsum_ is a compact representation that combines sums and their products.

* A letter repeated across the input arrays means the values along those axes are multiplied together.

* A letter omitted from the output means the values along that axis are summed.

In [None]:
a.shape, b.shape

(torch.Size([5, 784]), torch.Size([784, 10]))

In [None]:
a

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [None]:
b

tensor([[-1.53, -0.75, -0.65,  ..., -1.61, -0.71,  0.30],
        [-0.78, -0.25, -0.22,  ..., -1.16,  0.70,  0.20],
        [ 0.87,  0.24, -0.66,  ..., -1.45,  0.06, -0.62],
        ...,
        [ 0.51,  0.47, -0.26,  ...,  0.65,  0.43, -1.29],
        [ 0.52,  1.03,  0.81,  ..., -0.10,  2.26, -0.28],
        [-1.49,  0.39, -0.55,  ..., -0.19, -0.51,  0.54]])