In [1]:
import torch
from torch import tensor, nn

In [2]:
a = tensor([10., 6, -4])

In [3]:
a.unsqueeze(-1)

tensor([[10.],
        [ 6.],
        [-4.]])

In [4]:
a[:,None]

tensor([[10.],
        [ 6.],
        [-4.]])

## Matrix multiplication

### Loop

In [5]:
def matmul(a,b):
    """Multiply two 2-D tensors with explicit Python loops.

    Args:
        a: 2-D tensor of shape (a_r, a_c).
        b: 2-D tensor of shape (b_r, b_c); b_r must equal a_c.

    Returns:
        A tensor of shape (a_r, b_c), allocated with ``torch.zeros`` (torch's
        default dtype), whose entry [i, j] is the dot product of row i of
        ``a`` with column j of ``b``.

    Raises:
        ValueError: if the inner dimensions of ``a`` and ``b`` differ.
    """
    a_r, a_c = a.shape
    b_r, b_c = b.shape
    # Without this check an inner dimension of size 1 would be broadcast by
    # the element-wise product below and silently produce a wrong result.
    if a_c != b_r:
        raise ValueError(
            f"inner dimensions must match: a is {a_r}x{a_c}, b is {b_r}x{b_c}"
        )
    result = torch.zeros(a_r, b_c)
    for i in range(a_r):
        for j in range(b_c):
            # Row i of a times column j of b, summed: one output element.
            result[i, j] = (a[i]*b[:,j]).sum()
    return result

In [6]:
# Weight matrix drawn from a standard normal: 28*28 = 784 inputs, 10 outputs.
w = torch.randn(28*28, 10)
# Batch of 5 random binary inputs. torch.rand samples uniformly from [0, 1)
# directly, so thresholding at 0.5 yields 0.0 or 1.0 with roughly equal odds
# (no need to draw normal samples first and then overwrite them).
x = (torch.rand(5, 28*28) > 0.5).float()

print(w.shape, x.shape)

torch.Size([784, 10]) torch.Size([5, 784])


In [7]:
w

tensor([[ 0.6585,  0.8505, -1.5729,  ..., -0.7811,  0.3777, -0.6824],
        [-0.8713,  1.5161, -1.9336,  ..., -0.0216, -0.3616,  0.4600],
        [ 0.4457,  0.4802, -0.4382,  ...,  0.4809,  1.7293,  2.1749],
        ...,
        [ 1.1256, -0.8450,  0.6215,  ..., -1.6206, -1.7708,  1.6707],
        [ 0.1464, -0.2688,  0.7906,  ...,  1.2148,  0.8712,  0.8713],
        [ 1.4520, -0.6913,  0.5994,  ..., -0.3363, -2.0610, -1.8160]])

In [8]:
result = matmul(x, w)
result

tensor([[ -5.1816,   1.2283,  17.8439, -26.6228,   2.4741,  21.5022, -41.9003,
         -11.8169,  11.4589, -15.1908],
        [ 14.3575,  18.1787,  15.3604,  -8.3450,   8.4390,  11.8457, -56.5944,
          -8.7871,  15.5160, -10.0521],
        [  3.9617,  10.9410,  -2.4306,  10.7817,  -8.4258,  14.4627, -42.7108,
         -22.4357,  -9.8514, -18.4852],
        [ 22.0265,  17.4174, -13.8640,  39.8922,  -6.3677, -19.9488, -33.2374,
         -35.0137, -16.8065,  13.5555],
        [ -3.2925,   2.7557,  29.7941,  16.3102,   7.7942,  27.2021, -19.6526,
         -10.0621, -17.5123,  11.8520]])

In [9]:
result.shape

torch.Size([5, 10])

### Broadcasting

In [10]:
x[0].shape

torch.Size([784])

In [11]:
x[0][:, None].shape

torch.Size([784, 1])

In [12]:
x[0].unsqueeze(1).shape

torch.Size([784, 1])

In [13]:
x[0].unsqueeze(-1).shape

torch.Size([784, 1])

In [14]:
def matmul(a,b):
    """Multiply two 2-D tensors, computing one output row per iteration.

    Args:
        a: 2-D tensor of shape (a_r, a_c).
        b: 2-D tensor of shape (b_r, b_c); b_r must equal a_c.

    Returns:
        A tensor of shape (a_r, b_c), allocated with ``torch.zeros`` (torch's
        default dtype), holding the matrix product of ``a`` and ``b``.

    Raises:
        ValueError: if the inner dimensions of ``a`` and ``b`` differ.
    """
    a_r, a_c = a.shape
    b_r, b_c = b.shape
    # Broadcasting would otherwise hide a mismatch whenever one of the inner
    # dimensions has size 1 and return a wrong result instead of failing.
    if a_c != b_r:
        raise ValueError(
            f"inner dimensions must match: a is {a_r}x{a_c}, b is {b_r}x{b_c}"
        )
    result = torch.zeros(a_r, b_c)
    for i in range(a_r):
        # a[i][:, None] has shape (a_c, 1); it broadcasts across the columns
        # of b, and summing over dim 0 collapses the shared inner dimension.
        result[i] = (a[i][:, None]*b).sum(dim=0)
    return result

In [15]:
result = matmul(x, w)
result

tensor([[ -5.1816,   1.2283,  17.8439, -26.6228,   2.4741,  21.5022, -41.9003,
         -11.8169,  11.4589, -15.1908],
        [ 14.3575,  18.1787,  15.3604,  -8.3450,   8.4390,  11.8458, -56.5944,
          -8.7871,  15.5160, -10.0521],
        [  3.9617,  10.9410,  -2.4306,  10.7817,  -8.4258,  14.4627, -42.7108,
         -22.4357,  -9.8514, -18.4852],
        [ 22.0265,  17.4174, -13.8640,  39.8922,  -6.3677, -19.9488, -33.2374,
         -35.0137, -16.8065,  13.5555],
        [ -3.2925,   2.7557,  29.7942,  16.3102,   7.7942,  27.2020, -19.6526,
         -10.0621, -17.5123,  11.8520]])

### Einsum

In [16]:
def matmul(a,b):
    """Matrix product of two 2-D tensors expressed with Einstein summation."""
    # 'ik,kj->ij': the repeated index k is summed over; i and j index the result.
    product = torch.einsum('ik,kj->ij', a, b)
    return product

In [17]:
result = matmul(x, w)
result

tensor([[ -5.1816,   1.2283,  17.8439, -26.6228,   2.4741,  21.5022, -41.9003,
         -11.8169,  11.4589, -15.1908],
        [ 14.3575,  18.1787,  15.3604,  -8.3450,   8.4390,  11.8458, -56.5944,
          -8.7871,  15.5160, -10.0521],
        [  3.9617,  10.9410,  -2.4306,  10.7817,  -8.4258,  14.4627, -42.7108,
         -22.4357,  -9.8514, -18.4852],
        [ 22.0265,  17.4174, -13.8640,  39.8922,  -6.3677, -19.9488, -33.2374,
         -35.0137, -16.8065,  13.5555],
        [ -3.2925,   2.7557,  29.7942,  16.3102,   7.7942,  27.2021, -19.6526,
         -10.0621, -17.5123,  11.8520]])

### PyTorch matmul / @

In [18]:
def matmul(a,b):
    """Matrix product via the ``@`` operator (same as ``a.matmul(b)``)."""
    product = a @ b
    return product

In [19]:
%time result = matmul(x, w)
result

Wall time: 0 ns


tensor([[ -5.1816,   1.2283,  17.8439, -26.6228,   2.4741,  21.5022, -41.9003,
         -11.8169,  11.4589, -15.1908],
        [ 14.3575,  18.1787,  15.3604,  -8.3450,   8.4390,  11.8458, -56.5944,
          -8.7871,  15.5160, -10.0521],
        [  3.9617,  10.9410,  -2.4306,  10.7817,  -8.4258,  14.4627, -42.7108,
         -22.4357,  -9.8514, -18.4852],
        [ 22.0265,  17.4174, -13.8640,  39.8922,  -6.3677, -19.9488, -33.2374,
         -35.0137, -16.8065,  13.5555],
        [ -3.2925,   2.7557,  29.7942,  16.3102,   7.7942,  27.2021, -19.6526,
         -10.0621, -17.5123,  11.8520]])

## Basic Neural Network

In [20]:
x.mean(), x.std()

(tensor(0.4936), tensor(0.5000))

In [21]:
def lin(x, w, b):
    """Affine transform: the matrix product of ``x`` and ``w``, plus ``b``."""
    projected = x @ w
    return projected + b

def relu(x):
    """Rectified linear unit: replace every negative entry of ``x`` with 0."""
    return torch.clamp(x, min=0)

Rectification example:

In [22]:
x-0.5

tensor([[ 0.5000,  0.5000,  0.5000,  ...,  0.5000, -0.5000, -0.5000],
        [-0.5000, -0.5000, -0.5000,  ..., -0.5000, -0.5000,  0.5000],
        [-0.5000, -0.5000, -0.5000,  ..., -0.5000,  0.5000, -0.5000],
        [ 0.5000,  0.5000, -0.5000,  ...,  0.5000,  0.5000,  0.5000],
        [ 0.5000, -0.5000, -0.5000,  ...,  0.5000,  0.5000,  0.5000]])

In [23]:
(x-0.5).clamp(min=0) # clamp restricts values to a range; with min=0 every negative entry becomes 0

tensor([[0.5000, 0.5000, 0.5000,  ..., 0.5000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.5000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.5000, 0.0000],
        [0.5000, 0.5000, 0.0000,  ..., 0.5000, 0.5000, 0.5000],
        [0.5000, 0.0000, 0.0000,  ..., 0.5000, 0.5000, 0.5000]])

In [24]:
def mse(output, targ):
    """Mean squared error between ``output`` and ``targ``.

    A trailing dimension of size 1 is squeezed off ``output`` before the
    subtraction.
    """
    diff = output.squeeze(-1) - targ
    return (diff ** 2).mean()

In [25]:
class Model(nn.Module):
    """Linear -> ReLU -> Linear network that returns its loss directly.

    Args:
        n_in: number of input features.
        nh: number of hidden units.
        n_out: number of outputs.

    Calling ``model(x, targ)`` runs ``x`` through the layers and returns
    ``self.loss`` evaluated on the prediction and ``targ``.
    """
    def __init__(self, n_in, nh, n_out):
        super().__init__()
        # nn.ModuleList (rather than a plain list) registers the layers as
        # sub-modules, so parameters(), state_dict() and .to() can see them.
        self.layers = nn.ModuleList([nn.Linear(n_in,nh), nn.ReLU(), nn.Linear(nh,n_out)])
        self.loss = mse

    def forward(self, x, targ):
        """Run the forward pass and return the loss against ``targ``."""
        # Implemented as forward() so nn.Module.__call__ keeps running its
        # hook machinery; model(x, targ) still works exactly as before.
        for layer in self.layers: x = layer(x)
        # Squeeze only the trailing dimension so a batch of size 1 keeps its
        # batch axis.
        return self.loss(x.squeeze(-1), targ)

In [26]:
model = Model(28*28, 50, 1)

In [27]:
# forward pass (x_train and y_train are not defined in this notebook; load the training data first)
# loss = model(x_train, y_train)

# backward pass
# loss.backward()