In [54]:
import torch
from torch import tensor, nn

In [2]:
a = tensor([10., 6, -4])

In [3]:
a.unsqueeze(-1)

tensor([[10.],
        [ 6.],
        [-4.]])

In [4]:
a[:,None]

tensor([[10.],
        [ 6.],
        [-4.]])

## Matrix multiplication

### Loop

In [27]:
def matmul(a,b):
    """Multiply two 2-D tensors with an explicit Python loop over output cells.

    Args:
        a: 2-D tensor of shape (a_r, a_c).
        b: 2-D tensor of shape (b_r, b_c); b_r must equal a_c.

    Returns:
        A tensor of shape (a_r, b_c), allocated with torch.zeros, whose entry
        [i, j] is the dot product of row i of `a` with column j of `b`.

    Raises:
        RuntimeError: if the inner dimensions of `a` and `b` differ.
    """
    a_r, a_c = a.shape
    b_r, b_c = b.shape
    # Without this check, an inner dimension of size 1 would be silently
    # broadcast by the elementwise product below and give a wrong answer
    # instead of an error.
    if a_c != b_r:
        raise RuntimeError(
            f"matmul shape mismatch: {tuple(a.shape)} cannot be multiplied with {tuple(b.shape)}"
        )
    result = torch.zeros(a_r, b_c)
    for i in range(a_r):
        for j in range(b_c):
            # Row i of a times column j of b, summed: one output cell.
            result[i, j] = (a[i]*b[:,j]).sum()
    return result

In [28]:
# Random weight matrix: 28*28 = 784 input features by 10 outputs.
w = torch.randn(28*28, 10)
# randn only allocates the (5, 784) tensor here: uniform_() overwrites it in place
# with uniform samples, and thresholding at 0.5 turns it into a random 0./1. float matrix.
x = (torch.randn(5, 28*28).uniform_() > 0.5).float()

print(w.shape, x.shape)

torch.Size([784, 10]) torch.Size([5, 784])


In [29]:
w

tensor([[-1.3211,  0.7073, -2.6653,  ..., -1.5623,  0.5117,  1.2107],
        [-0.1769,  1.7697, -0.0745,  ...,  0.4187, -0.7671,  0.6182],
        [ 1.5185,  0.2683, -1.1117,  ...,  0.8351,  0.3929, -0.6093],
        ...,
        [ 0.7381,  0.1837,  1.2768,  ..., -1.1529,  0.0362, -0.0818],
        [-0.8085,  1.2301,  0.3700,  ..., -1.5857, -0.1115, -0.2392],
        [ 1.6216, -1.0680, -1.0443,  ..., -1.3424, -1.5653, -0.6021]])

In [30]:
result = matmul(x, w)
result

tensor([[ 15.5561,  18.9614,  12.9767,  11.7611,   1.8205, -12.1763,   3.5304,
          -9.8229, -11.4052,   6.0269],
        [ 16.3255,   7.0026,   9.8062, -19.0253,  14.2650,  18.3936, -14.6898,
          -7.6931,  -9.8005,   7.0486],
        [  6.5847,  -5.7398,   6.0632,  15.3926,  22.3657,  37.6803, -17.8128,
           3.4587, -19.9257,  12.4680],
        [  9.5143,   7.9566, -15.8792, -19.5290,  -9.2390,  -3.3436, -25.2611,
           2.7339,   1.9323, -12.1647],
        [ 33.2948,   8.8638,  13.2803, -10.2013,   9.9745,  31.0709, -35.2126,
          -5.4623, -11.6704,  -1.2602]])

In [31]:
result.shape

torch.Size([5, 10])

### Broadcasting

In [32]:
x[0].shape

torch.Size([784])

In [33]:
x[0][:, None].shape

torch.Size([784, 1])

In [34]:
x[0].unsqueeze(1).shape

torch.Size([784, 1])

In [35]:
x[0].unsqueeze(-1).shape

torch.Size([784, 1])

In [36]:
def matmul(a,b):
    """Multiply two 2-D tensors, computing one output row per loop iteration via broadcasting.

    Args:
        a: 2-D tensor of shape (a_r, a_c).
        b: 2-D tensor of shape (b_r, b_c); b_r must equal a_c.

    Returns:
        A tensor of shape (a_r, b_c), allocated with torch.zeros, holding the
        matrix product of `a` and `b`.

    Raises:
        RuntimeError: if the inner dimensions of `a` and `b` differ.
    """
    a_r, a_c = a.shape
    b_r, b_c = b.shape
    # Broadcasting would otherwise accept an inner dimension of size 1 on
    # either side and silently produce a wrong product.
    if a_c != b_r:
        raise RuntimeError(
            f"matmul shape mismatch: {tuple(a.shape)} cannot be multiplied with {tuple(b.shape)}"
        )
    result = torch.zeros(a_r, b_c)
    for i in range(a_r):
        # a[i][:, None] has shape (a_c, 1); multiplying by b scales each row k
        # of b by a[i, k], and summing over dim 0 contracts the shared index.
        result[i] = (a[i][:, None]*b).sum(dim=0)
    return result

In [37]:
result = matmul(x, w)
result

tensor([[ 15.5561,  18.9614,  12.9767,  11.7611,   1.8205, -12.1763,   3.5304,
          -9.8229, -11.4052,   6.0269],
        [ 16.3255,   7.0027,   9.8062, -19.0253,  14.2650,  18.3936, -14.6898,
          -7.6931,  -9.8005,   7.0486],
        [  6.5847,  -5.7398,   6.0632,  15.3926,  22.3657,  37.6803, -17.8128,
           3.4588, -19.9257,  12.4680],
        [  9.5143,   7.9566, -15.8792, -19.5290,  -9.2390,  -3.3436, -25.2611,
           2.7339,   1.9323, -12.1647],
        [ 33.2948,   8.8638,  13.2803, -10.2013,   9.9745,  31.0709, -35.2126,
          -5.4623, -11.6704,  -1.2602]])

### Einsum

In [38]:
def matmul(a,b):
    """Matrix product of `a` and `b` expressed as an Einstein summation.

    The subscripts contract the shared index k (columns of `a`, rows of `b`)
    and keep the free indices i and j as the rows and columns of the result.
    """
    subscripts = 'ik,kj->ij'
    product = torch.einsum(subscripts, a, b)
    return product

In [39]:
result = matmul(x, w)
result

tensor([[ 15.5561,  18.9614,  12.9767,  11.7611,   1.8205, -12.1763,   3.5304,
          -9.8229, -11.4052,   6.0269],
        [ 16.3255,   7.0026,   9.8062, -19.0253,  14.2650,  18.3936, -14.6898,
          -7.6931,  -9.8005,   7.0486],
        [  6.5847,  -5.7398,   6.0632,  15.3926,  22.3657,  37.6803, -17.8128,
           3.4587, -19.9257,  12.4680],
        [  9.5143,   7.9566, -15.8792, -19.5290,  -9.2390,  -3.3436, -25.2611,
           2.7339,   1.9323, -12.1647],
        [ 33.2948,   8.8638,  13.2803, -10.2013,   9.9745,  31.0709, -35.2126,
          -5.4623, -11.6704,  -1.2602]])

### PyTorch matmul / @

In [40]:
def matmul(a,b):
    """Matrix product of `a` and `b` using the built-in `@` operator."""
    # For tensors, `a @ b` is the same operation as a.matmul(b).
    product = a @ b
    return product

In [41]:
%time result = matmul(x, w)
result

Wall time: 0 ns


tensor([[ 15.5561,  18.9614,  12.9767,  11.7611,   1.8205, -12.1763,   3.5304,
          -9.8229, -11.4052,   6.0269],
        [ 16.3255,   7.0026,   9.8062, -19.0253,  14.2650,  18.3936, -14.6898,
          -7.6931,  -9.8005,   7.0486],
        [  6.5847,  -5.7398,   6.0632,  15.3926,  22.3657,  37.6803, -17.8128,
           3.4587, -19.9257,  12.4680],
        [  9.5143,   7.9566, -15.8792, -19.5290,  -9.2390,  -3.3436, -25.2611,
           2.7339,   1.9323, -12.1647],
        [ 33.2948,   8.8638,  13.2803, -10.2013,   9.9745,  31.0709, -35.2126,
          -5.4623, -11.6704,  -1.2602]])

## Basic Neural Network

In [42]:
x.mean(), x.std()

(tensor(0.5059), tensor(0.5000))

In [43]:
def lin(x, w, b):
    """Affine (linear) layer: matrix-multiply `x` by `w`, then add `b`."""
    projected = x @ w
    return projected + b

def relu(x):
    """Rectified linear unit: values below zero become zero, all others pass through."""
    return torch.clamp(x, min=0)

Rectification example:

In [49]:
x-0.5

tensor([[-0.5000,  0.5000,  0.5000,  ...,  0.5000,  0.5000, -0.5000],
        [ 0.5000, -0.5000, -0.5000,  ..., -0.5000, -0.5000,  0.5000],
        [-0.5000,  0.5000,  0.5000,  ..., -0.5000,  0.5000, -0.5000],
        [ 0.5000, -0.5000,  0.5000,  ...,  0.5000,  0.5000,  0.5000],
        [ 0.5000,  0.5000,  0.5000,  ...,  0.5000,  0.5000,  0.5000]])

In [50]:
(x-0.5).clamp(min=0) # clamp = "restrict": every value below min is raised to min

tensor([[0.0000, 0.5000, 0.5000,  ..., 0.5000, 0.5000, 0.0000],
        [0.5000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.5000],
        [0.0000, 0.5000, 0.5000,  ..., 0.0000, 0.5000, 0.0000],
        [0.5000, 0.0000, 0.5000,  ..., 0.5000, 0.5000, 0.5000],
        [0.5000, 0.5000, 0.5000,  ..., 0.5000, 0.5000, 0.5000]])

In [58]:
def mse(output, targ):
    """Mean squared error between `output` and `targ`.

    A trailing dimension of size 1 on `output` is squeezed away before the
    subtraction, so that dimension is not broadcast against `targ`.
    """
    residual = output.squeeze(-1) - targ
    return residual.pow(2).mean()

In [59]:
class Model(nn.Module):
    """Small fully connected network (Linear -> ReLU -> Linear) that returns its loss.

    Args:
        n_in: number of input features of the first linear layer.
        nh: number of hidden units.
        n_out: number of outputs of the last linear layer.
    """
    def __init__(self, n_in, nh, n_out):
        super().__init__()
        # nn.ModuleList (instead of a plain Python list) registers the layers as
        # sub-modules, so their weights show up in parameters() / state_dict()
        # and follow .to(device); a plain list hides them from nn.Module.
        self.layers = nn.ModuleList([nn.Linear(n_in,nh), nn.ReLU(), nn.Linear(nh,n_out)])
        self.loss = mse

    def forward(self, x, targ):
        """Run `x` through every layer in order and return the loss against `targ`."""
        # Defined as forward rather than __call__ so that nn.Module.__call__
        # (and its hook handling) stays in place; model(x, targ) still works.
        for l in self.layers: x = l(x)
        return self.loss(x.squeeze(), targ)

In [60]:
model = Model(28*28, 50, 1)

In [62]:
# forward pass
# loss = model(x_train, y_train)

# backward pass
# loss.backward()