# Introduction

A notebook that I use to familiarise myself with PyTorch.

# Autograd

## Scalars don't have derivatives

In [2]:
import torch

In [3]:
x = torch.ones(1, requires_grad=True)

In [4]:
x

tensor([1.], requires_grad=True)

In [5]:
print(x)

tensor([1.], requires_grad=True)


In [6]:
type(x)

torch.Tensor

In [7]:
print(x.grad)

None


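`x` is a one-element leaf tensor that stores the value 1.0. Nothing has been computed from it and `backward()` has not been called yet, so there is no derivative to report and `x.grad` is `None`.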

## A function has a derivative

In [15]:
# x is a leaf tensor tracked by autograd (requires_grad=True);
# y is computed from x, so autograd records how to differentiate it
x = torch.ones(1, requires_grad=True)
y = 2*x

In [16]:
type(x)

torch.Tensor

In [17]:
type(y)

torch.Tensor

In [18]:
x

tensor([1.], requires_grad=True)

In [19]:
y

tensor([2.], grad_fn=<MulBackward0>)

In [20]:
print(x.grad)

None


The derivative of y with respect to x is not yet computed. First we have to call:

In [21]:
y.backward()

In [22]:
print(x.grad)

tensor([2.])


This means: $\frac{dy}{dx} = 2$

## Another example for automatic gradient computation

In [157]:
# Chain of operations tracked by autograd: y = 2x, z = y^3 = (2x)^3
x = torch.ones(1, requires_grad=True)
y = 2*x
z = y*y*y

In [158]:
type(x)

torch.Tensor

In [159]:
type(y)

torch.Tensor

In [160]:
type(z)

torch.Tensor

In [161]:
x

tensor([1.], requires_grad=True)

In [163]:
x.item()

1.0

In [111]:
y

tensor([2.], grad_fn=<MulBackward0>)

In [112]:
z

tensor([8.], grad_fn=<MulBackward0>)

In [113]:
z.backward()

In [114]:
# z is a computed (non-leaf) tensor, so backward() does not store a gradient on it:
# the value is None and the cell displays nothing.
# NOTE(review): recent PyTorch versions may also emit a UserWarning here — confirm.
z.grad

In [115]:
# y is a computed (non-leaf) tensor, so backward() does not store a gradient on it:
# the value is None and the cell displays nothing.
# NOTE(review): recent PyTorch versions may also emit a UserWarning here — confirm.
y.grad

In [116]:
x.grad

tensor([24.])

$\frac{dz}{dx} = \frac{dz}{dy} \cdot \frac{dy}{dx} = 3y^2 \cdot 2 = 6(2x)^2 = 6 \cdot 4x^2 = 24x^2$, which evaluates to $24$ at $x = 1$ and matches the value of `x.grad` shown above.

# Machine learning

## Prepare a Perceptron

In [123]:
class Perceptron(torch.nn.Module):
    """Two-layer feed-forward network: Linear -> (optional activation) -> Linear.

    Without an activation the two linear layers compose into a single affine
    map, so the hidden layer adds no expressive power. That is the default
    (and the original behaviour); pass ``activation`` to get a non-linear model.

    Args:
        input_size: number of input features.
        hidden_size: number of units in the hidden layer.
        output_size: number of output features.
        activation: optional callable applied to the hidden layer output,
            e.g. ``torch.relu`` or ``torch.tanh``. ``None`` (default) applies
            no activation.
    """

    def __init__(self, input_size, hidden_size, output_size, activation=None):
        super().__init__()
        self.layer1 = torch.nn.Linear(input_size, hidden_size)
        self.layer2 = torch.nn.Linear(hidden_size, output_size)
        self.activation = activation

    def forward(self, input_vec):
        """Return the network output for ``input_vec`` (last dim = ``input_size``)."""
        hidden = self.layer1(input_vec)
        if self.activation is not None:
            hidden = self.activation(hidden)
        return self.layer2(hidden)

In [124]:
p = Perceptron(2,5,1)

In [125]:
p

Perceptron(
  (layer1): Linear(in_features=2, out_features=5, bias=True)
  (layer2): Linear(in_features=5, out_features=1, bias=True)
)

In [126]:
p.layer1.weight

Parameter containing:
tensor([[ 0.2795, -0.5875],
        [-0.3618,  0.3865],
        [-0.6314, -0.5359],
        [ 0.2324, -0.5080],
        [-0.3094,  0.6219]], requires_grad=True)

In [127]:
p.layer1.bias

Parameter containing:
tensor([ 0.2663, -0.0981,  0.2298,  0.0100,  0.5158], requires_grad=True)

In [128]:
p.layer2.weight

Parameter containing:
tensor([[ 0.1897, -0.3041,  0.2441,  0.2419, -0.2331]], requires_grad=True)

In [129]:
p.layer2.bias

Parameter containing:
tensor([-0.4425], requires_grad=True)

## Feedforward with untrained Perceptron

In [134]:
input_vec = torch.tensor([1.0,3.0])

In [135]:
p(input_vec)

tensor([-2.1695], grad_fn=<AddBackward0>)

In [136]:
p.forward(input_vec)

tensor([-2.1695], grad_fn=<AddBackward0>)

## Train the Perceptron

In [291]:
# Fresh, untrained network: 2 inputs -> 5 hidden units -> 1 output
p = Perceptron(2,5,1)
# Mean squared error between prediction and target
loss_fn = torch.nn.MSELoss()
# Stochastic gradient descent over all parameters of p, learning rate 0.001
optimizer = torch.optim.SGD(p.parameters(), lr=0.001)

In [292]:
type(loss_fn)

torch.nn.modules.loss.MSELoss

In [293]:
type(optimizer)

torch.optim.sgd.SGD

In [294]:
import numpy
x1 = numpy.random.rand()
x2 = numpy.random.rand()
x1,x2

(0.9995184510467505, 0.21792698263708732)

In [295]:
def get_ground_truth_output_from_input_tensor(input_vec):
    """Return the training target for ``input_vec``: the sum of its first two entries.

    The sum is computed with tensor operations, so the result keeps the dtype
    and device of ``input_vec`` (a round trip through Python floats via
    ``.item()`` would fall back to the default dtype and to the CPU). The
    result is detached because a target must not take part in gradient
    computation.

    Args:
        input_vec: 1-D tensor with at least two elements.

    Returns:
        Tensor of shape ``(1,)`` holding ``input_vec[0] + input_vec[1]``.
    """
    return (input_vec[0] + input_vec[1]).detach().reshape(1)

In [296]:
input_vec = torch.tensor([10,20])

In [297]:
input_vec

tensor([10, 20])

In [298]:
get_ground_truth_output_from_input_tensor( input_vec )

tensor([30])

In [299]:
# Training settings (previously a bare literal and a hard-coded `if False:` switch)
NUM_TRAIN_STEPS = 10000
TRAIN_LOG_EVERY = 0   # print every N-th step; 0 disables the progress output

for train_step in range(NUM_TRAIN_STEPS):

    # generate a training sample: two random inputs and their sum as the target
    x1 = numpy.random.rand()
    x2 = numpy.random.rand()
    input_vec   = torch.tensor([x1,x2])
    teacher_vec = get_ground_truth_output_from_input_tensor( input_vec )

    # forward step
    y_pred = p(input_vec)

    # compute loss
    loss = loss_fn(y_pred, teacher_vec)

    # backward step -> clear gradients left over from the previous step,
    # then compute the gradients of the loss w.r.t. all parameters
    p.zero_grad()
    loss.backward()

    # optimizer step -> adapts weights
    optimizer.step()

    # optional progress output
    if TRAIN_LOG_EVERY and train_step % TRAIN_LOG_EVERY == 0:
        error = abs(teacher_vec-y_pred)
        print( f"teacher={teacher_vec}, predicted={y_pred:} -> error={error}" )

## Test the Perceptron

In [300]:
# Evaluation settings (previously a bare literal and a hard-coded `if False:` switch)
NUM_TEST_STEPS = 1000
TEST_LOG_EVERY = 0   # print every N-th test sample; 0 disables the output

list_of_errors = []

# Evaluation only: no gradients are needed, so do not build the autograd graph
with torch.no_grad():
    for test_step in range(NUM_TEST_STEPS):

        # generate a test sample: two random inputs and their sum as the target
        x1 = numpy.random.rand()
        x2 = numpy.random.rand()
        input_vec   = torch.tensor([x1,x2])
        teacher_vec = get_ground_truth_output_from_input_tensor( input_vec )

        # forward step
        y_pred = p(input_vec)

        # compute absolute prediction error
        error = abs(teacher_vec-y_pred)
        list_of_errors.append( error.item() )

        # optional per-sample output
        if TEST_LOG_EVERY and test_step % TEST_LOG_EVERY == 0:
            print( f"teacher={teacher_vec}, predicted={y_pred:} -> error={error}" )

In [301]:
list_of_errors[:10]

[0.08586347103118896,
 0.07623982429504395,
 0.03937935829162598,
 0.006882071495056152,
 0.003127455711364746,
 0.08154228329658508,
 0.03935086727142334,
 0.09884917736053467,
 0.05762195587158203,
 0.05417889356613159]

In [302]:
mean_error = numpy.mean( list_of_errors )
mean_error

0.04854625552892685