<a href="https://colab.research.google.com/github/nicovakr/Pytorch-init/blob/master/pytorch_init.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# PyTorch for Deep Learning

In [5]:
import torch
import torchvision
import torchaudio

In [31]:
import numpy as np

# PyTorch Basics : Tensors and Gradients

## Tensor : number, vector, matrix, or any n-dimensional array.

In [7]:
#entry is a number
t1 = torch.tensor(4.)
t1, t1.dtype

(tensor(4.), torch.float32)

In [8]:
#entry is a list/vector
t2 = torch.tensor([1., 2, 3, 4])
t2

tensor([1., 2., 3., 4.])

In [9]:
#entry is a matrix
t3 = torch.tensor([[5,6], [7,8], [9,10]]) 
t3

tensor([[ 5,  6],
        [ 7,  8],
        [ 9, 10]])

In [10]:
#entry is a 3D array
t4 = torch.tensor([[[11, 12, 13],[13, 14, 15]],  
                   [[15, 16, 17],[17, 18, 19.]]])
t4

tensor([[[11., 12., 13.],
         [13., 14., 15.]],

        [[15., 16., 17.],
         [17., 18., 19.]]])

In [11]:
#t1 : nombre
#t2 : longueur vector
#t3 : shape ligne, colonne
#t4 : shape prof, ligne, colonne
t1.shape, t2.shape, t3.shape, t4.shape

(torch.Size([]), torch.Size([4]), torch.Size([3, 2]), torch.Size([2, 2, 3]))

## Tensor operations and gradients

In [27]:
#create tensors with requires_grad = True or not
x = torch.tensor(3.)
w = torch.tensor(4., requires_grad=True)
b = torch.tensor(5., requires_grad=True)
x, w, b

(tensor(3.), tensor(4., requires_grad=True), tensor(5., requires_grad=True))

In [28]:
y = w*x+b
y

tensor(17., grad_fn=<AddBackward0>)

In [29]:
#compute derivaties
y.backward() 

PyTorch permet de calculer des derivees par rapport aux tensors avec 'requires_grad' = True (called autograd).

In [30]:
#partial derivaties :
print('dy/dx:', x.grad)
print('dy/dw:', w.grad)
print('dy/db:', b.grad) # y = x*w+b -> y'/db = 1

dy/dx: None
dy/dw: tensor(3.)
dy/db: tensor(1.)


In [32]:
#create a tensor with a fixed value for every element
t6 = torch.full((3,2), 23)
t6

tensor([[23, 23],
        [23, 23],
        [23, 23]])

In [33]:
#concatenate two tensors with compatibles shapes
t7 = torch.cat((t3, t6))
t7

tensor([[ 5,  6],
        [ 7,  8],
        [ 9, 10],
        [23, 23],
        [23, 23],
        [23, 23]])

In [39]:
#compute the sin of each element
t8 = torch.sin(t7)
t8

tensor([[-0.9589, -0.2794],
        [ 0.6570,  0.9894],
        [ 0.4121, -0.5440],
        [-0.8462, -0.8462],
        [-0.8462, -0.8462],
        [-0.8462, -0.8462]])

In [40]:
#change the shape of a tensor
t9 = t8.reshape(3, 2, 2)
t9

tensor([[[-0.9589, -0.2794],
         [ 0.6570,  0.9894]],

        [[ 0.4121, -0.5440],
         [-0.8462, -0.8462]],

        [[-0.8462, -0.8462],
         [-0.8462, -0.8462]]])

# Interoperability with Numpy

In [41]:
x = np.array([[1, 2], [3, 4.]])
x

array([[1., 2.],
       [3., 4.]])

In [45]:
# Convert the numpy array to a torch tensor.
y = torch.from_numpy(x)
#y = torch.tensor(x) : this method create a copy of x
y

tensor([[1., 2.],
        [3., 4.]], dtype=torch.float64)

In [46]:
x.dtype, y.dtype

(dtype('float64'), torch.float64)

In [47]:
# Convert a torch tensor to a numpy array
z = y.numpy()
z

array([[1., 2.],
       [3., 4.]])

# Linear Regression

Tuto : create a model that predicts crop yields for apples and oranges (the targets variables) by looking the average temperature, rainfall and humidity (input variables)

Linear regression model : weights w, input variables and bias :

yield_apple  = w11 * temp + w12 * rainfall + w13 * humidity + b1

yield_orange = w21 * temp + w22 * rainfall + w23 * humidity + b2

In [48]:
# Input (temp, rainfall, humidity)
inputs = np.array([[73, 67, 43], 
                   [91, 88, 64], 
                   [87, 134, 58], 
                   [102, 43, 37], 
                   [69, 96, 70]], dtype='float32')

In [49]:
# Targets (apples, oranges)
targets = np.array([[56, 70], 
                    [81, 101], 
                    [119, 133], 
                    [22, 37], 
                    [103, 119]], dtype='float32')

We've separated the input and target variables because we'll operate on them separately. Also, we've created numpy arrays, because this is typically how you would work with training data: read some CSV files as numpy arrays, do some processing, and then convert them to PyTorch tensors.

Let's convert the arrays to PyTorch tensors.

In [50]:
# Convert inputs and targets to tensors
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)
print(inputs)
print(targets)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


## Linear regression model from scratch

Weights and biases can be represented as matrices, initialized as random values.

First row of w and first element of b are used to predict the first target i.e yield of apples.

In [52]:
# torch.randn create tensor with given shape from Normal distribution N(0,1)

In [51]:
# Weights and biases
w = torch.randn(2, 3, requires_grad=True)
b = torch.randn(2, requires_grad=True)
print(w)
print(b)

tensor([[ 0.1763,  0.2920, -0.6731],
        [-1.2771, -0.0233,  0.7905]], requires_grad=True)
tensor([-0.1811, -0.6536], requires_grad=True)


We have : Inputs(5x3) * tr'Weights(3x2) + bias(5x2)

In [57]:
# .t() to get the transposee
print(w.t())

# @ represents matrix multiplication
print(inputs @ w.t())

tensor([[ 0.1763, -1.2771],
        [ 0.2920, -0.0233],
        [-0.6731,  0.7905]], grad_fn=<TBackward0>)
tensor([[   3.4943,  -60.7961],
        [  -1.3344,  -67.6722],
        [  15.4310,  -68.3784],
        [   5.6387, -102.0153],
        [  -6.9164,  -35.0198]], grad_fn=<MmBackward0>)


In [58]:
def model(x):
  return x @ w.t() + b

In [59]:
# Generate predictions
preds = model(inputs)
print(preds)

tensor([[   3.3132,  -61.4496],
        [  -1.5155,  -68.3257],
        [  15.2499,  -69.0320],
        [   5.4575, -102.6689],
        [  -7.0975,  -35.6733]], grad_fn=<AddBackward0>)


In [60]:
# Compare with targets
print(targets)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


## Loss function

Evaluate how well model is performing. We can compare model's prediction with the actual targets using the methods: 

calculate the diff between preds and targets

square all elements of the diff matrix to remove negative values

calculate the average of the elements in the resulting matrix

In [66]:
#MSE Loss
def mse(t1, t2):
  diff = t1 - t2
  return torch.sum(diff * diff) / diff.numel()
# .numel returns number of elements in a tensor

In [67]:
# Compute loss
loss = mse(preds, targets)
print(loss)

tensor(16294.2266, grad_fn=<DivBackward0>)


In [71]:
# Compute gradients
loss.backward()

In [72]:
# Gradients for weights
print(w) # w_{1,1}, w_{1,2}, ...
print(w.grad) # {1,1} corresponds to Derivate(Loss) w.r.t (w_{1,1})

tensor([[ 0.1763,  0.2920, -0.6731],
        [-1.2771, -0.0233,  0.7905]], requires_grad=True)
tensor([[ -5933.0732,  -7194.9160,  -4376.5859],
        [-13499.9863, -14326.8965,  -8840.3838]])


Adjust weights and biases to reduce the loss

Loss is a quadratic function of our weights and biases. Objective is to find the set of weights where the loss is the lowest. 

If a gradient element is positive:

increasing the weights element's value slightly will increase the loss

decreasing the weight elements value slightly will decrease the loss


If a gradient element is negative:

increasing the weights element's value slightly will decrease the loss

decreasing the weight elements value slightly will increase the loss


In [73]:
w
w.grad

tensor([[ -5933.0732,  -7194.9160,  -4376.5859],
        [-13499.9863, -14326.8965,  -8840.3838]])

In [74]:
# torch.no_grad do not track, calculate, modify gradients while updating 
#               weights and biases
with torch.no_grad():
  w -= w.grad * 1e-5
  b -= b.grad * 1e-5
# Here 1e-5 is called learning rate

In [75]:
# Let's verify that the loss is actually lower
loss = mse(preds, targets)
print(loss)

tensor(16294.2266, grad_fn=<DivBackward0>)


Before we proceed, we reset the gradients to zero by invoking the .zero_() method. We need to do this because PyTorch accumulates gradients. Otherwise, the next time we invoke .backward on the loss, the new gradient values are added to the existing gradients, which may lead to unexpected results.

In [76]:
w.grad.zero_()
b.grad.zero_()
print(w.grad)
print(b.grad)

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([0., 0.])


## Train the model using gradient descent

Improve the loss -> use gradient descent optimization algorithm.

Train the model -> Generate predictions, Calculate the loss, Compute gradients wrt weights and biases, Adjust the weights by substracting a small quantity proportional to the gradient, Reset Gradients to Zero

In [77]:
# Generate predictions
preds = model(inputs)
print(preds)

tensor([[ 14.3476, -38.1926],
        [ 13.0169, -37.7736],
        [ 32.5920, -32.9599],
        [ 16.2232, -79.4658],
        [  6.9678,  -6.4147]], grad_fn=<AddBackward0>)


In [78]:
# Calculate the loss
loss = mse(preds, targets)
print(loss)

tensor(11087.8125, grad_fn=<DivBackward0>)


In [79]:
# Compute gradients
loss.backward()
print(w.grad)
print(b.grad)

tensor([[ -4792.0098,  -5963.8794,  -3617.9272],
        [-11099.6211, -11749.4912,  -7249.5464]])
tensor([ -59.5705, -130.9613])


In [80]:
# Adjust weights & reset gradients
with torch.no_grad():
    w -= w.grad * 1e-5
    b -= b.grad * 1e-5
    w.grad.zero_()
    b.grad.zero_()

In [81]:
print(w)
print(b)

tensor([[ 0.2836,  0.4236, -0.5931],
        [-1.0311,  0.2375,  0.9514]], requires_grad=True)
tensor([-0.1798, -0.6507], requires_grad=True)


In [82]:
# Calculate loss
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(7578.1748, grad_fn=<DivBackward0>)


## Train for multiple epochs

In [83]:
# Train for 100 epochs
for i in range(100):
    preds = model(inputs)
    loss = mse(preds, targets)
    loss.backward()
    with torch.no_grad():
        w -= w.grad * 1e-5
        b -= b.grad * 1e-5
        w.grad.zero_()
        b.grad.zero_()

In [84]:
# Calculate loss
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(132.2703, grad_fn=<DivBackward0>)


In [85]:
# Predictions
preds

tensor([[ 61.7084,  69.1113],
        [ 77.8832, 102.5583],
        [121.2220, 130.5878],
        [ 46.9167,  30.9522],
        [ 79.2868, 125.8337]], grad_fn=<AddBackward0>)

In [86]:
# Targets
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])