# Following tutorial video as per info below

Youtube video by "freeCodeCamp.Org" : PyTorch for Deep Learning - Full Course / Tutorial
30 April 2020
https://www.youtube.com/watch?v=GIsg-ZUy0MY

# PyTorch basics - tensors and gradients

In [1]:
import torch
import numpy as np

## Defining different types of tensors - shape, dtype

All elements in a tensor will have same data type.
Tensor must have a regular shape, unlike lists.
Tensors are a suitable data type for execution on a GPU. NumPy arrays execute on the CPU.

E.g. List can be = [[1, 2], [1, 2, 3]]. But this cannot be done for a tensor

Dtypes explained here: https://pytorch.org/docs/stable/tensors.html

In [3]:
# 0-d tensor holding a single number
t1 = torch.tensor(4.)
print(t1, t1.shape, t1.dtype, sep="\n")
print("--------")
# 1-d tensor (vector); the single float literal makes every element a float
t2 = torch.tensor([1., 2, 3, 4])
print(t2, t2.shape, t2.dtype, sep="\n")
print("--------")
# 2-d tensor (matrix)
t3 = torch.tensor([
    [1., 2, 3, 4],
    [11., 22, 33, 44],
])
print(t3, t3.shape, t3.dtype, sep="\n")
# same matrix, but with the dtype overridden from the default float32 to float64
t3_1 = torch.tensor(
    [
        [1., 2, 3, 4],
        [11., 22, 33, 44],
    ],
    dtype=torch.float64,
)
print(t3_1, t3_1.shape, t3_1.dtype, sep="\n")
print("--------")
# 3-d tensor: 2 blocks, each with 3 rows of 4 values
t4 = torch.tensor([
    [
        [1., 2, 3, 4],
        [11, 22, 33, 44],
        [111, 222, 333, 444],
    ],
    [
        [5., 6, 7, 8],
        [55, 66, 77, 77],
        [555, 666, 777, 888],
    ],
])
print(t4, t4.shape, t4.dtype, sep="\n")

tensor(4.)
torch.Size([])
torch.float32
--------
tensor([1., 2., 3., 4.])
torch.Size([4])
torch.float32
--------
tensor([[ 1.,  2.,  3.,  4.],
        [11., 22., 33., 44.]])
torch.Size([2, 4])
torch.float32
tensor([[ 1.,  2.,  3.,  4.],
        [11., 22., 33., 44.]], dtype=torch.float64)
torch.Size([2, 4])
torch.float64
--------
tensor([[[  1.,   2.,   3.,   4.],
         [ 11.,  22.,  33.,  44.],
         [111., 222., 333., 444.]],

        [[  5.,   6.,   7.,   8.],
         [ 55.,  66.,  77.,  77.],
         [555., 666., 777., 888.]]])
torch.Size([2, 3, 4])
torch.float32


# Tensor operations and gradients

In [4]:
## create three tensors; only two of them (w and b) are created with requires_grad=True

In [5]:
x = torch.tensor(3.)
w = torch.tensor(4., requires_grad=True)
b = torch.tensor(5., requires_grad=True)

In [6]:
y = w * x + b
print(y)

tensor(17., grad_fn=<AddBackward0>)


In [7]:
## now y is a tensor as expected. But, we can automatically compute the derivative
## (aka gradient) of y wrt. the tensors that are defined with requires_grad=True.

## But note there is a computation cost, so be careful defining with requires_grad=True

## To compute these gradients, we simply call y.backward()
## The gradients are then read from the .grad attribute of the tensors.

y.backward()

## See the gradients
## Note the grad for x is None, as it is defined with requires_grad=False (default)

print(f"dy/dx = {x.grad}")
print(f"dy/dw = {w.grad}")
print(f"dy/db = {b.grad}")

dy/dx = None
dy/dw = 3.0
dy/db = 1.0


In [8]:
# 1. What if one or more of x, w or b were matrices and not just numbers?
#    What would the result y and the gradients w.grad and b.grad look like?
# 2. What if y was a matrix created using torch.tensor, with each element of the
#    matrix expressed as a combination of numeric tensors x, w and b?
# 3. What if we had a chain of operations instead of just one i.e.
#    y = x * w + b
#    z = 1 * y + m
#    w = c * z + d
#    What would calling w.grad do?

In [9]:
## 1st part - only x is a matrix; w and b stay 0-d (scalar) tensors
x = torch.tensor(np.arange(6, dtype=np.float32).reshape(2,3))
w = torch.tensor(4., requires_grad=True)
b = torch.tensor(5., requires_grad=True)
print(x)
print(w)
print(b)
print(f"------")
# The scalars w and b are broadcast over x, so y has the same 2x3 shape as x
y = w * x + b
print(y)
print(type(y))
print(f"------")
# NOTE: y is not a scalar, so this bare backward() call raises
#   "RuntimeError: grad can be implicitly created only for scalar outputs"
# and the prints below are never reached. To obtain gradients, either reduce y
# to a scalar first (e.g. y.sum().backward()) or pass an explicit gradient
# argument (e.g. y.backward(torch.ones_like(y))).
y.backward()
print(x.grad)
print(w.grad)
print(b.grad)

tensor([[0., 1., 2.],
        [3., 4., 5.]])
tensor(4., requires_grad=True)
tensor(5., requires_grad=True)
------
tensor([[ 5.,  9., 13.],
        [17., 21., 25.]], grad_fn=<AddBackward0>)
<class 'torch.Tensor'>
------


RuntimeError: grad can be implicitly created only for scalar outputs

In [10]:
## 1st part - x, w and b are matrices of same size and shape
x = torch.tensor(np.arange(6, dtype=np.float32).reshape(2,3))
#w = torch.tensor(np.array([4.] * 6).reshape(2,3), requires_grad=True)
# np.full is called without a dtype here, so w and b come out as float64
# tensors while x is float32; the resulting y is float64 as well.
w = torch.tensor(np.full((2,3), 4.), requires_grad=True)
b = torch.tensor(np.full((2,3), 5.), requires_grad=True)
print(x)
print(w)
print(b)
print(f"------")
# Element-wise multiply and add: y is a 2x3 tensor
y = w * x + b
print(y)
print(type(y))
print(f"------")
# NOTE: y is still not a scalar, so this bare backward() call raises
#   "RuntimeError: grad can be implicitly created only for scalar outputs"
# and the prints below are never reached. Reduce y to a scalar first
# (e.g. y.sum().backward()) or pass an explicit gradient argument
# (e.g. y.backward(torch.ones_like(y))).
y.backward()
print(x.grad)
print(w.grad)
print(b.grad)

tensor([[0., 1., 2.],
        [3., 4., 5.]])
tensor([[4., 4., 4.],
        [4., 4., 4.]], dtype=torch.float64, requires_grad=True)
tensor([[5., 5., 5.],
        [5., 5., 5.]], dtype=torch.float64, requires_grad=True)
------
tensor([[ 5.,  9., 13.],
        [17., 21., 25.]], dtype=torch.float64, grad_fn=<AddBackward0>)
<class 'torch.Tensor'>
------


RuntimeError: grad can be implicitly created only for scalar outputs

In [11]:
## 2nd part - y created using torch.tensor
x = torch.tensor(np.arange(6, dtype=np.float32).reshape(2,3))
# float32 is requested explicitly so that w and b match the dtype of x
w = torch.tensor(np.full((2,3), 4., dtype=np.float32), requires_grad=True)
b = torch.tensor(np.full((2,3), 5., dtype=np.float32), requires_grad=True)
print(x)
print(w)
print(b)
print(x.shape, w.shape, b.shape)
print(f"------")
# NOTE: wrapping the expression in torch.tensor(...) builds a new tensor from
# the values only; the printed y has no grad_fn, i.e. it is cut off from w and b.
y = torch.tensor(w * x + b)
print(y)
print(type(y))
print(f"------")
# NOTE: because y neither requires grad nor has a grad_fn, backward() raises
#   "RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn"
# and the prints below are never reached. Using y = w * x + b directly keeps the
# graph; y would then still be non-scalar, so backward() would additionally need
# a reduction such as y.sum() or an explicit gradient argument.
y.backward()
print(x.grad)
print(w.grad)
print(b.grad)

tensor([[0., 1., 2.],
        [3., 4., 5.]])
tensor([[4., 4., 4.],
        [4., 4., 4.]], requires_grad=True)
tensor([[5., 5., 5.],
        [5., 5., 5.]], requires_grad=True)
torch.Size([2, 3]) torch.Size([2, 3]) torch.Size([2, 3])
------
tensor([[ 5.,  9., 13.],
        [17., 21., 25.]])
<class 'torch.Tensor'>
------


  y = torch.tensor(w * x + b)


RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [12]:
## 3rd part - a chain of operations
x = torch.tensor(3.)
w = torch.tensor(4., requires_grad=True)
b = torch.tensor(5., requires_grad=True)
m = torch.tensor(6., requires_grad=True)
c = torch.tensor(7., requires_grad=True)
d = torch.tensor(8., requires_grad=True)
print(f"------")
y = w * x + b
z = 1 * y + m
# NOTE: this rebinds the name w to the result of the chain (a non-leaf tensor);
# the leaf tensor created above is no longer reachable through the name w.
w = c * z + d
print(y)
print(type(y))
print(f"------")
# NOTE: backward() is called on y, not on the end of the chain, so only tensors
# that y was computed from can receive a gradient. m, c and d are not part of y,
# hence their .grad stays None; x was created without requires_grad.
y.backward()
# w in this list is the rebound non-leaf tensor, so its .grad prints None
# (PyTorch also emits a warning when .grad of a non-leaf tensor is read).
tensorlist = [x, w, b, m, c, d]
for varname, t in zip([v for v in 'xwbmcd'], tensorlist):
    print(f"{varname}.grad = {t.grad}")

------
tensor(17., grad_fn=<AddBackward0>)
<class 'torch.Tensor'>
------
x.grad = None
w.grad = None
b.grad = 1.0
m.grad = None
c.grad = None
d.grad = None


  print(f"{varname}.grad = {t.grad}")


# Interoperability with numpy

In [13]:
#x = np.array([[1.,2],[3,4]])
x = np.arange(8, dtype=np.float64).reshape(2,4)
print(x)
print(x.shape)
print(x.dtype)

[[0. 1. 2. 3.]
 [4. 5. 6. 7.]]
(2, 4)
float64


In [14]:
## convert np array to tensor using .from_numpy

y_4mNp = torch.from_numpy(x)
print(y_4mNp)
print(y_4mNp.shape)
print(y_4mNp.dtype)
print(id(x), id(y_4mNp))

tensor([[0., 1., 2., 3.],
        [4., 5., 6., 7.]], dtype=torch.float64)
torch.Size([2, 4])
torch.float64
140279006117488 140279006156160


In [15]:
## convert np array to tensor using torch.tensor

y_t = torch.tensor(x)
print(y_t)
print(y_t.shape)
print(y_t.dtype)
print(id(x), id(y_t))

tensor([[0., 1., 2., 3.],
        [4., 5., 6., 7.]], dtype=torch.float64)
torch.Size([2, 4])
torch.float64
140279006117488 140279006173824


In [16]:
## Note the difference between torch.tensor vs torch.from_numpy
## From_numpy uses the same underlying memory that the np variable uses.
##     So changing either the np or the .from_numpy variables impact each other but NOT
##        the tensor variable.
##     But changes to y_t affect only itself and not the numpy or the from_numpy variables.

In [17]:
print(f"x={x}\ny_4mNp={y_4mNp}\ny_t={y_t}")  ## all have same values right now

x=[[0. 1. 2. 3.]
 [4. 5. 6. 7.]]
y_4mNp=tensor([[0., 1., 2., 3.],
        [4., 5., 6., 7.]], dtype=torch.float64)
y_t=tensor([[0., 1., 2., 3.],
        [4., 5., 6., 7.]], dtype=torch.float64)


In [18]:
x[0,1] = 111       ## changed the numpy variable itself directly
y_4mNp[1,:] = 500  ## changed the .from_numpy variable
y_t[0,:] = 999     ## changed the tensor variable
print(f"x={x}\ny_4mNp={y_4mNp}\ny_t={y_t}")

x=[[  0. 111.   2.   3.]
 [500. 500. 500. 500.]]
y_4mNp=tensor([[  0., 111.,   2.,   3.],
        [500., 500., 500., 500.]], dtype=torch.float64)
y_t=tensor([[999., 999., 999., 999.],
        [  4.,   5.,   6.,   7.]], dtype=torch.float64)


# Linear Regression with PyTorch

Code without built in methods of pytorch.
Explicitly updating the weights.

In [19]:
We are trying to make such a model

yield_apple  = w11 * temp + w12 * rainfall + w13 * humidity + b1
yield_orange = w21 * temp + w22 * rainfall + w23 * humidity + b2

Essentially this would be a matrix multiplication of the form

Y = X * transpose(W) + B

Y is target values.
X is the input values.
W is the weights matrix.
B is the biases matrix.

First define the model, data and the loss functions.
So the idea is:
1. Generate predictions
2. Calculate the loss e.g. with MSE
3. Compute gradient using .backward().
4. Use the .grad values of the variables to update their values as per the LR.
5. Zero the gradients

SyntaxError: invalid syntax (<ipython-input-19-d750484eaae9>, line 1)

In [50]:
import numpy as np
import torch

In [60]:
# Build the training data as NumPy float32 arrays first
# Each input row is (temp, rain, humid)
inputs = np.array(
    [
        [73, 67, 43],
        [91, 88, 64],
        [87, 134, 58],
        [102, 43, 37],
        [69, 96, 70],
    ],
    dtype=np.float32,
)
# Each target row is (apples, oranges)
targets = np.array(
    [
        [56, 70],
        [81, 101],
        [119, 133],
        [22, 37],
        [103, 119],
    ],
    dtype=np.float32,
)
print(inputs.shape, targets.shape)

# Rebind both names to tensors created from the arrays
inputs, targets = torch.from_numpy(inputs), torch.from_numpy(targets)
print(inputs.shape, targets.shape)

(5, 3) (5, 2)
torch.Size([5, 3]) torch.Size([5, 2])


In [61]:
# Weights and biases
# Initialise randomly using the torch.randn function. This outputs values sampled from a
#    gaussian distribution with mean 0 and SD=1. So mostly the values will be in range
#    -1 to +1 but some will be outside this range also.
w = torch.randn(2, 3, requires_grad=True)
b = torch.randn(2, requires_grad=True)
print(w)
print(b)
print(w.shape, b.shape)

tensor([[-0.4372,  1.2163,  0.1074],
        [ 0.8596, -1.3096, -0.3948]], requires_grad=True)
tensor([-0.2865, -0.1088], requires_grad=True)
torch.Size([2, 3]) torch.Size([2])


In [62]:
# Linear model: matrix-multiply the inputs by the transposed weights, then add the bias
def regr_model(x):
    """Return the predictions ``x @ w.t() + b`` using the module-level ``w`` and ``b``."""
    weighted = torch.matmul(x, w.t())
    return weighted + b

In [63]:
# Generate predictions       ##  (5,3) @ (2,3).t() + (2,)
preds = regr_model(inputs)   ##  (5,3) @ (3,2) -> (5,2); b of shape (2,) is broadcast over the 5 rows
print(preds)
print(targets)

tensor([[  53.9129,  -42.0830],
        [  73.8422,  -62.4041],
        [ 130.8981, -123.7165],
        [  11.3986,   16.6442],
        [  93.8350,  -94.1602]], grad_fn=<AddBackward0>)
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [64]:
# Mean squared error loss
def mse(p, t):
    """Return the mean of the squared element-wise differences between p and t.

    The sum of squared differences is divided by the number of elements.
    """
    residual = p - t
    squared = residual * residual
    return squared.sum() / residual.numel()

In [65]:
# Compute loss
## Suppose the loss = 3600 => on average the predictions are off from targets by a value of
##     square_root 3600 = 60
loss = mse(preds, targets)
print(loss)
print(torch.sqrt(loss))

tensor(15141.2012, grad_fn=<DivBackward0>)
tensor(123.0496, grad_fn=<SqrtBackward>)


In [66]:
# Compute gradients
# .backward() method automatically does the gradient calculations and stores the values
#     for each of the variables that have requires_grad=True.
#     These gradients can be accessed using the .grad attribute on those variables.
loss.backward()

# See the weights grads
print(w.grad)
print(b.grad)

tensor([[  -296.4617,   -102.2158,   -178.3113],
        [-12434.1016, -15525.5625,  -9168.2734]])
tensor([  -3.4226, -153.1439])


In [67]:
# Resetting the grads to 0 by calling the .zero_() method.
# This is done as pytorch accumulates the gradients automatically and thus each time
# .backward() is invoked on the loss, the new gradient values will be added to the old
# existing values - which is not what we want to do.


# Keep repeating above steps as per number of desired epochs.
#w.grad.zero_()
#b.grad.zero_()
#print(w.grad)
#print(b.grad)

In [None]:
# torch.no_grad tells pytorch not to track the operations inside the block for gradient
# computation, so updating the weights and biases does not become part of the graph.

In [68]:
# Show the parameters before the step
print("Values BEFORE updating the weights and biases:", w, b, sep="\n")
# One manual gradient-descent step with learning rate LR; each gradient is
# zeroed right after it has been applied
LR = 1e-5
with torch.no_grad():
    w.sub_(LR * w.grad)
    w.grad.zero_()
    b.sub_(LR * b.grad)
    b.grad.zero_()
# Show the parameters after the step
print("\nValues AFTER updating the weights and biases:", w, b, sep="\n")

Values BEFORE updating the weights and biases:
tensor([[-0.4372,  1.2163,  0.1074],
        [ 0.8596, -1.3096, -0.3948]], requires_grad=True)
tensor([-0.2865, -0.1088], requires_grad=True)

Values AFTER updating the weights and biases:
tensor([[-0.4342,  1.2174,  0.1092],
        [ 0.9839, -1.1544, -0.3031]], requires_grad=True)
tensor([-0.2864, -0.1073], requires_grad=True)


In [70]:
# check new loss - it should have fallen from earlier
preds = regr_model(inputs)
loss = mse(preds, targets)
print(loss)
print(torch.sqrt(loss))

tensor(10770.3574, grad_fn=<DivBackward0>)
tensor(103.7803, grad_fn=<SqrtBackward>)


In [76]:
# Train for multiple epochs: forward pass, loss, backward pass, manual update
epochs = 300
LR = 1e-4
for i in range(epochs):
    preds = regr_model(inputs)
    loss = mse(preds, targets)
    loss.backward()
    with torch.no_grad():
        # gradient-descent step for each parameter, then clear its gradient so
        # that the next backward() does not add on top of it
        w.sub_(LR * w.grad)
        w.grad.zero_()
        b.sub_(LR * b.grad)
        b.grad.zero_()

In [77]:
# check new loss - it should have fallen from earlier
preds = regr_model(inputs)
loss = mse(preds, targets)
print(loss)
print(torch.sqrt(loss))
print(preds)
print(targets)
print(preds-targets)

tensor(0.6589, grad_fn=<DivBackward0>)
tensor(0.8117, grad_fn=<SqrtBackward>)
tensor([[ 57.1494,  70.3239],
        [ 81.9703, 100.4016],
        [119.2556, 133.5452],
        [ 21.2429,  37.1811],
        [101.3587, 118.5520]], grad_fn=<AddBackward0>)
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])
tensor([[ 1.1494,  0.3239],
        [ 0.9703, -0.5984],
        [ 0.2556,  0.5452],
        [-0.7571,  0.1811],
        [-1.6413, -0.4480]], grad_fn=<SubBackward0>)


In [None]:
# Linear Regression with PyTorch using PyTorch built-ins

Using the optimizer and torch modules

In [None]:
import numpy as np
import torch
import torch.nn as nn

In [None]:
# Numpy arrays first
# Input (temp, rain, humid)
inputs = np.array([
    [73, 67, 43],
    [91, 88, 64],
    [87, 134, 58],
    [102, 43, 37],
    [69, 96, 70]
], dtype='float32')
# Targets (apples, oranges)
targets = np.array([
    [56, 70],
    [81, 101],
    [119, 133],
    [22, 37],
    [103, 119]
], dtype='float32')
print(inputs.shape, targets.shape)

# Convert to tensors
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)
print(inputs.shape, targets.shape)