## Outline
* PyTorch
* What are tensors
* Initialising, slicing, reshaping tensors
* Numpy and PyTorch interfacing
* GPU support for PyTorch + Enabling GPUs on Google Colab
* Speed comparisons, Numpy -- PyTorch -- PyTorch on GPU
* Autodiff concepts and application
* Writing a basic learning loop using autograd
* Exercises

In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt

## Initialise tensors

In [7]:
x = torch.ones(3, 2)
print(x)
x = torch.zeros(3, 2)
print(x)
x = torch.rand(3, 2)
print(x)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])
tensor([[0.4842, 0.3076],
        [0.2847, 0.8224],
        [0.7798, 0.6830]])


In [30]:
x = torch.empty(3, 2)
print(x)
y = torch.zeros_like(x)
print(y)

tensor([[7.2868e-44, 8.1275e-44],
        [7.1466e-44, 7.9874e-44],
        [8.1275e-44, 7.4269e-44]])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])


In [10]:
x = torch.linspace(0, 1, steps=5)
print(x)

tensor([0.0000, 0.2500, 0.5000, 0.7500, 1.0000])


In [11]:
x = torch.tensor([[1, 2], 
                 [3, 4], 
                 [5, 6]])
print(x)

tensor([[1, 2],
        [3, 4],
        [5, 6]])


## Slicing tensors

In [12]:
print(x.size())
print(x[:, 1]) 
print(x[0, :]) 

torch.Size([3, 2])
tensor([2, 4, 6])
tensor([1, 2])


In [14]:
y = x[1, 1]
print(y)
print(y.item())

tensor(4)
4


## Reshaping tensors

In [16]:
print(x)
y = x.view(2, 3)
print(y)

tensor([[1, 2],
        [3, 4],
        [5, 6]])
tensor([[1, 2, 3],
        [4, 5, 6]])


In [31]:
y = x.view(6,-1) 
print(y)

tensor([[7.2868e-44],
        [8.1275e-44],
        [7.1466e-44],
        [7.9874e-44],
        [8.1275e-44],
        [7.4269e-44]])


## Simple Tensor Operations

In [36]:
x = torch.ones(3, 2)
y = torch.ones([3, 2])
z = x + y
print(z)
z = x - y
print(z)
z = x * y
print(z)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])
tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])


In [48]:
z = y.add(x)
print(z)
print(y)

tensor([[8., 8.],
        [8., 8.],
        [8., 8.]])
tensor([[7., 7.],
        [7., 7.],
        [7., 7.]])


In [61]:
z = y.add_(x)
print(z)
print(y)

tensor([[20., 20.],
        [20., 20.],
        [20., 20.]])
tensor([[20., 20.],
        [20., 20.],
        [20., 20.]])


## Numpy <> PyTorch

In [62]:
x_np = x.numpy()
print(type(x), type(x_np))
print(x_np)

<class 'torch.Tensor'> <class 'numpy.ndarray'>
[[1. 1.]
 [1. 1.]
 [1. 1.]]


In [69]:
a = np.random.randn(5)
print(a)
a_pt = torch.from_numpy(a)
print(type(a), type(a_pt))
print(a_pt)

[-0.7123801  -0.17071854 -0.44808188  0.36737712 -0.63545245]
<class 'numpy.ndarray'> <class 'torch.Tensor'>
tensor([-0.7124, -0.1707, -0.4481,  0.3674, -0.6355], dtype=torch.float64)


In [70]:
np.add(a, 1, out=a)
print(a)
print(a_pt) 

[0.2876199  0.82928146 0.55191812 1.36737712 0.36454755]
tensor([0.2876, 0.8293, 0.5519, 1.3674, 0.3645], dtype=torch.float64)


In [74]:
%%time
for i in range(100):
    a = np.random.randn(100,100)
    b = np.random.randn(100,100)
    c = np.matmul(a, b)

Wall time: 148 ms


In [75]:
%%time
for i in range(100):
    a = torch.randn([100, 100])
    b = torch.randn([100, 100])
    c = torch.matmul(a, b)

Wall time: 41.8 ms


In [76]:
%%time
for i in range(10):
    a = np.random.randn(10000,10000)
    b = np.random.randn(10000,10000)
    c = a + b

Wall time: 1min 4s


In [77]:
%%time
for i in range(10):
    a = torch.randn([10000, 10000])
    b = torch.randn([10000, 10000])
    c = a + b

Wall time: 14.4 s


## CUDA support

In [2]:
print(torch.cuda.device_count())

0


In [4]:
print(torch.cuda.device(0))
print(torch.cuda.get_device_name(0))

<torch.cuda.device object at 0x0000016DF08BCAF0>


AssertionError: Torch not compiled with CUDA enabled

In [None]:
cuda0 = torch.device('cuda:0')

In [None]:
a = torch.ones(3, 2, device=cuda0)
b = torch.ones(3, 2, device=cuda0)
c = a + b
print(c)

In [None]:
print(a)

In [None]:
%%time
for i in range(10):
  a = np.random.randn(10000,10000)
  b = np.random.randn(10000,10000)
  np.add(b, a)

In [None]:
%%time
for i in range(10):
  a_cpu = torch.randn([10000, 10000])
  b_cpu = torch.randn([10000, 10000])
  b_cpu.add_(a_cpu)

In [None]:
%%time
for i in range(10):
  a = torch.randn([10000, 10000], device=cuda0)
  b = torch.randn([10000, 10000], device=cuda0)
  b.add_(a)

In [None]:
%%time
for i in range(10):
  a = np.random.randn(10000,10000)
  b = np.random.randn(10000,10000)
  np.matmul(b, a)

In [None]:
%%time
for i in range(10):
  a_cpu = torch.randn([10000, 10000])
  b_cpu = torch.randn([10000, 10000])
  torch.matmul(a_cpu, b_cpu)

In [None]:
%%time
for i in range(10):
  a = torch.randn([10000, 10000], device=cuda0)
  b = torch.randn([10000, 10000], device=cuda0)
  torch.matmul(a, b)

## Autodiff

In [83]:
x = torch.ones([3, 2], requires_grad=True)
print(x)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]], requires_grad=True)


In [88]:
y = x + 5
print(y)

tensor([[6., 6.],
        [6., 6.],
        [6., 6.]], grad_fn=<AddBackward0>)


In [89]:
z = y*y + 1
print(z)

tensor([[37., 37.],
        [37., 37.],
        [37., 37.]], grad_fn=<AddBackward0>)


In [90]:
t = torch.sum(z)
print(t)

tensor(222., grad_fn=<SumBackward0>)


In [91]:
t.backward()

In [98]:
print(x.grad)

tensor([[12., 12.],
        [12., 12.],
        [12., 12.]])


$t = \sum_i z_i, z_i = y_i^2 + 1, y_i = x_i + 5$

$\frac{\partial t}{\partial x_i} = \frac{\partial z_i}{\partial x_i} = \frac{\partial z_i}{\partial y_i} \frac{\partial y_i}{\partial x_i} = 2y_i \times 1$


At x = 1, y = 6, $\frac{\partial t}{\partial x_i} = 12$

In [99]:
x = torch.ones([3, 2], requires_grad=True)
y = x + 5
r = 1/(1 + torch.exp(-y))
print(r)
s = torch.sum(r)
s.backward()
print(x.grad)

tensor([[0.9975, 0.9975],
        [0.9975, 0.9975],
        [0.9975, 0.9975]], grad_fn=<MulBackward0>)
tensor([[0.0025, 0.0025],
        [0.0025, 0.0025],
        [0.0025, 0.0025]])


In [100]:
x = torch.ones([3, 2], requires_grad=True)
y = x + 5
r = 1/(1 + torch.exp(-y))
a = torch.ones([3, 2])
r.backward(a)
print(x.grad)

tensor([[0.0025, 0.0025],
        [0.0025, 0.0025],
        [0.0025, 0.0025]])


$\frac{\partial{s}}{\partial{x}} = \frac{\partial{s}}{\partial{r}} \cdot \frac{\partial{r}}{\partial{x}}$

For the above code $a$ represents $\frac{\partial{s}}{\partial{r}}$ and then $x.grad$ gives directly $\frac{\partial{s}}{\partial{x}}$



## Autodiff example that looks like what we have been doing

In [23]:
x = torch.randn([20, 1], requires_grad=True)
y = 3*x - 2

In [102]:
w = torch.tensor([1.], requires_grad=True)
b = torch.tensor([1.], requires_grad=True)

y_hat = w*x + b

loss = torch.sum((y_hat - y)**2)

In [103]:
print(loss)

tensor(235.3598, grad_fn=<SumBackward0>)


In [104]:
loss.backward()

In [105]:
print(w.grad, b.grad)

tensor([-86.1141]) tensor([99.4972])


## Do it in a loop

In [111]:
learning_rate = 0.01

w = torch.tensor([1.], requires_grad=True)
b = torch.tensor([1.], requires_grad=True)

print(w.item(), b.item())

for i in range(20):
  
    x = torch.randn([20, 1])
    y = 3*x - 2

    y_hat = w*x + b
    loss = torch.sum((y_hat - y)**2)

    loss.backward()
  
    with torch.no_grad():
        w -= learning_rate * w.grad
        b -= learning_rate * b.grad
    
    w.grad.zero_()
    b.grad.zero_()

    print(w.item(), b.item())
  

1.0 1.0
1.4819804430007935 -0.21703898906707764
2.7933149337768555 -1.0637097358703613
2.8412890434265137 -1.4313437938690186
2.924560546875 -1.655783772468567
2.9701499938964844 -1.798510193824768
2.9662649631500244 -1.8761252164840698
2.9850785732269287 -1.9267973899841309
2.9977688789367676 -1.9575732946395874
2.9956202507019043 -1.9744027853012085
2.9957854747772217 -1.9843930006027222
2.9988420009613037 -1.9911061525344849
2.999514579772949 -1.9947044849395752
2.9993984699249268 -1.9967952966690063
2.9996070861816406 -1.9980491399765015
2.9996745586395264 -1.9988163709640503
3.0000596046447754 -1.9993586540222168
3.000098466873169 -1.9996092319488525
3.000060558319092 -1.999760389328003
3.0000381469726562 -1.9998559951782227
3.000002384185791 -1.9999175071716309


## Do it for a large problem

In [113]:
%%time
learning_rate = 0.001
N = 10000000
epochs = 200

w = torch.rand([N], requires_grad=True)
b = torch.ones([1], requires_grad=True)

# print(torch.mean(w).item(), b.item())

for i in range(epochs):
  
    x = torch.randn([N])
    y = torch.dot(3*torch.ones([N]), x) - 2

    y_hat = torch.dot(w, x) + b
    loss = torch.sum((y_hat - y)**2)

    loss.backward()
  
    with torch.no_grad():
        w -= learning_rate * w.grad
        b -= learning_rate * b.grad

    w.grad.zero_()
    b.grad.zero_()

#   print(torch.mean(w).item(), b.item())

Wall time: 32.8 s


In [112]:
%%time
learning_rate = 0.001
N = 10000000
epochs = 200

w = torch.rand([N], requires_grad=True, device=cuda0)
b = torch.ones([1], requires_grad=True, device=cuda0)

# print(torch.mean(w).item(), b.item())

for i in range(epochs):
  
    x = torch.randn([N], device=cuda0)
    y = torch.dot(3*torch.ones([N], device=cuda0), x) - 2

    y_hat = torch.dot(w, x) + b
    loss = torch.sum((y_hat - y)**2)

    loss.backward()
  
    with torch.no_grad():
        w -= learning_rate * w.grad
        b -= learning_rate * b.grad
    
    w.grad.zero_()
    b.grad.zero_()

  #print(torch.mean(w).item(), b.item())

NameError: name 'cuda0' is not defined

In [15]:
y_hat = x*w + b

In [20]:
torch.sum(y_hat - y)/x.numel()

tensor(-40.3247, grad_fn=<DivBackward0>)

In [31]:
x = torch.tensor([[1,2,3],[4,5,6],[7,8,9]])
y = 7*x + 6

w = torch.randn(3,requires_grad=True)
b = torch.randn(1,requires_grad=True)

epochs = 10000

lr = 0.01

for i in range(epochs):
    y_hat = x*w + b
    loss = torch.sum((y_hat - y)**2)/x.numel()
    
    loss.backward()
    
    with torch.no_grad():
        w -= lr * w.grad
        b -= lr * b.grad

    w.grad.zero_()
    b.grad.zero_()

print(w,b)

tensor([7.0000, 7.0000, 7.0000], requires_grad=True) tensor([5.9999], requires_grad=True)
