### Outline

* PyTorch
* What are tensors
* Initialising, slicing, reshaping tensors
* Numpy and PyTorch interfacing 
* GPU support for PyTorch + Enabling GPUs on Kaggle
* Speed comparisons, Numpy -- PyTorch -- PyTorch on GPU
* Autodiff concepts and application
* Writing a basic learning loop using autograd

In [1]:
import torch 
import numpy as np

### Initialise Tensors

In [2]:
x = torch.ones(3, 2)
print(x)
x = torch.zeros(3, 2)
print(x)
x = torch.rand(3, 2)
print(x)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])
tensor([[0.7018, 0.9004],
        [0.2214, 0.9275],
        [0.5580, 0.4482]])


In [3]:
x = torch.empty(3, 2)
print(x)
y = torch.zeros_like(x)
print(y)

tensor([[-7.3346e+26,  4.5743e-41],
        [-1.1421e+31,  3.0855e-41],
        [ 4.4842e-44,  0.0000e+00]])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])


In [4]:
x = torch.linspace(0, 1, steps=5)
print(x)

tensor([0.0000, 0.2500, 0.5000, 0.7500, 1.0000])


In [5]:
x = torch.tensor([[1, 2],
                 [3, 4],
                 [5, 6]])
print(x)

tensor([[1, 2],
        [3, 4],
        [5, 6]])


### Slicing Tensors

In [6]:
print(x.size())
print(x[:, 1])
print(x[0, :])

torch.Size([3, 2])
tensor([2, 4, 6])
tensor([1, 2])


In [7]:
y = x[1, 1]
print(y)
print(y.item())

tensor(4)
4


### Reshaping Tensors

In [8]:
print(x)
y = x.view(2, 3)
print(y)

tensor([[1, 2],
        [3, 4],
        [5, 6]])
tensor([[1, 2, 3],
        [4, 5, 6]])


In [9]:
y = x.view(6, -1)
print(y)

tensor([[1],
        [2],
        [3],
        [4],
        [5],
        [6]])


### Simple Tensor Operations

In [10]:
x = torch.ones([3, 2])
y = torch.ones([3, 2])
z = x + y
print(z)
z = x * y
print(z)
z = x - y
print(z)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])
tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])


In [11]:
z = y.add(x)
print(z)
print(y)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])
tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])


In [12]:
z = y.add_(x)
print(z)
print(y)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])
tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])


### Numpy <> PyTorch

In [13]:
x_np = x.numpy()
print(type(x), type(x_np))
print(x_np)

<class 'torch.Tensor'> <class 'numpy.ndarray'>
[[1. 1.]
 [1. 1.]
 [1. 1.]]


In [14]:
a = np.random.randn(5)
print(a)
a_pt = torch.from_numpy(a)
print(type(a), type(a_pt))
print(a_pt)

[-0.9209003   0.52065567 -0.56224179 -0.82056432 -1.1482345 ]
<class 'numpy.ndarray'> <class 'torch.Tensor'>
tensor([-0.9209,  0.5207, -0.5622, -0.8206, -1.1482], dtype=torch.float64)


In [15]:
np.add(a, 1, out=a)
print(a)
print(a_pt)

[ 0.0790997   1.52065567  0.43775821  0.17943568 -0.1482345 ]
tensor([ 0.0791,  1.5207,  0.4378,  0.1794, -0.1482], dtype=torch.float64)


In [16]:
%%time
for i in range(100):
    a = np.random.randn(100, 100)
    b = np.random.randn(100, 100)
    c = np.matmul(a, b)

CPU times: user 107 ms, sys: 1.03 ms, total: 108 ms
Wall time: 118 ms


In [17]:
%%time 
for i in range(100):
    a = torch.randn([100, 100])
    b = torch.randn([100, 100])
    c = torch.matmul(a, b)

CPU times: user 29.3 ms, sys: 897 µs, total: 30.2 ms
Wall time: 44.3 ms


In [18]:
%%time
for i in range(10):
    a = np.random.randn(10000, 10000)
    b = np.random.randn(10000, 10000)
    c = np.matmul(a, b)

CPU times: user 6min 46s, sys: 4.86 s, total: 6min 51s
Wall time: 6min 56s


In [19]:
%%time 
for i in range(10):
    a = torch.randn([10000, 10000])
    b = torch.randn([10000, 10000])
    c = torch.matmul(a, b)

CPU times: user 3min 4s, sys: 5.94 s, total: 3min 10s
Wall time: 3min 11s


### CUDA Support

In [20]:
print(torch.cuda.device_count())

1


In [21]:
print(torch.cuda.device(0))
print(torch.cuda.get_device_name(0))

<torch.cuda.device object at 0x7f834ffa7650>
Tesla P100-PCIE-16GB


In [22]:
cuda0 = torch.device('cuda:0')

In [23]:
a = torch.ones(3, 2, device=cuda0)
b = torch.ones(3, 2, device=cuda0)
c = a + b
print(c)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]], device='cuda:0')


In [24]:
print(a)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]], device='cuda:0')


In [25]:
%%time 
for i in range(10):
    a = np.random.randn(10000, 10000)
    b = np.random.randn(10000, 10000)
    np.add(b, a)

CPU times: user 1min 20s, sys: 4.44 s, total: 1min 25s
Wall time: 1min 25s


In [26]:
%%time 
for i in range(10):
    a_cpu = torch.randn([10000, 10000])
    b_cpu = torch.randn([10000, 10000])
    b_cpu.add_(a_cpu)

CPU times: user 20.8 s, sys: 3.59 s, total: 24.4 s
Wall time: 24.4 s


In [27]:
%%time
for i in range(10):
    a = torch.randn([10000, 10000], device=cuda0)
    b = torch.randn([10000, 10000], device=cuda0)
    b.add_(a)

CPU times: user 738 µs, sys: 6.99 ms, total: 7.73 ms
Wall time: 9.31 ms


In [28]:
%%time
for i in range(10):
    a = np.random.randn(10000,10000)
    b = np.random.randn(10000,10000)
    np.matmul(b, a)

CPU times: user 6min 47s, sys: 4.89 s, total: 6min 52s
Wall time: 6min 56s


In [29]:
%%time
for i in range(10):
    a_cpu = torch.randn([10000, 10000])
    b_cpu = torch.randn([10000, 10000])
    torch.matmul(a_cpu, b_cpu)

CPU times: user 2min 58s, sys: 6.14 s, total: 3min 5s
Wall time: 3min 8s


In [30]:
%%time
for i in range(10):
    a = torch.randn([10000, 10000], device=cuda0)
    b = torch.randn([10000, 10000], device=cuda0)
    torch.matmul(a, b)

CPU times: user 132 ms, sys: 88.9 ms, total: 221 ms
Wall time: 632 ms


### Autodiff

In [31]:
x = torch.ones([3, 2], requires_grad=True)
print(x)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]], requires_grad=True)


In [32]:
y = x + 5
print(y)

tensor([[6., 6.],
        [6., 6.],
        [6., 6.]], grad_fn=<AddBackward0>)


In [33]:
z = y * y + 1
print(z)

tensor([[37., 37.],
        [37., 37.],
        [37., 37.]], grad_fn=<AddBackward0>)


In [34]:
t = torch.sum(z)
print(t)

tensor(222., grad_fn=<SumBackward0>)


In [35]:
t.backward()

In [36]:
print(x.grad)

tensor([[12., 12.],
        [12., 12.],
        [12., 12.]])


$t = \sum_i z_i, z_i = y_i^2 + 1, y_i = x_i + 5$

$\frac{\partial t}{\partial x_i} = \frac{\partial z_i}{\partial x_i} = \frac{\partial z_i}{\partial y_i} \frac{\partial y_i}{\partial x_i} = 2y_i \times 1$

At x = 1, y = 6, $\frac{\partial t}{\partial x_i} = 12$

In [37]:
x = torch.ones([3, 2], requires_grad=True)
y = x + 5
r = 1/(1 + torch.exp(-y))
print(r)
s = torch.sum(r)
s.backward()
print(x.grad)

tensor([[0.9975, 0.9975],
        [0.9975, 0.9975],
        [0.9975, 0.9975]], grad_fn=<MulBackward0>)
tensor([[0.0025, 0.0025],
        [0.0025, 0.0025],
        [0.0025, 0.0025]])


In [38]:
x = torch.ones([3, 2], requires_grad=True)
y = x + 5
r = 1/(1 + torch.exp(-y))
a = torch.ones([3, 2])
r.backward(a)
print(x.grad)

tensor([[0.0025, 0.0025],
        [0.0025, 0.0025],
        [0.0025, 0.0025]])


$\frac{\partial{s}}{\partial{x}} = \frac{\partial{s}}{\partial{r}} \cdot \frac{\partial{r}}{\partial{x}}$

For the above code $a$ represents $\frac{\partial{s}}{\partial{r}}$ and then $x.grad$ gives directly $\frac{\partial{s}}{\partial{x}}$

### Autodiff example that looks like what we have been doing

In [39]:
x = torch.randn([20, 1], requires_grad=True)
y = 3*x-2

In [40]:
w = torch.tensor([1.],requires_grad=True)
b = torch.tensor([1.],requires_grad=True)

y_hat = w*x + b

loss = torch.sum((y_hat - y)**2)

In [41]:
print(loss)

tensor(219.7281, grad_fn=<SumBackward0>)


In [42]:
loss.backward()

In [43]:
print(w.grad, b.grad)

tensor([-49.1018]) tensor([113.7509])


### Do it in a loop

In [44]:
learning_rate = 0.01

w = torch.tensor([1.], requires_grad=True)
b = torch.tensor([1.], requires_grad=True)

# print(w.item(), b.item())

for i in range(10):
    
    x = torch.randn([20, 1])
    y = 3*x -2
    
    y_hat = w*x + b
    loss = torch.sum((y_hat - y)**2)
    
    loss.backward()
    
    with torch.no_grad():
        w -= learning_rate * w.grad
        b -= learning_rate * b.grad
        
        w.grad.zero_()
        b.grad.zero_()
    
#     print(w.item(), b.item())

### Do it for a large problem

In [45]:
%%time
learning_rate = 0.001
N = 10000000
epochs = 200

w = torch.rand([N], requires_grad=True)
b = torch.ones([1], requires_grad=True)

# print(torch.mean(w).item(), b.item())

for i in range(epochs):
    
    x = torch.randn([N])
    y = torch.dot(3*torch.ones([N]), x) - 2
    
    y_hat = torch.dot(w, x) + b
    loss = torch.sum((y_hat - y)**2)
    
    loss.backward()
    
    with torch.no_grad():
        w -=  learning_rate * w.grad
        b -= learning_rate * b.grad
        
        w.grad.zero_()
        b.grad.zero_()
        
#     print(torch.mean(w).item(), b.item())

CPU times: user 32.1 s, sys: 18.4 s, total: 50.5 s
Wall time: 51.2 s


In [46]:
%%time
learning_rate = 0.001
N = 100000000
epochs = 200

w = torch.rand([N], requires_grad=True, device=cuda0)
b = torch.ones([1], requires_grad=True, device=cuda0)

for i in range(epochs):
    
    x = torch.randn([N], device=cuda0)
    y = torch.dot(3* torch.ones([N], device=cuda0),x)-2
    
    y_hat = torch.dot(w, x) + b
    loss = torch.sum((y_hat - y)**2)
    
    loss.backward()
    
    with torch.no_grad():
        w -= learning_rate * w.grad
        b -= learning_rate * b.grad
        
        w.grad.zero_()
        b.grad.zero_()
    
    print(torch.mean(w).item(), b.item())

0.49999985098838806 4.752829551696777
0.4964137077331543 72.2031478881836
0.25849202275276184 1966.79541015625
0.34471994638442993 -2505.7626953125
-6.358139514923096 73292.0546875
-306.6883239746094 -1161625.875
-423.472412109375 2059627.875
3212.229736328125 39084576.0
-132895.65625 712571392.0
-369056.875 8752850944.0
1990982.125 81345921024.0
-3592025.5 378232963072.0
52846244.0 3144077803520.0
-2420961792.0 34815873122304.0
-20062005248.0 -252093776527360.0
135930249216.0 3875999678726144.0
-2407469416448.0 -5.629717718551757e+16
15341233111040.0 1.0575407778627584e+16
-233490360565760.0 3.723784749265715e+18
-1535928086560768.0 -1.170091917990848e+19
-1.5575417578586112e+16 8.71177191527851e+19
3.530857223511081e+17 3.266277569521982e+21
-2.6615663225208832e+17 -1.4382521255484855e+22
2.0519301501834756e+19 2.1953545782595477e+23
5.466365873609297e+19 -2.3716752594723091e+24
7.495569558251294e+20 1.5036492190959858e+25
-2.3229303417398817e+22 2.8586887931562767e+26
4.109076193533