<a href="https://colab.research.google.com/github/sagar9926/Pytorch/blob/master/IntroToPytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Outline
* PyTorch
* What are tensors
* Initialising, slicing, reshaping tensors
* Numpy and PyTorch interfacing
* GPU support for PyTorch + Enabling GPUs on Google Colab
* Speed comparisons, Numpy -- PyTorch -- PyTorch on GPU
* Autodiff concepts and application
* Writing a basic learning loop using autograd
* Exercises

In [0]:
import torch
import numpy as np
import matplotlib.pyplot as plt

## Initialise Tensors

In [2]:
x = torch.ones(3,2)
print(x)

x = torch.zeros(3,2)
print(x)

x = torch.rand(3,2)
print(x)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])
tensor([[0.6268, 0.4155],
        [0.4310, 0.0269],
        [0.3793, 0.9955]])


In [3]:
x = torch.empty(3,2) #Creates space for (3,2) sized tensor but will not initialise it
print(x)

y = torch.zeros_like(x) # fill the space of size x with zeroes
print(y)

tensor([[8.0390e-36, 0.0000e+00],
        [0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00]])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])


In [4]:
x = torch.linspace(0,1,steps = 5) # take 5 equal steps between start and stop
print(x)

tensor([0.0000, 0.2500, 0.5000, 0.7500, 1.0000])


In [5]:
x = torch.tensor([[1,2],
                  [3,4],
                  [5,6]])
print(x)

tensor([[1, 2],
        [3, 4],
        [5, 6]])


## Slicing Tensors

In [6]:
print(x.size())
print(x[:,1])
print(x[0,:])


torch.Size([3, 2])
tensor([2, 4, 6])
tensor([1, 2])


In [7]:
y = x[1,1]
print(y)
print(y.item())

tensor(4)
4


## Reshaping Tensors 

In [8]:
print(x)
y = x.view(2,3)
z = x.view(6,-1)
print(y)
print(z)


tensor([[1, 2],
        [3, 4],
        [5, 6]])
tensor([[1, 2, 3],
        [4, 5, 6]])
tensor([[1],
        [2],
        [3],
        [4],
        [5],
        [6]])


## Simple Tensor Operations :

In [9]:
x = torch.ones([3,2])
y = torch.ones([3,2])

z = x + y
print(z)

z = x - y
print(z)

z = x*y
print(z)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])
tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])


In [10]:
z = y.add(x)
print(z)
print(y)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])
tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])


In [11]:
z = y.add_(x)
print(z)
print(y)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])
tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])


## Numpy <> PyTorch

In [12]:
x = torch.ones([3,2])
x_np = x.numpy()
print(type(x),type(x_np))
print(x_np)

<class 'torch.Tensor'> <class 'numpy.ndarray'>
[[1. 1.]
 [1. 1.]
 [1. 1.]]


In [13]:
a = np.random.randn(5)
print(a)
a_pt = torch.from_numpy(a)
print(type(a),type(a_pt))
print(a_pt)

[ 0.08718272  0.52921216 -0.14172494 -0.03278325 -0.26065707]
<class 'numpy.ndarray'> <class 'torch.Tensor'>
tensor([ 0.0872,  0.5292, -0.1417, -0.0328, -0.2607], dtype=torch.float64)


In [14]:
np.add(a,1,out = a)
print(a)
print(a_pt)

[1.08718272 1.52921216 0.85827506 0.96721675 0.73934293]
tensor([1.0872, 1.5292, 0.8583, 0.9672, 0.7393], dtype=torch.float64)


In [15]:
%%time

for i in range(100):
  a = np.random.randn(100,100)
  b = np.random.randn(100,100)
  c = np.matmul(a,b)


CPU times: user 155 ms, sys: 100 ms, total: 255 ms
Wall time: 152 ms


In [16]:
%%time

for i in range(100):
  a = torch.randn([100,100])
  b = torch.randn([100,100])
  c = torch.matmul(a,b)

CPU times: user 45.9 ms, sys: 32.1 ms, total: 78 ms
Wall time: 40.7 ms


In [17]:
%%time

for i in range(10):
  a = np.random.randn(10000,10000)
  b = np.random.randn(10000,10000)
  c = np.matmul(a,b)


CPU times: user 20min 35s, sys: 5.77 s, total: 20min 40s
Wall time: 11min 14s


In [18]:
%%time

for i in range(10):
  a = torch.randn([10000,10000])
  b = torch.randn([10000,10000])
  c = torch.matmul(a,b)

CPU times: user 4min 49s, sys: 108 ms, total: 4min 49s
Wall time: 4min 49s


## CUDA support

In [19]:
print(torch.cuda.device_count()) # tells if you have any GPU currently in system

1


In [20]:
print(torch.cuda.device(0)) # from here we now know that there is a device at location 0
print(torch.cuda.get_device_name(0))

<torch.cuda.device object at 0x7f3cb2d8a8d0>
Tesla K80


In [0]:
cuda0 = torch.device('cuda:0')  #Here we are referring to the device present at location 0

# whereas 0 in abbove statement refers to the location of device 

In [22]:
## a = torch.ones(3 ,2) Creates the tensor in CPU
a = torch.ones(3 ,2 , device = cuda0) # This will create the tensor on GPU Card 
b = torch.ones(3 ,2 , device = cuda0)

# a and b are going to be initialised in GPU card and the operations performed on them will also run on GPU

c = a + b
print(c)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]], device='cuda:0')


In [23]:
%%time
for i in range(10):
  a = np.random.randn(10000,10000)
  b = np.random.randn(10000,10000)
  np.add(b, a)

CPU times: user 1min 27s, sys: 334 ms, total: 1min 27s
Wall time: 1min 27s


In [24]:
%%time
for i in range(10):
  a_cpu = torch.randn([10000, 10000])
  b_cpu = torch.randn([10000, 10000])
  b_cpu.add_(a_cpu)

CPU times: user 17.9 s, sys: 9.09 ms, total: 17.9 s
Wall time: 17.9 s


In [25]:
%%time
for i in range(10):
  a = torch.randn([10000, 10000], device=cuda0)
  b = torch.randn([10000, 10000], device=cuda0)
  b.add_(a)

CPU times: user 1.85 ms, sys: 3.99 ms, total: 5.83 ms
Wall time: 5.96 ms


In [26]:
%%time
for i in range(10):
  a = np.random.randn(10000,10000)
  b = np.random.randn(10000,10000)
  np.matmul(b, a)

CPU times: user 20min 34s, sys: 5.92 s, total: 20min 40s
Wall time: 11min 16s


In [27]:
%%time
for i in range(10):
  a_cpu = torch.randn([10000, 10000])
  b_cpu = torch.randn([10000, 10000])
  torch.matmul(a_cpu, b_cpu)

CPU times: user 4min 52s, sys: 192 ms, total: 4min 52s
Wall time: 4min 52s


In [28]:
%%time
for i in range(10):
  a = torch.randn([10000, 10000], device=cuda0)
  b = torch.randn([10000, 10000], device=cuda0)
  torch.matmul(a, b)

CPU times: user 15.4 ms, sys: 3.99 ms, total: 19.4 ms
Wall time: 19.8 ms


## Autodiff

In [29]:
x = torch.ones([3,2], requires_grad = True)

# here we are telling pytorch that x is now something which could be differentiated again 
# I might create some new functions related to x which i may dirfferentiate

print(x)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]], requires_grad=True)


In [30]:
y = x + 5
print(y)

tensor([[6., 6.],
        [6., 6.],
        [6., 6.]], grad_fn=<AddBackward0>)


In [31]:
z = y*y + 1
print(z)

tensor([[37., 37.],
        [37., 37.],
        [37., 37.]], grad_fn=<AddBackward0>)


In [32]:
t = torch.sum(z)

print(t)

tensor(222., grad_fn=<SumBackward0>)


In [0]:
t.backward()  # with this t becomes the function which we want to differentiate

# this computes dt/dx for every parameter x which has requires_grad=True. These are accumulated into x.grad for every parameter x

In [34]:
print(x.grad) # derivative of t w.r.t x

tensor([[12., 12.],
        [12., 12.],
        [12., 12.]])


$t = \sum_i z_i, z_i = y_i^2 + 1, y_i = x_i + 5$

$\frac{\partial t}{\partial x_i} = \frac{\partial z_i}{\partial x_i} = \frac{\partial z_i}{\partial y_i} \frac{\partial y_i}{\partial x_i} = 2y_i \times 1$


At x = 1, y = 6, $\frac{\partial t}{\partial x_i} = 12$

In [35]:
x = torch.ones([3, 2], requires_grad=True)
y = x + 5
r = 1/(1 + torch.exp(-y))
print(r) # here we have six different values
s = torch.sum(r)
s.backward()
print(x.grad)

tensor([[0.9975, 0.9975],
        [0.9975, 0.9975],
        [0.9975, 0.9975]], grad_fn=<MulBackward0>)
tensor([[0.0025, 0.0025],
        [0.0025, 0.0025],
        [0.0025, 0.0025]])


In [36]:
x = torch.ones([3, 2], requires_grad=True)
y = x + 5
r = 1/(1 + torch.exp(-y))
a = torch.ones([3, 2])
r.backward(a) # It will compute the derivative of r w.r.t x and will multiply pointwise with a
print(x.grad)

tensor([[0.0025, 0.0025],
        [0.0025, 0.0025],
        [0.0025, 0.0025]])


$\frac{\partial{s}}{\partial{x}} = \frac{\partial{s}}{\partial{r}} \cdot \frac{\partial{r}}{\partial{x}}$

For the above code $a$ represents $\frac{\partial{s}}{\partial{r}}$ and then $x.grad$ gives directly $\frac{\partial{s}}{\partial{x}}$



## Autodiff example that looks like what we have been doing