In [0]:
import torch

In [0]:
import numpy as np


<h4>Initialise Tensors</h4>

In [0]:
# Tensor constructors mirror their NumPy counterparts (np.ones, np.zeros, np.random.rand).
# Each call builds a 3x2 tensor; x ends up bound to the random one.
for make_tensor in (torch.ones, torch.zeros, torch.rand):
    x = make_tensor(3, 2)
    print(x)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])
tensor([[0.4314, 0.6093],
        [0.5824, 0.3908],
        [0.8770, 0.7935]])


In [0]:
# torch.empty only allocates storage; the values printed are whatever was in memory.
x = torch.empty((3, 2))
print(x)
# zeros_like builds a zero tensor with the same shape (and dtype/device) as its argument.
y = torch.zeros_like(x)
print(y)


tensor([[4.5402e+14, 0.0000e+00],
        [4.4842e-44, 0.0000e+00],
        [       nan, 0.0000e+00]])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])


In [0]:
# Five evenly spaced values covering [0, 1], both endpoints included.
x = torch.linspace(start=0, end=1, steps=5)
print(x)

tensor([0.0000, 0.2500, 0.5000, 0.7500, 1.0000])


In [0]:
# Build a 3x2 integer tensor from nested Python lists (one inner list per row).
x = torch.tensor([[1, 2], [3, 4], [5, 6]])
print(x)

tensor([[1, 2],
        [3, 4],
        [5, 6]])


<h4>Slicing Tensors</h4>


In [0]:
# .shape reports the same torch.Size as .size()
print(x.shape)
# Every row, column index 1 (i.e. the second column)
print(x[:, 1])

torch.Size([3, 2])
tensor([2, 4, 6])


In [0]:
# Picking a single element (row 2, column 1) still yields a 0-dimensional tensor ...
y = x[2][1]
print(y)
# ... and item() unwraps it into a plain Python number.
print(y.item())

tensor(6)
6


In [0]:
# view() is similar to numpy reshape: ask for 6 rows and let -1 infer the
# remaining dimension (here 1), giving a 6x1 column as shown in the output.
y = x.view(6,-1)
print(y)

tensor([[1],
        [2],
        [3],
        [4],
        [5],
        [6]])


<h4>Tensor Operations</h4>

In [0]:
# Two independent 3x2 tensors of ones used as operands in the cells below.
x = torch.ones((3, 2))
y = torch.ones_like(x)

In [0]:
# Element-wise sum of the two tensors
z = torch.add(x, y)
print(z)
# Element-wise (Hadamard) product, not a matrix multiplication
z = torch.mul(x, y)
print(z)


tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])
tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])


In [0]:
# Out-of-place: add() returns a new tensor holding y + x and leaves y untouched
z = y.add(x)

print(y)
print(z)

# In-place (trailing underscore): add_() overwrites y with the sum and returns y,
# so y and z print the same values afterwards.
# Note: because y is mutated, re-running this cell keeps increasing y.
z = y.add_(x)
print(y)
print(z)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])
tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])
tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])
tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])


<h4>NUMPY vs PYTORCH</h4>

In [0]:
# Converting pytorch to numpy
# numpy() on a CPU tensor returns an ndarray view of the same data (no copy is made)
x_np = x.numpy()
print(type(x), type(x_np))
print(x_np)

<class 'torch.Tensor'> <class 'numpy.ndarray'>
[[1. 1.]
 [1. 1.]
 [1. 1.]]


In [0]:
# Converting numpy to pytorch
a = np.random.randn(4)
print(a)
# from_numpy keeps the array's dtype, hence dtype=torch.float64 in the printed tensor
a_pt = torch.from_numpy(a)
print(a_pt)

[ 0.47661568 -0.41294102  0.25143192 -0.11363291]
tensor([ 0.4766, -0.4129,  0.2514, -0.1136], dtype=torch.float64)


In [0]:
# torch.from_numpy does not copy: the resulting tensor and the NumPy array share
# the same memory, so an in-place update of the array shows up in the tensor too.

a += 1
print(a)
print(a_pt)

[1.47661568 0.58705898 1.25143192 0.88636709]
tensor([1.4766, 0.5871, 1.2514, 0.8864], dtype=torch.float64)


In [0]:
%%time
for i in range(100):
  a = np.random.randn(100,100)
  b = np.random.randn(100,100)
  c = np.matmul(a, b)

CPU times: user 163 ms, sys: 106 ms, total: 269 ms
Wall time: 142 ms


In [0]:
%%time
for i in range(100):
  a = torch.randn(100, 100)
  b = torch.randn(100, 100)
  c = torch.matmul(a, b)

CPU times: user 26.1 ms, sys: 1.99 ms, total: 28 ms
Wall time: 34.4 ms


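Note that in this run PyTorch is faster than NumPy even on the CPU (about 4x). The comparison is not exactly like-for-like, though: `np.random.randn` produces float64 arrays, whereas `torch.randn` produces float32 tensors by default.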

In [0]:
%%time
for i in range(10):
#  a = np.random.randn(10000,10000)
#  b = np.random.randn(10000,10000)
#  c = a + b

CPU times: user 1min 34s, sys: 1.37 s, total: 1min 36s
Wall time: 1min 36s


In [27]:
%%time
for i in range(10):
  a = torch.randn([10000, 10000])  
  b = torch.randn([10000, 10000])
  c = a + b

CPU times: user 14.5 s, sys: 845 ms, total: 15.3 s
Wall time: 15.4 s


Generating and adding two 10000 x 10000 matrices 10 times takes about 96 s using NumPy but only about 15 s using PyTorch on the CPU.

<h4>Using CUDA i.e GPU </h4>

In [0]:
# Number of CUDA-capable GPUs visible to PyTorch (0 means no usable GPU)
print(torch.cuda.device_count())

1


In [0]:
# torch.cuda.device(0) is a device-selection object, so printing it only shows its repr
print(torch.cuda.device(0))
# Human-readable model name of GPU 0
print(torch.cuda.get_device_name(0))

<torch.cuda.device object at 0x7f9dd2e857f0>
Tesla P100-PCIE-16GB


In [0]:
# Handle for the first GPU; passed as device= when creating tensors below
cuda0 = torch.device('cuda:0')

In [0]:
# Create both operands directly on the GPU; their sum is then produced there as well.
a = torch.randn(3, 4, device=cuda0)
b = torch.randn(3, 4, device=cuda0)
c = torch.add(a, b)
print(c)  # Observe device='cuda:0' in the output

tensor([[ 1.7822, -0.1146, -1.0174,  3.7797],
        [-0.5334,  0.6524,  0.8704,  0.5706],
        [-0.6381,  1.6469, -1.3934,  1.2304]], device='cuda:0')


In [0]:
%%time
for i in range(10):
  a  = torch.randn([10000,10000], device = cuda0)
  b =  torch.randn([10000,10000], device = cuda0)
  c = b.add(a)

CPU times: user 921 µs, sys: 2.61 ms, total: 3.53 ms
Wall time: 6.44 ms


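Generating and adding two 10000 x 10000 matrices 10 times took just 6.44 ms using PyTorch on the GPU, compared with about 15 s using PyTorch on the CPU. Keep in mind that CUDA operations run asynchronously: unless `torch.cuda.synchronize()` is called before the timer stops, the measured time largely reflects how long it takes to launch the kernels, not how long the GPU takes to finish them.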

<h4>Autodiff / Autograd</h4>

In [0]:
# requires_grad=True asks autograd to record every operation applied to x
x = torch.ones(3, 2, requires_grad=True)

In [0]:
# Element-wise affine map of x; since x requires grad, the result is tracked by autograd
y = 3*x + 5

In [0]:
# The grad_fn shown in the output records the operation that produced y
print(y)

tensor([[8., 8.],
        [8., 8.],
        [8., 8.]], grad_fn=<AddBackward0>)


In [0]:
# Element-wise square of y
z = y * y

In [0]:
# backward() without arguments can only be called on a scalar output,
# so reduce z to a single value first
s = torch.sum(z)

In [0]:
# Backpropagate from the scalar s; this fills x.grad with ds/dx
s.backward()

In [0]:
# ds/dx_i = 2 * y_i * 3 = 2 * 8 * 3 = 48 for every element
print(x.grad)

tensor([[48., 48.],
        [48., 48.],
        [48., 48.]])


$s = \sum_i z_i, z_i = y_i^2, y_i =3 * x_i + 5$

$\frac{\partial s}{\partial x_i} = \frac{\partial z_i}{\partial x_i} = \frac{\partial z_i}{\partial y_i} \frac{\partial y_i}{\partial x_i} = 2y_i \times 3$


<h4> Gradient Descent using Pytorch </h4>

In [0]:
# Trainable parameters of the linear model y = w*x + b, both initialised to 1
w = torch.ones(1, requires_grad=True)
b = torch.ones(1, requires_grad=True)

In [0]:
# Synthetic training set: 20 random inputs and their targets on the line y = 3x + 2
x = torch.randn(20, 1)
y = x * 3 + 2
  

Note that the true values of w and b are 3 and 2. We pretend that we do not know them, initialise both w and b to 1, and train the model to recover them.

In [0]:
for i in range(10):

  # Forward pass and summed squared-error loss
  y_pred = w * x + b
  residual = y_pred - y
  loss = (residual ** 2).sum()

  # Backward pass: accumulates d(loss)/dw and d(loss)/db into w.grad and b.grad
  loss.backward()

  # Report the parameters before this step's update
  print(w.item(), b.item())

  # The update itself must not be tracked by autograd
  with torch.no_grad():
    for param in (w, b):
      # Update in place. Do not write param = param - 0.01 * param.grad, see
      # https://discuss.pytorch.org/t/strange-problem-when-manually-gradient-descent/24138
      param -= 0.01 * param.grad
      # Clear the gradient, otherwise the next backward() adds to it
      param.grad.zero_()



1.0 1.0
2.0329179763793945 1.519454002380371
2.5321974754333496 1.7694332599639893
2.773599147796631 1.889600396156311
2.8903586864471436 1.9472824335098267
2.9468584060668945 1.974918007850647
2.974215030670166 1.9881248474121094
2.987471103668213 1.9944149255752563
2.993901252746582 1.9973973035812378
2.9970242977142334 1.998802661895752


Observe how the values of w and b converge towards the true values as training progresses.

I changed the following 2 lines as I thought they were the same, that is instead of this:

with torch.no_grad():    

          w -= 0.01 * w.grad   

          b -= 0.01 * b.grad
   

I wrote 

with torch.no_grad():    

          w = w -  0.01 * w.grad   

          b = b  -  0.01 * b.grad

However, it gave the error: 'NoneType' object has no attribute 'zero_'.

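Later I got to know that the first code performs an in-place operation on the existing w, whereas the second creates a new tensor w. Because that new tensor is created inside torch.no_grad(), it does not require gradients, so its .grad is None, which is why calling w.grad.zero_() fails. So make sure that you do it the first way.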

https://discuss.pytorch.org/t/strange-problem-when-manually-gradient-descent/24138 