Tensors are higher dimensional matrices that represents number having relations between them.

In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt

# Initialise

In [2]:
x = torch.zeros(3,2)
print(x)
x = torch.ones(2,2)
print(x)
x = torch.rand(3,2)
print(x)

tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])
tensor([[1., 1.],
        [1., 1.]])
tensor([[0.5385, 0.9672],
        [0.4080, 0.7146],
        [0.7679, 0.9057]])


In [3]:
x = torch.empty(3,2)        # Fill values that are present in the memory.
print(x)
y = torch.zeros_like(x)     # Fill with 0 but with the shape of X.
print(y)

tensor([[7.7987e-36, 0.0000e+00],
        [3.3631e-44, 0.0000e+00],
        [       nan, 0.0000e+00]])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])


In [4]:
x = torch.linspace(0,1, steps = 5) # Create linear space from 0 to 1 with 5 steps.
print(x)

tensor([0.0000, 0.2500, 0.5000, 0.7500, 1.0000])


In [5]:
x = torch.tensor([[1,2],
                  [3,4],
                  [5,6]])
print(x)

tensor([[1, 2],
        [3, 4],
        [5, 6]])


# Slicing Tensors

In [6]:
print(x)
print(x.size())
print(x[:, 1:2])
print(x[:2, :2])

tensor([[1, 2],
        [3, 4],
        [5, 6]])
torch.Size([3, 2])
tensor([[2],
        [4],
        [6]])
tensor([[1, 2],
        [3, 4]])


In [7]:
y = x[1,1]
print(y) # In Tensor format
print(y.item()) # To get the numerical value

tensor(4)
4


# Reshaping Tensors

In [8]:
print(x)
y = x.view(2,3) # Reshape to 2 rows and 3 columns
print(y)

tensor([[1, 2],
        [3, 4],
        [5, 6]])
tensor([[1, 2, 3],
        [4, 5, 6]])


In [9]:
y = x.view(6,-1)
print(y)

tensor([[1],
        [2],
        [3],
        [4],
        [5],
        [6]])


# Simple Tensor Operations

In [10]:
x = torch.ones([3,2])
y = torch.ones([3,2])
print(x)
print(y)

# Point wise addition, subtraction and multiplication is done.
z = x + y
print(z)
z = x - y
print(z)
z = x * y
print(z)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])
tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])
tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])
tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])


In [11]:
z = y.add(x) # X is added to Y
print(z)
print(y)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])
tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])


In [12]:
z = y.add_(x) # X is added to Y and also modify Y. (Addition in place)
print(z)
print(y)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])
tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])


# Numpy interfacing to Pytorch

In [13]:
x_np = x.numpy()
print( type(x) )
print( type(x_np) )
print(x)

<class 'torch.Tensor'>
<class 'numpy.ndarray'>
tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])


In [14]:
a_np = np.random.randn(5)
print( type(a_np) )
a_pt = torch.from_numpy(a_np)
print( type(a_pt) )
print(a_pt)

<class 'numpy.ndarray'>
<class 'torch.Tensor'>
tensor([2.0655, 0.5605, 0.9578, 0.9218, 0.5429], dtype=torch.float64)


Making changes to numpy array also changes the tensor as they both access the same underlying area.

In [15]:
np.add(a_np, 1, out = a_np)
print(a_np)
print(a_pt)

[3.06554902 1.56053815 1.9578011  1.92181119 1.54294141]
tensor([3.0655, 1.5605, 1.9578, 1.9218, 1.5429], dtype=torch.float64)


In [16]:
%%time
for i in range(10):
  a = np.random.randn(10000,10000)
  b = np.random.randn(10000,10000)
  c = a + b

CPU times: user 1min 21s, sys: 569 ms, total: 1min 22s
Wall time: 1min 22s


In [17]:
%%time
for i in range(10):
  a = torch.randn([10000, 10000])
  b = torch.randn([10000, 10000])
  c = a + b

CPU times: user 19.3 s, sys: 1.94 ms, total: 19.3 s
Wall time: 19.3 s


In [18]:
%%time
for i in range(100):
  a = np.random.randn(100,100)
  b = np.random.randn(100,100)
  c = np.matmul(a, b)

CPU times: user 129 ms, sys: 95 ms, total: 224 ms
Wall time: 117 ms


In [19]:
%%time
for i in range(100):
  a = torch.randn([100, 100])
  b = torch.randn([100, 100])
  c = torch.matmul(a, b)

CPU times: user 55.1 ms, sys: 47 ms, total: 102 ms
Wall time: 61.6 ms


# GPU - CUDA Support

In [20]:
print(torch.cuda.device_count())

1


In [21]:
print(torch.cuda.device(0))
print(torch.cuda.get_device_name(0))

<torch.cuda.device object at 0x7f9cdf8c3908>
Tesla P100-PCIE-16GB


In [22]:
cuda0 = torch.device('cuda:0')

In [23]:
# Creating 3,2 Tensors using CUDA
a = torch.ones(3, 2, device=cuda0)
b = torch.ones(3, 2, device=cuda0)
c = a + b
print(c)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]], device='cuda:0')


In [24]:
print(a)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]], device='cuda:0')


In [25]:
%%time
for i in range(10):
  a = np.random.randn(10000,10000)
  b = np.random.randn(10000,10000)
  np.add(b, a)

CPU times: user 1min 21s, sys: 284 ms, total: 1min 22s
Wall time: 1min 22s


In [26]:
%%time
for i in range(10):
  a_cpu = torch.randn([10000, 10000])
  b_cpu = torch.randn([10000, 10000])
  b_cpu.add_(a_cpu)

CPU times: user 19.3 s, sys: 316 ms, total: 19.6 s
Wall time: 19.6 s


In [27]:
%%time
for i in range(10):
  a = torch.randn([10000, 10000], device=cuda0)
  b = torch.randn([10000, 10000], device=cuda0)
  b.add_(a)

CPU times: user 1.78 ms, sys: 1.98 ms, total: 3.76 ms
Wall time: 7.75 ms


In [28]:
%%time
for i in range(10):
  a = np.random.randn(10000,10000)
  b = np.random.randn(10000,10000)
  np.matmul(b, a)

CPU times: user 16min 25s, sys: 4.28 s, total: 16min 29s
Wall time: 9min 2s


In [29]:
%%time
for i in range(10):
  a_cpu = torch.randn([10000, 10000])
  b_cpu = torch.randn([10000, 10000])
  torch.matmul(a_cpu, b_cpu)

CPU times: user 2min 25s, sys: 177 ms, total: 2min 25s
Wall time: 2min 25s


In [30]:
%%time
for i in range(10):
  a = torch.randn([10000, 10000], device=cuda0)
  b = torch.randn([10000, 10000], device=cuda0)
  torch.matmul(a, b)

CPU times: user 4.7 ms, sys: 1 ms, total: 5.71 ms
Wall time: 5.7 ms


# AutoGrad

In [31]:
x = torch.ones([3,2], requires_grad = True)
print(x)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]], requires_grad=True)


In [32]:
y = x + 5
print(y)

tensor([[6., 6.],
        [6., 6.],
        [6., 6.]], grad_fn=<AddBackward0>)


In [33]:
z = y * y + 1
print(z)

tensor([[37., 37.],
        [37., 37.],
        [37., 37.]], grad_fn=<AddBackward0>)


In [34]:
t = torch.sum(z) # Adding all the elements of Tensor.
print(t)

tensor(222., grad_fn=<SumBackward0>)


Differentiating t by x.

In [35]:
t.backward()

In [36]:
print(x.grad)

tensor([[12., 12.],
        [12., 12.],
        [12., 12.]])


$t = \sum_i z_i, z_i = y_i^2 + 1, y_i = x_i + 5$

$\frac{\partial t}{\partial x_i} = \frac{\partial z_i}{\partial x_i} = \frac{\partial z_i}{\partial y_i} \frac{\partial y_i}{\partial x_i} = 2 * y_i $

At x = 1, y = 6, $\frac{\partial t}{\partial x_i} = 12$

In [37]:
x = torch.ones([3,2], requires_grad = True)
y = x + 5
print(y)
r = 1 / ( 1 + torch.exp(-y) ) # Derivative of Sigmoid = Output * (1 - Output)
print(r)
s = torch.sum(r)
s.backward()
print(x.grad)

tensor([[6., 6.],
        [6., 6.],
        [6., 6.]], grad_fn=<AddBackward0>)
tensor([[0.9975, 0.9975],
        [0.9975, 0.9975],
        [0.9975, 0.9975]], grad_fn=<MulBackward0>)
tensor([[0.0025, 0.0025],
        [0.0025, 0.0025],
        [0.0025, 0.0025]])


In [38]:
x = torch.ones([3, 2], requires_grad=True)
y = x + 5
r = 1/(1 + torch.exp(-y))
print(r)
a = torch.ones([3,2])
r.backward(a)
print(x.grad)

tensor([[0.9975, 0.9975],
        [0.9975, 0.9975],
        [0.9975, 0.9975]], grad_fn=<MulBackward0>)
tensor([[0.0025, 0.0025],
        [0.0025, 0.0025],
        [0.0025, 0.0025]])


In the above code, a -> represents ds/dx which was not available for us. Since dr/dx is computed using the grad and backward function, to satisfy the chain rule and to get the output, a point wise multiplication of a and r is done.

$\frac{\partial{s}}{\partial{x}} = \frac{\partial{s}}{\partial{r}} \cdot \frac{\partial{r}}{\partial{x}}$

For the above code $a$ represents $\frac{\partial{s}}{\partial{r}}$ and then $x.grad$ gives directly $\frac{\partial{s}}{\partial{x}}$



## Autodiff example with Own Data and Loss Function

Here X is the input data, Y is the real output, Y_hat is the predicted output.<br>
w and b are the parameters used in y_hat. Here MSE loss function is used.

In [39]:
x = torch.randn([20, 1], requires_grad=True)    
y = 3*x - 2                                     

In [40]:
w = torch.tensor([1.0], requires_grad= True)
b = torch.tensor([1.0], requires_grad= True)

y_hat = w*x + b

loss = torch.sum((y_hat - y)**2)

In [41]:
print(loss)

tensor(227.6503, grad_fn=<SumBackward0>)


In [42]:
loss.backward()

If w.grad returns a positive number, then the selected value of w is greater than the optimal value. If it negative, then it is lower than the optimal value.

In [43]:
print(w.grad, b.grad)

tensor([-65.6806]) tensor([107.9798])


## Do it in a loop

In [44]:
learning_rate = 0.01

w = torch.tensor([1.], requires_grad=True)
b = torch.tensor([1.], requires_grad=True)

print(w.item(), b.item())

for i in range(10):
    
    x = torch.randn([20, 1])
    y = 3*x - 2
    
    y_hat = w*x + b
    loss = torch.sum((y_hat - y)**2)
    
    loss.backward()

    # torch.no_grad() is used to prevent weight updates from further building the forward propogation.
    with torch.no_grad():
        w -= learning_rate * w.grad
        b -= learning_rate * b.grad

        w.grad.zero_()
        b.grad.zero_()
    print(w.item(), b.item())

1.0 1.0
1.301446557044983 -0.05205392837524414
2.2686986923217773 -0.8300359845161438
2.6972808837890625 -1.3352370262145996
2.8399739265441895 -1.601739525794983
2.7917866706848145 -1.7154347896575928
2.910399913787842 -1.8234485387802124
2.938993215560913 -1.8965004682540894
2.9650840759277344 -1.9387147426605225
2.9805994033813477 -1.9650235176086426
2.9891178607940674 -1.9791890382766724


In [45]:
%%time
learning_rate = 0.001
N = 10000000
epochs = 200

w = torch.rand([N], requires_grad=True)
b = torch.ones([1], requires_grad=True)

print(torch.mean(w).item(), b.item())

for i in range(epochs):
    
    x = torch.randn([N])
    y = torch.dot(3 * torch.ones([N]), x) - 2
    
    y_hat = torch.dot(w, x) + b
    loss = torch.sum((y_hat - y)**2)
    
    loss.backward()

    with torch.no_grad():
        w -= learning_rate * w.grad
        b -= learning_rate * b.grad

        w.grad.zero_()
        b.grad.zero_()

print(torch.mean(w).item(), b.item())

0.49982041120529175 1.0
nan nan
CPU times: user 29.4 s, sys: 32 ms, total: 29.5 s
Wall time: 29.5 s


Using GPU to create tensors.

In [46]:
%%time
learning_rate = 0.001
N = 10000000
epochs = 200

w = torch.rand([N], requires_grad=True, device = cuda0)
b = torch.ones([1], requires_grad=True, device = cuda0)

print(torch.mean(w).item(), b.item())

for i in range(epochs):
    
    x = torch.randn([N], device = cuda0)
    y = torch.dot(3 * (torch.ones([N], device = cuda0) ), x) - 2
    
    y_hat = torch.dot(w, x) + b
    loss = torch.sum((y_hat - y)**2)
    
    loss.backward()

    with torch.no_grad():
        w -= learning_rate * w.grad
        b -= learning_rate * b.grad

        w.grad.zero_()
        b.grad.zero_()

print(torch.mean(w).item(), b.item())

0.49994519352912903 1.0
nan nan
CPU times: user 286 ms, sys: 108 ms, total: 394 ms
Wall time: 421 ms
