In [0]:
# `from __future__` imports must be the first statement of a module, so this
# line has to come before every other import.
from __future__ import print_function

import numpy as np
import torch

# Basics

## What is a tensor

In [3]:
# Allocate a 5x3 tensor without initializing it: the printed values are
# whatever was already in that memory.
x = torch.empty(5, 3)
print(x)

tensor([[3.6996e-36, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00]])


In [4]:
# Build a 5x3 tensor of random values (torch.rand samples uniformly from [0, 1))
x = torch.rand(5, 3)
print(x)

tensor([[0.2796, 0.9085, 0.8924],
        [0.1565, 0.2269, 0.9746],
        [0.4167, 0.8228, 0.1048],
        [0.6481, 0.2558, 0.5121],
        [0.5933, 0.6517, 0.4628]])


In [5]:
# Build a 5x3 tensor filled with zeros whose elements are of dtype long
x = torch.zeros(5, 3, dtype=torch.long)
print(x)

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])


In [6]:
# Build a tensor straight from a Python list of values
x = torch.tensor([5.5, 3.0])
print(x)

tensor([5.5000, 3.0000])


In [7]:
# Create a tensor based on an existing one: randn_like reuses the shape of `x`,
# and its dtype argument can also override the data type.
x = torch.ones(5, 3)
print(x)

x = torch.randn_like(x, dtype=torch.float)
print(x)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])
tensor([[ 0.4020,  0.9801,  1.5505],
        [ 1.4058,  0.7728,  1.0008],
        [-1.0218, -0.4664,  0.9379],
        [-0.2895,  0.1044, -0.7484],
        [-1.0074,  1.0595, -1.4850]])


In [8]:
# size() reports the tensor's shape as a torch.Size object
print(x.size())

torch.Size([5, 3])


## Operations

In [9]:
# Element-wise addition with the + operator
y = torch.rand(5, 3)
print(x + y)

tensor([[ 0.7147,  1.1053,  2.1622],
        [ 1.9070,  1.6949,  1.1223],
        [-0.0746,  0.2281,  1.5796],
        [ 0.5920,  0.5564, -0.3433],
        [-0.1411,  1.9765, -0.9361]])


In [10]:
# Same addition, written as a function call instead of an operator
print(torch.add(x, y))

tensor([[ 0.7147,  1.1053,  2.1622],
        [ 1.9070,  1.6949,  1.1223],
        [-0.0746,  0.2281,  1.5796],
        [ 0.5920,  0.5564, -0.3433],
        [-0.1411,  1.9765, -0.9361]])


In [11]:
# In-place addition: add_ (note the trailing underscore) writes the result
# back into y, so y now holds the sum of the previous y and x.
y.add_(x)
print(y)

tensor([[ 0.7147,  1.1053,  2.1622],
        [ 1.9070,  1.6949,  1.1223],
        [-0.0746,  0.2281,  1.5796],
        [ 0.5920,  0.5564, -0.3433],
        [-0.1411,  1.9765, -0.9361]])


In [12]:
# NumPy-style indexing: take column 1 from every row
print(x[:, 1])

tensor([ 0.9801,  0.7728, -0.4664,  0.1044,  1.0595])


In [13]:
# Reshape with view(): the total number of elements stays the same, and a
# size of -1 is inferred from the remaining dimensions (16 / 8 = 2 rows here).
x = torch.randn(4, 4)
y = x.view(16)
z = x.view(-1, 8)
print(x.size(), y.size(), z.size())

torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])


In [14]:
# Transpose a 2-D tensor: torch.t swaps its two dimensions
x = torch.rand(5, 3)
z = torch.t(x)
print(x.size(), z.size())

torch.Size([5, 3]) torch.Size([3, 5])


## Torch to Numpy Arrays

In [15]:
# Start from a 1-D tensor holding five ones
a = torch.ones(5)
print(a)

tensor([1., 1., 1., 1., 1.])


In [16]:
# numpy() exposes the tensor as a NumPy array; the array and the tensor share
# the same underlying memory (demonstrated in the next cell)
b = a.numpy()
print(b)

[1. 1. 1. 1. 1.]


In [17]:
# Adding to the tensor in place also changes the NumPy array b, because both
# objects are backed by the same memory
a.add_(1)
print(a)
print(b)

tensor([2., 2., 2., 2., 2.])
[2. 2. 2. 2. 2.]


In [18]:
# NumPy array -> tensor: from_numpy() shares memory with the source array,
# so the in-place NumPy addition below is visible through the tensor too.
a = np.ones(5)
b = torch.from_numpy(a)
np.add(a, 1, out=a)
print(a)
print(b)

[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)


## Tensors with GPU

In [28]:
# Use the GPU when one is available; otherwise fall back to the CPU so this
# cell still runs on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
y = torch.ones(5, 3, device=device)  # create a tensor directly on the device
x = torch.ones(5, 3)
x = x.to(device)                     # move an existing tensor to the device
z = x + y
print(z)

# .to() can move the result back to the CPU and set its dtype in one call
z = z.to('cpu', torch.float)
print(z)

print(x)

tensor([[2., 2., 2.],
        [2., 2., 2.],
        [2., 2., 2.],
        [2., 2., 2.],
        [2., 2., 2.]], device='cuda:0')
tensor([[2., 2., 2.],
        [2., 2., 2.],
        [2., 2., 2.],
        [2., 2., 2.],
        [2., 2., 2.]])
tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], device='cuda:0')


# Autograd: Automatic Differentiation

**torch.Tensor** is the central class of the package. **If you set its attribute .requires_grad as True, it starts to track all operations on it**. When you finish your computation you can call .backward() and have all the gradients computed automatically. The gradient for this tensor will be accumulated into .grad attribute.

To stop a tensor from tracking history, you can call .detach() to detach it from the computation history, and to prevent future computation from being tracked.

**To prevent tracking history (and using memory), you can also wrap the code block in `with torch.no_grad():`.** This can be particularly helpful when evaluating a model, because the model may have trainable parameters with `requires_grad=True`, but for which we don’t need the gradients.

In [29]:
# requires_grad=True asks autograd to track every operation on this tensor
x = torch.ones(2, 2, requires_grad=True)
print(x)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)


In [30]:
# y is the result of an operation on x (which requires grad), so the printed
# tensor carries a grad_fn describing how it was produced
y = x + 2
print(y)

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)


y was created as a result of an operation, so it has a grad_fn.

In [31]:
# Element-wise 3 * y^2; the result keeps a grad_fn as well
z = y * y * 3
print(z)

tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>)


In [35]:
# ** binds tighter than *, so this is z * (z ** 2), i.e. z cubed element-wise
v = z * z ** 2
print(v)

tensor([[19683., 19683.],
        [19683., 19683.]], grad_fn=<MulBackward0>)


`.requires_grad_()` (note the trailing underscore) changes the existing tensor's requires_grad flag in place

In [33]:
# A freshly created tensor does not track gradients
a = torch.randn(2, 2)
a = (a * 3) / (a - 1)
print(a.requires_grad)

# requires_grad_() switches the flag on in place; operations performed
# afterwards are recorded, so b gets a grad_fn
a.requires_grad_(True)
print(a.requires_grad)
b = (a * a).sum()
print(b.grad_fn)

False
True
<SumBackward0 object at 0x7f31a0e31550>


**Gradients**

If you want to compute the derivatives, you can call .backward() on a Tensor. If the Tensor is a scalar (i.e. it holds a single element), you don’t need to specify any arguments to backward(); however, if it has more elements, you need to specify a gradient argument that is a tensor of matching shape.

In [0]:
# Build a small graph that ends in a scalar: z = mean(sigmoid(x^2))
x = torch.rand(5, 3, requires_grad=True)
y = x ** 2
z = y.sigmoid().mean()

Let’s backprop now. Because z contains a single scalar, z.backward() is equivalent to z.backward(torch.tensor(1.)).

In [45]:
# Backpropagate from the scalar z; the gradient dz/dx is accumulated in x.grad,
# which is displayed as the cell's result
z.backward()
x.grad

tensor([[0.0005, 0.0195, 0.0157],
        [0.0215, 0.0129, 0.0260],
        [0.0175, 0.0109, 0.0156],
        [0.0118, 0.0113, 0.0008],
        [0.0192, 0.0257, 0.0061]])

**Gradients of Non-Scalar Output**



In [50]:
x = torch.randn(3, requires_grad=True)
y = x * 2

print(y)

# y is a vector, so backward() needs an explicit upstream gradient with the
# same shape as y (the vector of the vector-Jacobian product). A vector of
# ones makes x.grad equal to dy/dx, which is 2 for every element.
y.backward(torch.ones_like(y))

tensor([ 1.9779, -3.1842, -0.6307], grad_fn=<MulBackward0>)


**Loss.backward() raises error ‘grad can be implicitly created only for scalar outputs’**

When you call loss.backward(), it is a shortcut for loss.backward(torch.Tensor([1])). This is only valid if loss is a tensor containing a single element.