In [1]:
import numpy as np
import torch

# Torch tensor
## Construct tensors

In [2]:
# Construct tensors by giving only the shape; each call below builds a 2x5 tensor.
# torch.empty does not set the values: the numbers it prints are arbitrary.
x = torch.empty(2, 5)
print(x)
# torch.rand fills the tensor with random values.
x = torch.rand(2, 5)
print(x)
# torch.zeros fills the tensor with zeros; dtype=torch.long asks for integer entries.
x = torch.zeros(2, 5, dtype=torch.long)
print(x)

tensor([[1.3696e+36, 7.5810e-43, 1.3696e+36, 7.5810e-43, 8.7441e-43],
        [0.0000e+00, 8.7581e-43, 0.0000e+00, 1.9519e+36, 7.5810e-43]])
tensor([[0.1845, 0.0280, 0.7416, 0.9247, 0.3055],
        [0.8828, 0.4518, 0.2703, 0.4701, 0.6875]])
tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])


In [3]:
# Construct a tensor from data
# A Python list becomes a 1-dimensional tensor holding the listed values.
x = torch.tensor([5.5, 3])
print(x)
# A single Python number becomes a zero-dimensional (scalar) tensor holding that value.
x = torch.tensor(3)
print(x)

tensor([5.5000, 3.0000])
tensor(3)


In [4]:
# Get size/shape
# torch.Size is in fact a tuple, so it supports all tuple operations. 
# A 2-element vector reports torch.Size([2]); a scalar tensor reports the empty torch.Size([]).
print(torch.tensor([5.5, 3]).size())
print(torch.tensor(3).size())

# Numpy-like .shape
# The .shape attribute prints the same torch.Size values as the .size() calls above.
print(torch.tensor([5.5, 3]).shape)
print(torch.tensor(3).shape)

torch.Size([2])
torch.Size([])
torch.Size([2])
torch.Size([])


In [5]:
# DO NOT use torch.Tensor to build tensors from data.
# With an integer argument it behaves like torch.empty: torch.Tensor(3) yields a
# 3-element tensor with arbitrary contents, whereas torch.tensor(3) (lowercase) yields
# a scalar tensor holding the value 3.
x = torch.Tensor(3)
print(x)

tensor([ 0.0000e+00,  0.0000e+00, -2.2324e-10])


## Operations

In [6]:
# Addition: three equivalent ways to add two tensors of the same shape.
x = torch.rand(2, 5)
y = torch.rand(2, 5)
# Operator form and function form print the same result.
print(x + y)
print(torch.add(x, y))

# out= writes the sum into an already allocated tensor instead of returning a new one.
res = torch.empty(2, 5)
torch.add(x, y, out=res)
print(res)

tensor([[1.2339, 1.0062, 1.2969, 0.1664, 0.8805],
        [0.7677, 0.3182, 1.0001, 0.5139, 0.8270]])
tensor([[1.2339, 1.0062, 1.2969, 0.1664, 0.8805],
        [0.7677, 0.3182, 1.0001, 0.5139, 0.8270]])
tensor([[1.2339, 1.0062, 1.2969, 0.1664, 0.8805],
        [0.7677, 0.3182, 1.0001, 0.5139, 0.8270]])


In [7]:
# In-place addition
# Any operation that mutates a tensor in-place is post-fixed with an _. For example: x.copy_(y), x.t_(), will change x.
# After this call y itself holds x + y, so the values printed match the sums printed above.
# NOTE: this cell is not idempotent; running it again adds x to y a second time.
y.add_(x)
print(y)

tensor([[1.2339, 1.0062, 1.2969, 0.1664, 0.8805],
        [0.7677, 0.3182, 1.0001, 0.5139, 0.8270]])


In [8]:
# Numpy-like indexing
# y[:, 1] takes the element at column index 1 from every row, giving a 2-element tensor.
print(y[:, 1])

tensor([1.0062, 0.3182])


In [9]:
# Resize/reshape
# x holds 4 * 4 = 16 elements; .view() presents the same elements under a new shape.
x = torch.randn(4, 4)
y = x.view(16)
z = x.view(-1, 8)  # the size -1 is inferred from other dimensions (16 / 8 = 2 rows)
print(x.size(), y.size(), z.size())

# Numpy-like .reshape()
# Called with the same arguments, .reshape() yields the same sizes as .view() above.
y = x.reshape(16)
z = x.reshape(-1, 8)  # the size -1 is inferred from other dimensions (16 / 8 = 2 rows)
print(x.size(), y.size(), z.size())

torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])
torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])


In [10]:
# Extract the single element of a one-element tensor as a plain Python number.
# print(x) still shows a tensor; x.item() shows the bare number.
x = torch.randn(1)
print(x)
print(x.item())

tensor([0.8660])
0.8660114407539368


## Numpy bridge

In [11]:
# Convert a torch tensor to a numpy array with .numpy()
x = torch.ones(5)
print(x)
x_np = x.numpy()
print(x_np)

# The values are shared between the torch tensor and the numpy array:
# the in-place add_ on x is also visible through x_np.
x.add_(1)
print(x)
print(x_np)

tensor([1., 1., 1., 1., 1.])
[1. 1. 1. 1. 1.]
tensor([2., 2., 2., 2., 2.])
[2. 2. 2. 2. 2.]


In [12]:
# Build a torch tensor from a numpy array with torch.from_numpy; the two share their values.
# The resulting tensor prints with dtype=torch.float64, unlike the tensors created by torch itself above.
a = np.ones(5)
b = torch.from_numpy(a)
np.add(a, 1, out=a) # writes the result into a itself, so b shows it too; a = a+1 would instead bind a to a new array
print(a)
print(b)

[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)


## CUDA tensors

In [13]:
# Report whether torch can use CUDA; the next cell only runs its GPU code when this is True.
print(torch.cuda.is_available())

False


In [14]:
# Start from a tensor created the usual way.
x = torch.ones(5)

# Everything below is skipped (and prints nothing) when CUDA is not available.
if torch.cuda.is_available():
    device = torch.device("cuda")          # a CUDA device object
    y = torch.ones_like(x, device=device)  # directly create a tensor on GPU
    x = x.to(device)                       # or just use strings ``.to("cuda")``
    z = x + y
    print(z)
    print(z.to("cpu", torch.double))       # ``.to`` can also change dtype together!

## Torch tensor indexing
Check how indexing affects the number of dimensions of the result, both for a torch tensor and for the (deprecated) Variable wrapper.

In [15]:
# Indexing a scalar from torch-tensor would return a zero-dimensional torch-tensor
# The example tensor is built with torch.tensor and float literals rather than the legacy
# torch.Tensor constructor, following this notebook's own advice to avoid torch.Tensor.
# The values and the float dtype are the same, so the displayed result is unchanged.
x = torch.tensor([1., 2., 3.])
x0 = x[0]
x0, type(x0), x0.size()

(tensor(1.), torch.Tensor, torch.Size([]))

In [16]:
# Slicing a range out of a tensor returns a tensor that keeps the sliced dimension:
# x[:2] holds the first two elements and has size torch.Size([2]).
x0 = x[:2]
x0, type(x0), x0.size()

(tensor([1., 2.]), torch.Tensor, torch.Size([2]))

### Tensors and Variables have merged
torch.Tensor and torch.autograd.Variable are now the same class.  
More precisely, torch.Tensor is capable of tracking history and behaves like the old Variable; Variable wrapping continues to work as before but returns an object of type torch.Tensor.  
This means that you don’t need the Variable wrapper everywhere in your code anymore.  

Variable.data was the primary way to get the underlying Tensor from a Variable. After this merge, calling y = x.data still has similar semantics: y will be a Tensor that shares the same data with x, is unrelated to the computation history of x, and has requires_grad=False.  
However, Variable.data can be unsafe in some cases. Any changes on x.data wouldn’t be tracked by autograd, and the computed gradients would be incorrect if x is needed in a backward pass. A safer alternative is to use x.detach(), which also returns a Tensor that shares data with requires_grad=False, but will have its in-place changes reported by autograd if x is needed in backward. 

In [17]:
# Indexing a scalar from a Variable returns a zero-dimensional torch.Tensor, exactly like
# indexing a plain tensor: the result shown below has size torch.Size([]).
# NOTE: Variable is deprecated. The earlier remark that a Variable is at least
# 1-dimensional (a vector) described the pre-merge behaviour and no longer applies.
from torch.autograd import Variable

# Wrapping still works, but the wrapped object is an ordinary torch.Tensor.
x_var = Variable(x)
x0 = x_var[0]
x0, type(x0), x0.size()

(tensor(1.), torch.Tensor, torch.Size([]))

In [18]:
# Slicing a Variable also returns a plain torch.Tensor (here with size torch.Size([2])),
# not a separate Variable type.
x0 = x_var[:2]
x0, type(x0), x0.size()

(tensor([1., 2.]), torch.Tensor, torch.Size([2]))

# Autograd

In [19]:
# A tensor created without requires_grad reports requires_grad == False.
x = torch.ones(2, 2)
print(x)
print(x.requires_grad)

# Passing requires_grad=True at creation turns the flag on; it is also shown when the tensor is printed.
x = torch.ones(2, 2, requires_grad=True)
print(x)
print(x.requires_grad)

tensor([[1., 1.],
        [1., 1.]])
False
tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
True


In [20]:
# y is computed from x, which requires grad, so y requires grad as well.
y = x + 2
print(y)
print(y.requires_grad)
# y was created as a result of an operation, so it has a grad_fn
# (an AddBackward0 object here, because y came from an addition).
print(y.grad_fn)

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)
True
<AddBackward0 object at 0x0000021D7D182208>


In [21]:
# Continue the computation: z = 3 * y^2 element-wise, then reduce z to a single scalar with mean().
z = 3 * y ** 2
out = z.mean()

# Both results carry a grad_fn recording the last operation that produced them.
print(z)
print(out)

tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>)
tensor(27., grad_fn=<MeanBackward0>)


In [22]:
# NOTE: now x.grad is None
print(x.grad)

# Call backward function
# out is a scalar, so backward() needs no argument.
out.backward()

# NOTE: now x.grad is not None, it is d(out)/dx
# out = mean(3 * (x + 2) ** 2) over 4 elements, so d(out)/dx = 1.5 * (x + 2) = 4.5 at x = 1.
print(x.grad)

None
tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])


In [23]:
# What if the output is NOT a scalar?
x = torch.tensor([1, 2, 3], dtype=torch.float, requires_grad=True)
out = x * 10000

# NOTE: out is a vector (instead of a scalar), so torch.autograd could not compute the full Jacobian directly
# But if we just want the vector-Jacobian product, simply pass the vector to backward as argument:
v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float)
out.backward(v)

# Each out[i] depends only on x[i] with slope 10000, so x.grad equals 10000 * v.
print(x.grad)

tensor([1.0000e+03, 1.0000e+04, 1.0000e+00])


In [24]:
# How to stop autograd from tracking history on Tensors?
# Use torch.no_grad()
# Outside the block, a result computed from x requires grad just like x does.
print(x.requires_grad)
print((x ** 2).requires_grad)

# Inside the block, x keeps requires_grad=True, but results computed from it do not require grad.
with torch.no_grad():
    print(x.requires_grad)
    print((x ** 2).requires_grad)

True
True
True
False


In [25]:
# Use .detach() to get a new Tensor with the same content but that does not require gradients
print(x)

# y prints the same values as x, without the requires_grad=True marker.
y = x.detach()
print(y)
print(y.requires_grad)

tensor([1., 2., 3.], requires_grad=True)
tensor([1., 2., 3.])
False
