In [1]:
import torch
import numpy as np

# Seed every random number generator the notebook draws from so a fresh
# "Restart & Run All" reproduces the same values. NumPy is seeded as well
# because a later cell samples with np.random.
np.random.seed(42)
# Kept as the last expression so the cell still displays the torch Generator.
torch.manual_seed(42)

<torch._C.Generator at 0x7fcb13662430>

## Creating Tensors

In [2]:
def describe(x):
    """Print a tensor's type string, its shape, and its contents.

    Args:
        x: a tensor; it must provide ``type()`` and ``shape``.

    Prints three labelled entries to stdout and returns None.
    """
    report = (
        ("type: {}", x.type()),
        ("shape: {}", x.shape),
        ("content: \n{}", x),
    )
    for template, value in report:
        print(template.format(value))

#### Create a tensor with torch.Tensor

In [3]:
# torch.Tensor(rows, cols) only allocates storage for a FloatTensor; the values
# are whatever was already in memory, so the printed zeros are not guaranteed.
uninitialized = torch.Tensor(2, 3)
describe(uninitialized)

type: torch.FloatTensor
shape: torch.Size([2, 3])
content: 
tensor([[0., 0., 0.],
        [0., 0., 0.]])


#### Create a randomly initialized tensor

In [4]:
# torch.rand samples uniformly from the half-open interval [0, 1).
uniform_sample = torch.rand(2, 3)
describe(uniform_sample)

type: torch.FloatTensor
shape: torch.Size([2, 3])
content: 
tensor([[0.8823, 0.9150, 0.3829],
        [0.9593, 0.3904, 0.6009]])


In [5]:
# torch.randn samples from the standard normal distribution.
normal_sample = torch.randn(2, 3)
describe(normal_sample)

type: torch.FloatTensor
shape: torch.Size([2, 3])
content: 
tensor([[ 1.1561,  0.3965, -2.4661],
        [ 0.3623,  0.3765, -0.1808]])


#### Create a filled tensor
- Any PyTorch method with a trailing underscore (e.g., `fill_`) performs its operation in place, modifying the tensor it is called on.

In [6]:
# A tensor filled with zeros.
all_zeros = torch.zeros(2, 3)
describe(all_zeros)

# A tensor filled with ones ...
x = torch.ones(2, 3)
describe(x)

# ... then overwritten in place: fill_ modifies x itself.
x.fill_(5)
describe(x)

type: torch.FloatTensor
shape: torch.Size([2, 3])
content: 
tensor([[0., 0., 0.],
        [0., 0., 0.]])
type: torch.FloatTensor
shape: torch.Size([2, 3])
content: 
tensor([[1., 1., 1.],
        [1., 1., 1.]])
type: torch.FloatTensor
shape: torch.Size([2, 3])
content: 
tensor([[5., 5., 5.],
        [5., 5., 5.]])


#### Create a tensor declaratively by using Python lists

In [7]:
# torch.Tensor turns the nested Python lists into a FloatTensor.
nested_rows = [[1, 2, 3], [4, 5, 6]]
x = torch.Tensor(nested_rows)
describe(x)

type: torch.FloatTensor
shape: torch.Size([2, 3])
content: 
tensor([[1., 2., 3.],
        [4., 5., 6.]])


#### Create a tensor from NumPy
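- The type of the tensor is DoubleTensor instead of the default FloatTensor, because NumPy's random arrays default to float64 and `torch.from_numpy` preserves the array's dtype.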
- The ability to convert between NumPy arrays and PyTorch tensors becomes important when working with legacy libraries that use NumPy-formatted numerical values.

In [8]:
# from_numpy keeps the NumPy dtype, so this float64 array yields a DoubleTensor.
x_npy = np.random.rand(2, 3)
from_npy = torch.from_numpy(x_npy)
describe(from_npy)

type: torch.DoubleTensor
shape: torch.Size([2, 3])
content: 
tensor([[0.8586, 0.9391, 0.3615],
        [0.3802, 0.6414, 0.8212]], dtype=torch.float64)


## Tensor Types and Size

In [9]:
# Build the tensor through the explicitly typed FloatTensor constructor.
int_rows = [[1, 2, 3], [4, 5, 6]]
x = torch.FloatTensor(int_rows)
describe(x)

type: torch.FloatTensor
shape: torch.Size([2, 3])
content: 
tensor([[1., 2., 3.],
        [4., 5., 6.]])


In [10]:
# Cast to 64-bit integers (same result as x.long()).
x = x.to(torch.long)
describe(x)

type: torch.LongTensor
shape: torch.Size([2, 3])
content: 
tensor([[1, 2, 3],
        [4, 5, 6]])


In [11]:
# Unlike the torch.Tensor() constructor used earlier, the torch.tensor()
# factory takes an explicit dtype for the values it copies.
int_rows = [[1, 2, 3], [4, 5, 6]]
x = torch.tensor(int_rows, dtype=torch.int64)
describe(x)

type: torch.LongTensor
shape: torch.Size([2, 3])
content: 
tensor([[1, 2, 3],
        [4, 5, 6]])


In [12]:
# Cast back to 32-bit floats (same result as x.float()).
x = x.to(torch.float32)
describe(x)

type: torch.FloatTensor
shape: torch.Size([2, 3])
content: 
tensor([[1., 2., 3.],
        [4., 5., 6.]])


## Tensor Operations

#### Tensor addition

In [13]:
x = torch.randn(2, 3)
describe(x)

# Functional form of element-wise addition.
x_plus_x = torch.add(x, x)
describe(x_plus_x)

type: torch.FloatTensor
shape: torch.Size([2, 3])
content: 
tensor([[ 0.3930,  0.4327, -1.3627],
        [ 1.3564,  0.6688, -0.7077]])
type: torch.FloatTensor
shape: torch.Size([2, 3])
content: 
tensor([[ 0.7861,  0.8654, -2.7254],
        [ 2.7129,  1.3376, -1.4154]])


In [14]:
# The + operator performs the same element-wise addition as torch.add.
doubled = x + x
describe(doubled)

type: torch.FloatTensor
shape: torch.Size([2, 3])
content: 
tensor([[ 0.7861,  0.8654, -2.7254],
        [ 2.7129,  1.3376, -1.4154]])


#### Dimension-based tensor operations

In [15]:
# The integers 0..5 as a 1-D tensor.
n_items = 6
x = torch.arange(n_items)
describe(x)

type: torch.LongTensor
shape: torch.Size([6])
content: 
tensor([0, 1, 2, 3, 4, 5])


In [16]:
# Reinterpret the six elements as 2 rows x 3 columns.
n_rows, n_cols = 2, 3
x = x.view(n_rows, n_cols)
describe(x)

type: torch.LongTensor
shape: torch.Size([2, 3])
content: 
tensor([[0, 1, 2],
        [3, 4, 5]])


In [17]:
# dim=0 collapses the rows (like axis=0 in NumPy): one sum per column.
col_sums = torch.sum(x, dim=0)
describe(col_sums)

type: torch.LongTensor
shape: torch.Size([3])
content: 
tensor([3, 5, 7])


In [18]:
# dim=1 collapses the columns (like axis=1 in NumPy): one sum per row.
row_sums = torch.sum(x, dim=1)
describe(row_sums)

type: torch.LongTensor
shape: torch.Size([2])
content: 
tensor([ 3, 12])


In [19]:
# Swap dimensions 0 and 1.
x_transposed = torch.transpose(x, 0, 1)
describe(x_transposed)

type: torch.LongTensor
shape: torch.Size([3, 2])
content: 
tensor([[0, 3],
        [1, 4],
        [2, 5]])


A 3D tensor would represent a batch of sequences, where each sequence item has a feature vector. It is common to switch the batch and sequence dimensions so that we can more easily index the sequence in a sequence model.

Note: `transpose` swaps exactly two axes at a time; `permute` can reorder any number of axes in a single call.

## Indexing, Slicing, and Joining

#### Slicing and indexing a tensor

In [20]:
# Fresh 2x3 tensor holding 0..5 for the indexing examples.
flat_range = torch.arange(6)
x = flat_range.view(2, 3)
describe(x)

type: torch.LongTensor
shape: torch.Size([2, 3])
content: 
tensor([[0, 1, 2],
        [3, 4, 5]])


In [21]:
# First row, first two columns; slicing keeps both dimensions.
top_left = x[:1, :2]
describe(top_left)

type: torch.LongTensor
shape: torch.Size([1, 2])
content: 
tensor([[0, 1]])


In [22]:
# Two integer indices select one element as a 0-dimensional tensor.
single_element = x[0, 1]
describe(single_element)

type: torch.LongTensor
shape: torch.Size([])
content: 
1


#### Complex indexing: noncontiguous indexing
- Indices need to be a LongTensor for indexing using PyTorch functions.

In [23]:
# Pick columns 0 and 2 (dim=1 indexes columns).
indices = torch.LongTensor([0, 2])
picked_cols = torch.index_select(x, dim=1, index=indices)
describe(picked_cols)

type: torch.LongTensor
shape: torch.Size([2, 2])
content: 
tensor([[0, 2],
        [3, 5]])


In [24]:
# Pick row 0 twice (dim=0 indexes rows); repeated indices are allowed.
indices = torch.LongTensor([0, 0])
picked_rows = torch.index_select(x, dim=0, index=indices)
describe(picked_rows)

type: torch.LongTensor
shape: torch.Size([2, 3])
content: 
tensor([[0, 1, 2],
        [0, 1, 2]])


In [25]:
# Paired index tensors select the elements at (0, 0) and (1, 1).
row_indices = torch.arange(2).long()
col_indices = torch.LongTensor([0, 1])
paired_elements = x[row_indices, col_indices]
describe(paired_elements)

type: torch.LongTensor
shape: torch.Size([2])
content: 
tensor([0, 4])


#### Concatenating tensors

In [26]:
# Fresh 2x3 tensor holding 0..5 for the concatenation examples.
flat_range = torch.arange(6)
x = flat_range.view(2, 3)
describe(x)

type: torch.LongTensor
shape: torch.Size([2, 3])
content: 
tensor([[0, 1, 2],
        [3, 4, 5]])


In [27]:
# Concatenate along dim=1: the second copy's columns follow the first's.
side_by_side = torch.cat([x, x], dim=1)
describe(side_by_side)

type: torch.LongTensor
shape: torch.Size([2, 6])
content: 
tensor([[0, 1, 2, 0, 1, 2],
        [3, 4, 5, 3, 4, 5]])


In [28]:
# stack joins the tensors along a new leading dimension.
stacked = torch.stack([x, x])
describe(stacked)

type: torch.LongTensor
shape: torch.Size([2, 2, 3])
content: 
tensor([[[0, 1, 2],
         [3, 4, 5]],

        [[0, 1, 2],
         [3, 4, 5]]])


#### Linear algebra on tensors: multiplication

In [29]:
# Integers 0..5 reshaped to 2x3, then cast to float32.
x1 = torch.arange(6).view(2, 3).float()
describe(x1)

type: torch.FloatTensor
shape: torch.Size([2, 3])
content: 
tensor([[0., 1., 2.],
        [3., 4., 5.]])


In [30]:
# Start from a 3x2 tensor of ones (already a FloatTensor) ...
x2 = torch.ones(3, 2)
# ... and add 1 to the second column in place, so every row becomes [1., 2.].
x2[:, 1] += 1
describe(x2)

type: torch.FloatTensor
shape: torch.Size([3, 2])
content: 
tensor([[1., 2.],
        [1., 2.],
        [1., 2.]])


In [31]:
# Matrix product of the 2x3 and 3x2 operands gives a 2x2 result.
product = torch.mm(x1, x2)
describe(product)

type: torch.FloatTensor
shape: torch.Size([2, 2])
content: 
tensor([[ 3.,  6.],
        [12., 24.]])


## Tensors and Computational Graphs

- PyTorch tensors handle the bookkeeping needed for building computational graphs for machine learning simply by enabling a Boolean flag at instantiation time.
- PyTorch tensor class encapsulates the data (the tensor itself) and a range of operations.
- When the **requires_grad** Boolean flag is set to **True** on a tensor, bookkeeping operations are enabled that can track the gradient at the tensor as well as the gradient function, both of which are needed to facilitate the gradient-based learning.

#### Creating tensors for gradient bookkeeping

In [32]:
# requires_grad=True turns on gradient bookkeeping for x.
x = torch.ones(2, 2, requires_grad=True)
describe(x)

# Check whether a gradient has been stored on x yet.
grad_is_missing = x.grad is None
print(grad_is_missing)

type: torch.FloatTensor
shape: torch.Size([2, 2])
content: 
tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
True


In [33]:
# Forward computation built from x; y records how it was produced (grad_fn).
left_factor = x + 2
right_factor = x + 5
y = left_factor * right_factor + 3
describe(y)

# Check again whether x has a stored gradient after the forward computation.
grad_is_missing = x.grad is None
print(grad_is_missing)

type: torch.FloatTensor
shape: torch.Size([2, 2])
content: 
tensor([[21., 21.],
        [21., 21.]], grad_fn=<AddBackward0>)
True


In [34]:
# Reduce y to a single scalar that stands in for a loss value.
z = y.mean()
describe(z)

# Backpropagate from the scalar, then check x.grad once more.
z.backward()
grad_is_missing = x.grad is None
print(grad_is_missing)

type: torch.FloatTensor
shape: torch.Size([])
content: 
21.0
False


In [35]:
# Gradient of z with respect to x. With y = (x + 2) * (x + 5) + 3 averaged over
# four elements, each entry is (2x + 7) / 4, i.e. 2.25 at x = 1.
x.grad

tensor([[2.2500, 2.2500],
        [2.2500, 2.2500]])

## Exercises

1. Create a 2D tensor and then add a dimension of size 1 inserted at dimension 0.

In [36]:
x = torch.rand(3, 3)
describe(x)

# Insert a new axis of size 1 at position 0: (3, 3) -> (1, 3, 3).
x = torch.unsqueeze(x, dim=0)
describe(x)

type: torch.FloatTensor
shape: torch.Size([3, 3])
content: 
tensor([[0.9516, 0.0753, 0.8860],
        [0.5832, 0.3376, 0.8090],
        [0.5779, 0.9040, 0.5547]])
type: torch.FloatTensor
shape: torch.Size([1, 3, 3])
content: 
tensor([[[0.9516, 0.0753, 0.8860],
         [0.5832, 0.3376, 0.8090],
         [0.5779, 0.9040, 0.5547]]])


2. Remove the extra dimension we just added to the previous tensor.

In [37]:
# Drop the size-1 axis at position 0 again: (1, 3, 3) -> (3, 3).
x = torch.squeeze(x, dim=0)
describe(x)

type: torch.FloatTensor
shape: torch.Size([3, 3])
content: 
tensor([[0.9516, 0.0753, 0.8860],
        [0.5832, 0.3376, 0.8090],
        [0.5779, 0.9040, 0.5547]])


3. Create a random tensor of shape 5 x 3 in the interval [3, 7).

In [38]:
# Scale U[0, 1) samples by the interval width and shift by the lower bound
# to obtain values in [low, high).
low, high = 3, 7
low + torch.rand(5, 3) * (high - low)

tensor([[4.3693, 5.5374, 4.4576],
        [5.8417, 6.7856, 6.1561],
        [4.1257, 6.1545, 5.3579],
        [6.0157, 3.7810, 3.0202],
        [4.2273, 3.4660, 6.6411]])

4. Create a tensor with values from a normal distribution (mean=0, std=1).

In [39]:
# randn samples directly from the normal distribution with mean 0 and std 1.
sample_shape = (3, 3)
torch.randn(sample_shape)

tensor([[ 0.4528,  0.6410,  0.5200],
        [ 0.5567,  0.0744,  0.7113],
        [-0.5687,  1.2580, -1.5890]])

In [40]:
# Alternative: overwrite an existing tensor in place with normal samples
# (mean and std written out; they are the method's defaults).
x = torch.rand(3, 3)
x.normal_(mean=0, std=1)

tensor([[-1.1208, -0.2398,  0.2163],
        [ 0.5484, -0.4415,  1.5815],
        [-0.1981,  0.9554, -1.0902]])

5. Retrieve the indexes of all the nonzero elements in the tensor torch.Tensor([1, 1, 1, 0, 1]).

In [41]:
# One row per nonzero element, each row holding that element's index.
values = [1, 1, 1, 0, 1]
x = torch.Tensor(values)
x.nonzero()

tensor([[0],
        [1],
        [2],
        [4]])

6. Create a random tensor of size (3, 1) and then horizontally stack four copies together.

In [42]:
x = torch.rand(3, 1)
describe(x)
# Concatenating along dim=1 places four real copies of the column side by
# side. x.expand(3, 4) would display the same values, but it is only a
# broadcast view sharing x's memory rather than four stacked copies.
torch.cat([x, x, x, x], dim=1)

type: torch.FloatTensor
shape: torch.Size([3, 1])
content: 
tensor([[0.0162],
        [0.2137],
        [0.6249]])


tensor([[0.0162, 0.0162, 0.0162, 0.0162],
        [0.2137, 0.2137, 0.2137, 0.2137],
        [0.6249, 0.6249, 0.6249, 0.6249]])

7. Return the batch matrix-matrix product of two three-dimensional matrices (a = torch.rand(3, 4, 5), b = torch.rand(3, 5, 4)).

In [43]:
# Batch of 3 products: (4 x 5) @ (5 x 4) -> (4 x 4) for each batch entry.
batch_size, outer, inner = 3, 4, 5
a = torch.rand(batch_size, outer, inner)
b = torch.rand(batch_size, inner, outer)
torch.bmm(a, b)

tensor([[[0.5097, 0.4511, 0.8574, 0.5705],
         [0.9305, 0.5039, 0.8319, 1.3060],
         [1.0051, 0.2784, 1.0170, 1.0141],
         [0.5467, 0.5376, 0.6298, 0.7419]],

        [[0.7479, 0.5626, 0.6626, 0.7916],
         [1.6845, 1.3342, 1.2485, 1.6060],
         [0.9672, 0.6746, 0.8266, 1.0354],
         [0.9729, 0.7301, 0.9154, 1.0763]],

        [[1.9574, 1.7438, 0.9624, 1.2241],
         [1.5675, 1.4571, 1.1658, 1.3175],
         [1.8151, 1.3825, 0.5208, 1.0758],
         [1.7720, 1.7995, 1.2115, 1.5042]]])

8. Return the batch matrix-matrix product of a 3D matrix and a 2D matrix (a = torch.rand(3, 4, 5), b = torch.rand(5, 4)).

In [44]:
a = torch.rand(3, 4, 5)
b = torch.rand(5, 4)
# Give b a leading batch axis and expand it to a's batch size so both
# operands passed to bmm are 3-D.
b_batched = b.unsqueeze(0).expand(a.size(0), *b.size())
torch.bmm(a, b_batched)

tensor([[[1.0010, 0.9193, 1.0507, 0.9926],
         [0.6734, 0.6293, 0.8083, 0.6839],
         [1.2929, 1.5602, 1.4694, 1.1348],
         [0.6389, 0.3501, 0.3600, 0.4966]],

        [[0.5280, 0.5076, 0.4488, 0.3678],
         [0.9482, 0.8499, 0.6884, 0.5953],
         [0.6245, 0.7109, 0.7820, 0.5720],
         [1.0368, 1.1702, 0.9839, 0.8194]],

        [[1.5107, 1.0250, 0.9244, 1.0298],
         [0.7812, 0.6688, 0.6384, 0.6752],
         [1.1143, 1.4056, 1.2044, 0.8694],
         [0.6316, 1.0176, 1.0670, 0.7933]]])

In [45]:
# Star-unpacking the size passes each dimension to print as its own argument.
print(*b.shape)

5 4


**END**