In [1]:
import torch
import numpy as np

## Creating Tensors

In [2]:
def describe(x):
    """Print a three-part summary of tensor ``x``: its type string, shape, and contents."""
    report = "\n".join([
        "type: {}".format(x.type()),
        "shape: {}".format(x.shape),
        "content: \n{}".format(x),
    ])
    print(report)

#### Create a tensor with torch.Tensor

In [3]:
# torch.Tensor(2, 3) allocates a 2x3 float tensor without initializing it,
# so the printed values are arbitrary leftovers from memory.
describe(torch.Tensor(2, 3))

type: torch.FloatTensor
shape: torch.Size([2, 3])
content: 
tensor([[8.4078e-45, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00]])


#### Create a randomly initialized tensor

In [4]:
describe(torch.rand(2, 3)) # torch.rand: uniform samples from the half-open interval [0, 1)

type: torch.FloatTensor
shape: torch.Size([2, 3])
content: 
tensor([[0.0810, 0.1307, 0.4261],
        [0.4230, 0.2179, 0.8323]])


In [5]:
describe(torch.randn(2, 3)) # torch.randn: samples from the standard normal distribution (mean 0, std 1)

type: torch.FloatTensor
shape: torch.Size([2, 3])
content: 
tensor([[-0.4717, -1.5356,  0.0335],
        [-0.2793,  0.6591, -1.9797]])


#### Create a filled tensor
- Any PyTorch method whose name ends with an underscore (e.g. `fill_`) is an in-place operation: it modifies the tensor it is called on rather than allocating a new one.

In [6]:
# zeros / ones build constant-valued tensors; fill_ then overwrites x in place.
zeros = torch.zeros(2, 3)
describe(zeros)
x = torch.ones(2, 3)
describe(x)
x.fill_(5)
describe(x)

type: torch.FloatTensor
shape: torch.Size([2, 3])
content: 
tensor([[0., 0., 0.],
        [0., 0., 0.]])
type: torch.FloatTensor
shape: torch.Size([2, 3])
content: 
tensor([[1., 1., 1.],
        [1., 1., 1.]])
type: torch.FloatTensor
shape: torch.Size([2, 3])
content: 
tensor([[5., 5., 5.],
        [5., 5., 5.]])


#### Create a tensor declaratively by using Python lists

In [7]:
# Each inner Python list becomes one row; torch.Tensor stores the values as floats.
rows = [[1, 2, 3], [4, 5, 6]]
x = torch.Tensor(rows)
describe(x)

type: torch.FloatTensor
shape: torch.Size([2, 3])
content: 
tensor([[1., 2., 3.],
        [4., 5., 6.]])


#### Create a tensor from NumPy
- The type of the tensor is DoubleTensor instead of the default FloatTensor, because the NumPy array holds float64 values and `torch.from_numpy` preserves the array's dtype.
- The ability to convert between NumPy arrays and PyTorch tensors becomes important when working with legacy libraries that use NumPy-formatted numerical values.

In [8]:
# NumPy produces float64 values here, and from_numpy keeps that dtype.
x_npy = np.random.rand(2, 3)
x_from_npy = torch.from_numpy(x_npy)
describe(x_from_npy)

type: torch.DoubleTensor
shape: torch.Size([2, 3])
content: 
tensor([[0.0464, 0.4950, 0.2597],
        [0.4928, 0.3608, 0.7874]], dtype=torch.float64)


## Tensor Types and Size

In [9]:
# The typed constructor stores the integer literals as floats (FloatTensor).
x = torch.FloatTensor([[1, 2, 3], [4, 5, 6]])
describe(x)

type: torch.FloatTensor
shape: torch.Size([2, 3])
content: 
tensor([[1., 2., 3.],
        [4., 5., 6.]])


In [10]:
# Cast the float tensor to integer (LongTensor) elements.
x = x.long()
describe(x)

type: torch.LongTensor
shape: torch.Size([2, 3])
content: 
tensor([[1, 2, 3],
        [4, 5, 6]])


In [11]:
# torch.tensor (lowercase) accepts an explicit dtype; torch.Tensor([...]) in the
# earlier list example produced a FloatTensor from the same values.
x = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.int64)
describe(x)

type: torch.LongTensor
shape: torch.Size([2, 3])
content: 
tensor([[1, 2, 3],
        [4, 5, 6]])


In [12]:
# Cast back from LongTensor to FloatTensor.
x = x.float()
describe(x)

type: torch.FloatTensor
shape: torch.Size([2, 3])
content: 
tensor([[1., 2., 3.],
        [4., 5., 6.]])


## Tensor Operations

#### Tensor addition

In [13]:
x = torch.randn(2, 3)
describe(x)
# Element-wise sum of x with itself, via the functional API.
x_plus_x = torch.add(x, x)
describe(x_plus_x)

type: torch.FloatTensor
shape: torch.Size([2, 3])
content: 
tensor([[-0.5402,  1.0138,  1.0676],
        [-0.1057,  1.3151,  2.3039]])
type: torch.FloatTensor
shape: torch.Size([2, 3])
content: 
tensor([[-1.0804,  2.0277,  2.1352],
        [-0.2113,  2.6302,  4.6079]])


In [14]:
# The + operator produces the same values as torch.add(x, x).
describe(x + x)

type: torch.FloatTensor
shape: torch.Size([2, 3])
content: 
tensor([[-1.0804,  2.0277,  2.1352],
        [-0.2113,  2.6302,  4.6079]])


#### Dimension-based tensor operations

In [15]:
# arange(6) yields the integers 0 through 5 as a 1-D LongTensor.
x = torch.arange(6)
describe(x)

type: torch.LongTensor
shape: torch.Size([6])
content: 
tensor([0, 1, 2, 3, 4, 5])


In [16]:
# Reshape the 6 elements into 2 rows of 3.
x = x.view(2, 3)
describe(x)

type: torch.LongTensor
shape: torch.Size([2, 3])
content: 
tensor([[0, 1, 2],
        [3, 4, 5]])


In [17]:
describe(torch.sum(x, dim=0)) # dim=0 sums down the rows, one total per column (like axis=0 in NumPy)

type: torch.LongTensor
shape: torch.Size([3])
content: 
tensor([3, 5, 7])


In [18]:
describe(torch.sum(x, dim=1)) # dim=1 sums across the columns, one total per row (like axis=1 in NumPy)

type: torch.LongTensor
shape: torch.Size([2])
content: 
tensor([ 3, 12])


In [19]:
# Swap dimensions 0 and 1: shape (2, 3) becomes (3, 2).
describe(torch.transpose(x, 0, 1))

type: torch.LongTensor
shape: torch.Size([3, 2])
content: 
tensor([[0, 3],
        [1, 4],
        [2, 5]])


A 3D tensor would represent a batch of sequences, where each sequence item has a feature vector. It is common to switch the batch and sequence dimensions so that we can more easily index the sequence in a sequence model.

Note: `transpose` can only swap two axes at a time; `permute` can reorder any number of axes in a single call.

## Indexing, Slicing, and Joining

#### Slicing and indexing a tensor

In [20]:
# Rebuild the 2x3 matrix holding 0..5 for the indexing examples.
x = torch.arange(6).view(2, 3)
describe(x)

type: torch.LongTensor
shape: torch.Size([2, 3])
content: 
tensor([[0, 1, 2],
        [3, 4, 5]])


In [21]:
# Slice rows [0, 1) and columns [0, 2); slicing keeps both dimensions.
describe(x[:1, :2])

type: torch.LongTensor
shape: torch.Size([1, 2])
content: 
tensor([[0, 1]])


In [22]:
# Integer indices on both dimensions select one element, returned as a 0-dimensional tensor.
describe(x[0, 1])

type: torch.LongTensor
shape: torch.Size([])
content: 
1


#### Complex indexing: noncontiguous indexing
- Indices need to be a LongTensor for indexing using PyTorch functions.

In [23]:
# Pick columns 0 and 2 (dim=1) from every row of x.
indices = torch.LongTensor([0, 2])
picked = x.index_select(dim=1, index=indices)
describe(picked)

type: torch.LongTensor
shape: torch.Size([2, 2])
content: 
tensor([[0, 2],
        [3, 5]])


In [24]:
# An index may repeat: selecting row 0 twice (dim=0) yields that row two times.
indices = torch.LongTensor([0, 0])
describe(torch.index_select(x, dim=0, index=indices))

type: torch.LongTensor
shape: torch.Size([2, 3])
content: 
tensor([[0, 1, 2],
        [0, 1, 2]])


In [25]:
# Pairwise indexing: element k of the result is x[row_indices[k], col_indices[k]].
# arange is asked for int64 directly, which avoids the separate .long() cast.
row_indices = torch.arange(2, dtype=torch.long)
col_indices = torch.LongTensor([0, 1])
describe(x[row_indices, col_indices])

type: torch.LongTensor
shape: torch.Size([2])
content: 
tensor([0, 4])


#### Concatenating tensors

In [26]:
# Fresh 2x3 matrix holding 0..5 for the concatenation examples.
x = torch.arange(6).view(2, 3)
describe(x)

type: torch.LongTensor
shape: torch.Size([2, 3])
content: 
tensor([[0, 1, 2],
        [3, 4, 5]])


In [27]:
# dim=1 joins along columns: (2, 3) and (2, 3) -> (2, 6).
describe(torch.cat([x, x], dim=1))

type: torch.LongTensor
shape: torch.Size([2, 6])
content: 
tensor([[0, 1, 2, 0, 1, 2],
        [3, 4, 5, 3, 4, 5]])


In [28]:
# stack creates a new leading dimension: two (2, 3) tensors -> (2, 2, 3).
describe(torch.stack([x, x]))

type: torch.LongTensor
shape: torch.Size([2, 2, 3])
content: 
tensor([[[0, 1, 2],
         [3, 4, 5]],

        [[0, 1, 2],
         [3, 4, 5]]])


#### Linear algebra on tensors: multiplication

In [29]:
# Build the 2x3 matrix 0..5 and cast it to float in a single chain.
x1 = torch.arange(6).view(2, 3).float()
describe(x1)

type: torch.FloatTensor
shape: torch.Size([2, 3])
content: 
tensor([[0., 1., 2.],
        [3., 4., 5.]])


In [30]:
x2 = torch.ones(3, 2)
x2[:, 1] += 1  # bump the second column so the columns hold 1s and 2s respectively
# torch.ones already returns a FloatTensor, so no cast is needed to match x1.
describe(x2)

type: torch.FloatTensor
shape: torch.Size([3, 2])
content: 
tensor([[1., 2.],
        [1., 2.],
        [1., 2.]])


In [31]:
# Matrix product: (2, 3) times (3, 2) gives (2, 2).
describe(torch.mm(x1, x2))

type: torch.FloatTensor
shape: torch.Size([2, 2])
content: 
tensor([[ 3.,  6.],
        [12., 24.]])


## Tensors and Computational Graphs

- PyTorch tensors handle the bookkeeping needed for building computational graphs for machine learning simply by enabling a Boolean flag at instantiation time.
- The PyTorch tensor class encapsulates the data (the tensor itself) and a range of operations.
- When the **requires_grad** Boolean flag is set to **True** on a tensor, bookkeeping operations are enabled that can track the gradient at the tensor as well as the gradient function, both of which are needed to facilitate the gradient-based learning.

#### Creating tensors for gradient bookkeeping

In [32]:
# requires_grad=True turns on gradient bookkeeping for x.
x = torch.ones(2, 2, requires_grad=True)
describe(x)
print(x.grad is None)  # True: nothing has been backpropagated yet

type: torch.FloatTensor
shape: torch.Size([2, 2])
content: 
tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
True


In [33]:
# y is computed from x, so it carries a grad_fn; the forward pass alone leaves x.grad unset.
y = (x + 2) * (x + 5) + 3
describe(y)
print(x.grad is None)

type: torch.FloatTensor
shape: torch.Size([2, 2])
content: 
tensor([[21., 21.],
        [21., 21.]], grad_fn=<AddBackward0>)
True


In [34]:
# Let's say this is a loss function: it reduces y to a scalar that backward() can start from.
z = y.mean()
describe(z)
z.backward()  # backpropagate from z, populating x.grad
print(x.grad is None)

type: torch.FloatTensor
shape: torch.Size([])
content: 
21.0
False


In [35]:
# z = mean((x + 2)(x + 5) + 3) over 4 elements, so dz/dx = (2x + 7) / 4 = 2.25 at x = 1.
x.grad

tensor([[2.2500, 2.2500],
        [2.2500, 2.2500]])

## Exercises

1. Create a 2D tensor and then add a dimension of size 1 inserted at dimension 0.

In [36]:
x = torch.rand(3, 3)
describe(x)
# Insert a new axis of length 1 in front: (3, 3) -> (1, 3, 3).
x = torch.unsqueeze(x, dim=0)
describe(x)

type: torch.FloatTensor
shape: torch.Size([3, 3])
content: 
tensor([[0.8363, 0.9310, 0.0765],
        [0.1962, 0.6418, 0.7437],
        [0.1437, 0.6700, 0.3494]])
type: torch.FloatTensor
shape: torch.Size([1, 3, 3])
content: 
tensor([[[0.8363, 0.9310, 0.0765],
         [0.1962, 0.6418, 0.7437],
         [0.1437, 0.6700, 0.3494]]])


2. Remove the extra dimension we just added to the previous tensor.

In [37]:
# squeeze(0) drops dimension 0 because its size is 1: (1, 3, 3) -> (3, 3).
x = x.squeeze(0)
describe(x)

type: torch.FloatTensor
shape: torch.Size([3, 3])
content: 
tensor([[0.8363, 0.9310, 0.0765],
        [0.1962, 0.6418, 0.7437],
        [0.1437, 0.6700, 0.3494]])


3. Create a random tensor of shape 5 x 3 in the interval [3, 7).

In [38]:
# rand() is uniform on [0, 1); multiplying by the width (7 - 3) and adding 3 maps it onto [3, 7).
3 + torch.rand(5, 3) * (7 - 3)

tensor([[4.9454, 3.7489, 3.7832],
        [6.6856, 6.5091, 5.1139],
        [5.1984, 5.6410, 5.0503],
        [4.7292, 3.7878, 3.0977],
        [5.0333, 3.8287, 4.0390]])

4. Create a tensor with values from a normal distribution (mean=0, std=1).

In [39]:
# randn samples directly from the standard normal distribution.
torch.randn(3, 3)

tensor([[ 0.1755,  1.3824, -0.2178],
        [-0.2999, -0.4550, -0.2023],
        [ 0.7118,  0.4847, -1.2757]])

In [40]:
# normal_() overwrites every element in place with a standard-normal draw, so the
# starting values are irrelevant: allocate with torch.empty instead of drawing
# (and immediately discarding) uniform samples with torch.rand.
x = torch.empty(3, 3)
x.normal_()

tensor([[ 0.8629,  0.7376, -2.5209],
        [ 0.9190,  0.3909,  1.4394],
        [-0.1803,  0.4218,  0.5273]])

5. Retrieve the indexes of all the nonzero elements in the tensor torch.Tensor([1, 1, 1, 0, 1]).

In [41]:
# nonzero() returns one row per nonzero element, holding that element's index.
x = torch.Tensor([1, 1, 1, 0, 1])
x.nonzero()

tensor([[0],
        [1],
        [2],
        [4]])

6. Create a random tensor of size (3, 1) and then horizontally stack four copies together.

In [42]:
x = torch.rand(3, 1)
describe(x)
# Concatenate four copies of x along dim=1 (columns). Unlike x.expand(3, 4), which
# returns a view whose columns all alias the same memory, torch.cat materializes
# independent copies.
torch.cat([x] * 4, dim=1)

type: torch.FloatTensor
shape: torch.Size([3, 1])
content: 
tensor([[0.9841],
        [0.7930],
        [0.9783]])


tensor([[0.9841, 0.9841, 0.9841, 0.9841],
        [0.7930, 0.7930, 0.7930, 0.7930],
        [0.9783, 0.9783, 0.9783, 0.9783]])

7. Return the batch matrix-matrix product of two three-dimensional matrices (a = torch.rand(3, 4, 5), b = torch.rand(3, 5, 4)).

In [43]:
# Batched product: for each of the 3 batch entries, (4 x 5) times (5 x 4) gives (4 x 4).
a, b = torch.rand(3, 4, 5), torch.rand(3, 5, 4)
a.bmm(b)

tensor([[[1.0057, 0.6421, 1.5778, 1.4346],
         [1.6272, 0.9316, 2.4996, 2.3747],
         [1.3295, 0.9059, 1.7589, 1.7389],
         [0.9233, 0.5535, 1.3825, 1.3792]],

        [[1.0541, 0.7601, 0.8668, 1.5399],
         [0.4817, 0.6680, 0.6459, 0.9785],
         [1.1763, 0.3125, 0.8446, 1.3691],
         [1.0003, 0.8436, 1.6978, 1.9443]],

        [[1.1696, 1.2225, 0.9098, 0.9220],
         [0.3667, 0.7257, 0.8861, 0.5578],
         [1.0787, 1.3234, 1.2261, 1.0891],
         [0.5082, 0.7171, 0.9411, 0.6339]]])

8. Return the batch matrix-matrix product of a 3D matrix and a 2D matrix (a = torch.rand(3, 4, 5), b = torch.rand(5, 4)).

In [44]:
a = torch.rand(3, 4, 5)
b = torch.rand(5, 4)
# Give b a leading batch axis and expand it to a's batch size, since bmm needs
# two 3-D operands with matching batch dimensions.
batch_size = a.size(0)
b_batched = b.unsqueeze(0).expand(batch_size, *b.size())
torch.bmm(a, b_batched)

tensor([[[1.8274, 2.0506, 1.3228, 1.1027],
         [1.2329, 1.3479, 1.1746, 0.8145],
         [1.1661, 1.4610, 1.1549, 0.7579],
         [1.8245, 2.2150, 1.1813, 0.8446]],

        [[1.8419, 2.3071, 1.4408, 1.2228],
         [0.9800, 1.3729, 0.9908, 0.6773],
         [0.9956, 1.5377, 1.0834, 0.8247],
         [1.4066, 1.8110, 1.1102, 0.8898]],

        [[1.1487, 1.3480, 0.9347, 0.7346],
         [1.8230, 2.1747, 1.2654, 1.1364],
         [1.9192, 2.2053, 1.0930, 1.0281],
         [1.7776, 1.6397, 0.8880, 0.6391]]])

In [45]:
# Star-unpacking b.size() passes its dimensions as separate arguments (here 5 and 4),
# which is what the expand() call in the previous cell relies on.
print(*b.size())

5 4


**END**