# Tensors and Torch Operations

In [1]:
import torch

# Tensor creation operations
Specify shape and dtype as arguments. The shape can be a single integer or a tuple of integers.

In [2]:
# A vector of 10 ones
# A single integer shape gives a 1-D tensor; no dtype is passed, so the
# default floating-point dtype is used (note the `1.` values in the output).
torch.ones(10)

tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [3]:
# A (3, 3) matrix of 0s as integers
# The shape is passed as a tuple; dtype=torch.long requests 64-bit integers (torch.int64).
torch.zeros((3, 3), dtype=torch.long)

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])

In [4]:
# A (2, 4, 4) tensor of numbers drawn from the standard normal distribution
# No seed is set in the cells shown here, so the sampled values differ between runs.
torch.randn((2, 4, 4))

tensor([[[ 0.9363,  0.3162,  0.7511, -1.3430],
         [ 1.3663, -0.8448,  2.4356, -0.1408],
         [ 1.0460, -0.0701, -1.4731, -1.2486],
         [ 1.7917,  1.2577,  1.9010, -0.7319]],

        [[-0.7655,  0.6201,  1.1122,  0.2268],
         [-0.0203,  0.3423,  1.0897,  0.2429],
         [-0.1350, -0.7531, -0.0595,  0.2128],
         [ 0.6086,  0.9427, -0.8392,  0.4083]]])

# Tensor shapes
- Column vector: (B, 1)
- Feature matrix: (B, D)
- Greyscale images: (B, W, H, 1)
- RGB images: (B, W, H, 3)
- Arbitrary images: (B, W, H, C)
- Sequences of vectors: (B, L, D)

Create tensors of random numbers that have the same shapes as specified.

In [5]:
# TODO: Create a feature matrix with 4 examples whose feature size is 10
# Feature matrix layout is (B, D): B=4 examples, D=10 features per example.
torch.randn((4, 10))

tensor([[-0.6147,  0.0427, -0.5562, -0.7068, -1.2642,  0.3802, -0.1313,  1.1799,
          0.4505, -0.1525],
        [ 1.8360, -0.9395,  1.2424,  2.0652, -0.2309,  0.0450, -2.3479, -0.2827,
         -1.1999, -0.2158],
        [-0.2493, -1.5048,  0.6128, -0.5287, -0.2175,  1.6820, -0.3587, -1.2516,
         -0.1518,  0.5955],
        [ 0.4771,  1.7978,  1.2441, -1.6271,  1.3185, -1.6270,  1.5288,  1.5141,
          1.1025, -0.6128]])

In [6]:
# TODO: Create a batch of 5 RGB images whose spatial dimensions are 32x32
# RGB image layout is (B, W, H, 3): B=5 images, W=H=32, with the 3 colour channels last.
torch.randn((5, 32, 32, 3))

tensor([[[[ 0.2027, -1.1366,  0.7774],
          [-0.2401, -0.7282,  0.8560],
          [ 0.8691,  2.7668, -0.6636],
          ...,
          [ 1.3713, -0.0357,  0.7443],
          [ 0.3781, -0.8476,  2.4250],
          [ 0.9127,  0.9393,  0.6135]],

         [[-0.7050,  1.9606,  1.3471],
          [ 1.5946,  0.0950, -0.2133],
          [-1.6131, -0.2609, -0.5743],
          ...,
          [-0.7657, -0.1005, -1.7460],
          [ 1.0679, -0.6975, -0.4910],
          [ 1.3332,  0.1012, -0.0114]],

         [[ 2.3520,  2.8420, -1.6052],
          [ 0.2768,  0.0233, -1.4937],
          [-1.5348, -0.3646,  0.5498],
          ...,
          [-0.4312,  0.6737,  1.3217],
          [ 0.1892, -0.5117,  0.7452],
          [-0.8440, -2.1202, -1.1583]],

         ...,

         [[-0.9878,  1.0099, -1.0879],
          [ 0.5793,  1.5808,  0.4065],
          [ 0.7270, -0.8774, -0.3284],
          ...,
          [ 0.4045,  0.3647,  0.4277],
          [ 0.5701, -0.4057,  0.5278],
          [-0.3150,  0

In [7]:
# TODO: Create a batch of 2 vector sequences, whose sequence lengths are 7 and feature dimension is 4
# Sequence layout is (B, L, D): B=2 sequences, L=7 steps, D=4 features per step.
torch.randn((2, 7, 4))

tensor([[[ 0.7008, -0.2929,  0.5752,  0.8768],
         [-0.6764,  1.4848, -0.9929, -1.0976],
         [-0.8327,  2.1122, -0.6767,  0.9652],
         [ 0.7174,  0.5985, -0.3428, -0.8498],
         [-1.9296,  1.1661, -0.7060,  0.0589],
         [-0.2921, -0.1822,  0.7908, -0.6844],
         [-0.4218,  0.7073,  0.4393,  1.0033]],

        [[ 0.0109,  0.3343, -0.3397,  0.4333],
         [-0.1683, -0.2709,  0.9795,  0.2638],
         [ 0.6140,  0.4783,  0.3305,  1.3098],
         [-1.1850, -1.3816, -0.3515,  1.5610],
         [ 0.5919,  0.8282,  0.2370, -0.3555],
         [-1.6616, -1.5484, -1.1998,  2.1987],
         [ 1.6813, -2.1308, -1.0748, -1.8396]]])

# Torch dtypes
Some useful conversions:
- `torch.FloatTensor()` can be used to create tensors of `torch.float32` dtype
- `torch.LongTensor()` can be used to create tensors of `torch.int64` dtype
- Numpy arrays can be converted with `torch.from_numpy(x)`
- `dtype` can be specified in some creation operations
- Tensors can be cast using `x.type(new_type)`

# Tensor indexing

A tensor dimension of size D can be indexed in the following ways:
- Single integers from [0, D-1] or [-D, -1] for reverse indices
- Lists of integers or tensors of integer dtypes
- Slices, using colon notation, or slice objects
- Boolean masks of size D (or broadcastable)
- Ellipsis to infer other dimensions

In [8]:
# A 1-D tensor holding the integers 0..11, used by the indexing examples that follow
a = torch.arange(12)
a

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [9]:
# Single integer indexing
# Negative indices count from the end: -1 is the last element, -3 the third from last.
a[1], a[-1], a[-3]

(tensor(1), tensor(11), tensor(9))

In [10]:
# List indexing
# Each listed position is gathered in order; a repeated position repeats the element.
a[[1, 3, 5]], a[[2, 2, 2]]

(tensor([1, 3, 5]), tensor([2, 2, 2]))

In [11]:
# A single colon selects an entire dim; here we select every column of row 0
# `b` arranges the 12 elements of `a` as a (3, 4) matrix.
b = a.reshape(3, 4)
b, b[0, :]

(tensor([[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]]),
 tensor([0, 1, 2, 3]))

In [12]:
# Colons represent ranges written as `start:end`, where `end` is exclusive.
# Omitting `start` begins at index 0, omitting `end` runs through the last element,
# and a negative `end` counts back from the end of the dimension.
a[0:3], a[:5], a[5:], a[:-1]

(tensor([0, 1, 2]),
 tensor([0, 1, 2, 3, 4]),
 tensor([ 5,  6,  7,  8,  9, 10, 11]),
 tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10]))

In [13]:
# Boolean masks can be used to select based on conditions
# `a < 5` compares every element, giving a bool tensor with the same shape as `a`;
# indexing with that mask keeps only the positions that are True.
mask = a < 5
a[mask], mask

(tensor([0, 1, 2, 3, 4]),
 tensor([ True,  True,  True,  True,  True, False, False, False, False, False,
         False, False]))

In [14]:
# Ellipsis can infer dimensions
# `c[0, ...]` is equivalent to `c[0, :, :]`: index 0 on the first dim, everything on the rest.
c = torch.arange(60).reshape(3, 4, 5)
c, c[0, ...]

(tensor([[[ 0,  1,  2,  3,  4],
          [ 5,  6,  7,  8,  9],
          [10, 11, 12, 13, 14],
          [15, 16, 17, 18, 19]],
 
         [[20, 21, 22, 23, 24],
          [25, 26, 27, 28, 29],
          [30, 31, 32, 33, 34],
          [35, 36, 37, 38, 39]],
 
         [[40, 41, 42, 43, 44],
          [45, 46, 47, 48, 49],
          [50, 51, 52, 53, 54],
          [55, 56, 57, 58, 59]]]),
 tensor([[ 0,  1,  2,  3,  4],
         [ 5,  6,  7,  8,  9],
         [10, 11, 12, 13, 14],
         [15, 16, 17, 18, 19]]))

# Elementwise operations

In [15]:
# TODO: Apply relu to tensor `a` and print results
# relu clamps every negative entry of the integer range [-5, 5) to zero
# and leaves the non-negative entries unchanged.
a = torch.relu(torch.arange(-5, 5))
a

tensor([0, 0, 0, 0, 0, 0, 1, 2, 3, 4])

# Broadcasting
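Broadcasting rules — shapes are aligned starting from the right-most dimension, and each pair of aligned dimensions is compatible when:
- The two sizes match, or
- One of them has size 1 (a missing leading dimension, as with scalars, counts as size 1)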

In [16]:
# Without broadcasting, elementwise operations require operands of the same size.
# torch.tensor with an explicit dtype is used instead of the legacy torch.Tensor
# constructor; the result is the same float32 tensor.
a = torch.tensor([1, 2, 3], dtype=torch.float32)
b = torch.tensor([10, 10, 10], dtype=torch.float32)
a + b

tensor([11., 12., 13.])

In [17]:
# Example: Broadcasting to add a scalar to a tensor
# The scalar 10 is treated as if it were repeated for every element of `a`.
a + 10

tensor([11., 12., 13.])

In [18]:
# Example: Adding a vector to each row of a matrix
a = torch.arange(12).reshape(3, 4)  # Matrix of size (3, 4)
# 1-D vector of size (4,); aligned from the right it lines up with the columns of `a`,
# so the same four values are added to every row.
b = torch.tensor([1, 10, 100, 200], dtype=torch.float32)
# Sizes: (3, 4) + (4,) -> (3, 4); the int64 matrix is promoted to float32 by the addition
a + b

tensor([[  1.,  11., 102., 203.],
        [  5.,  15., 106., 207.],
        [  9.,  19., 110., 211.]])

In [19]:
# Example: Creating a boolean mask from a tensor
# The scalar 5 broadcasts against every element of the (3, 4) matrix.
a = torch.arange(12).reshape(3, 4)
a < 5

tensor([[ True,  True,  True,  True],
        [ True, False, False, False],
        [False, False, False, False]])

In [20]:
# Example: Adding a tensor that has a singleton dimension
a = torch.randn((2, 4, 3, 10))
b = torch.randn((4, 1, 10))
# Shapes aligned from the right:
# (2, 4, 3, 10)
#    (4, 1, 10)
# =============
# (2, 4, 3, 10)
# b's size-1 dim stretches to 3, and its missing leading dim stretches to 2.

# Note that b with shape (4, 10) will not broadcast: aligned from the right,
# its 4 would be paired with a's 3, and neither of them is 1.
# b = torch.randn((4, 10))

c = a + b
c.shape

torch.Size([2, 4, 3, 10])

In [21]:
# TODO: Transform a random normal tensor of shape (10, 3)
# The columns should have means of [-3, 5, 100] and standard deviations of [0.1, 1, 10]
a = torch.randn((10, 3))                                  # Shape (10, 3)
means = torch.tensor([-3, 5, 100], dtype=torch.float32)   #           (3,)
stds = torch.tensor([0.1, 1, 10], dtype=torch.float32)    #           (3,)
# (10, 3) * (3,) + (3,) -> (10, 3): the per-column statistics broadcast across every row
(a * stds) + means

tensor([[ -3.1315,   4.6295, 117.7399],
        [ -3.1945,   5.5485, 104.2687],
        [ -3.1407,   6.0279,  94.9768],
        [ -3.0268,   4.0486, 100.5392],
        [ -2.8424,   4.9274,  74.5696],
        [ -2.8274,   4.8226,  95.3272],
        [ -3.0609,   4.3686,  96.3954],
        [ -2.9339,   4.3351, 110.9736],
        [ -3.0475,   4.1226,  92.2756],
        [ -3.0088,   4.6285,  97.4372]])

# Matrix multiplication

Matmul rules:
- A has shape `(..., l, m)`
- B has shape `     (m, n)`
- The last dimension of A must have the same size as the second-last dimension of B
- Transform last dimension of A from `m` to `n`
- Can use `torch.matmul()` or the `@` operator (`torch.mm()` also works, but only for two 2-D matrices)

In [22]:
# Example: Multiply two matrices
A = torch.randn((3, 5))
B = torch.randn((5, 10))
# (3, 5)  @ (5, 10) -> (3, 10)
# The shared inner size 5 is contracted away.
C = A @ B
C.shape

torch.Size([3, 10])

In [23]:
# Example: Matmul can be broadcasted
A = torch.randn((4, 32, 32, 3))
B = torch.randn((3, 10))
# (4, 32, 32, 3) @ (3, 10) -> (4, 32, 32, 10)
# Only the last dim of A is transformed (3 -> 10); the leading dims (4, 32, 32) pass through.
C = A @ B
C.shape

torch.Size([4, 32, 32, 10])

In [24]:
# TODO: Transform this batch of vector sequences from feature size 7 to feature size 32 through matmul
A = torch.randn(4, 100, 7)
B = torch.randn(7, 32)
# (4, 100, 7) @ (7, 32) -> (4, 100, 32)
C = A @ B
C.shape

torch.Size([4, 100, 32])

# Reduction operations
Reduction operation rules:
- By default reduces across whole tensor
- Pass the `dim` keyword to choose which dimension is reduced (that dimension is removed from the result)

In [25]:
# A (3, 4) matrix of the integers 0..11, used by the reduction examples that follow
a = torch.arange(12).reshape(3, 4)
a

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])

In [26]:
# Example: Sum all elements of a
# With no `dim`, the reduction covers the whole tensor and returns a 0-dim tensor.
a.sum()

tensor(66)

In [27]:
# Example: Sum along rows (dim=0)
# dim=0 is collapsed: the 3 rows are added together, leaving one total per column (4 values).
a.sum(dim=0)

tensor([12, 15, 18, 21])

In [28]:
# TODO: Sum along columns (dim=1)
# dim=1 is collapsed: the 4 columns are added together, leaving one total per row (3 values).
a.sum(dim=1)

tensor([ 6, 22, 38])

In [29]:
# TODO: Compute the mean and standard deviation of this tensor along columns (dim=1)
# dim=1 is collapsed, so each of the 10 rows yields one mean and one std over its 4 values.
a = torch.randn((10, 4))
a.mean(dim=1), a.std(dim=1)

(tensor([-0.7105, -0.2768, -0.5030, -0.0168,  0.3736,  0.4711,  0.0310,  0.6269,
          0.0770, -0.0220]),
 tensor([0.8379, 1.5381, 1.1045, 1.3531, 0.6035, 1.0231, 0.7048, 1.0874, 0.5550,
         0.9840]))

# Reshape operations
- `x.reshape(shape)`: Reshapes to `shape`, product of dims must be same before and after
- `x.squeeze()`: Remove singleton dims
- `x.unsqueeze(d)`: Add a singleton dim at dimension `d`
- `x.flatten()`: Unravel into a vector of shape `(x.numel(),)`
- `x.permute(order)`: Permute order of dims according to a tuple of dims


In [30]:
# A (3, 1, 4, 5) tensor of the integers 0..59; dim 1 is a singleton dimension
a = torch.arange(60).reshape(3, 1, 4, 5)
a

tensor([[[[ 0,  1,  2,  3,  4],
          [ 5,  6,  7,  8,  9],
          [10, 11, 12, 13, 14],
          [15, 16, 17, 18, 19]]],


        [[[20, 21, 22, 23, 24],
          [25, 26, 27, 28, 29],
          [30, 31, 32, 33, 34],
          [35, 36, 37, 38, 39]]],


        [[[40, 41, 42, 43, 44],
          [45, 46, 47, 48, 49],
          [50, 51, 52, 53, 54],
          [55, 56, 57, 58, 59]]]])

In [31]:
# Example: Reshape to (2, 30)
# 3 * 1 * 4 * 5 == 2 * 30 == 60, so the element count is preserved.
a.reshape(2, 30)

tensor([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
         18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
        [30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
         48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59]])

In [32]:
# Example: Squeeze out extra dimension
# The size-1 dim at position 1 is dropped: (3, 1, 4, 5) -> (3, 4, 5).
a.squeeze().shape

torch.Size([3, 4, 5])

In [33]:
# Example: Flatten into a vector
# All four dims are merged into a single dim holding the 60 elements.
a.flatten(), a.flatten().shape

(tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
         18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
         36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
         54, 55, 56, 57, 58, 59]),
 torch.Size([60]))

In [34]:
# Example: Flatten just the last two dims
# Flattening starts at dim -2, so (4, 5) merge into 20: (3, 1, 4, 5) -> (3, 1, 20).
a.flatten(-2).shape

torch.Size([3, 1, 20])

In [35]:
# Example: Permute order of dims
# Original shape: (3, 1, 4, 5)
# Dim indices:    (0, 1, 2, 3)
# The order (1, 3, 2, 0) picks those sizes in turn, giving (1, 5, 4, 3)
a.permute((1, 3, 2, 0)).shape

torch.Size([1, 5, 4, 3])

# Activity: Process the logistic regression data

In [36]:
import pickle
# Load the pickled dataset object from the local `data/` directory.
# NOTE: unpickling can execute arbitrary code, so only load pickle files from a trusted source.
with open("data/logistic-regression-data.pkl", "rb") as f:
    data = pickle.load(f)
# Pull out the entry stored under "training_x"; the output below shows a (75, 4) numpy array.
x = data["training_x"]
x.shape, type(x)

((75, 4), numpy.ndarray)

In [37]:
# TODO: Convert the data to a tensor in float32 dtype
# torch.as_tensor converts and casts in one step and also accepts an existing tensor,
# so re-running this cell does not fail the way torch.from_numpy would on a tensor input.
x = torch.as_tensor(x, dtype=torch.float32)
x.shape, x.dtype

(torch.Size([75, 4]), torch.float32)

In [38]:
# TODO: Compute the mean and std over the batch dimension
# dim=0 is the batch dimension, so one statistic is produced per feature column.
mean, std = x.mean(dim=0), x.std(dim=0)
mean, std

(tensor([6.2333, 2.8653, 4.8800, 1.6653]),
 tensor([0.6618, 0.3042, 0.8276, 0.4304]))

In [39]:
# TODO: Standardize the data by subtracting the mean and dividing by std
# `mean` and `std` have one entry per feature and broadcast across every row of `x`.
# Note: `x` is overwritten, so re-running this cell alone rescales already-standardized data.
x = (x - mean) / std

In [40]:
# TODO: Verify the new mean/std are standard normal
# Expect per-feature means of approximately 0 (up to float32 rounding) and stds of 1.
x.mean(dim=0), x.std(dim=0)

(tensor([ 3.3140e-07,  5.4042e-08, -1.7643e-07, -1.5736e-07]),
 tensor([1., 1., 1., 1.]))

# Activity: Linear layer
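The `torch.nn.Linear(m, n)` layer applies the equation `y = x @ w + b`, where `w` is a weight matrix of shape (m, n) and `b` is a bias vector of shape (n,). It takes `x` from shape (B, m) to `y` with shape (B, n), where B is the batch size. (Internally, `nn.Linear` stores its weight with shape (n, m) and multiplies by its transpose, which is equivalent.)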

In [41]:
# TODO: Apply a Linear layer transformation to this feature matrix.
# The resultant tensor, y, should have shape (100, 16)
x = torch.randn(100, 32)  # inputs:  (100, 32)
w = torch.randn(32, 16)   # weights: (32, 16)
b = torch.randn(16)       # bias:    (16,)
# (100, 32) @ (32, 16) -> (100, 16); the bias then broadcasts across all 100 rows
y = x @ w + b
y.shape

torch.Size([100, 16])

In [42]:
# TODO: Convert x to a shape that is compatible with this matmul to result in a tensor of shape (3, 5, 7)
x = torch.randn(3, 4, 5)
w = torch.randn(4, 7)
b = torch.randn(7)
# Swap the last two dims so the size-4 dim lines up with w's first dim: (3, 4, 5) -> (3, 5, 4)
x = x.transpose(1, 2)
# (3, 5, 4) @ (4, 7) + (7,) -> (3, 5, 7)
y = x @ w + b
y.shape

torch.Size([3, 5, 7])