In [2]:
import torch
torch.__version__

'2.1.0+cu118'

In [3]:
# A 3-dimensional tensor: a single outer group wrapping a 3x3 grid of integers
TENSOR = torch.tensor(
    [
        [
            [1, 2, 3],
            [3, 6, 9],
            [2, 4, 5],
        ]
    ]
)
TENSOR

tensor([[[1, 2, 3],
         [3, 6, 9],
         [2, 4, 5]]])

In [4]:
TENSOR.ndim

3

In [5]:
TENSOR.shape

torch.Size([1, 3, 3])

In [6]:
TENSOR.unsqueeze(-1).shape

torch.Size([1, 3, 3, 1])

In [7]:
TENSOR.shape

torch.Size([1, 3, 3])

In [8]:
# Random-valued tensor with 3 rows and 4 columns
random_tensor = torch.rand(3, 4)
random_tensor

tensor([[0.3722, 0.3922, 0.8977, 0.1237],
        [0.6741, 0.1451, 0.8282, 0.8919],
        [0.5782, 0.8885, 0.3918, 0.4668]])

In [9]:
random_tensor.dtype

torch.float32

In [10]:
# Zero-filled tensor with 3 rows and 4 columns, displayed together with its datatype
zeros = torch.zeros(3, 4)
zeros, zeros.dtype

(tensor([[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]),
 torch.float32)

In [11]:
# One-filled tensor with 3 rows and 4 columns, displayed together with its datatype
ones = torch.ones(3, 4)
ones, ones.dtype

(tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]),
 torch.float32)

In [12]:
# torch.range() is deprecated (it may raise an error in the future), so it is left disabled:
#zero_to_ten_deprecated = torch.range(0, 10)

# torch.arange() is the replacement; the end value is exclusive, so this holds 0 through 9
zero_to_ten = torch.arange(10)
zero_to_ten

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [13]:
# zeros_like() builds a zero-filled tensor with the same shape as the tensor it is given
ten_zeros = torch.zeros_like(zero_to_ten)
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

# Tensor datatype

The most common type (**and generally the default**) is ***torch.float32*** or torch.float.


This is referred to as "32-bit floating point".

But there's also 16-bit floating point (torch.float16 or torch.half) and 64-bit floating point (torch.float64 or torch.double).

And to confuse things even more, there are also 8-bit, 16-bit, 32-bit and 64-bit integers.

The reason for all of these is to do with precision in computing.

Precision is the amount of detail used to describe a number.

The higher the precision value (8, 16, 32), the more detail and hence data used to express a number.

This matters in deep learning and numerical computing because you're performing so many operations: the more detail you have to calculate on, the more compute you have to use.

So lower precision datatypes are generally faster to compute on but sacrifice some performance on evaluation metrics like accuracy (faster to compute but less accurate).

In [14]:
# torch.tensor() with its optional arguments written out at their default values
float_32_tensor = torch.tensor(
    [3.0, 6.0, 9.0],
    dtype=None,           # None: these float values end up as torch.float32; pass a dtype to override
    device=None,          # None: uses the default tensor type
    requires_grad=False,  # if True, operations performed on the tensor are recorded
)

float_32_tensor.shape, float_32_tensor.dtype, float_32_tensor.device

(torch.Size([3]), torch.float32, device(type='cpu'))

In [15]:
# Same values stored as 16-bit floats (torch.half and torch.float16 both work here)
float_16_tensor = torch.tensor([3.0, 6.0, 9.0], dtype=torch.half)

float_16_tensor.dtype

torch.float16

In [16]:
# Build a random 3x4 tensor to inspect
some_tensor = torch.rand(size=(3, 4))

# Show the tensor followed by its shape, datatype and device, one item per line
print(
    some_tensor,
    f"Shape of tensor: {some_tensor.shape}",
    f"Datatype of tensor: {some_tensor.dtype}",
    f"Device tensor is stored on: {some_tensor.device}",  # will default to CPU
    sep="\n",
)

tensor([[0.5220, 0.1433, 0.0843, 0.1517],
        [0.8227, 0.8992, 0.4416, 0.3671],
        [0.0382, 0.9942, 0.7011, 0.5526]])
Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu


In [17]:
# Adding a number to a tensor adds it to every element
tensor = torch.tensor([1, 2, 3])
torch.add(tensor, 10)

tensor([11, 12, 13])

In [18]:
# Multiply it by 10
tensor * 10

tensor([10, 20, 30])

In [19]:
# Tensors don't change unless reassigned
tensor

tensor([1, 2, 3])

In [20]:
# Subtract and reassign
tensor = tensor - 10
tensor

tensor([-9, -8, -7])

In [21]:
# Add and reassign
tensor = tensor + 11
tensor

tensor([2, 3, 4])

In [22]:
# Can also use torch functions
torch.multiply(tensor, 10)

tensor([20, 30, 40])

In [23]:
# Original tensor is still unchanged
tensor

tensor([2, 3, 4])

In [24]:
# Element-wise multiplication pairs elements up by position (index 0 with 0, 1 with 1, 2 with 2)
print(tensor, "*", tensor)
print("Equals:", torch.mul(tensor, tensor))

tensor([2, 3, 4]) * tensor([2, 3, 4])
Equals: tensor([ 4,  9, 16])


In [25]:
import torch
tensor = torch.tensor([1, 2, 3])
tensor.shape

torch.Size([3])

In [26]:
# Element-wise matrix multiplication
tensor * tensor

tensor([1, 4, 9])

In [27]:
# Matrix multiplication
torch.matmul(tensor, tensor)

tensor(14)

In [28]:
# Can also use the "@" symbol for matrix multiplication, though not recommended
tensor @ tensor

tensor(14)

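The in-built torch.matmul() method is faster than computing the same result by hand with a Python for loop, as the timings of the next two cells show.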

In [29]:
%%time
# Matrix multiplication by hand
# (avoid doing operations with for loops at all cost, they are computationally expensive)
# Sum of squared elements, accumulated one element at a time
value = 0
for element in tensor:
    value += element * element
value

CPU times: user 401 µs, sys: 0 ns, total: 401 µs
Wall time: 464 µs


tensor(14)

In [30]:
%%time
torch.matmul(tensor, tensor)

CPU times: user 79 µs, sys: 0 ns, total: 79 µs
Wall time: 84.6 µs


tensor(14)

In [31]:
# Two float matrices with 3 rows and 2 columns each; for matrix multiplication
# the shapes have to line up the right way
tensor_A = torch.tensor(
    [[1, 2], [3, 4], [5, 6]],
    dtype=torch.float,
)

tensor_B = torch.tensor(
    [[7, 10], [8, 11], [9, 12]],
    dtype=torch.float,
)

In [32]:
# View tensor_A and tensor_B
print(tensor_A)
print(tensor_B)

tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])
tensor([[ 7., 10.],
        [ 8., 11.],
        [ 9., 12.]])


In [33]:
# View tensor_A and tensor_B.T
print(tensor_A)
print(tensor_B.T)

tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])
tensor([[ 7.,  8.,  9.],
        [10., 11., 12.]])


In [34]:
# Transposing tensor_B makes the inner dimensions of the two operands match,
# so the matrix product works
output = tensor_A.matmul(tensor_B.T)

print(f"Original shapes: tensor_A = {tensor_A.shape}, tensor_B = {tensor_B.shape}\n")
print(f"New shapes: tensor_A = {tensor_A.shape} (same as above), tensor_B.T = {tensor_B.T.shape}\n")
print(f"Multiplying: {tensor_A.shape} * {tensor_B.T.shape} <- inner dimensions match\n")
print("Output:\n")
print(output)
print(f"\nOutput shape: {output.shape}")

Original shapes: tensor_A = torch.Size([3, 2]), tensor_B = torch.Size([3, 2])

New shapes: tensor_A = torch.Size([3, 2]) (same as above), tensor_B.T = torch.Size([2, 3])

Multiplying: torch.Size([3, 2]) * torch.Size([2, 3]) <- inner dimensions match

Output:

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

Output shape: torch.Size([3, 3])


In [35]:
# torch.mm is a shortcut for matmul
torch.mm(tensor_A, tensor_B.T)

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

A matrix multiplication like this is also referred to as the ***dot product*** of two matrices.

The torch.nn.Linear() module (we'll see this in action later on), also known as a ***feed-forward layer*** or ***fully connected*** layer, implements a matrix multiplication between an input x and a weights matrix A.

In [36]:
# The linear layer starts from a random weights matrix, so fix the seed first
# to make the result reproducible (more on this later)
torch.manual_seed(42)
# The layer itself uses matrix multiplication
linear = torch.nn.Linear(
    in_features=2,   # matches the inner dimension of the input
    out_features=6,  # describes the outer value
)
x = tensor_A
output = linear(x)
print(f"Input shape: {x.shape}\n")
print(f"Output:\n{output}\n\nOutput shape: {output.shape}")

Input shape: torch.Size([3, 2])

Output:
tensor([[2.2368, 1.2292, 0.4714, 0.3864, 0.1309, 0.9838],
        [4.4919, 2.1970, 0.4469, 0.5285, 0.3401, 2.4777],
        [6.7469, 3.1648, 0.4224, 0.6705, 0.5493, 3.9716]],
       grad_fn=<AddmmBackward0>)

Output shape: torch.Size([3, 6])


In [37]:
# Tensor holding the multiples of 10 from 0 up to 90
x = torch.arange(start=0, end=100, step=10)
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [38]:
# Aggregate x: minimum, maximum, mean and sum.
# x holds integers, so it is converted to float32 before the mean is taken.
print(f"Minimum: {x.min()}")
print(f"Maximum: {x.max()}")
# print(f"Mean: {x.mean()}") # this will error
print(f"Mean: {x.type(torch.float32).mean()}") # won't work without float datatype
print(f"Sum: {x.sum()}")

Minimum: 0
Maximum: 90
Mean: 45.0
Sum: 450


In [39]:
torch.max(x), torch.min(x), torch.mean(x.type(torch.float32)), torch.sum(x)

(tensor(90), tensor(0), tensor(45.), tensor(450))

In [40]:
# Tensor holding the multiples of 10 from 10 up to 90
tensor = torch.arange(start=10, end=100, step=10)
print(f"Tensor: {tensor}")

# argmax() / argmin() give the position of the largest / smallest value, not the value itself
print(f"Index where max value occurs: {tensor.argmax()}")
print(f"Index where min value occurs: {tensor.argmin()}")

Tensor: tensor([10, 20, 30, 40, 50, 60, 70, 80, 90])
Index where max value occurs: 8
Index where min value occurs: 0


In [41]:
# Same range as before, this time built from float arguments; check the resulting datatype
tensor = torch.arange(start=10.0, end=100.0, step=10.0)
tensor.dtype

torch.float32

In [42]:
# Convert the tensor to 16-bit floats
tensor_float16 = tensor.to(torch.float16)
tensor_float16

tensor([10., 20., 30., 40., 50., 60., 70., 80., 90.], dtype=torch.float16)

In [43]:
# Convert the tensor to 8-bit integers
tensor_int8 = tensor.to(torch.int8)
tensor_int8

tensor([10, 20, 30, 40, 50, 60, 70, 80, 90], dtype=torch.int8)

Mobile-based neural networks often operate with 8-bit integers, smaller and faster to run but less accurate than their float32 counterparts.

In [44]:
# Create a tensor
import torch
# Floats 1.0 through 7.0, shown with their shape
x = torch.arange(start=1.0, end=8.0)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7.]), torch.Size([7]))

In [45]:
# Reshape x to 1 row by 7 columns, which adds an extra leading dimension
x_reshaped = torch.reshape(x, (1, 7))
x_reshaped, x_reshaped.shape

(tensor([[1., 2., 3., 4., 5., 6., 7.]]), torch.Size([1, 7]))

In [46]:
# view() keeps the same data as the original tensor but presents it under a new shape
# (here 7 rows by 1 column). See more: https://stackoverflow.com/a/54507446/7900723
z = x.view((7, 1))
z, z.shape

(tensor([[1.],
         [2.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.]]),
 torch.Size([7, 1]))

In [47]:
# Stack four copies of x on top of each other along dimension 0
x_stacked = torch.stack((x, x, x, x), dim=0) # try changing dim to dim=1 and see what happens
x_stacked

tensor([[1., 2., 3., 4., 5., 6., 7.],
        [1., 2., 3., 4., 5., 6., 7.],
        [1., 2., 3., 4., 5., 6., 7.],
        [1., 2., 3., 4., 5., 6., 7.]])

To remove all single dimensions from a tensor, you can use torch.squeeze() (I remember this as squeezing the tensor to only have dimensions over 1).

In [48]:
# State of x_reshaped before squeezing
print(f"Previous tensor: {x_reshaped}")
print(f"Previous shape: {x_reshaped.shape}")

# squeeze() drops the extra size-1 dimension from x_reshaped
x_squeezed = torch.squeeze(x_reshaped)
print(f"\nNew tensor: {x_squeezed}")
print(f"New shape: {x_squeezed.shape}")

Previous tensor: tensor([[1., 2., 3., 4., 5., 6., 7.]])
Previous shape: torch.Size([1, 7])

New tensor: tensor([1., 2., 3., 4., 5., 6., 7.])
New shape: torch.Size([7])


And to do the reverse of torch.squeeze() you can use torch.unsqueeze() to add a dimension value of 1 at a specific index.

In [49]:
# State of x_squeezed before unsqueezing
print(f"Previous tensor: {x_squeezed}")
print(f"Previous shape: {x_squeezed.shape}")

# unsqueeze() inserts a size-1 dimension at the requested index (here index 0)
x_unsqueezed = torch.unsqueeze(x_squeezed, dim=0)
print(f"\nNew tensor: {x_unsqueezed}")
print(f"New shape: {x_unsqueezed.shape}")

Previous tensor: tensor([1., 2., 3., 4., 5., 6., 7.])
Previous shape: torch.Size([7])

New tensor: tensor([[1., 2., 3., 4., 5., 6., 7.]])
New shape: torch.Size([1, 7])


In [50]:
# Random tensor with shape (224, 224, 3)
x_original = torch.rand(224, 224, 3)

# Reorder the axes so the last one comes first (axis 0->1, 1->2, 2->0)
x_permuted = x_original.permute((2, 0, 1))

print(f"Previous shape: {x_original.shape}")
print(f"New shape: {x_permuted.shape}")

Previous shape: torch.Size([224, 224, 3])
New shape: torch.Size([3, 224, 224])


In [51]:
# Create a tensor
import torch
# Integers 1 through 9 arranged as one 3x3 block, shown with the resulting shape
x = torch.arange(start=1, end=10).reshape((1, 3, 3))
x, x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [52]:
# Let's index bracket by bracket: each additional [0] steps one dimension deeper into x
# (the 3x3 block, then its first row, then that row's first element)
print(f"First square bracket:\n{x[0]}")
print(f"Second square bracket: {x[0][0]}")
print(f"Third square bracket: {x[0][0][0]}")

First square bracket:
tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])
Second square bracket: tensor([1, 2, 3])
Third square bracket: 1


In [53]:
# Get all values of 0th dimension and the 0 index of 1st dimension
x[:, 0]

tensor([[1, 2, 3]])

In [54]:
# Get all values of 0th & 1st dimensions but only index 1 of 2nd dimension
x[:, :, 1]

tensor([[2, 5, 8]])

In [55]:
# Get all values of 0th dimension and index 1 of 1st dimension
x[:, 1]

tensor([[4, 5, 6]])

In [56]:
# Get all values of the 0 dimension but only the 1 index value of the 1st and 2nd dimension
x[:, 1, 1]

tensor([5])

In [57]:
# Get index 0 of 0th and 1st dimension and all values of 2nd dimension
x[0, 0, :] # same as x[0][0]

tensor([1, 2, 3])

torch.from_numpy(ndarray) - NumPy array -> PyTorch tensor.

torch.Tensor.numpy() - PyTorch tensor -> NumPy array.

In [58]:
# NumPy array to tensor
import torch
import numpy as np
# Build a float NumPy array, then turn it into a PyTorch tensor
array = np.arange(1.0, 8.0, 1.0)
tensor = torch.from_numpy(array)
array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

Note: By default, **NumPy** arrays are created with the **datatype float64** and if you convert it to a PyTorch tensor, it'll keep the same datatype (as above).

However, many PyTorch calculations default to using float32.

So if you want to convert your NumPy array (float64) -> PyTorch tensor (float64) -> PyTorch tensor (float32), you can use tensor = torch.from_numpy(array).type(torch.float32).

In [59]:
# Change the array, keep the tensor:
# the result of `array + 1` is assigned back to the name `array`, while `tensor`
# is left untouched and still displays the original values
array = array + 1
array, tensor

(array([2., 3., 4., 5., 6., 7., 8.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [60]:
# Go the other way: PyTorch tensor -> NumPy array
tensor = torch.ones(size=(7,)) # seven ones, dtype=float32
numpy_tensor = tensor.numpy() # keeps dtype=float32 unless changed
tensor, numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

### Reproducibility

In [61]:
import torch

# Draw two random 3x4 tensors, one after the other
random_tensor_A = torch.rand(size=(3, 4))
random_tensor_B = torch.rand(size=(3, 4))

# Print both, then compare them element by element
print(f"Tensor A:\n{random_tensor_A}\n")
print(f"Tensor B:\n{random_tensor_B}\n")
print(f"Does Tensor A equal Tensor B? (anywhere)")
torch.eq(random_tensor_A, random_tensor_B)

Tensor A:
tensor([[0.8016, 0.3649, 0.6286, 0.9663],
        [0.7687, 0.4566, 0.5745, 0.9200],
        [0.3230, 0.8613, 0.0919, 0.3102]])

Tensor B:
tensor([[0.9536, 0.6002, 0.0351, 0.6826],
        [0.3743, 0.5220, 0.1336, 0.9666],
        [0.9754, 0.8474, 0.8988, 0.1105]])

Does Tensor A equal Tensor B? (anywhere)


tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])

In [62]:
import torch
import random

# Fix the random seed, then draw the first tensor
RANDOM_SEED = 42  # try changing this to different values and see what happens to the numbers below
torch.manual_seed(RANDOM_SEED)
random_tensor_C = torch.rand(size=(3, 4))

# The seed has to be reset every time a new rand() is called;
# without this second call, tensor_D would be different to tensor_C
torch.manual_seed(RANDOM_SEED)  # try commenting this line out and seeing what happens
random_tensor_D = torch.rand(size=(3, 4))

print(f"Tensor C:\n{random_tensor_C}\n")
print(f"Tensor D:\n{random_tensor_D}\n")
print(f"Does Tensor C equal Tensor D? (anywhere)")
torch.eq(random_tensor_C, random_tensor_D)

Tensor C:
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])

Tensor D:
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])

Does Tensor C equal Tensor D? (anywhere)


tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])

In [63]:
# Check for GPU
import torch
torch.cuda.is_available()

False

In [64]:
# Pick the device name: "cuda" when a GPU is available, otherwise "cpu"
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cpu'

In [65]:
# Count number of devices
torch.cuda.device_count()

0

In [66]:
# A newly created tensor is placed on the CPU by default
tensor = torch.tensor([1, 2, 3])

# Show the tensor together with the device it currently sits on
print(tensor, tensor.device)

# Send the tensor to the selected device (the GPU if one is available)
tensor_on_gpu = tensor.to(device=device)
tensor_on_gpu

tensor([1, 2, 3]) cpu


tensor([1, 2, 3])

In [67]:
# Bring the tensor back to the CPU first, then convert it to a NumPy array
tensor_back_on_cpu = tensor_on_gpu.to("cpu").numpy()
tensor_back_on_cpu

array([1, 2, 3])

In [68]:
# A rank 2 tensor: two dimensions, laid out as 2 rows by 2 columns
rank_2_tensor = torch.tensor(
    [
        [10, 7],
        [3, 4],
    ]
)
rank_2_tensor

tensor([[10,  7],
        [ 3,  4]])

In [79]:
rank_2_tensor.squeeze().shape

torch.Size([2, 2])

In [80]:
rank_2_tensor.unsqueeze(0).shape

torch.Size([1, 2, 2])

In [81]:
rank_2_tensor.unsqueeze(1).shape

torch.Size([2, 1, 2])

In [82]:
rank_2_tensor.unsqueeze(-1).shape

torch.Size([2, 2, 1])

In [83]:
rank_2_tensor.unsqueeze(0)

tensor([[[10,  7],
         [ 3,  4]]])

In [84]:
rank_2_tensor.unsqueeze(1)

tensor([[[10,  7]],

        [[ 3,  4]]])

In [85]:
rank_2_tensor.unsqueeze(-1)

tensor([[[10],
         [ 7]],

        [[ 3],
         [ 4]]])

In [86]:
rank_2_tensor[...,None]

tensor([[[10],
         [ 7]],

        [[ 3],
         [ 4]]])

In [87]:
rank_2_tensor[None,...]

tensor([[[10,  7],
         [ 3,  4]]])

In [94]:
rank_2_tensor[None,...,None]

tensor([[[[10],
          [ 7]],

         [[ 3],
          [ 4]]]])

In [95]:
# NOTE(review): despite its name, this tensor has 3 dimensions, not 4 -- indexing with
# [..., None] appends one size-1 dimension to the 2-dimensional rank_2_tensor
rank4_tensor = rank_2_tensor[...,None]

In [96]:
rank4_tensor

tensor([[[10],
         [ 7]],

        [[ 3],
         [ 4]]])

In [97]:
rank4_tensor.squeeze(-1)

tensor([[10,  7],
        [ 3,  4]])

In [103]:
# Create a rank 5 (5 dimensions) tensor of 50 random numbers between 0 and 1
# (the tensor keeps the float64 datatype of the NumPy array it is built from)
G = torch.from_numpy(np.random.rand(1, 1, 1, 1, 50))
G

tensor([[[[[0.9532, 0.4938, 0.4196, 0.6888, 0.9346, 0.8827, 0.1005, 0.4796,
            0.1302, 0.7484, 0.5665, 0.2807, 0.3558, 0.4665, 0.3201, 0.1846,
            0.8992, 0.3986, 0.2709, 0.7777, 0.2694, 0.7170, 0.2649, 0.1778,
            0.5192, 0.6438, 0.2229, 0.7611, 0.8443, 0.1237, 0.9386, 0.3720,
            0.9914, 0.0330, 0.7377, 0.5519, 0.6145, 0.9520, 0.9030, 0.7121,
            0.7383, 0.7952, 0.4398, 0.2633, 0.4028, 0.5529, 0.2685, 0.4504,
            0.7634, 0.1538]]]]], dtype=torch.float64)

In [104]:
# Drop every size-1 dimension from G, then check the resulting shape and number of dimensions
G_squeezed = G.squeeze()
G_squeezed.shape, G_squeezed.ndim

(torch.Size([50]), 1)

In [105]:
G_squeezed

tensor([0.9532, 0.4938, 0.4196, 0.6888, 0.9346, 0.8827, 0.1005, 0.4796, 0.1302,
        0.7484, 0.5665, 0.2807, 0.3558, 0.4665, 0.3201, 0.1846, 0.8992, 0.3986,
        0.2709, 0.7777, 0.2694, 0.7170, 0.2649, 0.1778, 0.5192, 0.6438, 0.2229,
        0.7611, 0.8443, 0.1237, 0.9386, 0.3720, 0.9914, 0.0330, 0.7377, 0.5519,
        0.6145, 0.9520, 0.9030, 0.7121, 0.7383, 0.7952, 0.4398, 0.2633, 0.4028,
        0.5529, 0.2685, 0.4504, 0.7634, 0.1538], dtype=torch.float64)

In [107]:
# Insert a size-1 dimension at index 1 of rank_2_tensor, giving a 3-dimensional tensor
rank_3_tensor = torch.unsqueeze(rank_2_tensor, dim=1)
rank_3_tensor

tensor([[[10,  7]],

        [[ 3,  4]]])

In [108]:
rank_3_tensor.shape

torch.Size([2, 1, 2])

In [109]:
torch.squeeze(rank_3_tensor)


tensor([[10,  7],
        [ 3,  4]])

In [110]:
torch.squeeze(rank_3_tensor).shape

torch.Size([2, 2])

In [120]:
rank_2_tensor.unsqueeze(2).shape


torch.Size([2, 2, 1])