In [1]:
import numpy as np

import torch
# Show which PyTorch version is installed in this kernel.
print(torch.__version__)

2.5.1


# Introduction to tensors

## Creating tensors

PyTorch tensors are created using `torch.tensor()`.

In [2]:
# scalar: a zero-dimensional tensor holding a single value
scalar = torch.tensor(3.0)
print(scalar)
print("scalar dimension: ", scalar.dim())  # number of dimensions; 0 for a scalar
print("scalar item: ", scalar.item())  # .item() extracts the value as a plain Python number

tensor(3.)
scalar dimension:  0
scalar item:  3.0


In [3]:
# vector: a one-dimensional tensor
vector = torch.tensor([7.0, 7.0])
print(vector)
print("vector dimension: ", vector.dim())  # 1: a single axis
print("vector shape: ", vector.shape)  # two elements along that axis

tensor([7., 7.])
vector dimension:  1
vector shape:  torch.Size([2])


In [4]:
# matrix: a two-dimensional tensor (2 rows x 2 columns here)
MATRIX = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
print(MATRIX)
print("matrix dimension: ", MATRIX.dim())
print("matrix shape: ", MATRIX.shape)
# A single integer index selects along dim 0, so MATRIX[0] and MATRIX[1] are the
# first and second rows of the matrix (entries of dim 0, not separate axes).
print("matrix first dimension: ", MATRIX[0])
print("matrix second dimension: ", MATRIX[1])

tensor([[1., 2.],
        [3., 4.]])
matrix dimension:  2
matrix shape:  torch.Size([2, 2])
matrix first dimension:  tensor([1., 2.])
matrix second dimension:  tensor([3., 4.])


In [5]:
# tensor: three levels of nesting give three dimensions,
# here one outer entry that contains a 3 x 2 matrix
TENSOR = torch.tensor([[[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]])
print(TENSOR)
print("tensor dimension: ", TENSOR.dim())
print("tensor shape: ", TENSOR.shape)  # sizes are listed from the outermost bracket inwards

tensor([[[1., 2.],
         [3., 4.],
         [5., 6.]]])
tensor dimension:  3
tensor shape:  torch.Size([1, 3, 2])


## Creating random tensors

Random tensors are important because the way many neural networks learn is that they start with tensors full of random numbers and then adjust those random numbers to better represent the data.

`Start with random numbers -> look at data -> update random numbers -> look at data -> update random numbers -> ...`

In [6]:
# Draw a 3 x 4 matrix of uniform random samples from [0, 1).
tensor = torch.rand(size=(3, 4))
print(tensor)
print("random_tensor dimension: ", tensor.ndim)

tensor([[0.6314, 0.8238, 0.9925, 0.7917],
        [0.7415, 0.0382, 0.8795, 0.7245],
        [0.1279, 0.4904, 0.6395, 0.6149]])
random_tensor dimension:  2


In [7]:
# Same idea with an extra leading axis: one 3 x 4 matrix of uniform samples.
tensor_2 = torch.rand(size=(1, 3, 4))
print(tensor_2)
print("random_tensor_2 dimension: ", tensor_2.ndim)

tensor([[[0.8098, 0.8241, 0.2076, 0.2241],
         [0.4719, 0.8465, 0.1072, 0.1380],
         [0.7431, 0.3145, 0.6634, 0.9429]]])
random_tensor_2 dimension:  3


In [15]:
# Random tensor laid out like an image: (height, width, color channels).
image_tensor = torch.rand(size=(224, 224, 3))
# Row 0, first three positions along the width axis; each printed row holds
# the three channel values of one pixel.
print("First three width configurations:\n", image_tensor[0, :3])
print("image_tensor dimension: ", image_tensor.ndim)

First three width configurations:
 tensor([[0.8143, 0.9440, 0.3479],
        [0.7630, 0.5406, 0.0653],
        [0.1154, 0.5749, 0.0954]])
image_tensor dimension:  3


## Creating tensors with zeroes and ones

In [16]:
# 3 x 4 tensor filled with zeros; the dtype printed below is the default float type
tensor = torch.zeros(size=(3, 4))
print(tensor)
print("zero_tensor dimension: ", tensor.dim())
print("zero_tensor dtype: ", tensor.dtype)

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])
zero_tensor dimension:  2
zero_tensor dtype:  torch.float32


In [17]:
# 3 x 4 tensor filled with ones (rebinds `tensor`)
tensor = torch.ones(size=(3, 4))
print(tensor)
print("ones_tensor dimension: ", tensor.dim())
print("ones_tensor dtype: ", tensor.dtype)

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])
ones_tensor dimension:  2
ones_tensor dtype:  torch.float32


## Creating a range tensor and tensors-like

In [18]:
# arange excludes `end`: with step 2 the values are 0, 2, ..., 10 (6 elements)
tensor = torch.arange(start=0.0, end=11.0, step=2.0)
print(tensor)
print("range_tensor shape: ", tensor.shape)

tensor([ 0.,  2.,  4.,  6.,  8., 10.])
range_tensor shape:  torch.Size([6])


In [19]:
# tensors-like: build a new tensor whose shape is copied from an existing tensor
# instead of being written out explicitly; here zeros shaped like `tensor`
tensor_like = torch.zeros_like(input=tensor)
print(tensor_like)
print("tensor_like shape: ", tensor_like.shape)

tensor([0., 0., 0., 0., 0., 0.])
tensor_like shape:  torch.Size([6])


## Tensor data types

**Note:** A tensor datatype mismatch is one of the 3 big errors you'll run into with PyTorch & deep learning:
1. Tensor not in right datatype
2. Tensor not in right shape
3. Tensor not on right device (CPU vs GPU)

In [22]:
# default int dtype is int64: Python integer literals are stored as 64-bit integers
tensor = torch.tensor([1, 2, 3])
print(tensor.dtype)

torch.int64


In [23]:
# default float dtype is float32: Python float literals are stored as 32-bit floats
tensor = torch.tensor([1.0, 2.0, 3.0])
print(tensor.dtype)

torch.float32


In [24]:
# dtype conversion: .type(dtype) returns the tensor cast to the requested dtype.
# The name is rebound, so `tensor` is float16 from this cell onwards.
tensor = tensor.type(torch.float16)
print(tensor)

tensor([1., 2., 3.], dtype=torch.float16)


In [25]:
# getting tensor attributes: these three map onto the common error sources
# (wrong datatype, wrong shape, wrong device)
print(tensor.dtype)
print(tensor.shape)
print(tensor.device)

torch.float16
torch.Size([3])
cpu


## Tensor operations

In [26]:
# cast back to float32 (the tensor was converted to float16 earlier)
tensor = tensor.type(torch.float32)
tensor

tensor([1., 2., 3.])

In [27]:
# element-wise addition: 5 is added to every element; `tensor` itself is not modified
tensor + 5

tensor([6., 7., 8.])

In [28]:
# function form of the + operator
torch.add(tensor, 5)

tensor([6., 7., 8.])

In [29]:
# element-wise subtraction: 5 is subtracted from every element
tensor - 5

tensor([-4., -3., -2.])

In [30]:
# function form of the - operator
torch.sub(tensor, 5)

tensor([-4., -3., -2.])

In [31]:
# element-wise multiplication: every element is multiplied by 5
tensor * 5

tensor([ 5., 10., 15.])

In [32]:
# function form of the * operator
torch.mul(tensor, 5)

tensor([ 5., 10., 15.])

### Matrix multiplication (dot product)

Shape errors are among the most common errors in deep learning. Matrix multiplication must satisfy two main rules:

1. the **inner dimensions** must match:
* `(3, 2) @ (3, 2)` will not work
* `(3, 2) @ (2, 3)` will work

2. the resulting matrix has the shape of the **outer dimensions**:
* `(3, 2) @ (2, 3)` will result in a `(3, 3)` matrix

In [33]:
# left operand for the matrix multiplication examples: shape (3, 4)
tensor_A = torch.rand(size=(3, 4))
tensor_A

tensor([[0.3401, 0.5060, 0.8018, 0.9090],
        [0.3957, 0.4663, 0.0658, 0.2722],
        [0.1487, 0.0516, 0.9073, 0.1180]])

In [34]:
# right operand: shape (4, 3), so the inner dimensions of (3, 4) @ (4, 3) match
tensor_B = torch.rand(size=(4, 3))
tensor_B

tensor([[0.0011, 0.4925, 0.8457],
        [0.2750, 0.8785, 0.5952],
        [0.1284, 0.8864, 0.3438],
        [0.3667, 0.7742, 0.6087]])

In [35]:
# NOTE: rebinds `vector` (previously the [7., 7.] example) to a random tensor of shape (4,)
vector = torch.rand(size=(4,))
vector

tensor([0.4019, 0.2482, 0.7191, 0.1898])

In [36]:
# element-wise: `vector` (shape (4,)) is broadcast across the 3 rows of tensor_A,
# so every row is multiplied entry by entry with the same 4 values; result stays (3, 4)
tensor_A * vector

tensor([[0.1367, 0.1256, 0.5766, 0.1726],
        [0.1591, 0.1157, 0.0473, 0.0517],
        [0.0598, 0.0128, 0.6524, 0.0224]])

In [50]:
# matrix multiplication: (3, 4) @ (4, 3) -> (3, 3)
torch.matmul(tensor_A, tensor_B)

tensor([[0.5758, 2.0264, 1.4177],
        [0.2369, 0.8736, 0.8005],
        [0.1741, 1.0141, 0.5402]])

In [51]:
# torch.mm multiplies two 2-D matrices; for these inputs it matches torch.matmul
torch.mm(tensor_A, tensor_B)

tensor([[0.5758, 2.0264, 1.4177],
        [0.2369, 0.8736, 0.8005],
        [0.1741, 1.0141, 0.5402]])

In [52]:
# the @ operator is the infix spelling of matrix multiplication
tensor_A @ tensor_B

tensor([[0.5758, 2.0264, 1.4177],
        [0.2369, 0.8736, 0.8005],
        [0.1741, 1.0141, 0.5402]])

In [53]:
# Demonstrate the shape rule: (3, 2) @ (3, 2) has inner dimensions 2 and 3, which
# do not match. PyTorch reports this as a RuntimeError; catching only that type
# keeps an unrelated failure (e.g. a typo raising NameError) from being printed
# with the "Inner dimensions must match" message.
try:
  torch.rand(3, 2) @ torch.rand(3, 2)
except RuntimeError as e:
  print(f"Inner dimensions must match. {e}")

Inner dimensions must match. mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)


### Matrix Transpose (T)

To fix our tensor shape issues, we can manipulate the shape of one of our tensors using a **transpose `.T`**.

A **transpose** switches the axes or dimensions of a given tensor.

In [54]:
# a fresh random tensor of shape (3, 2)
torch.rand(3, 2)

tensor([[0.5421, 0.7182],
        [0.9501, 0.8209],
        [0.3189, 0.1523]])

In [55]:
# .T swaps the two axes: (3, 2) -> (2, 3).
# This is a newly drawn random tensor, not the transpose of the one shown in the previous cell.
torch.rand(3, 2).T

tensor([[0.9129, 0.8614, 0.8044],
        [0.1332, 0.2079, 0.6473]])

In [56]:
# transposing the right operand makes the inner dimensions match: (3, 2) @ (2, 3) -> (3, 3)
torch.rand(3, 2) @ torch.rand(3, 2).T

tensor([[0.8021, 0.4943, 0.9796],
        [0.2215, 0.1453, 0.2525],
        [0.4501, 0.4243, 0.2483]])

## Tensor Aggregation

In [57]:
# integer arguments give an integer tensor: 1, 11, ..., 91 (end=100 is exclusive)
tensor = torch.arange(start=1, end=100, step=10)
tensor

tensor([ 1, 11, 21, 31, 41, 51, 61, 71, 81, 91])

In [58]:
# smallest value; the method form and the torch.min function form give the same result
tensor.min(), torch.min(tensor)

(tensor(1), tensor(1))

In [59]:
# largest value, again as method and as function
tensor.max(), torch.max(tensor)

(tensor(91), tensor(91))

In [60]:
# mean() requires a floating point or complex dtype, so calling it on this int64
# tensor raises a RuntimeError. Only that error type is caught, so an unrelated
# failure is not reported with the message below.
try:
  tensor.mean(), torch.mean(tensor)
except RuntimeError as e:
  print(f"Mean not implemented for integer tensors. {e}")

Mean not implemented for integer tensors. mean(): could not infer output dtype. Input dtype must be either a floating point or complex dtype. Got: Long


In [61]:
# int64 ("Long" in the error message) is the reason mean() was rejected
tensor.dtype

torch.int64

In [62]:
# cast to a floating point dtype first; then both spellings of mean work
tensor.type(torch.float64).mean(), torch.mean(tensor.type(torch.float64))

(tensor(46., dtype=torch.float64), tensor(46., dtype=torch.float64))

In [63]:
# sum is available for integer tensors (unlike mean)
tensor.sum(), torch.sum(tensor)

(tensor(460), tensor(460))

## Positional Maximum and Minimum values (argmax and argmin)

**Spoiler:** Argmax is especially helpful when the softmax activation function is used.

In [64]:
# the integer tensor from the aggregation section: 1, 11, ..., 91
tensor

tensor([ 1, 11, 21, 31, 41, 51, 61, 71, 81, 91])

In [65]:
# index (not value) of the smallest element: value 1 sits at position 0
tensor.argmin(), torch.argmin(tensor)

(tensor(0), tensor(0))

In [66]:
# index (not value) of the largest element: value 91 sits at position 9
tensor.argmax(), torch.argmax(tensor)

(tensor(9), tensor(9))

## Reshaping, squeezing and stacking tensors

* Reshaping: one of the most common errors in machine learning and deep learning is shape mismatching between matrices, because they have to satisfy certain rules. This method returns a tensor with the same data as the `self` tensor but of a different shape (a view of the original when possible, otherwise a **new tensor** holding a copy).

* View: returns a view of an input tensor with a certain shape, but keeps the same memory as the original tensor.

* Stacking: combines multiple tensors along a new dimension (`torch.stack`), on top of each other (vstack) or side by side (hstack).

* Squeezing: removes all dimensions of size `1` from a tensor.

* Unsqueeze: adds a dimension of size `1` to a target tensor at a given position.

* Permute: returns a **view** of the input with dimensions permuted (swapped) in a certain order.

In [67]:
# float arange with the default step of 1: 1.0 ... 9.0 (end is exclusive), 9 elements
tensor = torch.arange(start=1.0, end=10.0)
tensor, tensor.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [68]:
# reshape must keep the number of elements: 1 * 7 = 7 does not equal 9, so PyTorch
# raises a RuntimeError. Only that error type is caught, so an unrelated failure
# is not reported with the message below.
try:
  tensor.reshape(1, 7)
except RuntimeError as e:
  print(f"Dimensions have to be compatible with the original dimensions.\n{e}")

Dimensions have to be compatible with the original dimensions.
shape '[1, 7]' is invalid for input of size 9


In [69]:
# 9 elements fit exactly into 3 x 3; the result is not assigned, so `tensor` keeps shape (9,)
tensor.reshape(3, 3)

tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])

In [70]:
# same 3 x 3 layout as reshape above; a view shares its memory with `tensor`
tensor.view(3, 3)

tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])

In [71]:
# stack along a new leading dimension: two (9,) tensors -> (2, 9)
torch.stack([tensor, tensor], dim=0)

tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.],
        [1., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [72]:
# stack along a new trailing dimension: two (9,) tensors -> (9, 2)
torch.stack([tensor, tensor], dim=1)

tensor([[1., 1.],
        [2., 2.],
        [3., 3.],
        [4., 4.],
        [5., 5.],
        [6., 6.],
        [7., 7.],
        [8., 8.],
        [9., 9.]])

In [73]:
# rebind `tensor` with an extra leading dimension of size 1: (9,) -> (1, 9)
tensor = tensor.reshape(1, 9)
tensor, tensor.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [74]:
# squeeze drops the size-1 dimension: (1, 9) -> (9,); `tensor` itself is unchanged
tensor.squeeze(), tensor.squeeze().shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [75]:
# rebind `tensor` as a single column: (1, 9) -> (9, 1)
tensor = tensor.reshape(9, 1)
tensor, tensor.shape

(tensor([[1.],
         [2.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.],
         [8.],
         [9.]]),
 torch.Size([9, 1]))

In [76]:
# drop the size-1 dimension and keep the result this time: (9, 1) -> (9,)
tensor = tensor.squeeze()
tensor, tensor.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [77]:
# insert a size-1 dimension at position 0: (9,) -> (1, 9)
tensor.unsqueeze(dim=0), tensor.unsqueeze(dim=0).shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [78]:
# insert a size-1 dimension at position 1: (9,) -> (9, 1)
tensor.unsqueeze(dim=1), tensor.unsqueeze(dim=1).shape

(tensor([[1.],
         [2.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.],
         [8.],
         [9.]]),
 torch.Size([9, 1]))

In [79]:
# rebind `tensor` with three dimensions so there is something to permute: (9,) -> (3, 1, 3)
tensor = tensor.reshape(3, 1, 3)
tensor, tensor.shape

(tensor([[[1., 2., 3.]],
 
         [[4., 5., 6.]],
 
         [[7., 8., 9.]]]),
 torch.Size([3, 1, 3]))

In [80]:
# reorder the axes: new dim 0 = old dim 1, new dim 1 = old dim 0, dim 2 stays -> (1, 3, 3)
tensor.permute(1, 0, 2), tensor.permute(1, 0, 2).shape

(tensor([[[1., 2., 3.],
          [4., 5., 6.],
          [7., 8., 9.]]]),
 torch.Size([1, 3, 3]))

In [81]:
# permute, then squeeze away the size-1 axis: (3, 1, 3) -> (1, 3, 3) -> (3, 3)
tensor.permute(1, 0, 2).squeeze(), tensor.permute(1, 0, 2).squeeze().shape

(tensor([[1., 2., 3.],
         [4., 5., 6.],
         [7., 8., 9.]]),
 torch.Size([3, 3]))

In [82]:
# rebind `tensor` without its size-1 axis: (3, 1, 3) -> (3, 3)
tensor = tensor.squeeze()
tensor

tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])

In [83]:
# swapping the two axes of a 2-D tensor turns rows into columns
tensor_permuted = tensor.permute(1, 0)
tensor_permuted

tensor([[1., 4., 7.],
        [2., 5., 8.],
        [3., 6., 9.]])

In [84]:
# permute returned a view: writing through `tensor_permuted` also changes tensor[0, 0],
# because both names refer to the same underlying data
tensor_permuted[0, 0] = 100.0
tensor_permuted, tensor

(tensor([[100.,   4.,   7.],
         [  2.,   5.,   8.],
         [  3.,   6.,   9.]]),
 tensor([[100.,   2.,   3.],
         [  4.,   5.,   6.],
         [  7.,   8.,   9.]]))

## Indexing (selecting data from tensors)

In [85]:
# rebuild a clean example for indexing: values 1..9 arranged as shape (1, 3, 3)
tensor = torch.arange(start=1.0, end=10.0).reshape(1, 3, 3)
tensor, tensor.shape

(tensor([[[1., 2., 3.],
          [4., 5., 6.],
          [7., 8., 9.]]]),
 torch.Size([1, 3, 3]))

In [86]:
# index dim 0 -> the inner (3, 3) matrix
tensor[0]

tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])

In [87]:
# index 0 on dim 0 and on dim 1 -> the first row of the inner matrix
tensor[0, 0]

tensor([1., 2., 3.])

In [88]:
# one index per dimension -> a single element, returned as a 0-dimensional tensor
tensor[0, 0, 0]

tensor(1.)

In [89]:
# ":" keeps all of dim 0; index 0 on dim 1 picks the first row -> shape (1, 3)
tensor[:, 0]

tensor([[1., 2., 3.]])

In [90]:
# keep all of dim 0 and dim 1; index 1 on dim 2 picks the middle column -> shape (1, 3)
tensor[:, :, 1]

tensor([[2., 5., 8.]])

## PyTorch tensors and NumPy

NumPy is a popular library for working with numerical data in Python. It's powerful because it has a lot of optimised functions and data structures such as arrays and vectors.

And because of this, PyTorch tensors can interact with NumPy arrays.

* Converting a NumPy array to a PyTorch tensor: `torch.from_numpy()`
* Converting a PyTorch tensor to a NumPy array: `torch.Tensor.numpy()`

**Warning:** when converting from NumPy to PyTorch, PyTorch reflects NumPy's default datatype of float64, leading to a PyTorch tensor of dtype float64. This is not ideal since PyTorch usually defaults to float32. To change a PyTorch tensor's datatype, you can use the `torch.Tensor.type()` method.

In [91]:
# NumPy builds a float64 array from float arguments
array = np.arange(start=1.0, stop=10.0)
# from_numpy keeps NumPy's float64 dtype; .type(torch.float32) converts it to
# PyTorch's usual float dtype
tensor = torch.from_numpy(array).type(torch.float32)
array, tensor

(array([1., 2., 3., 4., 5., 6., 7., 8., 9.]),
 tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]))

In [92]:
# torch.ones gives a float32 tensor
tensor = torch.ones(7)
# .numpy() keeps the tensor's dtype; .astype(np.float64) then widens it to NumPy's usual float dtype
array = tensor.numpy().astype(np.float64)
tensor, array

(tensor([1., 1., 1., 1., 1., 1., 1.]), array([1., 1., 1., 1., 1., 1., 1.]))

## Reproducibility (taking the random out of random)

In [93]:
# Seed the global generator once, then draw twice. The second draw continues
# the same random stream, so the two tensors hold different values.
torch.manual_seed(42)
rand1, rand2 = (torch.rand(size=(3, 4)) for _ in range(2))

# one seed, two consecutive draws: compare them element by element
torch.equal(rand1, rand2)

False

In [94]:
# Re-seeding right before each draw restarts the random stream from the same
# point, so both draws produce the same values.
SEED = 42
RAND_SHAPE = (3, 4)

torch.manual_seed(SEED)
rand1 = torch.rand(size=RAND_SHAPE)

torch.manual_seed(SEED)
rand2 = torch.rand(size=RAND_SHAPE)

# same seed before each draw: compare the tensors element by element
torch.equal(rand1, rand2)

True

## Running PyTorch on GPU

In [95]:
# whether PyTorch can use a CUDA GPU in this environment
torch.cuda.is_available()

False

In [96]:
# Device-agnostic setup: use the GPU when CUDA is usable, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cpu'

In [97]:
# create the tensor directly on the device selected above
tensor = torch.tensor([1, 2, 3], device=device)
tensor, tensor.device

(tensor([1, 2, 3]), device(type='cpu'))

## References:

1. [PyTorch at Tesla - Andrej Karpathy, Tesla](https://www.youtube.com/watch?v=oBklltKXtDE)
2. [Google's Best Practices for ML Engineering](https://developers.google.com/machine-learning/guides/rules-of-ml)
3. [Introduction to PyTorch - Official documentation](https://pytorch.org/tutorials/beginner/basics/intro.html)
4. [Quickstart - Official documentation](https://pytorch.org/tutorials/beginner/basics/quickstart_tutorial.html)
5. [Tensors - Official documentation](https://pytorch.org/tutorials/beginner/basics/tensorqs_tutorial.html)
6. [torch.Tensor - API documentation](https://pytorch.org/docs/stable/tensors.html)
7. [torch.cuda - API documentation](https://pytorch.org/docs/stable/cuda.html)
8. [Unofficial Style Guide](https://github.com/IgorSusmelj/pytorch-styleguide#recommended-code-structure-for-training-your-model)
9. [Ground truth notebook](https://www.learnpytorch.io/00_pytorch_fundamentals/)