In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
import torch
torch.__version__

'2.1.2'

# Introduction To Tensors
## Creating Tensors

In [3]:
# Scalar: a single value wrapped in a zero-dimensional tensor
scalar = torch.tensor(7)
scalar

tensor(7)

In [4]:
scalar.ndim # number of dimensions (a scalar has none, hence 0)

0

In [5]:
scalar.shape # empty Size: there are no dimensions to list

torch.Size([])

In [6]:
scalar.item() # extracts the single value as a regular Python int

7

In [7]:
# Vector: a one-dimensional tensor
vector = torch.tensor([7, 7])
vector

tensor([7, 7])

In [8]:
vector.ndim # one level of square brackets -> one dimension

1

In [9]:
vector.shape # two elements along the single dimension

torch.Size([2])

In [10]:
# A longer vector: still one dimension, only the shape changes
vector1 = torch.tensor([7, 7, 7])
print(f"ndim = {vector1.ndim}, shape = {vector1.shape}")

ndim = 1, shape = torch.Size([3])


> By convention, tensors with two or more dimensions (matrices and higher) are given uppercase names, while scalars and vectors use lowercase names.

In [11]:
# MATRIX: a two-dimensional tensor (rows x columns)
MATRIX = torch.tensor([[7,8],
                       [9,10]])
MATRIX

tensor([[ 7,  8],
        [ 9, 10]])

In [12]:
MATRIX.ndim # two nested bracket levels -> two dimensions

2

In [13]:
MATRIX.shape # 2 rows, 2 columns

torch.Size([2, 2])

In [14]:
# TENSOR: three dimensions - here a single 3x3 matrix wrapped in one outer dimension
TENSOR = torch.tensor([[[1, 2, 3],
                        [3, 6, 9],
                        [2, 5, 4]]])
TENSOR

tensor([[[1, 2, 3],
         [3, 6, 9],
         [2, 5, 4]]])

In [15]:
TENSOR.ndim # three nested bracket levels -> three dimensions

3

In [16]:
TENSOR.shape # sizes are listed from the outermost dimension inward: one 3x3 matrix

torch.Size([1, 3, 3])

In [17]:
# Another 3-D tensor: two 2x2 matrices along the outer dimension
TENSOR1 = torch.tensor([[[1, 2],
                         [2, 3]],
                        [[5, 6],
                         [6, 7]]])
TENSOR1

tensor([[[1, 2],
         [2, 3]],

        [[5, 6],
         [6, 7]]])

In [18]:
TENSOR1.ndim # still three dimensions

3

In [19]:
TENSOR1.shape # two matrices, each 2x2

torch.Size([2, 2, 2])

## Random Tensors

Why random tensors?

Random tensors are important because the way many neural networks learn is that they start with tensors full of random numbers and then adjust those random numbers to better represent the data.

`Start with random numbers -> look at data -> update random numbers -> look at data -> update random numbers...`

In [20]:
# Create a random tensor of size (3, 4); no seed is set, so the values differ on every run
R = torch.rand(3, 4)
R

tensor([[0.4435, 0.2510, 0.9914, 0.5987],
        [0.4387, 0.5472, 0.4037, 0.0930],
        [0.0096, 0.9064, 0.4961, 0.5623]])

In [21]:
# Random tensor laid out like an image: (height, width, colour channels)
IMG_TENSOR = torch.rand(224, 224, 3)
IMG_TENSOR.shape, IMG_TENSOR.ndim

(torch.Size([224, 224, 3]), 3)

## Zeros and Ones

In [22]:
# A 3x4 tensor in which every element is 0
ZEROES = torch.zeros(3, 4)
ZEROES

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [23]:
# A 3x4 tensor in which every element is 1
ONES = torch.ones(3, 4)
ONES

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

## Creating A Range Of Tensors and Tensors-like

In [24]:
torch.range(0, 10) # deprecated: unlike arange it includes the `end` and returns floats. Will be removed in the future!

  torch.range(0, 10) # include the `end`. Will be removed in the future!


tensor([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.])

In [25]:
torch.arange(0, 10) # acts like Python's range function: the `end` is excluded

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [26]:
torch.arange(0, 10, 2) # start, end (excluded), step

tensor([0, 2, 4, 6, 8])

In [27]:
# Creating tensor-like: zeros with the same shape and dtype as the tensor passed in
torch.zeros_like(torch.arange(0, 10))

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

## Tensor Datatypes

> For the full list of datatypes, see the `torch.Tensor` documentation.

In [28]:
# Float32
float32_tensor = torch.tensor([3.0, 6.0, 9.0], dtype=None) # dtype=None lets torch infer it: float32 for Python floats
float32_tensor1 = torch.tensor([3.0, 6.0, 9.0], dtype=torch.float32) # same dtype, requested explicitly
float32_tensor.dtype, float32_tensor1.dtype

(torch.float32, torch.float32)

In [29]:
# Float16 (half precision), requested explicitly at creation time
float16_tensor = torch.tensor([3.0, 6.0, 9.0], dtype=torch.float16)
float16_tensor.dtype

torch.float16

In [30]:
## Another way to get float16_tensor above: convert the dtype of an existing tensor
float16_tensor1 = float32_tensor.type(torch.float16)
float16_tensor1.dtype

torch.float16

## Important Tensor Creation Parameters

In [31]:
# The three creation parameters worth knowing; all are left at their defaults here
float32_tensor = torch.tensor([3.0, 6.0, 9.0], 
                              dtype=None, # datatype of the tensor (None -> inferred from the data)
                              device=None, # where the tensor lives: cpu, cuda, etc (None -> default device)
                              requires_grad=False) # if you want pytorch to track the gradient 

In [32]:
# Inspect what the None arguments above resolved to
print(float32_tensor.dtype)
print(float32_tensor.device)
print(float32_tensor.requires_grad)

torch.float32
cpu
False


## Tensor Operations

In [33]:
## Create an integer tensor to use in the arithmetic examples below
tensor = torch.tensor([1, 2, 3])

In [34]:
# Addition (the scalar is applied to every element; `tensor` itself is not modified)
tensor + 10

tensor([11, 12, 13])

In [35]:
# Subtraction (element-wise)
tensor - 10

tensor([-9, -8, -7])

In [36]:
# Multiplication (element-wise)
tensor * 10

tensor([10, 20, 30])

In [37]:
# Division (true division: the integer tensor yields a float result)
tensor / 10

tensor([0.1000, 0.2000, 0.3000])

In [38]:
# PyTorch's built-in method equivalents of + - * /
tensor.add(3), tensor.sub(3), tensor.mul(3), tensor.div(3)

(tensor([4, 5, 6]),
 tensor([-2, -1,  0]),
 tensor([3, 6, 9]),
 tensor([0.3333, 0.6667, 1.0000]))

In [None]:
# Matrix multiplication
print(tensor * tensor) # element wise multiplication
print(torch.matmul(tensor, tensor)) # dot product (both operands are 1-D). Alt syntax: the @ operator or torch.dot(); torch.mm() only accepts 2-D matrices

tensor([1, 4, 9])
tensor(14)


`torch.matmul` is heavily optimized compared to computing the same result by hand with a Python loop:

In [None]:
%%time
# Dot product by hand: accumulate the element-wise products in a Python loop
res = 0
for i in range(len(tensor)):
    res += tensor[i] * tensor[i]
print(res)

In [41]:
%%time
# The same dot product through the built-in, timed for comparison
torch.matmul(tensor, tensor)

CPU times: user 44 µs, sys: 11 µs, total: 55 µs
Wall time: 135 µs


tensor(14)

Prefer PyTorch's built-in operations whenever possible. For basic arithmetic, the operators `+`, `-`, `*`, `/` are fine, because PyTorch calls the corresponding `add`, `sub`, `mul`, `div` functions internally.

When it comes to matrix multiplication, there are two main rules:
1. The **inner_dimension** must match:
* `(3, 2) @ (3, 2)` won't work
* `(3, 2) @ (2, 3)` will work
* `(2, 3) @ (3, 2)` will work
2. The resulting matrix has the shape of the **outer_dimension**:
* `(2, 3) @ (3, 2)` -> `(2, 2)`
* `(3, 2) @ (2, 3)` -> `(3, 3)`

## Transpose

A **transpose** switches the axes or dimensions of a given tensor.

In [42]:
# A 2x3 matrix; .T swaps its rows and columns, giving 3x2
tensor = torch.tensor([[1, 2, 3],
                       [4, 5, 6]])
tensor.T # matrix transpose

tensor([[1, 4],
        [2, 5],
        [3, 6]])

## Aggregation Functions

In [43]:
# Integers from 0 up to (not including) 100 in steps of 10
x = torch.arange(0, 100, 10)
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [44]:
x.sum(), torch.sum(x) # method form and function form give the same result

(tensor(450), tensor(450))

In [45]:
x.min(), x.max() # smallest and largest element

(tensor(0), tensor(90))

In [46]:
torch.min(x), torch.max(x) # same as above; passing `dim` additionally returns the indices

(tensor(0), tensor(90))

In [47]:
value, index = torch.max(x, dim=0) # with dim given, returns (max value, its index); a 1-D tensor only has dim 0
print(value, index)

tensor(90) tensor(9)


In [48]:
x1 = torch.tensor([[1, 2], [3, 4]])

# No dim given: one global minimum over every element
min_value = x1.min()
print(min_value)  # Output: tensor(1)

# dim=0 reduces over the rows -> one minimum per column (dim=1 would give one per row),
# returned together with the row index at which each minimum was found
min_values, min_indices = x1.min(dim=0)
print(min_values)  # Output: tensor([1, 2])
print(min_indices)  # Output: tensor([0, 0])

tensor(1)
tensor([1, 2])
tensor([0, 0])


In [49]:
# Mean - note: mean only works with float or complex dtype, so the integer tensor is cast to float32 first
torch.mean(x.type(torch.float32)), x.type(torch.float32).mean()

(tensor(45.), tensor(45.))

## Find Positional Min and Max

In [50]:
# Show x again for reference
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [51]:
x.argmin() # the index where the minimum occurs

tensor(0)

In [52]:
x.argmax() # the index where the maximum occurs

tensor(9)

## Reshaping, Stacking, Squeezing, and Unsqueezing


In [53]:
# Let's create a float tensor holding 1.0 through 9.0
x = torch.arange(1., 10.)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [54]:
# Reshape - Note: the product of the requested dimensions must equal the number of elements in the original tensor (9 here)
print(x.reshape(1, 9))
print(x.reshape(9, 1))
print(x.reshape(3, 3))

tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]])
tensor([[1.],
        [2.],
        [3.],
        [4.],
        [5.],
        [6.],
        [7.],
        [8.],
        [9.]])
tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])


In [55]:
# View - Note: a view shares the same memory as its original tensor
z = x.view(1, 9)
z

tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [56]:
# Changing z changes x, because the view shares x's memory
z[:, 0] = 5
z, x

(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]),
 tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.]))

In [57]:
# Stack tensors on top of each other along a new dimension
x_stacked = torch.stack([x, x, x, x], dim=0) # dim=0: each row is x
x_stacked

tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [58]:
# Stack the same tensors side by side instead
x_stacked1 = torch.stack([x, x, x, x], dim=1) # dim=1: each column is x
x_stacked1

tensor([[5., 5., 5., 5.],
        [2., 2., 2., 2.],
        [3., 3., 3., 3.],
        [4., 4., 4., 4.],
        [5., 5., 5., 5.],
        [6., 6., 6., 6.],
        [7., 7., 7., 7.],
        [8., 8., 8., 8.],
        [9., 9., 9., 9.]])

In [59]:
# Stack two different vectors (no dim given): each one becomes a row
a = torch.arange(1, 4)
b = torch.arange(6, 9)
torch.stack([a, b])

tensor([[1, 2, 3],
        [6, 7, 8]])

In [60]:
torch.vstack([a, b]) # stack vertically (for these 1-D inputs, the same result as torch.stack above)

tensor([[1, 2, 3],
        [6, 7, 8]])

In [61]:
torch.hstack([a, b]) # stack horizontally: the 1-D inputs are joined end to end

tensor([1, 2, 3, 6, 7, 8])

In [62]:
# Squeeze - Remove all size-1 dimensions from a tensor
x_reshaped = x.reshape(1, 9)
print(x_reshaped, x_reshaped.shape)
print(x_reshaped.squeeze(), x_reshaped.squeeze().shape)

tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]) torch.Size([1, 9])
tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.]) torch.Size([9])


In [63]:
# Another example of Squeeze
a = torch.zeros(2, 1, 2, 1, 2)
print(a.size())
b = torch.squeeze(a) # no dim given: every size-1 dimension is removed
print(b.size())
b = torch.squeeze(a, 0) # squeeze dimension 0 (its size is 2, so nothing is removed)
print(b.size())
b = torch.squeeze(a, 1) # squeeze dimension 1
print(b.size())
b = torch.squeeze(a, (1, 3)) # squeeze dimensions 1 and 3 (tuple form)
print(b.size())

torch.Size([2, 1, 2, 1, 2])
torch.Size([2, 2, 2])
torch.Size([2, 1, 2, 1, 2])
torch.Size([2, 2, 1, 2])
torch.Size([2, 2, 2])


In [64]:
# Unsqueeze - insert a new size-1 dimension at the given position
a = torch.tensor([1, 2, 3, 4])
print(a, a.shape)
for position in (0, 1):
    expanded = a.unsqueeze(position)  # position 0 -> row vector, position 1 -> column vector
    print(expanded, expanded.shape)

tensor([1, 2, 3, 4]) torch.Size([4])
tensor([[1, 2, 3, 4]]) torch.Size([1, 4])
tensor([[1],
        [2],
        [3],
        [4]]) torch.Size([4, 1])


In [65]:
# Permute - return the tensor with its dimensions reordered as requested
x_original = torch.rand(224, 224, 3)  # height, width, RGB
print(x_original.shape)
# New order (2, 0, 1) -> RGB, height, width: old axis 0 lands at 1, 1 at 2, 2 at 0
x_permuted = x_original.permute(2, 0, 1)
print(x_permuted.shape)

torch.Size([224, 224, 3])
torch.Size([3, 224, 224])


## Indexing (selecting data from tensors)

In [66]:
# Create a tensor: the values 1 through 9 arranged as a single 3x3 matrix
x = torch.arange(1, 10).reshape(1, 3, 3)
x, x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [67]:
# Let's index bracket by bracket: each extra [0] strips one outer dimension
print(f"First square bracket:\n{x[0]}") 
print(f"Second square bracket: {x[0][0]}") 
print(f"Third square bracket: {x[0][0][0]}")

First square bracket:
tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])
Second square bracket: tensor([1, 2, 3])
Third square bracket: 1


In [68]:
# Get all values of 0th dimension and the 0 index of 1st dimension (i.e. the first row)
x[:, 0]

tensor([[1, 2, 3]])

In [69]:
# Get all values of 0th & 1st dimensions but only index 1 of 2nd dimension (i.e. the middle column)
x[:, :, 1]

tensor([[2, 5, 8]])

In [70]:
# Get all values of the 0th dimension but only index 1 of the 1st and 2nd dimensions (i.e. the centre element)
x[:, 1, 1]

tensor([5])

In [71]:
# Get index 0 of 0th and 1st dimension and all values of 2nd dimension
x[0, 0, :] # same as x[0][0]

tensor([1, 2, 3])

## PyTorch tensors & NumPy

Since NumPy is a popular Python numerical computing library, PyTorch has functionality to interact with it nicely.

The two main methods you'll want to use for NumPy to PyTorch (and back again) are:

- `torch.from_numpy(ndarray)` - NumPy array -> PyTorch tensor.
- `torch.Tensor.numpy()` - PyTorch tensor -> NumPy array.


In [72]:
# Numpy array to tensor
arr = np.arange(1.0, 8.0) # a float64 NumPy array
tensor = torch.from_numpy(arr) # keeps NumPy's dtype, hence torch.float64 rather than torch's usual float32
arr, tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [73]:
# Tensor to Numpy array
tensor = torch.ones(7) # a float32 tensor
arr = tensor.numpy() # keeps the tensor's dtype, hence a float32 array
tensor, arr

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

## Reproducibility (trying to take the random out of random)

In [74]:
RANDOM_SEED = 42

# Two unseeded draws: the generator keeps advancing, so the tensors differ
rand_a, rand_b = torch.rand(3, 4), torch.rand(3, 4)
print(torch.eq(rand_a, rand_b))

# Re-seeding immediately before each draw rewinds the generator to the same state,
# so both tensors come out identical
torch.manual_seed(RANDOM_SEED)
rand_a = torch.rand(3, 4)
torch.manual_seed(RANDOM_SEED)
rand_b = torch.rand(3, 4)
print(torch.eq(rand_a, rand_b))

tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


## Running tensors on GPUs (and making faster computations)

In [75]:
# Shell command: show the NVIDIA driver and GPU status of this machine
!nvidia-smi

Sun Jul 28 07:26:52 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.90.07              Driver Version: 550.90.07      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla P100-PCIE-16GB           Off |   00000000:00:04.0 Off |                    0 |
| N/A   36C    P0             25W /  250W |       0MiB /  16384MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [76]:
# Check for GPU access with PyTorch (True when a CUDA device is usable)
torch.cuda.is_available()

True

In [77]:
# Device-agnostic setup: prefer the GPU, fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

Read more about setting up [device agnostic code](https://pytorch.org/docs/stable/notes/cuda.html#device-agnostic-code).

In [78]:
# Count the number of CUDA devices visible to PyTorch
torch.cuda.device_count()

1

Putting tensors on the GPU

In [79]:
# Create a tensor without naming a device (default device = "cpu")
tensor = torch.tensor([1, 2, 3])
tensor.device

device(type='cpu')

In [83]:
# Move tensor to GPU if available (`device` is "cpu" when no GPU was found)
tensor_on_gpu = tensor.to(device)
tensor_on_gpu.device

device(type='cuda', index=0)

In [85]:
# Move tensor back to CPU
# Example use case: converting the tensor to a NumPy array
tensor_on_cpu = tensor_on_gpu.cpu()
tensor_on_cpu.device

device(type='cpu')

# Exercises

[Source](https://www.learnpytorch.io/00_pytorch_fundamentals/#exercises)

In [87]:
# Exercise: create a random tensor with shape (7, 7)
a = torch.rand(size=(7, 7))
a

tensor([[0.1587, 0.6542, 0.3278, 0.6532, 0.3958, 0.9147, 0.2036],
        [0.2018, 0.2018, 0.9497, 0.6666, 0.9811, 0.0874, 0.0041],
        [0.1088, 0.1637, 0.7025, 0.6790, 0.9155, 0.2418, 0.1591],
        [0.7653, 0.2979, 0.8035, 0.3813, 0.7860, 0.1115, 0.2477],
        [0.6524, 0.6057, 0.3725, 0.7980, 0.8399, 0.1374, 0.2331],
        [0.9578, 0.3313, 0.3227, 0.0162, 0.2137, 0.6249, 0.4340],
        [0.1371, 0.5117, 0.1585, 0.0758, 0.2247, 0.0624, 0.1816]])

In [91]:
# Perform a matrix multiplication on the (7, 7) tensor from the previous exercise with another random tensor with shape (1, 7)
# (hint: you may have to transpose the second tensor).
b = torch.rand(size=(1, 7))
c = torch.mm(a, b.T) # b.T is (7, 1), so the inner dimensions match: (7, 7) @ (7, 1) -> (7, 1)
c

tensor([[0.8686],
        [1.5028],
        [1.2276],
        [1.5952],
        [1.3500],
        [1.0527],
        [0.3657]])

In [122]:
# Redo the two exercises above, this time seeding the RNG with 0 first
torch.manual_seed(0)
a = torch.rand(7, 7)
b = torch.rand(1, 7)
c = a.mm(b.T)  # (7, 7) @ (7, 1) -> (7, 1)
c

tensor([[1.8542],
        [1.9611],
        [2.2884],
        [3.0481],
        [1.7067],
        [2.5290],
        [1.7989]])

In [142]:
# Random seed for GPU
# torch.cuda.manual_seed only seeds the current CUDA device, so when `device`
# has fallen back to "cpu" the draws below would be unseeded. torch.manual_seed
# seeds the generators on all devices, which keeps this cell reproducible either
# way; the explicit CUDA call is kept because it is the point of the exercise.
torch.manual_seed(1234)
torch.cuda.manual_seed(1234)
a = torch.rand(size=(7, 7), device=device)
b = torch.rand(size=(1, 7), device=device)
c = torch.mm(a, b.T) # (7, 7) @ (7, 1) -> (7, 1), computed on `device`
c

tensor([[0.9558],
        [1.2227],
        [0.9335],
        [1.6030],
        [0.9344],
        [0.5282],
        [0.5664]], device='cuda:0')

In [143]:
# Find the maximum and minimum values of the output tensor above (each comes back as a 0-dim tensor).
c.max(), c.min()

(tensor(1.6030, device='cuda:0'), tensor(0.5282, device='cuda:0'))

In [144]:
# Find the indices of the maximum and minimum values of the output tensor above
# (no dim is given, so a single index is returned for each).
c.argmax(), c.argmin()

(tensor(3, device='cuda:0'), tensor(5, device='cuda:0'))

In [145]:
# Exercise: with the seed set to 7, build a random tensor of shape (1, 1, 1, 10),
# derive a second tensor with every size-1 dimension removed (shape (10)),
# then print each tensor together with its shape.
torch.manual_seed(7)
a = torch.rand(size=(1, 1, 1, 10))
b = a.squeeze()
for t in (a, b):
    print(t, t.shape)

tensor([[[[0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297,
           0.3653, 0.8513]]]]) torch.Size([1, 1, 1, 10])
tensor([0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297, 0.3653,
        0.8513]) torch.Size([10])
