## 00 Pytorch Fundamentals

In [None]:
import torch
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
torch.__version__

'1.12.1+cu113'

## Introduction to Tensors

### Creating tensors

In [None]:
# scalar 
scalar = torch.tensor(7)
scalar

tensor(7)

In [None]:
# get tensor item back as python int

scalar.item()

7

In [None]:
# vector
vector = torch.tensor([4,7])
vector

tensor([4, 7])

In [None]:
vector.ndim

1

In [None]:
vector.shape

torch.Size([2])

In [None]:
# MATRIX
MATRIX = torch.tensor([[4,7],
                       [1,3]])


MATRIX

tensor([[4, 7],
        [1, 3]])

In [None]:
MATRIX.ndim

2

In [None]:
# TENSOR

TENSOR = torch.tensor([[[1,2,3],
                        [4,5,6]],
                       [[7,8,9],
                        [10,11,12]]])

TENSOR

tensor([[[ 1,  2,  3],
         [ 4,  5,  6]],

        [[ 7,  8,  9],
         [10, 11, 12]]])

In [None]:
TENSOR.ndim

3

In [None]:
TENSOR.shape

torch.Size([2, 2, 3])

In [None]:
TENSOR = torch.tensor([[[1,2,3],
                        [4,5,6]],
                       [[7,8,9],
                        [10,11,12]],
                       [[13,14,15],
                        [16,17,18]],
                       [[19,20,21],
                        [22,23,24]]])


In [None]:
TENSOR.ndim

3

In [None]:
TENSOR.shape

torch.Size([4, 2, 3])

### Random Tensors

Why Random Tensors??

Random tensors are important because the way many neural networks learn is that they start with tensors full of random numbers and then adjust those random numbers to better represent the data.

`start with random numbers -> look at the data -> update the random numbers -> look at the data -> update the random numbers`

In [None]:
# create a random tensor of size (3,4)

random_tensor = torch.rand(3,4)

random_tensor

tensor([[0.9012, 0.0211, 0.7665, 0.7957],
        [0.0360, 0.8018, 0.1503, 0.0319],
        [0.0188, 0.7549, 0.0820, 0.0671]])

In [None]:
random_tensor.ndim

2

In [None]:
# create random tensor with similar shape to an image tensor
random_image_tensor = torch.rand((224,224,3))
random_image_tensor.shape,random_image_tensor.ndim

(torch.Size([224, 224, 3]), 3)

### Zeros and Ones

In [None]:
zeros = torch.zeros(size=(3,4))
zeros

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [None]:
# create a tensor of all ones
ones = torch.ones(size=(3,4))
ones.dtype

torch.float32

In [None]:
random_tensor.dtype

torch.float32

### Creating a range of values and tensors-like

In [None]:
# torch.arange(start, end) counts from start up to (but not including) end,
# so this yields the integers 1 through 10 using the default step of 1.
one_to_ten = torch.arange(1, 11)
one_to_ten

tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [None]:
# creating tensors_like

ten_zeros = torch.zeros_like(one_to_ten)

ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

### Tensor Datatypes


In [None]:
# Build a float32 tensor, spelling out three commonly used creation arguments:
#   dtype         - element datatype (ex: float32, float16)
#   device        - what device the tensor is on (ex: 'cpu' or 'cuda')
#   requires_grad - whether or not to track gradients
float_32_tensor = torch.tensor(
    [3.0, 6.0, 9.0],
    dtype=torch.float32,
    device=None,
    requires_grad=False,
)
float_32_tensor

tensor([3., 6., 9.])

In [None]:
float_32_tensor.dtype

torch.float32

In [None]:
float_16_tensor = float_32_tensor.type(torch.HalfTensor)
float_16_tensor

tensor([3., 6., 9.], dtype=torch.float16)

In [None]:
float_16_tensor * float_32_tensor

tensor([ 9., 36., 81.])

In [None]:
# Demonstrate that tensors living on different devices cannot be combined.
cpu_tensor = torch.tensor([1,2,3],device='cpu')

if torch.cuda.is_available():
    gpu_tensor = torch.tensor([4,5,6],device='cuda')
    # Adding a CPU tensor to a GPU tensor is expected to fail; catch the error so
    # the message is displayed without stopping a "Restart & Run All".
    try:
        cpu_tensor + gpu_tensor
    except RuntimeError as device_error:
        print(f"RuntimeError: {device_error}")
else:
    # Without a GPU the 'cuda' tensor cannot even be created, so skip the demo.
    print("CUDA is not available - skipping the CPU + GPU tensor demonstration")

RuntimeError: ignored

In [None]:
# 65536 is far outside the int8 range (-128..127); the output recorded for this
# cell shows the value wrapping around to 0 instead of raising an error.
# NOTE(review): other PyTorch versions may reject the out-of-range value - confirm.
torch.tensor([65536],dtype=torch.int8)

tensor([0], dtype=torch.int8)

In [None]:
torch.tensor([1,2,3],dtype=torch.complex64)

tensor([1.+0.j, 2.+0.j, 3.+0.j])

In [None]:
int_32_tensor = torch.tensor([1,2,3],dtype=torch.int32)
int_32_tensor

tensor([1, 2, 3], dtype=torch.int32)

In [None]:
float_32_tensor * int_32_tensor

tensor([ 3., 12., 27.])

In [None]:
long_tensor = torch.tensor([1,2,3],dtype=torch.long)


In [None]:
float_32_tensor * long_tensor

tensor([ 3., 12., 27.])

### Getting information from tensors

In [None]:
some_tensor = torch.rand(3,4)
some_tensor

tensor([[0.0421, 0.9043, 0.6849, 0.3242],
        [0.7817, 0.5990, 0.6497, 0.0146],
        [0.8695, 0.5006, 0.3401, 0.7432]])

In [None]:
print("Some tensor")
print(f"Tensor dtype : {some_tensor.dtype}")
print(f"Tensor shape: {some_tensor.shape}")
print(f"Tensor is on : {some_tensor.device}")

Some tensor
Tensor dtype : torch.float32
Tensor shape: torch.Size([3, 4])
Tensor is on : cpu


### Manipulating Tensors

Tensor operations include:

* Addition
* Subtraction
* Multiplication (element wise)
* Division
* Matrix Multiplication



In [None]:
# Create a tensor and add 10 to it
tensor = torch.tensor([1,2,3])
tensor + 10

tensor([11, 12, 13])

In [None]:
# Multiply a tensor by 10
tensor = torch.tensor([1,2,3])
tensor * 10

tensor([10, 20, 30])

In [None]:
# Subtract 10 from every element of the tensor
tensor - 10

tensor([-9, -8, -7])

In [None]:
# try out pytorch in-built function
torch.mul(tensor,10)

tensor([10, 20, 30])

In [None]:
torch.add(tensor,10)

tensor([11, 12, 13])

### Matrix Multiplication

Two main ways of performing multiplication in neural networks and deep learning

1. Element wise 
2. Matrix Multiplication (dot product)

There are two main rules that performing matrix multiplication needs to satisfy:

1. The inner dimension must match:

  (3,2) @ (2,3) -> will work
  (3,2) @ (3,2) -> won't work
  (2,3) @ (3,2) -> will work

2. The resulting matrix has the shape of the outer dimensions of the two tensors:

  (3,2) @ (2,3) -> (3,3)
  (2,3) @ (3,2) -> (2,2)

In [None]:
# Elementwise multiplication
print(tensor,"*",tensor)
print(f"Tensor equals : {tensor*tensor}")

tensor([1, 2, 3]) * tensor([1, 2, 3])
Tensor equals : tensor([1, 4, 9])


In [None]:
# Matrix Multiplication
torch.matmul(tensor,tensor)

tensor(14)

In [None]:
%%time
# Hand-written dot product: multiply matching elements of `tensor` and
# accumulate the running sum. Timed so it can be compared with torch.matmul.
value = 0
for idx in range(len(tensor)):
  value += tensor[idx] * tensor[idx]

print(value)

tensor(14)
CPU times: user 2.06 ms, sys: 0 ns, total: 2.06 ms
Wall time: 1.9 ms


In [None]:
%%time
torch.matmul(tensor,tensor)

CPU times: user 136 µs, sys: 15 µs, total: 151 µs
Wall time: 157 µs


tensor(14)

In [None]:
tensor @ tensor

tensor(14)

In [None]:
torch.matmul(torch.rand(2,3),torch.rand(3,2))

tensor([[0.3909, 0.4158],
        [0.9502, 1.2514]])

### one of the most common errors in deep learning : shape errors

In [None]:
# Shapes for matrix multiplication: both operands are (3, 2), so the inner
# dimensions (2 and 3) do not match.
tensor_A = torch.tensor([[1,2],
                         [3,4],
                         [5,6]])

tensor_B = torch.tensor([[7,8],
                         [9,10],
                         [11,12]])

# torch.mm multiplies two 2-D matrices; unlike torch.matmul it does not broadcast,
# so it is a stricter function rather than an alias. The call is expected to fail
# here - catch the error so the message is displayed without stopping a
# "Restart & Run All".
try:
    torch.mm(tensor_A,tensor_B)
except RuntimeError as shape_error:
    print(f"RuntimeError: {shape_error}")


RuntimeError: ignored

### To fix our tensor shape issues, we can manipulate the shape of one of our tensors using a transpose



In [None]:
# Transposing tensor_B swaps its two dimensions so the inner dimensions line up.
print(f"Original shapes: Tensor_A : {tensor_A.shape}, Tensor_B : {tensor_B.shape}")
print(f"New shapes: Tensor_A : {tensor_A.shape} same as above, Tensor_B : {tensor_B.T.shape}")
# Report the shapes that are actually multiplied, i.e. tensor_B transposed.
print(f"Multiplying Matrices {tensor_A.shape} @ {tensor_B.T.shape} -> must match inner dimension")
print(f"Output shapes: {torch.mm(tensor_A,tensor_B.T).shape}")

torch.mm(tensor_A,tensor_B.T)

Original shapes: Tensor_A : torch.Size([3, 2]), Tensor_B : torch.Size([3, 2])
New shapes: Tensor_A : torch.Size([3, 2]) same as above, Tensor_B : torch.Size([2, 3])
Multiplying Matrices torch.Size([3, 2]) @ torch.Size([3, 2]) -> must match inner dimension
Output shapes: torch.Size([3, 3])


tensor([[ 23,  29,  35],
        [ 53,  67,  81],
        [ 83, 105, 127]])

### Finding the min,max,mean,sum etc(tensor aggregation)

In [None]:
# create a tensor 
x = torch.arange(0,100,10)

In [None]:
x.min(),torch.min(x)

(tensor(0), tensor(0))

In [None]:
x.max(),torch.max(x)

(tensor(90), tensor(90))

In [None]:
torch.mean(x.type(torch.float32))

tensor(45.)

In [None]:
torch.argmax(x)

tensor(9)

In [None]:
torch.argmin(x)

tensor(0)

### Finding the positional min and max

In [None]:
# Finding a tensor index that has minimum value
x.argmin()

tensor(0)

In [None]:
# Finding a tensor index that has maximum value
x.argmax()

tensor(9)

In [None]:
x[9]

tensor(90)

### Reshaping, stacking, squeezing and unsqueezing tensors

* Reshaping - reshapes an input tensor to a defined shape
* View - returns a tensor with the new shape specified in the view call, sharing the same memory as the original tensor
* Stacking - combines multiple tensors on top of each other (vstack) or side by side (hstack)
* Squeeze - removes all dimensions of size `1` from a tensor
* Unsqueeze - adds a dimension of size `1` to a tensor
* Permute - returns a tensor with its dimensions permuted (swapped) in a given order

In [None]:
# Let's create a tensor
x = torch.arange(0,100,10)
x,x.shape

(tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90]), torch.Size([10]))

In [None]:
x_reshaped = x.reshape(1,10)
x_reshaped.shape

torch.Size([1, 10])

In [None]:
x_reshaped = x.reshape(2,5)
x_reshaped.shape

torch.Size([2, 5])

In [None]:
# change the view
z = x.view(2,5)
z.shape

torch.Size([2, 5])

In [None]:
# change in z changes x because z and x shares the same memory location
z[0,0] = -90
z,x

(tensor([[-90,  10,  20,  30,  40],
         [ 50,  60,  70,  80,  90]]),
 tensor([-90,  10,  20,  30,  40,  50,  60,  70,  80,  90]))

In [None]:
x_reshape = x.reshape(1,10)
x_reshape.shape

torch.Size([1, 10])

In [None]:
torch.stack([x,x],dim=1).shape

torch.Size([10, 2])

In [None]:
torch.stack([x,x],dim=0).shape

torch.Size([2, 10])

In [None]:
torch.vstack([x,x])


tensor([[-90,  10,  20,  30,  40,  50,  60,  70,  80,  90],
        [-90,  10,  20,  30,  40,  50,  60,  70,  80,  90]])

In [None]:
# torch.squeeze() - removes single (size-1) dimensions from a tensor; passing
# dim=0, as done here, restricts the squeeze to dimension 0 only

print(f" Previous tensor : {x_reshape}")
print(f" Previous shape : {x_reshape.shape}")


x_squeezed = torch.squeeze(x_reshape,dim=0)


print(f"New tensor : {x_squeezed}")
print(f"New tensor shape : {x_squeezed.shape}")

 Previous tensor : tensor([[-90,  10,  20,  30,  40,  50,  60,  70,  80,  90]])
 Previous shape : torch.Size([1, 10])
New tensor : tensor([-90,  10,  20,  30,  40,  50,  60,  70,  80,  90])
New tensor shape : torch.Size([10])


### Indexing (Selecting data from tensors)

Indexing with pytorch is similar to indexing with NumPy.

In [None]:
x = torch.arange(1,10).reshape(1,3,3)
x,x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]), torch.Size([1, 3, 3]))

In [None]:
x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [None]:
x[0,0,0]

tensor(1)

In [None]:
# Get all values of 0th and 1st dimensions but only index 1 of 2nd Dimension
x[:,:,1]

tensor([[2, 5, 8]])

In [None]:
# Get all values of 0th dimension but only 1 index value of 1st and 2nd Dimension
x[:,1,1]

tensor([5])

In [None]:
# Two indexing exercises on x:
#   x[:,-1,-1] -> last row, last column          (returns 9)
#   x[:,:,2]   -> index 2 of the last dimension  (returns 3, 6, 9)
# A cell only displays its final expression, so both results are returned
# together as a tuple instead of letting the first one be silently discarded.
x[:,-1,-1], x[:,:,2]

tensor([[3, 6, 9]])

## Pytorch Tensors & Numpy

Numpy is a popular scientific python numerical library

And because of this pytorch has functionality to interact with it.

* Data in NumPy, want in a PyTorch tensor -> `torch.from_numpy(numpy_array)`
* Data in a PyTorch tensor, want in a NumPy array -> `torch.Tensor.numpy()`

In [None]:
# NumPy array -> PyTorch tensor
import numpy as np
import torch

# np.arange with float arguments produces a float64 array, and the tensor built
# from it is float64 as well (see the dtype in the output), not float32.
array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array)
array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [None]:
# change the value of array, what it will do to tensor?
# `array + 1` builds a new array and rebinds the name `array`, so `tensor`
# keeps the original values (see the output of this cell).
# NOTE(review): torch.from_numpy shares memory with its source array, so an
# in-place update (e.g. `array += 1`) would be visible through `tensor` - confirm
# against the torch.from_numpy documentation.
array = array + 1
array,tensor

(array([2., 3., 4., 5., 6., 7., 8.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [None]:
ones_tensor = torch.ones(8)
numpy_tensor = ones_tensor.numpy()
numpy_tensor.dtype

dtype('float32')

In [None]:
# change the tensor, what happens to numpy?
# The result of `ones_tensor + 1` has to be assigned for the tensor to change at
# all. Assigning it rebinds `ones_tensor` to a new tensor, so the NumPy array that
# was taken from the original tensor keeps its values.
ones_tensor = ones_tensor + 1
numpy_tensor

array([1., 1., 1., 1., 1., 1., 1., 1.], dtype=float32)

## Reproducibility (trying to take out random from random)

In short how a neural network learns:

`start with random numbers -> tensor operations -> update random numbers and try to make them learn from data -> again -> again -> again `

To reduce randomness in neural networks and PyTorch, the concept of a **random seed** comes into the picture.

Essentially, what the random seed does is "flavour" the randomness.

In [None]:
# Two independent draws from torch.rand with no seed set in this cell
random_tensor_A = torch.rand((3, 4))
random_tensor_B = torch.rand((3, 4))

# Show both tensors, then their element-wise comparison
print(random_tensor_A)
print(random_tensor_B)
print(torch.eq(random_tensor_A, random_tensor_B))

tensor([[0.1320, 0.7798, 0.0512, 0.1438],
        [0.9160, 0.7933, 0.1323, 0.2520],
        [0.9036, 0.7861, 0.7694, 0.1861]])
tensor([[0.4889, 0.5927, 0.6518, 0.8617],
        [0.8500, 0.0261, 0.9950, 0.1538],
        [0.3692, 0.5844, 0.5417, 0.4779]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [None]:
# Random but reproducible tensors: the generator is re-seeded with the same
# value immediately before each torch.rand call.
import torch

RANDOM_SEED = 42

torch.manual_seed(RANDOM_SEED)  # seed, then draw the first tensor
random_tensor_C = torch.rand((3, 4))

torch.manual_seed(RANDOM_SEED)  # reset to the same seed before the second draw
random_tensor_D = torch.rand((3, 4))

# Show both tensors, then their element-wise comparison
print(random_tensor_C)
print(random_tensor_D)
print(torch.eq(random_tensor_C, random_tensor_D))

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


Extra resources for reproducibility

* https://pytorch.org/docs/stable/notes/randomness.html
* https://en.wikipedia.org/wiki/Random_seed

## Running pytorch tensors and objects on GPUs for faster processing

GPUs = faster computation on Numbers, thanks to CUDA + NVIDIA hardware + Pytorch working behind the scenes.

In [None]:
!nvidia-smi

Sun Aug 21 10:51:27 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   43C    P8    10W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

### 2. Check for gpu access with pytorch

In [None]:
# Check GPU access
import torch
torch.cuda.is_available()

True

In [None]:
# Device-agnostic setup: use the GPU when PyTorch can see one, else the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cuda'

In [None]:
# Count number of GPUs
torch.cuda.device_count()

1

### 3. Putting tensors (and models) on GPUs

The reason for putting tensors (and models) on a GPU is faster computation, and therefore faster experiments.

In [None]:
tensor = torch.tensor([1,2,3],device='cpu') # Tensor not on GPU

print(tensor,tensor.device)

tensor([1, 2, 3]) cpu


In [None]:
# Move tensor to GPU if available
tensor_on_gpu = tensor.to(device)
tensor_on_gpu.device

device(type='cuda', index=0)

### 4. Moving tensors back to CPU

In [None]:
# If tensor is on GPU, we can't convert it straight to NumPy: the recorded run
# of this cell failed with a TypeError. Catch the error so the message is shown
# without stopping a "Restart & Run All".
# NOTE(review): on a CPU-only runtime `tensor_on_gpu` stays on the CPU, so the
# conversion is expected to succeed and nothing is printed - confirm.
try:
    numpy_tensor = tensor_on_gpu.numpy()
except TypeError as conversion_error:
    print(f"TypeError: {conversion_error}")

TypeError: ignored

In [None]:
# To fix the GPU tensor with numpy issue, we will bring tensor back on cpu and then convert back to numpy
numpy_tensor = tensor_on_gpu.cpu().numpy()
numpy_tensor

array([1, 2, 3])