### This is a notebook to follow along the #21 lesson from "Learn PyTorch. Become a Deep Learning Engineer. Get Hired." by Udemy

In [2]:
import torch
import numpy as np

In [3]:
# Build a 3-dimensional integer tensor out of two 3x2 matrices.
first_matrix = [[1, 2], [3, 4], [5, 6]]
second_matrix = [[7, 8], [9, 10], [11, 12]]
a = torch.tensor([first_matrix, second_matrix])
a

tensor([[[ 1,  2],
         [ 3,  4],
         [ 5,  6]],

        [[ 7,  8],
         [ 9, 10],
         [11, 12]]])

In [4]:
# .shape holds the size of every dimension, .ndim the number of dimensions
dim = a.ndim
shape = a.shape
print(shape, dim)

torch.Size([2, 3, 2]) 3


#### To access data in a tensor, we can use Python indexing and slicing, just as with all of the other containers we met so far

In [5]:
# one multi-dimensional index: block 0, row 1, column 1
a[0, 1, 1]

tensor(4)

In [6]:
# Indexing down to a single element does not give us the scalar directly:
# what comes back is still a tensor object
element = a[0][1][1]
print(type(element))
# to get the plain Python scalar we have to go through the method item()
scalar = element.item()
print('Type check: ', type(scalar),' | Scalar value through item(): ', scalar)

<class 'torch.Tensor'>
Type check:  <class 'int'>  | Scalar value through item():  4


#### Another interesting fact about tensors in PyTorch regards the device you can place them on:

In [7]:
# dtype that torch.tensor() picked for `a`, which was built from Python integers
a.dtype

torch.int64

In [8]:
# We move the tensor `a` to the MacBook GPU: 'mps' (Metal Performance Shaders)
# points to Metal-supported GPUs. That backend only exists on such machines, so
# we fall back to the CPU when it is not available instead of crashing the cell.
device = 'mps' if torch.backends.mps.is_available() else 'cpu'

# .to() is used both for changing the device the tensor is on (argument device)
# and for changing its dtype (argument dtype), here in a single call
a = a.to(dtype=torch.float16, device=device)
a.dtype

torch.float16

In [9]:
# .type() is another way to convert a tensor to a different dtype
a = a.type(dtype=torch.uint8)
a.dtype

torch.uint8

### There is a parameter we can set on every tensor we create, `requires_grad`: when it is set, PyTorch tracks the operations applied to that tensor and stores in its `.grad` attribute the gradient of a function / operation with respect to that tensor. <font color='red'>WARNING: </font> the gradient is only computed once the .backward() function is called, and .backward() without arguments can only be applied on a scalar value

In [10]:
# x is the tensor we want gradients for, hence requires_grad=True
x = torch.tensor([[1., -1.], [1., 1.]], dtype=torch.float16, requires_grad=True)
y = torch.rand(size=(2, 1), dtype=torch.float16)
# reduce everything to one scalar so that backward() can be called without arguments
out = torch.sum(x ** 2 * y)
out.backward()
# d(out)/dx = 2 * x * y, with y broadcast along the columns
x.grad

tensor([[ 0.0400, -0.0400],
        [ 0.7051,  0.7051]], dtype=torch.float16)

In [11]:
B = torch.randn(size=(10, 3), dtype=torch.float16, device='mps') #it generates number from a STANDARD NORMAL distribution
C = torch.rand(size=(3,1), #it generates number from a UNIFORM distribution ranging [0, 1) (every number as the same probability)
               dtype=torch.float16, 
               device='mps')
D = B @ C
D

tensor([[-0.0950],
        [-1.2539],
        [-0.3042],
        [-0.3145],
        [-2.1016],
        [-0.9082],
        [-0.4675],
        [ 2.2480],
        [-0.1793],
        [-0.2212]], device='mps:0', dtype=torch.float16)

#### Just to talk about the most common methods to create tensors, we must list:
* zeros
* ones
* zeros_like
* ones_like

In [12]:
# Both factory functions only need the shape of the tensor to build
grid_shape = (3, 4)
ZEROS = torch.zeros(grid_shape)
ONES = torch.ones(grid_shape)
ZEROS, ONES

(tensor([[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]),
 tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]))

In [13]:
# the size of this new tensor is taken from the tensor named D
ZEROS_LIKE = torch.zeros_like(D)
ZEROS_LIKE

tensor([[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.]], device='mps:0', dtype=torch.float16)

### <font color='yellow'>NOTE: </font>the device (and the dtype) of the new tensor created with zeros_like is the same as that of the input tensor (in this case D is on the mps)

### Some operations on tensors now...keeping in mind the general rules for the broadcasting:

General semantics

Two tensors are “broadcastable” if the following rules hold:

Each tensor has at least one dimension.
When iterating over the dimension sizes, starting at the trailing dimension, the dimension sizes must either be equal, one of them is 1, or one of them does not exist.

In [14]:
# randint draws integers from [low, high): here values 0..4.
# The shapes (5, 3) and (5, 1) are chosen to be broadcastable against each other.
first_tensor = torch.randint(0, 5, (5, 3))
second_tensor = torch.randint(0, 5, (5, 1))

first_tensor, first_tensor.shape, second_tensor

(tensor([[0, 0, 0],
         [3, 3, 2],
         [3, 4, 3],
         [3, 4, 2],
         [0, 4, 1]]),
 torch.Size([5, 3]),
 tensor([[1],
         [4],
         [0],
         [2],
         [2]]))

In [15]:
# Report shape and dtype of both operands before combining them
shape_report = f'The shape of first_tensor is {first_tensor.shape} | The shape of second_tensor is {second_tensor.shape}'
dtype_report = f'The dtype of first_tensor is {first_tensor.dtype} | The dtype of second_tensor is {second_tensor.dtype}'
print(shape_report)
print(dtype_report)

The shape of first_tensor is torch.Size([5, 3]) | The shape of second_tensor is torch.Size([5, 1])
The dtype of first_tensor is torch.int64 | The dtype of second_tensor is torch.int64


In [16]:
# element-wise product; the (5, 1) operand is broadcast across the 3 columns of the (5, 3) one
mul_tensor = first_tensor * second_tensor
mul_tensor.shape, mul_tensor

(torch.Size([5, 3]),
 tensor([[ 0,  0,  0],
         [12, 12,  8],
         [ 0,  0,  0],
         [ 6,  8,  4],
         [ 0,  8,  2]]))

#### Another important thing about tensors is the usage of the parameter dim (called axis in NumPy) in aggregation operations such as min, max, sum, etc.
#### dim must be set to an integer which stands for the dimension <font color='yellow'>WE WANT TO SQUEEZE</font> (i.e. the dimension that is reduced and disappears from the result).
#### e.g.: if we squeeze a dimension of a 3-dimensional tensor while computing the minimum, the minimum is taken across all the values along that dimension and a named tuple is returned as follows: 
#### * the first term of the tuple holds the minimum values found along the squeezed dimension of the tensor
#### * the second term holds the indices where those values were found along that same squeezed dimension 

In [17]:
# the 40 even numbers 0, 2, ..., 78 arranged in 2 blocks of 2 rows;
# -1 lets reshape work out the size of the last dimension (10)
min_tensor = torch.arange(0, 80, 2).reshape(2, 2, -1)
min_tensor.shape

torch.Size([2, 2, 10])

In [18]:
# minimum across dimension 0: returns a named tuple (values, indices)
torch.min(min_tensor, dim=0)

torch.return_types.min(
values=tensor([[ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18],
        [20, 22, 24, 26, 28, 30, 32, 34, 36, 38]]),
indices=tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]))

In [19]:
# Just as a reminder, this is the full content of min_tensor
min_tensor

tensor([[[ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18],
         [20, 22, 24, 26, 28, 30, 32, 34, 36, 38]],

        [[40, 42, 44, 46, 48, 50, 52, 54, 56, 58],
         [60, 62, 64, 66, 68, 70, 72, 74, 76, 78]]])

#### Just to reinforce the example: here under we have the same applied to a .sum()

In [20]:
# sum across dimension 1: the two rows of every block are added together
torch.sum(min_tensor, dim=1)

tensor([[ 20,  24,  28,  32,  36,  40,  44,  48,  52,  56],
        [100, 104, 108, 112, 116, 120, 124, 128, 132, 136]])

#### To close the topic regarding the min and max values, we can just pick the index where the min or max value lies, using argmin and argmax. 
#### The dim parameter behaves and affects the function's return value just as it does for the other aggregation functions.

In [21]:
# random integers in [0, 100) arranged as 3 blocks of 3 rows and 2 columns
rand_tensor = torch.randint(0, 100, (3, 3, 2))
rand_tensor

tensor([[[14, 77],
         [48, 76],
         [74, 73]],

        [[53, 28],
         [80,  0],
         [48,  6]],

        [[88, 92],
         [96, 37],
         [74, 61]]])

In [22]:
# for every (row, column) position, the index of the block holding the largest value
torch.argmax(rand_tensor, dim=0)

tensor([[2, 2],
        [2, 0],
        [0, 0]])

#### Let's talk about stacking tensors together using <font color='orange'>torch.stack()</font>
#### <font color='yellow'>FROM THE DOCS:</font>
#### Concatenates a sequence of tensors along a new dimension.

#### All tensors need to be of the same size.
### The keyword is 'new dimension'. The effect of stack is very different from the plain concatenation we might expect to get back (which is actually provided by another function, called torch.cat())


In [25]:
# two example tensors of shape (2, 5), holding 0..9 and 10..19
x = torch.arange(10).reshape(2, 5)
y = torch.arange(10, 20).reshape(2, 5)

# stack() inserts a new dimension at position 1: two (2, 5) tensors -> (2, 2, 5)
torch.stack([x, y], dim=1)

tensor([[[ 0,  1,  2,  3,  4],
         [10, 11, 12, 13, 14]],

        [[ 5,  6,  7,  8,  9],
         [15, 16, 17, 18, 19]]])

#### Let's talk about stacking tensors together using <font color='orange'>torch.cat()</font>
#### .cat() concatenates the list of tensors passed as tensors=[] along an existing dimension, given by dim


In [26]:
# cat() joins along the existing dimension 1: two (2, 5) tensors -> (2, 10)
torch.cat((x, y), dim=1)

tensor([[ 0,  1,  2,  3,  4, 10, 11, 12, 13, 14],
        [ 5,  6,  7,  8,  9, 15, 16, 17, 18, 19]])

#### Let's see the effect of squeeze(), unsqueeze() and permute()

In [28]:
# Rebinding x is enough to drop the previous tensor: an explicit del(x) is not
# needed and would raise a NameError if this cell ran without an earlier x.
x = torch.rand(size=(3, 2, 1, 3))
# squeeze() returns a tensor with the dimensions of size 1 removed; since no dim
# is given here, every size-1 dimension is removed: (3, 2, 1, 3) -> (3, 2, 3)
y = torch.squeeze(input=x)
x, y

(tensor([[[[0.7613, 0.3361, 0.1974]],
 
          [[0.9290, 0.9620, 0.4837]]],
 
 
         [[[0.9209, 0.4363, 0.2314]],
 
          [[0.9512, 0.8960, 0.6021]]],
 
 
         [[[0.2444, 0.6819, 0.3668]],
 
          [[0.2913, 0.6518, 0.7237]]]]),
 tensor([[[0.7613, 0.3361, 0.1974],
          [0.9290, 0.9620, 0.4837]],
 
         [[0.9209, 0.4363, 0.2314],
          [0.9512, 0.8960, 0.6021]],
 
         [[0.2444, 0.6819, 0.3668],
          [0.2913, 0.6518, 0.7237]]]))

In [30]:
# On the other hand, unsqueeze() returns a new tensor with a dimension of size
# one inserted at the specified position: (3, 3) -> (3, 1, 3) for dim=1.
# Rebinding x replaces the previous tensor, so no explicit del(x) is needed
# (it would raise a NameError if this cell ran without an earlier x).
x = torch.rand(size=(3, 3))
y = torch.unsqueeze(input=x,
                    dim=1)
x, y

(tensor([[0.6129, 0.3554, 0.0899],
         [0.8250, 0.5126, 0.5973],
         [0.2072, 0.7395, 0.5498]]),
 tensor([[[0.6129, 0.3554, 0.0899]],
 
         [[0.8250, 0.5126, 0.5973]],
 
         [[0.2072, 0.7395, 0.5498]]]))

In [40]:
# permute() returns a view of the original tensor with its dimensions reordered;
# a view always shares memory with the tensor it was created from.
# x holds the 50 even numbers 0, 2, ..., 98 with shape (2, 5, 5)
x = torch.arange(0, 100, 2).reshape(2, 5, -1)
# new dimension order: old dim 2 comes first, then old dim 0, then old dim 1
y = x.permute(2, 0, 1)
x.shape, y.shape

(torch.Size([2, 5, 5]), torch.Size([5, 2, 5]))

In [49]:
# Total GPU memory (in bytes) allocated by the Metal driver for this process.
# The query only makes sense on machines with the MPS backend, so it is skipped
# (yielding None) everywhere else instead of raising.
mps_allocated_bytes = (
    torch.mps.driver_allocated_memory()
    if torch.backends.mps.is_available()
    else None
)
mps_allocated_bytes

25424.0