<a href="https://colab.research.google.com/github/nlscng/turbo-doodle/blob/main/00_pytorch_fundamentals.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
torch.__version__

'2.5.1+cu124'

### Introduction to tensors
Creating tensors

PyTorch tensors are created using `torch.tensor()`.

In [3]:
scalar = torch.tensor(7)

In [4]:
scalar

tensor(7)

In [5]:
scalar.type()

'torch.LongTensor'

In [6]:
scalar.ndim
# a scalar has zero dimensions

0

In [7]:
scalar.item()
# only works with one-element tensors

7

In [8]:
type(scalar.item())
# returns a python type int

int

In [9]:
vector = torch.tensor([7, 7])
vector

tensor([7, 7])

In [10]:
vector.ndim

1

In [11]:
vector.shape
# the shape is reported as a torch.Size object

torch.Size([2])

In [12]:
# matrix
matrix = torch.tensor([[7, 8], [9, 10]])

In [13]:
matrix

tensor([[ 7,  8],
        [ 9, 10]])

In [14]:
matrix.ndim

2

In [15]:
matrix.shape

torch.Size([2, 2])

Creating tensors

In [16]:
# tensors
tensor = torch.tensor([[[1, 2, 3], [3, 6, 9], [2, 4, 5]]])
tensor

tensor([[[1, 2, 3],
         [3, 6, 9],
         [2, 4, 5]]])

In [17]:
tensor.ndim

3

In [18]:
tensor.shape
# the first dim is 1, not 3

torch.Size([1, 3, 3])

In [19]:
tensor[0]

tensor([[1, 2, 3],
        [3, 6, 9],
        [2, 4, 5]])

In [20]:
tensor.device

device(type='cpu')

In [21]:
tensor.dtype

torch.int64

### Random tensors
Why random tensors?
Random tensors are important because many neural networks (NNs) start with random numbers as their initial state.

# Create a random tensor of size/shape (3, 4)

In [22]:
random_tensor = torch.rand(3, 4)
random_tensor

tensor([[0.0783, 0.9216, 0.8768, 0.6236],
        [0.9030, 0.3598, 0.1502, 0.2643],
        [0.2371, 0.1421, 0.4858, 0.1110]])

In [23]:
random_tensor.ndim

2

In [24]:
random_tensor = torch.rand(1, 10, 10)
random_tensor

tensor([[[0.4973, 0.4250, 0.6099, 0.1173, 0.5884, 0.0323, 0.5997, 0.2297,
          0.9846, 0.9913],
         [0.7582, 0.3287, 0.5095, 0.0583, 0.5757, 0.0786, 0.2395, 0.0903,
          0.1092, 0.8669],
         [0.2534, 0.0785, 0.7023, 0.9248, 0.2530, 0.8223, 0.6823, 0.6013,
          0.8263, 0.1417],
         [0.2729, 0.5369, 0.0818, 0.8810, 0.9965, 0.4351, 0.6362, 0.3257,
          0.5614, 0.7384],
         [0.8004, 0.9018, 0.9215, 0.3730, 0.0857, 0.7237, 0.9684, 0.6815,
          0.2394, 0.4826],
         [0.2618, 0.8892, 0.3428, 0.7753, 0.9586, 0.5305, 0.5681, 0.0872,
          0.5110, 0.1044],
         [0.4564, 0.8855, 0.9306, 0.1626, 0.2042, 0.6039, 0.6713, 0.7771,
          0.9566, 0.7119],
         [0.3683, 0.5757, 0.6068, 0.5640, 0.6054, 0.6296, 0.6646, 0.7595,
          0.1818, 0.6541],
         [0.0952, 0.6547, 0.2693, 0.4577, 0.8660, 0.3624, 0.2125, 0.6734,
          0.9568, 0.7283],
         [0.7054, 0.4019, 0.9525, 0.2223, 0.6621, 0.5961, 0.9010, 0.3632,
          0.1210,

In [25]:
random_tensor.dtype

torch.float32

# Creating tensors of zeros and ones

In [26]:
zeros = torch.zeros(size=(3, 4))
zeros

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [27]:
ones = torch.ones(size=(3, 4))
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

# Creating tensors with arange and like
`torch.arange` takes three args: start (inclusive), end (exclusive), and step.

The `*_like` functions (e.g. `torch.zeros_like`) create a tensor with the same shape as the input tensor.

In [28]:
torch.arange(1, 10)

tensor([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [29]:
torch.arange(start=0, end=1000, step=77)

tensor([  0,  77, 154, 231, 308, 385, 462, 539, 616, 693, 770, 847, 924])

In [30]:
torch.arange(1, 11, 2)

tensor([1, 3, 5, 7, 9])

In [31]:
ten_zeros = torch.zeros_like(input=torch.arange(0, 10))
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

### Tensor datatypes
**Note:** Tensor datatype is one of the 3 big errors you'll run into with PyTorch and deep learning:
1. Tensor not the right datatype
2. Tensor not the right shape
3. Tensor not on the right device

In [32]:
# Create a float tensor, spelling out the three most important keyword arguments.
# The device falls back to the CPU when CUDA is unavailable, so this cell also
# runs on machines without a GPU.
float_32_tensor = torch.tensor([3.0, 6.0, 9.0],
                               dtype=None, # data type (float32, float16, ...); None infers it from the data
                               device="cuda" if torch.cuda.is_available() else "cpu", # device the tensor lives on
                               requires_grad=False # whether operations on this tensor are tracked for gradients
                               )
float_32_tensor

tensor([3., 6., 9.], device='cuda:0')

In [33]:
# the dtype defaults to float32 here, even though dtype=None was passed
float_32_tensor.dtype

torch.float32

In [34]:
float_16_tensor = float_32_tensor.type(torch.half)
# torch.half is the same dtype as torch.float16; the converted tensor stays on
# the same device as float_32_tensor (see the displayed output)
float_16_tensor, float_16_tensor.device, float_16_tensor.dtype

(tensor([3., 6., 9.], device='cuda:0', dtype=torch.float16),
 device(type='cuda', index=0),
 torch.float16)

In [35]:
float_16_tensor * float_32_tensor

tensor([ 9., 36., 81.], device='cuda:0')

### Getting information from tensors
* `shape` - the shape of the tensor (some operations require specific shapes)
* `dtype` - the data type of the elements stored in the tensor
* `device` - the device the tensor is on (usually GPU or CPU)


In [36]:
# Build a random 3x4 tensor.
some_tensor = torch.rand(size=(3, 4))

# Report the tensor followed by its dtype, shape and device, one item per line.
print(
    some_tensor,
    f"Datatype of tensor: {some_tensor.dtype}",
    f"Shape of tensor: {some_tensor.shape}",
    f"Device tensor is on: {some_tensor.device}",
    sep="\n",
)

tensor([[0.8429, 0.4941, 0.6333, 0.3909],
        [0.4863, 0.8481, 0.3958, 0.0246],
        [0.4815, 0.0827, 0.0598, 0.4674]])
Datatype of tensor: torch.float32
Shape of tensor: torch.Size([3, 4])
Device tensor is on: cpu


### Manipulating tensors (tensor operations)
* Addition
* Subtraction
* Multiplication
* Division
* Matrix multiplication

In [37]:
# create tensor and add numbers to elements
tensor = torch.tensor([1,2,3])
tensor

tensor([1, 2, 3])

In [38]:
tensor + 10

tensor([11, 12, 13])

In [39]:
# multiply elements
tensor * 10

tensor([10, 20, 30])

In [40]:
# tensor doesn't change unless reassigned
tensor

tensor([1, 2, 3])

### Matrix multiplication (is all you need)
The most common operation in neural networks and deep learning is `matrix multiplication`, aka matmul in PyTorch.

The two rules for matmul are:
1) The **inner dimensions** must match
2) The resulting matrix has the **outer dimensions**

In [41]:
# element wise multi
print(tensor, "*", tensor)
print(f"Equals: {tensor * tensor}")


tensor([1, 2, 3]) * tensor([1, 2, 3])
Equals: tensor([1, 4, 9])


In [42]:
# matrix multiplication
torch.matmul(tensor, tensor)

tensor(14)

In [47]:
# the at @ symbol is an operator for matmul
tensor @ tensor

tensor(14)

In [48]:
# torch.mm is another way to do matmul,
# BUT it only works with 2-D matrices, not with vectors or higher-rank tensors.
# The error is caught and printed so that the failure is demonstrated without
# stopping a full run of the notebook.
try:
    torch.mm(tensor, tensor)
except RuntimeError as err:
    print(f"RuntimeError: {err}")


RuntimeError: self must be a matrix

In [43]:
%%time

# Hand-rolled dot product: walk the elements directly and accumulate the squares.
value = 0
for element in tensor:
    value += element * element
print(value)

tensor(14)
CPU times: user 1.58 ms, sys: 956 µs, total: 2.53 ms
Wall time: 2.52 ms


In [44]:
%%time

# mat mul with the torch method; in the recorded run it is roughly 6x faster in
# wall time than the loop version (402 µs vs 2.52 ms), not orders of magnitude.
# NOTE(review): the gap presumably grows with tensor size — time a larger input to confirm.
torch.matmul(tensor, tensor)

CPU times: user 477 µs, sys: 0 ns, total: 477 µs
Wall time: 402 µs


tensor(14)

### One of the most common errors in deep learning: shape errors
To fix a tensor shape issue, we can change the shape of a tensor using transpose.


In [49]:
# create two tensor
# Two integer matrices with three rows and two columns each.
tensor_A = torch.tensor([[1, 2],
                         [3, 4],
                         [5, 6]])
tensor_B = torch.tensor([[7, 10],
                         [8, 11],
                         [9, 12]])

# Both shapes are (3, 2): the inner dimensions (2 and 3) differ, so these two
# do not satisfy the matmul rule as they are.
tensor_A.shape, tensor_B.shape

(torch.Size([3, 2]), torch.Size([3, 2]))

In [50]:
# use T, or transpose
tensor_B.T, tensor_B.T.shape

(tensor([[ 7,  8,  9],
         [10, 11, 12]]),
 torch.Size([2, 3]))

In [51]:
# with b transposed, now the rule for mat mul is met
torch.matmul(tensor_A, tensor_B.T)


tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])

### Finding the min, max, mean, sum and other tensor aggregation.

In [53]:
# create a tensor
x = torch.arange(0, 100, 10)
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [54]:
# find min
torch.min(x), x.min()

(tensor(0), tensor(0))

In [55]:
# find max
torch.max(x), x.max()

(tensor(90), tensor(90))

In [59]:
# find avg

# watch out for the dtype error: x holds integers, and mean needs a float
# input — uncommenting the line below is expected to raise that error
#torch.mean(x), x.mean()


In [58]:
# Need to specify data type from int/long to some float so we can do mean
torch.mean(x.type(torch.float32)), x.type(torch.float32).mean()

(tensor(45.), tensor(45.))

In [60]:
# find sum
torch.sum(x), x.sum()

(tensor(450), tensor(450))

### Finding the positions of the min and max, aka argmin and argmax

In [61]:
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [62]:
# argmin
torch.argmin(x), x.argmin()

(tensor(0), tensor(0))

In [63]:
# argmax
torch.argmax(x), x.argmax()

(tensor(9), tensor(9))

In [64]:
x[x.argmax()]

tensor(90)