# Introduction to PyTorch

In [1]:
import numpy as np
import torch

In [2]:
# Report the CUDA device only when one is actually usable.
# NOTE: `is_available` has to be *called*. The bare function object is always truthy,
# so without the parentheses this branch would also run (and raise) on CPU-only machines.
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name()} is available")
    print(f"{torch.cuda.get_device_capability()}")

GPU: NVIDIA GeForce RTX 4070 is available
(8, 9)


## Basic information about the tensors
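A tensor can have any number of dimensions; the common cases have their own names:
- scalar (0 dimensions)
- vector (1 dimension)
- matrix (2 dimensions)
- tensor (3 or more dimensions)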

In [3]:
# One example per rank: a 0-D scalar, a 1-D vector, a 2-D matrix and a 3-D tensor.
scalar = torch.tensor(7)
vector = torch.tensor([1, 7])
matrix = torch.tensor([[1, -1]] * 2)    # two identical rows    -> shape (2, 2)
tensor = torch.tensor([[[1, -1]] * 4])  # one block of four rows -> shape (1, 4, 2)

In [4]:
# Show every example together with the three ways of inspecting its dimensions:
# `size()` and `shape` report the same torch.Size, `dim()` is the number of axes.
# The loop variable stays named `f` because the `=` specifier echoes the expression text.
for label, f in dict(scalar=scalar, vector=vector, matrix=matrix, tensor=tensor).items():
    print(f"{label}:", f)
    print(f"{f.size()=}", f"{f.shape=}", f"{f.dim()=}", sep="\n")


scalar: tensor(7)
f.size()=torch.Size([])
f.shape=torch.Size([])
f.dim()=0
vector: tensor([1, 7])
f.size()=torch.Size([2])
f.shape=torch.Size([2])
f.dim()=1
matrix: tensor([[ 1, -1],
        [ 1, -1]])
f.size()=torch.Size([2, 2])
f.shape=torch.Size([2, 2])
f.dim()=2
tensor: tensor([[[ 1, -1],
         [ 1, -1],
         [ 1, -1],
         [ 1, -1]]])
f.size()=torch.Size([1, 4, 2])
f.shape=torch.Size([1, 4, 2])
f.dim()=3


### Get information from the tensors

1. get dtype - use `tensor.dtype`
2. get shape of a tensor - use `tensor.shape`
3. get device - use `tensor.device`

In [5]:
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU so that this
# cell also runs on machines without a GPU.
some_tensor = torch.rand(
    size=(3, 4),
    dtype=torch.float32,
    device="cuda" if torch.cuda.is_available() else "cpu",
)
print(some_tensor)
print(some_tensor.dtype)   # torch.float32 (requested explicitly above)
print(some_tensor.shape)   # torch.Size([3, 4])
print(some_tensor.device)  # cuda:0 when the GPU is used, cpu otherwise

print(some_tensor.size())  # `size()` returns the same torch.Size as `.shape`


tensor([[0.9454, 0.7454, 0.6557, 0.2135],
        [0.4512, 0.7742, 0.5876, 0.6407],
        [0.4083, 0.8609, 0.3850, 0.2991]], device='cuda:0')
torch.float32
torch.Size([3, 4])
cuda:0
torch.Size([3, 4])


The attributes above help diagnose the three most common problems with tensors:
* tensors not in the right type
* tensors not in the right shape
* tensors not on the same device

### Manipulating tensors

Tensor operations include
* Addition
* Subtraction
* Multiplication
* Division
* Matrix multiplication (MatMul)

In [6]:
# Arithmetic between a tensor and a Python scalar is applied to every element.
# Each expression is evaluated against the same, unchanged `tensor`.
tensor = torch.tensor([1, 2, 3])
print(tensor + 10, tensor * 10, tensor - 10, tensor / 10, sep="\n")


tensor([11, 12, 13])
tensor([10, 20, 30])
tensor([-9, -8, -7])
tensor([0.1000, 0.2000, 0.3000])


From the output above we can see that these operations do not modify the original tensor: each one returns a new tensor.
There are also equivalent `torch` functions that perform the same operations.

In [7]:
# Functional spellings of +, *, - and /; each call returns a new tensor.
print(
    *(op(tensor, 10) for op in (torch.add, torch.mul, torch.sub, torch.div)),
    sep="\n",
)

tensor([11, 12, 13])
tensor([10, 20, 30])
tensor([-9, -8, -7])
tensor([0.1000, 0.2000, 0.3000])


#### Matrix multiplication

1. Element-wise multiplication
2. matrix multiplication (dot product)

In [8]:
# Build the matrix with the `torch.tensor` factory instead of the legacy
# `torch.Tensor` constructor; the float literals keep the dtype at float32.
tensor = torch.tensor([[1.0, 2.0], [3.0, 4.0]])

# `*` multiplies element by element, `@` performs matrix multiplication.
print(f"{tensor} * {tensor} = {tensor * tensor}")
print(f"{tensor} @ {tensor} = {tensor @ tensor}")

tensor([[1., 2.],
        [3., 4.]]) * tensor([[1., 2.],
        [3., 4.]]) = tensor([[ 1.,  4.],
        [ 9., 16.]])
tensor([[1., 2.],
        [3., 4.]]) @ tensor([[1., 2.],
        [3., 4.]]) = tensor([[ 7., 10.],
        [15., 22.]])


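Matrix multiplication must satisfy two rules:
* The inner dimensions must match: `(m, n) @ (n, p)` works, while `(m, n) @ (p, q)` fails unless `n == p`.
* The resulting matrix has the shape of the outer dimensions: `(m, n) @ (n, p)` gives `(m, p)`.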


#### Transposition

In [9]:
# The matrix followed by its transpose (`.T` swaps rows and columns).
print(tensor, tensor.T, sep="\n")

tensor([[1., 2.],
        [3., 4.]])
tensor([[1., 3.],
        [2., 4.]])


#### Tensor aggregations
* min
* max
* mean 
* sum

In [10]:
# Reductions over all elements of the tensor: mean, min, max and sum.
print(tensor.mean(), tensor.min(), tensor.max(), tensor.sum(), sep="\n")
# Positions of the largest and smallest element, counted in the flattened tensor.
print(torch.argmax(tensor), torch.argmin(tensor), sep="\n")

tensor(2.5000)
tensor(1.)
tensor(4.)
tensor(10.)
tensor(3)
tensor(0)


### Reshaping tensors



In [11]:
# Nine consecutive floats, 1.0 through 9.0 (the end value of `arange` is exclusive).
x = torch.arange(1., 10.)
print(f"Original tensor: {x}")
# A reshape has to keep the number of elements: x.reshape(1, 7) would raise,
# because 9 elements cannot fill a 1x7 tensor.
x_reshaped = torch.reshape(x, (3, 3))
print("\nReshaping size 9 to 3x3", f"Reshaped tensor: {x_reshaped}", sep="\n")

Original tensor: tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.])

Reshaping size 9 to 3x3
Reshaped tensor: tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])


### Tensor view

In [12]:
# `view` gives a differently-shaped tensor over the same data as `x`.
z = x.view((1, 9))
print(f"Original tensor: {x}", f"Original tensor view {z}", sep="\n")

# Despite the label printed below, nothing is reshaped here: the first element of the
# ORIGINAL tensor is overwritten in place, and the change shows up in the view as well.
print("\nReshaping the view")
x[0] = 10
print(f"Original tensor: {x}", f"Original tensor view {z}", sep="\n")

Original tensor: tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.])
Original tensor view tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]])

Reshaping the view
Original tensor: tensor([10.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.])
Original tensor view tensor([[10.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.]])


### Tensor stack

In [13]:
# Stack four copies of x along a new axis 1, so each copy becomes one column.
x_stacked = torch.stack((x,) * 4, dim=1)
print(
    f"Original dim: {x.dim()}",
    f"Stacked 4 x tensors = {x_stacked}",
    f"New dim: {x_stacked.dim()}",
    sep="\n",
)

Original dim: 1
Stacked 4 x tensors = tensor([[10., 10., 10., 10.],
        [ 2.,  2.,  2.,  2.],
        [ 3.,  3.,  3.,  3.],
        [ 4.,  4.,  4.,  4.],
        [ 5.,  5.,  5.,  5.],
        [ 6.,  6.,  6.,  6.],
        [ 7.,  7.,  7.,  7.],
        [ 8.,  8.,  8.,  8.],
        [ 9.,  9.,  9.,  9.]])
New dim: 2


In [14]:
# `dim` is the position of the NEW axis in the result: for 1-D inputs, 0 stacks
# them as rows and 1 stacks them as columns.
# With 2-D inputs a third position becomes available, so try dim=2.
x2 = torch.rand(2, 2)
print(f"New tensor {x2}")

x2_stacked = torch.stack((x2, x2), dim=2)
print(x2_stacked)

New tensor tensor([[0.4981, 0.5100],
        [0.4335, 0.6107]])
tensor([[[0.4981, 0.4981],
         [0.5100, 0.5100]],

        [[0.4335, 0.4335],
         [0.6107, 0.6107]]])


In [15]:
# hstack and vstack
print(torch.hstack([x,x]))
print(torch.vstack([x,x]))

tensor([10.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.,  2.,  3.,  4.,  5.,
         6.,  7.,  8.,  9.])
tensor([[10.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
        [10.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.]])


In [16]:
# squeeze and unsqueeze
x3 = torch.zeros(2,1,2,1,2)
print(f"For squeeze we need a tensor with any shape element == 1, they will be dropped\nInput tensor: {x3}\nShape: {x3.shape}")
print(f"\nSqueezed tensor {torch.squeeze(x3)}\nShape: {torch.squeeze(x3).shape}")
print(f"\nUnsqueezed tensor {torch.unsqueeze(torch.squeeze(x3), dim=2)}\nShape: {torch.unsqueeze(torch.squeeze(x3), dim=2).shape}")

For squeeze we need a tensor with any shape element == 1, they will be dropped
Input tensor: tensor([[[[[0., 0.]],

          [[0., 0.]]]],



        [[[[0., 0.]],

          [[0., 0.]]]]])
Shape: torch.Size([2, 1, 2, 1, 2])

Squeezed tensor tensor([[[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]]])
Shape: torch.Size([2, 2, 2])

Unsqueezed tensor tensor([[[[0., 0.]],

         [[0., 0.]]],


        [[[0., 0.]],

         [[0., 0.]]]])
Shape: torch.Size([2, 2, 1, 2])


In [17]:
# permute reorders the dimensions: (2, 0, 1) puts old axis 2 first, followed by
# old axes 0 and 1, so a (2, 3, 5) tensor becomes (5, 2, 3).
x4 = torch.randn(2, 3, 5)
print(x4.shape, x4, sep="\n")

x4_permuted = x4.permute(2, 0, 1)
print(x4_permuted.shape, x4_permuted, sep="\n")

torch.Size([2, 3, 5])
tensor([[[-0.1620,  0.5361,  1.1587, -0.2928, -1.9598],
         [ 2.0286, -1.2822,  0.2926,  1.5987,  0.7542],
         [ 0.7225,  1.7440,  0.6000, -0.4700,  0.4795]],

        [[-1.3284, -0.1983,  0.4187,  0.1251, -1.0933],
         [ 0.8309, -0.0958, -1.7125, -0.3283, -1.2900],
         [-0.8559,  0.2181, -0.9814,  0.5835, -0.1846]]])
torch.Size([5, 2, 3])
tensor([[[-0.1620,  2.0286,  0.7225],
         [-1.3284,  0.8309, -0.8559]],

        [[ 0.5361, -1.2822,  1.7440],
         [-0.1983, -0.0958,  0.2181]],

        [[ 1.1587,  0.2926,  0.6000],
         [ 0.4187, -1.7125, -0.9814]],

        [[-0.2928,  1.5987, -0.4700],
         [ 0.1251, -0.3283,  0.5835]],

        [[-1.9598,  0.7542,  0.4795],
         [-1.0933, -1.2900, -0.1846]]])


### Selecting data from tensors

In [18]:
# The integers 1..9 arranged as one 3x3 matrix behind a leading axis of length 1.
x5 = torch.reshape(torch.arange(1, 10), (1, 3, 3))
print(f"Original tensor {x5}\nShape: {x5.shape}")

Original tensor tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])
Shape: torch.Size([1, 3, 3])


In [19]:
# Every additional index selects along one more axis, from the outermost inwards.
print(
    x5[0],          # the whole 3x3 matrix
    x5[0][0],       # its first row
    x5[0][0][0],    # the first element of that row
    x5[0, -1, -1],  # last row, last column
    x5[0, :, -1],   # the last column, taken from every row
    sep="\n",
)

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])
tensor([1, 2, 3])
tensor(1)
tensor(9)
tensor([3, 6, 9])


### PyTorch and NumPy

In [20]:
# NumPy -> PyTorch.
# `from_numpy` keeps NumPy's dtype (float64 here) instead of switching to torch's
# float32 default, and the tensor shares memory with the array rather than copying it.
array = np.array([1.0, 8.0])
print(f"Numpy array: {array}")
tensor = torch.from_numpy(array)
print(f"Tensor from array: {tensor}")

# Because the memory is shared, an in-place write to the array is visible through the tensor.
array[0] = 2.00
print(tensor)
print(array)

# PyTorch -> NumPy.
array2 = tensor.numpy()
print(f"Array from tensor: {array2}")

Numpy array: [1. 8.]
Tensor from array: tensor([1., 8.], dtype=torch.float64)
tensor([2., 8.], dtype=torch.float64)
[2. 8.]
Array from tensor: [2. 8.]


## Reproducibility

Trying to take the random out of random!

`Start with random numbers -> tensor operations -> update random numbers to try and make them better representations of the data -> again -> again...`


In [21]:
RANDOM_SEED = 100

# Seeding once and then drawing twice: the generator advances between the two
# draws, so the tensors differ.
torch.manual_seed(RANDOM_SEED)
random_tensor_C = torch.rand(size=(3, 4))
random_tensor_D = torch.rand(size=(3, 4))
print(torch.eq(random_tensor_C, random_tensor_D))


# Re-seeding right before each draw restarts the generator from the same state,
# so both draws produce identical values.
torch.manual_seed(RANDOM_SEED)
random_tensor_C = torch.rand(size=(3, 4))
torch.manual_seed(RANDOM_SEED)
random_tensor_D = torch.rand(size=(3, 4))
print(torch.eq(random_tensor_C, random_tensor_D))


tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


## Running on GPU

In [22]:
# IPython shell escape: run the `nvidia-smi` command-line tool and show its report
# (driver/CUDA versions, GPU model, memory usage and utilisation) in the cell output.
!nvidia-smi

Sat Nov  9 18:00:07 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.120                Driver Version: 550.120        CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4070        Off |   00000000:07:00.0  On |                  N/A |
|  0%   27C    P2             27W /  200W |     582MiB /  12282MiB |      4%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                     

In [23]:
# Create the tensor on the GPU when CUDA is usable; fall back to the CPU otherwise
# so the cell does not raise on machines without a GPU.
tensor = torch.tensor([[1, 2, 3]], device="cuda" if torch.cuda.is_available() else "cpu")
tensor

tensor([[1, 2, 3]], device='cuda:0')

In [24]:
%%time

torch.manual_seed(RANDOM_SEED)
t1 = torch.rand([1000, 1000], device="cpu")
t2 = torch.rand([1000, 1000], device="cpu") 
t3 = t1 @ t2
print(t3.dim())

2
CPU times: user 80.3 ms, sys: 5.57 ms, total: 85.9 ms
Wall time: 25.1 ms


In [25]:
%%time

torch.manual_seed(RANDOM_SEED)
t1 = torch.rand([1000, 1000], device="cuda")
t2 = torch.rand([1000, 1000], device="cuda") 
t3 = t1 @ t2
print(t3.dim())

2
CPU times: user 67.2 ms, sys: 41.7 ms, total: 109 ms
Wall time: 84.3 ms
