# Dependencies

In [1]:
import torch
import numpy as np

# array_like
   - list
      - Used for storing elements of different data types
      - Flexible: there is no length & shape limit
      - Not optimized for mathematical operations
   - numpy.ndarray
      - Implemented in C
      - Used for mathematical operations
      - Arrays are homogeneous: they can store elements of the same data type
   - troch.Tensor
      - PyTorch's core functionality is implemented in C++
      - Optimized for deep learning operations e.g. auto gradient
      - Support GPU acceleration [NVIDIA GPUs]

In [2]:
# scalar : 0-dimensional array/tensor
scalar_1 = 2
scalar_2 = np.array(2)
scalar_3 = torch.tensor(2)

# log
print(f"scalar_1: {scalar_1} | ndim: 0 | dtype: {type(scalar_1)}")
print(f"scalar_2: {scalar_2} | ndim: {scalar_2.ndim} | dtype: numpy.{scalar_2.dtype}")
print(f"scalar_3: {scalar_3} | ndim: {scalar_3.ndim} | dtype: {scalar_3.dtype}")

scalar_1: 2 | ndim: 0 | dtype: <class 'int'>
scalar_2: 2 | ndim: 0 | dtype: numpy.int32
scalar_3: 2 | ndim: 0 | dtype: torch.int64


In [3]:
# vector : 1-dimensional list/array/tensor
vector_1 = [1, 2, 3]
vector_2 = np.array(vector_1)
vector_3 = torch.tensor(vector_1)

# log
print(f"vector_1: {str(vector_1):<17} | ndim: 1 | dtype: {type(vector_1[0])}")
print(f"vector_2: {str(vector_2):<17} | ndim: {vector_2.ndim} | dtype: numpy.{vector_2.dtype}")
print(f"vector_3: {vector_3} | ndim: {vector_3.ndim} | dtype: {vector_3.dtype}")

vector_1: [1, 2, 3]         | ndim: 1 | dtype: <class 'int'>
vector_2: [1 2 3]           | ndim: 1 | dtype: numpy.int32
vector_3: tensor([1, 2, 3]) | ndim: 1 | dtype: torch.int64


In [4]:
# matrix : 2-dimensional list/array/tensor
matrix_1 = [[0, 1], [2, 3]]
matrix_2 = np.array(matrix_1)
matrix_3 = torch.tensor(matrix_1)

# log
print(f"matrix_1:\n{matrix_1}\nndim : 2\ndtype: {type(matrix_1[0][0])}")
print('-' * 50)
print(f"matrix_2:\n{matrix_2}\nmatrix_2.ndim : {matrix_2.ndim}\nmatrix_2.shape: {matrix_2.shape}\nmatrix_2.dtype: numpy.{matrix_2.dtype}")
print('-' * 50)
print(f"matrix_3:\n{matrix_3}\nmatrix_3.ndim : {matrix_3.ndim}\nmatrix_3.shape: {matrix_3.shape}\nmatrix_3.dtype: {matrix_3.dtype}")

matrix_1:
[[0, 1], [2, 3]]
ndim : 2
dtype: <class 'int'>
--------------------------------------------------
matrix_2:
[[0 1]
 [2 3]]
matrix_2.ndim : 2
matrix_2.shape: (2, 2)
matrix_2.dtype: numpy.int32
--------------------------------------------------
matrix_3:
tensor([[0, 1],
        [2, 3]])
matrix_3.ndim : 2
matrix_3.shape: torch.Size([2, 2])
matrix_3.dtype: torch.int64


In [5]:
# 3-dimensional list/array/tensor
lst = [[[0, 1], [2, 3]], [[4, 5], [6, 7]]]
arr = np.array(matrix_1)
tsr = torch.tensor(matrix_1)

# log
print(f"lst:\n{lst}\nndim : 3\ndtype: {type(lst[0][0][0])}")
print('-' * 50)
print(f"arr:\n{arr}\narr.ndim : {arr.ndim}\narr.shape: {arr.shape}\narr.dtype: numpy.{arr.dtype}")
print('-' * 50)
print(f"tsr:\n{tsr}\ntsr.ndim : {tsr.ndim}\ntsr.shape: {tsr.shape}\ntsr.dtype: {tsr.dtype}")

lst:
[[[0, 1], [2, 3]], [[4, 5], [6, 7]]]
ndim : 3
dtype: <class 'int'>
--------------------------------------------------
arr:
[[0 1]
 [2 3]]
arr.ndim : 2
arr.shape: (2, 2)
arr.dtype: numpy.int32
--------------------------------------------------
tsr:
tensor([[0, 1],
        [2, 3]])
tsr.ndim : 2
tsr.shape: torch.Size([2, 2])
tsr.dtype: torch.int64


# Tensors

In [6]:
# ones
t1 = torch.ones(size= ())
t2 = torch.ones(size= (2, 2))

# zeros
t3 = torch.zeros(size= (2,))

# empty
t4 = torch.empty(size= (2, 3))

# rand
t5 = torch.rand(size= (2, 2, 3))

# log
for i in range(5):
    print(f"t{i+1}:\n{eval(f't{i+1}')}")
    print(f"t{i+1}.size(): {eval(f't{i+1}').size()}")
    print(f"t{i+1}.ndim  : {eval(f't{i+1}').ndim}")
    print(f"t{i+1}.dtype : {eval(f't{i+1}').dtype}")
    print(f"type(t{i+1}) : {type(eval(f't{i+1}'))}")
    print('-' * 50)

t1:
1.0
t1.size(): torch.Size([])
t1.ndim  : 0
t1.dtype : torch.float32
type(t1) : <class 'torch.Tensor'>
--------------------------------------------------
t2:
tensor([[1., 1.],
        [1., 1.]])
t2.size(): torch.Size([2, 2])
t2.ndim  : 2
t2.dtype : torch.float32
type(t2) : <class 'torch.Tensor'>
--------------------------------------------------
t3:
tensor([0., 0.])
t3.size(): torch.Size([2])
t3.ndim  : 1
t3.dtype : torch.float32
type(t3) : <class 'torch.Tensor'>
--------------------------------------------------
t4:
tensor([[0., 0., 0.],
        [0., 0., 0.]])
t4.size(): torch.Size([2, 3])
t4.ndim  : 2
t4.dtype : torch.float32
type(t4) : <class 'torch.Tensor'>
--------------------------------------------------
t5:
tensor([[[0.4483, 0.6078, 0.8505],
         [0.9439, 0.5774, 0.7985]],

        [[0.6874, 0.7539, 0.3512],
         [0.1600, 0.0063, 0.1830]]])
t5.size(): torch.Size([2, 2, 3])
t5.ndim  : 3
t5.dtype : torch.float32
type(t5) : <class 'torch.Tensor'>
-----------------------

## Index

In [7]:
t1 = torch.rand(3, 4)

i1 = t1[0]
i2 = t1[1]
i3 = t1[-1]
i4 = t1[0, 0]
i5 = t1[2, -2]

# log
print(f"t1:\n{t1}")
print('-' * 50)
for i in range(5):
    print(f"i{i+1}: {eval(f'i{i+1}')}")

t1:
tensor([[0.3142, 0.7452, 0.6441, 0.1589],
        [0.7466, 0.3164, 0.6066, 0.0906],
        [0.6222, 0.5052, 0.5504, 0.0089]])
--------------------------------------------------
i1: tensor([0.3142, 0.7452, 0.6441, 0.1589])
i2: tensor([0.7466, 0.3164, 0.6066, 0.0906])
i3: tensor([0.6222, 0.5052, 0.5504, 0.0089])
i4: 0.31418269872665405
i5: 0.5503561496734619


## Slice

In [8]:
t1 = torch.rand(3, 4)

s1 = t1[0, :]  # same as t1[0]
s2 = t1[:, 1]
s3 = t1[:2, 2:]
s4 = t1[-1:, 0]

# log
print(f"t1:\n{t1}")
print('-' * 50)
for i in range(4):
    print(f"s{i+1}:\n{eval(f's{i+1}')}\n")

t1:
tensor([[0.6484, 0.3128, 0.8361, 0.0224],
        [0.1618, 0.8266, 0.9325, 0.8205],
        [0.2317, 0.5489, 0.8283, 0.7612]])
--------------------------------------------------
s1:
tensor([0.6484, 0.3128, 0.8361, 0.0224])

s2:
tensor([0.3128, 0.8266, 0.5489])

s3:
tensor([[0.8361, 0.0224],
        [0.9325, 0.8205]])

s4:
tensor([0.2317])



## Element-wise operations

In [9]:
t1 = torch.arange(4).reshape(2, 2)
t2 = torch.ones(size= (2, 2), dtype= torch.int64)

c1 = t1 + t2   # torch.add(t1, t2)      | t1.add(t2)
c2 = t1 - t2   # torch.sub(t1, t2)      | t1.sub(t2)
c3 = t1 * t2   # torch.multiply(t1, t2) | t1.multiply(t2)
c4 = t1 / t2   # torch.div(t1, t2)      | t1.div(t2)
c5 = t1 ** t2  # torch.pow(t1, t2)      | t1.pow(t2)

# log
for i in range(2):
    print(f"t{i+1}:\n{eval(f't{i+1}')}\n")
print('-' * 50)
for i in range(5):
    print(f"c{i+1}:\n{eval(f'c{i+1}')}\n")

t1:
tensor([[0, 1],
        [2, 3]])

t2:
tensor([[1, 1],
        [1, 1]])

--------------------------------------------------
c1:
tensor([[1, 2],
        [3, 4]])

c2:
tensor([[-1,  0],
        [ 1,  2]])

c3:
tensor([[0, 1],
        [2, 3]])

c4:
tensor([[0., 1.],
        [2., 3.]])

c5:
tensor([[0, 1],
        [2, 3]])



## Broadcasting

In [10]:
t1 = torch.rand(2, 2)

c1 = t1 + 1
c2 = t1 * 10
c3 = t1 ** 2

# log
print(f"t1:\n{t1}\n")
print('-' * 50)
for i in range(3):
    print(f"c{i+1}:\n{eval(f'c{i+1}')}\n")

t1:
tensor([[0.0143, 0.7186],
        [0.3250, 0.4304]])

--------------------------------------------------
c1:
tensor([[1.0143, 1.7186],
        [1.3250, 1.4304]])

c2:
tensor([[0.1426, 7.1856],
        [3.2504, 4.3042]])

c3:
tensor([[2.0324e-04, 5.1633e-01],
        [1.0565e-01, 1.8526e-01]])



## reshape & view
   - view: a new tensor with the same data as the `self` tensor but of a different `shape`.
   - reshape: This method returns a `view` if shape is compatible with the current `shape`.

In [11]:
t1 = torch.rand(4, 4)

reshape_1 = t1.reshape(2, 8)
reshape_2 = t1.reshape(2, -1, 2)

# log
print(f"t1:\n{t1}", end= '\n\n')
print('-' * 50)
for i in range(2):
    print(f"reshape_{i+1}:\n{eval(f'reshape_{i+1}')}")
    print(f"reshape_{i+1}.shape: {eval(f'reshape_{i+1}.shape')}")
    print()

t1:
tensor([[0.2358, 0.2139, 0.9614, 0.2585],
        [0.8515, 0.8237, 0.1138, 0.6574],
        [0.3467, 0.3616, 0.3107, 0.6212],
        [0.4293, 0.7235, 0.5020, 0.0217]])

--------------------------------------------------
reshape_1:
tensor([[0.2358, 0.2139, 0.9614, 0.2585, 0.8515, 0.8237, 0.1138, 0.6574],
        [0.3467, 0.3616, 0.3107, 0.6212, 0.4293, 0.7235, 0.5020, 0.0217]])
reshape_1.shape: torch.Size([2, 8])

reshape_2:
tensor([[[0.2358, 0.2139],
         [0.9614, 0.2585],
         [0.8515, 0.8237],
         [0.1138, 0.6574]],

        [[0.3467, 0.3616],
         [0.3107, 0.6212],
         [0.4293, 0.7235],
         [0.5020, 0.0217]]])
reshape_2.shape: torch.Size([2, 4, 2])



## copy a tensor
   - clone: 
      - creates a hard copy
      - This function is differentiable, so gradients will flow back from the result of this operation to `input`
   - detach:
      - creates a soft copy
      - The new tensor is detached from the current graph for calculating gradients

In [12]:
t1 = torch.zeros(size= (2, 3), requires_grad= True)
t2 = t1.detach()
t3 = t1.clone()

# log
for i in range(2):
    print(f"t{i+2}:\n{eval(f't{i+2}')}")

t2:
tensor([[0., 0., 0.],
        [0., 0., 0.]])
t3:
tensor([[0., 0., 0.],
        [0., 0., 0.]], grad_fn=<CloneBackward0>)


## torch.Tensor to numpy.ndarray

In [13]:
t1 = torch.ones(2, 3)

n1 = t1.numpy()   # share the same memory location
n2 = np.array(t1) # copy

n1[0, 0] = 0
n2[0, 1] = 0

# log
print(f"t1:\n{t1}")
print(f"type(t1): {type(t1)}")
print('-' * 50)
for i in range(2):
    print(f"n{i+1}:\n{eval(f'n{i+1}')}")
    print(f"type(n{i+1}): {eval(f'type(n{i+1})')}\n")

t1:
tensor([[0., 1., 1.],
        [1., 1., 1.]])
type(t1): <class 'torch.Tensor'>
--------------------------------------------------
n1:
[[0. 1. 1.]
 [1. 1. 1.]]
type(n1): <class 'numpy.ndarray'>

n2:
[[1. 0. 1.]
 [1. 1. 1.]]
type(n2): <class 'numpy.ndarray'>



## numpy.ndarray to torch.Tensor

In [14]:
n1 = np.ones(shape= (2, 3))

t1 = torch.from_numpy(n1) # share the same memory location
t2 = torch.tensor(n1)     # copy

t1[0, 0] = 0
t2[0, 1] = 0

# log
print(f"n1:\n{n1}")
print(f"type(n1): {type(n1)}")
print('-' * 50)
for i in range(2):
    print(f"t{i+1}:\n{eval(f't{i+1}')}")
    print(f"type(t{i+1}): {eval(f'type(t{i+1})')}\n")

n1:
[[0. 1. 1.]
 [1. 1. 1.]]
type(n1): <class 'numpy.ndarray'>
--------------------------------------------------
t1:
tensor([[0., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)
type(t1): <class 'torch.Tensor'>

t2:
tensor([[1., 0., 1.],
        [1., 1., 1.]], dtype=torch.float64)
type(t2): <class 'torch.Tensor'>



## tensor on GPU
   - tensor on GPU can not be converted to np.ndarray directly

In [15]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# or
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# log
print(device)

cuda


In [16]:
t1 = torch.ones(2, 3)
t2 = t1.to(device)

# log
print(f"t1.device: {t1.device}")
print(f"t2.device: {t2.device}")
print('-' * 50)
print(f"t2:\n{t2}")

t1.device: cpu
t2.device: cuda:0
--------------------------------------------------
t2:
tensor([[1., 1., 1.],
        [1., 1., 1.]], device='cuda:0')


In [17]:
t1 = torch.ones(2, 3, device= device)

try:
    n1 = t1.numpy()
except TypeError as e:
    print(e)

can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.


In [18]:
t1 = torch.ones(2, 3, device= device)

n2 = t1.cpu().numpy()

# log
print(f"n2:\n{n2}")

n2:
[[1. 1. 1.]
 [1. 1. 1.]]


In [19]:
t1 = torch.ones(2, 3, device= device)
t2 = torch.ones(2, 3).to(device)
t3 = torch.ones(2, 3).cuda()

# log
print(t1.device)
print(t2.device)
print(t3.device)

cuda:0
cuda:0
cuda:0


## Notes

### what you see is not necessarily the actual value

In [20]:
t1 = torch.rand(size= (2, 3))

a = t1[0, 0]
b = t1[0, 0].item()

# log
print(f"a: {a}")
print(f"a.dtype: {a.dtype}")
print('-' * 50)
print(f"b: {b}")
print(f"type(b): {type(b)}")

a: 0.2484932541847229
a.dtype: torch.float32
--------------------------------------------------
b: 0.2484932541847229
type(b): <class 'float'>


### torch.float32 is preferred rather than torch.float64
   1. Memory Efficiency
   2. Speed
   3. Compatibility: Some deep learning libraries and models are optimized for `torch.float32` operations. Using `torch.float64` may lead to compatibility issues or slower performance in certain cases.

Note:
   - `torch.float32` often referred to as `float`
   - `torch.float64` often referred to as `double`