In [None]:
import torch
# Show the installed PyTorch version so readers know which release produced the outputs below
torch.__version__

'2.6.0+cu124'

In [None]:
# Scalar: a zero-dimensional tensor holding a single number
scalar = torch.tensor(7)
scalar

tensor(7)

In [None]:
# Number of dimensions of the scalar (0: no index is needed to reach its value)
scalar.ndim

0

In [None]:
# Retrieve the value as a plain Python number (only works with one-element tensors)
scalar.item()

7


Vector - single dimension tensor but can contain many numbers

In [None]:
# Vector: a one-dimensional tensor (a single sequence of numbers)
vector = torch.tensor([7,7])
vector

tensor([7, 7])

In [None]:
# Number of dimensions of the vector (1: a single index selects an element)
vector.ndim

1

The number of dimensions (rank) of a tensor is how many indices are required to address a single element in that tensor.

For example, a vector needs 1 index, while a matrix needs 2 indices.

In [None]:
# Shape of the vector: one dimension containing 2 elements
vector.shape

torch.Size([2])

In [None]:
# Matrix: a two-dimensional tensor (rows x columns)
matrix = torch.tensor([
    [1, 2],
    [3, 4],
])
print(f"shape: {matrix.shape}\ndim: {matrix.ndim}")

shape: torch.Size([2, 2])
dim: 2


In [None]:
# 3-D tensor: one outer block wrapping a single 3x3 matrix
tensor = torch.tensor([
    [
        [1, 2, 3],
        [3, 6, 9],
        [2, 4, 5],
    ],
])
print(f"shape: {tensor.shape}\ndim: {tensor.ndim}")

shape: torch.Size([1, 3, 3])
dim: 3


In [None]:
# 5-D tensor filled with ones; the sizes may also be passed as separate arguments
T2 = torch.ones(2, 2, 3, 4, 5)
print(f"shape: {T2.shape}\ndim: {T2.ndim}")
print(T2)

shape: torch.Size([2, 2, 3, 4, 5])
dim: 5
tensor([[[[[1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.]],

          [[1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.]],

          [[1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.]]],


         [[[1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.]],

          [[1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.]],

          [[1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.]]]],



        [[[[1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.],
           [1., 1., 1., 1., 1.]],

          [[1.

Instead, a machine learning model often starts out with large random tensors of numbers and adjusts these random numbers as it works through data to better represent it.

In essence:

Start with random numbers -> look at data -> update random numbers -> look at data -> update random numbers...

As data scientists, we can define how the machine learning model starts (initialization), looks at data (representation) and updates (optimization) its random numbers.

We can do so using torch.rand() and passing in the size parameter.

In [None]:
# 3x4 tensor of random values drawn uniformly from [0, 1).
# No seed is set, so the values differ on every run.
random_tensor = torch.rand((3,4))
# random_tensor = torch.rand(size = (3,4)) # Same as above
random_tensor

tensor([[0.7242, 0.3444, 0.5939, 0.4513],
        [0.4947, 0.9167, 0.0257, 0.0883],
        [0.7106, 0.0778, 0.7522, 0.9024]])

Creating a tensor from a range of values, like Python's `range()`

In [None]:
# Integers 1 through 10: the end value (11) is exclusive and the step defaults to 1
one_to_ten_tensor = torch.arange(1, 11)
one_to_ten_tensor

tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [None]:
# Create a tensor of zeros with the same shape and dtype as an existing tensor
ten_zeroes = torch.zeros_like(one_to_ten_tensor)
ten_zeroes

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

## Tensor Datatypes

Tensor datatypes are one of the 3 big issues we run into:
1. Tensors not right dtype
2. Tensors not right shape
3. Tensors not on the right device

In [None]:
# With dtype=None PyTorch infers the datatype from the data; for Python floats that is float32
float_32_tensor = torch.tensor([3.0, 6.0, 9.0],
                               dtype=None, # datatype of the tensor; None means "infer it", or set it explicitly (e.g. torch.float16)
                               device=None, # device the tensor is stored on, e.g. "cpu" or "cuda"; None uses the default device
                               requires_grad=False) # whether or not to track gradients for operations on this tensor
float_32_tensor.dtype

torch.float32

In [None]:
# Cast to half precision; .type() returns a new tensor and leaves float_32_tensor unchanged
float_16_tensor = float_32_tensor.type(torch.float16)
float_16_tensor

tensor([3., 6., 9.], dtype=torch.float16)

In [None]:
# Mixed dtypes work here: the result is promoted to float32.
# Not every operation is this forgiving; some raise an error when the dtypes do not match.
float_16_tensor * float_32_tensor

tensor([ 9., 36., 81.])

In [None]:
# 32-bit integer tensor (without an explicit dtype, integer data defaults to int64)
int_32_tensor = torch.tensor([3, 6, 9], dtype=torch.int32)
int_32_tensor

tensor([3, 6, 9], dtype=torch.int32)

In [None]:
# float32 * int32 also works: the result is promoted to float32
float_32_tensor * int_32_tensor

tensor([ 9., 36., 81.])

### Getting information from tensors (attributes)

1. To get datatype from a tensor, can use tensor.dtype
2. to get shape, tensor.shape
3. to get device from a tensor, tensor.device

In [None]:
# Create a tensor of random values to inspect
some_tensor = torch.rand(3,4)
some_tensor

tensor([[0.5823, 0.7248, 0.2341, 0.9191],
        [0.9856, 0.2722, 0.3052, 0.4297],
        [0.0259, 0.3869, 0.2264, 0.3602]])

In [None]:
# Inspect the three attributes behind the most common tensor errors: dtype, shape and device
print(some_tensor)
print(f"Datatype: {some_tensor.dtype}")
print(f"Shape: {some_tensor.shape}")
print(f"Shape: {some_tensor.size()}") # .size() is a method, .shape is an attribute; both return the same torch.Size
print(f"Device: {some_tensor.device}")

tensor([[0.5823, 0.7248, 0.2341, 0.9191],
        [0.9856, 0.2722, 0.3052, 0.4297],
        [0.0259, 0.3869, 0.2264, 0.3602]])
Datatype: torch.float32
Shape: torch.Size([3, 4])
Shape: torch.Size([3, 4])
Device: cpu


### Manipulating Tensors (tensor operations)

tensor operations include:
* Add
* Sub
* Mult (element wise)
* Div
* Matrix Mult

In [None]:
# Create a sample tensor and add 10 to every element.
# The result is a new tensor; `tensor` itself is not modified.
tensor = torch.tensor([1, 2, 3])
tensor + 10

tensor([11, 12, 13])

In [None]:
# Multiply every element by 10
tensor * 10

tensor([10, 20, 30])

In [None]:
# Subtract 10 from every element
tensor - 10

tensor([-9, -8, -7])

In [None]:
# torch.mul() is the function form of the `*` operator
torch.mul(tensor, 10)

tensor([10, 20, 30])

### Matrix mult

2 main ways to perform multiplication in NN and DL.
1. Element-wise
2. Matrix Mult (dot product)

In [None]:
# Element wise
%%time
print(tensor * tensor)

tensor([1, 4, 9])
CPU times: user 1.61 ms, sys: 33 µs, total: 1.64 ms
Wall time: 1.48 ms


In [None]:
# Matrix mult
%%time
torch.matmul(tensor, tensor) # can also do `tensor @ tensor`

CPU times: user 558 µs, sys: 0 ns, total: 558 µs
Wall time: 454 µs


tensor(14)

In [None]:
# (3, 2) @ (2, 3) -> (3, 3): the inner dimensions match, so the multiplication works
torch.matmul(torch.rand(3, 2), torch.rand(2, 3))

tensor([[0.8098, 0.8096, 0.6596],
        [1.2280, 0.5469, 0.4932],
        [1.4420, 0.9030, 0.7734]])

In [None]:
# Shapes for matrix multiplication
# The inner dimensions must match: (n, k) @ (k, m) -> (n, m)
tensor_A = torch.tensor([[1, 2],
                         [3, 4],
                         [5, 6]], dtype=torch.float32)

tensor_B = torch.tensor([[7, 10],
                         [8, 11],
                         [9, 12]], dtype=torch.float32)

# Both tensors are (3, 2), so the inner dimensions (2 and 3) do not match.
# The error is the point of this cell, so catch and display it instead of
# letting the exception abort a "Restart Kernel -> Run All".
try:
    torch.matmul(tensor_A, tensor_B)
except RuntimeError as shape_error:
    print(f"RuntimeError: {shape_error}")

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)

To fix this issue, we take the transpose of B so that the inner dimensions match.

In [None]:
# View tensor_A and tensor_B (both have shape (3, 2))
print(tensor_A)
print(tensor_B)

# View tensor_A and tensor_B.T (the transpose has shape (2, 3))
print(tensor_A)
print(tensor_B.T)

# (3, 2) @ (2, 3) -> (3, 3); torch.mm is matrix multiplication for 2-D tensors
torch.mm(tensor_A, tensor_B.T)

tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])
tensor([[ 7., 10.],
        [ 8., 11.],
        [ 9., 12.]])
tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])
tensor([[ 7.,  8.,  9.],
        [10., 11., 12.]])


tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

## Finding the min, max, mean, etc. (tensor aggregation)

In [None]:
# Multiples of 10 from 0 up to (but not including) 100; integer arguments give an int64 tensor
x = torch.arange(start=0, end=100, step=10)

In [None]:
# Min: the function form and the method form return the same result
torch.min(x), x.min()

(tensor(0), tensor(0))

In [None]:
# Max: the function form and the method form return the same result
torch.max(x), x.max()

(tensor(90), tensor(90))

In [None]:
# Find the mean
# torch.mean() only accepts floating-point or complex inputs, and `x` is an
# integer (Long / int64) tensor, so this call fails. The error is the point of
# this cell: catch and display it so "Restart Kernel -> Run All" keeps going.
try:
    torch.mean(x)
except RuntimeError as dtype_error:
    print(f"RuntimeError: {dtype_error}")

RuntimeError: mean(): could not infer output dtype. Input dtype must be either a floating point or complex dtype. Got: Long

In [None]:
# torch.mean() requires a floating-point (or complex) dtype, so cast the integer tensor first;
# float32 is used here, but any floating-point dtype would do
torch.mean(x.type(torch.float32)), x.type(torch.float32).mean()

(tensor(45.), tensor(45.))

In [None]:
# Sum: works on integer tensors directly, no cast needed
torch.sum(x), x.sum()

(tensor(450), tensor(450))