In [5]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
print(torch.__version__)

1.13.1


## Intro to Tensors
### Creating tensors


In [6]:
# A scalar is a 0-dimensional tensor wrapping a single number.
scalar = torch.tensor(2)
# Show, in order: the tensor itself, the plain Python number it holds (item()),
# and its number of dimensions (ndim) - a scalar has none.
for shown in (scalar, scalar.item(), scalar.ndim):
    print(shown)

tensor(2)
2
0


In [7]:
# A vector is a 1-dimensional tensor.
vector = torch.tensor([1, 2, 3, 4, 5])
# ndim  -> number of dimensions (the nesting depth of the square brackets)
# shape -> number of elements along each dimension
print(vector, vector.ndim, vector.shape, sep="\n")

tensor([1, 2, 3, 4, 5])
1
torch.Size([5])


In [8]:
# A matrix is a 2-dimensional tensor: here 3 rows x 3 columns.
MATRIX = torch.tensor([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
print(MATRIX, MATRIX.ndim, MATRIX.shape, sep="\n")

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])
2
torch.Size([3, 3])


In [9]:
# A 3-dimensional tensor built from two identical 3x3 matrices.
TENSOR = torch.tensor([
    [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
])
# The shape (2, 3, 3) reads as: 2 matrices, each with 3 rows and 3 columns.
print(TENSOR, TENSOR.ndim, TENSOR.shape, sep="\n")

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]],

        [[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])
3
torch.Size([2, 3, 3])


By convention, tensors and matrices are named with capital letters, while scalars and vectors are named with lowercase letters.

### Random Tensors

In [10]:
# Tensor of random values with 3 rows and 4 columns
random_tensor = torch.rand(size=(3, 4))
random_tensor

tensor([[0.0680, 0.3903, 0.3812, 0.3191],
        [0.2309, 0.3397, 0.5072, 0.9584],
        [0.0017, 0.3234, 0.8155, 0.9886]])

In [11]:
# Random tensor shaped like a 224x224 RGB image.
# The shape now matches the documented layout [height, width, colour channels];
# the previous (3, 224, 244) contradicted that comment and contained a 244 typo.
random_image_tensor = torch.rand(size=(224, 224, 3))  # [height, width, colour channels (RGB)]
random_image_tensor.ndim

3

In [12]:
# Tensor filled with zeros (3 rows, 4 columns)
zeros_tensor = torch.zeros(size=(3, 4))
# zeros_tensor = torch.zeros(3, 4)  # equivalent positional form
zeros_tensor

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [13]:
# Tensor filled with ones (3 rows, 4 columns)
ones_tensor = torch.ones(size=(3, 4))
ones_tensor

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [14]:
ones_tensor.dtype # element data type of the tensor; torch.ones() produced float32 here

torch.float32

In [15]:
# torch.arange(start, end, step) excludes `end`.
# NOTE: despite its name, one_to_ten therefore holds the values 0..9.
one_to_ten = torch.arange(0,10)
torch.arange(start=0,end=10,step=2) # every second value starting at 0 and stopping before 10

tensor([0, 2, 4, 6, 8])

In [16]:
# "*_like" constructors
# zeros_like() creates a tensor filled with zeros that has the same shape as the tensor passed in
# NOTE: despite the variable name, ones_tensor_like holds zeros, not ones
ones_tensor_like = torch.zeros_like(ones_tensor)
ones_tensor_like

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [17]:
# The default floating-point dtype is float32; dtype, device and requires_grad
# can all be set explicitly when a tensor is created.

float_16_tensor = torch.tensor(
    [1, 2, 3],
    dtype=torch.float16,  # data type of the elements
    # Use the Apple Metal GPU ("mps") only when this machine provides it; fall back
    # to the CPU otherwise so the cell does not raise on other hardware.
    device="mps" if torch.backends.mps.is_available() else "cpu",
    requires_grad=False,  # whether autograd should track operations on this tensor
)
float_16_tensor

  nonzero_finite_vals = torch.masked_select(


tensor([1., 2., 3.], device='mps:0', dtype=torch.float16)

In [18]:
# type() returns the tensor converted to the requested dtype (float16 -> float32)
float_32_tensor= float_16_tensor.type(torch.float32)
float_32_tensor

tensor([1., 2., 3.], device='mps:0')

In [19]:
float_32_tensor * float_16_tensor # works: mixed dtypes follow type promotion, so float32 * float16 gives float32 (float * int likewise gives float)

tensor([1., 4., 9.], device='mps:0')

In [20]:
# Inspect the tensor's element dtype, the device it lives on, whether gradients
# are tracked for it, and its shape - one value per line.
print(
    float_16_tensor.dtype,
    float_16_tensor.device,
    float_16_tensor.requires_grad,
    float_16_tensor.shape,
    sep="\n",
)

torch.float16
mps:0
False
torch.Size([3])


### Operations on tensors
- Addition
- Subtraction
- Division
- Element wise multiplication
- Matrix multiplication (for two 1-D vectors this is the same as the dot product)

In [21]:
# Small integer tensor used by the arithmetic examples
operations = torch.tensor([1,2,3])
operations +10 # add 10 to each element (the scalar is applied element-wise)

tensor([11, 12, 13])

In [22]:
operations * 10 # multiply each element by 10 (element-wise multiplication)

tensor([10, 20, 30])

In [23]:
torch.mul(operations,10) # function form of `operations * 10`

tensor([10, 20, 30])

In [24]:
operations / 10 # divide each element by 10; true division, so the integer tensor gives a float result

tensor([0.1000, 0.2000, 0.3000])

In [25]:
# Two vectors of equal length, multiplied position by position:
# [1*4, 2*5, 3*6]
tensor_mul_1 = torch.tensor([1, 2, 3])
tensor_mul_2 = torch.tensor([4, 5, 6])
elementwise_product = tensor_mul_1 * tensor_mul_2
print(f"element wise multiplication: {elementwise_product}")

element wise multiplication: tensor([ 4, 10, 18])


In [26]:
tensor_mul_1 @ tensor_mul_2 # matrix multiplication via the @ operator; for two 1-D tensors this is the dot product (1*4 + 2*5 + 3*6 = 32), NOT the element-wise product

tensor(32)

In [27]:
torch.matmul(tensor_mul_1,tensor_mul_2) # function form of the @ operator

tensor(32)

## some benchmarking

In [28]:
%%time
# Baseline for the benchmark: sum of squares of 100000 random values computed
# with an element-by-element Python loop (deliberately the slow way).
value =0
tensor1 = torch.rand(100000)

for i in range(len(tensor1)):
    value += tensor1[i] * tensor1[i] # one tiny tensor operation per element

print(tensor1,tensor1.shape,tensor1.ndim)
print(value)

tensor([0.2997, 0.0454, 0.0875,  ..., 0.7646, 0.3664, 0.8544]) torch.Size([100000]) 1
tensor(33409.8516)
CPU times: user 208 ms, sys: 2.62 ms, total: 210 ms
Wall time: 213 ms


In [29]:
%%time 
# Same sum of squares as a single vectorised call; the last digits can differ
# slightly from the loop result because the float32 additions happen in a different order
torch.matmul(tensor1,tensor1)

CPU times: user 413 µs, sys: 1.11 ms, total: 1.53 ms
Wall time: 1.46 ms


tensor(33410.2539)

In [30]:
%%time 
tensor1.dot(tensor1) # dot product of two 1-D tensors; faster than the matmul call in this run
# dot and matmul give the same result for 1-D tensors; torch.dot accepts 1-D tensors only

CPU times: user 117 µs, sys: 126 µs, total: 243 µs
Wall time: 478 µs


tensor(33410.2539)

In [31]:
# 3-dimensional random tensor: 100 matrices of 100 x 100
tensor2 = torch.rand(size=(100, 100, 100))
tensor2.shape, tensor2.ndim


(torch.Size([100, 100, 100]), 3)

In [32]:
# print(tensor2.dot(tensor2)) # raises an error: torch.dot only accepts 1-D tensors
tensor2.matmul(tensor2) # matmul accepts 3-D tensors: it multiplies the 100x100 matrices batch-wise, one pair per index of the first dimension

tensor([[[24.1355, 22.5474, 21.2028,  ..., 23.3974, 24.2962, 20.9226],
         [26.9782, 23.8141, 23.9537,  ..., 25.0834, 25.1837, 22.7407],
         [28.8917, 24.7451, 24.8900,  ..., 27.1475, 25.7138, 26.6879],
         ...,
         [24.8964, 22.8051, 23.6040,  ..., 24.8836, 24.3157, 22.1827],
         [29.4223, 24.2789, 25.2092,  ..., 24.4218, 26.3043, 25.6475],
         [26.5048, 22.8048, 23.0850,  ..., 24.8098, 25.7140, 23.7128]],

        [[19.5590, 23.4508, 22.2768,  ..., 20.7265, 21.6137, 20.8505],
         [22.1140, 26.7733, 24.3902,  ..., 25.1827, 23.5018, 25.3075],
         [19.2721, 22.8501, 22.6267,  ..., 22.6890, 21.9649, 21.5033],
         ...,
         [21.0523, 25.3162, 23.7720,  ..., 25.1456, 24.0636, 21.8882],
         [22.1876, 25.5806, 25.2815,  ..., 23.9183, 22.7452, 24.2331],
         [21.2688, 24.6019, 23.9528,  ..., 24.6465, 24.0688, 23.8682]],

        [[23.6685, 27.7340, 26.4463,  ..., 24.9889, 25.7199, 25.5952],
         [26.3906, 29.2407, 28.4798,  ..., 26

In [33]:
tensor3 = torch.rand(100,100)
# tensor3.dot(tensor3) # would raise an error: torch.dot only accepts 1-D tensors, so it does not work for a 2-D tensor either

In [34]:
# torch.mm(tensor3,tensor3) is matrix multiplication restricted to 2-D tensors
# torch.mm does not broadcast; torch.matmul does, i.e. it also handles 1-D inputs and batched (3-D and higher) inputs by broadcasting the batch dimensions
print(torch.mm(tensor3,tensor3))
# print(tensor2.mm(tensor2)) # raises an error: mm only accepts 2-D tensors and tensor2 is 3-D

tensor([[24.8122, 24.6808, 21.5404,  ..., 22.1012, 22.3122, 21.6048],
        [25.3132, 26.5542, 25.2134,  ..., 24.4651, 23.7103, 23.5687],
        [26.1217, 25.9671, 24.7538,  ..., 24.7833, 23.6987, 23.0907],
        ...,
        [24.8978, 25.9454, 22.7677,  ..., 23.7624, 23.6002, 22.1892],
        [24.4345, 26.3747, 24.4636,  ..., 22.0047, 25.3130, 23.9794],
        [25.0486, 26.3797, 22.5511,  ..., 23.8049, 25.0386, 23.5396]])


## Rules for matrix multiplication
- The inner dimensions must match <br>
(3,2) @ (2,3) will work because the inner dimensions(2,2) match <br>
(3,2) @ (3,2) will not work because the inner dimensions(2,3) do not match

(rows,column)

- The resulting matrix has the shape of the outer dimensions <br>
(3,2) @ (2,3) will result in a (3,3) matrix

In [35]:
# Both operands start out with shape (3, 2).
tensorA = torch.tensor([[1, 2], [3, 4], [5, 6]])
tensorB = torch.tensor([[2, 4], [6, 8], [10, 12]])
# tensorA @ tensorB would fail at this point: the inner dimensions (2 and 3) differ.
# Swapping the two axes of tensorB turns (3, 2) into (2, 3), which makes them match.

print(tensorB.shape)
tensorB = tensorB.transpose(0, 1)
print(tensorB.shape)
# (3, 2) @ (2, 3) -> (3, 3)
print(torch.matmul(tensorA, tensorB))

torch.Size([3, 2])
torch.Size([2, 3])
tensor([[ 10,  22,  34],
        [ 22,  50,  78],
        [ 34,  78, 122]])


## Tensor aggregation
- finding mean, sum, max, min, standard deviation, variance, product, argmax, argmin, etc

In [36]:
# Ten integers 0, 10, ..., 90 used by the aggregation examples.
tensor_agg = torch.arange(start=0, end=100, step=10)
# Function forms are used here; the method forms tensor_agg.sum(), tensor_agg.prod(),
# tensor_agg.max() and tensor_agg.min() give the same results.
print(
    tensor_agg,
    torch.sum(tensor_agg),   # sum of all elements
    torch.prod(tensor_agg),  # product of all elements
    torch.max(tensor_agg),   # largest element
    torch.min(tensor_agg),   # smallest element
    sep="\n",
)

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])
tensor(450)
tensor(0)
tensor(90)
tensor(0)


In [37]:
# tensor_agg is int64 (long) at this point - check with print(tensor_agg.dtype).
# mean, var and std need a floating-point dtype, so convert before aggregating.
tensor_agg = tensor_agg.type(torch.float32)
print(
    tensor_agg.mean(),  # mean; same as tensor_agg.sum()/len(tensor_agg)
    tensor_agg.var(),   # variance; same as tensor_agg.std()**2
    tensor_agg.std(),   # standard deviation; same as tensor_agg.var().sqrt()
    sep="\n",
)

tensor(45.)
tensor(916.6667)
tensor(30.2765)


In [38]:
# Positions (indices), not values, of the largest and the smallest element
print(tensor_agg.argmax(), tensor_agg.argmin(), sep="\n")

tensor(9)
tensor(0)


In [39]:
torch.tensor([1,2,3,4,5,6,7,8,9,10]).reshape(2,5) # rearrange the 10 elements into 2 rows of 5, filling row by row

tensor([[ 1,  2,  3,  4,  5],
        [ 6,  7,  8,  9, 10]])

In [40]:
torch.tensor([1,2,3,4,5,6,7,8,9,10]).reshape(2,-1) 
# -1 tells reshape to infer that dimension from the element count: 10 elements / 2 rows = 5 columns, so the result is 2x5

tensor([[ 1,  2,  3,  4,  5],
        [ 6,  7,  8,  9, 10]])

### Reshaping, stacking, squeezing, unsqueezing, permute of tensors

Why do any of these?

Because deep learning models (neural networks) are all about manipulating tensors in some way. And because of the rules of matrix multiplication, if you've got shape mismatches, you'll run into errors. These methods help you make sure the right elements of your tensors are mixing with the right elements of other tensors.

In [41]:
import torch
# Seven float values 1.0 .. 7.0 (the end value 8.0 is excluded)
x = torch.arange(start=1.0, end=8.0)
x, x.shape, x.ndim

(tensor([1., 2., 3., 4., 5., 6., 7.]), torch.Size([7]), 1)

In [42]:
# Reshape: the new shape must hold exactly as many elements as the original (7),
# e.g. (1,7), (7,1), (7,), (1,7,1) or (1,1,7)
x_reshaped = x.reshape(1,7)
# (2,7) is not possible: it would need 2*7 = 14 elements but x only has 7
x_reshaped , x_reshaped.shape , x_reshaped.ndim # ndim goes from 1 to 2: a leading dimension of size 1 was added

(tensor([[1., 2., 3., 4., 5., 6., 7.]]), torch.Size([1, 7]), 2)

In [43]:
# Ten float values 1.0 .. 10.0 (the end value 11.0 is excluded)
y = torch.arange(1., 11.)

# (2, 5) is a valid target shape because 2 * 5 = 10 equals the number of elements in y.
# The former mid-cell `y , y.shape` expression was removed: only the last expression
# of a cell is displayed, so it never showed anything.
y_reshaped = y.reshape(2, 5)
y_reshaped, y_reshaped.shape, y_reshaped.ndim  # ndim goes from 1 to 2

(tensor([[ 1.,  2.,  3.,  4.,  5.],
         [ 6.,  7.,  8.,  9., 10.]]),
 torch.Size([2, 5]),
 2)

In [44]:
# view() returns a tensor with a new shape that always shares memory with the original (it never copies);
# reshape() also returns a view when it can and only copies the data when it has to
x_view = x.view(1,7)
x_view , x_view.shape , x_view.ndim
# if we change x_view, x will also change as they share same memory

(tensor([[1., 2., 3., 4., 5., 6., 7.]]), torch.Size([1, 7]), 2)

In [45]:
# torch.stack joins tensors of equal shape along a NEW dimension
# dim=0: each copy of x becomes a row -> shape (4, 7)
x_stacked = torch.stack([x, x, x, x], dim=0) 
print(x_stacked)
# dim=1: each copy of x becomes a column -> shape (7, 4)
x_stacked2 = torch.stack([x, x, x, x], dim=1)
print(x_stacked2)


tensor([[1., 2., 3., 4., 5., 6., 7.],
        [1., 2., 3., 4., 5., 6., 7.],
        [1., 2., 3., 4., 5., 6., 7.],
        [1., 2., 3., 4., 5., 6., 7.]])
tensor([[1., 1., 1., 1.],
        [2., 2., 2., 2.],
        [3., 3., 3., 3.],
        [4., 4., 4., 4.],
        [5., 5., 5., 5.],
        [6., 6., 6., 6.],
        [7., 7., 7., 7.]])


In [49]:
print(f"Previous tensor: {x_reshaped}")
print(f"Previous shape: {x_reshaped.shape}")

# squeeze() without arguments removes every dimension of size 1, here (1, 7) -> (7,)
x_squeezed = x_reshaped.squeeze()
print(f"\nNew tensor: {x_squeezed}")
print(f"New shape: {x_squeezed.shape}")
# x has no dimension of size 1, so squeeze() leaves its shape unchanged
x.squeeze(), x.squeeze().shape

Previous tensor: tensor([[1., 2., 3., 4., 5., 6., 7.]])
Previous shape: torch.Size([1, 7])

New tensor: tensor([1., 2., 3., 4., 5., 6., 7.])
New shape: torch.Size([7])


(tensor([1., 2., 3., 4., 5., 6., 7.]), torch.Size([7]))

In [48]:
# unsqueeze(dim) inserts a new dimension of size 1 at position dim; it is the opposite of squeeze()
# and is not limited to 3-D results - it works for tensors of any rank
print(f"Previous tensor: {x_squeezed}")
print(f"Previous shape: {x_squeezed.shape}")

# Add an extra leading dimension with unsqueeze: (7,) -> (1, 7)
x_unsqueezed = x_squeezed.unsqueeze(dim=0)
print(f"\nNew tensor: {x_unsqueezed}")
print(f"New shape: {x_unsqueezed.shape}")

Previous tensor: tensor([1., 2., 3., 4., 5., 6., 7.])
Previous shape: torch.Size([7])

New tensor: tensor([[1., 2., 3., 4., 5., 6., 7.]])
New shape: torch.Size([1, 7])


In [53]:
# torch.permute(input, dims) returns a view of `input` whose axes are rearranged
# into the order given by `dims`.

# Image-like tensor in [Height, Width, Channels] layout
x_original = torch.rand(size=(224, 224, 3))

# New axis order (2, 0, 1): old axis 2 moves to the front, old axes 0 and 1 follow,
# i.e. axis 0->1, 1->2, 2->0, giving [Channels, Height, Width]
x_permuted = torch.permute(x_original, dims=(2, 0, 1))

print(f"Previous shape: {x_original.shape}")
print(f"New shape: {x_permuted.shape}")
# The result is a view on the same memory: changing x_permuted also changes x_original

Previous shape: torch.Size([224, 224, 3])
New shape: torch.Size([3, 224, 224])


Indexing (Selecting data from tensors) in PyTorch
(for example, only the first column or second row)
(similar to numpy indexing)

In [58]:
import torch
# The values 1..9 arranged as one 3x3 matrix -> shape (1, 3, 3)
x = torch.reshape(torch.arange(1, 10), (1, 3, 3))
x, x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [65]:
# Each additional index steps one level deeper into the nested brackets.
print(f"First square bracket:\n{x[0]}")
print(f"Second square bracket: {x[0][0]}")
print(f"Third square bracket: {x[0][0][0]}")
# Chained indices can be written as one comma-separated index: x[0][0][0] is x[0, 0, 0].
for row, col in [(0, 1), (0, 2), (1, 0), (1, 1)]:
    print(x[0, row, col])


First square bracket:
tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])
Second square bracket: tensor([1, 2, 3])
Third square bracket: 1
tensor(2)
tensor(3)
tensor(4)
tensor(5)


In [61]:
# ":" selects everything along a dimension
# All of dimension 0, index 0 of dimension 1 -> the first row, still wrapped in dimension 0
print(x[:, 0])
# All of dimensions 0 and 1, index 1 of dimension 2 -> the middle column
print(x[:, :, 1])
# All of dimension 0, index 1 of both dimension 1 and dimension 2 -> the centre element
print(x[:, 1, 1])
# Index 0 of dimensions 0 and 1, all of dimension 2 -> the first row
print(x[0, 0, :]) # same as x[0][0]

tensor([[1, 2, 3]])
tensor([[2, 5, 8]])
tensor([5])
tensor([1, 2, 3])


In [73]:
x[0,:2,1:] # first matrix, rows 0-1, columns 1 onward -> the top-right 2x2 block

tensor([[2, 3],
        [5, 6]])

### PyTorch tensors & NumPy
- torch.from_numpy(ndarray) - NumPy array -> PyTorch tensor.
- torch.Tensor.numpy() - PyTorch tensor -> NumPy array.

In [86]:
# NumPy array to tensor
import torch
import numpy as np
array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array) # from_numpy() converts a NumPy array to a tensor and keeps the array's dtype (float64 here)
# NumPy's default float dtype is float64 while PyTorch's is float32, so convert explicitly when float32 is needed
tensor2 = torch.tensor(array).type(torch.float32)  

array, tensor , tensor2.dtype

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64),
 torch.float32)

In [87]:
# torch.from_numpy() DOES share memory with the source array. The tensor stays unchanged here only
# because `array + 1` builds a NEW array and rebinds the name `array` to it; the tensor still points
# at the old buffer. An in-place update such as `array += 1` would change the tensor as well.
array = array + 1

array, tensor

(array([2., 3., 4., 5., 6., 7., 8.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [88]:
# Tensor to NumPy array
tensor = torch.ones(7) # create a tensor of ones with dtype=float32
numpy_tensor = tensor.numpy() # keeps the tensor's dtype (float32) and, for a CPU tensor, shares its memory
tensor, numpy_tensor


(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [89]:
# `tensor + 1` creates a NEW tensor and rebinds the name, so numpy_tensor (still backed by the old tensor's memory) keeps its values
# an in-place update such as tensor.add_(1) would change numpy_tensor too
tensor = tensor + 1
tensor, numpy_tensor

(tensor([2., 2., 2., 2., 2., 2., 2.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))