## Getting started with pytorch

In [1]:
import torch
print(torch.__version__)

2.9.0+cu126


In [2]:
if torch.cuda.is_available():
    print("GPU is available!")
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    print("GPU not available. Using CPU.")

GPU is available!
Using GPU: Tesla T4


## Creating a Tensor

In [3]:
# using empty
a = torch.empty(2,3)
a
## Initializes tensor with garbage value

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [4]:
# check type
type(a)

torch.Tensor

In [5]:
# using zeros
torch.zeros(2,3)

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [6]:
# using ones
torch.ones(2,3)

tensor([[1., 1., 1.],
        [1., 1., 1.]])

In [7]:
# using rand
torch.rand(2,3)

tensor([[0.8706, 0.1376, 0.3254],
        [0.0434, 0.0921, 0.8888]])

In [8]:
# use of seed
torch.rand(2,3)

tensor([[0.6265, 0.1632, 0.0046],
        [0.1029, 0.9265, 0.3750]])

In [9]:
# manual_seed is used to ensure reproducibility
torch.manual_seed(100)
torch.rand(2,3)

tensor([[0.1117, 0.8158, 0.2626],
        [0.4839, 0.6765, 0.7539]])

In [10]:
torch.manual_seed(100)
torch.rand(2,3)

tensor([[0.1117, 0.8158, 0.2626],
        [0.4839, 0.6765, 0.7539]])

In [11]:
# using tensor
torch.tensor([[1,2,3],[4,5,6]])

tensor([[1, 2, 3],
        [4, 5, 6]])

In [12]:
# other ways

# arange
print("using arange ->", torch.arange(0,10,2))

# using linspace
print("using linspace ->", torch.linspace(0,10,10))

# using eye
print("using eye ->", torch.eye(5))

# using full
print("using full ->", torch.full((3, 3), 5))

using arange -> tensor([0, 2, 4, 6, 8])
using linspace -> tensor([ 0.0000,  1.1111,  2.2222,  3.3333,  4.4444,  5.5556,  6.6667,  7.7778,
         8.8889, 10.0000])
using eye -> tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])
using full -> tensor([[5, 5, 5],
        [5, 5, 5],
        [5, 5, 5]])


## Tensor Shapes

In [13]:
x = torch.tensor([[1,2,3],[4,5,6]])
x

tensor([[1, 2, 3],
        [4, 5, 6]])

In [14]:
x.shape

torch.Size([2, 3])

## to make a new tensor of a certain shape

In [15]:
torch.empty_like(x)

tensor([[      338007648, 137635424602944,               0],
        [              0,               0,               0]])

In [16]:
torch.zeros_like(x)

tensor([[0, 0, 0],
        [0, 0, 0]])

In [17]:
torch.ones_like(x)

tensor([[1, 1, 1],
        [1, 1, 1]])

In [18]:
torch.rand_like(x, dtype=torch.float32)

tensor([[0.2627, 0.0428, 0.2080],
        [0.1180, 0.1217, 0.7356]])

## Tensor Data Types

In [19]:
# find data type
x.dtype

torch.int64

In [20]:
# assign data type
torch.tensor([1.0,2.0,3.0], dtype=torch.int32)

tensor([1, 2, 3], dtype=torch.int32)

In [21]:
torch.tensor([1,2,3], dtype=torch.float64)

tensor([1., 2., 3.], dtype=torch.float64)

In [22]:
# using to()
x.to(torch.float32)

tensor([[1., 2., 3.],
        [4., 5., 6.]])

| **Data Type**             | **Dtype**         | **Description**                                                                                                                                                                |
|---------------------------|-------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| **32-bit Floating Point** | `torch.float32`   | Standard floating-point type used for most deep learning tasks. Provides a balance between precision and memory usage.                                                         |
| **64-bit Floating Point** | `torch.float64`   | Double-precision floating point. Useful for high-precision numerical tasks but uses more memory.                                                                               |
| **16-bit Floating Point** | `torch.float16`   | Half-precision floating point. Commonly used in mixed-precision training to reduce memory and computational overhead on modern GPUs.                                            |
| **BFloat16**              | `torch.bfloat16`  | Brain floating-point format with reduced precision compared to `float16`. Used in mixed-precision training, especially on TPUs.                                                |
| **8-bit Floating Point**  | `torch.float8`    | Ultra-low-precision floating point. Used for experimental applications and extreme memory-constrained environments (less common).                                               |
| **8-bit Integer**         | `torch.int8`      | 8-bit signed integer. Used for quantized models to save memory and computation in inference.                                                                                   |
| **16-bit Integer**        | `torch.int16`     | 16-bit signed integer. Useful for special numerical tasks requiring intermediate precision.                                                                                    |
| **32-bit Integer**        | `torch.int32`     | Standard signed integer type. Commonly used for indexing and general-purpose numerical tasks.                                                                                  |
| **64-bit Integer**        | `torch.int64`     | Long integer type. Often used for large indexing arrays or for tasks involving large numbers.                                                                                  |
| **8-bit Unsigned Integer**| `torch.uint8`     | 8-bit unsigned integer. Commonly used for image data (e.g., pixel values between 0 and 255).                                                                                    |
| **Boolean**               | `torch.bool`      | Boolean type, stores `True` or `False` values. Often used for masks in logical operations.                                                                                      |
| **Complex 64**            | `torch.complex64` | Complex number type with 32-bit real and 32-bit imaginary parts. Used for scientific and signal processing tasks.                                                               |
| **Complex 128**           | `torch.complex128`| Complex number type with 64-bit real and 64-bit imaginary parts. Offers higher precision but uses more memory.                                                                 |
| **Quantized Integer**     | `torch.qint8`     | Quantized signed 8-bit integer. Used in quantized models for efficient inference.                                                                                              |
| **Quantized Unsigned Integer** | `torch.quint8` | Quantized unsigned 8-bit integer. Often used for quantized tensors in image-related tasks.                                                                                     |


## Mathematical operations

### 1. Scalar operation

In [29]:
x = torch.rand(2,2)
x

tensor([[0.9969, 0.7565],
        [0.2239, 0.3023]])

In [30]:
# addition
print(x + 2)
# substraction
print(x - 2)
# multiplication
print(x * 3)
# division
print(x / 3)
# int division
print((x * 100)//3)
# mod
print(((x * 100)//3)%2)
# power
print(x**2)

tensor([[2.9969, 2.7565],
        [2.2239, 2.3023]])
tensor([[-1.0031, -1.2435],
        [-1.7761, -1.6977]])
tensor([[2.9907, 2.2695],
        [0.6717, 0.9070]])
tensor([[0.3323, 0.2522],
        [0.0746, 0.1008]])
tensor([[33., 25.],
        [ 7., 10.]])
tensor([[1., 1.],
        [1., 0.]])
tensor([[0.9938, 0.5723],
        [0.0501, 0.0914]])


### 2. Element wise operation

In [32]:
a = torch.rand(2,3)
b = torch.rand(2,3)

print(a)
print(b)

tensor([[0.1784, 0.8238, 0.5557],
        [0.9770, 0.4440, 0.9478]])
tensor([[0.7445, 0.4892, 0.2426],
        [0.7003, 0.5277, 0.2472]])


In [33]:
# add
print(a + b)
# sub
print(a - b)
# multiply
print(a * b)
# division
print(a / b)
# power
print(a ** b)
# mod
print(a % b)

tensor([[0.9229, 1.3130, 0.7983],
        [1.6774, 0.9717, 1.1950]])
tensor([[-0.5661,  0.3346,  0.3132],
        [ 0.2767, -0.0837,  0.7007]])
tensor([[0.1328, 0.4030, 0.1348],
        [0.6842, 0.2343, 0.2343]])
tensor([[0.2397, 1.6841, 2.2912],
        [1.3951, 0.8415, 3.8346]])
tensor([[0.2771, 0.9096, 0.8672],
        [0.9839, 0.6515, 0.9868]])
tensor([[0.1784, 0.3346, 0.0706],
        [0.2767, 0.4440, 0.2063]])


In [34]:
c = torch.tensor([1, -2, 3, -4])

In [35]:
# abs
torch.abs(c)

tensor([1, 2, 3, 4])

In [36]:
# negative
torch.neg(c)

tensor([-1,  2, -3,  4])

In [37]:
d = torch.tensor([1.9, 2.3, 3.7, 4.4])

In [38]:
# round
torch.round(d)

tensor([2., 2., 4., 4.])

In [39]:
# ceil
torch.ceil(d)

tensor([2., 3., 4., 5.])

In [40]:
# floor
torch.floor(d)

tensor([1., 2., 3., 4.])

In [41]:
# clamp
## keeping elements in a certain range, here elements less than 2 are converted to 2
## and element greater than 3 are 3
torch.clamp(d, min=2, max=3)

tensor([2.0000, 2.3000, 3.0000, 3.0000])

### 3. Reduction operation

In [42]:
e = torch.randint(size=(2,3), low=0, high=10, dtype=torch.float32)
e

tensor([[0., 9., 5.],
        [7., 3., 9.]])

In [44]:
# sum
print(torch.sum(e))

# sum along columns
print(torch.sum(e, dim=0))

# sum along rows
print(torch.sum(e, dim=1))

tensor(33.)
tensor([ 7., 12., 14.])
tensor([14., 19.])


In [45]:
# mean

print(torch.mean(e))

# mean along col
print(torch.mean(e, dim=0))

tensor(5.5000)
tensor([3.5000, 6.0000, 7.0000])


In [46]:
# median
torch.median(e)

tensor(5.)

In [48]:
# max and min
print(torch.max(e))

print(torch.min(e))

tensor(9.)
tensor(0.)


In [49]:
# product
torch.prod(e)

tensor(0.)

In [50]:
# standard deviation
torch.std(e)

tensor(3.5637)

In [51]:
# variance
torch.var(e)

tensor(12.7000)

In [53]:
# argmax

## gives the index of the max element
torch.argmax(e)

tensor(1)

In [54]:
# argmin

##gives the index of the min element
torch.argmin(e)

tensor(0)

### 4. Matrix operations

In [55]:
f = torch.randint(size=(2,3), low=0, high=10)
g = torch.randint(size=(3,2), low=0, high=10)

print(f)
print(g)

tensor([[4, 0, 5],
        [7, 5, 9]])
tensor([[9, 7],
        [5, 9],
        [8, 9]])


In [56]:
# matrix multiplcation
torch.matmul(f, g)

tensor([[ 76,  73],
        [160, 175]])

In [57]:
vector1 = torch.tensor([1, 2])
vector2 = torch.tensor([3, 4])

# dot product
torch.dot(vector1, vector2)

tensor(11)

In [58]:
# transpose
torch.transpose(f, 0, 1)

tensor([[4, 7],
        [0, 5],
        [5, 9]])

In [60]:
h = torch.randint(size=(3,3), low=0, high=10, dtype=torch.float32)
h

tensor([[7., 9., 2.],
        [6., 7., 7.],
        [8., 3., 6.]])

In [61]:
# determinant
torch.det(h)

tensor(251.)

In [62]:
# inverse
torch.inverse(h)

tensor([[ 0.0837, -0.1912,  0.1952],
        [ 0.0797,  0.1036, -0.1474],
        [-0.1514,  0.2032, -0.0199]])

### 5. Comparison operations

In [63]:
i = torch.randint(size=(2,3), low=0, high=10)
j = torch.randint(size=(2,3), low=0, high=10)

print(i)
print(j)

tensor([[1, 5, 5],
        [0, 4, 3]])
tensor([[8, 8, 3],
        [3, 5, 0]])


In [64]:
# greater than
print(i > j)

# less than
print(i < j)

# equal to
print(i == j)

# not equal to
print(i != j)

# greater than equal to
print(i>=j)

# less than equal to
print(i<=j)

tensor([[False, False,  True],
        [False, False,  True]])
tensor([[ True,  True, False],
        [ True,  True, False]])
tensor([[False, False, False],
        [False, False, False]])
tensor([[True, True, True],
        [True, True, True]])
tensor([[False, False,  True],
        [False, False,  True]])
tensor([[ True,  True, False],
        [ True,  True, False]])


### 6. Special functions

In [65]:
k = torch.randint(size=(2,3), low=0, high=10, dtype=torch.float32)
k

tensor([[6., 4., 0.],
        [8., 4., 7.]])

In [66]:
# log
torch.log(k)

tensor([[1.7918, 1.3863,   -inf],
        [2.0794, 1.3863, 1.9459]])

In [67]:
# exp
torch.exp(k)

tensor([[4.0343e+02, 5.4598e+01, 1.0000e+00],
        [2.9810e+03, 5.4598e+01, 1.0966e+03]])

In [68]:
# sqrt
torch.sqrt(k)

tensor([[2.4495, 2.0000, 0.0000],
        [2.8284, 2.0000, 2.6458]])

In [72]:
# sigmoid
torch.sigmoid(k)

tensor([[0.9975, 0.9820, 0.5000],
        [0.9997, 0.9820, 0.9991]])

In [71]:
# softmax
torch.softmax(k, dim=0)

tensor([[1.1920e-01, 5.0000e-01, 9.1105e-04],
        [8.8080e-01, 5.0000e-01, 9.9909e-01]])

In [None]:
# relu
torch.relu(k)

tensor([[0., 6., 4.],
        [4., 8., 6.]])

## Inplace Operations

- put _ in front of the operation and then it becomes inplace

In [73]:
m = torch.rand(2,3)
n = torch.rand(2,3)

print(m)
print(n)

tensor([[0.5548, 0.6868, 0.4920],
        [0.0748, 0.9605, 0.3271]])
tensor([[0.0103, 0.9516, 0.2855],
        [0.2324, 0.9141, 0.7668]])


In [74]:
m.add_(n)

tensor([[0.5650, 1.6384, 0.7776],
        [0.3072, 1.8746, 1.0939]])

In [75]:
m

tensor([[0.5650, 1.6384, 0.7776],
        [0.3072, 1.8746, 1.0939]])

In [76]:
n

tensor([[0.0103, 0.9516, 0.2855],
        [0.2324, 0.9141, 0.7668]])

In [None]:
torch.relu(m)

tensor([[0.6767, 0.2721, 1.3875],
        [1.1851, 0.6159, 1.3585]])

In [77]:
m.relu_()

tensor([[0.5650, 1.6384, 0.7776],
        [0.3072, 1.8746, 1.0939]])

In [78]:
m

tensor([[0.5650, 1.6384, 0.7776],
        [0.3072, 1.8746, 1.0939]])

## Copying a Tensor

In [79]:
a = torch.rand(2,3)
a

tensor([[0.1659, 0.4393, 0.2243],
        [0.8935, 0.0497, 0.1780]])

In [80]:
b = a
## here reference is shared

In [81]:
b

tensor([[0.1659, 0.4393, 0.2243],
        [0.8935, 0.0497, 0.1780]])

In [82]:
a[0][0] = 0

In [83]:
a

tensor([[0.0000, 0.4393, 0.2243],
        [0.8935, 0.0497, 0.1780]])

In [None]:
b

tensor([[0.0000, 0.9985, 0.6783],
        [0.2776, 0.6227, 0.2982]])

In [None]:
id(a)

134838567488544

In [None]:
id(b)

134838567488544

In [84]:
b = a.clone()

In [85]:
a

tensor([[0.0000, 0.4393, 0.2243],
        [0.8935, 0.0497, 0.1780]])

In [86]:
b

tensor([[0.0000, 0.4393, 0.2243],
        [0.8935, 0.0497, 0.1780]])

In [87]:
a[0][0] = 10

In [88]:
a

tensor([[10.0000,  0.4393,  0.2243],
        [ 0.8935,  0.0497,  0.1780]])

In [89]:
b

tensor([[0.0000, 0.4393, 0.2243],
        [0.8935, 0.0497, 0.1780]])

In [90]:
id(a)

137639856923552

In [91]:
id(b)

137635193657680

### Testing tensor on cpu and gpu

In [92]:
torch.cuda.is_available()

True

In [93]:
device = torch.device('cuda')

In [94]:
## Creating a new tensor on gpu

torch.rand((2,3), device = device)

tensor([[0.3563, 0.0303, 0.7088],
        [0.2009, 0.0224, 0.9896]], device='cuda:0')

In [95]:
## Moving an existing tensor to gpu

a = torch.rand(2,3)
a

tensor([[0.3011, 0.1893, 0.9186],
        [0.2131, 0.3957, 0.6017]])

In [96]:
b = a.to(device)

In [97]:
b+5

tensor([[5.3011, 5.1893, 5.9186],
        [5.2131, 5.3957, 5.6017]], device='cuda:0')

### Comparison

In [99]:
import time

size = 10000

matrix_cpu1 = torch.randn(size,size)
matrix_cpu2 = torch.randn(size,size)

start_time = time.time()
result_cpu = torch.matmul(matrix_cpu1, matrix_cpu2)
end_time = time.time()
cpu_time = end_time - start_time
print("Time taken by CPU matrix multiplication:", cpu_time)
matrix_gpu1 = matrix_cpu1.to(device)
matrix_gpu2 = matrix_cpu2.to(device)
start_time = time.time()
result_gpu = torch.matmul(matrix_gpu1, matrix_gpu2)
end_time = time.time()
gpu_time = end_time - start_time
print("Time taken by GPU matrix multiplication:", gpu_time)




Time taken by CPU matrix multiplication: 23.831297874450684
Time taken by GPU matrix multiplication: 0.17435956001281738


## Reshaping tensors

In [100]:
a = torch.ones(4,4)
a

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [102]:
c = a.reshape(2,2,2,2)
c

tensor([[[[1., 1.],
          [1., 1.]],

         [[1., 1.],
          [1., 1.]]],


        [[[1., 1.],
          [1., 1.]],

         [[1., 1.],
          [1., 1.]]]])

In [103]:
## Flatten

In [104]:
a.flatten()

tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [107]:
b = torch.rand(2,3,4)
print(b)
b.shape

tensor([[[0.2883, 0.2220, 0.6423, 0.7098],
         [0.5644, 0.4549, 0.0729, 0.8535],
         [0.9793, 0.0611, 0.1162, 0.7666]],

        [[0.8801, 0.4203, 0.8186, 0.0463],
         [0.7306, 0.2009, 0.3265, 0.6684],
         [0.3509, 0.7772, 0.2353, 0.4301]]])


torch.Size([2, 3, 4])

In [106]:
## permute

b.permute(2,0,1).shape

torch.Size([4, 2, 3])

In [108]:
## unsqueeze

c = torch.rand(226,226,3)
## adds a new dimention

c.unsqueeze(dim=0).shape

torch.Size([1, 226, 226, 3])

In [109]:
## Squeeze

d = torch.rand(1,20)
d.squeeze(0).shape

torch.Size([20])

# Numpy and pytorch

In [110]:
import numpy as np

In [111]:
a = torch.tensor([1,2,3])
a

tensor([1, 2, 3])

In [112]:
b = a.numpy()

In [113]:
b

array([1, 2, 3])

In [114]:
c = np.array([1,2,3])
c

array([1, 2, 3])

In [116]:
d = torch.from_numpy(c)

In [117]:
type(d)

torch.Tensor