In [1]:
import torch
print(torch.__version__)

2.5.1+cu121


In [2]:
if torch.cuda.is_available():
    print("GPU is available!")
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    print("GPU not available. Using CPU.")

GPU is available!
Using GPU: NVIDIA GeForce RTX 3050 Laptop GPU


In [3]:
## Using Empty

A=torch.empty(2,3)

In [4]:
## check type

type(A)

torch.Tensor

In [5]:
## Using Zeros

print(torch.zeros(3,3))

## Using Ones

print(torch.zeros(3,3))

## Using Random

print(torch.rand(3,3))

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
tensor([[0.2091, 0.9939, 0.2126],
        [0.2411, 0.7554, 0.0208],
        [0.1534, 0.4682, 0.2084]])


In [6]:
## Using Random and Seed

torch.manual_seed(120)

print(torch.rand(3,3))

tensor([[0.5566, 0.5807, 0.3175],
        [0.2012, 0.6687, 0.7744],
        [0.1485, 0.1320, 0.8265]])


In [7]:
## Using Tensor

print(torch.tensor([[1,2,3],[2,3,5]]))

## arrange

print(torch.arange(0,10,2))

## linspace

print(torch.linspace(0,10,10))

## full 

print(torch.full((3,3),5))


## eye for creating identity matrix

print(torch.eye(5))

tensor([[1, 2, 3],
        [2, 3, 5]])
tensor([0, 2, 4, 6, 8])
tensor([ 0.0000,  1.1111,  2.2222,  3.3333,  4.4444,  5.5556,  6.6667,  7.7778,
         8.8889, 10.0000])
tensor([[5, 5, 5],
        [5, 5, 5],
        [5, 5, 5]])
tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])


### Tensor Shapes

In [8]:
Y=torch.empty(3,3)

Y.shape

torch.Size([3, 3])

### `If you want create tensor of same shape and size you can use it`

In [9]:
print(torch.rand_like(Y))

print(torch.empty_like(Y))

print(torch.zeros_like(Y))

tensor([[0.6202, 0.4214, 0.5804],
        [0.0514, 0.8384, 0.7322],
        [0.5923, 0.3191, 0.7236]])
tensor([[-2.0930e-17,  1.6213e-42,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00]])
tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])


## `Find Data Types of Tensor`

In [10]:
Y.dtype

torch.float32

### `Assign Data Type`

In [11]:
X=torch.arange(0,10,1,dtype=torch.int8)

In [12]:
X.to(torch.float32)

tensor([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

| **Data Type**             | **Dtype**         | **Description**                                                                                                                                                                |
|---------------------------|-------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| **32-bit Floating Point** | `torch.float32`   | Standard floating-point type used for most deep learning tasks. Provides a balance between precision and memory usage.                                                         |
| **64-bit Floating Point** | `torch.float64`   | Double-precision floating point. Useful for high-precision numerical tasks but uses more memory.                                                                               |
| **16-bit Floating Point** | `torch.float16`   | Half-precision floating point. Commonly used in mixed-precision training to reduce memory and computational overhead on modern GPUs.                                            |
| **BFloat16**              | `torch.bfloat16`  | Brain floating-point format with reduced precision compared to `float16`. Used in mixed-precision training, especially on TPUs.                                                |
| **8-bit Floating Point**  | `torch.float8`    | Ultra-low-precision floating point. Used for experimental applications and extreme memory-constrained environments (less common).                                               |
| **8-bit Integer**         | `torch.int8`      | 8-bit signed integer. Used for quantized models to save memory and computation in inference.                                                                                   |
| **16-bit Integer**        | `torch.int16`     | 16-bit signed integer. Useful for special numerical tasks requiring intermediate precision.                                                                                    |
| **32-bit Integer**        | `torch.int32`     | Standard signed integer type. Commonly used for indexing and general-purpose numerical tasks.                                                                                  |
| **64-bit Integer**        | `torch.int64`     | Long integer type. Often used for large indexing arrays or for tasks involving large numbers.                                                                                  |
| **8-bit Unsigned Integer**| `torch.uint8`     | 8-bit unsigned integer. Commonly used for image data (e.g., pixel values between 0 and 255).                                                                                    |
| **Boolean**               | `torch.bool`      | Boolean type, stores `True` or `False` values. Often used for masks in logical operations.                                                                                      |
| **Complex 64**            | `torch.complex64` | Complex number type with 32-bit real and 32-bit imaginary parts. Used for scientific and signal processing tasks.                                                               |
| **Complex 128**           | `torch.complex128`| Complex number type with 64-bit real and 64-bit imaginary parts. Offers higher precision but uses more memory.                                                                 |
| **Quantized Integer**     | `torch.qint8`     | Quantized signed 8-bit integer. Used in quantized models for efficient inference.                                                                                              |
| **Quantized Unsigned Integer** | `torch.quint8` | Quantized unsigned 8-bit integer. Often used for quantized tensors in image-related tasks.                                                                                     |


In [13]:
X=torch.rand(6,6)
Y=torch.rand(6,6)

In [14]:
Addition_XY=X+2*Y
print("Addition:",Addition_XY)

Subtraction_XY=5*X-Y
print("\n \n Subtraction:",Subtraction_XY)

Addition: tensor([[1.8985, 1.8208, 0.2787, 1.4055, 1.1808, 2.2056],
        [2.0015, 1.5762, 1.3265, 1.6434, 2.5184, 1.7183],
        [1.2730, 2.9731, 1.2076, 1.6042, 1.5271, 1.4867],
        [1.0733, 0.6261, 0.5587, 1.6329, 0.8946, 0.6663],
        [1.5811, 1.3178, 0.7979, 1.6741, 1.8344, 1.5221],
        [0.3625, 0.8495, 2.1186, 1.4896, 0.4103, 0.8246]])

 
 Subtraction: tensor([[-0.8488,  3.9915,  0.7637,  0.4512, -0.5777,  2.2350],
        [ 2.3837,  1.4394,  4.8128, -0.0383,  3.5182,  1.5945],
        [ 4.0447,  3.8735,  1.8284,  3.8840,  2.5012,  1.3241],
        [ 0.4557,  0.4326,  1.6920, -0.0201, -0.3605,  2.3707],
        [-0.4427,  2.0408,  1.8837, -0.6733, -0.5839,  4.1737],
        [ 0.1075,  2.5587,  2.3796,  1.0459, -0.1592, -0.2416]])


In [15]:
print(torch.round(X))
print(torch.floor(X))
print(torch.ceil(X))

tensor([[0., 1., 0., 0., 0., 1.],
        [1., 0., 1., 0., 1., 0.],
        [1., 1., 0., 1., 1., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1.],
        [0., 1., 1., 0., 0., 0.]])
tensor([[0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.]])
tensor([[1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1.]])


#### To Fix a Matrix between some values we use `clamp`

In [16]:
torch.clamp(Addition_XY*1.5,min=2,max=3)

tensor([[2.8477, 2.7312, 2.0000, 2.1082, 2.0000, 3.0000],
        [3.0000, 2.3643, 2.0000, 2.4651, 3.0000, 2.5775],
        [2.0000, 3.0000, 2.0000, 2.4064, 2.2907, 2.2300],
        [2.0000, 2.0000, 2.0000, 2.4493, 2.0000, 2.0000],
        [2.3717, 2.0000, 2.0000, 2.5112, 2.7516, 2.2831],
        [2.0000, 2.0000, 3.0000, 2.2343, 2.0000, 2.0000]])

In [17]:
E=torch.randint(size=(8,5),low=0,high=10,dtype=torch.float64)

E

tensor([[6., 5., 6., 4., 1.],
        [8., 1., 2., 0., 4.],
        [3., 8., 7., 7., 3.],
        [8., 5., 3., 3., 5.],
        [9., 0., 9., 4., 4.],
        [2., 1., 5., 7., 6.],
        [6., 0., 3., 3., 9.],
        [2., 1., 0., 5., 9.]], dtype=torch.float64)

#### Reduction Operation like `sum` , `sum along rows`, `sum along columns`

In [18]:
print("SUM: ", torch.sum(E))

print("SUM along columns: ", torch.sum(E,dim=0))

print("SUM along rows:", torch.sum(E,dim=1))

SUM:  tensor(174., dtype=torch.float64)
SUM along columns:  tensor([44., 21., 35., 33., 41.], dtype=torch.float64)
SUM along rows: tensor([22., 15., 28., 24., 26., 21., 21., 17.], dtype=torch.float64)


#### Reduction Operation like `mean` , `mean along rows`, `mean along columns`

In [19]:
print("Mean: ", torch.mean(E))

print("Mean along columns: ", torch.mean(E,dim=0))

print("Mean along rows:", torch.mean(E,dim=1))

print('\n\n')

print("Median: ", torch.median(E))

print("Median along columns: ", torch.median(E,dim=0))

print("Median along rows:", torch.median(E,dim=1))

Mean:  tensor(4.3500, dtype=torch.float64)
Mean along columns:  tensor([5.5000, 2.6250, 4.3750, 4.1250, 5.1250], dtype=torch.float64)
Mean along rows: tensor([4.4000, 3.0000, 5.6000, 4.8000, 5.2000, 4.2000, 4.2000, 3.4000],
       dtype=torch.float64)



Median:  tensor(4., dtype=torch.float64)
Median along columns:  torch.return_types.median(
values=tensor([6., 1., 3., 4., 4.], dtype=torch.float64),
indices=tensor([0, 5, 6, 0, 4]))
Median along rows: torch.return_types.median(
values=tensor([5., 2., 7., 5., 4., 5., 3., 2.], dtype=torch.float64),
indices=tensor([1, 2, 2, 1, 4, 2, 3, 0]))


In [20]:
print('Max: ', torch.max(E), 'Min:',torch.min(E))

Max:  tensor(9., dtype=torch.float64) Min: tensor(0., dtype=torch.float64)


In [21]:
torch.max(E,dim=0)

torch.return_types.max(
values=tensor([9., 8., 9., 7., 9.], dtype=torch.float64),
indices=tensor([4, 2, 4, 2, 6]))

In [22]:
torch.prod(E)

tensor(0., dtype=torch.float64)

In [23]:
print("STD: ",torch.std(E))

print("VAR: ",torch.var(E))

STD:  tensor(2.8153, dtype=torch.float64)
VAR:  tensor(7.9256, dtype=torch.float64)


In [24]:
torch.argmax(E)

torch.argmin(E)

tensor(8)

## `Matrix Operation`

In [25]:
A=torch.randint(size=(6,3),low=1,high=22,dtype=torch.float32)
B=torch.randint(size=(3,6),low=12,high=34,dtype=torch.float32)

In [26]:
torch.matmul(A,B)

tensor([[ 768., 1047.,  886.,  701.,  857., 1159.],
        [1078., 1300., 1146., 1154., 1288., 1625.],
        [ 954., 1239., 1058.,  919., 1087., 1428.],
        [ 624.,  678.,  618.,  738.,  780.,  936.],
        [ 546.,  750.,  637.,  500.,  611.,  830.],
        [ 346.,  478.,  402.,  308.,  382.,  521.]])

In [27]:
torch.transpose(B,0,1)

tensor([[18., 20., 22.],
        [24., 32., 19.],
        [21., 25., 19.],
        [18., 12., 31.],
        [21., 19., 30.],
        [28., 29., 33.]])

#### Calculating $Determinant$

In [28]:
C=torch.randint(size=(6,6),low=1,high=22,dtype=torch.float32)

torch.det(C)

tensor(-13718164.)

In [29]:
torch.inverse(C)

tensor([[-0.0201, -0.0474, -0.0010,  0.0325,  0.0896, -0.0101],
        [-0.0130, -0.0071,  0.0597, -0.0477,  0.0225,  0.0077],
        [-0.0400, -0.0169, -0.0096,  0.0181,  0.0178,  0.0570],
        [ 0.0121,  0.1206, -0.0587, -0.0566, -0.0724,  0.0303],
        [ 0.0376,  0.0475,  0.0026,  0.0185, -0.0725, -0.0464],
        [ 0.0631, -0.0355,  0.0048,  0.0269, -0.0179, -0.0207]])

### We can calculate `exponent`, `Squareroot`,`Sigmoid`

In [30]:
print(torch.exp(C))

print(torch.sigmoid(C))

print(torch.sqrt(C))

tensor([[1.6275e+05, 2.2026e+04, 1.4841e+02, 5.9874e+04, 2.0086e+01, 1.3188e+09],
        [4.8517e+08, 4.4241e+05, 1.6275e+05, 1.6275e+05, 1.7848e+08, 2.0086e+01],
        [2.9810e+03, 1.7848e+08, 1.2026e+06, 2.7183e+00, 2.4155e+07, 1.4841e+02],
        [1.2026e+06, 5.4598e+01, 1.7848e+08, 7.3891e+00, 4.8517e+08, 1.4841e+02],
        [4.8517e+08, 8.1031e+03, 2.0086e+01, 8.1031e+03, 5.4598e+01, 4.0343e+02],
        [1.4841e+02, 2.2026e+04, 4.8517e+08, 2.9810e+03, 2.0086e+01, 4.4241e+05]])
tensor([[1.0000, 1.0000, 0.9933, 1.0000, 0.9526, 1.0000],
        [1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 0.9526],
        [0.9997, 1.0000, 1.0000, 0.7311, 1.0000, 0.9933],
        [1.0000, 0.9820, 1.0000, 0.8808, 1.0000, 0.9933],
        [1.0000, 0.9999, 0.9526, 0.9999, 0.9820, 0.9975],
        [0.9933, 1.0000, 1.0000, 0.9997, 0.9526, 1.0000]])
tensor([[3.4641, 3.1623, 2.2361, 3.3166, 1.7321, 4.5826],
        [4.4721, 3.6056, 3.4641, 3.4641, 4.3589, 1.7321],
        [2.8284, 4.3589, 3.7417, 1.0000, 4

## Applying Softmax Function if `dim=0` it goes columns wise if `dim=1` it goes row wise

In [31]:
torch.softmax(C,dim=1)

tensor([[1.2339e-04, 1.6699e-05, 1.1251e-07, 4.5392e-05, 1.5227e-08, 9.9981e-01],
        [7.3021e-01, 6.6587e-04, 2.4496e-04, 2.4496e-04, 2.6863e-01, 3.0230e-08],
        [1.4624e-05, 8.7559e-01, 5.8997e-03, 1.3335e-08, 1.1850e-01, 7.2808e-07],
        [1.8088e-03, 8.2121e-08, 2.6845e-01, 1.1114e-08, 7.2974e-01, 2.2323e-07],
        [9.9997e-01, 1.6701e-05, 4.1398e-08, 1.6701e-05, 1.1253e-07, 8.3150e-07],
        [3.0561e-07, 4.5356e-05, 9.9904e-01, 6.1383e-06, 4.1360e-08, 9.1100e-04]])

## Inaplace Operation

In [32]:
M=torch.rand(2,3)
N=torch.rand(2,3)

print(M)

print(N)

print("After Inplace Addition: ",M.subtract_(N))

print("After Inplace Relu: ",M.relu_())

tensor([[0.7309, 0.8924, 0.9492],
        [0.3060, 0.9788, 0.9458]])
tensor([[0.5660, 0.2940, 0.0258],
        [0.8352, 0.8705, 0.8416]])
After Inplace Addition:  tensor([[ 0.1649,  0.5984,  0.9234],
        [-0.5292,  0.1082,  0.1042]])
After Inplace Relu:  tensor([[0.1649, 0.5984, 0.9234],
        [0.0000, 0.1082, 0.1042]])


### Copying Tensor

In [33]:
O=N.clone()

# <br>Tensor Operation in GPU</br>

In [34]:
torch.cuda.is_available()

True

In [35]:
device = torch.device("cuda")
print(f"Using GPU: {torch.cuda.get_device_name(0)}")

Using GPU: NVIDIA GeForce RTX 3050 Laptop GPU


In [52]:
P=torch.rand(size=(5000,6000),device=device)
Q=torch.rand(size=(6000,8000),device=device)

In [53]:
P=torch.matmul(P,Q)

In [54]:
P

tensor([[1470.2046, 1474.1167, 1477.1392,  ..., 1470.0393, 1484.1052,
         1462.3391],
        [1488.7314, 1509.1858, 1506.6777,  ..., 1512.1431, 1519.9465,
         1490.3674],
        [1488.7627, 1501.2974, 1507.4160,  ..., 1499.0752, 1501.0099,
         1485.4484],
        ...,
        [1492.6012, 1499.0159, 1520.4917,  ..., 1514.6980, 1521.2920,
         1496.4668],
        [1490.4917, 1514.5554, 1533.2986,  ..., 1521.0848, 1517.0851,
         1497.4648],
        [1480.9275, 1495.6318, 1507.2743,  ..., 1504.9678, 1504.3104,
         1485.8137]], device='cuda:0')

### Moving Tensor From CPU to GPU

In [56]:
A=A.to(device=device)

In [57]:
A

tensor([[21., 14.,  5.],
        [21., 13., 20.],
        [16., 19., 13.],
        [ 6.,  6., 18.],
        [19.,  8.,  2.],
        [ 9.,  7.,  2.]], device='cuda:0')

In [58]:
import torch
import time

def benchmark_matrix_multiplication():
    # Check if CUDA (GPU) is available
    if not torch.cuda.is_available():
        print("CUDA is not available. Please run this on a machine with an NVIDIA GPU.")
        return

    # --- Configuration ---
    # We use 10,000 x 10,000. 
    # If you have high-end hardware (e.g., A100, RTX 3090/4090), 
    # you can try increasing these to 10000 and 100000.
    M = 10000  # Rows of Matrix A
    K = 10000  # Cols of A / Rows of B
    N = 10000  # Cols of Matrix B
    
    print(f"Benchmarking Matrix Multiplication: [{M}x{K}] x [{K}x{N}]")
    print("-" * 50)

    # --- CPU Benchmark ---
    print("Allocating tensors on CPU...")
    # Create random matrices
    a_cpu = torch.randn(M, K)
    b_cpu = torch.randn(K, N)
    
    print("Starting CPU multiplication... (This may take a while)")
    start_time = time.time()
    c_cpu = torch.matmul(a_cpu, b_cpu)
    end_time = time.time()
    
    cpu_duration = end_time - start_time
    print(f"CPU Time: {cpu_duration:.4f} seconds")
    print("-" * 50)

    # --- GPU Benchmark ---
    print("Moving tensors to GPU...")
    # Transfer data to GPU
    a_gpu = a_cpu.cuda()
    b_gpu = b_cpu.cuda()
    
    # Warm-up: Run a small operation to initialize CUDA context/buffers
    torch.matmul(a_gpu[:100, :100], b_gpu[:100, :100])
    torch.cuda.synchronize() # Wait for warm-up to finish

    print("Starting GPU multiplication...")
    start_time = time.time()
    c_gpu = torch.matmul(a_gpu, b_gpu)
    
    # Important: GPU operations are asynchronous. 
    # We must synchronize to measure the actual execution time.
    torch.cuda.synchronize() 
    end_time = time.time()
    
    gpu_duration = end_time - start_time
    print(f"GPU Time: {gpu_duration:.4f} seconds")
    print("-" * 50)

    # --- Results ---
    speedup = cpu_duration / gpu_duration
    print(f"Speedup: {speedup:.2f}x faster on GPU")

if __name__ == "__main__":
    benchmark_matrix_multiplication()


Benchmarking Matrix Multiplication: [10000x10000] x [10000x10000]
--------------------------------------------------
Allocating tensors on CPU...
Starting CPU multiplication... (This may take a while)
CPU Time: 5.1827 seconds
--------------------------------------------------
Moving tensors to GPU...
Starting GPU multiplication...
GPU Time: 0.4351 seconds
--------------------------------------------------
Speedup: 11.91x faster on GPU


### `Permute`

In [None]:
A.permute(1,0)

tensor([[21., 21., 16.,  6., 19.,  9.],
        [14., 13., 19.,  6.,  8.,  7.],
        [ 5., 20., 13., 18.,  2.,  2.]], device='cuda:0')

### `Unsqueeze`

In [72]:
D=torch.rand(226,226,3)

D.shape

torch.Size([226, 226, 3])

In [73]:
D.unsqueeze(0).shape

torch.Size([1, 226, 226, 3])

### `Squeeze`

In [78]:
F=torch.rand(size=(1,20))
F.squeeze(0).shape

torch.Size([20])

In [81]:
import numpy as np

## `Pytorch` to `Numpy` and Vice Versa

In [82]:
D_numpy=D.numpy()

In [87]:
D_torch=torch.from_numpy(D_numpy)

In [88]:
type(D_torch)

torch.Tensor