In [21]:
!nvidia-smi  

Sat Nov  2 12:14:24 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 556.13                 Driver Version: 556.13         CUDA Version: 12.5     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3060 ...  WDDM  |   00000000:01:00.0 Off |                  N/A |
| N/A   55C    P0             19W /   95W |       0MiB /   6144MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [22]:
import torch
import torch.nn as nn

### Tensors
Attributes
- torch.dtype
    - torch.FloatTensor, ...
    - torch.cuda.FloatTensor, ...
- torch.device
- torch.layout
- torch.Tensor.grad
    - None by default
    - becomes tensor when first time called backward()
    - gradient are accumulated on future calls
- torch.Tensor.requires_grad

Functions  
- torch.Tensor.backward()
    - computes gradient of current tensor wrt graph leaves
    - triggers all tensor gradient in prev layers

In [23]:
#Always copies the data
#torch.tensor()

In [24]:
#Scalars
    #ndim = 0
scalar = torch.tensor(7)
print(scalar)
print(scalar.ndim)
print(scalar.item())
print(scalar.dtype)

tensor(7)
0
7
torch.int64


In [25]:
#Vector
    #ndim = 1
    #shape = array size
vector = torch.tensor((7,7))
print(vector)
print(vector.ndim)
print(vector.shape)

tensor([7, 7])
1
torch.Size([2])


In [26]:
#matrix
    #ndim = 2
    #shape = row, col
MATRIX = torch.tensor( [[7,8],[9,10]])
print(MATRIX)
print(MATRIX.ndim)
print(MATRIX.shape)

tensor([[ 7,  8],
        [ 9, 10]])
2
torch.Size([2, 2])


In [27]:
#tensor
    #ndim > 2
    #shape
TENSOR = torch.tensor( [[ [1,2,3],[4,5,6] ], [ [7,8,9],[10,11,12]] ])
print(TENSOR)
print(TENSOR.ndim)
print(TENSOR.shape)

tensor([[[ 1,  2,  3],
         [ 4,  5,  6]],

        [[ 7,  8,  9],
         [10, 11, 12]]])
3
torch.Size([2, 2, 3])


### BuiltIn Tensors

In [28]:
#Random Tensor
print(torch.rand(3,4))
print(torch.rand(size=(3,4)))

tensor([[0.5255, 0.6119, 0.7445, 0.1212],
        [0.1954, 0.6378, 0.3828, 0.1275],
        [0.3558, 0.8873, 0.5513, 0.6215]])
tensor([[0.5240, 0.4560, 0.0069, 0.7626],
        [0.5871, 0.1299, 0.9513, 0.4457],
        [0.6905, 0.0871, 0.4863, 0.3944]])


In [29]:
#01 Tensor
print(torch.zeros(2,3))
print(torch.ones(2,4))

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.]])


In [30]:
print(torch.arange(0,10))
print(torch.arange(start=0,end=10,step=2))
print(torch.zeros_like(input=torch.arange(0,10)))

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
tensor([0, 2, 4, 6, 8])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])


### Properties

In [10]:
tensorInt = torch.tensor([3,5,6],
                   dtype=None,      # float32/float float64/double  float16/half (default)
                   device="cpu",     # "cpu"  "cuda"  None
                   requires_grad=False) #
print(tensorInt)
print(tensorInt.dtype)

tensor([3, 5, 6])
torch.int64


### Operations

    1. Add
    2. Sub  
    3. Multiply 
    4. Div
    5. Matrix Multiplication

In [11]:
#Scalar Add
tensor = torch.tensor([1,2,3])
print(tensor + 10)
print(torch.add(tensor,10))

tensor([11, 12, 13])
tensor([11, 12, 13])


In [12]:
#Scalar Multiplication
print(tensor * 10)
print(torch.mul(tensor,10))

tensor([10, 20, 30])
tensor([10, 20, 30])


In [13]:
#Scalar Divide
tensor / 10

tensor([0.1000, 0.2000, 0.3000])

In [14]:
#Hadamard Product
tensor * tensor

tensor([1, 4, 9])

In [15]:
# Dot Product or matrix Multiplication
torch.matmul(tensor,tensor)

tensor(14)

In [None]:
%%time
torch.matmul(torch.rand(100,100,100),torch.rand(100,100,100))
print("Time")

In [31]:
w = torch.rand(1, requires_grad=True)
x = torch.rand(1)
b = torch.rand(1, requires_grad=True)
y = w * x + b
print(y)

tensor([0.1526], grad_fn=<AddBackward0>)


In [39]:
w = torch.rand(size=(3,4), requires_grad=True)
x = torch.rand(size=(4,5), requires_grad=True)
b = torch.rand(size=(3,5), requires_grad=True)
y = torch.matmul(w,x) + b
y.retain_grad()
avg = y.mean()
print(avg)

tensor(1.6333, grad_fn=<MeanBackward0>)


In [40]:
avg.backward()
print(x.grad)   #davg/dx
print(w.grad)   #davg/dw
print(b.grad)   #davg/db
print(y.grad)   #davg/dy

tensor([[0.1507, 0.1507, 0.1507, 0.1507, 0.1507],
        [0.1120, 0.1120, 0.1120, 0.1120, 0.1120],
        [0.0418, 0.0418, 0.0418, 0.0418, 0.0418],
        [0.1122, 0.1122, 0.1122, 0.1122, 0.1122]])
tensor([[0.1762, 0.1898, 0.1430, 0.1288],
        [0.1762, 0.1898, 0.1430, 0.1288],
        [0.1762, 0.1898, 0.1430, 0.1288]])
tensor([[0.0667, 0.0667, 0.0667, 0.0667, 0.0667],
        [0.0667, 0.0667, 0.0667, 0.0667, 0.0667],
        [0.0667, 0.0667, 0.0667, 0.0667, 0.0667]])
tensor([[0.0667, 0.0667, 0.0667, 0.0667, 0.0667],
        [0.0667, 0.0667, 0.0667, 0.0667, 0.0667],
        [0.0667, 0.0667, 0.0667, 0.0667, 0.0667]])


In [42]:
#Required_grad
with torch.no_grad():
    #any computation done here will not have loss.backward()/ tensor.backward()
    #apply when u are sure not to call tensor.backward()
    #even if input is requires_grad(True) --> no grad calculation
    #memory saved 
    w = torch.rand(size=(3,4), requires_grad=True)
    x = torch.rand(size=(4,5), requires_grad=True)
    b = torch.rand(size=(3,5), requires_grad=True)
    y = torch.matmul(w,x) + b
    avg = y.mean()
    print(avg)

avg.backward()
print(x.grad)   #davg/dx
print(w.grad)   #davg/dw
print(b.grad)   #davg/db


#As decorator
@torch.no_grad()
def inference_function():
    return

tensor(1.7261)


RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

### Views
Allows different shaped tensor without creating a copy

Exceptions:
- reshape()
- reshape_as()
- flatten()
- contiguous()  

These may/may not return a view and therefore may create a new tensor