In [1]:
import torch
import torchvision

In [2]:
torch.cuda.is_available()

True

In [3]:
torch.cuda.get_device_name(0)

'NVIDIA GeForce GTX 1650'

In [4]:
!nvidia-smi

Tue Jan  6 14:02:33 2026       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 581.80                 Driver Version: 581.80         CUDA Version: 13.0     |
+-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce GTX 1650      WDDM  |   00000000:01:00.0 Off |                  N/A |
| N/A   50C    P8              1W /   50W |       0MiB /   4096MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+

+----------------------------------------------

In [5]:
a = torch.tensor([2,2,1])
print(a)
print(a.data)

tensor([2, 2, 1])
tensor([2, 2, 1])


In [6]:
b = torch.tensor([[2,2,2],[1,1,1],[3,3,3]])
print(b)
print(b.data)

tensor([[2, 2, 2],
        [1, 1, 1],
        [3, 3, 3]])
tensor([[2, 2, 2],
        [1, 1, 1],
        [3, 3, 3]])


In [7]:
print(a.shape)
print(b.shape)

torch.Size([3])
torch.Size([3, 3])


In [8]:
b[0]

tensor([2, 2, 2])

In [9]:
c = torch.tensor([2,3,4], dtype = torch.float)
d = torch.tensor([2,4,6], dtype = torch.double)
print(c,d)

tensor([2., 3., 4.]) tensor([2., 4., 6.], dtype=torch.float64)


In [10]:
c.mean()

tensor(3.)

In [11]:
d.mean()

tensor(4., dtype=torch.float64)

In [12]:
c.std()

tensor(1.)

In [13]:
d.std()

tensor(2., dtype=torch.float64)

This view() method is extremely important !!

In [14]:
b.view(3,-1) # -1 means that it will calculate the no. of rows automatically for given no. of columns !!

tensor([[2, 2, 2],
        [1, 1, 1],
        [3, 3, 3]])

In [15]:
b.float().mean()

tensor(2.)

In [16]:
b.view(9) # we gave only one dimension so the output is in one dimension

tensor([2, 2, 2, 1, 1, 1, 3, 3, 3])

In [17]:
b.view(9,-1) # we gave two dimension so the output is two dimension 

tensor([[2],
        [2],
        [2],
        [1],
        [1],
        [1],
        [3],
        [3],
        [3]])

In [18]:
b.view(-1,9)

tensor([[2, 2, 2, 1, 1, 1, 3, 3, 3]])

In [19]:
b.view(-1,1,9) # you cannot infer two dimension !! i.e. b.view(-1,-1,9) will give an error !!

tensor([[[2, 2, 2, 1, 1, 1, 3, 3, 3]]])

in pytorch 3D tensors take the form of (channels,rows,columns)

In [20]:
e = torch.randn(2,3,4)
e.view(2,-1)

tensor([[-0.7675, -0.5869,  2.3576,  1.9364,  0.1527,  0.7614,  0.4412,  0.2566,
          0.5563, -0.2288,  1.6991,  0.8139],
        [-0.7652,  0.8014, -0.1505, -1.6782, -0.4629,  0.5418, -0.9373,  1.0275,
         -0.0783,  0.0816,  0.8326, -2.9648]])

In [21]:
torch.rand(3,4) # creates 3x4 tensor with number between 0 and 1 

tensor([[0.4484, 0.3672, 0.8002, 0.5703],
        [0.2852, 0.8882, 0.1791, 0.7536],
        [0.2480, 0.2509, 0.4256, 0.0093]])

In [22]:
int_array = torch.randint(6,10,(5,)) # creates 5 random integers between 6 and 9 (10 is exclusive) 
print(type(int_array))
int_array

<class 'torch.Tensor'>


tensor([8, 8, 6, 7, 9])

In [23]:
int_array_2d = torch.randint(2,10,(3,3))
int_array_2d

tensor([[8, 8, 8],
        [8, 5, 5],
        [3, 4, 4]])

In [24]:
torch.numel(int_array)

5

In [25]:
torch.zeros(2,2,dtype=torch.double)

tensor([[0., 0.],
        [0., 0.]], dtype=torch.float64)

In [26]:
torch.ones(3,3) # default datatype !!

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])

In [27]:
torch.randn_like(b,dtype = torch.double) # takes the shape of b !!!

tensor([[ 0.0338, -0.2447, -1.2525],
        [-1.5961, -1.2469,  0.1073],
        [-0.4555,  0.4004, -1.6336]], dtype=torch.float64)

In [28]:
torch.add(torch.zeros(3,3),torch.ones(3,3))

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])

In [29]:
a = torch.ones(5)
print(a)
b = a.numpy()
print(b)
a.add_(2) # this add_() function reassigns the value to the object itself i.e. it is equivalent to a = torch.add() 
print(a,b)

tensor([1., 1., 1., 1., 1.])
[1. 1. 1. 1. 1.]
tensor([3., 3., 3., 3., 3.]) [3. 3. 3. 3. 3.]


what happened above is something called NUMPY BRIDGE i.e. if some operation is done upon the tensor then the numpy array made from that tensor will also get affected !

In [30]:
import numpy as np 

In [31]:
a = np.ones(5)
b = torch.from_numpy(a)
np.add(a,1,out=a)
print(a)
print(b)

[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)


In [32]:
b = b.cuda()
print(b)

tensor([2., 2., 2., 2., 2.], device='cuda:0', dtype=torch.float64)


In [33]:
torch.cuda.is_initialized()

True

Tensor Concatenation

In [34]:
first = torch.randn(2,5)
print(first)
second = torch.randn(3,5)
print(second)
third = torch.randn(3,3)
print(third)
# concatenate along dimension 0 (along rows)
con1 = torch.cat([first,second]) # can give ,0 after the list [first,second], but since it is default no need here !!
# concatenate along dimension 1 (along columns)
con2 = torch.cat([second,third],1)
print()
print(con1)
print()
print(con2)


tensor([[ 1.0661,  0.0719, -0.2745, -1.1170, -0.0638],
        [-0.4626, -0.0616, -0.5388, -0.4665,  0.3660]])
tensor([[-0.1211, -0.6117, -0.0565, -0.1537, -0.2728],
        [-1.8786,  1.0245, -0.3555,  0.3663, -0.4842],
        [ 0.2181, -0.4822, -1.1623, -0.2089, -0.8228]])
tensor([[-0.3451, -0.4196, -0.0321],
        [ 1.5151, -0.6649,  1.5503],
        [ 0.7706, -0.1445, -2.2590]])

tensor([[ 1.0661,  0.0719, -0.2745, -1.1170, -0.0638],
        [-0.4626, -0.0616, -0.5388, -0.4665,  0.3660],
        [-0.1211, -0.6117, -0.0565, -0.1537, -0.2728],
        [-1.8786,  1.0245, -0.3555,  0.3663, -0.4842],
        [ 0.2181, -0.4822, -1.1623, -0.2089, -0.8228]])

tensor([[-0.1211, -0.6117, -0.0565, -0.1537, -0.2728, -0.3451, -0.4196, -0.0321],
        [-1.8786,  1.0245, -0.3555,  0.3663, -0.4842,  1.5151, -0.6649,  1.5503],
        [ 0.2181, -0.4822, -1.1623, -0.2089, -0.8228,  0.7706, -0.1445, -2.2590]])


In [35]:
t1 = torch.tensor([[1,2,3],[3,4,5],[4,5,6]])
print(t1)

tensor([[1, 2, 3],
        [3, 4, 5],
        [4, 5, 6]])


In [36]:
t1.squeeze(1)

tensor([[1, 2, 3],
        [3, 4, 5],
        [4, 5, 6]])

In [37]:
t1.unsqueeze_(0)

tensor([[[1, 2, 3],
         [3, 4, 5],
         [4, 5, 6]]])

In [38]:
t1

tensor([[[1, 2, 3],
         [3, 4, 5],
         [4, 5, 6]]])

if requires_grad is set to True then the tensor object keeps track of how it was created !!

In [39]:
x = torch.tensor([1,2,3],requires_grad = True, dtype = float)
y = torch.tensor([4,5,6],requires_grad = True, dtype = float)
z = x+y
print(z)
print(z.grad_fn)

tensor([5., 7., 9.], dtype=torch.float64, grad_fn=<AddBackward0>)
<AddBackward0 object at 0x000001FBBAAC43A0>


In [40]:
s = z.sum()
print(s)
print(s.grad_fn)

tensor(21., dtype=torch.float64, grad_fn=<SumBackward0>)
<SumBackward0 object at 0x000001FBBAAC5BA0>


In [41]:
s.backward()
print(x.grad) # this gives ds/dx !!!!!!!!!!!!!

tensor([1., 1., 1.], dtype=torch.float64)


In [42]:
# another method to set requires_grad = Ture if forgotten before is 
x.requires_grad_() # this will do inplace operation !!

tensor([1., 2., 3.], dtype=torch.float64, requires_grad=True)

In [43]:
new_z = z.detach() # it returns a tensor that shares the same storage as "z" but with computation history forgotten !!!
print(new_z.grad_fn)

None


In [44]:
print(x.requires_grad)

True


In [45]:
print((x+10).requires_grad)

True


In [46]:
with torch.no_grad() : 
    print((x+10).requires_grad)

False


In [47]:
x = torch.ones(2,2,requires_grad=True)
print(x)
y = x+2
print(y)
print(y.grad_fn)
z = y*y*3
out = z.mean()
print(z,out)
out.backward()
print(x.grad)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x000001FBBAAC6F50>
tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>) tensor(27., grad_fn=<MeanBackward0>)
tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])


loss functions in pytorch !!

In [48]:
import torch.nn as nn

In [49]:
pred = torch.randn(4,5)

In [50]:
label = torch.randn(4,5)

In [51]:
mse = nn.MSELoss(reduction = 'mean')
loss = mse(pred,label)
loss

tensor(2.1680)

In [52]:
loss.mean()

tensor(2.1680)

In [53]:
((pred-label)**2).mean()

tensor(2.1680)

In [54]:
label = torch.zeros(4,5).random_(0,2)

In [55]:
label

tensor([[0., 0., 1., 0., 0.],
        [0., 1., 1., 0., 1.],
        [0., 0., 0., 1., 0.],
        [0., 1., 0., 0., 0.]])

In [56]:
sig = nn.Sigmoid()

In [57]:
bce = nn.BCELoss(reduction="mean") # in bce loss inputs should be between 0 and 1 so we applied sigmoid in input [sig(pred)] !!

In [58]:
bce(sig(pred),label)

tensor(0.8512)

In [59]:
bces = nn.BCEWithLogitsLoss(reduction="mean")

In [60]:
bces(pred,label)

tensor(0.8512)

In [61]:
layer = nn.Linear(5,5)
layer.weight.data

tensor([[-0.0611,  0.0104, -0.1582,  0.4133, -0.1772],
        [ 0.3101,  0.0272, -0.2494, -0.3936,  0.2564],
        [ 0.1754, -0.3578, -0.4059,  0.0156,  0.1568],
        [ 0.2139, -0.2385,  0.0369,  0.1624, -0.2009],
        [-0.2840,  0.4319, -0.2960, -0.0567, -0.3004]])

In [62]:
nn.init.uniform_(layer.weight.data,a=0,b=3)

tensor([[2.5092, 1.0967, 2.9386, 0.9764, 0.8858],
        [1.8723, 2.6414, 2.0366, 1.0693, 1.6348],
        [2.1811, 2.1311, 0.0310, 2.7558, 1.5352],
        [2.3814, 0.7733, 1.7300, 0.1408, 0.7928],
        [0.2246, 0.4851, 1.8607, 0.0735, 1.9482]])

In [63]:
nn.init.normal_(layer.weight.data,mean=0.0,std=0.5)

tensor([[ 0.8696, -0.0596, -0.1976,  0.0530, -0.2032],
        [ 0.6278,  0.5116, -1.4021,  0.0337,  0.3694],
        [ 0.8219, -0.7480, -0.1982, -0.6042,  0.0783],
        [ 0.2367,  0.2941,  0.5171, -0.3176, -0.2010],
        [-0.2971, -0.5856, -0.7487, -0.3544, -0.3924]])

In [64]:
nn.init.constant_(layer.bias,0.01)

Parameter containing:
tensor([0.0100, 0.0100, 0.0100, 0.0100, 0.0100], requires_grad=True)

In [65]:
nn.init.xavier_uniform_(layer.weight,gain=1.0)

Parameter containing:
tensor([[-0.2141, -0.1989, -0.0594, -0.0816, -0.3463],
        [-0.6040,  0.6994, -0.0733, -0.0301, -0.1525],
        [-0.7204, -0.3050,  0.2056, -0.1578,  0.7322],
        [-0.2596, -0.6349,  0.5791, -0.4263,  0.5148],
        [-0.4516,  0.0395, -0.5011,  0.3450,  0.6295]], requires_grad=True)

In [66]:
nn.init.xavier_normal_(layer.weight,gain=1.0)

Parameter containing:
tensor([[ 0.5275,  0.1678,  0.0354,  0.1771, -0.6345],
        [-0.2036, -0.1495,  0.3824, -0.0824, -1.0282],
        [ 0.0452, -0.5883, -0.2087,  0.0676,  0.4895],
        [-0.2887,  0.0336, -0.8697, -0.5638, -0.1072],
        [ 0.0300,  0.0880, -0.9018,  0.5829,  0.1146]], requires_grad=True)