In [38]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Data handling
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms
import torchvision
import torchvision.transforms as T

# Utility libraries
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import random

In [1]:
def dy_dx(x):
    """Return the analytic derivative of y = x**2 at ``x``, i.e. ``2 * x``.

    Serves as the hand-computed reference that the autograd result is
    compared against.
    """
    slope = 2 * x
    return slope

In [2]:
dy_dx(3)

6

## Autograd is a core component of PyTorch that provides automatic differentiation for tensor operations. It enables gradient computation, which is essential for training machine learning models with optimization algorithms such as gradient descent.

In [3]:
import torch

In [4]:
# Scalar leaf tensor; requires_grad=True tells autograd to record operations on it.
x = torch.tensor(3.0, requires_grad=True)

In [5]:
# Forward pass: y = x^2. The result carries a grad_fn linking it back to x.
y = x ** 2

In [6]:
x

tensor(3., requires_grad=True)

In [7]:
y

tensor(9., grad_fn=<PowBackward0>)

In [8]:
# Backward pass from the scalar y: autograd computes dy/dx and stores it in x.grad.
y.backward()

In [9]:
x.grad

tensor(6.)

In [12]:
import math
def dz_dx(x):
    """Return the analytic derivative of z = sin(x**2) at ``x``.

    By the chain rule, dz/dx = cos(x**2) * 2x. ``x`` must be a plain Python
    number because ``math.cos`` is used.
    """
    x_squared = x ** 2
    return 2 * x * math.cos(x_squared)


In [13]:
dz_dx(3)

-5.466781571308061

In [16]:
# Fresh leaf tensor, so no gradient left over from an earlier backward() call is reused.
x = torch.tensor(3.0, requires_grad=True)

In [17]:
# Intermediate node of the graph: y = x^2.
y = x ** 2


In [18]:
# Final node of the graph: z = sin(y).
z = torch.sin(y)

In [19]:
x

tensor(3., requires_grad=True)

In [20]:
y

tensor(9., grad_fn=<PowBackward0>)

In [21]:
z

tensor(0.4121, grad_fn=<SinBackward0>)

In [22]:
# Backward pass from the scalar z: gradients flow back through the recorded
# operations and the result is stored in the .grad of the leaf tensor.
z.backward()

In [24]:
x.grad

tensor(-5.4668)

In [25]:
# Input value; created without requires_grad, so autograd treats it as constant data.
x = torch.tensor(6.7)

In [26]:
# Target value (no gradient tracking).
y = torch.tensor(0.0)


In [28]:
# Parameters to differentiate with respect to: requires_grad=True makes
# autograd track every operation that uses them.
w = torch.tensor(1.0, requires_grad=True)
b = torch.tensor(0.0, requires_grad=True)


In [29]:
w

tensor(1., requires_grad=True)

In [30]:
b

tensor(0., requires_grad=True)

In [31]:
# Linear combination of the input with the weight and the bias.
z = w * x + b

In [32]:
z

tensor(6.7000, grad_fn=<AddBackward0>)

In [35]:
# Squash z into the (0, 1) range with the sigmoid function.
y_pred = torch.sigmoid(z)
y_pred

tensor(0.9988, grad_fn=<SigmoidBackward0>)

In [40]:
# Binary cross-entropy between the prediction y_pred and the target y.
bce_loss = nn.BCELoss()
loss = bce_loss(y_pred, y)
loss


tensor(6.7012, grad_fn=<BinaryCrossEntropyBackward0>)

In [41]:
# Backward pass from the scalar loss: fills in .grad for the tensors in its
# graph that were created with requires_grad=True.
loss.backward()

In [42]:
# Show the gradients that loss.backward() stored on the two parameters.
print(w.grad)
print(b.grad)

tensor(6.6918)
tensor(0.9988)


In [None]:
# At prediction (inference) time there is no need for a backward pass,
# so gradient tracking can be switched off:

# option 1 - requires_grad_(False)
# option 2 - detach()
# option 3 - torch.no_grad()

In [43]:
# End-to-end autograd walkthrough: create tracked tensors, build a graph,
# run backward on a scalar and on a non-scalar tensor, then show two ways
# of disabling gradient tracking. Every printed string is kept stable so the
# recorded cell output remains valid.
import torch

# --- 1. Creating Tensors with requires_grad=True ---
# Tensors that require gradients will track operations performed on them.
# This is typically set for model parameters (weights and biases).
print("--- Creating Tensors ---")
x = torch.tensor(2.0, requires_grad=True)
y = torch.tensor(3.0, requires_grad=True)
print(f"Tensor x: {x}")
print(f"Tensor y: {y}")
print("-" * 20)

# --- 2. Performing Operations and Building the Computational Graph ---
# PyTorch automatically builds a dynamic computational graph as you perform
# operations on tensors that require gradients.
print("--- Performing Operations ---")
z = x * y + x**2
print(f"Tensor z = x * y + x^2: {z}")
# z has a grad_fn, which indicates that it is part of the graph.
print(f"Gradient function for z: {z.grad_fn}")
print("-" * 20)

# Add one more operation on top of z.
out = z.mean()
print(f"Tensor out = mean(z): {out}")
print(f"Gradient function for out: {out.grad_fn}")
print("-" * 20)

# --- 3. Computing Gradients with .backward() ---
# Calling .backward() on a scalar tensor (like 'out') computes the gradient
# of that tensor with respect to every tensor in its graph that has
# requires_grad=True. The gradients are accumulated in the .grad attribute
# of those tensors.
print("--- Computing Gradients ---")
out.backward()

# --- 4. Accessing the Computed Gradients ---
# Manual verification:
#   z   = x*y + x^2
#   out = mean(z)           (z is a scalar here, so mean(z) = z)
#   d(out)/dx = y + 2*x  -> 3.0 + 2*2.0 = 7.0 at x=2.0, y=3.0
#   d(out)/dy = x        -> 2.0
print(f"Gradient of out with respect to x (x.grad): {x.grad}")
print(f"Gradient of out with respect to y (y.grad): {y.grad}")
print("-" * 20)

# --- Example with a non-scalar tensor and .backward(gradient) ---
print("--- Non-scalar Backward Example ---")
a = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
b = a * 2
print(f"Tensor a: {a}")
print(f"Tensor b = a * 2: {b}")

# b is not a scalar, so .backward() needs a `gradient` argument: a tensor of
# the same shape as b holding the gradient of some scalar loss with respect
# to b. For loss = sum(b) that upstream gradient is all ones.
# ones_like(b) derives shape, dtype and device from b itself, so it cannot
# drift out of sync with `a` the way a hand-written literal can.
gradient_of_sum = torch.ones_like(b)
b.backward(gradient=gradient_of_sum)

# d(sum(b))/da_i = d(2*a_i)/da_i = 2 for every element i.
print(f"Gradient of sum(b) with respect to a (a.grad): {a.grad}")
print("-" * 20)


# --- 5. Disabling Gradient Tracking ---
# Useful during inference, or when updating model weights, to save memory
# and computation.

print("--- Disabling Gradient Tracking ---")
m = torch.tensor(5.0, requires_grad=True)

# Method 1: torch.no_grad() context manager - operations inside the block
# are not recorded, so the result does not require grad.
with torch.no_grad():
    n = m * 10
print(f"Tensor n (created inside no_grad): {n}")
print(f"Does n require grad? {n.requires_grad}")

# Method 2: .detach() - p is still tracked, q is a view of the same value
# that is cut off from the graph.
p = m * 10
q = p.detach()
print(f"Tensor p (original): {p}")
print(f"Does p require grad? {p.requires_grad}")
print(f"Tensor q (detached from p): {q}")
print(f"Does q require grad? {q.requires_grad}")
print("-" * 20)

# Note: gradients are accumulated. Running backward several times on the
# same tensors without zeroing the gradients adds the results together.
# In training loops, zero the gradients before each backward pass with
# optimizer.zero_grad() or tensor.grad.zero_().


--- Creating Tensors ---
Tensor x: 2.0
Tensor y: 3.0
--------------------
--- Performing Operations ---
Tensor z = x * y + x^2: 10.0
Gradient function for z: <AddBackward0 object at 0x306abbe50>
--------------------
Tensor out = mean(z): 10.0
Gradient function for out: <MeanBackward0 object at 0x306abbe50>
--------------------
--- Computing Gradients ---
Gradient of out with respect to x (x.grad): 7.0
Gradient of out with respect to y (y.grad): 2.0
--------------------
--- Non-scalar Backward Example ---
Tensor a: tensor([1., 2., 3.], requires_grad=True)
Tensor b = a * 2: tensor([2., 4., 6.], grad_fn=<MulBackward0>)
Gradient of sum(b) with respect to a (a.grad): tensor([2., 2., 2.])
--------------------
--- Disabling Gradient Tracking ---
Tensor n (created inside no_grad): 50.0
Does n require grad? False
Tensor p (original): 50.0
Does p require grad? True
Tensor q (detached from p): 50.0
Does q require grad? False
--------------------
