In [2]:
import torch
# One training example: scalar feature x and its target y.
x, y = torch.tensor(6.7), torch.tensor(0.0)

# Model parameters (weight, bias) as plain tensors -- no autograd tracking here,
# the gradients for these are derived by hand further down.
w, b = torch.tensor(1.0), torch.tensor(0.0)

In [3]:
def binary_cross_entropy_loss(prediction,target,epsilon=1e-8): # function for calculating loss
  """Binary cross-entropy between predicted probabilities and targets.

  Computes ``-target*log(p) - (1-target)*log(1-p)`` element-wise, where the
  argument of each logarithm is floored at ``epsilon`` so that neither term
  can evaluate ``log(0)``.

  Args:
    prediction: tensor of predicted probabilities (e.g. a sigmoid output).
      Values outside [0, 1] are clipped into that interval first.
    target: tensor of labels, combined element-wise with ``prediction``.
    epsilon: smallest value ever passed to ``torch.log`` (default 1e-8).

  Returns:
    Tensor of per-element losses; no reduction is applied.
  """
  # Keep out-of-range inputs inside the valid probability interval.
  prediction = torch.clamp(prediction,0.0,1.0)
  # Floor each log argument separately. Clamping `prediction` to
  # [epsilon, 1-epsilon] is not enough in float32: 1 - 1e-8 rounds to exactly
  # 1.0, so a saturated prediction of 1.0 still reached log(1 - 1.0) = -inf
  # (loss = inf for target 0, and 0 * -inf = nan for target 1).
  log_p = torch.log(torch.clamp(prediction,min=epsilon))
  log_not_p = torch.log(torch.clamp(1-prediction,min=epsilon))
  return -target*log_p-(1-target)*log_not_p

In [4]:
# Forward pass of the single-neuron model: affine map, sigmoid, then the
# binary cross-entropy of the resulting prediction against y.
z = b + w * x
y_pred = z.sigmoid()
loss = binary_cross_entropy_loss(y_pred, y)
print(loss)

tensor(6.7012)


In [7]:
# Manual chain rule:
#   dL/dw = dL/dy_pred * dy_pred/dz * dz/dw
#   dL/db = dL/dy_pred * dy_pred/dz * dz/db
dz_dw, dz_db = x, 1                         # from z = w*x + b
dy_pred_dz = y_pred * (1 - y_pred)          # derivative of the sigmoid
dloss_dy_pred = (y_pred - y) / dy_pred_dz   # derivative of BCE w.r.t. y_pred

# Both gradients share the factor dL/dz; compute it once.
dloss_dz = dloss_dy_pred * dy_pred_dz
dloss_dw = dloss_dz * dz_dw
dloss_db = dloss_dz * dz_db

for name, grad in (("w", dloss_dw), ("b", dloss_db)):
    print(f"Gradient w.r.t {name}: {grad.item():.4f}")

Gradient w.r.t w: 6.6918
Gradient w.r.t b: 0.9988


**Using the autograd method**

In [15]:
# Same example values as the manual section (6.7 -> 0.0); X is additionally
# created with requires_grad=True, Y stays an untracked target.
X, Y = torch.tensor(6.7, requires_grad=True), torch.tensor(0.0)

In [13]:
# Weight and bias for the autograd run, both tracked so backward() can
# populate their .grad fields.
w1, b1 = (torch.tensor(v, requires_grad=True) for v in (1.0, 0.0))

In [14]:
# Display both parameters to confirm requires_grad=True is set on each.
w1,b1

(tensor(1., requires_grad=True), tensor(0., requires_grad=True))

In [16]:
# Same forward pass as the manual section, now on autograd-tracked tensors,
# so the printed loss carries a grad_fn.
z1 = b1 + w1 * X
y_pred1 = z1.sigmoid()
loss1 = binary_cross_entropy_loss(y_pred1, Y)
print(loss1)

tensor(6.7012, grad_fn=<SubBackward0>)


In [17]:
# Backpropagate from the loss; this fills in w1.grad and b1.grad, which the
# next cells display.
loss1.backward()

In [20]:
# dL/dw from autograd -- should match the hand-derived "Gradient w.r.t w".
print(w1.grad)

tensor(6.6918)


In [21]:
# dL/db from autograd -- should match the hand-derived "Gradient w.r.t b".
b1.grad

tensor(0.9988)

In [22]:
import torch

# ----- Step 1: Leaf tensors, all tracked by autograd -----
a, p, q = (torch.tensor(v, requires_grad=True) for v in (4.0, 2.0, 1.0))

# ----- Step 2: Forward computation -----
# intermediate = a*p + q plays the role of a linear layer's pre-activation.
intermediate = q + a * p
# Non-leaf tensors drop their gradient by default; ask autograd to keep it.
intermediate.retain_grad()

z = intermediate.pow(2)     # final output

# ----- Step 3: Backward pass -----
z.backward()

# ----- Step 4: Report gradients -----
# The entry for `intermediate` is only available because of retain_grad().
for label, node in (("a", a), ("p", p), ("q", q), ("intermediate", intermediate)):
    print(f"∂z/∂{label}: {node.grad.item():.2f}")

# ----- Step 5: Reset leaf gradients before any further backward pass -----
for leaf in (a, p, q):
    leaf.grad.zero_()

# ----- Step 6: Detached copy of the intermediate for logging -----
detached_intermediate = intermediate.detach()
print(f"Detached intermediate: {detached_intermediate.item():.2f}")

# ----- Step 7: Inference with gradient tracking switched off -----
with torch.no_grad():
    new_output = (q + a * p).pow(2)
    print(f"New output without gradient tracking: {new_output.item():.2f}")


∂z/∂a: 36.00
∂z/∂p: 72.00
∂z/∂q: 18.00
∂z/∂intermediate: 18.00
Detached intermediate: 9.00
New output without gradient tracking: 81.00
