# CPE 487 587 Deep Learning for Engineering Applications
## Instructor: Rahul Bhadani
## Chapter 3: Introducing Neural Networks

In [2]:
import torch
import matplotlib.pyplot as plt
import numpy as np

# 1. Vector of Partial Derivatives using Torch

## Example 1: Single Point

$$F: \mathbb{R}^2 \to \mathbb{R}$$

$$
f(x, y) = x^2 + 3xy + y^2
$$

In [4]:
# Select the compute device: CUDA when a GPU is usable, otherwise the CPU.
# Every tensor created below is placed on this device.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda


In [15]:
# Evaluation point (x, y) = (2, 3) for the gradient.
# requires_grad_() marks each scalar as a leaf whose gradient autograd records.
x = torch.tensor(2.0, device=device).requires_grad_()
y = torch.tensor(3.0, device=device).requires_grad_()

In [16]:
# Forward pass: f(x, y) = x^2 + 3xy + y^2, built from tracked tensors so that
# autograd records the computation graph.
f = x.pow(2) + 3 * x * y + y.pow(2)

In [17]:
# Report the function, the evaluation point, and the function value there.
print(
    "\nFunction: f(x, y) = x^2 + 3xy + y^2",
    f"Point: (x={x.item()}, y={y.item()})",
    f"f(x, y) = {f.item():.4f}",
    sep="\n",
)


Function: f(x, y) = x^2 + 3xy + y^2
Point: (x=2.0, y=3.0)
f(x, y) = 31.0000


In [18]:
# Display f itself: besides the value, the repr shows the device and the
# grad_fn node that autograd attached to the result.
f

tensor(31., device='cuda:0', grad_fn=<AddBackward0>)

In [19]:
# Backward pass: compute gradients via autograd, you can only run this once!
# The results are stored on the leaf tensors as x.grad and y.grad.
# NOTE(review): a second call presumably fails because the graph's saved
# buffers are freed after the first pass unless retain_graph=True is passed;
# confirm against the torch.Tensor.backward documentation.
f.backward()

In [20]:
# x.grad and y.grad were filled in by f.backward(); show each partial
# derivative and then both assembled into the gradient vector.
print("\nAutomatic Differentiation Results:")
print(
    f"df/dx = {x.grad.item():.4f}",
    f"df/dy = {y.grad.item():.4f}",
    sep="\n",
)
print(f"\nGradient vector grad f = [{x.grad.item():.4f}, {y.grad.item():.4f}]")


Automatic Differentiation Results:
df/dx = 13.0000
df/dy = 12.0000

Gradient vector grad f = [13.0000, 12.0000]


## Example 2: Gradient with a Vector-Valued Input

In [5]:
# A single tracked vector x = (x1, x2, x3) replaces separate scalar inputs.
x = torch.tensor([1.0, 2.0, 3.0], device=device).requires_grad_()
# f(x) = x1^2 + x2*x3 + sin(x1)
f = x[0].pow(2) + x[1] * x[2] + x[0].sin()

In [30]:
# Backpropagate from the scalar f; all three partials end up in x.grad.
f.backward()

print("\nGradient of f via autograd:")
print(
    *(f"df/dx{k + 1} = {x.grad[k].item():.6f}" for k in range(3)),
    sep="\n",
)
print(f"\ngrad f = {x.grad.detach().cpu().numpy()}")

# Closed-form partial derivatives evaluated at x = (1, 2, 3), for comparison
# with the autograd values printed above.
print("\nAnalytical verification:")
print(
    f"df/dx1 = 2x1 + cos(x1) = 2(1) + cos(1) = {2*1 + np.cos(1):.6f} ",
    f"df/dx2 = x3 = {3:.6f} ",
    f"df/dx3 = x2 = {2:.6f} ",
    sep="\n",
)


Gradient of f via autograd:
df/dx1 = 2.540302
df/dx2 = 3.000000
df/dx3 = 2.000000

grad f = [2.5403023 3.        2.       ]

Analytical verification:
df/dx1 = 2x1 + cos(x1) = 2(1) + cos(1) = 2.540302 
df/dx2 = x3 = 3.000000 
df/dx3 = x2 = 2.000000 


## Example 3: Computing the Gradient at Multiple Points at Once

$$F: \mathbb{R}^2 \to \mathbb{R}$$

$$ f(\mathbf{x}, \mathbf{y}) = \mathbf{x}^2 + \mathbf{y}^2$$

In [35]:
# Batch of four (x, y) points, one per row; tracked so that a single backward
# pass yields the gradient at every point.
points = torch.tensor(
    [[1.0, 2.0], [2.0, 3.0], [3.0, 1.0], [0.5, 1.5]],
    device=device,
    requires_grad=True,
)

In [36]:
# f(x, y) = x^2 + y^2 evaluated row-wise (column 0 is x, column 1 is y),
# then reduced to one scalar so backward() can be called on it.
f = torch.sum(points[:, 0].pow(2) + points[:, 1].pow(2))

### Why we need `sum()`

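Calling `backward()` without arguments requires a scalar, so the per-point values $f_i(x_i, y_i)$ are added into one scalar. Each term depends only on its own point, so differentiating the sum with respect to point $i$ returns exactly the gradient of $f_i$; nothing is mixed between points.

$$Loss = f_1(x_1, y_1) + f_2(x_2, y_2) + f_3(x_3, y_3) + f_4(x_4, y_4)$$

$$\frac{\partial Loss}{\partial x_1} = \frac{\partial f_1}{\partial x_1} \quad \text{(only depends on first function!)}$$
$$\frac{\partial Loss}{\partial y_1} = \frac{\partial f_1}{\partial y_1}$$

$$\frac{\partial Loss}{\partial x_2} = \frac{\partial f_2}{\partial x_2} \quad \text{(only depends on second function!)}$$
$$\frac{\partial Loss}{\partial y_2} = \frac{\partial f_2}{\partial y_2}$$

$$\frac{\partial Loss}{\partial x_3} = \frac{\partial f_3}{\partial x_3} \quad \text{(only depends on third function!)}$$
$$\frac{\partial Loss}{\partial y_3} = \frac{\partial f_3}{\partial y_3}$$

$$\frac{\partial Loss}{\partial x_4} = \frac{\partial f_4}{\partial x_4} \quad \text{(only depends on fourth function!)}$$
$$\frac{\partial Loss}{\partial y_4} = \frac{\partial f_4}{\partial y_4}$$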

In [37]:
# Show the function and the batch; the tensor is detached and moved to the CPU
# so it can be printed as a NumPy array.
print("\nFunction: f(x, y) = x^2 + y^2", "Batch of 4 points:", sep="\n")
print(points.detach().cpu().numpy())
        


Function: f(x, y) = x^2 + y^2
Batch of 4 points:
[[1.  2. ]
 [2.  3. ]
 [3.  1. ]
 [0.5 1.5]]


In [38]:
# Backpropagate from the summed scalar f; the per-point gradients are stored
# in points.grad, which has the same shape as points.
f.backward()

In [40]:
# Row i holds (df/dx, df/dy) at point i, i.e. (2x, 2y) for f = x^2 + y^2.
print(points.grad.detach().cpu().numpy())

[[2. 4.]
 [4. 6.]
 [6. 2.]
 [1. 3.]]


In [42]:
# Pair every point with its gradient row and print them side by side.
for number, (pt, g) in enumerate(zip(points.detach(), points.grad), start=1):
    px, py = pt[0].item(), pt[1].item()
    gx, gy = g[0].item(), g[1].item()
    print(f"Point {number}: ({px:.1f}, {py:.1f}) --> grad f = ({gx:.2f}, {gy:.2f})")


Point 1: (1.0, 2.0) --> grad f = (2.00, 4.00)
Point 2: (2.0, 3.0) --> grad f = (4.00, 6.00)
Point 3: (3.0, 1.0) --> grad f = (6.00, 2.00)
Point 4: (0.5, 1.5) --> grad f = (1.00, 3.00)


## Example 4: Jacobian Matrix - Vector-Valued Functions

$$F: \mathbb{R}^2 \to \mathbb{R}^3$$

$$f_1(x, y) = x^2 + y^2$$
$$f_2(x, y) = xy$$
$$f_3(x, y) = \sin(x) + \cos(y)$$

In [43]:
def F(points, y=None):
    """Evaluate F: R^2 -> R^3 with F(x, y) = [x^2 + y^2, x*y, sin(x) + cos(y)].

    Two call forms are accepted:

    * ``F(points)`` -- ``points`` is a tensor of shape (2,); ``points[0]`` is x
      and ``points[1]`` is y.
    * ``F(x, y)``   -- the two coordinates are passed as separate tensors.

    Args:
        points: Either the packed (2,) tensor, or the x coordinate when ``y``
            is also given.
        y: Optional y coordinate. When omitted, ``points`` is unpacked instead.

    Returns:
        The three components f1, f2, f3 stacked with ``torch.stack``.
    """
    if y is None:
        # Packed form: split the two coordinates out of the single tensor.
        x, y = points[0], points[1]
    else:
        # Separate form: the first positional argument is the x coordinate.
        x = points

    f1 = x**2 + y**2
    f2 = x * y
    f3 = torch.sin(x) + torch.cos(y)

    return torch.stack([f1, f2, f3])

In [44]:
# Evaluation point (x, y) = (1, 2) for the Jacobian; both scalars are tracked
# leaves so that backward() fills x.grad and y.grad.
x = torch.tensor(1.0, device=device).requires_grad_()
y = torch.tensor(2.0, device=device).requires_grad_()

In [47]:
print(f"\nVector Function: F(x, y) = [x^2 + y^2, xy, sin(x) + cos(y)]")
print(f"Input: (x={x.item()}, y={y.item()})")

# F takes one tensor of shape (2,), so pack the two scalars with torch.stack.
# stack is differentiable, so gradients still flow back to x and y.
f_outputs = F(torch.stack([x, y]))
print(f"F(x, y) = {f_outputs.cpu().detach().numpy()}")


Vector Function: F(x, y) = [x^2 + y^2, xy, sin(x) + cos(y)]
Input: (x=1.0, y=2.0)
F(x, y) = [5.        2.        0.4253242]


In [48]:
# Pre-allocate the 3x2 Jacobian (rows: outputs f1..f3, columns: inputs x, y).
# It is filled one row per backward pass.
jacobian = torch.zeros((3, 2), device=device)

In [50]:
# Build the Jacobian row by row: backpropagating from the i-th output alone
# leaves (df_i/dx, df_i/dy) in x.grad and y.grad.
for i in range(3):
    # .grad accumulates across backward() calls, so clear the previous row's
    # values before computing the next one.
    if x.grad is not None:
        x.grad.zero_()
    if y.grad is not None:
        y.grad.zero_()

    # Recompute the outputs so every backward() runs on a fresh graph.
    # F takes one tensor of shape (2,), hence the torch.stack packing; stack
    # is differentiable, so gradients still reach x and y.
    f_outputs = F(torch.stack([x, y]))

    # Backprop for i-th output
    f_outputs[i].backward()

    # Store gradients in Jacobian
    jacobian[i, 0] = x.grad
    jacobian[i, 1] = y.grad

print(f"\nJacobian Matrix J = dF/d(x,y):")
print(jacobian.cpu().detach().numpy())
print(f"\nInterpretation (rows=outputs, columns=inputs):")
print(f"df1/dx = {jacobian[0, 0].item():.4f},  df1/dy = {jacobian[0, 1].item():.4f}")
print(f"df2/dx = {jacobian[1, 0].item():.4f},  df2/dy = {jacobian[1, 1].item():.4f}")
print(f"df3/dx = {jacobian[2, 0].item():.4f},  df3/dy = {jacobian[2, 1].item():.4f}")



Jacobian Matrix J = dF/d(x,y):
[[ 2.          4.        ]
 [ 2.          1.        ]
 [ 0.5403023  -0.90929747]]

Interpretation (rows=outputs, columns=inputs):
df1/dx = 2.0000,  df1/dy = 4.0000
df2/dx = 2.0000,  df2/dy = 1.0000
df3/dx = 0.5403,  df3/dy = -0.9093


## Example 5: Computing the Jacobian at Multiple Points for a Vector-Valued Function

$$F: \mathbb{R}^2 \to \mathbb{R}^3$$

$$f_1(\mathbf{x}, \mathbf{y}) = \mathbf{x}^2 + \mathbf{y}^2$$
$$f_2(\mathbf{x}, \mathbf{y}) = \mathbf{x}\mathbf{y}$$
$$f_3(\mathbf{x}, \mathbf{y}) = \sin(\mathbf{x}) + \cos(\mathbf{y})$$

In [29]:
from torch.func import vmap, jacrev

In [28]:
# F: R^2 -> R^3, written for a single point so it can be differentiated and
# then batched.
def F(points):
    """Return [x^2 + y^2, x*y, sin(x) + cos(y)] for one point.

    ``points`` is a tensor of shape (2,): ``points[0]`` is x and ``points[1]``
    is y. The three components are returned stacked with ``torch.stack``.
    """
    x = points[0]
    y = points[1]
    components = (
        x.pow(2) + y.pow(2),  # f1
        x * y,                # f2
        x.sin() + y.cos(),    # f3
    )
    return torch.stack(components)

In [30]:
# Batch of (x, y) evaluation points, one point per row -> shape (3, 2).
batch_points = torch.tensor(
    [[1.0, 2.0], [0.0, torch.pi], [0.5, 0.5]],
    device=device,
)

# jacrev(F) turns F into a function that returns the 3x2 Jacobian at one point;
# vmap then applies that function to every row of batch_points.
batch_jac = vmap(jacrev(F))(batch_points)

# Result layout is (batch, outputs, inputs) -> torch.Size([3, 3, 2]).
print(f"Batch Jacobian Shape: {batch_jac.shape}")
print("Jacobian for the first point (1.0, 2.0):", batch_jac[0], sep="\n")

Batch Jacobian Shape: torch.Size([3, 3, 2])
Jacobian for the first point (1.0, 2.0):
tensor([[ 2.0000,  4.0000],
        [ 2.0000,  1.0000],
        [ 0.5403, -0.9093]], device='cuda:0')


In [None]:
# Index 1 along the batch axis is the Jacobian at the second point.
print("Jacobian for the second point:", batch_jac[1], sep="\n")

Batch Jacobian Shape: torch.Size([3, 3, 2])
Jacobian for the second point:
tensor([[0.0000e+00, 6.2832e+00],
        [3.1416e+00, 0.0000e+00],
        [1.0000e+00, 8.7423e-08]], device='cuda:0')


In [32]:
# Index 2 along the batch axis is the Jacobian at the third point.
print("Jacobian for the third point:", batch_jac[2], sep="\n")

Jacobian for the third point:
tensor([[ 1.0000,  1.0000],
        [ 0.5000,  0.5000],
        [ 0.8776, -0.4794]], device='cuda:0')
