In [2]:
import torch
import torch.nn as nn

class SimplifiedModel(nn.Module):
    """Toy network built from two nested sine layers and an affine output.

    Six learnable one-element parameters live in ``self.params`` and are read
    in the order (β0, ω0, β1, ω1, β2, ω2), i.e. a (bias, scale) pair per layer.
    """

    def __init__(self):
        super().__init__()
        # Draw the six scalars one at a time from a standard normal.
        self.params = nn.ParameterList()
        for _ in range(6):
            self.params.append(nn.Parameter(torch.randn(1)))

    def forward(self, x):
        """Return β2 + ω2·sin(β1 + ω1·sin(β0 + ω0·x)), evaluated elementwise on ``x``."""
        bias0, scale0, bias1, scale1, bias2, scale2 = self.params
        inner = torch.sin(bias0 + scale0 * x)
        middle = torch.sin(bias1 + scale1 * inner)
        return bias2 + scale2 * middle

# Create model and data
# Seed the global RNG first: the initial parameters and the synthetic data are
# all random, so without a seed the gradients and losses printed below differ
# on every run and cannot be compared against saved output.
torch.manual_seed(0)
model = SimplifiedModel()
x = torch.randn(100, 1)
y = torch.randn(100, 1)  # targets are drawn independently of x

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Training loop
for epoch in range(100):
    # Forward pass
    y_pred = model(x)
    loss = criterion(y_pred, y)

    # Backward pass: clear the gradients left over from the previous step,
    # then let autograd fill param.grad for every parameter in one call.
    optimizer.zero_grad()
    loss.backward()

    # Print gradients (first epoch only); each parameter holds a single
    # element, so .item() is valid on its gradient.
    if epoch == 0:
        for i, param in enumerate(model.parameters()):
            print(f'Gradient for parameter {i}: {param.grad.item()}')

    # Update parameters
    optimizer.step()

    # The logged loss was computed before this epoch's parameter update.
    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')

Gradient for parameter 0: 0.09284305572509766
Gradient for parameter 1: -0.1896030455827713
Gradient for parameter 2: 0.04433836042881012
Gradient for parameter 3: 0.058229852467775345
Gradient for parameter 4: 0.9315991401672363
Gradient for parameter 5: 0.2435024380683899
Epoch 0, Loss: 1.2287806272506714
Epoch 10, Loss: 1.1466872692108154
Epoch 20, Loss: 1.0903394222259521
Epoch 30, Loss: 1.0514551401138306
Epoch 40, Loss: 1.0244476795196533
Epoch 50, Loss: 1.0055450201034546
Epoch 60, Loss: 0.9921976327896118
Epoch 70, Loss: 0.9826788306236267
Epoch 80, Loss: 0.9758155345916748
Epoch 90, Loss: 0.9708074331283569
Epoch 100, Loss: 0.9671062231063843
Epoch 110, Loss: 0.9643340110778809
Epoch 120, Loss: 0.962228536605835
Epoch 130, Loss: 0.9606065154075623
Epoch 140, Loss: 0.9593387842178345
Epoch 150, Loss: 0.9583333730697632
Epoch 160, Loss: 0.9575241208076477
Epoch 170, Loss: 0.9568633437156677
Epoch 180, Loss: 0.9563159346580505
Epoch 190, Loss: 0.9558557271957397
Epoch 200, Loss: 

In [3]:
import torch

# Define a simple layer
def layer(β, ω, h):
    """Affine map of ``h``: scale it by ``ω``, then shift the result by ``β``."""
    scaled = ω * h
    return β + scaled

# Example values: β and ω are tracked by autograd, h is a plain input
β = torch.tensor([2.0]).requires_grad_(True)
ω = torch.tensor([3.0]).requires_grad_(True)
h = torch.tensor([4.0])

# Forward pass through the layer: f = β + ω * h
f = layer(β, ω, h)

# Backward pass: fills β.grad and ω.grad with the derivatives of f
f.backward()

print(f"∂f/∂β: {β.grad.item()}")  # Should be 1
print(f"∂f/∂ω: {ω.grad.item()}")  # Should be 4 (which is h)

∂f/∂β: 1.0
∂f/∂ω: 4.0


In [1]:
import torch
import torch.nn as nn

class TwoLayerNet(nn.Module):
    """Small regressor: Linear(1 -> 5), ReLU, then Linear(5 -> 1)."""

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(in_features=1, out_features=5)
        self.layer2 = nn.Linear(in_features=5, out_features=1)

    def forward(self, x):
        """Map ``x`` through the hidden layer with ReLU, then the output layer."""
        hidden = torch.relu(self.layer1(x))
        prediction = self.layer2(hidden)
        return prediction

# Create model and data
# Seed the global RNG first: the nn.Linear weight initialisation and the label
# noise below are random, so without a seed the losses and gradients logged by
# this cell change on every run.
torch.manual_seed(0)
model = TwoLayerNet()
x = torch.linspace(0, 10, 100).reshape(-1, 1)  # 100 evenly spaced inputs as a column
# Target: 2*sin(x) + 1 plus Gaussian noise scaled by 0.1
y_true = 2 * torch.sin(x) + 1 + 0.1 * torch.randn(x.shape)

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Training loop
for epoch in range(1000):
    # Forward pass
    y_pred = model(x)
    loss = criterion(y_pred, y_true)

    # Backward pass: clear the gradients left over from the previous step,
    # then backpropagate the current loss.
    optimizer.zero_grad()
    loss.backward()

    # Print gradients every 100 epochs. Each tensor's gradient is reduced to
    # its mean so every parameter prints as a single number.
    if epoch % 100 == 0:
        print(f'Epoch {epoch}:')
        print(f'Loss: {loss.item():.4f}')
        for name, param in model.named_parameters():
            print(f'Gradient for {name}: {param.grad.mean().item():.4f}')
        print()

    # Update parameters
    optimizer.step()

# Print final parameters (mean of each parameter tensor)
for name, param in model.named_parameters():
    print(f'Final {name} mean: {param.mean().item():.4f}')

Epoch 0:
Loss: 5.7670
Gradient for layer1.weight: 1.2104
Gradient for layer1.bias: 0.3221
Gradient for layer2.weight: -1.2783
Gradient for layer2.bias: -3.9672

Epoch 100:
Loss: 1.7621
Gradient for layer1.weight: -0.0438
Gradient for layer1.bias: -0.0295
Gradient for layer2.weight: -0.0730
Gradient for layer2.bias: -0.3772

Epoch 200:
Loss: 1.6288
Gradient for layer1.weight: -0.0172
Gradient for layer1.bias: -0.0327
Gradient for layer2.weight: -0.0587
Gradient for layer2.bias: 0.0190

Epoch 300:
Loss: 1.5363
Gradient for layer1.weight: 0.0002
Gradient for layer1.bias: -0.0243
Gradient for layer2.weight: -0.0466
Gradient for layer2.bias: 0.0843

Epoch 400:
Loss: 1.5053
Gradient for layer1.weight: 0.0068
Gradient for layer1.bias: -0.0180
Gradient for layer2.weight: -0.0261
Gradient for layer2.bias: 0.0557

Epoch 500:
Loss: 1.4922
Gradient for layer1.weight: -0.0045
Gradient for layer1.bias: -0.0165
Gradient for layer2.weight: -0.0121
Gradient for layer2.bias: 0.0348

Epoch 600:
Loss: 1.4