In [2]:

import torch
import torch.nn as nn
import torch.optim as optim

# Define a simple model
class SimpleModel(nn.Module):
    def __init__(self):
        super(SimpleModel, self).__init__()
        self.fc1 = nn.Linear(10, 5)
        self.fc2 = nn.Linear(5, 2)

    def forward(self, x):
        x = self.fc1(x)
        x = torch.relu(x)
        x = self.fc2(x)
        return x

# Instantiate model, loss function, and optimizer
model = SimpleModel()
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Dummy data
inputs = torch.randn(32, 10)
labels = torch.randint(0, 2, (32,))

# Forward pass
outputs = model(inputs)
loss = loss_fn(outputs, labels)

# Backward pass
optimizer.zero_grad()
loss.backward()

# Check gradients
for name, param in model.named_parameters():
    if param.grad is None:
        print(f"No gradient for {name}")
    else:
        print(f"Gradient for {name}: {param.grad}")

# Ensure optimizer is linked to model parameters
optimizer = optim.Adam(model.parameters(), lr=0.001)
print(model.parameters())
# Training loop with gradient check
for epoch in range(5):
    model.train()
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = loss_fn(outputs, labels)
    loss.backward()
    optimizer.step()

    # Check gradients
    for name, param in model.named_parameters():
        if param.grad is None:
            print(f"No gradient for {name} after epoch {epoch}")
        else:
            print(f"Gradient for {name} after epoch {epoch}: {param.grad}")

Gradient for fc1.weight: tensor([[ 0.0154,  0.0078,  0.0122,  0.0068,  0.0234, -0.0251, -0.0100,  0.0292,
         -0.0118,  0.0124],
        [ 0.0030,  0.0111,  0.0062, -0.0031,  0.0653, -0.0474,  0.0029,  0.0271,
         -0.0054,  0.0041],
        [-0.0215,  0.0037, -0.0113,  0.0147,  0.0382, -0.0259,  0.0087,  0.0224,
         -0.0029, -0.0032],
        [-0.0089,  0.0059,  0.0003, -0.0087,  0.0096,  0.0038,  0.0018, -0.0015,
          0.0015, -0.0028],
        [ 0.0115,  0.0098,  0.0053, -0.0023, -0.0373,  0.0226, -0.0085, -0.0081,
          0.0111, -0.0003]])
Gradient for fc1.bias: tensor([ 0.0114,  0.0353,  0.0353,  0.0021, -0.0202])
Gradient for fc2.weight: tensor([[ 0.1071,  0.0866,  0.0399,  0.0201,  0.0583],
        [-0.1071, -0.0866, -0.0399, -0.0201, -0.0583]])
Gradient for fc2.bias: tensor([ 0.1718, -0.1718])
<generator object Module.parameters at 0x7f19ee39e340>
Gradient for fc1.weight after epoch 0: tensor([[ 0.0154,  0.0078,  0.0122,  0.0068,  0.0234, -0.0251, -0.0100, 