In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Define a simple CNN
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Load and preprocess CIFAR-10 dataset
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True, num_workers=2)

# Initialize the network and optimizer
net = SimpleCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Training loop
for epoch in range(2):  # Just 2 epochs for demonstration
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        
        # Print gradients for different layers
        print(f"Epoch {epoch}, Batch {i}")
        print(f"Conv1 grad norm: {net.conv1.weight.grad.norm().item():.4f}")
        print(f"FC3 grad norm: {net.fc3.weight.grad.norm().item():.4f}")
        print("---")
        
        optimizer.step()
        running_loss += loss.item()
        if i % 2000 == 1999:
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

print('Finished Training')

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data\cifar-10-python.tar.gz


100.0%


Extracting ./data\cifar-10-python.tar.gz to ./data
Epoch 0, Batch 0
Conv1 grad norm: 0.0876
FC3 grad norm: 0.1847
---
Epoch 0, Batch 1
Conv1 grad norm: 0.0545
FC3 grad norm: 0.1753
---
Epoch 0, Batch 2
Conv1 grad norm: 0.0515
FC3 grad norm: 0.1771
---
Epoch 0, Batch 3
Conv1 grad norm: 0.0704
FC3 grad norm: 0.2492
---
Epoch 0, Batch 4
Conv1 grad norm: 0.0754
FC3 grad norm: 0.1830
---
Epoch 0, Batch 5
Conv1 grad norm: 0.0864
FC3 grad norm: 0.1874
---
Epoch 0, Batch 6
Conv1 grad norm: 0.0452
FC3 grad norm: 0.1680
---
Epoch 0, Batch 7
Conv1 grad norm: 0.0711
FC3 grad norm: 0.2960
---
Epoch 0, Batch 8
Conv1 grad norm: 0.0828
FC3 grad norm: 0.2528
---
Epoch 0, Batch 9
Conv1 grad norm: 0.0676
FC3 grad norm: 0.1861
---
Epoch 0, Batch 10
Conv1 grad norm: 0.0668
FC3 grad norm: 0.1846
---
Epoch 0, Batch 11
Conv1 grad norm: 0.0812
FC3 grad norm: 0.2730
---
Epoch 0, Batch 12
Conv1 grad norm: 0.0703
FC3 grad norm: 0.2361
---
Epoch 0, Batch 13
Conv1 grad norm: 0.0608
FC3 grad norm: 0.2367
---
Epoch 0

KeyboardInterrupt: 