In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

from torch.utils.tensorboard import SummaryWriter

torch.set_printoptions(linewidth=120)

In [2]:
# Record the library versions this notebook was executed with.
print(f"torch  {torch.__version__}")
print(f"torchvision  {torchvision.__version__}")


torch  2.4.1+cpu
torchvision  0.19.1+cpu


In [3]:
# FashionMNIST training split, stored under ./data (downloaded when missing).
# Every sample is passed through ToTensor before being returned.
train_set = torchvision.datasets.FashionMNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

In [4]:
# Shuffled mini-batches of 1000 samples drawn from train_set.
train_loader = torch.utils.data.DataLoader(
    dataset=train_set,
    batch_size=1000,
    shuffle=True,
)

In [5]:
def get_num_correct(preds, labels):
    """Count the predictions whose highest-scoring class equals the label.

    Args:
        preds: Tensor of per-class scores; the arg-max is taken along dim 1.
        labels: Tensor of target class indices compared element-wise with
            the arg-max of ``preds``.

    Returns:
        The number of matching positions as a plain Python number
        (obtained via ``.item()``).
    """
    predicted = preds.argmax(dim=1)
    hits = predicted == labels
    return hits.sum().item()

In [6]:
class Network(nn.Module):
    """Small CNN: two conv/ReLU/max-pool stages followed by three linear layers.

    The layer sizes assume single-channel 28x28 inputs: each 5x5 convolution
    (no padding) shrinks the side by 4 and each 2x2 pool halves it, giving
    28 -> 24 -> 12 -> 8 -> 4, i.e. 12 feature maps of 4x4 feeding ``fc1``.
    The final layer emits 10 raw scores per sample (no softmax is applied).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)
        self.fc1 = nn.Linear(in_features=12*4*4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)

    def forward(self, t):
        """Run the forward pass.

        Args:
            t: Image tensor with 1 channel; 28x28 spatial size is required for
                the flattened features to match ``fc1``.

        Returns:
            Tensor with one row of 10 unnormalised class scores per sample.

        Raises:
            RuntimeError: If the spatial size does not produce 12*4*4 features
                per sample (raised by ``fc1``).
        """
        t = F.relu(self.conv1(t))
        t = F.max_pool2d(t, kernel_size=2, stride=2)
        t = F.relu(self.conv2(t))
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # Flatten each sample's (C, H, W) feature maps into one vector.
        # A fixed reshape(-1, 12*4*4) would silently regroup elements across
        # samples when the input size is wrong (e.g. 16 images of 32x32 would
        # come out as 25 rows). Keeping the per-sample boundary makes fc1
        # fail loudly instead. The inner reshape only adds a batch dimension
        # when an unbatched (C, H, W) input was given.
        t = t.reshape(-1, *t.shape[-3:]).flatten(start_dim=1)
        t = F.relu(self.fc1(t))
        t = F.relu(self.fc2(t))
        t = self.out(t)

        return t

In [7]:
# Build a new Network instance; this is the model trained by the first loop below.
network = Network()

In [8]:
# Show the module's repr to confirm the layer configuration.
network

Network(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=192, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=60, bias=True)
  (out): Linear(in_features=60, out_features=10, bias=True)
)

In [9]:
# Adam over every parameter of `network`, learning rate 0.01.
optimizer = optim.Adam(
    params=network.parameters(),
    lr=0.01,
)

In [10]:
# Train for 20 epochs, reporting the summed batch loss and the number of
# correctly classified training samples after each epoch.
for epoch in range(20):
    total_loss = 0
    total_correct = 0

    for batch in train_loader:
        images, labels = batch

        preds = network(images)
        loss = F.cross_entropy(preds, labels)

        # Reset gradients before backward(): PyTorch adds each new gradient
        # into .grad, so without this call every step would use the running
        # sum of gradients from all previous iterations.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # cross_entropy averages over the batch by default, so total_loss is
        # a sum of per-batch means and scales with the number of batches.
        total_loss += loss.item()
        total_correct += get_num_correct(preds, labels)

    print(f"epoch {epoch} total_loss {total_loss:.3f} total_correct {total_correct}")

    

epoch 0 total_loss 56.776 total_correct 38592
epoch 1 total_loss 29.556 total_correct 48542
epoch 2 total_loss 24.761 total_correct 50835
epoch 3 total_loss 21.771 total_correct 51876
epoch 4 total_loss 20.267 total_correct 52436
epoch 5 total_loss 18.809 total_correct 52981
epoch 6 total_loss 18.217 total_correct 53100
epoch 7 total_loss 17.135 total_correct 53533
epoch 8 total_loss 16.656 total_correct 53780
epoch 9 total_loss 16.217 total_correct 53911
epoch 10 total_loss 15.567 total_correct 54075
epoch 11 total_loss 15.510 total_correct 54096
epoch 12 total_loss 15.032 total_correct 54205
epoch 13 total_loss 14.691 total_correct 54394
epoch 14 total_loss 13.815 total_correct 54788
epoch 15 total_loss 13.656 total_correct 54794
epoch 16 total_loss 13.469 total_correct 54890
epoch 17 total_loss 12.785 total_correct 55077
epoch 18 total_loss 13.110 total_correct 55050
epoch 19 total_loss 12.892 total_correct 55033


# Starting out with TensorBoard (Network graph and images)

In [11]:
# Smaller batches (100) so the image grid stays readable.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=100, shuffle=True)

# NOTE: this rebinds `network` to a new, untrained instance; the model trained
# above is no longer reachable through this name.
network = Network()
images, labels = next(iter(train_loader))

grid = torchvision.utils.make_grid(images)

# Open the writer only once the data is ready, and let the context manager
# close (and flush) it even if add_image/add_graph raises.
with SummaryWriter("runs/FashionMNIST") as tb:
    tb.add_image('images', grid)
    tb.add_graph(network, images)

## Training loop with TensorBoard

In [17]:
# Start from a new model, loader and optimizer so this run is independent of
# the earlier training cell.
network = Network()
train_loader = torch.utils.data.DataLoader(train_set, batch_size=100, shuffle=True)
optimizer = optim.Adam(network.parameters(), lr=0.01)

images, labels = next(iter(train_loader))
grid = torchvision.utils.make_grid(images)

# The context manager closes the writer when the block exits, so the last
# epoch's scalars and histograms are flushed to disk instead of staying
# buffered in the still-running kernel.
with SummaryWriter("runs/FashionMNIST/trained model") as tb:
    tb.add_image("FashionMNIST Images", grid)
    tb.add_graph(network, images)

    for epoch in range(10):
        total_loss = 0
        total_correct = 0

        for batch in train_loader:
            images, labels = batch
            preds = network(images)
            loss = F.cross_entropy(preds, labels)

            # Clear the previous iteration's gradients before computing new ones.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Sum of per-batch mean losses: comparable between epochs of this
            # run, but not with runs that use a different batch size.
            total_loss += loss.item()
            total_correct += get_num_correct(preds, labels)

        tb.add_scalar("Loss", total_loss, epoch)
        tb.add_scalar("Number Correct", total_correct, epoch)
        tb.add_scalar("Accuracy", total_correct/len(train_set), epoch)

        tb.add_histogram("conv1.bias", network.conv1.bias, epoch)
        tb.add_histogram("conv1.weight", network.conv1.weight, epoch)
        # .grad holds the gradient from the final batch of the epoch only.
        tb.add_histogram("conv1.weight.grad", network.conv1.weight.grad, epoch)

        print(f"epoch {epoch} total_loss {total_loss:.3f} total_correct {total_correct}")

epoch 0 total_loss 339.326 total_correct 47089
epoch 1 total_loss 229.458 total_correct 51503
epoch 2 total_loss 212.701 total_correct 52154
epoch 3 total_loss 203.097 total_correct 52431
epoch 4 total_loss 196.390 total_correct 52663
epoch 5 total_loss 187.156 total_correct 53071
epoch 6 total_loss 185.296 total_correct 53097
epoch 7 total_loss 182.877 total_correct 53260
epoch 8 total_loss 179.265 total_correct 53445
epoch 9 total_loss 179.320 total_correct 53434
