In [None]:
import torch
import torch.nn as nn

In [None]:
def conv_block(num_channels):
  """Build one pre-activation convolution unit: BatchNorm -> ReLU -> 3x3 conv.

  num_channels: number of output channels produced by the convolution.

  The convolution uses a 3x3 kernel with padding 1 (default stride), so the
  spatial size of the input is preserved. Input channel counts are inferred
  lazily on the first forward pass.
  """
  norm = nn.LazyBatchNorm2d()
  activation = nn.ReLU()
  conv = nn.LazyConv2d(num_channels, kernel_size=3, padding=1)
  return nn.Sequential(norm, activation, conv)

In [None]:
class DenseBlock(nn.Module):
  """A stack of conv blocks whose outputs are concatenated onto their input.

  Every conv block receives the channel-wise concatenation of the block input
  and all feature maps produced so far, and contributes `num_channels` new
  channels of its own.
  """

  def __init__(self,num_convs,num_channels):
    """
    num_convs: number of conv blocks stacked inside this dense block.
    num_channels: growth rate (k), i.e. channels added by each conv block.
    """
    super().__init__()
    self.net = nn.Sequential(
        *(conv_block(num_channels=num_channels) for _ in range(num_convs))
    )

  def forward(self,X):
    """Return X with the output of every conv block appended along dim 1."""
    features = X
    for unit in self.net:
      new_maps = unit(features)
      # Grow the feature stack: keep everything seen so far plus the new maps.
      features = torch.cat((features, new_maps), dim=1)
    return features

In [None]:
def transition_block(num_channels):
  """Build a transition unit: BatchNorm -> ReLU -> 1x1 conv -> 2x2 avg-pool.

  num_channels: number of output channels of the 1x1 convolution.

  The 1x1 convolution sets the channel count and the stride-2 average
  pooling halves the height and width.
  """
  stages = [
      nn.LazyBatchNorm2d(),
      nn.ReLU(),
      nn.LazyConv2d(num_channels, kernel_size=1),
      nn.AvgPool2d(kernel_size=2, stride=2),
  ]
  return nn.Sequential(*stages)

In [None]:
class DenseNet(nn.Module):
  """DenseNet-style classifier: stem -> dense blocks -> classification head.

  Args:
    num_channels: output channels of the stem convolution (previously this
      argument was accepted but ignored and the stem was hard-coded to 64).
    growth_rate: channels added by every conv block inside a dense block.
    arch: number of conv blocks in each dense block, one entry per block.
    num_classes: size of the final linear layer's output.
    use_transitions: when True, insert a transition block (1x1 conv that
      halves the channel count + 2x2 average pooling) between consecutive
      dense blocks. Defaults to False, which builds exactly the network this
      class produced before the option existed.
  """

  def __init__(self,num_channels=64,growth_rate=32,arch=(4,4,4,4),num_classes=10,
               use_transitions=False):
    super().__init__()
    self.net = nn.Sequential(self.block1(num_channels))
    last_stage = len(arch) - 1
    for i, num_convs in enumerate(arch):
      self.net.add_module(f"dense_blk{i+1}",DenseBlock(num_convs,growth_rate))
      # Each conv block in the dense block appended `growth_rate` channels.
      num_channels += num_convs*growth_rate
      # No transition after the final dense block: the head follows directly.
      if use_transitions and i != last_stage:
        num_channels //= 2
        self.net.add_module(f"tran_blk{i+1}",transition_block(num_channels))
    self.net.add_module('last', nn.Sequential(
        nn.LazyBatchNorm2d(), nn.ReLU(),
        nn.AdaptiveAvgPool2d((1, 1)), nn.Flatten(),
        nn.LazyLinear(num_classes)))

  def block1(self, num_channels=64):
    """Stem: 7x7 stride-2 conv -> BatchNorm -> ReLU -> 3x3 stride-2 max-pool.

    num_channels: output channels of the stem convolution (default 64).
    """
    return nn.Sequential(
        nn.LazyConv2d(num_channels,kernel_size=7,stride=2,padding=3),
        nn.LazyBatchNorm2d(), nn.ReLU(),
        nn.MaxPool2d(kernel_size=3,stride=2,padding=1)
    )

  def forward(self,X):
    """Run X through the stem, the dense stages and the classification head."""
    return self.net(X)

In [None]:
import torchvision
from torchvision.datasets import MNIST
from torchvision import transforms
from torch.utils.data import DataLoader

In [None]:
# Preprocessing applied to every MNIST image: upscale to 96x96, convert to a
# tensor, then normalise. With mean 0 and std 1 the normalisation step leaves
# the tensor values unchanged.
transform = transforms.Compose(
    [
        transforms.Resize((96, 96)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0,), std=(1,)),
    ]
)

In [None]:
# Training split of MNIST (downloaded into ./root on first use), served in
# shuffled mini-batches of 64 images.
train_data = MNIST(
    root="./root",
    train=True,
    transform=transform,
    download=True,
)
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)

In [None]:
# Held-out MNIST split, served in mini-batches of 64 images.
test_data = MNIST(root="./root",train=False,transform=transform,download=True)
# Evaluation does not benefit from shuffling; a fixed order keeps any
# per-batch numbers reported downstream repeatable between runs.
test_loader = DataLoader(test_data, batch_size=64, shuffle=False)

In [None]:
import torch.optim as optim

In [None]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
def training_loop(model, loss_fn, optimizer, epochs, loader, device=None):
    """Train `model` for `epochs` passes over `loader`.

    Args:
        model: the network to optimise; switched to training mode.
        loss_fn: callable taking (predictions, targets) and returning a scalar
            loss tensor.
        optimizer: optimizer already bound to the model's parameters.
        epochs: number of full passes over `loader`.
        loader: iterable yielding (feature, target) tensor pairs.
        device: device the batches are moved to. When None it is taken from
            the model's first parameter (CPU if the model has no parameters).

    Returns:
        A list with one float per epoch: the mean loss over all samples seen
        in that epoch (NaN for an epoch in which the loader yielded nothing).
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    history = []
    for ep in range(epochs):
        running_loss, seen = 0.0, 0
        for feature, target in loader:
            feature, target = feature.to(device), target.to(device)
            optimizer.zero_grad()
            y_pred = model(feature)
            loss = loss_fn(y_pred, target)
            loss.backward()
            optimizer.step()
            # Weight by batch size so a short final batch does not skew the
            # epoch mean (assumes loss_fn returns a per-batch mean).
            batch_size = target.size(0)
            running_loss += loss.item() * batch_size
            seen += batch_size
        # Report the epoch average rather than whichever batch came last.
        epoch_loss = running_loss / seen if seen else float("nan")
        history.append(epoch_loss)
        print(f"Epoch {ep+1}, loss: {epoch_loss}")
    return history

def testing_loop(model, loss_fn, loader):
    model.eval() # Set the model to evaluation mode
    with torch.no_grad():
        for feature, target in loader:
            feature, target = feature.to(device), target.to(device)
            prediction = model(feature)
            loss = loss_fn(prediction, target)
        print(f"Test loss: {loss.item()}")

In [None]:
# Build the network and place it on the target device before anything else.
model = DenseNet()
model.to(device)

# The network is made of lazy modules whose parameters have no shape until a
# first forward pass. Do one dry run on a real batch (eval mode + no_grad, so
# no gradients are recorded and batch-norm running statistics are not
# updated) so that the optimizer is attached to fully initialised parameters.
model.eval()
with torch.no_grad():
    sample_batch, _ = next(iter(train_loader))
    model(sample_batch.to(device))

loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),lr = 1e-4)
# training_loop switches the model back to training mode itself.
training_loop(model,loss_fn,optimizer,10,train_loader)

Epoch 1, loss: 0.1086050346493721
Epoch 2, loss: 0.07311496138572693
Epoch 3, loss: 0.2029210478067398
Epoch 4, loss: 0.0790981650352478
Epoch 5, loss: 0.028374947607517242
Epoch 6, loss: 0.02086241915822029
Epoch 7, loss: 0.08081139624118805
Epoch 8, loss: 0.03718626871705055
Epoch 9, loss: 0.009528332389891148
Epoch 10, loss: 0.007580135948956013


In [None]:
# Report the loss of the trained model on the held-out MNIST split.
testing_loop(model=model, loss_fn=loss_fn, loader=test_loader)

Test loss: 0.0008846366545185447


In [None]:
# Evaluate the trained model on the test split: mean loss and top-1 accuracy.
model.eval()
correct = 0
total = 0
loss_sum = 0.0
with torch.no_grad():
    for feature, target in test_loader:
        feature, target = feature.to(device), target.to(device)
        prediction = model(feature)
        batch_size = target.size(0)
        # Accumulate a sample-weighted sum so the reported loss covers the
        # whole test set rather than only the final batch (assumes loss_fn
        # returns a per-batch mean).
        loss_sum += loss_fn(prediction, target).item() * batch_size
        # Predicted class = index of the largest logit in each row.
        predicted = prediction.argmax(dim=1)
        total += batch_size
        correct += (predicted == target).sum().item()

print(f"Test Loss: {loss_sum / total:.4f}")
print(f"Accuracy: {100 * correct / total:.2f}%")

Test Loss: 0.0142
Accuracy: 99.16%
