In [1]:
# Dependency installation. The import cell below also needs `torchmetrics` and
# `torchsummary`; uncomment the next line if they are not already installed.
# !pip install torchmetrics torch-summary
!pip install tqdm

Defaulting to user installation because normal site-packages is not writeable
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com


In [2]:
import torch.nn as nn
import torch
from torchvision import datasets, transforms
from torchsummary import summary
from torchmetrics import Accuracy
from tqdm import tqdm
import matplotlib.pyplot as plt

In [3]:
# Record the PyTorch build in the cell output so results can be tied to a version.
print("PyTorch version:", torch.__version__)

PyTorch version: 1.11.0a0+bfe5ad2


In [4]:
# !pip install -q netron
# !curl --output smartreply.zip https://storage.googleapis.com/download.tensorflow.org/models/tflite/smartreply_1.0_2017_11_01.zip
# !unzip smartreply.zip

In [5]:
# Prefer the GPU when PyTorch can see one; otherwise run everything on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using device:", device)

Using device: cuda


In [6]:
class BasicBlock(nn.Module):
  """3x3 convolution followed by batch normalisation and an in-place ReLU.

  Args:
    in_channels: number of channels of the incoming feature map.
    out_channels: number of convolution filters (channels of the result).
    stride: stride of the convolution. With a 3x3 kernel and padding 1, a
      stride of 1 keeps the spatial size unchanged.
  """

  def __init__(self, in_channels, out_channels, stride):
    super().__init__()
    # Sub-modules are created in the order conv -> bn -> relu, and under the
    # same attribute names, so parameter initialisation and state_dict keys
    # stay as they were.
    self.conv1 = nn.Conv2d(
        in_channels,
        out_channels,
        kernel_size=3,
        stride=stride,
        padding=1,
    )
    self.bn = nn.BatchNorm2d(out_channels)
    self.relu = nn.ReLU(inplace=True)

  def forward(self, x):
    """Return ``relu(bn(conv1(x)))``."""
    return self.relu(self.bn(self.conv1(x)))

In [7]:
# Spatial size of the dummy image used for shape checks (and later ONNX export).
height, width = 32, 32
# NOTE: despite its name, this value is passed as BasicBlock's *output* channel
# count in the shape-check cells that follow.
in_channels = 16
# One random RGB image, laid out as (batch, channels, height, width).
fake_input = torch.rand(1, 3, height, width)

In [8]:
# Stride 1: the channel count changes while the spatial size is preserved.
block = BasicBlock(3,in_channels,1)
out = block(fake_input)
assert out.shape == (1,in_channels,height,width), f"Unexpected shape of {out.shape}"

In [9]:
# Stride 2: height and width are halved. The channel count is taken from the
# same variable the assertion uses, so the two cannot drift apart.
block = BasicBlock(3,in_channels,2)
out = block(fake_input)
assert out.shape == (1,in_channels,height//2,width//2), f"Unexpected shape of {out.shape}"

In [10]:
class PlainNetwork(nn.Module):
  """Plain (shortcut-free) convolutional classifier with 6n + 2 weighted layers.

  Layout: one 3x3 stem convolution (3 -> 16 channels), three stages of ``2*n``
  ``BasicBlock``s with 16, 32 and 64 channels (the 2nd and 3rd stage start with
  a stride-2 block), global average pooling and a linear classifier.

  Args:
    n: depth multiplier; each stage holds ``2*n`` blocks. Must be >= 1.
    num_classes: size of the output layer. Defaults to 10, the value that was
      previously hard-coded.

  Raises:
    ValueError: if ``n`` is smaller than 1 (the stages would otherwise not
      contain the ``2*n`` blocks the depth formula promises).
  """

  def __init__(self, n, num_classes=10):
    super().__init__()
    if n < 1:
      raise ValueError(f"n must be a positive integer, got {n!r}")

    self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1, stride=1)

    self.layer1 = self.make_layer(16, out_channels=16, stride=1, n_layers=2*n)
    self.layer2 = self.make_layer(16, out_channels=32, stride=2, n_layers=2*n)
    self.layer3 = self.make_layer(32, out_channels=64, stride=2, n_layers=2*n)
    # Collapse each 64-channel feature map to a single value before the classifier.
    self.global_pooling = nn.AdaptiveAvgPool2d((1,1))
    self.linear = nn.Linear(64, num_classes)

  def make_layer(self, in_channels, out_channels, stride, n_layers):
    """Build one stage of ``n_layers`` ``BasicBlock``s as an ``nn.Sequential``.

    Only the first block receives ``in_channels`` and ``stride``; the remaining
    ``n_layers - 1`` blocks map ``out_channels -> out_channels`` at stride 1.
    """
    blocks = [BasicBlock(in_channels, out_channels, stride)]
    blocks.extend(BasicBlock(out_channels, out_channels, 1) for _ in range(n_layers - 1))
    return nn.Sequential(*blocks)

  def forward(self, x):
    """Return the class logits, one row of ``num_classes`` values per input sample."""
    x = self.conv1(x)
    x = self.layer1(x)
    x = self.layer2(x)
    x = self.layer3(x)
    x = self.global_pooling(x)
    # Drop the trailing 1x1 spatial dimensions left by the pooling.
    x = torch.flatten(x, 1)
    x = self.linear(x)
    return x

In [11]:
# n=3 gives the 20-layer variant (6*3 + 2); move it to the selected device.
plainet20 = PlainNetwork(n=3)
plainet20 = plainet20.to(device)
# The trailing semicolon stops the notebook from echoing summary()'s return
# value, which appears to repeat the table that summary() has already printed.
summary(plainet20, (3,32,32));

Layer (type:depth-idx)                   Output Shape              Param #
├─Conv2d: 1-1                            [-1, 16, 32, 32]          448
├─Sequential: 1-2                        [-1, 16, 32, 32]          --
|    └─BasicBlock: 2-1                   [-1, 16, 32, 32]          --
|    |    └─Conv2d: 3-1                  [-1, 16, 32, 32]          2,320
|    |    └─BatchNorm2d: 3-2             [-1, 16, 32, 32]          32
|    |    └─ReLU: 3-3                    [-1, 16, 32, 32]          --
|    └─BasicBlock: 2-2                   [-1, 16, 32, 32]          --
|    |    └─Conv2d: 3-4                  [-1, 16, 32, 32]          2,320
|    |    └─BatchNorm2d: 3-5             [-1, 16, 32, 32]          32
|    |    └─ReLU: 3-6                    [-1, 16, 32, 32]          --
|    └─BasicBlock: 2-3                   [-1, 16, 32, 32]          --
|    |    └─Conv2d: 3-7                  [-1, 16, 32, 32]          2,320
|    |    └─BatchNorm2d: 3-8             [-1, 16, 32, 32]          32
|    

Layer (type:depth-idx)                   Output Shape              Param #
├─Conv2d: 1-1                            [-1, 16, 32, 32]          448
├─Sequential: 1-2                        [-1, 16, 32, 32]          --
|    └─BasicBlock: 2-1                   [-1, 16, 32, 32]          --
|    |    └─Conv2d: 3-1                  [-1, 16, 32, 32]          2,320
|    |    └─BatchNorm2d: 3-2             [-1, 16, 32, 32]          32
|    |    └─ReLU: 3-3                    [-1, 16, 32, 32]          --
|    └─BasicBlock: 2-2                   [-1, 16, 32, 32]          --
|    |    └─Conv2d: 3-4                  [-1, 16, 32, 32]          2,320
|    |    └─BatchNorm2d: 3-5             [-1, 16, 32, 32]          32
|    |    └─ReLU: 3-6                    [-1, 16, 32, 32]          --
|    └─BasicBlock: 2-3                   [-1, 16, 32, 32]          --
|    |    └─Conv2d: 3-7                  [-1, 16, 32, 32]          2,320
|    |    └─BatchNorm2d: 3-8             [-1, 16, 32, 32]          32
|    

In [12]:
# Convert to ONNX format, using the dummy input as the example.
# The file gets an .onnx extension: it holds an ONNX graph, not a PyTorch
# checkpoint, and viewers/tools commonly pick the parser from the extension.
model_path = "./plainnet.onnx"
_ = torch.onnx.export(plainet20.to(device), fake_input.to(device), model_path)

In [13]:
# import netron
# import portpicker
# from google.colab import output

# port = portpicker.pick_unused_port()

# # Read the model file and start the netron browser.
# with output.temporary():
#   netron.start(model_path, port, browse=False)

# output.serve_kernel_port_as_iframe(port, height='800')

In [14]:
## Training hyper-parameters (consumed by the data loaders and the SGD optimizer below)
batch_size = 128      # samples per mini-batch
learning_rate = 0.1   # initial SGD learning rate
momentum = 0.9        # SGD momentum
weight_decay = 1e-4   # L2 penalty passed to SGD

In [15]:
## Augmentation as described in the reference paper: "We follow the simple data
## augmentation in [24] for training: 4 pixels are padded on each side, and a
## 32x32 crop is randomly sampled from the padded image or its horizontal flip.
## For testing, we only evaluate the single view of the original 32x32 image."

# Training pipeline: random flip, zero-pad by 4 px and take a random 32x32 crop,
# then convert to a tensor.
train_augmentations = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomCrop(size=32, padding=4, fill=0, padding_mode='constant'),
        transforms.ToTensor(),
    ]
)

# Test pipeline: the unmodified image, only converted to a tensor.
test_augmentations = transforms.Compose([transforms.ToTensor()])

In [16]:
# CIFAR-10 stored in the working directory. Only the training split requests a
# download; the test split is read from the same root.
train_dataset = datasets.CIFAR10(
    root=".",
    train=True,
    download=True,
    transform=train_augmentations,
)
test_dataset = datasets.CIFAR10(
    root=".",
    train=False,
    transform=test_augmentations,
)

Files already downloaded and verified


In [17]:
# Training batches are reshuffled every epoch and the incomplete last batch is
# dropped, so every training step sees exactly `batch_size` samples.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    sampler=None,
    drop_last=True,
    num_workers=0,
)

# Evaluation keeps the dataset order and keeps the (possibly smaller) last batch.
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    sampler=None,
    num_workers=0,
)

In [18]:
# The schedule is specified in optimizer iterations (64k in total, learning-rate
# drops at 32k and 48k); convert each figure to a whole number of epochs.
steps_per_epoch = len(train_loader)
total_epochs, lr_step1, lr_step2 = (
    round(iterations / steps_per_epoch) for iterations in (64_000, 32_000, 48_000)
)
print(f"Train for {total_epochs} epochs.")
print(f"Learning rate decreases at epochs {lr_step1} and {lr_step2}")

Train for 164 epochs.
Learning rate decreases at epochs 82 and 123


In [19]:
# SGD with momentum and weight decay over every parameter of the network.
optimizer = torch.optim.SGD(
    plainet20.parameters(),
    lr=learning_rate,
    momentum=momentum,
    weight_decay=weight_decay,
)
# Scale the learning rate by 0.1 at each of the two milestone epochs.
scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer,
    milestones=[lr_step1, lr_step2],
    gamma=0.1,
)

In [20]:
class Trainer:
    """Epoch-based train/evaluate loop that records per-epoch error rates.

    Args:
        epochs: number of passes over the training loader.
        optimizer: optimizer updating the model's parameters.
        lr_scheduler: learning-rate scheduler; stepped once per epoch.
        device: device that batches, the loss and the metric are moved to.

    Attributes:
        training_error: one float per finished epoch, ``1 - accuracy`` on the
            training loader (measured while the model is being updated).
        testing_error: one float per finished epoch, ``1 - accuracy`` on the
            test loader.
    """

    def __init__(self, epochs, optimizer, lr_scheduler, device):
        self.epochs = epochs
        self.optimizer = optimizer
        self.lr_scheduler = lr_scheduler
        # Stored so fit() uses the device it was configured with instead of
        # relying on a notebook-global variable of the same name.
        self.device = device
        self.accuracy = Accuracy(num_classes=10).to(device)
        self.training_error = []
        self.testing_error = []
        self.loss_fn = nn.CrossEntropyLoss().to(device)

    def fit(self, model, train_loader, test_loader):
        """Train ``model`` for ``self.epochs`` epochs, evaluating after each one.

        Appends one value per epoch to ``training_error`` and ``testing_error``.
        """
        for _ in tqdm(range(self.epochs)):
            self.training_error.append(self._train_one_epoch(model, train_loader))
            # The scheduler is advanced once per epoch, after the optimizer steps.
            self.lr_scheduler.step()
            self.testing_error.append(self._evaluate(model, test_loader))

    def _train_one_epoch(self, model, loader):
        """Run one optimisation pass over ``loader``; return the error rate as a float."""
        model.train()
        # Start from a clean metric state so earlier passes do not leak in.
        self.accuracy.reset()
        for x_batch, y_batch in loader:
            x_batch = x_batch.to(self.device)
            y_batch = y_batch.to(self.device)

            logits = model(x_batch)
            loss = self.loss_fn(logits, y_batch)

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # Accumulate counts over the whole pass; the metric needs no gradients.
            self.accuracy.update(logits.detach(), y_batch)
        return (1 - self.accuracy.compute()).item()

    def _evaluate(self, model, loader):
        """Return the error rate of ``model`` on ``loader`` without updating it."""
        model.eval()
        self.accuracy.reset()
        with torch.no_grad():
            for x_batch, y_batch in loader:
                x_batch = x_batch.to(self.device)
                y_batch = y_batch.to(self.device)
                self.accuracy.update(model(x_batch), y_batch)
        # compute() aggregates over all samples seen, so a smaller final batch
        # is weighted by its size instead of counting as a full batch.
        return (1 - self.accuracy.compute()).item()

In [21]:
# Bundle the schedule length, optimizer, LR scheduler and device into one trainer.
trainer = Trainer(
    epochs=total_epochs,
    optimizer=optimizer,
    lr_scheduler=scheduler,
    device=device,
)

In [None]:
# Run the full schedule; per-epoch errors are collected on
# trainer.training_error and trainer.testing_error.
trainer.fit(model=plainet20, train_loader=train_loader, test_loader=test_loader)

 32%|███▏      | 53/164 [15:06<31:23, 16.97s/it]

In [None]:
# Per-epoch error rate (1 - accuracy) recorded by the trainer.
fig, ax = plt.subplots()
# Distinct colours and descriptive labels so the two curves can be told apart.
line1, = ax.plot(trainer.training_error, label="Training error", color="tab:blue", linestyle='--')
line2, = ax.plot(trainer.testing_error, label="Test error", color="tab:red", linewidth=2)

ax.set_xlabel("Epoch")
ax.set_ylabel("Error rate")
ax.set_title("PlainNet-20 on CIFAR-10")

# A single legend listing both curves.
ax.legend(handles=[line1, line2], loc='upper right')

plt.show()