In this example, Pu tried to improve the model's performance on pixel error and color error. Pu did so by improving:
1. Increase the model complexity by adding one more CNN layer.
2. Model architecture by using batch norm.
3. Introduce the learning rate scheduler. 
4. Add dropout layer to avoid overfitting.

In [2]:
import torch
import torch.nn as nn
import torch.optim.lr_scheduler as lr_scheduler
from skimage.draw import disk, rectangle
import matplotlib.pyplot as plt

# Setup the device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Function for generating train/test data
def generate_data(width=32, radius_range=(3, 7), noise_range=(0, 0)):
    img = torch.FloatTensor(3, width, width).uniform_(0, 1)
    rad = torch.randint(low=radius_range[0], high=radius_range[1], size=(1,))
    circle_pt = torch.FloatTensor(2).uniform_(rad.item(), width - rad.item())
    circle_mask = disk(circle_pt.tolist(), rad.item(), shape=(width, width))
    clr = torch.randint(3, size=(1,)).item()
    noise_lvl = torch.FloatTensor(1).uniform_(*noise_range).item()
    img[clr][circle_mask] = (1 - img[clr][circle_mask] * noise_lvl)
    return img, torch.concat((rad, circle_pt)), clr

# Metrics calculation
def compute_pixel_error(pred, label):
    return torch.mean(torch.abs(pred - label)).item()

def compute_color_accuracy(pred, label):
    return torch.mean(pred == label, dtype=torch.float)

# Training loop
def model_training(train_ldr, test_ldr, model, shape_loss_fn, clr_loss_fn, opt, sched, num_epochs=1000):
    for epoch in range(num_epochs):
        model.train()
        train_loss, _, _ = one_epoch(model, opt, shape_loss_fn, clr_loss_fn, train_ldr)
        sched.step()
        if epoch in [0, num_epochs // 2, num_epochs - 1]:
            model.eval()
            test_loss, pixel_error, clr_accuracy = one_epoch(model, opt, shape_loss_fn, clr_loss_fn, test_ldr, training=False)
            print(f'Epoch {epoch + 1} train_loss: {train_loss:.3f} test_loss: {test_loss:.3f}, pixel_error: {pixel_error:.3f}, color_accuracy:{clr_accuracy:.3f}')

# Run one epoch's training
def one_epoch(model, opt, shape_loss_fn, clr_loss_fn, ldr, training=True):
    losses = []
    pixel_errors = []
    clr_accuracies = []
    for _, data_batch in enumerate(ldr):
        imgs, lbl_shape, lbl_clr = [x.to(device) for x in data_batch]
        pred_shape, pred_clr = model(imgs)
        loss = (3 * shape_loss_fn(pred_shape, lbl_shape) + clr_loss_fn(pred_clr, lbl_clr))
        if training:
            opt.zero_grad()
            loss.backward()
            opt.step()
        losses.append(loss.item())
        pixel_errors.append(compute_pixel_error(pred_shape, lbl_shape))
        clr_accuracies.append(compute_color_accuracy(torch.argmax(pred_clr, dim=1), lbl_clr))
    return torch.Tensor(losses).mean(), torch.Tensor(pixel_errors).mean(), torch.Tensor(clr_accuracies).mean()


# Generate the data and load into Dataset & DataLoader
train_data = [generate_data(noise_range=[0.33, 0.66]) for _ in range(10000)]
test_data = [generate_data(noise_range=[0.33, 0.66]) for _ in range(1000)]
train_ldr = torch.utils.data.DataLoader(train_data, batch_size=100)
test_ldr = torch.utils.data.DataLoader(test_data, batch_size=100)

# Loss functions
shape_loss_fn = nn.MSELoss(reduction='mean')
color_loss_fn = nn.CrossEntropyLoss(reduction='mean')

# Model definition
class ANNModel(nn.Module):
    def __init__(self):
        super(ANNModel, self).__init__()

        # for any CNN, the output size is (W - F + 2P) / S + 1, W is width, F is kernel size, P is padding, S is stride
        self.feature_layers = nn.Sequential(                                # 100x3x32x32
            nn.Conv2d(3, 128, kernel_size=3, stride=2, padding=1),          # 100x128x16x16
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1),        # 100x256x8x8
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1),        # 100x512x4x4
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.Dropout(0.4),
        )
        self.embed_size = 4 * 4 * 512
        self.shape_out = nn.Linear(self.embed_size, 3)                      # 100x3 
        self.color_out = nn.Linear(self.embed_size, 3)                      # 100x3

    def forward(self, x):
        x = self.feature_layers(x)
        x = x.view(-1, self.embed_size)
        shape_res = self.shape_out(x)
        color_res = torch.sigmoid(self.color_out(x))
        return shape_res, color_res

model = ANNModel()
if torch.cuda.device_count() > 1:
    print(f'Will use {torch.cuda.device_count()} GPUs!')
    model = nn.DataParallel(model)
model.to(device)

# Optimizer and scheduler setup
opt = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
# The learning rate will drop 50% every 100 steps/epochs
sched = lr_scheduler.StepLR(opt, step_size=100, gamma=0.7)

# Start training
model_training(train_ldr, test_ldr, model, shape_loss_fn, color_loss_fn, opt, sched, num_epochs=1000)


Epoch 1 train_loss: 81.254 test_loss: 42.131, pixel_error: 2.502, color_accuracy:0.315
Epoch 501 train_loss: 2.509 test_loss: 18.435, pixel_error: 1.444, color_accuracy:0.961
Epoch 1000 train_loss: 2.232 test_loss: 18.442, pixel_error: 1.440, color_accuracy:0.970
