In [1]:
import os
import random
import shutil
import time
import warnings

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.optim

import torch.utils.data
from torch.utils.tensorboard import SummaryWriter
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models

from torch import autocast
from torch.cuda.amp import GradScaler

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Utility functions.
class AverageMeter(object):
    """Tracks the most recent value of a metric and its weighted running mean."""

    def __init__(self, name, fmt=':f'):
        # `fmt` is a format-spec suffix (e.g. ':6.3f') applied to both val and avg
        # when the meter is rendered with str().
        self.name, self.fmt = name, fmt
        self.reset()

    def reset(self):
        """Zero out the latest value, the mean, the weighted sum and the count."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record `val` as the latest value, counting it as `n` samples."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

    def __str__(self):
        # Produces e.g. '{name} {val:6.3f} ({avg:6.3f})' and fills it from the
        # instance attributes.
        template = '{{name}} {{val{spec}}} ({{avg{spec}}})'.format(spec=self.fmt)
        return template.format(**vars(self))
class ProgressMeter(object):
    """Prints one tab-separated progress line: a batch counter, then every meter."""

    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        """Print the prefix and the counter for `batch`, followed by str() of each meter."""
        header = self.prefix + self.batch_fmtstr.format(batch)
        print('\t'.join([header, *map(str, self.meters)]))

    def _get_batch_fmtstr(self, num_batches):
        """Return a '[{:Nd}/total]' template where N is the printed width of the total."""
        width = len(str(num_batches // 1))
        field = '{:%dd}' % width
        return '[%s/%s]' % (field, field.format(num_batches))
def accuracy(output, target, topk=(1,)):
    """Return the top-k accuracy, as a percentage, for every k in `topk`.

    `output` is expected to hold one row of class scores per sample and `target`
    the matching class indices. The result is a list with one single-element
    tensor per requested k, in the same order as `topk`.
    """
    with torch.no_grad():
        largest_k = max(topk)
        num_samples = target.size(0)

        # Indices of the `largest_k` highest scores per sample, transposed so
        # that row r holds every sample's (r+1)-th best guess.
        top_indices = output.topk(largest_k, dim=1, largest=True, sorted=True)[1].t()
        hits = top_indices.eq(target.view(1, -1).expand_as(top_indices))

        # A sample counts as correct for k if its target is among the first k rows.
        return [
            hits[:k].reshape(-1).float().sum(0, keepdim=True).mul_(100.0 / num_samples)
            for k in topk
        ]

In [3]:
# Ensure that we get deterministic results: seed Python's and PyTorch's RNGs and
# pin cuDNN to deterministic behaviour.
SEED = 1
random.seed(SEED)
torch.manual_seed(SEED)
cudnn.deterministic = True
# Disable cuDNN autotuning explicitly so an earlier cell (or leftover kernel
# state) that enabled it cannot reintroduce run-to-run variation.
cudnn.benchmark = False

In [4]:
# Setup TensorBoard: every scalar logged through `writer` is stored under LOG_DIR.
LOG_DIR = "/data/logs"
writer = SummaryWriter(log_dir=LOG_DIR)

In [5]:
# Check that we have a CUDA enabled device.
# GPU is always bound: it is the CUDA device when one is present and None
# otherwise, so the `GPU is not None` checks used later fall back cleanly
# instead of raising NameError on a CPU-only machine.
if torch.cuda.device_count():
    GPU = torch.device("cuda")
else:
    GPU = None

In [6]:
# Per-channel mean and standard deviation handed to transforms.Normalize.
# NOTE(review): STD_RGB looks like the usual ImageNet std, but MEAN_RGB is not the
# usual ImageNet mean (0.485, 0.456, 0.406) — presumably measured on this
# dataset; confirm where these values came from.
MEAN_RGB = [0.47889522, 0.47227842, 0.43047404]
STD_RGB = [0.229, 0.224, 0.225]
# Side length, in pixels, that images are resized to via transforms.Resize.
IMG_SIZE = 64

In [7]:
# Load train and val datasets.
TRAINDIR = "/data/train" # ImageNet train.
VALDIR = "/data/val" # ImageNet val.
# NOTE(review): "Autobatch" is not explained in this notebook. A value of 1 means
# every DataLoader batch holds a single image — confirm this is intended.
BATCH_SIZE = 1 # Autobatch
TRAIN_WORKERS = 6
VAL_WORKERS = 2

# Resize every image to IMG_SIZE x IMG_SIZE and normalize each channel with
# MEAN_RGB / STD_RGB.
transform_train = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(MEAN_RGB, STD_RGB),
])

# Validation applies the same resize as training so the model is evaluated at the
# resolution it was trained on, and so images can be collated when BATCH_SIZE > 1.
transform_val = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(MEAN_RGB, STD_RGB),
])

# Load training data (one sub-directory per class); reshuffled each epoch.
train_dataset = datasets.ImageFolder(TRAINDIR, transform=transform_train)

train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    num_workers=TRAIN_WORKERS,
    shuffle=True,
    pin_memory=True,
)

# Load validation data. Order does not affect the averaged metrics, so the
# loader is not shuffled, which keeps evaluation runs repeatable.
val_dataset = datasets.ImageFolder(VALDIR, transform=transform_val)

val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    num_workers=VAL_WORKERS,
    shuffle=False,
    pin_memory=True,
)

In [8]:
NUM_CLASSES = 1000
ARCH = 'resnet18'
LR = 1e-4
# Build the architecture named by ARCH from torchvision.models.
# NOTE(review): no weights argument is passed, so this is presumably a randomly
# initialised network rather than a pretrained one — confirm against the
# installed torchvision version.
model = getattr(models, ARCH)()
inf = model.fc.in_features
# Replace the final fully connected layer with a NUM_CLASSES-way classifier.
model.fc = nn.Linear(inf, NUM_CLASSES)
# Setup optimizer and loss function.
criterion = nn.CrossEntropyLoss()
# Only move to the GPU when one was selected, mirroring the `GPU is not None`
# guards used in train() and validate().
if GPU is not None:
    model.cuda(GPU)
    criterion = criterion.cuda(GPU)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

In [9]:
# A progress line is printed whenever the batch index is a multiple of PRINT_FREQ.
PRINT_FREQ = 1000
# Running count of training batches across epochs; train() increments it once per
# batch and it is used as the TensorBoard step for both train and val scalars.
global_step = 0

In [10]:
# Gradient scaler shared by every call to train() (used with autocast below).
scaler = GradScaler()
# Define train step.
def train(train_loader, model, criterion, optimizer, epoch):
    """Run one pass over `train_loader`, updating `model` with mixed precision.

    Args:
        train_loader: iterable of (images, target) batches that supports len().
        model: network to optimise; switched to train mode here.
        criterion: loss callable invoked as criterion(output, target).
        optimizer: optimizer stepped once per batch through the module-level `scaler`.
        epoch: epoch index, used only in the printed progress prefix.

    Side effects:
        Increments the module-level `global_step` once per batch, writes the
        batch loss and the running Acc@1 / Acc@5 averages to the module-level
        TensorBoard `writer`, and prints a progress line every PRINT_FREQ batches.
    """
    global global_step
    # Keep progress of metrics.
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses, top1, top5],
        prefix="Epoch: [{}]".format(epoch))

    # Switch to train mode
    model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # Measure data loading time.
        data_time.update(time.time() - end)

        # Move the whole batch to the GPU under a single guard so images and
        # target can never end up on different devices.
        if GPU is not None:
            images = images.cuda(GPU, non_blocking=True)
            target = target.cuda(GPU, non_blocking=True)

        # Use automatic mixed precision (AMP) to increase training speed.
        with autocast("cuda"):
            # Compute output.
            output = model(images)
            loss = criterion(output, target)

        # Measure accuracy and record loss. Values are converted to Python
        # floats once, so the meters accumulate in double precision and no
        # graph-attached tensor is handed to TensorBoard.
        batch_size = images.size(0)
        loss_value = loss.item()
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss_value, batch_size)
        top1.update(acc1[0].item(), batch_size)
        top5.update(acc5[0].item(), batch_size)

        # Write tensorboard logs.
        writer.add_scalar("Loss/train", loss_value, global_step=global_step)
        writer.add_scalar("Acc1/train", top1.avg, global_step=global_step)
        writer.add_scalar("Acc5/train", top5.avg, global_step=global_step)
        global_step += 1

        # Compute gradient and take an optimizer step; the scaler scales the
        # loss before backward and decides whether the step is applied.
        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Measure elapsed time.
        batch_time.update(time.time() - end)
        end = time.time()

        if i % PRINT_FREQ == 0:
            progress.display(i)

In [11]:
# Define validation step.
def validate(val_loader, model, criterion):
    """Evaluate `model` on `val_loader` and return the average top-1 accuracy.

    Args:
        val_loader: iterable of (images, target) batches that supports len().
        model: network to evaluate; switched to eval mode here.
        criterion: loss callable invoked as criterion(output, target).

    Returns:
        The running Acc@1 average (`top1.avg`) over all validation samples.

    Side effects:
        Prints a progress line every PRINT_FREQ batches plus a final summary, and
        writes the averaged loss / Acc@1 / Acc@5 to the module-level TensorBoard
        `writer` at the current `global_step` (which is only read here).
    """
    # Keep progress of metrics.
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(val_loader),
        [batch_time, losses, top1, top5],
        prefix='Test: ')

    # Switch to evaluate mode.
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):

            # Move the whole batch to the GPU under a single guard so images and
            # target can never end up on different devices.
            if GPU is not None:
                images = images.cuda(GPU, non_blocking=True)
                target = target.cuda(GPU, non_blocking=True)

            # Compute output.
            output = model(images)
            loss = criterion(output, target)

            # Measure accuracy and record loss.
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # Measure elapsed time.
            batch_time.update(time.time() - end)
            end = time.time()

            if i % PRINT_FREQ == 0:
                progress.display(i)

        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
              .format(top1=top1, top5=top5))

    # Write tensorboard logs. The loss is the average over the whole validation
    # set, not the last batch's value (which is also undefined when the loader
    # yields no batches).
    writer.add_scalar("Loss/val", losses.avg, global_step=global_step)
    writer.add_scalar("Acc1/val", top1.avg, global_step=global_step)
    writer.add_scalar("Acc5/val", top5.avg, global_step=global_step)

    return top1.avg

In [None]:
# Train and validate, timing the whole run.
start = time.perf_counter()
try:
    for epoch in range(1):
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        validate(val_loader, model, criterion)
finally:
    # Flush buffered TensorBoard events even if the (long) run is interrupted,
    # so the scalars logged so far reach disk.
    writer.flush()
end = time.perf_counter()

print(f"Time to Train and 1 Epoch: {end - start}")

Epoch: [0][      0/1281167]	Time  0.812 ( 0.812)	Data  0.229 ( 0.229)	Loss 6.7422e+00 (6.7422e+00)	Acc@1   0.00 (  0.00)	Acc@5   0.00 (  0.00)
Epoch: [0][   1000/1281167]	Time  0.015 ( 0.016)	Data  0.000 ( 0.000)	Loss 9.0234e+00 (7.8044e+00)	Acc@1   0.00 (  0.30)	Acc@5   0.00 (  0.80)
Epoch: [0][   2000/1281167]	Time  0.017 ( 0.016)	Data  0.000 ( 0.000)	Loss 7.8203e+00 (7.8293e+00)	Acc@1   0.00 (  0.20)	Acc@5   0.00 (  0.60)
Epoch: [0][   3000/1281167]	Time  0.015 ( 0.016)	Data  0.000 ( 0.000)	Loss 6.9492e+00 (7.7673e+00)	Acc@1   0.00 (  0.13)	Acc@5   0.00 (  0.60)
Epoch: [0][   4000/1281167]	Time  0.015 ( 0.016)	Data  0.000 ( 0.000)	Loss 7.6250e+00 (7.6856e+00)	Acc@1   0.00 (  0.12)	Acc@5   0.00 (  0.60)
