In [8]:
!pip install pretrainedmodels
!pip install torch_optimizer
!pip install torchcontrib



In [9]:
import os
import gc
gc.enable()
import sys
import time
from glob import glob
from datetime import datetime

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torchvision
from torchvision import transforms, models, datasets
import torch.nn as nn
import torch_optimizer as optim
import torch.nn.functional as F
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import BatchSampler, SequentialSampler
from fastprogress.fastprogress import master_bar, progress_bar

import pretrainedmodels
import torchcontrib
from torchcontrib.optim import SWA

import warnings
warnings.simplefilter("ignore")

In [10]:
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
to_rgb = transforms.Lambda(lambda image: image.convert('RGB'))
resize = transforms.Resize((224,224))

augmentations = transforms.Compose([
                                    resize,
                                    to_rgb,
                                    transforms.RandomResizedCrop(224),
                                    transforms.RandomHorizontalFlip(),
                                    transforms.RandomVerticalFlip(),
                                    transforms.ToTensor(),
                                    normalize
                                    ])

train_dataset = datasets.FashionMNIST(
  os.path.join("/tmp/fashionmnist"),
  train=True,
  download=True,
  transform=augmentations)

test_dataset = datasets.FashionMNIST(
  os.path.join("/tmp/fashionmnist"),
  train=False,
  download=True,
  transform=augmentations)

In [26]:
def schedule(epoch):
    t = (epoch) / (swa_start)
    lr_ratio = swa_lr / lr_init
    if t <= 0.5:
        factor = 1.0
    elif t <= 0.9:
        factor = 1.0 - (1.0 - lr_ratio) * (t - 0.5) / 0.4
    else:
        factor = lr_ratio
    return lr_init * factor

def adjust_learning_rate(optimizer, lr):
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr

In [33]:
# Training Variables
bs = 128
epochs = 6
num_classes = 10
swa_start = 4
swa_lr = 5e-4
lr_init = 1e-4
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# DatLoaders
train_dl = DataLoader(train_dataset, batch_size=bs, shuffle=True, num_workers=4, drop_last=True, pin_memory=True)
test_dl = DataLoader(test_dataset, batch_size=bs, shuffle=True, num_workers=4, drop_last=True, pin_memory=True)

# Steps per epoch
steps_per_epoch = len(train_dl.dataset) / bs
steps_per_epoch = int(steps_per_epoch)
print("Steps per epoch:", steps_per_epoch)

# Model
model = pretrainedmodels.__dict__['resnet18'](pretrained="imagenet")
model.last_linear.out_features = num_classes
model.to(device)
if torch.cuda.device_count()>1:
    model = nn.DataParallel(model)
    
# Loss
criterion = nn.CrossEntropyLoss().to(device)

# Optimizer
base_optimizer = optim.RAdam(model.parameters(), 
                        lr=1e-3, 
                        betas=(0.9,0.999), 
                        eps=1e-3, 
                        weight_decay=1e-4)

# Stochastic Weight Averaging
optimizer = SWA(base_optimizer, swa_start=swa_start * steps_per_epoch,
                swa_freq=steps_per_epoch, swa_lr=swa_lr)
print('SGD training')
# scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs*len(train_dl))

Steps per epoch: 468
SGD training


In [34]:
class AverageMeter(object):
    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)

class ProgressMeter(object):
    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))

    def _get_batch_fmtstr(self, num_batches):
        num_digits = len(str(num_batches // 1))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'

def accuracy(output, target, topk=(1,)):
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

In [35]:
for epoch in range(epochs):
    # Train
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    learning_rate = AverageMeter('LR',':2.8f')
    progress = ProgressMeter(
        len(train_dl),
        [batch_time, data_time, losses, top1, top5, learning_rate],
        prefix="Epoch: [{}]".format(epoch))
    lr = schedule(epoch)
    adjust_learning_rate(optimizer, lr)
    model.train()
    end = time.time()
    for i, (images, targets) in enumerate(train_dl):
        data_time.update(time.time()-end)
        images = images.cuda(non_blocking=True)
        targets = targets.cuda(non_blocking=True)
        outputs = model(images)
        loss = criterion(outputs, targets)
        acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))
        learning_rate.update(optimizer.param_groups[0]['lr'])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # scheduler.step()
        batch_time.update(time.time() - end)
        end = time.time()
        if i %100 == 0:
            progress.display(i)

    # Validation
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    learning_rate = AverageMeter('LR',':2.8f')
    progress = ProgressMeter(
        len(test_dl),
        [batch_time, losses, top1, top5, learning_rate],
        prefix='Test: ')
    if epoch>=swa_start:
        if epoch==swa_start:
            print('Starting SWA Training..')
        optimizer.swap_swa_sgd()
        optimizer.bn_update(train_dl, model, device='cuda')
    model.eval()
    with torch.no_grad():
        end = time.time()
        for i, (images, targets) in enumerate(test_dl):
            images = images.cuda(non_blocking=True)
            targets = targets.cuda(non_blocking=True)
            outputs = model(images)
            loss = criterion(outputs, targets)
            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))
            learning_rate.update(optimizer.param_groups[0]['lr'])
            batch_time.update(time.time() - end)
            end = time.time()
            if i %100 == 0:
                progress.display(i)
    if epoch>=swa_start:
        optimizer.swap_swa_sgd()

Epoch: [0][  0/468]	Time  2.259 ( 2.259)	Data  1.802 ( 1.802)	Loss 9.4098e+00 (9.4098e+00)	Acc@1   0.00 (  0.00)	Acc@5   0.00 (  0.00)	LR 0.00010000 (0.00010000)
Epoch: [0][100/468]	Time  0.459 ( 0.339)	Data  0.272 ( 0.144)	Loss 3.3850e+00 (6.6131e+00)	Acc@1  39.06 ( 11.70)	Acc@5  62.50 ( 21.04)	LR 0.00010000 (0.00010000)
Epoch: [0][200/468]	Time  0.571 ( 0.324)	Data  0.434 ( 0.131)	Loss 1.6739e+00 (4.4685e+00)	Acc@1  63.28 ( 32.17)	Acc@5  91.41 ( 52.01)	LR 0.00010000 (0.00010000)
Epoch: [0][300/468]	Time  0.508 ( 0.320)	Data  0.377 ( 0.126)	Loss 1.1103e+00 (3.4298e+00)	Acc@1  67.97 ( 42.92)	Acc@5  96.09 ( 66.12)	LR 0.00010000 (0.00010000)
Epoch: [0][400/468]	Time  0.544 ( 0.320)	Data  0.429 ( 0.126)	Loss 1.2800e+00 (2.8497e+00)	Acc@1  66.41 ( 48.98)	Acc@5  94.53 ( 73.68)	LR 0.00010000 (0.00010000)
Test: [ 0/78]	Time  1.448 ( 1.448)	Loss 8.6788e-01 (8.6788e-01)	Acc@1  71.09 ( 71.09)	Acc@5  96.09 ( 96.09)	LR 0.00010000 (0.00010000)
Epoch: [1][  0/468]	Time  1.669 ( 1.669)	Data  1.499 ( 