In [1]:
import os
import random
import shutil
import time
import warnings
from datetime import datetime
import argparse

import numpy as np

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.optim as optim

from torch.cuda.amp import autocast, GradScaler

import torch.utils.data
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('logs')

SEED=1
random.seed(SEED)
torch.manual_seed(SEED)
cudnn.deterministic = True

GPU = 'cuda:0'
START_EPOCH = 0
ARCH = 'densenet'
EPOCHS = 10
LR = .0005
MOMENTUM = 0.9
WEIGHT_DECAY = 5e-4
PRINT_FREQ = 50
BATCH_SIZE = 100
WORKERS=2
#TRAINDIR="data/training/DEWP_class"
#VALDIR="data/test/DEWP_class"
imagenet_mean_RGB = [0.47889522, 0.47227842, 0.43047404]
imagenet_std_RGB = [0.229, 0.224, 0.225]

In [2]:
def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)
        
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)

class ProgressMeter(object):
    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))

    def _get_batch_fmtstr(self, num_batches):
        num_digits = len(str(num_batches // 1))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'

def train(train_loader, model, criterion, optimizer, scaler, epoch):

    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top2 = AverageMeter('Acc@2', ':6.2f')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses, top1, top2],
        prefix="Epoch: [{}]".format(epoch))

    model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
                
        images = images.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        optimizer.zero_grad()
        with autocast():
            output = model(images)
            loss = criterion(output, target)

        acc1, acc2 = accuracy(output, target, topk=(1, 2))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top2.update(acc2[0], images.size(0))

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        batch_time.update(time.time() - end)
        end = time.time()
        
        if i % PRINT_FREQ == 0:
            progress.display(i)

            writer.add_scalar('Train Acc@1',
                acc1[0],
                epoch * len(train_loader) + i)

def validate(val_loader, model, criterion, epoch):

    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top2 = AverageMeter('Acc@2', ':6.2f')
    progress = ProgressMeter(
        len(val_loader),
        [batch_time, losses, top1, top2],
        prefix='Test: ')

    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            
            images = images.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)
            
            output = model(images)
            loss = criterion(output, target)

            acc1, acc2 = accuracy(output, target, topk=(1, 2))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top2.update(acc2[0], images.size(0))
            batch_time.update(time.time() - end)
            end = time.time()

            if i % PRINT_FREQ == 0:
                progress.display(i)

                writer.add_scalar('Validation Acc@1',
                    acc1[0],
                    epoch * len(val_loader) + i)

def save_checkpoint(state, filename='checkpoint.pth.tar'):
    torch.save(state, filename)

In [3]:
##From PyTorch docs https://pytorch.org/tutorials/beginner/finetuning_torchvision_models_tutorial.html 

def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False

def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    # Initialize these variables which will be set in this if statement. Each of these
    #   variables is model specific.
    model_ft = None
    input_size = 0

    if model_name == "resnet":
        """ Resnet18
        """
        model_ft = models.resnet18(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "alexnet":
        """ Alexnet
        """
        model_ft = models.alexnet(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs,num_classes)
        input_size = 224

    elif model_name == "vgg":
        """ VGG11_bn
        """
        model_ft = models.vgg11_bn(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs,num_classes)
        input_size = 224

    elif model_name == "squeezenet":
        """ Squeezenet
        """
        model_ft = models.squeezenet1_0(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=(1,1), stride=(1,1))
        model_ft.num_classes = num_classes
        input_size = 224

    elif model_name == "densenet":
        """ Densenet
        """
        model_ft = models.densenet121(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier.in_features
        model_ft.classifier = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "inception":
        """ Inception v3
        Be careful, expects (299,299) sized images and has auxiliary output
        """
        model_ft = models.inception_v3(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        # Handle the auxilary net
        num_ftrs = model_ft.AuxLogits.fc.in_features
        model_ft.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
        # Handle the primary net
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs,num_classes)
        input_size = 299

    else:
        print("Invalid model name, exiting...")
        exit()

    return model_ft, input_size

In [4]:
def single_init(train_dir, val_dir, num_classes):
    model_ft, IMG_SIZE = initialize_model(ARCH, num_classes, False, use_pretrained=False)

    transform_train = transforms.Compose([
        transforms.RandomResizedCrop(IMG_SIZE),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(imagenet_mean_RGB, imagenet_std_RGB)
    ])

    transform_val = transforms.Compose([
        transforms.Resize(IMG_SIZE),
        transforms.CenterCrop(IMG_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(imagenet_mean_RGB, imagenet_std_RGB)
    ])

    torch.cuda.set_device(GPU)
    model_ft.cuda(GPU)
    criterion = nn.CrossEntropyLoss().cuda(GPU)
    scaler = GradScaler()

    optimizer = optim.SGD(
        model_ft.parameters(),
        lr=LR,
        weight_decay=WEIGHT_DECAY,
        momentum=MOMENTUM
    )
    
    model = model_ft

    # use CosineAnnealingLR
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,T_max=EPOCHS)
    
    train_dataset = torchvision.datasets.ImageFolder(train_dir, transform=transform_train)
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,batch_size=BATCH_SIZE,shuffle=True)

    val_dataset = torchvision.datasets.ImageFolder(val_dir, transform=transform_val)
    val_loader = torch.utils.data.DataLoader(val_dataset,batch_size=BATCH_SIZE, shuffle=True) 

    return (model, criterion, optimizer, scheduler, scaler, train_loader, val_loader)

In [5]:
TEMP_CHECKPOINT_PATH = "./checkpoint_temp.pth.tar"
DEWP_CHECKPOINT_PATH = "./checkpoint_dewp.pth.tar"
VISIB_CHECKPOINT_PATH = "./checkpoint_visib.pth.tar"
WDSP_CHECKPOINT_PATH = "./checkpoint_wdsp.pth.tar"

TEMP_training_dir = "../../../data/training/TEMP_class"
TEMP_test_dir = "../../../data/test/TEMP_class"
DEWP_training_dir = "../../../data/training/DEWP_class"
DEWP_test_dir = "../../../data/test/DEWP_class"
VISIB_training_dir = "../../../data/training/VISIB_class"
VISIB_test_dir = "../../../data/test/VISIB_class"
WDSP_training_dir = "../../../data/training/WDSP_class"
WDSP_test_dir = "../../../data/test/WDSP_class"

In [6]:
model, criterion, optimizer, scheduler, scaler, train_loader, val_loader = single_init(DEWP_training_dir, DEWP_test_dir, num_classes=3)
    
start = datetime.now()

for epoch in range(START_EPOCH, EPOCHS):

    train(train_loader, model, criterion, optimizer, scaler, epoch)
    validate(val_loader, model, criterion, epoch)

    save_checkpoint({
        'epoch': epoch + 1,
        'arch': ARCH,
        'state_dict': model.state_dict(),
        'optimizer' : optimizer.state_dict(),
    }, filename=DEWP_CHECKPOINT_PATH)

    scheduler.step()
    print('Learning rate: ' + str(scheduler.get_last_lr()))
    print("Time elapsed: " + str(datetime.now() - start))

#TODO: Save final model state_dict



Epoch: [0][   0/1294]	Time  2.477 ( 2.477)	Data  0.000 ( 0.000)	Loss 1.1490e+00 (1.1490e+00)	Acc@1  33.00 ( 33.00)	Acc@2  60.00 ( 60.00)
Epoch: [0][  50/1294]	Time  0.943 ( 0.966)	Data  0.000 ( 0.000)	Loss 1.1014e+00 (1.1043e+00)	Acc@1  33.00 ( 33.71)	Acc@2  68.00 ( 66.82)
Epoch: [0][ 100/1294]	Time  0.937 ( 0.950)	Data  0.000 ( 0.000)	Loss 1.0988e+00 (1.0998e+00)	Acc@1  36.00 ( 35.38)	Acc@2  61.00 ( 67.51)
Epoch: [0][ 150/1294]	Time  0.924 ( 0.943)	Data  0.000 ( 0.000)	Loss 1.0953e+00 (1.0988e+00)	Acc@1  32.00 ( 35.22)	Acc@2  68.00 ( 67.95)
Epoch: [0][ 200/1294]	Time  0.978 ( 0.939)	Data  0.000 ( 0.000)	Loss 1.1028e+00 (1.0973e+00)	Acc@1  33.00 ( 35.44)	Acc@2  71.00 ( 68.44)
Epoch: [0][ 250/1294]	Time  0.907 ( 0.937)	Data  0.000 ( 0.000)	Loss 1.1154e+00 (1.0968e+00)	Acc@1  33.00 ( 35.65)	Acc@2  68.00 ( 68.47)
Epoch: [0][ 300/1294]	Time  0.934 ( 0.936)	Data  0.000 ( 0.000)	Loss 1.0981e+00 (1.0960e+00)	Acc@1  35.00 ( 35.71)	Acc@2  70.00 ( 68.78)
Epoch: [0][ 350/1294]	Time  0.947 ( 0.937

In [7]:
#Temp model

model, criterion, optimizer, scheduler, scaler, train_loader, val_loader = single_init(TEMP_training_dir, TEMP_test_dir, num_classes=4)
    
start = datetime.now()

for epoch in range(START_EPOCH, EPOCHS):

    train(train_loader, model, criterion, optimizer, scaler, epoch)
    validate(val_loader, model, criterion, epoch)

    save_checkpoint({
        'epoch': epoch + 1,
        'arch': ARCH,
        'state_dict': model.state_dict(),
        'optimizer' : optimizer.state_dict(),
    }, filename=TEMP_CHECKPOINT_PATH)

    scheduler.step()
    print('Learning rate: ' + str(scheduler.get_last_lr()))
    print("Time elapsed: " + str(datetime.now() - start))

Epoch: [0][   0/1688]	Time  0.923 ( 0.923)	Data  0.000 ( 0.000)	Loss 1.4300e+00 (1.4300e+00)	Acc@1  21.00 ( 21.00)	Acc@2  45.00 ( 45.00)
Epoch: [0][  50/1688]	Time  0.853 ( 0.899)	Data  0.000 ( 0.000)	Loss 1.3760e+00 (1.3921e+00)	Acc@1  36.00 ( 25.55)	Acc@2  57.00 ( 50.55)
Epoch: [0][ 100/1688]	Time  0.871 ( 0.890)	Data  0.000 ( 0.000)	Loss 1.3741e+00 (1.3876e+00)	Acc@1  30.00 ( 26.31)	Acc@2  52.00 ( 51.54)
Epoch: [0][ 150/1688]	Time  0.905 ( 0.887)	Data  0.000 ( 0.000)	Loss 1.3749e+00 (1.3855e+00)	Acc@1  23.00 ( 26.59)	Acc@2  58.00 ( 52.09)
Epoch: [0][ 200/1688]	Time  0.902 ( 0.886)	Data  0.000 ( 0.000)	Loss 1.3744e+00 (1.3842e+00)	Acc@1  34.00 ( 26.67)	Acc@2  55.00 ( 52.25)
Epoch: [0][ 250/1688]	Time  0.868 ( 0.885)	Data  0.000 ( 0.000)	Loss 1.3876e+00 (1.3829e+00)	Acc@1  26.00 ( 26.90)	Acc@2  53.00 ( 52.71)
Epoch: [0][ 300/1688]	Time  0.892 ( 0.885)	Data  0.000 ( 0.000)	Loss 1.3637e+00 (1.3824e+00)	Acc@1  32.00 ( 26.91)	Acc@2  59.00 ( 52.76)
Epoch: [0][ 350/1688]	Time  0.875 ( 0.884

In [8]:
#Visib model

model, criterion, optimizer, scheduler, scaler, train_loader, val_loader = single_init(VISIB_training_dir, VISIB_test_dir, num_classes=3)
    
start = datetime.now()

for epoch in range(START_EPOCH, EPOCHS):

    train(train_loader, model, criterion, optimizer, scaler, epoch)
    validate(val_loader, model, criterion, epoch)

    save_checkpoint({
        'epoch': epoch + 1,
        'arch': ARCH,
        'state_dict': model.state_dict(),
        'optimizer' : optimizer.state_dict(),
    }, filename=VISIB_CHECKPOINT_PATH)

    scheduler.step()
    print('Learning rate: ' + str(scheduler.get_last_lr()))
    print("Time elapsed: " + str(datetime.now() - start))

Epoch: [0][   0/1431]	Time  0.946 ( 0.946)	Data  0.000 ( 0.000)	Loss 1.1130e+00 (1.1130e+00)	Acc@1  33.00 ( 33.00)	Acc@2  70.00 ( 70.00)
Epoch: [0][  50/1431]	Time  0.925 ( 0.914)	Data  0.000 ( 0.000)	Loss 1.1211e+00 (1.1002e+00)	Acc@1  32.00 ( 35.31)	Acc@2  61.00 ( 68.78)
Epoch: [0][ 100/1431]	Time  0.880 ( 0.912)	Data  0.000 ( 0.000)	Loss 1.0885e+00 (1.0977e+00)	Acc@1  35.00 ( 35.75)	Acc@2  62.00 ( 69.02)
Epoch: [0][ 150/1431]	Time  0.915 ( 0.910)	Data  0.000 ( 0.000)	Loss 1.0915e+00 (1.0944e+00)	Acc@1  29.00 ( 36.01)	Acc@2  71.00 ( 69.52)
Epoch: [0][ 200/1431]	Time  0.905 ( 0.908)	Data  0.000 ( 0.000)	Loss 1.0820e+00 (1.0940e+00)	Acc@1  43.00 ( 35.95)	Acc@2  75.00 ( 69.49)
Epoch: [0][ 250/1431]	Time  0.880 ( 0.906)	Data  0.000 ( 0.000)	Loss 1.0855e+00 (1.0932e+00)	Acc@1  35.00 ( 35.95)	Acc@2  74.00 ( 69.69)
Epoch: [0][ 300/1431]	Time  0.897 ( 0.906)	Data  0.000 ( 0.000)	Loss 1.0844e+00 (1.0922e+00)	Acc@1  38.00 ( 36.27)	Acc@2  67.00 ( 69.81)
Epoch: [0][ 350/1431]	Time  0.882 ( 0.905

In [9]:
#WDSP model

model, criterion, optimizer, scheduler, scaler, train_loader, val_loader = single_init(WDSP_training_dir, WDSP_test_dir, num_classes=3)
    
start = datetime.now()

for epoch in range(START_EPOCH, EPOCHS):

    train(train_loader, model, criterion, optimizer, scaler, epoch)
    validate(val_loader, model, criterion, epoch)

    save_checkpoint({
        'epoch': epoch + 1,
        'arch': ARCH,
        'state_dict': model.state_dict(),
        'optimizer' : optimizer.state_dict(),
    }, filename=WDSP_CHECKPOINT_PATH)

    scheduler.step()
    print('Learning rate: ' + str(scheduler.get_last_lr()))
    print("Time elapsed: " + str(datetime.now() - start))

Epoch: [0][   0/1381]	Time  0.987 ( 0.987)	Data  0.000 ( 0.000)	Loss 1.0952e+00 (1.0952e+00)	Acc@1  38.00 ( 38.00)	Acc@2  67.00 ( 67.00)
Epoch: [0][  50/1381]	Time  0.950 ( 0.950)	Data  0.000 ( 0.000)	Loss 1.1100e+00 (1.1018e+00)	Acc@1  30.00 ( 34.16)	Acc@2  65.00 ( 67.55)
Epoch: [0][ 100/1381]	Time  0.946 ( 0.951)	Data  0.000 ( 0.000)	Loss 1.0720e+00 (1.0985e+00)	Acc@1  44.00 ( 34.27)	Acc@2  76.00 ( 68.23)
Epoch: [0][ 150/1381]	Time  0.906 ( 0.946)	Data  0.000 ( 0.000)	Loss 1.0926e+00 (1.0970e+00)	Acc@1  29.00 ( 34.81)	Acc@2  70.00 ( 68.25)
Epoch: [0][ 200/1381]	Time  0.909 ( 0.942)	Data  0.000 ( 0.000)	Loss 1.0961e+00 (1.0966e+00)	Acc@1  36.00 ( 34.89)	Acc@2  69.00 ( 68.30)
Epoch: [0][ 250/1381]	Time  0.888 ( 0.939)	Data  0.000 ( 0.000)	Loss 1.0881e+00 (1.0966e+00)	Acc@1  37.00 ( 34.99)	Acc@2  69.00 ( 68.29)
Epoch: [0][ 300/1381]	Time  0.888 ( 0.937)	Data  0.000 ( 0.000)	Loss 1.1066e+00 (1.0968e+00)	Acc@1  35.00 ( 34.85)	Acc@2  63.00 ( 68.19)
Epoch: [0][ 350/1381]	Time  0.910 ( 0.935