In [4]:
import numpy as np
import copy
import torch

from torch import nn, optim
from PIL import ImageFile

from desenet import densenet121

ImageFile.LOAD_TRUNCATED_IMAGES = True

import warnings
warnings.filterwarnings('ignore')

In [5]:
multi_gpus = False
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if torch.cuda.device_count() > 1:
    multi_gpus = True
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [6]:
model = densenet121(num_classes=150)

In [11]:
import torchvision
from torchvision import datasets, transforms

def get_transform(random_crop=True):
    normalize = transforms.Normalize(
        mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
        std=[x / 255.0 for x in [63.0, 62.1, 66.7]])
    transform = []
    transform.append(transforms.Resize(256))
    if random_crop:
        transform.append(transforms.RandomResizedCrop(224))
        transform.append(transforms.RandomHorizontalFlip())
    else:
        transform.append(transforms.CenterCrop(224))
    transform.append(transforms.ToTensor())
    transform.append(normalize)
    return transforms.Compose(transform)

class CustomDataset(datasets.ImageFolder):
    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (image_id, sample, target) where target is class_index of
                the target class.
        """
        path, target = self.samples[index]
        #print(path)
        #print(target)
        sample = self.loader(path)
        if self.transform is not None:
            sample = self.transform(sample)
        if self.target_transform is not None:
            target = self.target_transform(target)

        image_id = path.split('/')[-1]

        return image_id, sample, target

In [12]:
from torch.utils import data

data_dir = 'train/train_data'

dataset = CustomDataset(data_dir, transform=get_transform(random_crop=True))

split_size = int(len(dataset) * 0.9)
train_set, valid_set = data.random_split(dataset, [split_size, len(dataset) - split_size])
tr_loader = data.DataLoader(dataset=train_set,
                            batch_size=256,
                            #sampler = RandomIdentitySampler(train_set, 4),
                            shuffle=True,
                            pin_memory=True,
                            num_workers=16)

val_loader = data.DataLoader(dataset=valid_set,
                             batch_size=256,
                             shuffle=False,
                            pin_memory=True,                             
                            num_workers=16)

In [13]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [14]:
count_parameters(model)

7107606

In [15]:
import apex
print("using apex synced BN")
model = apex.parallel.convert_syncbn_model(model)

using apex synced BN


In [16]:
optimizer = optim.SGD(model.parameters(), lr=1., momentum=0.9, weight_decay=1e-4, nesterov=True)

In [17]:
from apex import amp, optimizers

model, optimizer = amp.initialize(model.cuda(), optimizer, opt_level='O3',keep_batchnorm_fp32=True)

Selected optimization level O3:  Pure FP16 training.
Defaults for this optimization level are:
enabled                : True
opt_level              : O3
cast_model_type        : torch.float16
patch_torch_functions  : False
keep_batchnorm_fp32    : False
master_weights         : False
loss_scale             : 1.0
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O3
cast_model_type        : torch.float16
patch_torch_functions  : False
keep_batchnorm_fp32    : True
master_weights         : False
loss_scale             : 1.0


In [18]:
criterion = nn.CrossEntropyLoss().cuda()

In [19]:
import datetime
test_time = datetime.datetime.now()

torch.cuda.synchronize()
model.train()
for _ in range(2):
    _, inputs, labels = next(iter(tr_loader))
    print(1)
    inputs = inputs.cuda(non_blocking=True)        
    labels = labels.cuda(non_blocking=True)    
    print(2)    
    logits = model(inputs)
    print(3)                       
    loss = criterion(logits, labels)                   
    print(4)                   
    loss.backward()
    print(5)                            
    model.zero_grad()
    print(10)                                
torch.cuda.synchronize()
test_end = datetime.datetime.now() - test_time
print('test {}'.format(test_end))

1
2


RuntimeError: CUDA out of memory. Tried to allocate 90.00 MiB (GPU 0; 15.75 GiB total capacity; 14.44 GiB already allocated; 78.88 MiB free; 254.47 MiB cached) (malloc at /opt/conda/conda-bld/pytorch_1573049306803/work/c10/cuda/CUDACachingAllocator.cpp:267)
frame #0: c10::Error::Error(c10::SourceLocation, std::string const&) + 0x47 (0x7f3190977687 in /opt/anaconda3/lib/python3.7/site-packages/torch/lib/libc10.so)
frame #1: <unknown function> + 0x1ea29 (0x7f3190bbaa29 in /opt/anaconda3/lib/python3.7/site-packages/torch/lib/libc10_cuda.so)
frame #2: <unknown function> + 0x1fade (0x7f3190bbbade in /opt/anaconda3/lib/python3.7/site-packages/torch/lib/libc10_cuda.so)
frame #3: at::native::empty_cuda(c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat>) + 0x26d (0x7f3162d5036d in /opt/anaconda3/lib/python3.7/site-packages/torch/lib/libtorch.so)
frame #4: <unknown function> + 0x3d98d18 (0x7f3161341d18 in /opt/anaconda3/lib/python3.7/site-packages/torch/lib/libtorch.so)
frame #5: <unknown function> + 0x3852408 (0x7f3160dfb408 in /opt/anaconda3/lib/python3.7/site-packages/torch/lib/libtorch.so)
frame #6: <unknown function> + 0x17a5add (0x7f315ed4eadd in /opt/anaconda3/lib/python3.7/site-packages/torch/lib/libtorch.so)
frame #7: at::native::empty_like(at::Tensor const&, c10::TensorOptions const&, c10::optional<c10::MemoryFormat>) + 0x4ab (0x7f315ed56e4b in /opt/anaconda3/lib/python3.7/site-packages/torch/lib/libtorch.so)
frame #8: at::native::empty_like(at::Tensor const&) + 0x3d (0x7f315ed585cd in /opt/anaconda3/lib/python3.7/site-packages/torch/lib/libtorch.so)
frame #9: <unknown function> + 0x1ac9459 (0x7f315f072459 in /opt/anaconda3/lib/python3.7/site-packages/torch/lib/libtorch.so)
frame #10: <unknown function> + 0x341f91b (0x7f31609c891b in /opt/anaconda3/lib/python3.7/site-packages/torch/lib/libtorch.so)
frame #11: <unknown function> + 0x19d3d9d (0x7f315ef7cd9d in /opt/anaconda3/lib/python3.7/site-packages/torch/lib/libtorch.so)
frame #12: <unknown function> + 0x1d8b6 (0x7f31399638b6 in /opt/anaconda3/lib/python3.7/site-packages/syncbn.cpython-37m-x86_64-linux-gnu.so)
frame #13: batchnorm_forward_CUDA(at::Tensor, at::Tensor, at::Tensor, c10::optional<at::Tensor>, c10::optional<at::Tensor>) + 0x5e (0x7f313996620e in /opt/anaconda3/lib/python3.7/site-packages/syncbn.cpython-37m-x86_64-linux-gnu.so)
frame #14: <unknown function> + 0x19722 (0x7f313995f722 in /opt/anaconda3/lib/python3.7/site-packages/syncbn.cpython-37m-x86_64-linux-gnu.so)
frame #15: <unknown function> + 0x198ae (0x7f313995f8ae in /opt/anaconda3/lib/python3.7/site-packages/syncbn.cpython-37m-x86_64-linux-gnu.so)
frame #16: <unknown function> + 0x15fa4 (0x7f313995bfa4 in /opt/anaconda3/lib/python3.7/site-packages/syncbn.cpython-37m-x86_64-linux-gnu.so)
frame #17: _PyMethodDef_RawFastCallKeywords + 0x254 (0x5639a8d73744 in /opt/anaconda3/bin/python)
frame #18: _PyCFunction_FastCallKeywords + 0x21 (0x5639a8d73861 in /opt/anaconda3/bin/python)
frame #19: _PyEval_EvalFrameDefault + 0x4ecd (0x5639a8ddf2bd in /opt/anaconda3/bin/python)
frame #20: _PyEval_EvalCodeWithName + 0xac9 (0x5639a8d23d09 in /opt/anaconda3/bin/python)
frame #21: _PyFunction_FastCallDict + 0x1d5 (0x5639a8d24635 in /opt/anaconda3/bin/python)
frame #22: THPFunction_apply(_object*, _object*) + 0x8d6 (0x7f3191267086 in /opt/anaconda3/lib/python3.7/site-packages/torch/lib/libtorch_python.so)
frame #23: _PyMethodDef_RawFastCallKeywords + 0x1f0 (0x5639a8d736e0 in /opt/anaconda3/bin/python)
frame #24: _PyCFunction_FastCallKeywords + 0x21 (0x5639a8d73861 in /opt/anaconda3/bin/python)
frame #25: _PyEval_EvalFrameDefault + 0x4ecd (0x5639a8ddf2bd in /opt/anaconda3/bin/python)
frame #26: _PyEval_EvalCodeWithName + 0x2f9 (0x5639a8d23539 in /opt/anaconda3/bin/python)
frame #27: _PyFunction_FastCallDict + 0x1d5 (0x5639a8d24635 in /opt/anaconda3/bin/python)
frame #28: _PyObject_Call_Prepend + 0x63 (0x5639a8d42e53 in /opt/anaconda3/bin/python)
frame #29: PyObject_Call + 0x6e (0x5639a8d35dbe in /opt/anaconda3/bin/python)
frame #30: _PyEval_EvalFrameDefault + 0x1e42 (0x5639a8ddc232 in /opt/anaconda3/bin/python)
frame #31: _PyEval_EvalCodeWithName + 0x2f9 (0x5639a8d23539 in /opt/anaconda3/bin/python)
frame #32: _PyFunction_FastCallDict + 0x1d5 (0x5639a8d24635 in /opt/anaconda3/bin/python)
frame #33: _PyObject_Call_Prepend + 0x63 (0x5639a8d42e53 in /opt/anaconda3/bin/python)
frame #34: <unknown function> + 0x16ba3a (0x5639a8d7aa3a in /opt/anaconda3/bin/python)
frame #35: _PyObject_FastCallKeywords + 0x49b (0x5639a8d7b8fb in /opt/anaconda3/bin/python)
frame #36: _PyEval_EvalFrameDefault + 0x52f8 (0x5639a8ddf6e8 in /opt/anaconda3/bin/python)
frame #37: _PyFunction_FastCallDict + 0x10b (0x5639a8d2456b in /opt/anaconda3/bin/python)
frame #38: _PyObject_Call_Prepend + 0x63 (0x5639a8d42e53 in /opt/anaconda3/bin/python)
frame #39: PyObject_Call + 0x6e (0x5639a8d35dbe in /opt/anaconda3/bin/python)
frame #40: _PyEval_EvalFrameDefault + 0x1e42 (0x5639a8ddc232 in /opt/anaconda3/bin/python)
frame #41: _PyEval_EvalCodeWithName + 0x2f9 (0x5639a8d23539 in /opt/anaconda3/bin/python)
frame #42: _PyFunction_FastCallDict + 0x1d5 (0x5639a8d24635 in /opt/anaconda3/bin/python)
frame #43: _PyObject_Call_Prepend + 0x63 (0x5639a8d42e53 in /opt/anaconda3/bin/python)
frame #44: <unknown function> + 0x16ba3a (0x5639a8d7aa3a in /opt/anaconda3/bin/python)
frame #45: _PyObject_FastCallKeywords + 0x49b (0x5639a8d7b8fb in /opt/anaconda3/bin/python)
frame #46: _PyEval_EvalFrameDefault + 0x4a96 (0x5639a8ddee86 in /opt/anaconda3/bin/python)
frame #47: _PyFunction_FastCallDict + 0x10b (0x5639a8d2456b in /opt/anaconda3/bin/python)
frame #48: _PyObject_Call_Prepend + 0x63 (0x5639a8d42e53 in /opt/anaconda3/bin/python)
frame #49: PyObject_Call + 0x6e (0x5639a8d35dbe in /opt/anaconda3/bin/python)
frame #50: _PyEval_EvalFrameDefault + 0x1e42 (0x5639a8ddc232 in /opt/anaconda3/bin/python)
frame #51: _PyEval_EvalCodeWithName + 0x2f9 (0x5639a8d23539 in /opt/anaconda3/bin/python)
frame #52: _PyFunction_FastCallDict + 0x1d5 (0x5639a8d24635 in /opt/anaconda3/bin/python)
frame #53: _PyObject_Call_Prepend + 0x63 (0x5639a8d42e53 in /opt/anaconda3/bin/python)
frame #54: <unknown function> + 0x16ba3a (0x5639a8d7aa3a in /opt/anaconda3/bin/python)
frame #55: _PyObject_FastCallKeywords + 0x49b (0x5639a8d7b8fb in /opt/anaconda3/bin/python)
frame #56: _PyEval_EvalFrameDefault + 0x52f8 (0x5639a8ddf6e8 in /opt/anaconda3/bin/python)
frame #57: _PyFunction_FastCallDict + 0x10b (0x5639a8d2456b in /opt/anaconda3/bin/python)
frame #58: _PyObject_Call_Prepend + 0x63 (0x5639a8d42e53 in /opt/anaconda3/bin/python)
frame #59: PyObject_Call + 0x6e (0x5639a8d35dbe in /opt/anaconda3/bin/python)
frame #60: _PyEval_EvalFrameDefault + 0x1e42 (0x5639a8ddc232 in /opt/anaconda3/bin/python)
frame #61: _PyEval_EvalCodeWithName + 0x2f9 (0x5639a8d23539 in /opt/anaconda3/bin/python)
frame #62: _PyFunction_FastCallDict + 0x1d5 (0x5639a8d24635 in /opt/anaconda3/bin/python)
frame #63: _PyObject_Call_Prepend + 0x63 (0x5639a8d42e53 in /opt/anaconda3/bin/python)


In [15]:
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)


class ProgressMeter(object):
    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))

    def _get_batch_fmtstr(self, num_batches):
        num_digits = len(str(num_batches // 1))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'

In [16]:
def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

In [17]:
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.1, steps_per_epoch=len(tr_loader)
                                                , epochs=30, pct_start=0.2)

In [18]:
import datetime
import time
high = 0.0
epoch_time = AverageMeter('Epoch', ':6.3f')
batch_time = AverageMeter('Batch', ':6.3f')
data_time = AverageMeter('Data', ':6.3f')
losses = AverageMeter('Loss', ':.5f')
learning_rates = AverageMeter('LearningRate', ':.5f')
top1 = AverageMeter('Acc@1', ':6.2f')
top5 = AverageMeter('Acc@5', ':6.2f')

for epoch in range(30):  # loop over the dataset multiple times
    time_ = datetime.datetime.now()    
    model.train()
    running_loss = 0.0
    running_corrects = 0
    total = 0
    progress = ProgressMeter(
        len(tr_loader),
        [batch_time, data_time, losses, top1, top5, learning_rates],
        prefix="Epoch: [{}]".format(epoch))
    
    end = time.time()    
    for i, (_, inputs, labels) in enumerate(tr_loader, 0):
        # get the inputs; data is a list of [inputs, labels]
        #print(inputs.shape)
        #print(labels.shape)
        data_time.update(time.time() - end)
        inputs = inputs.cuda(non_blocking=True)
        labels = labels.cuda(non_blocking=True)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        #_, preds = torch.max(outputs, 1)
        #loss.backward()
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
            
        optimizer.step()
        scheduler.step()
        # print statistics
        acc1, acc5 = accuracy(outputs, labels, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        learning_rates.update(scheduler.get_lr()[0])        
        top1.update(acc1[0], inputs.size(0))
        top5.update(acc5[0], inputs.size(0))

        
        batch_time.update(time.time() - end)
        if i % 100 == 99:    # print every 2000 mini-batches
            progress.display(i)
            #running_loss = 0.0
    elapsed = datetime.datetime.now() - time_
    print('{} elapsed for {}'.format(elapsed, epoch+1))

    
print('Finished Training')

Epoch: [0][ 99/365]	Batch 56.150 (32.287)	Data 55.737 (31.930)	Loss 4.70192 (4.87275)	Acc@1   1.56 (  1.43)	Acc@5  10.16 (  6.26)	LearningRate 0.00449 (0.00417)
Epoch: [0][199/365]	Batch 107.303 (57.101)	Data 106.965 (56.742)	Loss 4.42447 (4.69228)	Acc@1   4.69 (  2.67)	Acc@5  17.19 ( 10.13)	LearningRate 0.00596 (0.00466)
Epoch: [0][299/365]	Batch 157.085 (82.088)	Data 156.737 (81.727)	Loss 4.14706 (4.55699)	Acc@1   4.30 (  3.80)	Acc@5  24.61 ( 13.68)	LearningRate 0.00838 (0.00548)
0:03:03.524555 elapsed for 1
Epoch: [1][ 99/365]	Batch 60.160 (84.283)	Data 59.784 (83.925)	Loss 3.88348 (4.36470)	Acc@1  11.33 (  6.00)	Acc@5  32.81 ( 19.33)	LearningRate 0.01430 (0.00750)
Epoch: [1][199/365]	Batch 108.400 (84.135)	Data 107.970 (83.776)	Loss 3.86043 (4.26291)	Acc@1  11.33 (  7.23)	Acc@5  36.72 ( 22.42)	LearningRate 0.01893 (0.00910)
Epoch: [1][299/365]	Batch 155.428 (91.371)	Data 155.048 (91.012)	Loss 3.55986 (4.16415)	Acc@1  15.62 (  8.53)	Acc@5  42.58 ( 25.33)	LearningRate 0.02425 (0.0109

In [19]:
torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'loss': loss,    
    
}, './checkpoint/resnet50_fp16_sconv_ep030.b0.pth')

In [20]:
def classification_val(model, val_loader):
    correct = 0
    total = 0    
    
    model.eval()
    with torch.no_grad():
        for data in val_loader:
            _, images, labels = data
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return correct/total

In [21]:
cls_results = [classification_val(model, val_loader) for i in range(10)]

In [22]:
np.mean(cls_results)

0.7725380612834843

In [23]:
np.var(cls_results)

6.425454679560811e-06

In [24]:
np.std(cls_results)

0.0025348480584762496

In [25]:
def val_retrieval(model, val_loader):
    feats = None
    data_ids = None

    model.eval()
    with torch.no_grad():
        for idx, (_, images, labels) in enumerate(val_loader):
            images = images.to(device)
            #labels = labels.to(device)

            feat = model(images, feature=True)
            feat = feat.detach().cpu().numpy()

            feat = feat/np.linalg.norm(feat, axis=1)[:, np.newaxis]

            if feats is None:
                feats = feat
            else:
                feats = np.append(feats, feat, axis=0)

            if data_ids is None:
                data_ids = labels
            else:
                data_ids = np.append(data_ids, labels, axis=0)

        score_matrix = feats.dot(feats.T)
        np.fill_diagonal(score_matrix, -np.inf)
        top1_reference_indices = np.argmax(score_matrix, axis=1)

        top1_reference_ids = [
            [data_ids[idx], data_ids[top1_reference_indices[idx]]] for idx in
            range(len(data_ids))]

    total_count = len(top1_reference_ids)
    correct = 0
    for ids in top1_reference_ids:
        if ids[0] == ids[1]:
            correct += 1        
    return correct/total_count

In [26]:
retrieval_result = [val_retrieval(model, val_loader) for i in range(10)]

In [27]:
np.mean(retrieval_result)

0.6178358065137791

In [28]:
np.std(retrieval_result)

0.0038908581994208

In [29]:
retrieval_result

[0.6151474272499519,
 0.6104259009443053,
 0.615436500289073,
 0.6237232607438813,
 0.6200616689150126,
 0.6146656388514165,
 0.6214106764309115,
 0.6160146463673155,
 0.6200616689150126,
 0.6214106764309115]