In [1]:
import numpy as np
import torch
import copy
import torch.nn.functional as F
from torch.nn import DataParallel

from torch import nn, optim
from autoaugment import ImageNetPolicy
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [2]:
multi_gpus = False
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if torch.cuda.device_count() > 1:
    multi_gpus = True
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
class SEBlock(nn.Module):

  def __init__(self, planes, ratio):

      super(SEBlock, self).__init__()

      self.se_pool = nn.AdaptiveAvgPool2d((1,1))
      self.se_fc1 = nn.Linear(planes, planes // ratio)
      self.relu = nn.ReLU(inplace=True)
      self.se_fc2 = nn.Linear(planes // ratio, planes)

  def forward(self, x):

      out = self.se_pool(x)
      out = torch.flatten(out, 1)
      out = self.se_fc1(out)
      #print(out.shape)
      out = F.relu(out)
      out = self.se_fc2(out)
      out = torch.sigmoid(out)
      out = out.view(out.size(0), out.size(1), 1, 1)
      #print(x.shape)
      #print(out.shape)
      out = torch.mul(out.expand_as(x), x)

      return out

In [4]:
class ConvBlock(nn.Module):

    def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=True):

        super(ConvBlock, self).__init__()
        self.kernel_size = kernel_size
        self.stride = stride
        pad = 0
        if padding :
            pad = (self.kernel_size - 1) // 2

        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding=pad, bias=False)
        self.batchnorm = nn.BatchNorm2d(out_planes, momentum=0.99)
        self.leaky_relu = nn.LeakyReLU(0.1, inplace=True)

    def forward(self, x):

        output = self.conv(x)
        output = self.batchnorm(output)
        output = self.leaky_relu(output)

        return output

In [5]:
class DarknetBlock(nn.Module):

    def __init__(self, out_planes):

        super(DarknetBlock, self).__init__()
        self.inplanes = out_planes * 2
        self.conv1 = ConvBlock(self.inplanes, out_planes, 1)
        self.conv2 = ConvBlock(out_planes, self.inplanes, 3)
        self.se = SEBlock(self.inplanes, ratio=16)

    def forward(self, x):

        shortcut = x
        output = self.conv1(x)
        output = self.conv2(output)
        output = self.se(output)
        output = output + shortcut

        return output

In [6]:
class Darknet(nn.Module):

    def __init__(self, num_classes=1000):

        super(Darknet, self).__init__()   

        self.conv_block1 = ConvBlock(3, 32, 3, 1)
        self.conv_block2 = ConvBlock(32, 64, 3, 2)

        self.dark_block1 = DarknetBlock(32)

        self.conv_block3 = ConvBlock(64, 128, 3, 2)

        self.dark_layer1 = self._make_blocks(2, 64)
        
        self.conv_block4 = ConvBlock(128, 256, 3, 2)

        self.dark_layer2 = self._make_blocks(2, 128)

        self.conv_block5 = ConvBlock(256, 512, 3, 2)

        self.dark_layer3 = self._make_blocks(2, 256)

        self.conv_block6 = ConvBlock(512, 1024, 3, 2)

        self.dark_layer4 = self._make_blocks(2, 512)

        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        
        self.fc = nn.Linear(1024, num_classes)

    def _make_blocks(self, num_blocks, out_planes):
        blocks = []
        for _ in range(num_blocks):
            blocks.append(DarknetBlock(out_planes))

        return nn.Sequential(*blocks)

    def forward(self, x, feature=False):

        output = self.conv_block1(x)
        output = self.conv_block2(output)

        output = self.dark_block1(output)

        output = self.conv_block3(output)

        output = self.dark_layer1(output)

        output = self.conv_block4(output)

        output = self.dark_layer2(output)

        output = self.conv_block5(output)

        output = self.dark_layer3(output)

        output = self.conv_block6(output)

        output = self.dark_layer4(output)

        output = self.avgpool(output)

        output = torch.flatten(output, 1)
        
        if feature:
            return output

        output = self.fc(output)
        
        return output

In [7]:
model = Darknet(num_classes=101)

In [8]:
inputs = torch.randn((1,3,224,224))
outputs = model(inputs)

In [9]:
outputs.shape

torch.Size([1, 101])

In [10]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [11]:
count_parameters(model)

20705145

In [12]:
import apex
print("using apex synced BN")
model = apex.parallel.convert_syncbn_model(model)

using apex synced BN


In [13]:
optimizer = optim.SGD(model.parameters(), lr=1., momentum=0.9, weight_decay=5e-4, nesterov=True)

In [14]:
from apex import amp, optimizers

model, optimizer = amp.initialize(model.cuda(), optimizer, opt_level='O3',keep_batchnorm_fp32=True)

Selected optimization level O3:  Pure FP16 training.
Defaults for this optimization level are:
enabled                : True
opt_level              : O3
cast_model_type        : torch.float16
patch_torch_functions  : False
keep_batchnorm_fp32    : False
master_weights         : False
loss_scale             : 1.0
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O3
cast_model_type        : torch.float16
patch_torch_functions  : False
keep_batchnorm_fp32    : True
master_weights         : False
loss_scale             : 1.0


In [15]:
import torchvision
from torchvision import datasets, transforms
#from autoaugment import ImageNetPolicy

def get_transform(random_crop=True):
    normalize = transforms.Normalize(
        #mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
        #std=[x / 255.0 for x in [63.0, 62.1, 66.7]]
        [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
        )
    transform = []
    transform.append(transforms.Resize(256))
    if random_crop:
        #transform.append(transforms.RandomRotation(30))
        transform.append(transforms.RandomResizedCrop(224))
        transform.append(transforms.RandomHorizontalFlip())
        transform.append(ImageNetPolicy())
        #transform.append(transforms.ColorJitter(hue=.05, saturation=.05),)
    else:
        transform.append(transforms.CenterCrop(224))
    transform.append(transforms.ToTensor())
    transform.append(normalize)
    return transforms.Compose(transform)
'''
class CustomDataset(datasets.ImageFolder):
    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (image_id, sample, target) where target is class_index of
                the target class.
        """
        path, target = self.samples[index]
        #print(path)
        #print(target)
        sample = self.loader(path)
        if self.transform is not None:
            sample = self.transform(sample)
        if self.target_transform is not None:
            target = self.target_transform(target)

        image_id = path.split('/')[-1]

        return image_id, sample, target
''' 
data_dir = './data/food101/'
train_data = datasets.ImageFolder(data_dir + 'train', transform=get_transform(random_crop=True))
test_data = datasets.ImageFolder(data_dir + 'test', transform=get_transform(random_crop=False))
#testdata=datasets.ImageFolder(data_dir + r'\test', transform=test_transforms)

#trainloader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
#testloader = torch.utils.data.DataLoader(test_data, batch_size=128)
#test_loader=torch.utils.data.DataLoader(testdata, batch_size=64)    

In [16]:
from torch.utils import data

#data_dir = 'train/train_data'

#dataset = CustomDataset(data_dir, transform=get_transform(random_crop=True))

#split_size = int(len(dataset) * 0.9)
#train_set, valid_set = data.random_split(dataset, [split_size, len(dataset) - split_size])
tr_loader = data.DataLoader(dataset=train_data,
                            batch_size=256,
                            #sampler = RandomIdentitySampler(train_set, 4),
                            shuffle=True,
                            pin_memory=True,
                            num_workers=16)

val_loader = data.DataLoader(dataset=test_data,
                             batch_size=256,
                             shuffle=False,
                             pin_memory=True,                             
                             num_workers=16)

In [18]:
for _ in range(2):
    inputs, labels = next(iter(tr_loader))
    
    print(inputs.shape)
    print(labels)
    

torch.Size([256, 3, 224, 224])
tensor([ 38,  63,   8,  36,  35,  89,  54,  36,  96,  63,  48,  12,  42,  91,
         80,  88,  24,   9,  31,  46,  25,  48,  39,  76,   3,  56,  52,  93,
         55,   7,  89,  76,   7,  10,  76,  31,   5,  29,   8,  84,  94,  84,
          7,   6,  31,  58,  60,  70,   0,  52,   2,  12,  27,  92,  19,  12,
         15,   5,  17,  28,  99,  72,  15,  30,  67,  86,  57,  58,  47,  55,
         56,  29,  31,  85,   2,   2,  26,  56,  83,  46,  34,  19,  58,  51,
         34,  98,  79,  18,  15,  76,  11,  40,  85,  47,  38,   5,  36,  27,
         82,   5,   9,  99,  81,  87,  81,   1,  56,  73,   4,  34,  55,   2,
         72,  71,   6,  75,  50,  67,  91,  94,  72,   6,  27,  28,  65,  93,
         32,  56,  13,  56,  29,  29,  90,  25,  30,  73,  86,  32,  23,  73,
         42,  86,  57,  65,  50,  51,  78,  60,  37,  95,  41,  25,  94,  27,
         42,  72,  84,  40,  56,  64,  41,  47,  37,  83,  89,  64,  76,  37,
         66,  98,  14,  19,  24, 

In [17]:
import warnings
warnings.filterwarnings('ignore')

In [18]:
criterion = nn.CrossEntropyLoss().cuda()

In [19]:
torch.cuda.synchronize()
model.train()
for _ in range(2):
    inputs, labels = next(iter(tr_loader))
    print(1)
    inputs = inputs.cuda(non_blocking=True)        
    labels = labels.cuda(non_blocking=True)    
    print(2)    
    logits = model(inputs)
    print(3)                       
    loss = criterion(logits, labels)                   
    print(4)                   
    with amp.scale_loss(loss, optimizer) as scaled_loss:
        scaled_loss.backward()
    print(5)                            
    model.zero_grad()
    print(10)                                
torch.cuda.synchronize()

1
2
3
4
5
10
1
2
3
4
5
10


In [20]:
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)


class ProgressMeter(object):
    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))

    def _get_batch_fmtstr(self, num_batches):
        num_digits = len(str(num_batches // 1))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'

In [21]:
def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

In [22]:
len(tr_loader)

296

In [23]:
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.1, steps_per_epoch=len(tr_loader)
                                                , epochs=30, pct_start=0.2)

In [24]:
import datetime
import time
high = 0.0
epoch_time = AverageMeter('Epoch', ':6.3f')
batch_time = AverageMeter('Batch', ':6.3f')
data_time = AverageMeter('Data', ':6.3f')
losses = AverageMeter('Loss', ':.5f')
learning_rates = AverageMeter('LearningRate', ':.5f')
top1 = AverageMeter('Acc@1', ':6.2f')
top5 = AverageMeter('Acc@5', ':6.2f')

for epoch in range(30):  # loop over the dataset multiple times
    time_ = datetime.datetime.now()    
    model.train()
    running_loss = 0.0
    running_corrects = 0
    total = 0
    progress = ProgressMeter(
        len(tr_loader),
        [batch_time, data_time, losses, top1, top5, learning_rates],
        prefix="Epoch: [{}]".format(epoch))
    
    end = time.time()    
    for i, (inputs, labels) in enumerate(tr_loader, 0):
        # get the inputs; data is a list of [inputs, labels]
        #print(inputs.shape)
        #print(labels.shape)
        data_time.update(time.time() - end)
        inputs = inputs.cuda(non_blocking=True)
        labels = labels.cuda(non_blocking=True)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        #_, preds = torch.max(outputs, 1)
        #loss.backward()
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
            
        optimizer.step()
        scheduler.step()
        # print statistics
        acc1, acc5 = accuracy(outputs, labels, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        learning_rates.update(scheduler.get_lr()[0])        
        top1.update(acc1[0], inputs.size(0))
        top5.update(acc5[0], inputs.size(0))

        
        batch_time.update(time.time() - end)
        if i % 100 == 99:    # print every 2000 mini-batches
            progress.display(i)
            #running_loss = 0.0
    elapsed = datetime.datetime.now() - time_
    print('{} elapsed for {}'.format(elapsed, epoch+1))

    
print('Finished Training')

Epoch: [0][ 99/296]	Batch 329.551 (173.891)	Data 329.257 (173.591)	Loss 4.48752 (4.55249)	Acc@1   2.73 (  2.68)	Acc@5  13.28 ( 10.00)	LearningRate 0.00475 (0.00425)
Epoch: [0][199/296]	Batch 613.670 (320.220)	Data 613.376 (319.922)	Loss 4.37504 (4.48318)	Acc@1   3.91 (  3.65)	Acc@5  14.84 ( 12.75)	LearningRate 0.00698 (0.00500)
0:14:10.467842 elapsed for 1
Epoch: [1][ 99/296]	Batch 45.864 (349.759)	Data 45.569 (349.460)	Loss 4.15750 (4.40601)	Acc@1   7.81 (  4.66)	Acc@5  21.88 ( 15.58)	LearningRate 0.01531 (0.00785)
Epoch: [1][199/296]	Batch 84.419 (292.406)	Data 84.095 (292.104)	Loss 4.23987 (4.37232)	Acc@1   7.81 (  5.12)	Acc@5  23.44 ( 16.72)	LearningRate 0.02134 (0.00995)
0:01:59.898964 elapsed for 2
Epoch: [2][ 99/296]	Batch 46.779 (227.691)	Data 46.472 (227.386)	Loss 3.95070 (4.30554)	Acc@1  11.72 (  6.05)	Acc@5  28.52 ( 18.89)	LearningRate 0.03572 (0.01515)
Epoch: [2][199/296]	Batch 85.767 (207.291)	Data 85.456 (206.986)	Loss 3.88253 (4.26968)	Acc@1  12.50 (  6.56)	Acc@5  28.12 

In [25]:
torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'loss': loss,    
    
}, './checkpoint/food_darknet25_fp16_5e4_ep030.b0.pth')

In [26]:
def classification_val(model, val_loader):
    correct = 0
    total = 0    
    
    model.eval()
    with torch.no_grad():
        for data in val_loader:
            images, labels = data
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return correct/total

In [27]:
cls_results = classification_val(model, val_loader) 

In [28]:
cls_results

0.8268118811881188

In [27]:
np.mean(cls_results)

0.8161584158415841

In [31]:
cls_results

[0.8245940594059405,
 0.8245940594059405,
 0.8245940594059405,
 0.8245940594059405,
 0.8245940594059405,
 0.8245940594059405,
 0.8245940594059405,
 0.8245940594059405,
 0.8245940594059405,
 0.8245940594059405]

In [31]:
np.mean(cls_results)

0.7828868760840239

In [32]:
np.std(cls_results)

0.002308887647354633

In [33]:
cls_results

[0.7867604548082482,
 0.7841587974561572,
 0.783773366737329,
 0.7795336288302178,
 0.7843515128155714,
 0.7842551551358643,
 0.7832915783387936,
 0.7815571401040663,
 0.7826170745808441,
 0.778570052033147]

In [29]:
def val_retrieval(model, val_loader):
    feats = None
    data_ids = None

    model.eval()
    with torch.no_grad():
        for idx, (images, labels) in enumerate(val_loader):
            images = images.to(device)
            #labels = labels.to(device)

            feat = model(images, feature=True)
            feat = feat.detach().cpu().numpy()

            feat = feat/np.linalg.norm(feat, axis=1)[:, np.newaxis]

            if feats is None:
                feats = feat
            else:
                feats = np.append(feats, feat, axis=0)

            if data_ids is None:
                data_ids = labels
            else:
                data_ids = np.append(data_ids, labels, axis=0)

        score_matrix = feats.dot(feats.T)
        np.fill_diagonal(score_matrix, -np.inf)
        top1_reference_indices = np.argmax(score_matrix, axis=1)

        top1_reference_ids = [
            [data_ids[idx], data_ids[top1_reference_indices[idx]]] for idx in
            range(len(data_ids))]

    total_count = len(top1_reference_ids)
    correct = 0
    for ids in top1_reference_ids:
        if ids[0] == ids[1]:
            correct += 1        
    return correct/total_count

In [30]:
val_retrieval(model, val_loader)

0.760950495049505

In [35]:
retrieval_result = [val_retrieval(model, val_loader) for i in range(3)]

In [36]:
np.mean(retrieval_result)

0.7564752475247524

In [37]:
retrieval_result

[0.7564752475247525, 0.7564752475247525, 0.7564752475247525]

In [36]:
np.mean(retrieval_result)

0.6494603969936404

In [37]:
np.std(retrieval_result)

0.00523044716416355

In [38]:
retrieval_result

[0.6546540759298516,
 0.6394295625361341,
 0.6442474465214878,
 0.6489689728271343,
 0.6478126806706495,
 0.6587974561572557,
 0.6544613605704375,
 0.6481981113894777,
 0.6481017537097706,
 0.6499325496242051]