In [1]:
import argparse
import numpy as np
import os
import sys
import inspect
import random
import tabulate
import time
import torch
import torch.nn.functional as F
from torchsummary import summary

# Make the project root (the parent of the notebook's working directory)
# importable so that `data`, `models`, `utils` and `regularization` resolve.
# Inside a Jupyter kernel the current frame has no real source file:
# inspect.getfile() yields either a pseudo-name such as "<ipython-input-...>"
# or, with recent ipykernel releases, a path inside a temporary directory,
# which would make `parent_dir` point at the temp root. The working directory
# is the reliable anchor here.
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
# Avoid stacking duplicate entries when the cell is re-run.
if parent_dir not in sys.path:
    sys.path.insert(0, parent_dir)

import data
import models
import utils
import regularization

In [2]:
class GlobalArguments():
    """Plain attribute bag holding every setting the cells below read from `args`."""

    def __init__(self):
        # Every entry becomes an instance attribute of the same name, in this order.
        defaults = {
            # Initial model, data pipeline and checkpoint locations.
            'model': 'ConvFCSimple',
            'dataset': 'CIFAR100',
            'data_path': '../Data/',
            'batch_size': 128,
            'num_workers': 4,
            'transform': 'VGG',
            'use_test': True,
            'ckpt': '../Checkpoints/ConvFCSimple/CIFAR100_STEP200/0/checkpoint-200.pt',
            'device': 0,
            'seed': 0,            # 0 -> a random seed is drawn in a later cell
            'dir': '../Checkpoints/test',
            'regularizer': None,  # None disables the regularizer

            # Optimisation and boosting schedule.
            'momentum': 0.9,
            'wd': 1e-4,
            'cycle': 200,         # asserted to be even in a later cell
            'epochs': 1600,
            'lr_1': 0.005,
            'lr_2': 0.0001,
            'version': 'classic',
            'boost_lr': 'auto',   # 'auto' or anything float() accepts
            'scheduler': 'slide',
            'independent': True,
        }
        for name, value in defaults.items():
            setattr(self, name, value)

args = GlobalArguments()

In [3]:
# The configured cycle length has to be an even number of epochs.
assert not args.cycle % 2, 'Cycle length should be even'

# Record the launching command line next to the checkpoints.
# NOTE(review): inside a notebook sys.argv is the kernel's launch command,
# not a training command - confirm this file is still wanted.
os.makedirs(args.dir, exist_ok=True)
command_path = os.path.join(args.dir, 'fge.sh')
with open(command_path, 'w') as command_file:
    command_file.write(' '.join(sys.argv) + '\n')

In [4]:
torch.backends.cudnn.benchmark = True

# A seed of 0 means "draw one at random"; the drawn value is stored back on
# `args` so it can be inspected afterwards.
if args.seed == 0:
    args.seed = random.randint(0, 1000000)
# Seed every RNG this notebook imports, not only torch's.
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)  # silently ignored when CUDA is unavailable

# torch.cuda.set_device() rejects a CPU device, so only call it when a GPU
# was actually selected.
if torch.cuda.is_available():
    device = 'cuda:' + str(args.device)
    torch.cuda.set_device(device)
else:
    device = 'cpu'
print ('Device :', device)

Device : cuda:0


In [5]:
# Weight applied to each boosting step: 'auto' starts at 1.0 and is re-estimated
# at the end of every cycle in the training loop; anything else is a fixed number.
if args.boost_lr == 'auto':
    boost_lr = 1.0
else:
    boost_lr = float(args.boost_lr)

# Number of target classes per supported dataset. An unknown dataset is
# rejected here instead of surfacing later as a NameError on `num_classes`.
if   args.dataset == "CIFAR10":
    num_classes = 10
elif args.dataset == "CIFAR100":
    num_classes = 100
else:
    raise AssertionError('I don`t know such dataset')

# Per-sample loss (reduction='none') matching the chosen boosting variant.
if   args.version == 'classic':
    criterion = torch.nn.MSELoss(reduction='none')
elif args.version == 'simple':
    criterion = torch.nn.CrossEntropyLoss(reduction='none')
else:
    raise AssertionError('I don`t know this implementation of gradient boosting')

# Learning-rate schedule factory; it is called once per epoch in the training loop.
if   args.scheduler == 'cyclic':
    scheduler = utils.cyclic_learning_rate
elif args.scheduler == 'linear':
    scheduler = utils.linear_learning_rate
elif args.scheduler == 'slide':
    scheduler = utils.slide_learning_rate
else:
    raise AssertionError('I don`t know such scheduler')

In [6]:
# Build the initial model named in `args.model` and restore its weights.
architecture = getattr(models, args.model)
model = architecture.base(num_classes=num_classes, **architecture.kwargs)

# Deserialize onto the CPU first: without map_location the tensors are restored
# to the device they were saved from, which fails if that GPU index does not
# exist here and otherwise keeps a second copy of the weights on the GPU.
checkpoint = torch.load(args.ckpt, map_location='cpu')
# The stored epoch is used as-is (not +1) as the base index for checkpoints
# written by the later cells.
start_epoch = checkpoint['epoch']
model.load_state_dict(checkpoint['model_state'])
model.cuda()

summary(model, (3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 34, 34]             224
              ReLU-2            [-1, 8, 34, 34]               0
         MaxPool2d-3            [-1, 8, 32, 32]               0
            Conv2d-4            [-1, 8, 32, 32]             584
              ReLU-5            [-1, 8, 32, 32]               0
         MaxPool2d-6            [-1, 8, 15, 15]               0
            Conv2d-7            [-1, 8, 15, 15]             584
              ReLU-8            [-1, 8, 15, 15]               0
         MaxPool2d-9              [-1, 8, 7, 7]               0
           Linear-10                  [-1, 100]          39,300
             ReLU-11                  [-1, 100]               0
           Linear-12                  [-1, 100]          10,100
Total params: 50,792
Trainable params: 50,792
Non-trainable params: 0
---------------------------------

In [7]:
# Training-time transform registered for the configured dataset / transform name.
train_transform = getattr(getattr(data.Transforms, args.dataset), args.transform).train

# Generator built from the current `model`; it is handed to the loader factory.
logits_generator = regularization.dataset_logits_generator(
    model,
    transform=train_transform,
    batch_size=args.batch_size)

# Note: this rebinds `num_classes` to the value reported by the loader factory.
loaders, num_classes = data.loaders_gb(
    args.dataset,
    args.data_path,
    args.batch_size,
    args.num_workers,
    args.transform,
    args.use_test,
    shuffle_train=True,
    logits_generator=logits_generator,
)

Files already downloaded and verified
Initial logits :
Shape : torch.Size([50000, 100]) Logits_mean : 0.03137224540114403
Max : 27.365541458129883 Min : -31.16400718688965
You are going to run models on the test set. Are you sure?
Files already downloaded and verified


In [9]:
# From here on `architecture` / `model` refer to a freshly initialised vgg16_bn;
# the model restored from the checkpoint is no longer bound to `model`.
architecture = models.vgg16_bn
model = architecture.base(num_classes=num_classes, **architecture.kwargs)
model.cuda()
summary(model, (3, 32, 32))

# SGD over the new model's parameters. The optimizer state stored in the
# checkpoint is deliberately not restored.
sgd_settings = dict(lr=args.lr_1, momentum=args.momentum, weight_decay=args.wd)
optimizer = torch.optim.SGD(model.parameters(), **sgd_settings)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           1,792
       BatchNorm2d-2           [-1, 64, 32, 32]             128
              ReLU-3           [-1, 64, 32, 32]               0
            Conv2d-4           [-1, 64, 32, 32]          36,928
       BatchNorm2d-5           [-1, 64, 32, 32]             128
              ReLU-6           [-1, 64, 32, 32]               0
         MaxPool2d-7           [-1, 64, 16, 16]               0
            Conv2d-8          [-1, 128, 16, 16]          73,856
       BatchNorm2d-9          [-1, 128, 16, 16]             256
             ReLU-10          [-1, 128, 16, 16]               0
           Conv2d-11          [-1, 128, 16, 16]         147,584
      BatchNorm2d-12          [-1, 128, 16, 16]             256
             ReLU-13          [-1, 128, 16, 16]               0
        MaxPool2d-14            [-1, 12

In [10]:
# test_res = utils.test(loaders['test'], model, criterion)
# print ('Initial quality: ', test_res['accuracy'])

# Running ensemble bookkeeping, updated at the end of every cycle by the training loop.
ensemble_size = 0
predictions_sum = np.zeros((len(loaders['test'].dataset), num_classes))

# Column headers of the per-epoch progress table printed by the training loop.
columns = ['ep', 'lr', 'tr_loss', 'tr_acc', 'te_nll', 'te_loss', 'te_acc', 'ens_acc', 'time']

# Resolve the optional regularizer. An unknown name is rejected here instead of
# leaving `regularizer` undefined until the training loop.
if args.regularizer is None:
    regularizer = None
elif args.regularizer == 'MSE2':
    # NOTE(review): GlobalArguments does not define `reg_wd`; this branch raises
    # AttributeError until that setting is added.
    regularizer = regularization.TwoModelsMSE(model, args.reg_wd).reg
else:
    raise AssertionError('I don`t know such regularizer')

# Snapshot the current model / optimizer before any training step.
utils.save_checkpoint(
    args.dir,
    start_epoch,
    name='fge',
    model_state=model.state_dict(),
    optimizer_state=optimizer.state_dict(),
    boost_weight=1.)

# NOTE(review): `model` is whatever the preceding cell left bound (a freshly
# built network in this notebook), so the running sum starts from its logits -
# confirm this is the intended starting point for the ensemble.
logits_sum, targets = utils.logits(loaders['test'], model)
print ('Inintial accuracy :', torch.eq(logits_sum.argmax(dim=1), targets).float().mean())

Inintial accuracy : tensor(0.0089)


In [None]:
# Main loop: train for `args.epochs` epochs; at the end of every `args.cycle`
# epochs fold the current model into the ensemble, checkpoint it, refresh the
# logits stored in both datasets and (optionally) start over with a new model.
for epoch in range(args.epochs):
    time_ep = time.time()
    lr_schedule = scheduler(epoch, args.cycle, args.lr_1, args.lr_2)

    train_res = utils.train_gb(
        loaders['train'], model, optimizer, criterion,
        lr_schedule=lr_schedule,
        regularizer=regularizer,
        gb_version=args.version,
        boost_lr=boost_lr)
    test_res = utils.test_gb(loaders['test'], model, criterion, boost_lr=boost_lr)
    time_ep = time.time() - time_ep

    # Stays None (an empty table cell) on epochs that do not close a cycle.
    ens_acc = None

    cycle_finished = (epoch + 1) % args.cycle == 0
    if cycle_finished:
        if args.boost_lr == 'auto':
            # Re-estimate the boosting weight for the model that was just trained.
            boost_lr_dir = args.dir + '/boost_lr'
            os.makedirs(boost_lr_dir, exist_ok=True)
            boost_lr = regularization.adjust_boost_lr(
                loaders['train'],
                model,
                save_info=boost_lr_dir + '/' + str(epoch) + '.pt')
        print ('Boost_lr : ', boost_lr)

        # Add the weighted test logits of this model to the running ensemble sum.
        ensemble_size += 1
        logits, targets = utils.logits(loaders['test'], model)
        logits_sum += boost_lr * logits
        ensemble_hits = torch.eq(logits_sum.argmax(dim=1), targets).float()
        ens_acc = 100.0 * ensemble_hits.mean().item()

        regularization.logits_info(logits, logits_sum=logits_sum)

        utils.save_checkpoint(
            args.dir,
            start_epoch + epoch,
            name='fge',
            model_state=model.state_dict(),
            optimizer_state=optimizer.state_dict(),
            boost_weight=boost_lr
        )

        # (An earlier experiment re-created / disabled the TwoModelsMSE
        # regularizer at cycle boundaries here; it is currently switched off.)

        # Refresh the logits stored in each dataset, train split first, using
        # the matching transform of the configured dataset.
        split_transforms = getattr(getattr(data.Transforms, args.dataset), args.transform)
        for split in ('train', 'test'):
            loaders[split].dataset.update_logits(
                boost_lr,
                logits_generator=regularization.dataset_logits_generator(
                    model,
                    transform=getattr(split_transforms, split),
                    batch_size = args.batch_size))

        if args.independent:
            # Start the next cycle from a freshly initialised network and optimizer.
            print ("I am making a new model")
            model = architecture.base(num_classes=num_classes, **architecture.kwargs)
            model.cuda()
            optimizer = torch.optim.SGD(
                model.parameters(),
                lr=args.lr_1,
                momentum=args.momentum,
                weight_decay=args.wd
            )

    # One table row per epoch; the header block is repeated every 40 epochs.
    values = [
        epoch, lr_schedule(1.0),
        train_res['loss'], train_res['accuracy'],
        test_res['nll'], test_res['loss'], test_res['accuracy'],
        ens_acc, time_ep,
    ]
    table_lines = tabulate.tabulate(
        [values], columns, tablefmt='simple', floatfmt='9.6f').split('\n')
    if epoch % 40 == 0:
        # Put the rule above the header as well as below it.
        table = '\n'.join([table_lines[1]] + table_lines)
    else:
        # Data row only.
        table = table_lines[2]
    print(table)

----  ---------  ---------  ---------  ---------  ---------  ---------  ---------  ---------
  ep         lr    tr_loss     tr_acc     te_nll    te_loss     te_acc  ens_acc         time
----  ---------  ---------  ---------  ---------  ---------  ---------  ---------  ---------
   0   0.005000   0.019008  42.860000   0.007442   2.325694  39.590000             27.130669
   1   0.005000   0.007223  42.908000   0.007413   2.325687  39.600000             26.195138
   2   0.005000   0.007140  42.966000   0.007405   2.325686  39.610000             26.354546
   3   0.005000   0.007113  42.948000   0.007402   2.325685  39.590000             26.208734
   4   0.005000   0.007099  42.948000   0.007400   2.325689  39.580000             26.174862
   5   0.005000   0.007091  42.938000   0.007399   2.325677  39.590000             26.249727
   6   0.005000   0.007086  42.974000   0.007398   2.325670  39.570000             26.224368
   7   0.005000   0.007083  42.956000   0.007398   2.325679  39.580000