# Dataloader

In [1]:
'''
Project         : Incremental learning for surgical instrument classification and feature extraction
Lab             : MMLAB, National University of Singapore
contributors    : Mobarak, lalith, mengya
Note            : Dataloader for End-to-End incremental learning, code adopted from our previous work.
'''

import os
import sys
import random
import numpy as np
from glob import glob
from PIL import Image

import torch
from torch.utils.data import Dataset, DataLoader

if sys.version_info[0] == 2:
    import xml.etree.cElementTree as ET
else:
    import xml.etree.ElementTree as ET

class SurgicalClassDataset18_incremental(Dataset):
    """Image-classification dataset driven by text files that list image paths.

    Every non-blank line of every file in `filenames` is treated as the path
    of one image. The class label is parsed from the image file name: it is
    the integer after the last underscore, before the file extension
    (e.g. ``some/dir/frame_12_3.png`` -> label 3).

    Args:
        filenames: list of text-file paths, one image path per line.
        fine_tune_size: when not None, only a random subset of at most this
            many lines is kept from the LAST file in `filenames`; all other
            files are used in full.
        is_train: stored on the instance as `is_train`; not used by this
            class itself.
    """

    def __init__(self, filenames, fine_tune_size = None, is_train=None):

        self.is_train = is_train
        self.img_list = []

        last_file_idx = len(filenames) - 1
        for file_idx, txt_file in enumerate(filenames):
            # `with` guarantees the list file is closed even if reading fails.
            with open(txt_file, 'r') as curr_file:
                paths = [line.rstrip() for line in curr_file]
            # A blank line (e.g. a trailing newline) is not an image path.
            paths = [path for path in paths if path]

            if fine_tune_size is not None and file_idx == last_file_idx:
                # Random sub-sample of the last (novel-class) file only.
                order = np.random.permutation(len(paths))
                paths = [paths[j] for j in order[:fine_tune_size]]

            self.img_list.extend(paths)

    def __len__(self):
        """Return the number of image paths collected."""
        return len(self.img_list)

    @staticmethod
    def _label_from_path(img_path):
        """Return the integer label encoded after the last '_' of the file name."""
        # splitext copes with any extension length (.png, .jpeg, ...).
        stem = os.path.splitext(os.path.basename(img_path))[0]
        return int(stem.split('_')[-1])

    def __getitem__(self, index):
        """Return `(image, target)` for the sample at `index`.

        `image` is a float tensor in channel-first order (the HWC pixel array
        is divided by 255 and transposed with (2, 0, 1)); `target` is a
        0-dim long tensor holding the class label.
        """
        img_path = self.img_list[index]

        # convert() returns a fully loaded copy, so the file can be closed.
        with Image.open(img_path) as raw_img:
            rgb_img = raw_img.convert('RGB')

        pixels = np.asarray(rgb_img, np.float32) / 255
        _img = torch.from_numpy(pixels.transpose(2, 0, 1)).float()
        _target = torch.tensor(self._label_from_path(img_path), dtype=torch.long)
        return _img, _target

# CBS filters

In [2]:
'''
Gaussian and Laplacian-of-Gaussian filters for curriculum learning
'''
import math

import torch
import torch.nn as nn


def get_gaussian_filter(kernel_size=3, sigma=2, channels=3):
    '''
    Build a fixed (non-trainable) depthwise 2D Gaussian smoothing layer.

    Args:
        kernel_size: side length of the square kernel.
        sigma: standard deviation of the Gaussian.
        channels: number of input/output channels; each channel is filtered
            independently (groups == channels) with the same kernel.

    Returns:
        nn.Conv2d without bias whose weight is the normalised Gaussian kernel
        and has requires_grad set to False.
    '''
    # Create a x, y coordinate grid of shape (kernel_size, kernel_size, 2)
    coords = torch.arange(kernel_size)
    x_grid = coords.repeat(kernel_size).view(kernel_size, kernel_size)
    y_grid = x_grid.t()
    xy_grid = torch.stack([x_grid, y_grid], dim=-1).float()

    mean = (kernel_size - 1)/2.
    variance = sigma**2.

    # Squared distance of every grid cell from the kernel centre.
    sq_dist = torch.sum((xy_grid - mean)**2., dim=-1)

    # 2D Gaussian = product of two 1D Gaussians (one per axis).
    gaussian_kernel = (1./(2.*math.pi*variance)) * torch.exp(-sq_dist / (2*variance))

    # Make sure sum of values in gaussian kernel equals 1.
    gaussian_kernel = gaussian_kernel / torch.sum(gaussian_kernel)

    # Reshape to 2d depthwise convolutional weight
    gaussian_kernel = gaussian_kernel.view(1, 1, kernel_size, kernel_size)
    gaussian_kernel = gaussian_kernel.repeat(channels, 1, 1, 1)

    # "Same" padding for every odd kernel size so the spatial size is kept
    # (previously only sizes 3 and 5 were padded). Even sizes cannot be padded
    # symmetrically and keep the former behaviour of no padding.
    padding = kernel_size // 2 if kernel_size % 2 == 1 else 0

    gaussian_filter = nn.Conv2d(in_channels=channels, out_channels=channels,
                                kernel_size=kernel_size, groups=channels,
                                bias=False, padding=padding)

    gaussian_filter.weight.data = gaussian_kernel
    gaussian_filter.weight.requires_grad = False

    return gaussian_filter


def get_laplaceOfGaussian_filter(kernel_size=3, sigma=2, channels=3):
    '''
    Build a fixed (non-trainable) depthwise 2D Laplacian-of-Gaussian layer.

    Args:
        kernel_size: side length of the square kernel.
        sigma: standard deviation used in the LoG formula.
        channels: number of input/output channels; each channel is filtered
            independently (groups == channels) with the same kernel.

    Returns:
        nn.Conv2d without bias whose weight is the LoG kernel divided by the
        sum of its entries, with requires_grad set to False.
    '''
    # Create a x, y coordinate grid of shape (kernel_size, kernel_size, 2)
    coords = torch.arange(kernel_size)
    x_grid = coords.repeat(kernel_size).view(kernel_size, kernel_size)
    y_grid = x_grid.t()
    xy_grid = torch.stack([x_grid, y_grid], dim=-1).float()
    mean = (kernel_size - 1)/2.

    used_sigma = sigma
    two_var = 2*(used_sigma**2)

    # Squared distance from the kernel centre, computed once and reused in
    # both factors of the LoG expression.
    sq_dist = torch.sum((xy_grid - mean)**2., dim=-1)

    # Laplacian of Gaussian: -1/(pi*s^4) * (1 - r^2/(2 s^2)) * exp(-r^2/(2 s^2))
    log_kernel = (-1./(math.pi*(used_sigma**4))) \
                        * (1-(sq_dist / two_var)) \
                        * torch.exp(-sq_dist / two_var)

    # Rescale so the kernel entries sum to 1.
    # NOTE(review): this divides by the raw sum, which may be negative or
    # close to zero for some (kernel_size, sigma) pairs — confirm intended.
    log_kernel = log_kernel / torch.sum(log_kernel)

    # Reshape to 2d depthwise convolutional weight
    log_kernel = log_kernel.view(1, 1, kernel_size, kernel_size)
    log_kernel = log_kernel.repeat(channels, 1, 1, 1)

    # "Same" padding for every odd kernel size so the spatial size is kept
    # (previously only sizes 3 and 5 were padded). Even sizes cannot be padded
    # symmetrically and keep the former behaviour of no padding.
    padding = kernel_size // 2 if kernel_size % 2 == 1 else 0

    log_filter = nn.Conv2d( in_channels=channels, out_channels=channels, kernel_size=kernel_size,
                            groups=channels, bias=False, padding=padding)

    log_filter.weight.data = log_kernel
    log_filter.weight.requires_grad = False

    return log_filter

# ResNet models

In [3]:
'''
    ResNet (PyTorch implementation), together with curriculum learning filters
    Reference:
    [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Deep Residual Learning for Image Recognition. arXiv:1512.03385
'''

import torch
import torch.nn as nn
import torch.nn.functional as F

class BasicBlock(nn.Module):
    """Two-convolution residual block with optional fixed smoothing filters.

    After `get_new_kernels` has been called, the output of each 3x3
    convolution is passed through a fixed filter (`kernel1` / `kernel2`)
    before batch normalisation.
    """
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()

        self.planes = planes
        self.enable_cbs = False
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # Identity shortcut unless the spatial size or channel count changes,
        # in which case a 1x1 convolution + batch norm projects the input.
        identity_fits = (stride == 1) and (in_planes == out_planes)
        if identity_fits:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut_kernel = True
            projection = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
            self.shortcut = nn.Sequential(projection, nn.BatchNorm2d(out_planes))

    def get_new_kernels(self, fil2, fil3, kernel_size, std):
        """Enable the fixed filters and (re)build them.

        `fil2` selects the filter for `kernel1` and `fil3` the one for
        `kernel2`: 'gau' builds a Gaussian filter, 'LOG' a
        Laplacian-of-Gaussian filter. Any other value leaves the
        corresponding attribute untouched.
        """
        self.enable_cbs = True
        for attr_name, filter_type in (('kernel1', fil2), ('kernel2', fil3)):
            if filter_type == 'gau':
                make_filter = get_gaussian_filter
            elif filter_type == 'LOG':
                make_filter = get_laplaceOfGaussian_filter
            else:
                continue
            setattr(self, attr_name,
                    make_filter(kernel_size=kernel_size, sigma=std, channels=self.planes))

    def forward(self, x):
        """Apply conv -> [filter] -> bn -> relu -> conv -> [filter] -> bn, add the shortcut, relu."""
        h = self.conv1(x)
        if self.enable_cbs:
            h = self.kernel1(h)
        h = F.relu(self.bn1(h))

        h = self.conv2(h)
        if self.enable_cbs:
            h = self.kernel2(h)
        h = self.bn2(h)

        h += self.shortcut(x)
        return F.relu(h)


class ResNet(nn.Module):
    """ResNet backbone with optional fixed filters after convolutions.

    Args:
        block: residual block class; must expose an `expansion` attribute, be
            constructible as `block(in_planes, planes, stride)` and provide
            `get_new_kernels(fil2, fil3, kernel_size, std)`.
        num_blocks: sequence of four ints, number of blocks per stage.
        args: namespace providing `std`, `use_cbs`, `std_factor`, `cbs_epoch`,
            `kernel_size`, `fil1`, `fil2`, `fil3` and `num_classes`.

    When `args.use_cbs` is true, `get_new_kernels` must be called before the
    first forward pass: `kernel1` is only created there.
    """

    def __init__(self, block, num_blocks, args):

        super(ResNet, self).__init__()
        self.in_planes = 64
        self.std = args.std
        self.enable_cbs = args.use_cbs
        self.factor = args.std_factor
        self.epoch = args.cbs_epoch
        self.kernel_size = args.kernel_size

        self.fil1 = args.fil1
        self.fil2 = args.fil2
        self.fil3 = args.fil3

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        self.linear = nn.Linear(512*block.expansion, args.num_classes)

        self._initialize_weights()

    def _initialize_weights(self):
        """Kaiming-normal for convolutions, constants for batch norm, N(0, 0.01) for linear layers."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Stack `num_blocks` blocks; only the first one uses `stride`, the rest use 1."""
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for block_stride in strides:
            layers.append(block(self.in_planes, planes, block_stride))
            # The next block consumes what this one produces.
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the class logits for a batch of 3-channel images."""
        out = self.conv1(x)
        if self.enable_cbs:
            out = self.kernel1(out)
        out = F.relu(self.bn1(out))

        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)

        out = self.avgpool(out)
        out = torch.flatten(out, 1)
        out = self.linear(out)
        return out

    def get_new_kernels(self, epoch_count):
        """Rebuild every fixed filter for the given epoch.

        The filter standard deviation is multiplied by `self.factor` on every
        non-zero epoch that is a multiple of `self.epoch`; then the stem
        filter (`kernel1`, chosen by `fil1`) and the filters of every block
        in the four stages are rebuilt with the current std.
        """
        # Value comparison: `is not 0` tested object identity, which raises a
        # SyntaxWarning on Python >= 3.8 and is wrong for e.g. numpy integers.
        if epoch_count % self.epoch == 0 and epoch_count != 0:
            self.std *= self.factor

        if (self.fil1 == 'gau'):
            self.kernel1 = get_gaussian_filter(kernel_size=self.kernel_size, sigma= self.std, channels=64)
        elif (self.fil1 == 'LOG'):
            self.kernel1 = get_laplaceOfGaussian_filter(kernel_size=self.kernel_size, sigma= self.std, channels=64)

        for layer in (self.layer1, self.layer2, self.layer3, self.layer4):
            for child in layer.children():
                child.get_new_kernels(self.fil2, self.fil3, self.kernel_size, self.std)



def ResNet18(args):
    """Build a ResNet made of BasicBlock units with two blocks in each of the four stages."""
    blocks_per_stage = [2, 2, 2, 2]
    return ResNet(BasicBlock, blocks_per_stage, args)

# def ResNet34(args): return ResNet(BasicBlock, [3,4,6,3], args)
# def ResNet50(args): return ResNet(Bottleneck, [3,4,6,3], args)
# def ResNet101(args):return ResNet(Bottleneck, [3,4,23,3], args)

# def test():
#     net = ResNet18()
#     y = net(torch.randn(1,3,32,32))
#     print(y.size())

# Test function

In [4]:
import os
import copy
import time
import random
import argparse

import numpy as np
import PIL.Image as Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable


def test(args, model, test_loader, class_old, class_novel):
    '''
    Evaluate `model` on `test_loader`, restricted to the old + novel classes.

    arguments:
        args: namespace; reads `num_classes` and, if present, `cuda`
              (a missing `cuda` attribute is treated as False).
        model: network returning one logit per class.
        test_loader: iterable of (data, target) batches, `target` holding
                     integer class labels.
        class_old, class_novel: integer arrays of class ids; the logits are
                     restricted to their concatenation before the argmax.
    return: (tcost, acc_avg) - elapsed seconds and fraction of correctly
            classified samples (0.0 when the loader yields no samples).
    '''
    # time.clock() was removed in Python 3.8; perf_counter() is its replacement.
    tstart = time.perf_counter()
    use_cuda = getattr(args, 'cuda', False)

    # set net to eval
    model.eval()

    # Softmax is monotonic per row, so it does not change the argmax; it is
    # kept so the scores are probabilities, as before.
    dist_loss_act = nn.Softmax(dim=1)

    # indices for combined classes (identical for every batch)
    class_indices = torch.as_tensor(np.concatenate((class_old, class_novel), axis=0), dtype=torch.long)
    if use_cuda:
        class_indices = class_indices.cuda()

    num_correct = 0
    num_exp = 0

    with torch.no_grad():
        for data, target in test_loader:

            # prepare target_onehot
            bs = target.shape[0]
            target_onehot = torch.zeros(bs, args.num_classes)
            target_onehot[torch.arange(bs), target.long().view(-1)] = 1.0

            # send image and target to cuda
            if use_cuda:
                data = data.cuda()
                target_onehot = target_onehot.cuda()

            # predict output, restricted to the evaluated classes
            output = dist_loss_act(torch.index_select(model(data), 1, class_indices))
            target_onehot = torch.index_select(target_onehot, 1, class_indices)

            # Both argmaxes are positions inside class_indices. A label that
            # is not among the evaluated classes yields an all-zero row and
            # therefore position 0, as before.
            predicted = np.argmax(output.cpu().numpy(), axis=-1)
            expected = np.argmax(target_onehot.cpu().numpy(), axis=-1)
            num_correct += int(np.sum(predicted == expected))
            num_exp += bs

    # calculate average accuracy (guard against an empty loader)
    acc_avg = num_correct / num_exp if num_exp else 0.0

    # time calculation
    tcost = time.perf_counter() - tstart

    return(tcost, acc_avg)


# The name matches pytest's default "test*" pattern; mark it so test runners
# do not try to collect this evaluation routine as a test case.
test.__test__ = False

# Train Function

In [5]:
import os
import copy
import time
import random

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable


def train (args, period, model, model_old, train_loader, loss_criterion, \
           optimizer, class_old, class_novel, finetune):
    '''
    Train `model` for one pass over `train_loader`, optionally distilling from `model_old`.

    arguments:
        args: namespace; reads `num_classes`, `dist_loss`, `dist_ratio`,
              `use_ts`, `tscale` and, if present, `cuda` (missing -> False).
        period: incremental period index; distillation is only used when > 0.
        model: network being trained.
        model_old: network whose outputs are the distillation targets.
        train_loader: iterable of (data, target) batches with integer labels.
        loss_criterion: callable(logits, one_hot_target) -> scalar loss tensor.
        optimizer: optimizer over the parameters of `model`.
        class_old, class_novel: integer arrays of class ids.
        finetune: if False the distillation term covers `class_old` only,
              otherwise it covers old + novel classes.
    returns: (tcost, loss_avg, acc_avg) - elapsed seconds, sum of the batch
             losses divided by the number of samples, training accuracy.
    '''
    # time.clock() was removed in Python 3.8; perf_counter() is its replacement.
    tstart = time.perf_counter()
    use_cuda = getattr(args, 'cuda', False)

    # set net to train mode
    model.train()
    # NOTE(review): the old network is also put in train mode, so its
    # BatchNorm layers use batch statistics when producing distillation
    # targets - confirm this is intended rather than model_old.eval().
    model_old.train()

    # Class-index tensors do not depend on the batch; build them once.
    combined_indices = torch.as_tensor(np.concatenate((class_old, class_novel), axis=0), dtype=torch.long)
    if period > 0 and not finetune:
        dist_indices = torch.as_tensor(class_old, dtype=torch.long)
    else:
        dist_indices = combined_indices
    if use_cuda:
        combined_indices = combined_indices.cuda()
        dist_indices = dist_indices.cuda()

    # Activation of the distillation term. Softmax is the only activation
    # implemented (test() also falls back to it), so it is used regardless of
    # args.dist_loss_act instead of leaving the name unbound.
    dist_loss_act = nn.Softmax(dim=1)

    use_distillation = period > 0 and args.dist_loss == 'ce'

    loss_sum = 0.0
    num_correct = 0
    num_exp = 0

    for data, target in train_loader:

        # prepare target_onehot
        bs = target.shape[0]
        target_onehot = torch.zeros(bs, args.num_classes)
        target_onehot[torch.arange(bs), target.long().view(-1)] = 1.0

        # send data to cuda
        if use_cuda:
            data = data.cuda()
            target_onehot = target_onehot.cuda()

        # predict output
        output = model(data)

        # classification loss on the old + novel classes
        output_new_onehot = torch.index_select(output, 1, combined_indices)
        target_onehot = torch.index_select(target_onehot, 1, combined_indices)
        combined_loss = loss_criterion(output_new_onehot, target_onehot)

        # ===== Distillation loss based on old net =====
        loss_dist = 0.0
        if use_distillation:
            # current network output
            dist = torch.index_select(output, 1, dist_indices)

            # old network output; no gradient flows into the old network
            with torch.no_grad():
                target_dist = torch.index_select(model_old(data), 1, dist_indices)

            # optional temperature scaling of both logit sets
            if args.use_ts:
                dist = dist/args.tscale
                target_dist = target_dist/args.tscale

            loss_dist = F.binary_cross_entropy(dist_loss_act(dist), dist_loss_act(target_dist))

        # loss calculation
        loss = combined_loss + args.dist_ratio*loss_dist
        loss_sum += loss.item()

        predicted = np.argmax(output_new_onehot.detach().cpu().numpy(), axis=-1)
        expected = np.argmax(target_onehot.cpu().numpy(), axis=-1)
        num_correct += int(np.sum(predicted == expected))
        num_exp += bs

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # average calculation (guard against an empty loader)
    # NOTE(review): the batch losses are divided by the number of samples,
    # not the number of batches; kept so reported values stay comparable.
    loss_avg = loss_sum / num_exp if num_exp else 0.0
    acc_avg = num_correct / num_exp if num_exp else 0.0

    # time calculation
    tcost = time.perf_counter() - tstart

    return(tcost, loss_avg, acc_avg)

In [6]:
'''
Project         : Incremental learning for surgical instrument classification and feature extraction
Lab             : MMLAB, National University of Singapore
contributors    : Mobarak, lalith, mengya
Note            : Label smoothing loss, code adopted from our previous work and modified.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F

class CELossWithLS(torch.nn.Module):
    """Cross-entropy on one-hot targets with label smoothing and a focal factor.

    Args:
        smoothing: label-smoothing amount; the one-hot target is scaled by
            (1 - smoothing) and `smoothing / num_classes` is added to every entry.
        gamma: exponent of the (1 - p) focal factor; 0 disables the factor.
        isCos: accepted but not used by this class.
        ignore_index: samples whose target argmax equals this value are dropped.
    """

    def __init__(self, smoothing=0.1, gamma=3.0, isCos=True, ignore_index=-1):
        super(CELossWithLS, self).__init__()
        self.smoothing = smoothing
        self.complement = 1.0 - smoothing
        self.gamma = gamma
        self.ignore_index = ignore_index
        self.log_softmax = nn.LogSoftmax(dim=1)

    def forward(self, logits, target):
        """Return the mean loss over the kept samples.

        `logits` and `target` are indexed as (sample, class); `target` is a
        one-hot (or soft) label matrix.
        """
        num_classes = target.shape[1]
        with torch.no_grad():
            soft_target = target * self.complement + self.smoothing / num_classes

        # Rows whose hard label equals ignore_index are excluded.
        keep = torch.argmax(target, dim=1) != self.ignore_index

        log_probs = self.log_softmax(logits[keep])
        probs = torch.exp(log_probs)
        focal_factor = (1 - probs).pow(self.gamma)

        per_sample = torch.sum(focal_factor * log_probs * soft_target[keep], dim=1)
        return -per_sample.mean()

In [7]:
import tqdm

def save_model(args, best_model):
    '''
    Save the state dict of `best_model` to weights/<args.log_name>_model.tar.

    The `weights` directory is created relative to the current working
    directory when it does not exist yet.
    '''
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs('weights', exist_ok=True)

    filename = os.path.join('weights', args.log_name + '_model.tar')
    torch.save(best_model.state_dict(), filename)


def _unwrap_model(model):
    """Return the wrapped network when `model` has a `.module` attribute (e.g. nn.DataParallel), else `model`."""
    return getattr(model, 'module', model)


def _write_memory_file(source_file, memory_file, num_samples):
    """Write at most `num_samples` randomly chosen lines of `source_file` to `memory_file`, replacing it."""
    with open(source_file, 'r') as src:
        lines = src.readlines()

    memory_dir = os.path.dirname(memory_file)
    if memory_dir:
        os.makedirs(memory_dir, exist_ok=True)

    order = np.random.permutation(len(lines))
    # 'w' truncates a memory file left over from an older run.
    with open(memory_file, 'w') as dst:
        dst.writelines(lines[i] for i in order[:num_samples])


def main(args):
    """Run incremental training: for each period train on memory + novel data, then fine-tune.

    For every period the network is trained on the memory files of earlier
    periods plus the novel-class file, evaluated on all test files seen so
    far, optionally fine-tuned (period > 0) with the just-trained network as
    distillation teacher, checkpointed, and a random sample of the novel
    training file is stored as memory for later periods.

    Works with or without GPUs: the networks are wrapped in nn.DataParallel
    and moved to CUDA only when at least one device is reported.
    """

    print('ls', args.use_ls, 'ts', args.use_ts, 'cbs', args.use_cbs)
    # learning rate schedules
    schedules = range(args.schedule_interval, args.epoch_base, args.schedule_interval)

    # class order in incremental learning
    class_order = np.arange(args.num_classes) #np.random.permutation(args.num_class)
    print('class order:', class_order)

    # check for pre-trained model
    model_path = args.checkpointfile + '_%d_%s%s' % (0, ''.join(str(e) for e in class_order[args.num_class_novel[0]:args.num_class_novel[1]]), '.pkl')
    flag_model = os.path.exists(model_path)

    # network
    model = ResNet18(args)
    model_old = copy.deepcopy(model)

    # curriculum learning
    if args.use_cbs:
        model.get_new_kernels(0)
        model_old.get_new_kernels(0)

    # loss
    if args.use_ls:
        loss_criterion = CELossWithLS(smoothing = 0.1, gamma=0.0, isCos=False, ignore_index=-1)
    else:
        loss_criterion = CELossWithLS(smoothing= 0.0, gamma=0.0, isCos=False, ignore_index=-1)

    # gpu
    num_gpu = torch.cuda.device_count()
    use_gpu = num_gpu > 0
    if use_gpu:
        device_ids = np.arange(num_gpu).tolist()
        print('device_ids:', device_ids)
        model = nn.DataParallel(model, device_ids=device_ids).cuda()
        model_old = nn.DataParallel(model_old, device_ids=device_ids).cuda()
        loss_criterion = loss_criterion.cuda()
    else: print('only cpu is available')

    # initializing classes, accuracy and memory array
    memory_train = []                                  # train memory array
    class_old = np.array([], dtype=int)                # old class array
    acc_nvld_basic = np.zeros((args.period_train))     # accuracy list
    acc_nvld_finetune = np.zeros((args.period_train))  # accuracy list

    for period in range(args.period_train):

        print('===================== period = %d ========================='%(period))

        # classes introduced in this period
        class_novel = class_order[args.num_class_novel[period]:args.num_class_novel[period+1]]
        print('class_novel:', class_novel)

        # number of samples kept per period: novel classes * memory_size
        num_novel = args.num_class_novel[period+1] - args.num_class_novel[period]
        ft_size = num_novel * args.memory_size

        # combined train dataloader
        combined_train_files = memory_train + args.novel_train_files[period:period+1]
        combined_train_dataset = SurgicalClassDataset18_incremental(filenames= combined_train_files, is_train=True)
        combined_train_loader = DataLoader(dataset=combined_train_dataset, batch_size= args.batch_size, shuffle=True, num_workers=2, drop_last=False)
        print('train files: \t size: ', len(combined_train_loader.dataset), ' , files: ', combined_train_files)

        # test dataloader
        combined_test_files = args.novel_test_files[0:period+1]
        test_dataset = SurgicalClassDataset18_incremental(filenames= combined_test_files, is_train=False)
        test_loader = DataLoader(dataset=test_dataset, batch_size= args.batch_size, shuffle=True, num_workers=2, drop_last=False)
        # (label fixed: this line reports the test set, not the train set)
        print('test files: \t size: ', len(test_loader.dataset), ' , files: ', combined_test_files)

        # initialize variables
        lrc = args.lr
        acc_training = []
        print('current lr = %f' % (lrc))

        # epoch training
        for epoch in range(args.epoch_base):

            # load pretrained model
            if period == 0 and flag_model:
                print('load model: %s' % model_path)
                model.load_state_dict(torch.load(model_path))

            if args.use_cbs:
                # Unwrap so this also works when the models are not wrapped
                # in DataParallel (CPU-only run).
                _unwrap_model(model).get_new_kernels(epoch)
                _unwrap_model(model_old).get_new_kernels(epoch)
                if use_gpu:
                    # The rebuilt filters are created on the CPU.
                    model.cuda()
                    model_old.cuda()

            # ====== training combined ======
            # decaying learning rate
            if epoch in schedules:
                lrc *= args.gamma
                print('current lr = %f' % (lrc))

            # Optimizer
            optimizer = torch.optim.SGD(model.parameters(), lr=lrc, momentum=args.momentum, weight_decay=args.decay)

            # train
            tcost, loss_avg, acc_avg = train(args, period, model, model_old, combined_train_loader,
                                             loss_criterion, optimizer, class_old, class_novel, False)

            acc_training.append(acc_avg)
            print('Training Period: %d \t Epoch: %d \t time = %.1f \t loss = %.6f \t acc = %.4f' % (period, epoch, tcost, loss_avg, acc_avg))

            # ====== Test combined ======
            tcost, acc_avg = test(args, model, test_loader,class_old, class_novel)

            acc_nvld_basic[period] = acc_avg
            print('Test(n&o)Period: %d \t Epoch: %d \t time = %.1f \t\t\t\t acc = %.4f' % (period, epoch, tcost, acc_avg))

            # exit if pre-trained model / loss converged
            if period == 0 and flag_model: break
            if len(acc_training)>20 and acc_training[-1]>args.stop_acc and acc_training[-5]>args.stop_acc:
                print('training loss converged')
                break

        # copy net-old for finetuning
        model_old = copy.deepcopy(model)

        # ===== Finetuning =====
        if period > 0:

            acc_finetune_train = []
            lrc = args.lr*args.ft_lr_factor # finetune lr
            print('finetune current lr = %f' % (lrc))

            for epoch in range(args.epoch_finetune):

                # fine tune train_dataloaders (the novel file is re-sampled every epoch)
                ft_combined_train_dataset = SurgicalClassDataset18_incremental(filenames= combined_train_files, fine_tune_size = ft_size, is_train=True)
                ft_combined_train_loader = DataLoader(dataset=ft_combined_train_dataset, batch_size= args.batch_size, shuffle=True, num_workers=2, drop_last=False)
                if(epoch == 0):  print('finetune train size:', len(ft_combined_train_loader.dataset))

                # ===== training combined =====
                # learning rate
                if epoch in schedules:
                    lrc *= args.gamma
                    print('current lr = %f'%(lrc))

                # optimizer
                optimizer = torch.optim.SGD(model.parameters(), lr=lrc, momentum=args.momentum, weight_decay=args.decay)

                # train
                tcost, loss_avg, acc_avg = train(args, period, model, model_old, ft_combined_train_loader,
                                                 loss_criterion, optimizer, class_old, class_novel, True)

                acc_finetune_train.append(acc_avg)
                print('Finetune Training Period: %d \t Epoch: %d \t time = %.1f \t loss = %.6f \t acc = %.4f'%(period, epoch, tcost, loss_avg, acc_avg))

                # ===== Test combined =====
                tcost, acc_avg = test(args, model, test_loader, class_old, class_novel)

                acc_nvld_finetune[period] = acc_avg
                print('Finetune Test(n&o) Period: %d \t Epoch: %d \t time = %.1f \t\t\t\t acc = %.4f' % (period, epoch, tcost, acc_avg))

                if len(acc_finetune_train) > 20 and acc_finetune_train[-1] > args.stop_acc and acc_finetune_train[-5] > args.stop_acc:
                    print('finetune training loss converged')
                    break

            print('------------------- result ------------------------')
            print('Period: %d, basic acc = %.4f, finetune acc = %.4f' % (period, acc_nvld_basic[period], acc_nvld_finetune[period]))
            print('---------------------------------------------------')

        if period == args.period_train-1:
            print('------------------- ave result ------------------------')
            print('basic acc = %.4f, finetune acc = %.4f' % (np.mean(acc_nvld_basic[1:], keepdims=False), np.mean(acc_nvld_finetune[1:], keepdims=False)))
            print('---------------------------------------------------')

        print('===========================================================')

        # save model
        model_path = args.checkpointfile + '_%d_%s%s' % (0, ''.join(str(e) for e in class_order[args.num_class_novel[0]:args.num_class_novel[period+1]]), '.pkl')
        print('save model: %s' % model_path)
        checkpoint_dir = os.path.dirname(model_path)
        if checkpoint_dir:
            # torch.save does not create missing directories.
            os.makedirs(checkpoint_dir, exist_ok=True)
        torch.save(model.state_dict(), model_path)

        # ===== random images selection =====
        # keep a random sample of this period's training images as memory
        memory_path = 'data_files/memory_'+str(period)+'.txt'
        _write_memory_file(args.novel_train_files[period], memory_path, ft_size)

        # add new memory file to memory train list
        memory_train.append(memory_path)
        print('memory_train', memory_train)

        #append new class images (create new)
        class_old = np.append(class_old, class_novel, axis=0)

    print('acc_base    : ', acc_nvld_basic)     # accuracy list
    print('acc_finetune: ', acc_nvld_finetune)

    print('xxx')
    
#     if args.save_model:
#         print('saving_model')
#         save_model(args, best_model)
#     else: print('save is disabled')

In [8]:
import os
import argparse

import torch


# Expose only GPUs 0, 1 and 2 to CUDA. This is set at module level, i.e.
# before main() queries torch.cuda.device_count() to build the device list.
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2"

#python3 main.py --dataset cifar10 --alg res --data ./data/

def seed_everything(seed=27):
    """Seed every random number generator used by the pipeline.

    Seeds Python's `random`, NumPy's global generator and PyTorch (CPU and all
    CUDA devices), and puts cuDNN into deterministic mode.

    Args:
        seed: integer seed shared by all generators.
    """
    # Local imports keep this function usable on its own.
    import random
    import numpy as np

    # The data pipeline samples with np.random.permutation, so seeding torch
    # alone does not make runs repeatable.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Only affects interpreters started after this point (e.g. subprocesses);
    # the hash seed of the running interpreter is already fixed.
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

if __name__ == '__main__':

    def _str2bool(value):
        """Parse a command-line boolean.

        argparse's `type=bool` calls bool() on the raw string, so every
        non-empty value (including 'False') becomes True.
        """
        if isinstance(value, bool):
            return value
        text = str(value).strip().lower()
        if text in ('1', 'true', 't', 'yes', 'y'):
            return True
        if text in ('0', 'false', 'f', 'no', 'n'):
            return False
        raise argparse.ArgumentTypeError('expected a boolean value, got %r' % (value,))

    # File locations: one train/test list file per incremental period
    novel_train_files = ['data_files/class0_8_train.txt', 'data_files/class9_10_train.txt']
    novel_test_files = ['data_files/class0_8_test.txt', 'data_files/class9_10_test.txt']
    #novel_train_files = ['data_files/class0_10_train.txt']
    #novel_test_files = ['data_files/class0_10_test.txt']
    #novel_train_files = ['data_files/class0_8_train.txt']
    #novel_test_files = ['data_files/class0_8_test.txt']

    # --------------------------------------------------- Arguments ---------------------------------------------------
    parser = argparse.ArgumentParser(description='Incremental learning for feature extraction')

    # incremental learning
    parser.add_argument('--epoch_base',         type=int,       default=30,          help='30')
    parser.add_argument('--epoch_finetune',     type=int,       default=15,          help='15')
    parser.add_argument('--batch_size',         type=int,       default=20,          help='20')
    parser.add_argument('--period_train',       type=int,       default=2,           help='2')
    parser.add_argument('--num_classes',        type=int,       default=11,          help='11')
    # class-index boundaries of the periods; period p covers [b[p], b[p+1])
    parser.add_argument('--num_class_novel',    type=int,       nargs='+',
                                                                default=[0,9,11],    help='[0,9,11]')
    parser.add_argument('--memory_size',        type=int,       default=50,          help='50')

    parser.add_argument('--stop_acc',           type=float,     default=0.998,       help='training accuracy above which training stops early')

    # model
    parser.add_argument('--alg',                type=str,       default='res',       help='res')

    # datasets
    parser.add_argument('--novel_train_files',  default=novel_train_files,           help='list of train files')
    parser.add_argument('--novel_test_files',   default=novel_test_files,            help='list of test files')

    # learning rate
    parser.add_argument('--schedule_interval',  type=int,       default=3,           help='decay epoch rate: 3')
    parser.add_argument('--lr',                 type=float,     default=0.001,       help='learn rate: 0.001')
    parser.add_argument('--gamma',              type=float,     default=0.8,         help='decay lr factor: 0.8')
    parser.add_argument('--ft_lr_factor',       type=float,     default=0.1,         help='ft learn rate: 0.1')

    # loss
    parser.add_argument('--dist_loss',          type=str,       default='ce',        help='dist_loss')
    parser.add_argument('--dist_loss_act',      type=str,       default='softmax',   help='dist_loss_act')
    parser.add_argument('--dist_ratio',         type=float,     default=0.5,         help='dist_loss_ratio')

    # optimizer
    parser.add_argument('--momentum',           type=float,     default=0.6,         help='learning momentum')
    parser.add_argument('--decay',              type=float,     default=0.0001,      help='SGD weight decay')

    # Label smoothing
    parser.add_argument('--use_ls',             type=_str2bool, default=True,        help='use label smoothing')

    # Temperature scaling
    parser.add_argument('--use_ts',             type=_str2bool, default=False,       help='use temp_scale')
    parser.add_argument('--tscale',             type=float,     default=3.0,         help='Temp scaling')

    # CBS ARGS
    parser.add_argument('--use_cbs',            type=_str2bool, default=True,        help='use CBS')
    parser.add_argument('--std',                type=float,     default=1.0,         help='')
    parser.add_argument('--std_factor',         type=float,     default=0.9,         help='')
    parser.add_argument('--cbs_epoch',          type=int,       default=5,           help='')
    parser.add_argument('--kernel_size',        type=int,       default=3,           help='')
    parser.add_argument('--fil1',               type=str,       default='LOG',       help='gau, LOG')
    parser.add_argument('--fil2',               type=str,       default='gau',       help='gau, LOG')
    parser.add_argument('--fil3',               type=str,       default='gau',       help='gau, LOG')

    parser.add_argument('--save_model',         type=_str2bool, default=False,       help='save the trained model')
    parser.add_argument('--checkpointfile',     type=str,       default='checkpoint/incremental/inc_ResNet18_cbs_ls')

    # An explicit empty list makes argparse ignore sys.argv, so only the
    # defaults above apply (presumably because this runs as a notebook cell).
    args = parser.parse_args(args=[])
    # ------------------------------------------------------------------------------------------------------------------

    # Always define args.cuda so CPU-only runs do not hit a missing attribute.
    args.cuda = torch.cuda.is_available()

    seed_everything()
    main(args)

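# Example command line:
#   python3 main.py --dataset cifar10 --alg res --data ./data/
# NOTE(review): parser.parse_args(args=[]) above parses an empty argument list,
# so command-line flags are not read when this cell runs -- confirm intent.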

ls True ts False cbs True
class order: [ 0  1  2  3  4  5  6  7  8  9 10]
device_ids: [0, 1, 2]
class_novel: [0 1 2 3 4 5 6 7 8]
train files: 	 size:  6785  , files:  ['data_files/class0_8_train.txt']
train files: 	 size:  1260  , files:  ['data_files/class0_8_test.txt']
current lr = 0.001000
Training Period: 0 	 Epoch: 0 	 time = 192.6 	 loss = 0.086081 	 acc = 0.4352
Test(n&o)Period: 0 	 Epoch: 0 	 time = 12.7 				 acc = 0.2992
Training Period: 0 	 Epoch: 1 	 time = 191.1 	 loss = 0.072062 	 acc = 0.5889
Test(n&o)Period: 0 	 Epoch: 1 	 time = 12.7 				 acc = 0.2968
Training Period: 0 	 Epoch: 2 	 time = 191.1 	 loss = 0.065662 	 acc = 0.6435
Test(n&o)Period: 0 	 Epoch: 2 	 time = 12.7 				 acc = 0.2913
current lr = 0.000800
Training Period: 0 	 Epoch: 3 	 time = 191.2 	 loss = 0.061315 	 acc = 0.6865
Test(n&o)Period: 0 	 Epoch: 3 	 time = 12.7 				 acc = 0.2817
Training Period: 0 	 Epoch: 4 	 time = 190.3 	 loss = 0.058915 	 acc = 0.7105
Test(n&o)Period: 0 	 Epoch: 4 	 time = 12.8 		

Training Period: 1 	 Epoch: 20 	 time = 26.2 	 loss = 0.070720 	 acc = 0.7178
Test(n&o)Period: 1 	 Epoch: 20 	 time = 14.8 				 acc = 0.3308
current lr = 0.000210
Training Period: 1 	 Epoch: 21 	 time = 26.3 	 loss = 0.070455 	 acc = 0.7105
Test(n&o)Period: 1 	 Epoch: 21 	 time = 14.7 				 acc = 0.3247
Training Period: 1 	 Epoch: 22 	 time = 26.3 	 loss = 0.069058 	 acc = 0.7164
Test(n&o)Period: 1 	 Epoch: 22 	 time = 14.7 				 acc = 0.3315
Training Period: 1 	 Epoch: 23 	 time = 26.3 	 loss = 0.069533 	 acc = 0.7281
Test(n&o)Period: 1 	 Epoch: 23 	 time = 14.7 				 acc = 0.3329
current lr = 0.000168
Training Period: 1 	 Epoch: 24 	 time = 26.3 	 loss = 0.069579 	 acc = 0.7295
Test(n&o)Period: 1 	 Epoch: 24 	 time = 14.8 				 acc = 0.3329
Training Period: 1 	 Epoch: 25 	 time = 26.3 	 loss = 0.073448 	 acc = 0.7178
Test(n&o)Period: 1 	 Epoch: 25 	 time = 14.8 				 acc = 0.3267
Training Period: 1 	 Epoch: 26 	 time = 26.3 	 loss = 0.073862 	 acc = 0.7120
Test(n&o)Period: 1 	 Epoch: 26 	

In [9]:
# Debug check: emit a marker only when the temperature-scaling flag is truthy.
if args.use_ts:
    print('a')

a


In [None]:
epoch count: 0 	 accuracy: 42.86
best acc: 0 	 best acc: 42.86
epoch count: 1 	 accuracy: 48.08
best acc: 1 	 best acc: 48.08
epoch count: 2 	 accuracy: 50.50
best acc: 2 	 best acc: 50.50
epoch count: 3 	 accuracy: 57.78
best acc: 3 	 best acc: 57.78
epoch count: 4 	 accuracy: 58.14
best acc: 4 	 best acc: 58.14
epoch count: 5 	 accuracy: 63.66
best acc: 5 	 best acc: 63.66
epoch count: 6 	 accuracy: 63.52
best acc: 5 	 best acc: 63.66
epoch count: 7 	 accuracy: 62.68
best acc: 5 	 best acc: 63.66
epoch count: 8 	 accuracy: 67.59
best acc: 8 	 best acc: 67.59
epoch count: 9 	 accuracy: 62.43
best acc: 8 	 best acc: 67.59
epoch count: 10 	 accuracy: 68.83
best acc: 10 	 best acc: 68.83
epoch count: 11 	 accuracy: 70.76
best acc: 11 	 best acc: 70.76
epoch count: 12 	 accuracy: 70.52
best acc: 11 	 best acc: 70.76
epoch count: 13 	 accuracy: 66.54
best acc: 11 	 best acc: 70.76
epoch count: 14 	 accuracy: 67.69
best acc: 11 	 best acc: 70.76