In [3]:
 %%writefile Finetune_SGD_EBLL.py


from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as pltD
import time
import copy
import os
import shutil
import sys
sys.path.append('../utils')
from ImageFolderTrainVal import *
from AlexNet_EBLL import *
#from test_network import *
from SGD_Training import *
import torch.nn.functional as F
#from Elastic_utils import Elastic_Training
import pdb

Overwriting Finetune_SGD_EBLL.py


In [4]:
 %%writefile -a Finetune_SGD_EBLL.py

def add_task_autoencoder_for_training(current_model):
    """Wrap ``current_model`` together with a fresh autoencoder.

    Builds a bare ``torch.nn.Module`` container holding, in order, the feature
    extractor of ``current_model``, a new ``AutoEncoder`` and the classifier of
    ``current_model``. The container defines no ``forward``; it only groups
    the sub-modules.

    Args:
        current_model: a network exposing ``features`` and ``classifier``.

    Returns:
        The new container module with ``features``, ``autoencoder`` and
        ``classifier`` children.
    """
    new_model = torch.nn.Module()
    new_model.add_module('features', current_model.features)
    # The original passed 100 to add_module() (misplaced parenthesis), which
    # raises a TypeError. NOTE(review): assumes AutoEncoder takes
    # (input_dim, code_dim) -- confirm against AlexNet_EBLL.AutoEncoder.
    new_model.add_module('autoencoder', AutoEncoder(256 * 6 * 6, 100))
    new_model.add_module('classifier', current_model.classifier)
    return new_model

Appending to Finetune_SGD_EBLL.py


In [5]:
 %%writefile -a Finetune_SGD_EBLL.py


def exp_lr_scheduler(optimizer, epoch, init_lr=0.0008, lr_decay_epoch=45):
    """Step schedule: multiply ``init_lr`` by 0.1 once per ``lr_decay_epoch`` epochs.

    The computed rate is written into every parameter group of ``optimizer``
    and the same optimizer object is returned.
    """
    completed_steps = epoch // lr_decay_epoch
    lr = init_lr * (0.1**completed_steps)
    print('lr is '+str(lr))

    # Extra log line only on the epochs where the rate actually changes.
    on_decay_boundary = (epoch % lr_decay_epoch == 0)
    if on_decay_boundary:
        print('LR is set to {}'.format(lr))

    for group in optimizer.param_groups:
        group['lr'] = lr

    return optimizer

Appending to Finetune_SGD_EBLL.py


In [8]:
 %%writefile -a Finetune_SGD_EBLL.py
    
    
def distillation_loss(y, teacher_scores, T, scale):
    """Compute the knowledge-distillation loss (soft-target cross-entropy).

    Both the student logits ``y`` and the teacher logits ``teacher_scores``
    are softened with temperature ``T``; the result is the cross-entropy
    between the two softened distributions, averaged over the batch
    (dimension 0). The class axis is dimension 1.

    Args:
        y: student logits, indexed as (batch, classes).
        teacher_scores: teacher logits with the same layout as ``y``.
        T: softmax temperature.
        scale: number of classes. Kept for backward compatibility only; the
            class count is now taken from the tensors themselves.

    Returns:
        The batch-averaged loss.
    """
    # Student: subtract the row-wise max before exponentiating so exp() cannot overflow.
    max_y, _ = y.max(1)
    y_safe = (y - max_y.view(y.size(0), 1).expand_as(y)) / T
    log_norm = torch.log(torch.exp(y_safe).sum(1))

    # Teacher: softmax(t)^(1/T) renormalised equals softmax(t / T). Computing it
    # directly avoids the deprecated dim-less F.softmax call and the underflow
    # of tiny probabilities before the power is applied.
    max_t, _ = teacher_scores.max(1)
    t_safe = (teacher_scores
              - max_t.view(teacher_scores.size(0), 1).expand_as(teacher_scores)) / T
    t_exp = torch.exp(t_safe)
    t_sum = t_exp.sum(1)
    p_teacher_scores = t_exp / t_sum.view(t_sum.size(0), 1).expand_as(t_exp)

    # Per sample: log(sum_j exp(y_j)) - sum_j p_j * y_j, then mean over the batch.
    loss = torch.sum(log_norm - torch.sum(p_teacher_scores * y_safe, 1))
    return loss / teacher_scores.size(0)

Appending to Finetune_SGD_EBLL.py


In [10]:
 %%writefile -a Finetune_SGD_EBLL.py



def train_autoencoder(model,optimizer,task_criterion,encoder_criterion, lr_scheduler,lr,dset_loaders,dset_sizes,use_gpu, num_epochs,exp_dir='./',resume='',alpha=1e-6):
    """Train the autoencoder with a task loss plus a reconstruction loss.

    ``model(inputs)`` must return ``(outputs, encoder_input, encoder_output)``.
    The optimised objective is ``task_criterion(outputs, labels) +
    alpha * encoder_criterion(encoder_output, encoder_input)``.

    Args:
        model: network to train.
        optimizer: optimizer stepped during the 'train' phase.
        task_criterion: loss applied to the class predictions.
        encoder_criterion: reconstruction loss of the autoencoder.
        lr_scheduler, lr: accepted for signature compatibility; the scheduler
            is not applied in this loop.
        dset_loaders: dict with 'train' and 'val' data loaders.
        dset_sizes: dict with the number of samples of each phase.
        use_gpu: move every batch to CUDA when true.
        num_epochs: last epoch index (exclusive).
        exp_dir: directory receiving 'best_model.pth.tar' and 'epoch.pth.tar'.
        resume: checkpoint path; when it is a file, model/optimizer state and
            the starting epoch are restored from it.
        alpha: weight of the reconstruction loss.

    Returns:
        The trained model (state after the last epoch).
    """
    def _as_number(value):
        # Python scalar from a 0-dim tensor (newer PyTorch), a legacy
        # one-element Variable, or a value that is already a plain number.
        if hasattr(value, 'item'):
            return value.item()
        if hasattr(value, 'data'):
            return value.data[0]
        return value

    best_encoder_loss = 10e5  # arbitrary big number
    if os.path.isfile(resume):
        print("=> loading checkpoint '{}'".format(resume))
        # NOTE: torch.load unpickles arbitrary objects -- only load trusted files.
        checkpoint = torch.load(resume)
        start_epoch = checkpoint['epoch']

        model.load_state_dict(checkpoint['state_dict'])

        print('load')
        optimizer.load_state_dict(checkpoint['optimizer'])

        print("=> loaded checkpoint '{}' (epoch {})"
              .format(resume, checkpoint['epoch']))
    else:
        start_epoch = 0
        print("=> no checkpoint found at '{}'".format(resume))

    print(str(start_epoch))

    for epoch in range(start_epoch, num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            # Training mode for 'train', evaluation mode for 'val'.
            model.train(phase == 'train')

            running_loss = 0.0
            running_encoder_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for data in dset_loaders[phase]:
                inputs, labels = data

                # wrap them in Variable
                if use_gpu:
                    inputs, labels = Variable(inputs.cuda()), \
                        Variable(labels.cuda())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)

                # zero the parameter gradients
                optimizer.zero_grad()
                model.zero_grad()

                # forward
                outputs, encoder_input, encoder_output = model(inputs)
                # Re-wrapped so it is used as the reconstruction target.
                # NOTE(review): presumably this keeps gradients from flowing
                # into the target -- confirm for the PyTorch version in use.
                encoder_input = Variable(encoder_input)
                _, preds = torch.max(outputs.data, 1)
                task_loss = task_criterion(outputs, labels)
                encoder_loss = encoder_criterion(encoder_output, encoder_input)

                total_loss = alpha*encoder_loss + task_loss

                if phase == 'train':
                    total_loss.backward()
                    optimizer.step()

                # statistics
                running_encoder_loss += _as_number(encoder_loss)
                running_loss += _as_number(total_loss)
                running_corrects += _as_number(torch.sum(preds == labels.data))

            epoch_loss = running_loss / dset_sizes[phase]
            epoch_encoder_loss = running_encoder_loss / dset_sizes[phase]
            epoch_acc = running_corrects / dset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))
            print('{} Encoder LOSS: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_encoder_loss, epoch_acc))

            # Model selection uses the validation reconstruction loss (lower is better).
            if phase == 'val' and epoch_encoder_loss < best_encoder_loss:
                best_encoder_loss = epoch_encoder_loss
                torch.save(model, os.path.join(exp_dir, 'best_model.pth.tar'))

        # os.path.join keeps this path identical to the 'epoch.pth.tar' resume
        # path built by the callers, including when exp_dir is ''.
        epoch_file_name = os.path.join(exp_dir, 'epoch.pth.tar')
        save_checkpoint({
            'epoch': epoch + 1,
            'encoder_loss': epoch_encoder_loss,
            'arch': 'alexnet',
            'model': model,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }, epoch_file_name)
        print()

    # The tracked quantity is a loss, not an accuracy.
    print('Best val encoder loss: {:4f}'.format(best_encoder_loss))
    return model

Appending to Finetune_SGD_EBLL.py


In [11]:
 %%writefile -a Finetune_SGD_EBLL.py


def train_model_ebll(model,original_model, criterion, code_criterion,optimizer, lr_scheduler,lr,dset_loaders,dset_sizes,use_gpu, num_epochs,exp_dir='./',resume='',temperature=2,alpha=1e-6):
    """Train ``model`` on a new task while preserving previous tasks.

    Both ``model(inputs)`` and ``original_model(inputs)`` must return
    ``(list_of_logits, list_of_codes)``. During training the objective is::

        criterion(logits[-1], labels)
        + sum_i distillation_loss(logits[i], teacher_logits[i])
        + alpha * sum_i code_criterion(codes[i], teacher_codes[i])

    During validation only the task loss on the last head is evaluated.

    Args:
        model: network being trained (student).
        original_model: frozen previous-task network (teacher); it is put in
            eval mode and its outputs are used as constants.
        criterion: loss of the new task.
        code_criterion: loss between student and teacher codes.
        optimizer: optimizer stepped during the 'train' phase.
        lr_scheduler: callable ``(optimizer, epoch, lr) -> optimizer`` applied
            at the start of every training phase.
        lr: initial learning rate handed to ``lr_scheduler``.
        dset_loaders: dict with 'train' and 'val' data loaders.
        dset_sizes: dict with the number of samples of each phase.
        use_gpu: move every batch to CUDA when true.
        num_epochs: last epoch index (exclusive).
        exp_dir: directory receiving 'best_model.pth.tar' and 'epoch.pth.tar'.
        resume: checkpoint path; when it is a file, model/optimizer state and
            the starting epoch are restored from it.
        temperature: distillation temperature.
        alpha: weight of the code loss.

    Returns:
        The trained model (state after the last epoch).
    """
    def _as_number(value):
        # Python scalar from a 0-dim tensor (newer PyTorch), a legacy
        # one-element Variable, or a value that is already a plain number
        # (e.g. code_loss left at 0.0 when there are no previous codes).
        if hasattr(value, 'item'):
            return value.item()
        if hasattr(value, 'data'):
            return value.data[0]
        return value

    print('dictoinary length'+str(len(dset_loaders)))
    # set original model to eval mode
    original_model.eval()

    best_acc = 0.0
    if os.path.isfile(resume):
        print("=> loading checkpoint '{}'".format(resume))
        # NOTE: torch.load unpickles arbitrary objects -- only load trusted files.
        checkpoint = torch.load(resume)
        start_epoch = checkpoint['epoch']

        model.load_state_dict(checkpoint['state_dict'])

        print('load')
        optimizer.load_state_dict(checkpoint['optimizer'])

        print("=> loaded checkpoint '{}' (epoch {})"
              .format(resume, checkpoint['epoch']))
    else:
        start_epoch = 0
        print("=> no checkpoint found at '{}'".format(resume))

    print(str(start_epoch))

    for epoch in range(start_epoch, num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = (phase == 'train')
            if is_train:
                optimizer = lr_scheduler(optimizer, epoch, lr)
            model.train(is_train)

            running_loss = 0.0
            running_corrects = 0
            running_code_loss = 0.0

            # Iterate over data.
            for data in dset_loaders[phase]:
                inputs, labels = data

                # The teacher only runs while training, so the extra copy of
                # the batch is prepared only then.
                if is_train:
                    original_inputs = inputs.clone()
                    if use_gpu:
                        original_inputs = original_inputs.cuda()
                    original_inputs = Variable(original_inputs, requires_grad=False)

                # wrap them in Variable
                if use_gpu:
                    inputs, labels = Variable(inputs.cuda()), \
                        Variable(labels.cuda())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)

                # zero the parameter gradients
                optimizer.zero_grad()
                model.zero_grad()
                original_model.zero_grad()

                target_logits = []
                target_codes = []
                if is_train:
                    # Teacher forward pass. Previously this also ran during
                    # validation on a stale training batch, where its result
                    # was never used.
                    original_logits, original_codes = original_model(original_inputs)
                    # Re-wrap the raw data so the targets are constants.
                    target_logits = [Variable(item.data, requires_grad=False)
                                     for item in original_logits]
                    target_codes = [Variable(item.data, requires_grad=False)
                                    for item in original_codes]
                    del original_logits

                # Student forward pass: one logits entry per task, newest last.
                tasks_outputs, tasks_codes = model(inputs)
                _, preds = torch.max(tasks_outputs[-1].data, 1)
                task_loss = criterion(tasks_outputs[-1], labels)

                # Distillation and code losses over all previous tasks
                # (both stay 0.0 during validation).
                dist_loss = 0.0
                code_loss = 0.0
                for idx, target in enumerate(target_logits):
                    dist_loss += distillation_loss(tasks_outputs[idx], target, temperature, target.size(-1))
                for idx, target in enumerate(target_codes):
                    code_loss += code_criterion(tasks_codes[idx], target)

                total_loss = dist_loss + task_loss + alpha*code_loss
                # backpropagate and update
                if is_train:
                    total_loss.backward()
                    optimizer.step()

                # statistics
                running_loss += _as_number(task_loss)
                if is_train:
                    running_code_loss += _as_number(code_loss)
                running_corrects += _as_number(torch.sum(preds == labels.data))

            epoch_loss = running_loss / dset_sizes[phase]
            epoch_acc = running_corrects / dset_sizes[phase]
            epoch_code_loss = running_code_loss / dset_sizes[phase]
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))
            if is_train:
                print('FIRST TASK CODE LOSS', str(epoch_code_loss))

            # Keep the model with the best validation accuracy.
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                torch.save(model, os.path.join(exp_dir, 'best_model.pth.tar'))

        # os.path.join keeps this path identical to the 'epoch.pth.tar' resume
        # path built by the callers, including when exp_dir is ''.
        epoch_file_name = os.path.join(exp_dir, 'epoch.pth.tar')
        save_checkpoint({
            'epoch': epoch + 1,
            'epoch_acc': epoch_acc,
            'arch': 'alexnet',
            'model': model,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }, epoch_file_name)
        print()

    print('Best val Acc: {:4f}'.format(best_acc))
    return model

Appending to Finetune_SGD_EBLL.py


In [12]:
 %%writefile -a Finetune_SGD_EBLL.py

def fine_tune_Adam_Autoencoder(dataset_path,previous_task_model_path,init_model_path='',exp_dir='',batch_size=200, num_epochs=100,lr=0.1,init_freeze=1,lr_decay_epoch=45,pretrained=True,alpha=1e-6):
    """Train the autoencoder of a task using the Adadelta optimizer.

    Despite the function name, the optimizer is ``optim.Adadelta`` and only
    the parameters of ``model.autoencoder`` are optimised.

    Args:
        dataset_path: file loaded with ``torch.load``; must yield a dict with
            'train' and 'val' datasets.
        previous_task_model_path: model to wrap in ``AlexNet_ENCODER``; when it
            is not a file, a torchvision AlexNet is used instead.
        init_model_path, init_freeze, lr_decay_epoch: unused here; kept for
            signature compatibility.
        exp_dir: experiment directory (created if missing); 'epoch.pth.tar'
            inside it is used to resume.
        batch_size: data loader batch size.
        num_epochs: number of epochs passed to ``train_autoencoder``.
        lr: learning rate given to Adadelta.
        pretrained: forwarded to ``models.alexnet`` for the fallback model.
        alpha: weight of the reconstruction loss.

    Returns:
        The model returned by ``train_autoencoder``.
    """
    print('lr is ' + str(lr))

    # NOTE: torch.load unpickles arbitrary objects -- only load trusted files.
    dsets = torch.load(dataset_path)
    dset_loaders = {x: torch.utils.data.DataLoader(dsets[x], batch_size=batch_size,
                                               shuffle=True, num_workers=4)
                for x in ['train', 'val']}
    dset_sizes = {x: len(dsets[x]) for x in ['train', 'val']}

    use_gpu = torch.cuda.is_available()
    resume = os.path.join(exp_dir, 'epoch.pth.tar')

    if os.path.isfile(resume):
        checkpoint = torch.load(resume)
        model_ft = checkpoint['model']
    else:
        if os.path.isfile(previous_task_model_path):
            model_ft = torch.load(previous_task_model_path)
        else:
            model_ft = models.alexnet(pretrained=pretrained)
        if hasattr(model_ft, 'reg_params'):
            model_ft.reg_params = None

        model_ft = AlexNet_ENCODER(model_ft)

    # The training loop writes checkpoints into exp_dir, so it must exist.
    # An empty exp_dir means the current directory and needs no creation.
    if exp_dir and not os.path.exists(exp_dir):
        os.makedirs(exp_dir)

    if use_gpu:
        model_ft = model_ft.cuda()

    # train the autoencoder using two losses: task loss on the reconstructed data and reconstruction MSE
    task_criterion = nn.CrossEntropyLoss()
    encoder_criterion = nn.MSELoss()

    optimizer_ft = optim.Adadelta(model_ft.autoencoder.parameters(), lr)

    model_ft = train_autoencoder(model_ft,optimizer_ft, task_criterion,encoder_criterion,exp_lr_scheduler,lr, dset_loaders,dset_sizes,use_gpu,num_epochs,exp_dir,resume,alpha=alpha)

    return model_ft

Appending to Finetune_SGD_EBLL.py


In [13]:
 %%writefile -a Finetune_SGD_EBLL.py


def fine_tune_SGD_EBLL(dataset_path,previous_task_model_path,autoencoder_model_path,init_model_path='',exp_dir='',batch_size=200, num_epochs=100,lr=0.0004,init_freeze=1,lr_decay_epoch=45,pretrained=True,alpha=1e-6):
    """Run EBLL training for one task of a sequence.

    The previous-task network (the teacher) is rebuilt the same way whether the
    run starts fresh or resumes from ``exp_dir/epoch.pth.tar``: it is wrapped
    in ``AlexNet_EBLL`` together with the freshly trained autoencoder, or, when
    it already is an ``AlexNet_EBLL``, the new encoder is appended to its
    ``autoencoders``. The student starts as this network plus a new output
    head, or is taken from the resume checkpoint.

    Args:
        dataset_path: file loaded with ``torch.load``; must yield a dict with
            'train' and 'val' datasets whose 'train' entry has ``classes``.
        previous_task_model_path: model of the previous task; when it is not a
            file, a torchvision AlexNet is used instead.
        autoencoder_model_path: model whose ``autoencoder`` attribute holds the
            autoencoder trained for the previous task.
        init_model_path: model providing the initial head (``classifier[6]``)
            when ``init_freeze`` is true.
        exp_dir: experiment directory (created if missing).
        batch_size: data loader batch size.
        num_epochs: number of epochs passed to ``train_model_ebll``.
        lr: SGD learning rate.
        init_freeze: when false the new head is a randomly initialised
            ``nn.Linear``; otherwise it is copied from ``init_model_path``.
        lr_decay_epoch: unused here; kept for signature compatibility.
        pretrained: forwarded to ``models.alexnet`` for the fallback model.
        alpha: weight of the code loss.

    Returns:
        The model returned by ``train_model_ebll``.
    """
    print('lr is ' + str(lr))
    # data loader
    # NOTE: torch.load unpickles arbitrary objects -- only load trusted files.
    dsets = torch.load(dataset_path)
    dset_loaders = {x: torch.utils.data.DataLoader(dsets[x], batch_size=batch_size,
                                               shuffle=True, num_workers=4)
                for x in ['train', 'val']}
    dset_sizes = {x: len(dsets[x]) for x in ['train', 'val']}
    dset_classes = dsets['train'].classes

    use_gpu = torch.cuda.is_available()
    resume = os.path.join(exp_dir, 'epoch.pth.tar')
    # load the recently trained autoencoder
    autoencoder_model = torch.load(autoencoder_model_path)

    # Build the previous-task network once, identically for fresh and resumed
    # runs, so the teacher always exposes the code of the newest autoencoder.
    if os.path.isfile(previous_task_model_path):
        previous_model = torch.load(previous_task_model_path)
    else:
        previous_model = models.alexnet(pretrained=pretrained)
    if hasattr(previous_model, 'reg_params'):
        previous_model.reg_params = None
    if not (type(previous_model) is AlexNet_EBLL):
        # make a new AlexNet_EBLL instance that additionally has the trained autoencoder to preserve the codes
        previous_model = AlexNet_EBLL(previous_model, autoencoder_model.autoencoder)
    else:
        # add the new encoder to the previous AlexNet_EBLL
        # (in case of a sequence longer than 2)
        previous_model.autoencoders.add_module(str(len(previous_model.autoencoders._modules)), autoencoder_model.autoencoder.encode)

    original_model = copy.deepcopy(previous_model)

    if os.path.isfile(resume):
        checkpoint = torch.load(resume)
        model_ft = checkpoint['model']
        del checkpoint
        del previous_model
    else:
        model_ft = previous_model
        num_ftrs = model_ft.classifier[6].in_features
        # initialize a new head from the trained feature extractor, or randomly
        head_name = str(len(model_ft.classifier._modules))
        if not init_freeze:
            model_ft.classifier.add_module(head_name, nn.Linear(num_ftrs, len(dset_classes)))
        else:
            init_model = torch.load(init_model_path)
            model_ft.classifier.add_module(head_name, init_model.classifier[6])
            del init_model

    # An empty exp_dir means the current directory and needs no creation.
    if exp_dir and not os.path.exists(exp_dir):
        os.makedirs(exp_dir)

    if use_gpu:
        model_ft = model_ft.cuda()
        original_model = original_model.cuda()

    criterion = nn.CrossEntropyLoss()
    encoder_criterion = nn.MSELoss()
    # Only the feature extractor and the classifier heads are optimised; the
    # autoencoders are left out of the optimizer.
    params = list(model_ft.features.parameters()) + list(model_ft.classifier.parameters())
    optimizer_ft = optim.SGD(params, lr, momentum=0.9)

    model_ft = train_model_ebll(model_ft,original_model, criterion, encoder_criterion,optimizer_ft,exp_lr_scheduler,lr, dset_loaders,dset_sizes,use_gpu,num_epochs,exp_dir,resume,alpha=alpha)

    return model_ft

Appending to Finetune_SGD_EBLL.py


In [14]:
 %%writefile -a Finetune_SGD_EBLL.py
    
    
def test_model(model_path,dataset_path,batch_size=200,task_index=0,check=0):
    """Evaluate a saved model on the validation split and return its accuracy.

    Args:
        model_path: file loaded with ``torch.load``; either the model itself or,
            when ``check`` is true, a checkpoint dict with a 'model' entry.
        dataset_path: file loaded with ``torch.load``; must yield a dict with
            'train' and 'val' datasets whose 'train' entry has ``classes``.
        batch_size: validation batch size.
        task_index: which entry of the model's list of outputs to score.
        check: treat ``model_path`` as a checkpoint dict when true.

    Returns:
        Accuracy in percent over the validation set.
    """
    use_gpu = torch.cuda.is_available()
    # NOTE: torch.load unpickles arbitrary objects -- only load trusted files.
    model = torch.load(model_path)
    if check:
        model = model['model']
    if use_gpu:
        model = model.cuda()
    # Evaluation mode, so layers such as dropout behave deterministically.
    model.eval()

    dsets = torch.load(dataset_path)
    # Only the validation split is iterated; sample order does not affect accuracy.
    val_loader = torch.utils.data.DataLoader(dsets['val'], batch_size,
                                             shuffle=False, num_workers=4)
    dset_classes = dsets['train'].classes
    class_correct = [0.] * len(dset_classes)
    class_total = [0.] * len(dset_classes)

    for images, labels in val_loader:
        if use_gpu:
            images = images.cuda()
            labels = labels.cuda()
        outputs, codes = model(Variable(images))
        _, predicted = torch.max(outputs[task_index].data, 1)
        # Flatten instead of squeeze() so a batch of one sample stays indexable,
        # and convert to Python numbers before using them as list indices.
        hits = predicted.view(-1).eq(labels.view(-1)).cpu().tolist()
        targets = labels.view(-1).cpu().tolist()
        for label, hit in zip(targets, hits):
            class_correct[label] += hit
            class_total[label] += 1
        del images
        del labels
        del outputs

    accuracy = np.sum(class_correct)*100/np.sum(class_total)
    print('Accuracy: ' +str(accuracy))
    return accuracy

Appending to Finetune_SGD_EBLL.py


In [15]:
 %%writefile -a Finetune_SGD_EBLL.py


def fine_tune_freeze(dataset_path,model_path,exp_dir,batch_size=100, num_epochs=100,lr=0.0004):
    """Train only a new output head (warm-up phase); all other layers stay fixed.

    A fresh ``nn.Linear`` head replaces ``classifier['6']`` and is the only
    module handed to the SGD optimizer. When ``exp_dir/epoch.pth.tar`` exists,
    the model stored in that checkpoint is used instead of rebuilding one.

    Args:
        dataset_path: file loaded with ``torch.load``; must yield a dict with
            'train' and 'val' datasets whose 'train' entry has ``classes``.
        model_path: starting model; when it is not a file, a pretrained
            torchvision AlexNet is used.
        exp_dir: experiment directory (created if missing).
        batch_size: data loader batch size.
        num_epochs: number of epochs passed to ``train_model``.
        lr: SGD learning rate.

    Returns:
        The model returned by ``train_model``.
    """
    print('lr is ' + str(lr))

    # NOTE: torch.load unpickles arbitrary objects -- only load trusted files.
    dsets = torch.load(dataset_path)
    dset_loaders = {x: torch.utils.data.DataLoader(dsets[x], batch_size=batch_size,
                                               shuffle=True, num_workers=4)
                for x in ['train', 'val']}
    dset_sizes = {x: len(dsets[x]) for x in ['train', 'val']}
    dset_classes = dsets['train'].classes

    use_gpu = torch.cuda.is_available()
    resume = os.path.join(exp_dir, 'epoch.pth.tar')
    if os.path.isfile(resume):
        # Previously this model was loaded and then immediately overwritten by
        # the branch below; it is now actually used, matching the other
        # fine_tune_* entry points.
        checkpoint = torch.load(resume)
        model_ft = checkpoint['model']
        del checkpoint
    else:
        if os.path.isfile(model_path):
            model_ft = torch.load(model_path)
        else:
            model_ft = models.alexnet(pretrained=True)

        if type(model_ft) is AlexNet_EBLL:
            # Re-host the layers in a plain AlexNet. Both parameterised parts
            # are replaced right away, so pretrained weights are not needed.
            plain_model = models.alexnet(pretrained=False)
            plain_model.features = model_ft.features
            plain_model.classifier = model_ft.classifier
            model_ft = plain_model
            num_ftrs = model_ft.classifier[6].in_features
            # Drop the trailing classifier modules one by one, up to and
            # including the module named '6'.
            while True:
                name, _ = model_ft.classifier._modules.popitem()
                if name == '6':
                    break
        else:
            num_ftrs = model_ft.classifier[6].in_features

        model_ft.classifier._modules['6'] = nn.Linear(num_ftrs, len(dset_classes))

    # An empty exp_dir means the current directory and needs no creation.
    if exp_dir and not os.path.exists(exp_dir):
        os.makedirs(exp_dir)
    if use_gpu:
        model_ft = model_ft.cuda()

    criterion = nn.CrossEntropyLoss()

    # Only the new head is optimised.
    optimizer_ft = optim.SGD(model_ft.classifier._modules['6'].parameters(), lr, momentum=0.9)

    model_ft = train_model(model_ft, criterion, optimizer_ft,exp_lr_scheduler,lr, dset_loaders,dset_sizes,use_gpu,num_epochs,exp_dir,resume)

    return model_ft


Appending to Finetune_SGD_EBLL.py
